Interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input

/**
 * Input options for the Whisper Large V3 Turbo model.
 * Only `audio` is required; every other property is optional.
 */
interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
    /**
     * The audio input: either a string or an object with optional `body` and
     * `contentType` fields.
     * NOTE(review): this page gives no description of the expected string
     * encoding or the shape of `body` — confirm against the model documentation.
     */
    audio: string | { body?: object; contentType?: string };
    /** Number of beams to use in beam search decoding. Higher values may improve accuracy at the cost of speed. */
    beam_size?: number;
    /** Threshold for filtering out segments with a high compression ratio, which often indicates repetitive or hallucinated text. */
    compression_ratio_threshold?: number;
    /** Whether to condition on previous text during transcription. Setting this to false may help prevent hallucination loops. */
    condition_on_previous_text?: boolean;
    /** Threshold (in seconds) for skipping silent periods that may cause hallucinations. */
    hallucination_silence_threshold?: number;
    /** A text prompt that gives the model context about the contents of the audio. */
    initial_prompt?: string;
    /** The language of the audio being transcribed or translated. */
    language?: string;
    /** Threshold for filtering out segments with a low average log probability, indicating low confidence. */
    log_prob_threshold?: number;
    /** Threshold for detecting no-speech segments. Segments with a no-speech probability above this value are skipped. */
    no_speech_threshold?: number;
    /** Text placed at the beginning of the transcription output; it can guide the transcription result. */
    prefix?: string;
    /** The task to perform. Supported tasks are 'translate' and 'transcribe'. */
    task?: string;
    /** Whether to preprocess the audio with a voice activity detection model. */
    vad_filter?: boolean;
}

Index

Properties

audio beam_size? compression_ratio_threshold? condition_on_previous_text? hallucination_silence_threshold? initial_prompt? language? log_prob_threshold? no_speech_threshold? prefix? task? vad_filter?

Properties

audio

audio: string | { body?: object; contentType?: string }

`Optional`beam_size

beam_size?: number

The number of beams to use in beam search decoding. Higher values may improve accuracy at the cost of speed.

`Optional`compression_ratio_threshold

compression_ratio_threshold?: number

Threshold for filtering out segments with high compression ratio, which often indicate repetitive or hallucinated text.

`Optional`condition_on_previous_text

condition_on_previous_text?: boolean

Whether to condition on previous text during transcription. Setting to false may help prevent hallucination loops.

`Optional`hallucination_silence_threshold

hallucination_silence_threshold?: number

Optional threshold (in seconds) to skip silent periods that may cause hallucinations.

`Optional`initial_prompt

initial_prompt?: string

A text prompt to help provide context to the model on the contents of the audio.

`Optional`language

language?: string

The language of the audio being transcribed or translated.

`Optional`log_prob_threshold

log_prob_threshold?: number

Threshold for filtering out segments with low average log probability, indicating low confidence.

`Optional`no_speech_threshold

no_speech_threshold?: number

Threshold for detecting no-speech segments. Segments with no-speech probability above this value are skipped.

`Optional`prefix

prefix?: string

A prefix prepended to the beginning of the transcription output; it can guide the transcription result.

`Optional`task

task?: string

The task to perform. Supported tasks are 'translate' and 'transcribe'.

`Optional`vad_filter

vad_filter?: boolean

Preprocess the audio with a voice activity detection model.

Interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input

Index

Properties

Properties

audio

`Optional`beam_size

`Optional`compression_ratio_threshold

`Optional`condition_on_previous_text

`Optional`hallucination_silence_threshold

`Optional`initial_prompt

`Optional`language

`Optional`log_prob_threshold

`Optional`no_speech_threshold

`Optional`prefix

`Optional`task

`Optional`vad_filter

Settings

On This Page

Interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input

Index

Properties

Properties

audio

Optionalbeam_size

Optionalcompression_ratio_threshold

Optionalcondition_on_previous_text

Optionalhallucination_silence_threshold

Optionalinitial_prompt

Optionallanguage

Optionallog_prob_threshold

Optionalno_speech_threshold

Optionalprefix

Optionaltask

Optionalvad_filter

Settings

On This Page

`Optional`beam_size

`Optional`compression_ratio_threshold

`Optional`condition_on_previous_text

`Optional`hallucination_silence_threshold

`Optional`initial_prompt

`Optional`language

`Optional`log_prob_threshold

`Optional`no_speech_threshold

`Optional`prefix

`Optional`task

`Optional`vad_filter