{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "#/components/schemas/FallbackSpeechmaticsTranscriber",
"title": "FallbackSpeechmaticsTranscriber",
"type": "object",
"properties": {
"provider": {
"type": "string",
"description": "This is the transcription provider that will be used.",
"enum": [
"speechmatics"
]
},
"model": {
"type": "string",
"description": "This is the model that will be used for the transcription.",
"enum": [
"default"
]
},
"language": {
"type": "string",
"enum": [
"auto",
"ar",
"ar_en",
"ba",
"eu",
"be",
"bn",
"bg",
"yue",
"ca",
"hr",
"cs",
"da",
"nl",
"en",
"eo",
"et",
"fi",
"fr",
"gl",
"de",
"el",
"he",
"hi",
"hu",
"id",
"ia",
"ga",
"it",
"ja",
"ko",
"lv",
"lt",
"ms",
"en_ms",
"mt",
"cmn",
"cmn_en",
"mr",
"mn",
"no",
"fa",
"pl",
"pt",
"ro",
"ru",
"sk",
"sl",
"es",
"en_es",
"sw",
"sv",
"tl",
"ta",
"en_ta",
"th",
"tr",
"uk",
"ur",
"ug",
"vi",
"cy"
]
},
"operatingPoint": {
"type": "string",
"description": "This is the operating point for the transcription. Choose between `standard` for faster turnaround with strong accuracy or `enhanced` for highest accuracy when precision is critical.\n\n@default 'enhanced'",
"example": "enhanced",
"enum": [
"standard",
"enhanced"
],
"default": "enhanced"
},
"region": {
"type": "string",
"description": "This is the region for the Speechmatics API. Choose between EU (Europe) and US (United States) regions for lower latency and data sovereignty compliance.\n\n@default 'eu'",
"example": "us",
"enum": [
"eu",
"us"
],
"default": "eu"
},
"enableDiarization": {
"type": "boolean",
"description": "This enables speaker diarization, which identifies and separates speakers in the transcription. Essential for multi-speaker conversations and conference calls.\n\n@default false",
"example": true,
"default": false
},
"maxDelay": {
"type": "number",
"description": "This sets the maximum delay in milliseconds for partial transcripts. Balances latency and accuracy.\n\n@default 3000",
"example": 1500,
"minimum": 500,
"maximum": 10000,
"default": 3000
},
"customVocabulary": {
"example": [
{
"content": "Speechmatics",
"soundsLike": [
"speech mattix"
]
}
],
"type": "array",
"items": {
"$ref": "#/components/schemas/SpeechmaticsCustomVocabularyItem"
}
},
"numeralStyle": {
"type": "string",
"description": "This controls how numbers, dates, currencies, and other entities are formatted in the transcription output.\n\n@default 'written'",
"example": "spoken",
"enum": [
"written",
"spoken"
],
"default": "written"
},
"endOfTurnSensitivity": {
"type": "number",
"description": "This is the sensitivity level for end-of-turn detection, which determines when a speaker has finished talking. Higher values are more sensitive.\n\n@default 0.5",
"example": 0.8,
"minimum": 0,
"maximum": 1,
"default": 0.5
},
"removeDisfluencies": {
"type": "boolean",
"description": "This enables removal of disfluencies (um, uh) from the transcript to create cleaner, more professional output.\n\nThis is only supported for the English language transcriber.\n\n@default false",
"example": true,
"default": false
},
"minimumSpeechDuration": {
"type": "number",
"description": "This is the minimum duration in seconds for speech segments. Shorter segments will be filtered out. Helps remove noise and improve accuracy.\n\n@default 0.0",
"example": 0.2,
"minimum": 0,
"maximum": 5,
"default": 0
}
},
"required": [
"provider",
"customVocabulary"
]
}