{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "#/components/schemas/SonioxTranscriber",
"title": "SonioxTranscriber",
"type": "object",
"properties": {
"provider": {
"type": "string",
"enum": [
"soniox"
]
},
"model": {
"type": "string",
"enum": [
"stt-rt-v4"
],
"description": "The Soniox model to use for transcription."
},
"language": {
"type": "string",
"enum": [
"aa",
"ab",
"ae",
"af",
"ak",
"am",
"an",
"ar",
"as",
"av",
"ay",
"az",
"ba",
"be",
"bg",
"bh",
"bi",
"bm",
"bn",
"bo",
"br",
"bs",
"ca",
"ce",
"ch",
"co",
"cr",
"cs",
"cu",
"cv",
"cy",
"da",
"de",
"dv",
"dz",
"ee",
"el",
"en",
"eo",
"es",
"et",
"eu",
"fa",
"ff",
"fi",
"fj",
"fo",
"fr",
"fy",
"ga",
"gd",
"gl",
"gn",
"gu",
"gv",
"ha",
"he",
"hi",
"ho",
"hr",
"ht",
"hu",
"hy",
"hz",
"ia",
"id",
"ie",
"ig",
"ii",
"ik",
"io",
"is",
"it",
"iu",
"ja",
"jv",
"ka",
"kg",
"ki",
"kj",
"kk",
"kl",
"km",
"kn",
"ko",
"kr",
"ks",
"ku",
"kv",
"kw",
"ky",
"la",
"lb",
"lg",
"li",
"ln",
"lo",
"lt",
"lu",
"lv",
"mg",
"mh",
"mi",
"mk",
"ml",
"mn",
"mr",
"ms",
"mt",
"my",
"na",
"nb",
"nd",
"ne",
"ng",
"nl",
"nn",
"no",
"nr",
"nv",
"ny",
"oc",
"oj",
"om",
"or",
"os",
"pa",
"pi",
"pl",
"ps",
"pt",
"qu",
"rm",
"rn",
"ro",
"ru",
"rw",
"sa",
"sc",
"sd",
"se",
"sg",
"si",
"sk",
"sl",
"sm",
"sn",
"so",
"sq",
"sr",
"ss",
"st",
"su",
"sv",
"sw",
"ta",
"te",
"tg",
"th",
"ti",
"tk",
"tl",
"tn",
"to",
"tr",
"ts",
"tt",
"tw",
"ty",
"ug",
"uk",
"ur",
"uz",
"ve",
"vi",
"vo",
"wa",
"wo",
"xh",
"yi",
"yue",
"yo",
"za",
"zh",
"zu"
],
"description": "Single language for transcription as an ISO 639-1 code (e.g., `en`, `es`); the three-letter code `yue` (Cantonese) is also accepted. For multi-language hints or to enable Soniox auto-detect, use `languages` instead \u2014 when `languages` is set (including to an empty array), this field is ignored when building the Soniox request. Defaults to `en` if neither this nor `languages` is set."
},
"languages": {
"type": "array",
"items": {
"type": "string",
"enum": [
"aa",
"ab",
"ae",
"af",
"ak",
"am",
"an",
"ar",
"as",
"av",
"ay",
"az",
"ba",
"be",
"bg",
"bh",
"bi",
"bm",
"bn",
"bo",
"br",
"bs",
"ca",
"ce",
"ch",
"co",
"cr",
"cs",
"cu",
"cv",
"cy",
"da",
"de",
"dv",
"dz",
"ee",
"el",
"en",
"eo",
"es",
"et",
"eu",
"fa",
"ff",
"fi",
"fj",
"fo",
"fr",
"fy",
"ga",
"gd",
"gl",
"gn",
"gu",
"gv",
"ha",
"he",
"hi",
"ho",
"hr",
"ht",
"hu",
"hy",
"hz",
"ia",
"id",
"ie",
"ig",
"ii",
"ik",
"io",
"is",
"it",
"iu",
"ja",
"jv",
"ka",
"kg",
"ki",
"kj",
"kk",
"kl",
"km",
"kn",
"ko",
"kr",
"ks",
"ku",
"kv",
"kw",
"ky",
"la",
"lb",
"lg",
"li",
"ln",
"lo",
"lt",
"lu",
"lv",
"mg",
"mh",
"mi",
"mk",
"ml",
"mn",
"mr",
"ms",
"mt",
"my",
"na",
"nb",
"nd",
"ne",
"ng",
"nl",
"nn",
"no",
"nr",
"nv",
"ny",
"oc",
"oj",
"om",
"or",
"os",
"pa",
"pi",
"pl",
"ps",
"pt",
"qu",
"rm",
"rn",
"ro",
"ru",
"rw",
"sa",
"sc",
"sd",
"se",
"sg",
"si",
"sk",
"sl",
"sm",
"sn",
"so",
"sq",
"sr",
"ss",
"st",
"su",
"sv",
"sw",
"ta",
"te",
"tg",
"th",
"ti",
"tk",
"tl",
"tn",
"to",
"tr",
"ts",
"tt",
"tw",
"ty",
"ug",
"uk",
"ur",
"uz",
"ve",
"vi",
"vo",
"wa",
"wo",
"xh",
"yi",
"yue",
"yo",
"za",
"zh",
"zu"
]
},
"description": "Language hints sent to Soniox as `language_hints`. Provide `[lang1, lang2, ...]` (ISO 639-1 codes) to bias recognition toward specific languages, or provide an explicit empty array `[]` to enable Soniox auto-detect across all 60+ supported languages. When set (including the empty array), this field takes precedence over the singular `language` field. When omitted, falls back to the singular `language` (which defaults to `en` if also unset). Best accuracy is achieved with a single language."
},
"languageHintsStrict": {
"type": "boolean",
"description": "When `true`, Soniox strictly restricts transcription to the languages in `languages` (or the singular `language` if `languages` is unset). When `false`, Soniox biases toward those languages but still allows transcription in other languages. Has no effect when no language hints are sent (e.g., `languages: []` for auto-detect). Defaults to `true` (strict mode)."
},
"maxEndpointDelayMs": {
"type": "number",
"minimum": 500,
"maximum": 3000,
"description": "Maximum delay in milliseconds between when the speaker stops and when the endpoint is detected. Lower values mean faster turn-taking but more false endpoints. Range: 500-3000. Default: 500."
},
"customVocabulary": {
"description": "Custom vocabulary terms to boost recognition accuracy. Useful for brand names, product names, and domain-specific terminology. Maps to Soniox context.terms.",
"type": "array",
"items": {
"type": "string"
}
},
"contextGeneral": {
"description": "General context key-value pairs that guide the AI model during transcription. Helps adapt vocabulary to the correct domain, improving accuracy. Recommended: 10 or fewer pairs. Maps to Soniox context.general.",
"example": [
{
"key": "domain",
"value": "Healthcare"
},
{
"key": "topic",
"value": "Diabetes management consultation"
}
],
"type": "array",
"items": {
"$ref": "#/components/schemas/SonioxContextGeneralItem"
}
},
"fallbackPlan": {
"description": "This is the plan for transcriber provider fallbacks in the event that the primary transcriber provider fails.",
"allOf": [
{
"$ref": "#/components/schemas/FallbackTranscriberPlan"
}
]
}
},
"required": [
"provider"
]
}