{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "#/components/schemas/FallbackMinimaxVoice",
"title": "FallbackMinimaxVoice",
"type": "object",
"properties": {
"cachingEnabled": {
"type": "boolean",
"description": "This is the flag to toggle voice caching for the assistant.",
"example": true,
"default": true
},
"provider": {
"type": "string",
"description": "This is the voice provider that will be used.",
"enum": [
"minimax"
]
},
"voiceId": {
"type": "string",
"description": "This is the provider-specific ID that will be used. Use a voice from MINIMAX_PREDEFINED_VOICES or a custom cloned voice ID.",
"title": "This is the Minimax Voice ID"
},
"model": {
"type": "string",
"description": "This is the model that will be used. Options are 'speech-02-hd' and 'speech-02-turbo'.\nspeech-02-hd is optimized for high-fidelity applications like voiceovers and audiobooks.\nspeech-02-turbo is designed for real-time applications with low latency.\n\n@default \"speech-02-turbo\"",
"enum": [
"speech-02-hd",
"speech-02-turbo",
"speech-2.5-turbo-preview"
],
"example": "speech-02-turbo",
"default": "speech-02-turbo"
},
"emotion": {
"type": "string",
"description": "The emotion to use for the voice. If not provided, will use auto-detect mode.\nOptions include: 'happy', 'sad', 'angry', 'fearful', 'surprised', 'disgusted', 'neutral'",
"example": "happy"
},
"subtitleType": {
"type": "string",
"description": "Controls the granularity of subtitle/timing data returned by Minimax\nduring synthesis. Set to 'word' to receive per-word timestamps in\nassistant.speechStarted events for karaoke-style caption rendering.\n\n@default \"sentence\"",
"enum": [
"word",
"sentence"
],
"default": "sentence"
},
"pitch": {
"type": "number",
"description": "Voice pitch adjustment. Range from -12 to 12 semitones.\n@default 0",
"minimum": -12,
"maximum": 12,
"example": 0,
"default": 0
},
"speed": {
"type": "number",
"description": "Voice speed adjustment. Range from 0.5 to 2.0.\n@default 1.0",
"minimum": 0.5,
"maximum": 2,
"example": 1,
"default": 1
},
"volume": {
"type": "number",
"description": "Voice volume adjustment. Range from 0.5 to 2.0.\n@default 1.0",
"minimum": 0.5,
"maximum": 2,
"example": 1,
"default": 1
},
"region": {
"type": "string",
"description": "The region for Minimax API. Defaults to \"worldwide\".",
"enum": [
"worldwide",
"china"
],
"default": "worldwide"
},
"languageBoost": {
"type": "string",
"description": "Language hint for MiniMax T2A. Example: yue (Cantonese), zh (Chinese), en (English).",
"enum": [
"Chinese",
"Chinese,Yue",
"English",
"Arabic",
"Russian",
"Spanish",
"French",
"Portuguese",
"German",
"Turkish",
"Dutch",
"Ukrainian",
"Vietnamese",
"Indonesian",
"Japanese",
"Italian",
"Korean",
"Thai",
"Polish",
"Romanian",
"Greek",
"Czech",
"Finnish",
"Hindi",
"Bulgarian",
"Danish",
"Hebrew",
"Malay",
"Persian",
"Slovak",
"Swedish",
"Croatian",
"Filipino",
"Hungarian",
"Norwegian",
"Slovenian",
"Catalan",
"Nynorsk",
"Tamil",
"Afrikaans",
"auto"
]
},
"textNormalizationEnabled": {
"type": "boolean",
"description": "Enable MiniMax text normalization to improve number reading and formatting.",
"default": true
},
"chunkPlan": {
"description": "This is the plan for chunking the model output before it is sent to the voice provider.",
"allOf": [
{
"$ref": "#/components/schemas/ChunkPlan"
}
]
}
},
"required": [
"provider",
"voiceId"
]
}