Vapi · Schema

FallbackMinimaxVoice

AI Voice Agents · Realtime · CPaaS

Properties

Name	Type	Description
cachingEnabled	boolean	This is the flag to toggle voice caching for the assistant.
provider	string	This is the voice provider that will be used.
voiceId	string	This is the provider-specific ID that will be used. Use a voice from MINIMAX_PREDEFINED_VOICES or a custom cloned voice ID.
model	string	This is the model that will be used. Options are 'speech-02-hd', 'speech-02-turbo', and 'speech-2.5-turbo-preview'. speech-02-hd is optimized for high-fidelity applications like voiceovers and audiobooks. speech-02-turbo is designed for real-time applications with low latency. @default "speech-02-turbo"
emotion	string	The emotion to use for the voice. If not provided, will use auto-detect mode. Options include: 'happy', 'sad', 'angry', 'fearful', 'surprised', 'disgusted', 'neutral'
subtitleType	string	Controls the granularity of subtitle/timing data returned by Minimax during synthesis. Set to 'word' to receive per-word timestamps in assistant.speechStarted events for karaoke-style caption rendering. @default "sentence"
pitch	number	Voice pitch adjustment. Range from -12 to 12 semitones. @default 0
speed	number	Voice speed adjustment. Range from 0.5 to 2.0. @default 1.0
volume	number	Voice volume adjustment. Range from 0.5 to 2.0. @default 1.0
region	string	The region for Minimax API. Defaults to "worldwide".
languageBoost	string	Language hint for MiniMax T2A. Must be one of the schema's enum values, e.g. 'Chinese,Yue' (Cantonese), 'Chinese', 'English', or 'auto'.
textNormalizationEnabled	boolean	Enable MiniMax text normalization to improve number reading and formatting.
chunkPlan	object	This is the plan for chunking the model output before it is sent to the voice provider.

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/FallbackMinimaxVoice",
  "title": "FallbackMinimaxVoice",
  "type": "object",
  "properties": {
    "cachingEnabled": {
      "type": "boolean",
      "description": "This is the flag to toggle voice caching for the assistant.",
      "example": true,
      "default": true
    },
    "provider": {
      "type": "string",
      "description": "This is the voice provider that will be used.",
      "enum": [
        "minimax"
      ]
    },
    "voiceId": {
      "type": "string",
      "description": "This is the provider-specific ID that will be used. Use a voice from MINIMAX_PREDEFINED_VOICES or a custom cloned voice ID.",
      "title": "This is the Minimax Voice ID"
    },
    "model": {
      "type": "string",
      "description": "This is the model that will be used. Options are 'speech-02-hd', 'speech-02-turbo', and 'speech-2.5-turbo-preview'.\nspeech-02-hd is optimized for high-fidelity applications like voiceovers and audiobooks.\nspeech-02-turbo is designed for real-time applications with low latency.\n\n@default \"speech-02-turbo\"",
      "enum": [
        "speech-02-hd",
        "speech-02-turbo",
        "speech-2.5-turbo-preview"
      ],
      "example": "speech-02-turbo",
      "default": "speech-02-turbo"
    },
    "emotion": {
      "type": "string",
      "description": "The emotion to use for the voice. If not provided, will use auto-detect mode.\nOptions include: 'happy', 'sad', 'angry', 'fearful', 'surprised', 'disgusted', 'neutral'",
      "example": "happy"
    },
    "subtitleType": {
      "type": "string",
      "description": "Controls the granularity of subtitle/timing data returned by Minimax\nduring synthesis. Set to 'word' to receive per-word timestamps in\nassistant.speechStarted events for karaoke-style caption rendering.\n\n@default \"sentence\"",
      "enum": [
        "word",
        "sentence"
      ],
      "default": "sentence"
    },
    "pitch": {
      "type": "number",
      "description": "Voice pitch adjustment. Range from -12 to 12 semitones.\n@default 0",
      "minimum": -12,
      "maximum": 12,
      "example": 0,
      "default": 0
    },
    "speed": {
      "type": "number",
      "description": "Voice speed adjustment. Range from 0.5 to 2.0.\n@default 1.0",
      "minimum": 0.5,
      "maximum": 2,
      "example": 1,
      "default": 1
    },
    "volume": {
      "type": "number",
      "description": "Voice volume adjustment. Range from 0.5 to 2.0.\n@default 1.0",
      "minimum": 0.5,
      "maximum": 2,
      "example": 1,
      "default": 1
    },
    "region": {
      "type": "string",
      "description": "The region for Minimax API. Defaults to \"worldwide\".",
      "enum": [
        "worldwide",
        "china"
      ],
      "default": "worldwide"
    },
    "languageBoost": {
      "type": "string",
      "description": "Language hint for MiniMax T2A. Must be one of the enum values, e.g. 'Chinese,Yue' (Cantonese), 'Chinese', 'English', or 'auto'.",
      "enum": [
        "Chinese",
        "Chinese,Yue",
        "English",
        "Arabic",
        "Russian",
        "Spanish",
        "French",
        "Portuguese",
        "German",
        "Turkish",
        "Dutch",
        "Ukrainian",
        "Vietnamese",
        "Indonesian",
        "Japanese",
        "Italian",
        "Korean",
        "Thai",
        "Polish",
        "Romanian",
        "Greek",
        "Czech",
        "Finnish",
        "Hindi",
        "Bulgarian",
        "Danish",
        "Hebrew",
        "Malay",
        "Persian",
        "Slovak",
        "Swedish",
        "Croatian",
        "Filipino",
        "Hungarian",
        "Norwegian",
        "Slovenian",
        "Catalan",
        "Nynorsk",
        "Tamil",
        "Afrikaans",
        "auto"
      ]
    },
    "textNormalizationEnabled": {
      "type": "boolean",
      "description": "Enable MiniMax text normalization to improve number reading and formatting.",
      "default": true
    },
    "chunkPlan": {
      "description": "This is the plan for chunking the model output before it is sent to the voice provider.",
      "allOf": [
        {
          "$ref": "#/components/schemas/ChunkPlan"
        }
      ]
    }
  },
  "required": [
    "provider",
    "voiceId"
  ]
}