Vapi · Schema

FallbackMinimaxVoice

AI Voice Agents Realtime CPaaS

Properties

Name Type Description
cachingEnabled boolean This is the flag to toggle voice caching for the assistant.
provider string This is the voice provider that will be used.
voiceId string This is the provider-specific ID that will be used. Use a voice from MINIMAX_PREDEFINED_VOICES or a custom cloned voice ID.
model string This is the model that will be used. Options are 'speech-02-hd', 'speech-02-turbo', and 'speech-2.5-turbo-preview'. speech-02-hd is optimized for high-fidelity applications like voiceovers and audiobooks. speech-02-turbo is designed for real-time applications with low latency. @default "speech-02-turbo"
emotion string The emotion to use for the voice. If not provided, will use auto-detect mode. Options include: 'happy', 'sad', 'angry', 'fearful', 'surprised', 'disgusted', 'neutral'
subtitleType string Controls the granularity of subtitle/timing data returned by Minimax during synthesis. Set to 'word' to receive per-word timestamps in assistant.speechStarted events for karaoke-style caption rendering. @default "sentence"
pitch number Voice pitch adjustment. Range from -12 to 12 semitones. @default 0
speed number Voice speed adjustment. Range from 0.5 to 2.0. @default 1.0
volume number Voice volume adjustment. Range from 0.5 to 2.0. @default 1.0
region string The region for Minimax API. Defaults to "worldwide".
languageBoost string Language hint for MiniMax T2A. Example values: 'Chinese,Yue' (Cantonese), 'Chinese', 'English'.
textNormalizationEnabled boolean Enable MiniMax text normalization to improve number reading and formatting.
chunkPlan object This is the plan for chunking the model output before it is sent to the voice provider.
View JSON Schema on GitHub

JSON Schema

vapi-fallbackminimaxvoice-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/FallbackMinimaxVoice",
  "title": "FallbackMinimaxVoice",
  "type": "object",
  "properties": {
    "cachingEnabled": {
      "type": "boolean",
      "description": "This is the flag to toggle voice caching for the assistant.",
      "example": true,
      "default": true
    },
    "provider": {
      "type": "string",
      "description": "This is the voice provider that will be used.",
      "enum": [
        "minimax"
      ]
    },
    "voiceId": {
      "type": "string",
      "description": "This is the provider-specific ID that will be used. Use a voice from MINIMAX_PREDEFINED_VOICES or a custom cloned voice ID.",
      "title": "This is the Minimax Voice ID"
    },
    "model": {
      "type": "string",
      "description": "This is the model that will be used. Options are 'speech-02-hd', 'speech-02-turbo', and 'speech-2.5-turbo-preview'.\nspeech-02-hd is optimized for high-fidelity applications like voiceovers and audiobooks.\nspeech-02-turbo is designed for real-time applications with low latency.\n\n@default \"speech-02-turbo\"",
      "enum": [
        "speech-02-hd",
        "speech-02-turbo",
        "speech-2.5-turbo-preview"
      ],
      "example": "speech-02-turbo",
      "default": "speech-02-turbo"
    },
    "emotion": {
      "type": "string",
      "description": "The emotion to use for the voice. If not provided, will use auto-detect mode.\nOptions include: 'happy', 'sad', 'angry', 'fearful', 'surprised', 'disgusted', 'neutral'",
      "example": "happy"
    },
    "subtitleType": {
      "type": "string",
      "description": "Controls the granularity of subtitle/timing data returned by Minimax\nduring synthesis. Set to 'word' to receive per-word timestamps in\nassistant.speechStarted events for karaoke-style caption rendering.\n\n@default \"sentence\"",
      "enum": [
        "word",
        "sentence"
      ],
      "default": "sentence"
    },
    "pitch": {
      "type": "number",
      "description": "Voice pitch adjustment. Range from -12 to 12 semitones.\n@default 0",
      "minimum": -12,
      "maximum": 12,
      "example": 0,
      "default": 0
    },
    "speed": {
      "type": "number",
      "description": "Voice speed adjustment. Range from 0.5 to 2.0.\n@default 1.0",
      "minimum": 0.5,
      "maximum": 2,
      "example": 1,
      "default": 1
    },
    "volume": {
      "type": "number",
      "description": "Voice volume adjustment. Range from 0.5 to 2.0.\n@default 1.0",
      "minimum": 0.5,
      "maximum": 2,
      "example": 1,
      "default": 1
    },
    "region": {
      "type": "string",
      "description": "The region for Minimax API. Defaults to \"worldwide\".",
      "enum": [
        "worldwide",
        "china"
      ],
      "default": "worldwide"
    },
    "languageBoost": {
      "type": "string",
      "description": "Language hint for MiniMax T2A. Example values: 'Chinese,Yue' (Cantonese), 'Chinese', 'English'.",
      "enum": [
        "Chinese",
        "Chinese,Yue",
        "English",
        "Arabic",
        "Russian",
        "Spanish",
        "French",
        "Portuguese",
        "German",
        "Turkish",
        "Dutch",
        "Ukrainian",
        "Vietnamese",
        "Indonesian",
        "Japanese",
        "Italian",
        "Korean",
        "Thai",
        "Polish",
        "Romanian",
        "Greek",
        "Czech",
        "Finnish",
        "Hindi",
        "Bulgarian",
        "Danish",
        "Hebrew",
        "Malay",
        "Persian",
        "Slovak",
        "Swedish",
        "Croatian",
        "Filipino",
        "Hungarian",
        "Norwegian",
        "Slovenian",
        "Catalan",
        "Nynorsk",
        "Tamil",
        "Afrikaans",
        "auto"
      ]
    },
    "textNormalizationEnabled": {
      "type": "boolean",
      "description": "Enable MiniMax text normalization to improve number reading and formatting.",
      "default": true
    },
    "chunkPlan": {
      "description": "This is the plan for chunking the model output before it is sent to the voice provider.",
      "allOf": [
        {
          "$ref": "#/components/schemas/ChunkPlan"
        }
      ]
    }
  },
  "required": [
    "provider",
    "voiceId"
  ]
}