Vapi · Schema

FallbackDeepgramTranscriber

AIVoiceAgentsRealtimeCPaaS

Properties

Name Type Description
provider string This is the transcription provider that will be used.
model object This is the Deepgram model that will be used. A list of models can be found here: https://developers.deepgram.com/docs/models-languages-overview
language string This is the language that will be set for the transcription. The list of languages Deepgram supports can be found here: https://developers.deepgram.com/docs/models-languages-overview
smartFormat boolean This will use the smart format option provided by Deepgram. It's disabled by default because it can sometimes format numbers as times, but it's getting better.
mipOptOut boolean If set to true, this will add mip_opt_out=true as a query parameter of all API requests. See https://developers.deepgram.com/docs/the-deepgram-model-improvement-partnership-program#want-to-opt-out Thi
numerals boolean If set to true, this will cause deepgram to convert spoken numbers to literal numerals. For example, "my phone number is nine-seven-two..." would become "my phone number is 972..." @default false
profanityFilter boolean If set to true, Deepgram will replace profanity in transcripts with surrounding asterisks, e.g. "f***". @default false
redaction array Enables redaction of sensitive information from transcripts. Options include: - "pci": Redacts credit card numbers, expiration dates, and CVV. - "pii": Redacts personally identifiable information (nam
confidenceThreshold number Transcripts below this confidence threshold will be discarded. @default 0.4
eotThreshold number End-of-turn confidence required to finish a turn. Only used with Flux models. @default 0.7
eotTimeoutMs number A turn will be finished when this much time has passed after speech, regardless of EOT confidence. Only used with Flux models. @default 5000
languages array Language hints to bias Flux Multilingual (`flux-general-multi`) toward specific languages. Provide BCP-47 language codes (e.g. "en", "es", "fr"). Multiple hints can be given for multilingual or code-s
keywords array These keywords are passed to the transcription model to help it pick up use-case specific words. Anything that may not be a common word, like your company name, should be added here.
keyterm array Keyterm Prompting allows you to improve Keyword Recall Rate (KRR) for important keyterms or phrases up to 90%.
endpointing number This is the timeout after which Deepgram will send transcription on user silence. You can read in-depth documentation here: https://developers.deepgram.com/docs/endpointing. Here are the most importan
View JSON Schema on GitHub

JSON Schema

vapi-fallbackdeepgramtranscriber-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/FallbackDeepgramTranscriber",
  "title": "FallbackDeepgramTranscriber",
  "type": "object",
  "properties": {
    "provider": {
      "type": "string",
      "description": "This is the transcription provider that will be used.",
      "enum": [
        "deepgram"
      ]
    },
    "model": {
      "description": "This is the Deepgram model that will be used. A list of models can be found here: https://developers.deepgram.com/docs/models-languages-overview",
      "oneOf": [
        {
          "type": "string",
          "enum": [
            "nova-3",
            "nova-3-general",
            "nova-3-medical",
            "nova-2",
            "nova-2-general",
            "nova-2-meeting",
            "nova-2-phonecall",
            "nova-2-finance",
            "nova-2-conversationalai",
            "nova-2-voicemail",
            "nova-2-video",
            "nova-2-medical",
            "nova-2-drivethru",
            "nova-2-automotive",
            "nova",
            "nova-general",
            "nova-phonecall",
            "nova-medical",
            "enhanced",
            "enhanced-general",
            "enhanced-meeting",
            "enhanced-phonecall",
            "enhanced-finance",
            "base",
            "base-general",
            "base-meeting",
            "base-phonecall",
            "base-finance",
            "base-conversationalai",
            "base-voicemail",
            "base-video",
            "whisper",
            "flux-general-en",
            "flux-general-multi"
          ]
        },
        {
          "type": "string"
        }
      ]
    },
    "language": {
      "type": "string",
      "description": "This is the language that will be set for the transcription. The list of languages Deepgram supports can be found here: https://developers.deepgram.com/docs/models-languages-overview",
      "enum": [
        "ar",
        "az",
        "ba",
        "be",
        "bg",
        "bn",
        "br",
        "bs",
        "ca",
        "cs",
        "da",
        "da-DK",
        "de",
        "de-CH",
        "el",
        "en",
        "en-AU",
        "en-CA",
        "en-GB",
        "en-IE",
        "en-IN",
        "en-NZ",
        "en-US",
        "es",
        "es-419",
        "es-LATAM",
        "et",
        "eu",
        "fa",
        "fi",
        "fr",
        "fr-CA",
        "ha",
        "haw",
        "he",
        "hi",
        "hi-Latn",
        "hr",
        "hu",
        "id",
        "is",
        "it",
        "ja",
        "jw",
        "kn",
        "ko",
        "ko-KR",
        "ln",
        "lt",
        "lv",
        "mk",
        "mr",
        "ms",
        "multi",
        "nl",
        "nl-BE",
        "no",
        "pl",
        "pt",
        "pt-BR",
        "pt-PT",
        "ro",
        "ru",
        "sk",
        "sl",
        "sn",
        "so",
        "sr",
        "su",
        "sv",
        "sv-SE",
        "ta",
        "taq",
        "te",
        "th",
        "th-TH",
        "tl",
        "tr",
        "tt",
        "uk",
        "ur",
        "vi",
        "yo",
        "zh",
        "zh-CN",
        "zh-HK",
        "zh-Hans",
        "zh-Hant",
        "zh-TW"
      ]
    },
    "smartFormat": {
      "type": "boolean",
      "description": "This will be use smart format option provided by Deepgram. It's default disabled because it can sometimes format numbers as times but it's getting better.",
      "example": false
    },
    "mipOptOut": {
      "type": "boolean",
      "description": "If set to true, this will add mip_opt_out=true as a query parameter of all API requests. See https://developers.deepgram.com/docs/the-deepgram-model-improvement-partnership-program#want-to-opt-out\n\nThis will only be used if you are using your own Deepgram API key.\n\n@default false",
      "example": false,
      "default": false
    },
    "numerals": {
      "type": "boolean",
      "description": "If set to true, this will cause deepgram to convert spoken numbers to literal numerals. For example, \"my phone number is nine-seven-two...\" would become \"my phone number is 972...\"\n\n@default false",
      "example": false
    },
    "profanityFilter": {
      "type": "boolean",
      "description": "If set to true, Deepgram will replace profanity in transcripts with surrounding asterisks, e.g. \"f***\".\n\n@default false",
      "example": false
    },
    "redaction": {
      "type": "array",
      "description": "Enables redaction of sensitive information from transcripts.\n\nOptions include:\n- \"pci\": Redacts credit card numbers, expiration dates, and CVV.\n- \"pii\": Redacts personally identifiable information (names, locations, identifying numbers, etc.).\n- \"phi\": Redacts protected health information (medical conditions, drugs, injuries, etc.).\n- \"numbers\": Redacts numerical and identifying entities (dates, account numbers, SSNs, etc.).\n\nMultiple values can be provided to redact different categories simultaneously.\nRedacted content is replaced with entity labels like [CREDIT_CARD_1], [SSN_1], etc.\n\nSee https://developers.deepgram.com/docs/redaction for details.",
      "enum": [
        "pci",
        "pii",
        "phi",
        "numbers"
      ],
      "example": [
        "pci",
        "phi"
      ],
      "items": {
        "type": "string",
        "enum": [
          "pci",
          "pii",
          "phi",
          "numbers"
        ]
      }
    },
    "confidenceThreshold": {
      "type": "number",
      "description": "Transcripts below this confidence threshold will be discarded.\n\n@default 0.4",
      "minimum": 0,
      "maximum": 1,
      "example": 0.4
    },
    "eotThreshold": {
      "type": "number",
      "description": "End-of-turn confidence required to finish a turn. Only used with Flux models.\n\n@default 0.7",
      "minimum": 0.5,
      "maximum": 0.9,
      "example": 0.7
    },
    "eotTimeoutMs": {
      "type": "number",
      "description": "A turn will be finished when this much time has passed after speech, regardless of EOT confidence. Only used with Flux models.\n\n@default 5000",
      "minimum": 500,
      "maximum": 10000,
      "example": 5000
    },
    "languages": {
      "description": "Language hints to bias Flux Multilingual (`flux-general-multi`) toward specific languages.\nProvide BCP-47 language codes (e.g. \"en\", \"es\", \"fr\"). Multiple hints can be given for\nmultilingual or code-switching scenarios. Omit for auto-detection. Only used with `flux-general-multi`.",
      "example": [
        "en",
        "es"
      ],
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "keywords": {
      "description": "These keywords are passed to the transcription model to help it pick up use-case specific words. Anything that may not be a common word, like your company name, should be added here.",
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "/^\\p{L}[\\p{L}\\d]*(?::[+-]?\\d+)?$/u"
      }
    },
    "keyterm": {
      "description": "Keyterm Prompting allows you improve Keyword Recall Rate (KRR) for important keyterms or phrases up to 90%.",
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "endpointing": {
      "type": "number",
      "description": "This is the timeout after which Deepgram will send transcription on user silence. You can read in-depth documentation here: https://developers.deepgram.com/docs/endpointing.\n\nHere are the most important bits:\n- Defaults to 10. This is recommended for most use cases to optimize for latency.\n- 10 can cause some missing transcriptions since because of the shorter context. This mostly happens for one-word utterances. For those uses cases, it's recommended to try 300. It will add a bit of latency but the quality and reliability of the experience will be better.\n- If neither 10 nor 300 work, contact [email protected] and we'll find another solution.\n\n@default 10",
      "minimum": 10,
      "maximum": 500
    }
  },
  "required": [
    "provider"
  ]
}