Vapi · Schema

DeepgramTranscriber

AI · Voice Agents · Realtime · CPaaS

Properties

Name	Type	Description
provider	string	This is the transcription provider that will be used.
model	string	This is the Deepgram model that will be used. A list of models can be found here: https://developers.deepgram.com/docs/models-languages-overview
language	string	This is the language that will be set for the transcription. The list of languages Deepgram supports can be found here: https://developers.deepgram.com/docs/models-languages-overview
smartFormat	boolean	This enables the smart format option provided by Deepgram. It is disabled by default because it can sometimes format numbers as times, but it's getting better.
mipOptOut	boolean	If set to true, this will add mip_opt_out=true as a query parameter of all API requests. See https://developers.deepgram.com/docs/the-deepgram-model-improvement-partnership-program#want-to-opt-out This will only be used if you are using your own Deepgram API key. @default false
numerals	boolean	If set to true, this will cause Deepgram to convert spoken numbers to literal numerals. For example, "my phone number is nine-seven-two..." would become "my phone number is 972..." @default false
profanityFilter	boolean	If set to true, Deepgram will replace profanity in transcripts with surrounding asterisks, e.g. "f***". @default false
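redaction	array	Enables redaction of sensitive information from transcripts. Options include: - "pci": Redacts credit card numbers, expiration dates, and CVV. - "pii": Redacts personally identifiable information (names, locations, identifying numbers, etc.). - "phi": Redacts protected health information (medical conditions, drugs, injuries, etc.). - "numbers": Redacts numerical and identifying entities (dates, account numbers, SSNs, etc.). Multiple values can be provided to redact different categories simultaneously. Redacted content is replaced with entity labels like [CREDIT_CARD_1], [SSN_1], etc. See https://developers.deepgram.com/docs/redaction for details.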
confidenceThreshold	number	Transcripts below this confidence threshold will be discarded. @default 0.4
eotThreshold	number	End-of-turn confidence required to finish a turn. Only used with Flux models. @default 0.7
eotTimeoutMs	number	A turn will be finished when this much time has passed after speech, regardless of EOT confidence. Only used with Flux models. @default 5000
languages	array	Language hints to bias Flux Multilingual (`flux-general-multi`) toward specific languages. Provide BCP-47 language codes (e.g. "en", "es", "fr"). Multiple hints can be given for multilingual or code-switching scenarios. Omit for auto-detection. Only used with `flux-general-multi`.
keywords	array	These keywords are passed to the transcription model to help it pick up use-case specific words. Anything that may not be a common word, like your company name, should be added here.
keyterm	array	Keyterm Prompting allows you to improve Keyword Recall Rate (KRR) for important keyterms or phrases by up to 90%.
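endpointing	number	This is the timeout after which Deepgram will send transcription on user silence. You can read in-depth documentation here: https://developers.deepgram.com/docs/endpointing. Here are the most important bits: - Defaults to 10. This is recommended for most use cases to optimize for latency. - 10 can cause some missing transcriptions because of the shorter context. This mostly happens for one-word utterances. For those use cases, it's recommended to try 300. It will add a bit of latency but the quality and reliability of the experience will be better. - If neither 10 nor 300 work, contact support@vapi.ai and we'll find another solution. @default 10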
fallbackPlan	object	This is the plan for transcriber provider fallbacks in the event that the primary transcriber provider fails.

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/DeepgramTranscriber",
  "title": "DeepgramTranscriber",
  "type": "object",
  "properties": {
    "provider": {
      "type": "string",
      "description": "This is the transcription provider that will be used.",
      "enum": [
        "deepgram"
      ]
    },
    "model": {
      "description": "This is the Deepgram model that will be used. A list of models can be found here: https://developers.deepgram.com/docs/models-languages-overview",
      "oneOf": [
        {
          "type": "string",
          "enum": [
            "nova-3",
            "nova-3-general",
            "nova-3-medical",
            "nova-2",
            "nova-2-general",
            "nova-2-meeting",
            "nova-2-phonecall",
            "nova-2-finance",
            "nova-2-conversationalai",
            "nova-2-voicemail",
            "nova-2-video",
            "nova-2-medical",
            "nova-2-drivethru",
            "nova-2-automotive",
            "nova",
            "nova-general",
            "nova-phonecall",
            "nova-medical",
            "enhanced",
            "enhanced-general",
            "enhanced-meeting",
            "enhanced-phonecall",
            "enhanced-finance",
            "base",
            "base-general",
            "base-meeting",
            "base-phonecall",
            "base-finance",
            "base-conversationalai",
            "base-voicemail",
            "base-video",
            "whisper",
            "flux-general-en",
            "flux-general-multi"
          ]
        },
        {
          "type": "string"
        }
      ]
    },
    "language": {
      "type": "string",
      "description": "This is the language that will be set for the transcription. The list of languages Deepgram supports can be found here: https://developers.deepgram.com/docs/models-languages-overview",
      "enum": [
        "ar",
        "az",
        "ba",
        "be",
        "bg",
        "bn",
        "br",
        "bs",
        "ca",
        "cs",
        "da",
        "da-DK",
        "de",
        "de-CH",
        "el",
        "en",
        "en-AU",
        "en-CA",
        "en-GB",
        "en-IE",
        "en-IN",
        "en-NZ",
        "en-US",
        "es",
        "es-419",
        "es-LATAM",
        "et",
        "eu",
        "fa",
        "fi",
        "fr",
        "fr-CA",
        "ha",
        "haw",
        "he",
        "hi",
        "hi-Latn",
        "hr",
        "hu",
        "id",
        "is",
        "it",
        "ja",
        "jw",
        "kn",
        "ko",
        "ko-KR",
        "ln",
        "lt",
        "lv",
        "mk",
        "mr",
        "ms",
        "multi",
        "nl",
        "nl-BE",
        "no",
        "pl",
        "pt",
        "pt-BR",
        "pt-PT",
        "ro",
        "ru",
        "sk",
        "sl",
        "sn",
        "so",
        "sr",
        "su",
        "sv",
        "sv-SE",
        "ta",
        "taq",
        "te",
        "th",
        "th-TH",
        "tl",
        "tr",
        "tt",
        "uk",
        "ur",
        "vi",
        "yo",
        "zh",
        "zh-CN",
        "zh-HK",
        "zh-Hans",
        "zh-Hant",
        "zh-TW"
      ]
    },
    "smartFormat": {
      "type": "boolean",
      "description": "This will be use smart format option provided by Deepgram. It's default disabled because it can sometimes format numbers as times but it's getting better.",
      "example": false
    },
    "mipOptOut": {
      "type": "boolean",
      "description": "If set to true, this will add mip_opt_out=true as a query parameter of all API requests. See https://developers.deepgram.com/docs/the-deepgram-model-improvement-partnership-program#want-to-opt-out\n\nThis will only be used if you are using your own Deepgram API key.\n\n@default false",
      "example": false,
      "default": false
    },
    "numerals": {
      "type": "boolean",
      "description": "If set to true, this will cause deepgram to convert spoken numbers to literal numerals. For example, \"my phone number is nine-seven-two...\" would become \"my phone number is 972...\"\n\n@default false",
      "example": false
    },
    "profanityFilter": {
      "type": "boolean",
      "description": "If set to true, Deepgram will replace profanity in transcripts with surrounding asterisks, e.g. \"f***\".\n\n@default false",
      "example": false
    },
    "redaction": {
      "type": "array",
      "description": "Enables redaction of sensitive information from transcripts.\n\nOptions include:\n- \"pci\": Redacts credit card numbers, expiration dates, and CVV.\n- \"pii\": Redacts personally identifiable information (names, locations, identifying numbers, etc.).\n- \"phi\": Redacts protected health information (medical conditions, drugs, injuries, etc.).\n- \"numbers\": Redacts numerical and identifying entities (dates, account numbers, SSNs, etc.).\n\nMultiple values can be provided to redact different categories simultaneously.\nRedacted content is replaced with entity labels like [CREDIT_CARD_1], [SSN_1], etc.\n\nSee https://developers.deepgram.com/docs/redaction for details.",
      "enum": [
        "pci",
        "pii",
        "phi",
        "numbers"
      ],
      "example": [
        "pci",
        "phi"
      ],
      "items": {
        "type": "string",
        "enum": [
          "pci",
          "pii",
          "phi",
          "numbers"
        ]
      }
    },
    "confidenceThreshold": {
      "type": "number",
      "description": "Transcripts below this confidence threshold will be discarded.\n\n@default 0.4",
      "minimum": 0,
      "maximum": 1,
      "example": 0.4
    },
    "eotThreshold": {
      "type": "number",
      "description": "End-of-turn confidence required to finish a turn. Only used with Flux models.\n\n@default 0.7",
      "minimum": 0.5,
      "maximum": 0.9,
      "example": 0.7
    },
    "eotTimeoutMs": {
      "type": "number",
      "description": "A turn will be finished when this much time has passed after speech, regardless of EOT confidence. Only used with Flux models.\n\n@default 5000",
      "minimum": 500,
      "maximum": 10000,
      "example": 5000
    },
    "languages": {
      "description": "Language hints to bias Flux Multilingual (`flux-general-multi`) toward specific languages.\nProvide BCP-47 language codes (e.g. \"en\", \"es\", \"fr\"). Multiple hints can be given for\nmultilingual or code-switching scenarios. Omit for auto-detection. Only used with `flux-general-multi`.",
      "example": [
        "en",
        "es"
      ],
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "keywords": {
      "description": "These keywords are passed to the transcription model to help it pick up use-case specific words. Anything that may not be a common word, like your company name, should be added here.",
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "/^\\p{L}[\\p{L}\\d]*(?::[+-]?\\d+)?$/u"
      }
    },
    "keyterm": {
      "description": "Keyterm Prompting allows you improve Keyword Recall Rate (KRR) for important keyterms or phrases up to 90%.",
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "endpointing": {
      "type": "number",
      "description": "This is the timeout after which Deepgram will send transcription on user silence. You can read in-depth documentation here: https://developers.deepgram.com/docs/endpointing.\n\nHere are the most important bits:\n- Defaults to 10. This is recommended for most use cases to optimize for latency.\n- 10 can cause some missing transcriptions since because of the shorter context. This mostly happens for one-word utterances. For those uses cases, it's recommended to try 300. It will add a bit of latency but the quality and reliability of the experience will be better.\n- If neither 10 nor 300 work, contact [email protected] and we'll find another solution.\n\n@default 10",
      "minimum": 10,
      "maximum": 500
    },
    "fallbackPlan": {
      "description": "This is the plan for transcriber provider fallbacks in the event that the primary transcriber provider fails.",
      "allOf": [
        {
          "$ref": "#/components/schemas/FallbackTranscriberPlan"
        }
      ]
    }
  },
  "required": [
    "provider"
  ]
}