Vapi · Schema

CustomLLMModel

AIVoiceAgentsRealtimeCPaaS

Properties

Name Type Description
messages array This is the starting state for the conversation.
tools array These are the tools that the assistant can use during the call. To use existing tools, use `toolIds`. Both `tools` and `toolIds` can be used together.
toolIds array These are the tools that the assistant can use during the call. To use transient tools, use `tools`. Both `tools` and `toolIds` can be used together.
knowledgeBase object These are the options for the knowledge base.
provider string This is the provider that will be used for the model. Any service, including your own server, that is compatible with the OpenAI API can be used.
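metadataSendMode string This determines whether metadata is sent in requests to the custom provider. - `off` will not send any metadata. payload will look like `{ messages }` - `variable` will send `assistant.metadata` as a variable on the payload. payload will look like `{ messages, metadata }` - `destructured` will send `assistant.metadata` fields directly on the payload. payload will look like `{ messages, ...metadata }` Further, `variable` and `destructured` will send `call`, `phoneNumber`, and `customer` objects in the payload. Default is `variable`.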
headers object Custom headers to send with requests. These headers can override default OpenAI headers except for Authorization (which should be specified using a custom-llm credential).
url string This is the URL we'll use for the OpenAI client's `baseURL`. Ex. https://openrouter.ai/api/v1
wordLevelConfidenceEnabled boolean This determines whether the transcriber's word level confidence is sent in requests to the custom provider. Default is false. This only works for Deepgram transcribers.
timeoutSeconds number This sets the timeout for the connection to the custom provider without needing to stream any tokens back. Default is 20 seconds.
model string This is the name of the model. Ex. cognitivecomputations/dolphin-mixtral-8x7b
temperature number This is the temperature that will be used for calls. Default is 0 to leverage caching for lower latency.
maxTokens number This is the max number of tokens that the assistant will be allowed to generate in each turn of the conversation. Default is 250.
emotionRecognitionEnabled boolean This determines whether we detect the user's emotion while they speak and send it as additional info to the model. Default `false` because the model is usually good at understanding the user's emotion from text.
numFastTurns number This sets how many turns at the start of the conversation to use a smaller, faster model from the same provider before switching to the primary model. Example, gpt-3.5-turbo if provider is openai. Default is 0.
View JSON Schema on GitHub

JSON Schema

vapi-customllmmodel-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/CustomLLMModel",
  "title": "CustomLLMModel",
  "type": "object",
  "properties": {
    "messages": {
      "description": "This is the starting state for the conversation.",
      "type": "array",
      "items": {
        "$ref": "#/components/schemas/OpenAIMessage"
      }
    },
    "tools": {
      "type": "array",
      "description": "These are the tools that the assistant can use during the call. To use existing tools, use `toolIds`.\n\nBoth `tools` and `toolIds` can be used together.",
      "items": {
        "oneOf": [
          {
            "$ref": "#/components/schemas/CreateApiRequestToolDTO",
            "title": "ApiRequestTool"
          },
          {
            "$ref": "#/components/schemas/CreateBashToolDTO",
            "title": "BashTool"
          },
          {
            "$ref": "#/components/schemas/CreateCodeToolDTO",
            "title": "CodeTool"
          },
          {
            "$ref": "#/components/schemas/CreateComputerToolDTO",
            "title": "ComputerTool"
          },
          {
            "$ref": "#/components/schemas/CreateDtmfToolDTO",
            "title": "DtmfTool"
          },
          {
            "$ref": "#/components/schemas/CreateEndCallToolDTO",
            "title": "EndCallTool"
          },
          {
            "$ref": "#/components/schemas/CreateFunctionToolDTO",
            "title": "FunctionTool"
          },
          {
            "$ref": "#/components/schemas/CreateGoHighLevelCalendarAvailabilityToolDTO",
            "title": "GoHighLevelCalendarAvailabilityTool"
          },
          {
            "$ref": "#/components/schemas/CreateGoHighLevelCalendarEventCreateToolDTO",
            "title": "GoHighLevelCalendarEventCreateTool"
          },
          {
            "$ref": "#/components/schemas/CreateGoHighLevelContactCreateToolDTO",
            "title": "GoHighLevelContactCreateTool"
          },
          {
            "$ref": "#/components/schemas/CreateGoHighLevelContactGetToolDTO",
            "title": "GoHighLevelContactGetTool"
          },
          {
            "$ref": "#/components/schemas/CreateGoogleCalendarCheckAvailabilityToolDTO",
            "title": "GoogleCalendarCheckAvailabilityTool"
          },
          {
            "$ref": "#/components/schemas/CreateGoogleCalendarCreateEventToolDTO",
            "title": "GoogleCalendarCreateEventTool"
          },
          {
            "$ref": "#/components/schemas/CreateGoogleSheetsRowAppendToolDTO",
            "title": "GoogleSheetsRowAppendTool"
          },
          {
            "$ref": "#/components/schemas/CreateHandoffToolDTO",
            "title": "HandoffTool"
          },
          {
            "$ref": "#/components/schemas/CreateMcpToolDTO",
            "title": "McpTool"
          },
          {
            "$ref": "#/components/schemas/CreateQueryToolDTO",
            "title": "QueryTool"
          },
          {
            "$ref": "#/components/schemas/CreateSlackSendMessageToolDTO",
            "title": "SlackSendMessageTool"
          },
          {
            "$ref": "#/components/schemas/CreateSmsToolDTO",
            "title": "SmsTool"
          },
          {
            "$ref": "#/components/schemas/CreateTextEditorToolDTO",
            "title": "TextEditorTool"
          },
          {
            "$ref": "#/components/schemas/CreateTransferCallToolDTO",
            "title": "TransferCallTool"
          },
          {
            "$ref": "#/components/schemas/CreateSipRequestToolDTO",
            "title": "SipRequestTool"
          },
          {
            "$ref": "#/components/schemas/CreateVoicemailToolDTO",
            "title": "VoicemailTool"
          }
        ]
      }
    },
    "toolIds": {
      "description": "These are the tools that the assistant can use during the call. To use transient tools, use `tools`.\n\nBoth `tools` and `toolIds` can be used together.",
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "knowledgeBase": {
      "description": "These are the options for the knowledge base.",
      "oneOf": [
        {
          "$ref": "#/components/schemas/CreateCustomKnowledgeBaseDTO",
          "title": "Custom"
        }
      ]
    },
    "provider": {
      "type": "string",
      "description": "This is the provider that will be used for the model. Any service, including your own server, that is compatible with the OpenAI API can be used.",
      "enum": [
        "custom-llm"
      ]
    },
    "metadataSendMode": {
      "type": "string",
      "description": "This determines whether metadata is sent in requests to the custom provider.\n\n- `off` will not send any metadata. payload will look like `{ messages }`\n- `variable` will send `assistant.metadata` as a variable on the payload. payload will look like `{ messages, metadata }`\n- `destructured` will send `assistant.metadata` fields directly on the payload. payload will look like `{ messages, ...metadata }`\n\nFurther, `variable` and `destructured` will send `call`, `phoneNumber`, and `customer` objects in the payload.\n\nDefault is `variable`.",
      "enum": [
        "off",
        "variable",
        "destructured"
      ]
    },
    "headers": {
      "type": "object",
      "description": "Custom headers to send with requests. These headers can override default OpenAI headers except for Authorization (which should be specified using a custom-llm credential).",
      "additionalProperties": {
        "type": "string"
      },
      "example": {
        "X-Custom-Header": "value"
      }
    },
    "url": {
      "type": "string",
      "description": "This is the URL we'll use for the OpenAI client's `baseURL`. Ex. https://openrouter.ai/api/v1"
    },
    "wordLevelConfidenceEnabled": {
      "type": "boolean",
      "description": "This determines whether the transcriber's word level confidence is sent in requests to the custom provider. Default is false.\nThis only works for Deepgram transcribers."
    },
    "timeoutSeconds": {
      "type": "number",
      "description": "This sets the timeout for the connection to the custom provider without needing to stream any tokens back. Default is 20 seconds.",
      "minimum": 0,
      "maximum": 300
    },
    "model": {
      "type": "string",
      "description": "This is the name of the model. Ex. cognitivecomputations/dolphin-mixtral-8x7b"
    },
    "temperature": {
      "type": "number",
      "description": "This is the temperature that will be used for calls. Default is 0 to leverage caching for lower latency.",
      "minimum": 0,
      "maximum": 2
    },
    "maxTokens": {
      "type": "number",
      "description": "This is the max number of tokens that the assistant will be allowed to generate in each turn of the conversation. Default is 250.",
      "minimum": 50,
      "maximum": 10000
    },
    "emotionRecognitionEnabled": {
      "type": "boolean",
      "description": "This determines whether we detect the user's emotion while they speak and send it as additional info to the model.\n\nDefault `false` because the model is usually good at understanding the user's emotion from text.\n\n@default false"
    },
    "numFastTurns": {
      "type": "number",
      "description": "This sets how many turns at the start of the conversation to use a smaller, faster model from the same provider before switching to the primary model. Example, gpt-3.5-turbo if provider is openai.\n\nDefault is 0.\n\n@default 0",
      "minimum": 0
    }
  },
  "required": [
    "provider",
    "url",
    "model"
  ]
}