Mistral AI · Schema

ChatCompletionRequest

Agents · Artificial Intelligence · Batch Processing · Chat · Embeddings · Fine-Tuning · Large Language Models · OCR

Properties

Name	Type	Description
model	string	ID of the model to use. You can use the Models API to see all available models.
messages	array	A list of messages comprising the conversation so far. Each message has a role and content.
temperature	number	Sampling temperature between 0.0 and 1.5. Higher values like 0.7 produce more random output, while lower values like 0.2 produce more focused and deterministic output.
top_p	number	Nucleus sampling parameter. The model considers only the most likely tokens whose cumulative probability mass reaches top_p. A value of 0.1 means only the tokens comprising the top 10% of probability mass are considered.
max_tokens	integer	The maximum number of tokens to generate in the chat completion. The total token count of the prompt plus max_tokens cannot exceed the model's context length.
stream	boolean	Whether to stream back partial progress as server-sent events. If true, tokens are sent as data-only events as they become available, terminated by a data: [DONE] message.
stop	string | array	Stop generation if this token is detected, or if one of these tokens is detected when providing an array.
random_seed	integer	The seed to use for random sampling. If set, repeated calls with the same seed and parameters will generate deterministic results.
response_format	object	An object specifying the format that the model must output. Setting its type field to json_object enables JSON mode.
tools	array	A list of tools the model may call. Currently only functions are supported as a tool.
tool_choice	string	Controls which tool is called by the model. Can be auto, none, any, or required.
presence_penalty	number	Penalizes repetition of words or phrases. A higher value encourages the model to use a wider variety of words and phrases.
frequency_penalty	number	Penalizes repetition based on frequency in the generated text. A higher value discourages repeating frequently used words.
safe_prompt	boolean	Whether to inject a safety prompt before all conversations.

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/ChatCompletionRequest",
  "title": "ChatCompletionRequest",
  "type": "object",
  "required": [
    "model",
    "messages"
  ],
  "properties": {
    "model": {
      "type": "string",
      "description": "ID of the model to use. You can use the Models API to see all available models.",
      "example": "mistral-large-latest"
    },
    "messages": {
      "type": "array",
      "description": "A list of messages comprising the conversation so far. Each message has a role and content.",
      "items": {
        "$ref": "#/components/schemas/ChatMessage"
      }
    },
    "temperature": {
      "type": "number",
      "description": "Sampling temperature between 0.0 and 1.5. Higher values like 0.7 produce more random output, while lower values like 0.2 produce more focused and deterministic output.",
      "minimum": 0.0,
      "maximum": 1.5,
      "default": 0.7
    },
    "top_p": {
      "type": "number",
      "description": "Nucleus sampling parameter. The model considers only the most likely tokens whose cumulative probability mass reaches top_p. A value of 0.1 means only the tokens comprising the top 10% of probability mass are considered.",
      "minimum": 0.0,
      "maximum": 1.0,
      "default": 1.0
    },
    "max_tokens": {
      "type": "integer",
      "description": "The maximum number of tokens to generate in the chat completion. The total token count of the prompt plus max_tokens cannot exceed the model's context length.",
      "minimum": 1
    },
    "stream": {
      "type": "boolean",
      "description": "Whether to stream back partial progress as server-sent events. If true, tokens are sent as data-only events as they become available, terminated by a data: [DONE] message.",
      "default": false
    },
    "stop": {
      "oneOf": [
        {
          "type": "string"
        },
        {
          "type": "array",
          "items": {
            "type": "string"
          }
        }
      ],
      "description": "Stop generation if this token is detected, or if one of these tokens is detected when providing an array."
    },
    "random_seed": {
      "type": "integer",
      "description": "The seed to use for random sampling. If set, repeated calls with the same seed and parameters will generate deterministic results."
    },
    "response_format": {
      "type": "object",
      "description": "An object specifying the format that the model must output. Setting its type field to json_object enables JSON mode.",
      "properties": {
        "type": {
          "type": "string",
          "enum": [
            "text",
            "json_object"
          ],
          "description": "The format type. Use json_object to enable JSON mode."
        }
      }
    },
    "tools": {
      "type": "array",
      "description": "A list of tools the model may call. Currently only functions are supported as a tool.",
      "items": {
        "$ref": "#/components/schemas/Tool"
      }
    },
    "tool_choice": {
      "type": "string",
      "description": "Controls which tool is called by the model. Can be auto, none, any, or required.",
      "enum": [
        "auto",
        "none",
        "any",
        "required"
      ]
    },
    "presence_penalty": {
      "type": "number",
      "description": "Penalizes repetition of words or phrases. A higher value encourages the model to use a wider variety of words and phrases.",
      "minimum": -2.0,
      "maximum": 2.0,
      "default": 0.0
    },
    "frequency_penalty": {
      "type": "number",
      "description": "Penalizes repetition based on frequency in the generated text. A higher value discourages repeating frequently used words.",
      "minimum": -2.0,
      "maximum": 2.0,
      "default": 0.0
    },
    "safe_prompt": {
      "type": "boolean",
      "description": "Whether to inject a safety prompt before all conversations.",
      "default": false
    }
  }
}