Mistral AI · Schema
ChatCompletionRequest
Agents · Artificial Intelligence · Batch Processing · Chat · Embeddings · Fine-Tuning · Large Language Models · OCR
Properties
| Name | Type | Description |
|---|---|---|
| model | string | ID of the model to use. You can use the Models API to see all available models. |
| messages | array | A list of messages comprising the conversation so far. Each message has a role and content. |
| temperature | number | Sampling temperature between 0.0 and 1.5. Higher values like 0.7 produce more random output, while lower values like 0.2 produce more focused and deterministic output. |
| top_p | number | Nucleus sampling parameter. The model considers only the most likely tokens whose cumulative probability reaches top_p. A value of 0.1 means only the tokens comprising the top 10% of probability mass are considered. |
| max_tokens | integer | The maximum number of tokens to generate in the chat completion. The total token count of the prompt plus max_tokens cannot exceed the model's context length. |
| stream | boolean | Whether to stream back partial progress as server-sent events. If true, tokens are sent as data-only events as they become available, terminated by a data: [DONE] message. |
| stop | string or array | Stop generation if this token is detected, or if one of these tokens is detected when providing an array. |
| random_seed | integer | The seed to use for random sampling. If set, repeated calls with the same seed and parameters will generate deterministic results. |
| response_format | object | An object specifying the format that the model must output. Setting type to json_object enables JSON mode. |
| tools | array | A list of tools the model may call. Currently only functions are supported as a tool. |
| tool_choice | string | Controls which tool is called by the model. Can be auto, none, any, or required. |
| presence_penalty | number | Penalizes repetition of words or phrases. A higher value encourages the model to use a wider variety of words and phrases. |
| frequency_penalty | number | Penalizes repetition based on frequency in the generated text. A higher value discourages repeating frequently used words. |
| safe_prompt | boolean | Whether to inject a safety prompt before all conversations. |
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "#/components/schemas/ChatCompletionRequest",
"title": "ChatCompletionRequest",
"type": "object",
"required": [
"model",
"messages"
],
"properties": {
"model": {
"type": "string",
"description": "ID of the model to use. You can use the Models API to see all available models.",
"example": "mistral-large-latest"
},
"messages": {
"type": "array",
"description": "A list of messages comprising the conversation so far. Each message has a role and content.",
"items": {
"$ref": "#/components/schemas/ChatMessage"
}
},
"temperature": {
"type": "number",
"description": "Sampling temperature between 0.0 and 1.5. Higher values like 0.7 produce more random output, while lower values like 0.2 produce more focused and deterministic output.",
"minimum": 0.0,
"maximum": 1.5,
"default": 0.7
},
"top_p": {
"type": "number",
"description": "Nucleus sampling parameter. The model considers only the most likely tokens whose cumulative probability reaches top_p. A value of 0.1 means only the tokens comprising the top 10% of probability mass are considered.",
"minimum": 0.0,
"maximum": 1.0,
"default": 1.0
},
"max_tokens": {
"type": "integer",
"description": "The maximum number of tokens to generate in the chat completion. The total token count of the prompt plus max_tokens cannot exceed the model's context length.",
"minimum": 1
},
"stream": {
"type": "boolean",
"description": "Whether to stream back partial progress as server-sent events. If true, tokens are sent as data-only events as they become available, terminated by a data: [DONE] message.",
"default": false
},
"stop": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
],
"description": "Stop generation if this token is detected, or if one of these tokens is detected when providing an array."
},
"random_seed": {
"type": "integer",
"description": "The seed to use for random sampling. If set, repeated calls with the same seed and parameters will generate deterministic results."
},
"response_format": {
"type": "object",
"description": "An object specifying the format that the model must output. Setting type to json_object enables JSON mode.",
"properties": {
"type": {
"type": "string",
"enum": [
"text",
"json_object"
],
"description": "The format type. Use json_object to enable JSON mode."
}
}
},
"tools": {
"type": "array",
"description": "A list of tools the model may call. Currently only functions are supported as a tool.",
"items": {
"$ref": "#/components/schemas/Tool"
}
},
"tool_choice": {
"type": "string",
"description": "Controls which tool is called by the model. Can be auto, none, any, or required.",
"enum": [
"auto",
"none",
"any",
"required"
]
},
"presence_penalty": {
"type": "number",
"description": "Penalizes repetition of words or phrases. A higher value encourages the model to use a wider variety of words and phrases.",
"minimum": -2.0,
"maximum": 2.0,
"default": 0.0
},
"frequency_penalty": {
"type": "number",
"description": "Penalizes repetition based on frequency in the generated text. A higher value discourages repeating frequently used words.",
"minimum": -2.0,
"maximum": 2.0,
"default": 0.0
},
"safe_prompt": {
"type": "boolean",
"description": "Whether to inject a safety prompt before all conversations.",
"default": false
}
}
}