Hugging Face · Schema
ChatCompletionRequest
Properties
| Name | Type | Description |
|---|---|---|
| model | string | Model ID to use. Can be a Hugging Face model ID (e.g., meta-llama/Llama-3-70b-chat-hf) or a provider-specific identifier. |
| messages | array | List of messages comprising the conversation |
| frequency_penalty | number | Penalize tokens based on frequency in text so far |
| logprobs | boolean | Whether to return log probabilities |
| max_tokens | integer | Maximum number of tokens to generate |
| presence_penalty | number | Penalize tokens based on presence in text so far |
| reasoning_effort | string | Constrains effort on reasoning for models that support it. Common values are none, minimal, low, medium, high, xhigh. |
| response_format | string | Format the model must use for its output (for example, plain text or JSON) |
| seed | integer | Random seed for reproducibility |
| stop | array | Up to 4 sequences where generation will stop |
| stream | boolean | Whether to stream partial responses using SSE |
| stream_options | object | Options for streaming responses; only applies when stream is true |
| temperature | number | Sampling temperature |
| tool_choice | string | Controls tool usage |
| tool_prompt | string | Prompt prepended before tools |
| tools | array | List of tools the model may call |
| top_logprobs | integer | Number of most likely tokens to return per position |
| top_p | number | Nucleus sampling parameter |
JSON Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "ChatCompletionRequest",
  "description": "Request body for a Hugging Face chat completion call.",
  "type": "object",
  "required": [
    "messages"
  ],
  "properties": {
    "model": {
      "type": "string",
      "description": "Model ID to use. Can be a Hugging Face model ID (e.g., meta-llama/Llama-3-70b-chat-hf) or a provider-specific identifier."
    },
    "messages": {
      "type": "array",
      "description": "List of messages comprising the conversation",
      "items": {
        "type": "object"
      }
    },
    "frequency_penalty": {
      "type": "number",
      "description": "Penalize tokens based on frequency in text so far"
    },
    "logprobs": {
      "type": "boolean",
      "description": "Whether to return log probabilities"
    },
    "max_tokens": {
      "type": "integer",
      "description": "Maximum number of tokens to generate"
    },
    "presence_penalty": {
      "type": "number",
      "description": "Penalize tokens based on presence in text so far"
    },
    "reasoning_effort": {
      "type": "string",
      "description": "Constrains effort on reasoning for models that support it. Common values are none, minimal, low, medium, high, xhigh."
    },
    "response_format": {
      "type": [
        "string",
        "object"
      ],
      "description": "Format the model must use for its output. Usually an object whose type field selects the format (for example text, json_object, or json_schema)."
    },
    "seed": {
      "type": "integer",
      "description": "Random seed for reproducibility"
    },
    "stop": {
      "type": "array",
      "description": "Up to 4 sequences where generation will stop",
      "items": {
        "type": "string"
      },
      "maxItems": 4
    },
    "stream": {
      "type": "boolean",
      "description": "Whether to stream partial responses using SSE"
    },
    "stream_options": {
      "type": "object",
      "description": "Options for streaming responses; only applies when stream is true"
    },
    "temperature": {
      "type": "number",
      "description": "Sampling temperature"
    },
    "tool_choice": {
      "type": [
        "string",
        "object"
      ],
      "description": "Controls tool usage"
    },
    "tool_prompt": {
      "type": "string",
      "description": "Prompt prepended before tools"
    },
    "tools": {
      "type": "array",
      "description": "List of tools the model may call",
      "items": {
        "type": "object"
      }
    },
    "top_logprobs": {
      "type": "integer",
      "description": "Number of most likely tokens to return per position"
    },
    "top_p": {
      "type": "number",
      "description": "Nucleus sampling parameter"
    }
  }
}