Hugging Face · Schema

ChatCompletionRequest

Properties

Name	Type	Description
model	string	Model ID to use. Can be a Hugging Face model ID (e.g., meta-llama/Llama-3-70b-chat-hf) or a provider-specific identifier.
messages	array	List of messages comprising the conversation
frequency_penalty	number	Penalize tokens based on frequency in text so far
logprobs	boolean	Whether to return log probabilities
max_tokens	integer	Maximum number of tokens to generate
presence_penalty	number	Penalize tokens based on presence in text so far
reasoning_effort	string	Constrains effort on reasoning for models that support it. Common values are none, minimal, low, medium, high, xhigh.
response_format	string	Format that the generated response should follow
seed	integer	Random seed for reproducibility
stop	array	Up to 4 sequences where generation will stop
stream	boolean	Whether to stream partial responses using SSE
stream_options	object	Additional options that apply when streaming is enabled
temperature	number	Sampling temperature
tool_choice	string	Controls tool usage
tool_prompt	string	Prompt prepended before tools
tools	array	List of tools the model may call
top_logprobs	integer	Number of most likely tokens to return per position
top_p	number	Nucleus sampling parameter

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "ChatCompletionRequest",
  "type": "object",
  "properties": {
    "model": {
      "type": "string",
      "description": "Model ID to use. Can be a Hugging Face model ID (e.g., meta-llama/Llama-3-70b-chat-hf) or a provider-specific identifier."
    },
    "messages": {
      "type": "array",
      "description": "List of messages comprising the conversation",
      "items": {
        "type": "object"
      }
    },
    "frequency_penalty": {
      "type": "number",
      "description": "Penalize tokens based on frequency in text so far"
    },
    "logprobs": {
      "type": "boolean",
      "description": "Whether to return log probabilities"
    },
    "max_tokens": {
      "type": "integer",
      "description": "Maximum number of tokens to generate"
    },
    "presence_penalty": {
      "type": "number",
      "description": "Penalize tokens based on presence in text so far"
    },
    "reasoning_effort": {
      "type": "string",
      "description": "Constrains effort on reasoning for models that support it. Common values are none, minimal, low, medium, high, xhigh."
    },
    "response_format": {
      "type": "string",
      "description": "Format that the generated response should follow"
    },
    "seed": {
      "type": "integer",
      "description": "Random seed for reproducibility"
    },
    "stop": {
      "type": "array",
      "description": "Up to 4 sequences where generation will stop",
      "items": {
        "type": "string"
      },
      "maxItems": 4
    },
    "stream": {
      "type": "boolean",
      "description": "Whether to stream partial responses using SSE"
    },
    "stream_options": {
      "type": "object",
      "description": "Additional options that apply when streaming is enabled"
    },
    "temperature": {
      "type": "number",
      "description": "Sampling temperature"
    },
    "tool_choice": {
      "type": "string",
      "description": "Controls tool usage"
    },
    "tool_prompt": {
      "type": "string",
      "description": "Prompt prepended before tools"
    },
    "tools": {
      "type": "array",
      "description": "List of tools the model may call",
      "items": {
        "type": "object"
      }
    },
    "top_logprobs": {
      "type": "integer",
      "description": "Number of most likely tokens to return per position"
    },
    "top_p": {
      "type": "number",
      "description": "Nucleus sampling parameter"
    }
  }
}