Hugging Face · Schema

ChatCompletionRequest

Properties

Name Type Description
model string Model ID to use. Can be a Hugging Face model ID (e.g., meta-llama/Llama-3-70b-chat-hf) or a provider-specific identifier.
messages array List of messages comprising the conversation
frequency_penalty number Penalize tokens based on frequency in text so far
logprobs boolean Whether to return log probabilities
max_tokens integer Maximum number of tokens to generate
presence_penalty number Penalize tokens based on presence in text so far
reasoning_effort string Constrains effort on reasoning for models that support it. Common values are none, minimal, low, medium, high, xhigh.
response_format string Format the generated output must follow (for example, plain text or JSON constrained by a schema)
seed integer Random seed for reproducibility
stop array Up to 4 sequences where generation will stop
stream boolean Whether to stream partial responses using SSE
stream_options object Options that apply only when stream is true, such as including token usage in the stream
temperature number Sampling temperature
tool_choice string Controls tool usage
tool_prompt string Prompt prepended before tools
tools array List of tools the model may call
top_logprobs integer Number of most likely tokens to return per position
top_p number Nucleus sampling parameter
View JSON Schema on GitHub

JSON Schema

hugging-face-inference-providers-chat-completion-request-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "ChatCompletionRequest",
  "description": "Request body for a Hugging Face Inference Providers chat completion call.",
  "type": "object",
  "required": [
    "messages"
  ],
  "properties": {
    "model": {
      "type": "string",
      "description": "Model ID to use. Can be a Hugging Face model ID (e.g., meta-llama/Llama-3-70b-chat-hf) or a provider-specific identifier."
    },
    "messages": {
      "type": "array",
      "description": "List of messages comprising the conversation",
      "items": {
        "type": "object"
      }
    },
    "frequency_penalty": {
      "type": "number",
      "description": "Penalize tokens based on frequency in text so far"
    },
    "logprobs": {
      "type": "boolean",
      "description": "Whether to return log probabilities"
    },
    "max_tokens": {
      "type": "integer",
      "description": "Maximum number of tokens to generate"
    },
    "presence_penalty": {
      "type": "number",
      "description": "Penalize tokens based on presence in text so far"
    },
    "reasoning_effort": {
      "type": "string",
      "description": "Constrains effort on reasoning for models that support it. Common values are none, minimal, low, medium, high, xhigh."
    },
    "response_format": {
      "type": [
        "string",
        "object"
      ],
      "description": "Format the generated output must follow (for example, plain text or JSON constrained by a schema). Usually supplied as an object with a `type` field."
    },
    "seed": {
      "type": "integer",
      "description": "Random seed for reproducibility"
    },
    "stop": {
      "type": "array",
      "description": "Up to 4 sequences where generation will stop",
      "items": {
        "type": "string"
      },
      "maxItems": 4
    },
    "stream": {
      "type": "boolean",
      "description": "Whether to stream partial responses using SSE"
    },
    "stream_options": {
      "type": "object",
      "description": "Options that apply only when stream is true",
      "properties": {
        "include_usage": {
          "type": "boolean",
          "description": "Whether to emit an additional chunk carrying token usage for the whole request"
        }
      }
    },
    "temperature": {
      "type": "number",
      "description": "Sampling temperature"
    },
    "tool_choice": {
      "type": [
        "string",
        "object"
      ],
      "description": "Controls tool usage. Either a mode name (e.g., auto, none, required) or an object naming the specific function to call."
    },
    "tool_prompt": {
      "type": "string",
      "description": "Prompt prepended before tools"
    },
    "tools": {
      "type": "array",
      "description": "List of tools the model may call",
      "items": {
        "type": "object"
      }
    },
    "top_logprobs": {
      "type": "integer",
      "description": "Number of most likely tokens to return per position"
    },
    "top_p": {
      "type": "number",
      "description": "Nucleus sampling parameter"
    }
  }
}