Hugging Face · Schema

ChatCompletionRequest

Properties

Name Type Description
model string Model ID to use. Can be a Hugging Face model ID (e.g., meta-llama/Llama-3-70b-chat-hf) or a provider-specific identifier.
messages array List of messages comprising the conversation
frequency_penalty number Penalize tokens based on frequency in text so far
logprobs boolean Whether to return log probabilities
max_tokens integer Maximum number of tokens to generate
presence_penalty number Penalize tokens based on presence in text so far
reasoning_effort string Constrains effort on reasoning for models that support it. Common values are none, minimal, low, medium, high, xhigh.
response_format object Requested output format: one of the text, json_schema, or json_object variants
seed integer Random seed for reproducibility
stop array Up to 4 sequences where generation will stop
stream boolean Whether to stream partial responses using SSE
stream_options object Options for streamed responses (e.g., include_usage to include usage statistics in the stream)
temperature number Sampling temperature
tool_choice object Controls tool usage
tool_prompt string Prompt prepended before tools
tools array List of tools the model may call
top_logprobs integer Number of most likely tokens to return per position
top_p number Nucleus sampling parameter
View JSON Schema on GitHub

JSON Schema

hugging-face-chatcompletionrequest-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/ChatCompletionRequest",
  "title": "ChatCompletionRequest",
  "type": "object",
  "required": [
    "model",
    "messages"
  ],
  "properties": {
    "model": {
      "type": "string",
      "description": "Model ID to use. Can be a Hugging Face model ID (e.g., meta-llama/Llama-3-70b-chat-hf) or a provider-specific identifier.",
      "example": "meta-llama/Llama-3-70b-chat-hf"
    },
    "messages": {
      "type": "array",
      "description": "List of messages comprising the conversation",
      "items": {
        "type": "object",
        "required": [
          "role"
        ],
        "properties": {
          "role": {
            "type": "string",
            "enum": [
              "system",
              "user",
              "assistant",
              "tool"
            ],
            "description": "The role of the message author"
          },
          "content": {
            "oneOf": [
              {
                "type": "string"
              },
              {
                "type": "array",
                "items": {
                  "oneOf": [
                    {
                      "type": "object",
                      "required": [
                        "type",
                        "text"
                      ],
                      "properties": {
                        "type": {
                          "type": "string",
                          "const": "text"
                        },
                        "text": {
                          "type": "string"
                        }
                      }
                    },
                    {
                      "type": "object",
                      "required": [
                        "type",
                        "image_url"
                      ],
                      "properties": {
                        "type": {
                          "type": "string",
                          "const": "image_url"
                        },
                        "image_url": {
                          "type": "object",
                          "required": [
                            "url"
                          ],
                          "properties": {
                            "url": {
                              "type": "string"
                            }
                          }
                        }
                      }
                    }
                  ]
                }
              }
            ],
            "description": "Message content (string or array for multimodal)"
          },
          "name": {
            "type": "string",
            "description": "Optional name for the participant"
          },
          "tool_calls": {
            "type": "array",
            "items": {
              "type": "object",
              "required": [
                "id",
                "type",
                "function"
              ],
              "properties": {
                "id": {
                  "type": "string"
                },
                "type": {
                  "type": "string"
                },
                "function": {
                  "type": "object",
                  "required": [
                    "name"
                  ],
                  "properties": {
                    "name": {
                      "type": "string"
                    },
                    "arguments": {
                      "type": "string"
                    },
                    "description": {
                      "type": "string"
                    }
                  }
                }
              }
            }
          },
          "tool_call_id": {
            "type": "string",
            "description": "Tool call ID for tool responses"
          }
        }
      },
      "example": []
    },
    "frequency_penalty": {
      "type": "number",
      "minimum": -2.0,
      "maximum": 2.0,
      "default": 0,
      "description": "Penalize tokens based on frequency in text so far",
      "example": 42.5
    },
    "logprobs": {
      "type": "boolean",
      "default": false,
      "description": "Whether to return log probabilities",
      "example": true
    },
    "max_tokens": {
      "type": "integer",
      "description": "Maximum number of tokens to generate",
      "example": 10
    },
    "presence_penalty": {
      "type": "number",
      "minimum": -2.0,
      "maximum": 2.0,
      "default": 0,
      "description": "Penalize tokens based on presence in text so far",
      "example": 42.5
    },
    "reasoning_effort": {
      "type": "string",
      "description": "Constrains effort on reasoning for models that support it. Common values are none, minimal, low, medium, high, xhigh.",
      "example": "example_value"
    },
    "response_format": {
      "oneOf": [
        {
          "type": "object",
          "properties": {
            "type": {
              "type": "string",
              "const": "text"
            }
          }
        },
        {
          "type": "object",
          "required": [
            "type",
            "json_schema"
          ],
          "properties": {
            "type": {
              "type": "string",
              "const": "json_schema"
            },
            "json_schema": {
              "type": "object",
              "required": [
                "name"
              ],
              "properties": {
                "name": {
                  "type": "string"
                },
                "description": {
                  "type": "string"
                },
                "schema": {
                  "type": "object"
                },
                "strict": {
                  "type": "boolean"
                }
              }
            }
          }
        },
        {
          "type": "object",
          "properties": {
            "type": {
              "type": "string",
              "const": "json_object"
            }
          }
        }
      ],
      "example": "example_value"
    },
    "seed": {
      "type": "integer",
      "description": "Random seed for reproducibility",
      "example": 10
    },
    "stop": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "maxItems": 4,
      "description": "Up to 4 sequences where generation will stop",
      "example": []
    },
    "stream": {
      "type": "boolean",
      "default": false,
      "description": "Whether to stream partial responses using SSE",
      "example": true
    },
    "stream_options": {
      "type": "object",
      "properties": {
        "include_usage": {
          "type": "boolean",
          "description": "Include usage statistics in stream"
        }
      },
      "example": "example_value"
    },
    "temperature": {
      "type": "number",
      "minimum": 0,
      "maximum": 2,
      "default": 1.0,
      "description": "Sampling temperature",
      "example": 42.5
    },
    "tool_choice": {
      "oneOf": [
        {
          "type": "string",
          "enum": [
            "auto",
            "none",
            "required"
          ]
        },
        {
          "type": "object",
          "required": [
            "function"
          ],
          "properties": {
            "function": {
              "type": "object",
              "required": [
                "name"
              ],
              "properties": {
                "name": {
                  "type": "string"
                }
              }
            }
          }
        }
      ],
      "description": "Controls tool usage",
      "example": "example_value"
    },
    "tool_prompt": {
      "type": "string",
      "description": "Prompt prepended before tools",
      "example": "example_value"
    },
    "tools": {
      "type": "array",
      "items": {
        "type": "object",
        "required": [
          "type",
          "function"
        ],
        "properties": {
          "type": {
            "type": "string"
          },
          "function": {
            "type": "object",
            "required": [
              "name"
            ],
            "properties": {
              "name": {
                "type": "string"
              },
              "description": {
                "type": "string"
              },
              "parameters": {
                "type": "object"
              }
            }
          }
        }
      },
      "description": "List of tools the model may call",
      "example": []
    },
    "top_logprobs": {
      "type": "integer",
      "minimum": 0,
      "maximum": 5,
      "description": "Number of most likely tokens to return per position",
      "example": 10
    },
    "top_p": {
      "type": "number",
      "minimum": 0,
      "maximum": 1,
      "default": 1.0,
      "description": "Nucleus sampling parameter",
      "example": 42.5
    }
  }
}