Microsoft Azure · Schema
chatCompletionsRequestCommon
API Management · Cloud · Cloud Computing · Enterprise · Infrastructure as a Service · Platform as a Service · T1
Properties
| Name | Type | Description |
|---|---|---|
| temperature | number | What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. |
| top_p | number | An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both. |
| stream | boolean | If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message. |
| stop | string or array of strings | Up to 4 sequences where the API will stop generating further tokens. |
| max_tokens | integer | The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens). |
| presence_penalty | number | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. |
| frequency_penalty | number | Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. |
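| logit_bias | object | Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. |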
| user | string | A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse. |
JSON Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/chatCompletionsRequestCommon",
  "title": "chatCompletionsRequestCommon",
  "type": "object",
  "properties": {
    "temperature": {
      "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\nWe generally recommend altering this or `top_p` but not both.",
      "type": ["number", "null"],
      "minimum": 0,
      "maximum": 2,
      "default": 1,
      "examples": [1]
    },
    "top_p": {
      "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or `temperature` but not both.",
      "type": ["number", "null"],
      "minimum": 0,
      "maximum": 1,
      "default": 1,
      "examples": [1]
    },
    "stream": {
      "description": "If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.",
      "type": ["boolean", "null"],
      "default": false
    },
    "stop": {
      "description": "Up to 4 sequences where the API will stop generating further tokens.",
      "oneOf": [
        {
          "type": ["string", "null"]
        },
        {
          "type": "array",
          "items": {
            "type": "string"
          },
          "minItems": 1,
          "maxItems": 4,
          "description": "Array minimum size of 1 and maximum of 4"
        }
      ],
      "default": null
    },
    "max_tokens": {
      "description": "The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens).",
      "type": "integer",
      "default": 4096
    },
    "presence_penalty": {
      "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
      "type": "number",
      "default": 0,
      "minimum": -2,
      "maximum": 2
    },
    "frequency_penalty": {
      "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
      "type": "number",
      "default": 0,
      "minimum": -2,
      "maximum": 2
    },
    "logit_bias": {
      "description": "Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.",
      "type": ["object", "null"]
    },
    "user": {
      "description": "A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse.",
      "type": "string",
      "examples": ["user-1234"]
    }
  }
}