Letta · Schema

LLMConfig

Configuration for Language Model (LLM) connection and generation parameters. Deprecated: LLMConfig is deprecated and should not be used as an input or return type in API calls. Use the schemas in letta.schemas.model (ModelSettings, OpenAIModelSettings, etc.) instead. For conversion, use the _to_model() method or Model._from_llm_config() method.

AI · Agents · Stateful Agents · Memory · MemGPT · Continual Learning · MCP · Multi-Agent · RAG · Open Source

Properties

Name	Type	Description
model	string	LLM model name.
display_name	string | null	A human-friendly display name for the model.
model_endpoint_type	string	The endpoint type for the model.
model_endpoint	string | null	The endpoint for the model.
provider_name	string | null	The provider name for the model.
provider_category	ProviderCategory | null	The provider category for the model.
model_wrapper	string | null	The wrapper for the model.
context_window	integer	The context window size for the model.
put_inner_thoughts_in_kwargs	boolean | null	Puts 'inner_thoughts' as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.
handle	string | null	The handle for this config, in the format provider/model-name.
temperature	number	The temperature to use when generating text with the model. A higher temperature will result in more random text.
max_tokens	integer | null	The maximum number of tokens to generate. If not set, the model will use its default value.
enable_reasoner	boolean	Whether or not the model should use extended thinking if it is a 'reasoning' style model.
reasoning_effort	string | null	The reasoning effort to use when generating text with reasoning models.
max_reasoning_tokens	integer	Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.
effort	string | null	The effort level for Anthropic models that support it (Opus 4.5, Opus 4.6). Controls token spending and thinking behavior. Not setting this gives similar performance to 'high'.
frequency_penalty	number | null	Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.
compatibility_type	string | null	The framework compatibility type for the model.
verbosity	string | null	Soft control for how verbose model output should be, used for GPT-5 models.
tier	string | null	The cost tier for the model (cloud only).
parallel_tool_calls	boolean | null	Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.
response_format	object | null	The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings.
strict	boolean	Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.
return_logprobs	boolean	Whether to return log probabilities of the output tokens. Useful for RL training.
top_logprobs	integer | null	Number of most likely tokens to return at each position (0-20). Requires return_logprobs=True.
return_token_ids	boolean	Whether to return token IDs for all LLM generations via SGLang native endpoint. Required for multi-turn RL training with loss masking. Only works with SGLang provider.
tool_call_parser	string | null	SGLang tool call parser name (e.g. 'glm47', 'qwen25', 'hermes'). Used by the SGLang native adapter to parse tool calls from raw model output.

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/LLMConfig",
  "title": "LLMConfig",
  "properties": {
    "model": {
      "type": "string",
      "title": "Model",
      "description": "LLM model name."
    },
    "display_name": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "title": "Display Name",
      "description": "A human-friendly display name for the model."
    },
    "model_endpoint_type": {
      "type": "string",
      "enum": [
        "openai",
        "anthropic",
        "google_ai",
        "google_vertex",
        "azure",
        "groq",
        "ollama",
        "webui",
        "webui-legacy",
        "lmstudio",
        "lmstudio-legacy",
        "lmstudio-chatcompletions",
        "llamacpp",
        "koboldcpp",
        "vllm",
        "hugging-face",
        "minimax",
        "mistral",
        "together",
        "bedrock",
        "deepseek",
        "xai",
        "zai",
        "zai_coding",
        "baseten",
        "fireworks",
        "openrouter",
        "chatgpt_oauth"
      ],
      "title": "Model Endpoint Type",
      "description": "The endpoint type for the model."
    },
    "model_endpoint": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "title": "Model Endpoint",
      "description": "The endpoint for the model."
    },
    "provider_name": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "title": "Provider Name",
      "description": "The provider name for the model."
    },
    "provider_category": {
      "anyOf": [
        {
          "$ref": "#/components/schemas/ProviderCategory"
        },
        {
          "type": "null"
        }
      ],
      "description": "The provider category for the model."
    },
    "model_wrapper": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "title": "Model Wrapper",
      "description": "The wrapper for the model."
    },
    "context_window": {
      "type": "integer",
      "title": "Context Window",
      "description": "The context window size for the model."
    },
    "put_inner_thoughts_in_kwargs": {
      "anyOf": [
        {
          "type": "boolean"
        },
        {
          "type": "null"
        }
      ],
      "title": "Put Inner Thoughts In Kwargs",
      "description": "Puts 'inner_thoughts' as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.",
      "default": false
    },
    "handle": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "title": "Handle",
      "description": "The handle for this config, in the format provider/model-name."
    },
    "temperature": {
      "type": "number",
      "title": "Temperature",
      "description": "The temperature to use when generating text with the model. A higher temperature will result in more random text.",
      "default": 1
    },
    "max_tokens": {
      "anyOf": [
        {
          "type": "integer"
        },
        {
          "type": "null"
        }
      ],
      "title": "Max Tokens",
      "description": "The maximum number of tokens to generate. If not set, the model will use its default value."
    },
    "enable_reasoner": {
      "type": "boolean",
      "title": "Enable Reasoner",
      "description": "Whether or not the model should use extended thinking if it is a 'reasoning' style model.",
      "default": true
    },
    "reasoning_effort": {
      "anyOf": [
        {
          "type": "string",
          "enum": [
            "none",
            "minimal",
            "low",
            "medium",
            "high",
            "xhigh"
          ]
        },
        {
          "type": "null"
        }
      ],
      "title": "Reasoning Effort",
      "description": "The reasoning effort to use when generating text with reasoning models."
    },
    "max_reasoning_tokens": {
      "type": "integer",
      "title": "Max Reasoning Tokens",
      "description": "Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.",
      "default": 0
    },
    "effort": {
      "anyOf": [
        {
          "type": "string",
          "enum": [
            "low",
            "medium",
            "high",
            "max"
          ]
        },
        {
          "type": "null"
        }
      ],
      "title": "Effort",
      "description": "The effort level for Anthropic models that support it (Opus 4.5, Opus 4.6). Controls token spending and thinking behavior. Not setting this gives similar performance to 'high'."
    },
    "frequency_penalty": {
      "anyOf": [
        {
          "type": "number"
        },
        {
          "type": "null"
        }
      ],
      "title": "Frequency Penalty",
      "description": "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0."
    },
    "compatibility_type": {
      "anyOf": [
        {
          "type": "string",
          "enum": [
            "gguf",
            "mlx"
          ]
        },
        {
          "type": "null"
        }
      ],
      "title": "Compatibility Type",
      "description": "The framework compatibility type for the model."
    },
    "verbosity": {
      "anyOf": [
        {
          "type": "string",
          "enum": [
            "low",
            "medium",
            "high"
          ]
        },
        {
          "type": "null"
        }
      ],
      "title": "Verbosity",
      "description": "Soft control for how verbose model output should be, used for GPT-5 models."
    },
    "tier": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "title": "Tier",
      "description": "The cost tier for the model (cloud only)."
    },
    "parallel_tool_calls": {
      "anyOf": [
        {
          "type": "boolean"
        },
        {
          "type": "null"
        }
      ],
      "title": "Parallel Tool Calls",
      "description": "Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.",
      "default": false,
      "deprecated": true
    },
    "response_format": {
      "anyOf": [
        {
          "oneOf": [
            {
              "$ref": "#/components/schemas/TextResponseFormat"
            },
            {
              "$ref": "#/components/schemas/JsonSchemaResponseFormat"
            },
            {
              "$ref": "#/components/schemas/JsonObjectResponseFormat"
            }
          ],
          "discriminator": {
            "propertyName": "type",
            "mapping": {
              "json_object": "#/components/schemas/JsonObjectResponseFormat",
              "json_schema": "#/components/schemas/JsonSchemaResponseFormat",
              "text": "#/components/schemas/TextResponseFormat"
            }
          }
        },
        {
          "type": "null"
        }
      ],
      "title": "Response Format",
      "description": "The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings."
    },
    "strict": {
      "type": "boolean",
      "title": "Strict",
      "description": "Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.",
      "default": false
    },
    "return_logprobs": {
      "type": "boolean",
      "title": "Return Logprobs",
      "description": "Whether to return log probabilities of the output tokens. Useful for RL training.",
      "default": false
    },
    "top_logprobs": {
      "anyOf": [
        {
          "type": "integer"
        },
        {
          "type": "null"
        }
      ],
      "title": "Top Logprobs",
      "description": "Number of most likely tokens to return at each position (0-20). Requires return_logprobs=True."
    },
    "return_token_ids": {
      "type": "boolean",
      "title": "Return Token Ids",
      "description": "Whether to return token IDs for all LLM generations via SGLang native endpoint. Required for multi-turn RL training with loss masking. Only works with SGLang provider.",
      "default": false
    },
    "tool_call_parser": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "title": "Tool Call Parser",
      "description": "SGLang tool call parser name (e.g. 'glm47', 'qwen25', 'hermes'). Used by the SGLang native adapter to parse tool calls from raw model output."
    }
  },
  "type": "object",
  "required": [
    "model",
    "model_endpoint_type",
    "context_window"
  ],
  "description": "Configuration for Language Model (LLM) connection and generation parameters.\n\n.. deprecated::\n    LLMConfig is deprecated and should not be used as an input or return type in API calls.\n    Use the schemas in letta.schemas.model (ModelSettings, OpenAIModelSettings, etc.) instead.\n    For conversion, use the _to_model() method or Model._from_llm_config() method."
}