Kong · Schema
AiRateLimitingAdvancedPluginConfig
API Gateway · AI Gateway · AI Connectivity · Agent Gateway · Event Gateway · MCP Registry · Service Mesh · LLM · Kafka · Konnect · Open Source
Properties
| Name | Type | Description |
|---|---|---|
| config | object | |
| consumer | object | If set, the plugin will activate only for requests where the specified consumer has been authenticated. (Note that some plugins cannot be restricted to consumers this way.) Leave unset for the plugin to activate regardless of the authenticated Consumer. |
| consumer_group | object | If set, the plugin will activate only for requests where the specified consumer group has been authenticated. (Note that some plugins cannot be restricted to consumer groups this way.) Leave unset for the plugin to activate regardless of the authenticated Consumer Groups. |
| name | string | The plugin name. Must be `ai-rate-limiting-advanced`. |
| protocols | array | A set of strings representing HTTP protocols. |
| route | object | If set, the plugin will only activate when receiving requests via the specified route. Leave unset for the plugin to activate regardless of the route being used. |
| service | object | If set, the plugin will only activate when receiving requests via one of the routes belonging to the specified Service. Leave unset for the plugin to activate regardless of the Service being matched. |
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "#/components/schemas/AiRateLimitingAdvancedPluginConfig",
"title": "AiRateLimitingAdvancedPluginConfig",
"x-speakeasy-entity": "PluginAiRateLimitingAdvanced",
"properties": {
"config": {
"type": "object",
"properties": {
"custom_cost_count_function": {
"description": "If defined, a custom function is used to generate the cost for the inference request.",
"type": "string"
},
"decrease_by_fractions_in_redis": {
"description": "By default, Kong decreases the AI rate limiting counters by whole numbers in Redis. This setting allows the counters to be decreased by fractional (floating-point) values.",
"type": "boolean",
"default": false
},
"dictionary_name": {
"description": "The shared dictionary where counters are stored. When the plugin is configured to synchronize counter data externally (that is `config.strategy` is `cluster` or `redis` and `config.sync_rate` isn't `-1`), this dictionary serves as a buffer to populate counters in the data store on each synchronization cycle. The dictionary must be defined in the nginx configuration using `lua_shared_dict` directive (e.g., `lua_shared_dict kong_rate_limiting_counters 12m`).",
"type": "string",
"default": "kong_rate_limiting_counters"
},
"disable_penalty": {
"description": "If set to `true`, this doesn't count denied requests (status = `429`). If set to `false`, all requests, including denied ones, are counted. This parameter only affects the `sliding` window_type and the request prompt provider.",
"type": "boolean",
"default": false
},
"error_code": {
"description": "Set a custom error code to return when the rate limit is exceeded.",
"type": "number",
"default": 429
},
"error_hide_providers": {
"description": "Optionally hide informative response that would otherwise provide information about the provider in the error message.",
"type": "boolean",
"default": false
},
"error_message": {
"description": "Set a custom error message to return when the rate limit is exceeded.",
"type": "string",
"default": "AI token rate limit exceeded for provider(s): "
},
"header_name": {
"description": "A string representing an HTTP header name.",
"type": "string"
},
"hide_client_headers": {
"description": "Optionally hide informative response headers that would otherwise provide information about the current status of limits and counters.",
"type": "boolean",
"default": false
},
"identifier": {
"description": "The type of identifier used to generate the rate limit key. Defines the scope used to increment the rate limiting counters. Can be `ip`, `credential`, `consumer`, `service`, `header`, `path` or `consumer-group`. Note if `identifier` is `consumer-group`, the plugin must be applied on a consumer group entity. Because a consumer may belong to multiple consumer groups, the plugin needs to know explicitly which consumer group to limit the rate.",
"type": "string",
"default": "consumer",
"enum": [
"consumer",
"consumer-group",
"credential",
"header",
"ip",
"path",
"service"
]
},
"llm_format": {
"description": "LLM input and output format and schema to use",
"type": "string",
"enum": [
"anthropic",
"bedrock",
"cohere",
"gemini",
"huggingface",
"openai"
]
},
"llm_providers": {
"description": "The provider config. Takes an array of `name`, `limit` and `window_size` values. Mutually exclusive with `policies`.",
"type": "array",
"items": {
"properties": {
"limit": {
"description": "One or more requests-per-window limits to apply. There must be a matching number of window limits and sizes specified.",
"type": "array",
"items": {
"type": "number"
}
},
"name": {
"description": "The LLM provider to which the rate limit applies.",
"type": "string",
"enum": [
"anthropic",
"azure",
"bedrock",
"cohere",
"customCost",
"gemini",
"huggingface",
"llama2",
"mistral",
"openai",
"requestPrompt"
]
},
"window_size": {
"description": "One or more window sizes to apply a limit to (defined in seconds). There must be a matching number of window limits and sizes specified.",
"type": "array",
"items": {
"type": "number"
}
}
},
"required": [
"limit",
"name",
"window_size"
],
"type": "object"
}
},
"namespace": {
"description": "The rate limiting library namespace to use for this plugin instance. Counter data and sync configuration are isolated in each namespace. NOTE: For plugin instances sharing the same namespace, all the configurations that are required for synchronizing counters, e.g. `strategy`, `redis`, `sync_rate`, `dictionary_name`, need to be the same.",
"type": "string"
},
"path": {
"description": "A string representing a URL path, such as /path/to/resource. Must start with a forward slash (/) and must not contain empty segments (i.e., two consecutive forward slashes).",
"type": "string"
},
"policies": {
"description": "Policy-based rate limiting. Each policy defines match conditions and limits. Mutually exclusive with `llm_providers`.",
"type": "array",
"items": {
"properties": {
"id": {
"description": "UUID reference to a reusable ai_rate_limiting_policies DAO entity. Mutually exclusive with inline limits.",
"type": "string"
},
"limits": {
"description": "Rate limits to enforce when this policy matches.",
"type": "array",
"items": {
"properties": {
"limit": {
"description": "The rate limit threshold for this window.",
"type": "number"
},
"tokens_count_strategy": {
"description": "What to count for this limit. Supported strategies: total_tokens, prompt_tokens, completion_tokens, cost.",
"type": "string",
"default": "total_tokens",
"enum": [
"completion_tokens",
"cost",
"prompt_tokens",
"total_tokens"
]
},
"window_size": {
"description": "The window size in seconds.",
"type": "integer"
}
},
"required": [
"limit",
"window_size"
],
"type": "object"
},
"minItems": 1
},
"match": {
"description": "Array of match conditions (AND logic). If omitted, this policy acts as a fallback for unmatched requests.",
"type": "array",
"items": {
"properties": {
"key": {
"description": "Sub-key for consumer (id|username|custom_id), consumer_group (id|name), or header (header name).",
"type": "string"
},
"partition_by": {
"description": "If true, the matched value contributes to the composite rate limit counter key.",
"type": "boolean",
"default": false
},
"type": {
"description": "The attribute to match against.",
"type": "string",
"enum": [
"consumer",
"consumer_group",
"header",
"ip",
"model",
"path",
"provider"
]
},
"values": {
"description": "Values to match. If omitted, matches any value of this type.",
"type": "array",
"items": {
"type": "string"
}
}
},
"required": [
"type"
],
"type": "object"
}
},
"window_type": {
"description": "The time window type for this policy.",
"type": "string",
"default": "sliding",
"enum": [
"fixed",
"sliding"
]
}
},
"type": "object"
}
},
"redis": {
"type": "object",
"properties": {
"cloud_authentication": {
"description": "Cloud auth related configs for connecting to a Cloud Provider's Redis instance.",
"type": "object",
"properties": {
"auth_provider": {
"description": "Auth providers to be used to authenticate to a Cloud Provider's Redis instance.",
"type": "string",
"enum": [
"aws",
"azure",
"gcp"
],
"x-referenceable": true
},
"aws_access_key_id": {
"description": "AWS Access Key ID to be used for authentication when `auth_provider` is set to `aws`.",
"type": "string",
"x-encrypted": true,
"x-referenceable": true
},
"aws_assume_role_arn": {
"description": "The ARN of the IAM role to assume for generating ElastiCache IAM authentication tokens.",
"type": "string",
"x-encrypted": true,
"x-referenceable": true
},
"aws_cache_name": {
"description": "The name of the AWS Elasticache cluster when `auth_provider` is set to `aws`.",
"type": "string",
"x-referenceable": true
},
"aws_is_serverless": {
"description": "This flag specifies whether the cluster is serverless when `auth_provider` is set to `aws`.",
"type": "boolean",
"default": true
},
"aws_region": {
"description": "The region of the AWS ElastiCache cluster when `auth_provider` is set to `aws`.",
"type": "string",
"x-referenceable": true
},
"aws_role_session_name": {
"description": "The session name for the temporary credentials when assuming the IAM role.",
"type": "string",
"x-encrypted": true,
"x-referenceable": true
},
"aws_secret_access_key": {
"description": "AWS Secret Access Key to be used for authentication when `auth_provider` is set to `aws`.",
"type": "string",
"x-encrypted": true,
"x-referenceable": true
},
"azure_client_id": {
"description": "Azure Client ID to be used for authentication when `auth_provider` is set to `azure`.",
"type": "string",
"x-encrypted": true,
"x-referenceable": true
},
"azure_client_secret": {
"description": "Azure Client Secret to be used for authentication when `auth_provider` is set to `azure`.",
"type": "string",
"x-encrypted": true,
"x-referenceable": true
},
"azure_tenant_id": {
"description": "Azure Tenant ID to be used for authentication when `auth_provider` is set to `azure`.",
"type": "string",
"x-encrypted": true,
"x-referenceable": true
},
"gcp_service_account_json": {
"description": "GCP Service Account JSON to be used for authentication when `auth_provider` is set to `gcp`.",
"type": "string",
"x-encrypted": true,
"x-referenceable": true
}
}
},
"cluster_max_redirections": {
"description": "Maximum retry attempts for redirection.",
"type": "integer",
"default": 5
},
"cluster_nodes": {
"description": "Cluster addresses to use for Redis connections when the `redis` strategy is defined. Defining this field implies using a Redis Cluster. The minimum length of the array is 1 element.",
"type": "array",
"items": {
"properties": {
"ip": {
"description": "A string representing a host name, such as example.com.",
"type": "string",
"default": "127.0.0.1"
},
"port": {
"description": "An integer representing a port number between 0 and 65535, inclusive.",
"type": "integer",
"default": 6379,
"maximum": 65535,
"minimum": 0
}
},
"type": "object"
},
"minItems": 1
},
"connect_timeout": {
"description": "An integer representing a timeout in milliseconds. Must be between 0 and 2^31-2.",
"type": "integer",
"default": 2000,
"maximum": 2147483646,
"minimum": 0
},
"connection_is_proxied": {
"description": "If the connection to Redis is proxied (e.g. Envoy), set it `true`. Set the `host` and `port` to point to the proxy address.",
"type": "boolean",
"default": false
},
"database": {
"description": "Database to use for the Redis connection when using the `redis` strategy",
"type": "integer",
"default": 0
},
"host": {
"description": "A string representing a host name, such as example.com.",
"type": "string",
"default": "127.0.0.1",
"x-referenceable": true
},
"keepalive_backlog": {
"description": "Limits the total number of opened connections for a pool. If the connection pool is full, connection queues above the limit go into the backlog queue. If the backlog queue is full, subsequent connect operations fail and return `nil`. Queued operations (subject to set timeouts) resume once the number of connections in the pool is less than `keepalive_pool_size`. If latency is high or throughput is low, try increasing this value. Empirically, this value is larger than `keepalive_pool_size`.",
"type": "integer",
"maximum": 2147483646,
"minimum": 0
},
"keepalive_pool_size": {
"description": "The size limit for every cosocket connection pool associated with every remote server, per worker process. If neither `keepalive_pool_size` nor `keepalive_backlog` is specified, no pool is created. If `keepalive_pool_size` isn't specified but `keepalive_backlog` is specified, then the pool uses the default value. Try to increase (e.g. 512) this value if latency is high or throughput is low.",
"type": "integer",
"default": 256,
"maximum": 2147483646,
"minimum": 1
},
"password": {
"description": "Password to use for Redis connections. If undefined, no AUTH commands are sent to Redis.",
"type": "string",
"x-encrypted": true,
"x-referenceable": true
},
"port": {
"description": "An integer representing a port number between 0 and 65535, inclusive.",
"type": "integer",
"default": 6379,
"maximum": 65535,
"minimum": 0,
"x-referenceable": true
},
"read_timeout": {
"description": "An integer representing a timeout in milliseconds. Must be between 0 and 2^31-2.",
"type": "integer",
"default": 2000,
"maximum": 2147483646,
"minimum": 0
},
"send_timeout": {
"description": "An integer representing a timeout in milliseconds. Must be between 0 and 2^31-2.",
"type": "integer",
"default": 2000,
"maximum": 2147483646,
"minimum": 0
},
"sentinel_master": {
"description": "Sentinel master to use for Redis connections. Defining this value implies using Redis Sentinel.",
"type": "string"
},
"sentinel_nodes": {
"description": "Sentinel node addresses to use for Redis connections when the `redis` strategy is defined. Defining this field implies using a Redis Sentinel. The minimum length of the array is 1 element.",
"type": "array",
"items": {
"properties": {
"host": {
"description": "A string representing a host name, such as example.com.",
"type": "string",
"default": "127.0.0.1"
},
"port": {
"description": "An integer representing a port number between 0 and 65535, inclusive.",
"type": "integer",
"default": 6379,
"maximum": 65535,
"minimum": 0
}
},
"type": "object"
},
"minItems": 1
},
"sentinel_password": {
"description": "Sentinel password to authenticate with a Redis Sentinel instance. If undefined, no AUTH commands are sent to Redis Sentinels.",
"type": "string",
"x-encrypted": true,
"x-referenceable": true
},
"sentinel_role": {
"description": "Sentinel role to use for Redis connections when the `redis` strategy is defined. Defining this value implies using Redis Sentinel.",
"type": "string",
"enum": [
"any",
"master",
"slave"
]
},
"sentinel_username": {
"description": "Sentinel username to authenticate with a Redis Sentinel instance. If undefined, ACL authentication won't be performed. This requires Redis v6.2.0+.",
"type": "string",
"x-referenceable": true
},
"server_name": {
"description": "A string representing an SNI (server name indication) value for TLS.",
"type": "string",
"x-referenceable": true
},
"ssl": {
"description": "If set to true, uses SSL to connect to Redis.",
"type": "boolean",
"default": false
},
"ssl_verify": {
"description": "If set to true, verifies the validity of the server SSL certificate. If setting this parameter, also configure `lua_ssl_trusted_certificate` in `kong.conf` to specify the CA (or server) certificate used by your Redis server. You may also need to configure `lua_ssl_verify_depth` accordingly.",
"type": "boolean",
"default": true
},
"username": {
"description": "Username to use for Redis connections. If undefined, ACL authentication won't be performed. This requires Redis v6.0.0+. To be compatible with Redis v5.x.y, you can set it to `default`.",
"type": "string",
"x-referenceable": true
}
}
},
"request_prompt_count_function": {
"description": "If defined, a custom function is used to count requests for the request prompt provider.",
"type": "string"
},
"retry_after_jitter_max": {
"description": "The upper bound of a jitter (random delay) in seconds to be added to the `Retry-After` header of denied requests (status = `429`) in order to prevent all the clients from coming back at the same time. The lower bound of the jitter is `0`; in this case, the `Retry-After` header is equal to the `RateLimit-Reset` header.",
"type": "number",
"default": 0
},
"strategy": {
"description": "The rate-limiting strategy to use for retrieving and incrementing the limits. Available values are: `local`, `redis` and `cluster`.",
"type": "string",
"default": "local",
"enum": [
"cluster",
"local",
"redis"
]
},
"sync_rate": {
"description": "How often to sync counter data to the central data store. A value of 0 results in synchronous behavior; a value of -1 ignores sync behavior entirely and only stores counters in node memory. A value greater than 0 will sync the counters in the specified number of seconds. The minimum allowed interval is 0.02 seconds (20ms).",
"type": "number"
},
"tokens_count_strategy": {
"description": "What tokens to use for cost calculation. Available values are: `total_tokens`, `prompt_tokens`, `completion_tokens` or `cost`.",
"type": "string",
"default": "total_tokens",
"enum": [
"completion_tokens",
"cost",
"prompt_tokens",
"total_tokens"
]
},
"window_type": {
"description": "Sets the time window type to either `sliding` (default) or `fixed`. Sliding windows apply the rate limiting logic while taking into account previous hit rates (from the window that immediately precedes the current) using a dynamic weight. Fixed windows consist of buckets that are statically assigned to a definitive time range, each request is mapped to only one fixed window based on its timestamp and will affect only that window's counters.",
"type": "string",
"default": "sliding",
"enum": [
"fixed",
"sliding"
]
}
}
},
"consumer": {
"description": "If set, the plugin will activate only for requests where the specified consumer has been authenticated. (Note that some plugins cannot be restricted to consumers this way.) Leave unset for the plugin to activate regardless of the authenticated Consumer.",
"type": "object",
"additionalProperties": false,
"properties": {
"id": {
"type": "string"
}
}
},
"consumer_group": {
"description": "If set, the plugin will activate only for requests where the specified consumer group has been authenticated. (Note that some plugins cannot be restricted to consumer groups this way.) Leave unset for the plugin to activate regardless of the authenticated Consumer Groups.",
"type": "object",
"additionalProperties": false,
"properties": {
"id": {
"type": "string"
}
}
},
"name": {
"const": "ai-rate-limiting-advanced"
},
"protocols": {
"description": "A set of strings representing HTTP protocols.",
"type": "array",
"items": {
"enum": [
"grpc",
"grpcs",
"http",
"https"
],
"type": "string"
},
"format": "set",
"default": [
"grpc",
"grpcs",
"http",
"https"
]
},
"route": {
"description": "If set, the plugin will only activate when receiving requests via the specified route. Leave unset for the plugin to activate regardless of the route being used.",
"type": "object",
"additionalProperties": false,
"properties": {
"id": {
"type": "string"
}
}
},
"service": {
"description": "If set, the plugin will only activate when receiving requests via one of the routes belonging to the specified Service. Leave unset for the plugin to activate regardless of the Service being matched.",
"type": "object",
"additionalProperties": false,
"properties": {
"id": {
"type": "string"
}
}
}
}
}