Kong · Schema
AiProxyAdvancedPluginConfig

API GatewayAI GatewayAI ConnectivityAgent GatewayEvent GatewayMCP RegistryService MeshLLMKafkaKonnectOpen Source
Properties

Name	Type	Description
config	object
consumer	object	If set, the plugin will activate only for requests where the specified has been authenticated. (Note that some plugins can not be restricted to consumers this way.). Leave unset for the plugin to acti
consumer_group	object	If set, the plugin will activate only for requests where the specified consumer group has been authenticated. (Note that some plugins can not be restricted to consumers groups this way.). Leave unset
name	object
protocols	array	A list of the request protocols that will trigger this plugin. The default value, as well as the possible values allowed on this field, may change depending on the plugin type. For example, plugins th
route	object	If set, the plugin will only activate when receiving requests via the specified route. Leave unset for the plugin to activate regardless of the route being used.
service	object	If set, the plugin will only activate when receiving requests via one of the routes belonging to the specified Service. Leave unset for the plugin to activate regardless of the Service being matched.
View JSON Schema on GitHub
JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/AiProxyAdvancedPluginConfig",
  "title": "AiProxyAdvancedPluginConfig",
  "x-speakeasy-entity": "PluginAiProxyAdvanced",
  "properties": {
    "config": {
      "type": "object",
      "properties": {
        "acls": {
          "description": "Optional ACL rules. Deny rules take precedence over allow rules.",
          "type": "object",
          "properties": {
            "allow": {
              "description": "Requests matching any allow rule are permitted unless also matched by a deny rule.",
              "type": "array",
              "items": {
                "description": "ACL rule composed of one or more match conditions.",
                "properties": {
                  "match": {
                    "description": "All conditions must match for the rule to apply (logical AND).",
                    "type": "array",
                    "items": {
                      "description": "Single match condition (e.g. user or model value).",
                      "properties": {
                        "key": {
                          "description": "Helper key used by some types: consumer (id|username), consumer_group (id|name), header (header name).",
                          "type": "string"
                        },
                        "type": {
                          "description": "The attribute to match against.",
                          "type": "string",
                          "enum": [
                            "consumer",
                            "consumer_group",
                            "header",
                            "ip",
                            "model",
                            "path",
                            "provider"
                          ]
                        },
                        "values": {
                          "description": "Allowed values for the selected type.",
                          "type": "array",
                          "items": {
                            "type": "string"
                          },
                          "minItems": 1
                        }
                      },
                      "required": [
                        "type",
                        "values"
                      ],
                      "type": "object"
                    },
                    "minItems": 1
                  }
                },
                "required": [
                  "match"
                ],
                "type": "object"
              },
              "minItems": 1
            },
            "deny": {
              "description": "Requests matching any deny rule are blocked. Deny rules take precedence over allow rules.",
              "type": "array",
              "items": {
                "description": "ACL rule composed of one or more match conditions.",
                "properties": {
                  "match": {
                    "description": "All conditions must match for the rule to apply (logical AND).",
                    "type": "array",
                    "items": {
                      "description": "Single match condition (e.g. user or model value).",
                      "properties": {
                        "key": {
                          "description": "Helper key used by some types: consumer (id|username), consumer_group (id|name), header (header name).",
                          "type": "string"
                        },
                        "type": {
                          "description": "The attribute to match against.",
                          "type": "string",
                          "enum": [
                            "consumer",
                            "consumer_group",
                            "header",
                            "ip",
                            "model",
                            "path",
                            "provider"
                          ]
                        },
                        "values": {
                          "description": "Allowed values for the selected type.",
                          "type": "array",
                          "items": {
                            "type": "string"
                          },
                          "minItems": 1
                        }
                      },
                      "required": [
                        "type",
                        "values"
                      ],
                      "type": "object"
                    },
                    "minItems": 1
                  }
                },
                "required": [
                  "match"
                ],
                "type": "object"
              },
              "minItems": 1
            }
          }
        },
        "balancer": {
          "type": "object",
          "properties": {
            "algorithm": {
              "description": "Which load balancing algorithm to use.",
              "type": "string",
              "default": "round-robin",
              "enum": [
                "consistent-hashing",
                "least-connections",
                "lowest-latency",
                "lowest-usage",
                "priority",
                "round-robin",
                "semantic"
              ]
            },
            "connect_timeout": {
              "type": "integer",
              "default": 60000,
              "maximum": 2147483646,
              "minimum": 1
            },
            "fail_timeout": {
              "description": "The period of time (in milliseconds) the target will be considered unavailable after the number of unsuccessful attempts reaches `max_fails`.",
              "type": "integer",
              "default": 10000,
              "maximum": 2147483646,
              "minimum": 1
            },
            "failover_criteria": {
              "description": "Specifies in which cases an upstream response should be failover to the next target. Each option in the array is equivalent to the function of http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_next_upstream",
              "type": "array",
              "items": {
                "enum": [
                  "error",
                  "http_403",
                  "http_404",
                  "http_429",
                  "http_500",
                  "http_502",
                  "http_503",
                  "http_504",
                  "invalid_header",
                  "non_idempotent",
                  "timeout"
                ],
                "type": "string"
              },
              "default": [
                "error",
                "timeout"
              ]
            },
            "hash_on_header": {
              "description": "The header to use for consistent-hashing.",
              "type": "string",
              "default": "X-Kong-LLM-Request-ID"
            },
            "latency_strategy": {
              "description": "What metrics to use for latency. Available values are: `tpot` (time-per-output-token) and `e2e`.",
              "type": "string",
              "default": "tpot",
              "enum": [
                "e2e",
                "tpot"
              ]
            },
            "max_fails": {
              "description": "Number of unsuccessful attempts to communicate with a target that should occur in the duration defined by `fail_timeout` before the target is considered unavailable. The zero value disables the circuit breaker. What is considered an unsuccessful attempt is defined by `failover_criteria`. Note the cases of `error`, `timeout` and `invalid_header` are always considered unsuccessful attempts, while the cases of `http_403` and `http_404` are never considered unsuccessful attempts.",
              "type": "integer",
              "default": 0,
              "maximum": 32767,
              "minimum": 0
            },
            "read_timeout": {
              "type": "integer",
              "default": 60000,
              "maximum": 2147483646,
              "minimum": 1
            },
            "retries": {
              "description": "The number of retries to execute upon failure to proxy.",
              "type": "integer",
              "default": 5,
              "maximum": 32767,
              "minimum": 0
            },
            "slots": {
              "description": "The number of slots in the load balancer algorithm.",
              "type": "integer",
              "default": 10000,
              "maximum": 65536,
              "minimum": 10
            },
            "tokens_count_strategy": {
              "description": "What tokens to use for usage calculation. Available values are: `total_tokens` `prompt_tokens`, `completion_tokens` and `cost`.",
              "type": "string",
              "default": "total-tokens",
              "enum": [
                "completion-tokens",
                "cost",
                "llm-accuracy",
                "prompt-tokens",
                "total-tokens"
              ]
            },
            "write_timeout": {
              "type": "integer",
              "default": 60000,
              "maximum": 2147483646,
              "minimum": 1
            }
          }
        },
        "embeddings": {
          "type": "object",
          "properties": {
            "auth": {
              "type": "object",
              "properties": {
                "allow_override": {
                  "description": "If enabled, the authorization header or parameter can be overridden in the request by the value configured in the plugin.",
                  "type": "boolean",
                  "default": false
                },
                "aws_access_key_id": {
                  "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. Setting this will override the AWS_ACCESS_KEY_ID environment variable for this plugin instance.",
                  "type": "string",
                  "x-encrypted": true,
                  "x-referenceable": true
                },
                "aws_secret_access_key": {
                  "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. Setting this will override the AWS_SECRET_ACCESS_KEY environment variable for this plugin instance.",
                  "type": "string",
                  "x-encrypted": true,
                  "x-referenceable": true
                },
                "azure_client_id": {
                  "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client ID.",
                  "type": "string",
                  "x-referenceable": true
                },
                "azure_client_secret": {
                  "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client secret.",
                  "type": "string",
                  "x-encrypted": true,
                  "x-referenceable": true
                },
                "azure_tenant_id": {
                  "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the tenant ID.",
                  "type": "string",
                  "x-referenceable": true
                },
                "azure_use_managed_identity": {
                  "description": "Set true to use the Azure Cloud Managed Identity (or user-assigned identity) to authenticate with Azure-provider models.",
                  "type": "boolean",
                  "default": false
                },
                "gcp_metadata_url": {
                  "description": "Custom metadata URL for GCP authentication. Useful for restricted network environments or custom GCP endpoints. If null, Kong will use the default Google metadata endpoint.",
                  "type": "string",
                  "x-referenceable": true
                },
                "gcp_oauth_token_url": {
                  "description": "Custom OAuth token URL for GCP authentication. Useful for restricted network environments or custom GCP endpoints. If null, Kong will use the default Google OAuth token endpoint.",
                  "type": "string",
                  "x-referenceable": true
                },
                "gcp_service_account_json": {
                  "description": "Set this field to the full JSON of the GCP service account to authenticate, if required. If null (and gcp_use_service_account is true), Kong will attempt to read from environment variable `GCP_SERVICE_ACCOUNT`.",
                  "type": "string",
                  "x-encrypted": true,
                  "x-referenceable": true
                },
                "gcp_use_service_account": {
                  "description": "Use service account auth for GCP-based providers and models.",
                  "type": "boolean",
                  "default": false
                },
                "header_name": {
                  "description": "If AI model requires authentication via Authorization or API key header, specify its name here.",
                  "type": "string",
                  "x-referenceable": true
                },
                "header_value": {
                  "description": "Specify the full auth header value for 'header_name', for example 'Bearer key' or just 'key'.",
                  "type": "string",
                  "x-encrypted": true,
                  "x-referenceable": true
                },
                "param_location": {
                  "description": "Specify whether the 'param_name' and 'param_value' options go in a query string, or the POST form/JSON body.",
                  "type": "string",
                  "enum": [
                    "body",
                    "query"
                  ]
                },
                "param_name": {
                  "description": "If AI model requires authentication via query parameter, specify its name here.",
                  "type": "string",
                  "x-referenceable": true
                },
                "param_value": {
                  "description": "Specify the full parameter value for 'param_name'.",
                  "type": "string",
                  "x-encrypted": true,
                  "x-referenceable": true
                }
              }
            },
            "model": {
              "type": "object",
              "properties": {
                "name": {
                  "description": "Model name to execute.",
                  "type": "string"
                },
                "options": {
                  "description": "Key/value settings for the model",
                  "type": "object",
                  "properties": {
                    "azure": {
                      "type": "object",
                      "properties": {
                        "api_version": {
                          "description": "'api-version' for Azure OpenAI instances.",
                          "type": "string",
                          "default": "2023-05-15"
                        },
                        "deployment_id": {
                          "description": "Deployment ID for Azure OpenAI instances.",
                          "type": "string"
                        },
                        "instance": {
                          "description": "Instance name for Azure OpenAI hosted models.",
                          "type": "string"
                        }
                      }
                    },
                    "bedrock": {
                      "type": "object",
                      "properties": {
                        "aws_assume_role_arn": {
                          "description": "If using AWS providers (Bedrock) you can assume a different role after authentication with the current IAM context is successful.",
                          "type": "string"
                        },
                        "aws_region": {
                          "description": "If using AWS providers (Bedrock) you can override the `AWS_REGION` environment variable by setting this option.",
                          "type": "string"
                        },
                        "aws_role_session_name": {
                          "description": "If using AWS providers (Bedrock), set the identifier of the assumed role session.",
                          "type": "string"
                        },
                        "aws_sts_endpoint_url": {
                          "description": "If using AWS providers (Bedrock), override the STS endpoint URL when assuming a different role.",
                          "type": "string"
                        },
                        "batch_bucket_prefix": {
                          "description": "S3 URI prefix (s3://bucket/prefix/) where Bedrock will get input files from and store results to for native batch API.",
                          "type": "string"
                        },
                        "batch_role_arn": {
                          "description": "AWS role arn used for calling batch API. Try to get the value from request if ommited.",
                          "type": "string"
                        },
                        "embeddings_normalize": {
                          "description": "If using AWS providers (Bedrock), set to true to normalize the embeddings.",
                          "type": "boolean",
                          "default": false
                        },
                        "performance_config_latency": {
                          "description": "Force the client's performance configuration 'latency' for all requests. Leave empty to let the consumer select the performance configuration.",
                          "type": "string"
                        },
                        "video_output_s3_uri": {
                          "description": "S3 URI (s3://bucket/prefix) where Bedrock will store generated video files. Required for video generation.",
                          "type": "string"
                        }
                      }
                    },
                    "gemini": {
                      "type": "object",
                      "properties": {
                        "api_endpoint": {
                          "description": "If running Gemini on Vertex, specify the regional API endpoint (hostname only).",
                          "type": "string"
                        },
                        "location_id": {
                          "description": "If running Gemini on Vertex, specify the location ID.",
                          "type": "string"
                        },
                        "project_id": {
                          "description": "If running Gemini on Vertex, specify the project ID.",
                          "type": "string"
                        }
                      }
                    },
                    "huggingface": {
                      "type": "object",
                      "properties": {
                        "use_cache": {
                          "description": "Use the cache layer on the inference API",
                          "type": "boolean"
                        },
                        "wait_for_model": {
                          "description": "Wait for the model if it is not ready",
                          "type": "boolean"
                        }
                      }
                    },
                    "upstream_url": {
                      "description": "upstream url for the embeddings",
                      "type": "string"
                    }
                  }
                },
                "provider": {
                  "description": "AI provider format to use for embeddings API",
                  "type": "string",
                  "enum": [
                    "azure",
                    "bedrock",
                    "gemini",
                    "huggingface",
                    "mistral",
                    "ollama",
                    "openai"
                  ]
                }
              },
              "required": [
                "name",
                "provider"
              ]
            }
          },
          "required": [
            "model"
          ]
        },
        "genai_category": {
          "description": "Generative AI category of the request",
          "type": "string",
          "default": "text/generation",
          "enum": [
            "audio/speech",
            "audio/transcription",
            "image/generation",
            "realtime/generation",
            "text/embeddings",
            "text/generation",
            "video/generation"
          ]
        },
        "llm_format": {
          "description": "LLM input and output format and schema to use",
          "type": "string",
          "default": "openai",
          "enum": [
            "anthropic",
            "bedrock",
            "cohere",
            "gemini",
            "huggingface",
            "openai"
          ]
        },
        "max_request_body_size": {
          "description": "max allowed body size allowed to be introspected. 0 means unlimited, but the size of this body will still be limited by Nginx's client_max_body_size.",
          "type": "integer",
          "default": 1048576
        },
        "model_name_header": {
          "description": "Display the model name selected in the X-Kong-LLM-Model response header",
          "type": "boolean",
          "default": true
        },
        "response_streaming": {
          "description": "Whether to 'optionally allow', 'deny', or 'always' (force) the streaming of answers via server sent events.",
          "type": "string",
          "default": "allow",
          "enum": [
            "allow",
            "always",
            "deny"
          ]
        },
        "targets": {
          "type": "array",
          "items": {
            "properties": {
              "auth": {
                "type": "object",
                "properties": {
                  "allow_override": {
                    "description": "If enabled, the authorization header or parameter can be overridden in the request by the value configured in the plugin.",
                    "type": "boolean",
                    "default": false
                  },
                  "aws_access_key_id": {
                    "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. Setting this will override the AWS_ACCESS_KEY_ID environment variable for this plugin instance.",
                    "type": "string",
                    "x-encrypted": true,
                    "x-referenceable": true
                  },
                  "aws_secret_access_key": {
                    "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. Setting this will override the AWS_SECRET_ACCESS_KEY environment variable for this plugin instance.",
                    "type": "string",
                    "x-encrypted": true,
                    "x-referenceable": true
                  },
                  "azure_client_id": {
                    "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client ID.",
                    "type": "string",
                    "x-referenceable": true
                  },
                  "azure_client_secret": {
                    "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client secret.",
                    "type": "string",
                    "x-encrypted": true,
                    "x-referenceable": true
                  },
                  "azure_tenant_id": {
                    "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the tenant ID.",
                    "type": "string",
                    "x-referenceable": true
                  },
                  "azure_use_managed_identity": {
                    "description": "Set true to use the Azure Cloud Managed Identity (or user-assigned identity) to authenticate with Azure-provider models.",
                    "type": "boolean",
                    "default": false
                  },
                  "gcp_metadata_url": {
                    "description": "Custom metadata URL for GCP authentication. Useful for restricted network environments or custom GCP endpoints. If null, Kong will use the default Google metadata endpoint.",
                    "type": "string",
                    "x-referenceable": true
                  },
                  "gcp_oauth_token_url": {
                    "description": "Custom OAuth token URL for GCP authentication. Useful for restricted network environments or custom GCP endpoints. If null, Kong will use the default Google OAuth token endpoint.",
                    "type": "string",
                    "x-referenceable": true
                  },
                  "gcp_service_account_json": {
                    "description": "Set this field to the full JSON of the GCP service account to authenticate, if required. If null (and gcp_use_service_account is true), Kong will attempt to read from environment variable `GCP_SERVICE_ACCOUNT`.",
                    "type": "string",
                    "x-encrypted": true,
                    "x-referenceable": true
                  },
                  "gcp_use_service_account": {
                    "description": "Use service account auth for GCP-based providers and models.",
                    "type": "boolean",
                    "default": false
                  },
                  "header_name": {
                    "description": "If AI model requires authentication via Authorization or API key header, specify its name here.",
                    "type": "string",
                    "x-referenceable": true
                  },
                  "header_value": {
                    "description": "Specify the full auth header value for 'header_name', for example 'Bearer key' or just 'key'.",
                    "type": "string",
                    "x-encrypted": true,
                    "x-referenceable": true
                  },
                  "param_location": {
                    "description": "Specify whether the 'param_name' and 'param_value' options go in a query string, or the POST form/JSON body.",
                    "type": "string",
                    "enum": [
                      "body",
                      "query"
                    ]
                  },
                  "param_name": {
                    "description": "If AI model requires authentication via query parameter, specify its name here.",
                    "type": "string",
                    "x-referenceable": true
                  },
                  "param_value": {
                    "description": "Specify the full parameter value for 'param_name'.",
                    "type": "string",
                    "x-encrypted": true,
                    "x-referenceable": true
                  }
                }
              },
              "description": {
                "description": "The semantic description of the target, required if using semantic load balancing. Specially, setting this to 'CATCHALL' will indicate such target to be used when no other targets match the semantic threshold. Only used by ai-proxy-advanced.",
                "type": "string"
              },
              "logging": {
                "type": "object",
                "properties": {
                  "log_payloads": {
                    "description": "If enabled, will log the request and response body into the Kong log plugin(s) output.Furthermore if Opentelemetry instrumentation is enabled the traces will contain this data as well.",
                    "type": "boolean",
                    "default": false
                  },
                  "log_statistics": {
                    "description": "If enabled and supported by the driver, will add model usage and token metrics into the Kong log plugin(s) output.",
                    "type": "boolean",
                    "default": false
                  }
                }
              },
              "metadata": {
                "description": "For internal use only. ",
                "type": "object",
                "additionalProperties": true,
                "nullable": true,
                "x-speakeasy-type-override": "any"
              },
              "model": {
                "type": "object",
                "properties": {
                  "model_alias": {
                    "description": "The model name parameter from the request that this model should map to.",
                    "type": "string"
                  },
                  "name": {
                    "description": "Model name to execute.",
                    "type": "string"
                  },
                  "options": {
                    "description": "Key/value settings for the model",
                    "type": "object",
                    "properties": {
                      "anthropic_version": {
                        "description": "Defines the schema/API version, if using Anthropic provider.",
                        "type": "string"
                      },
                      "azure_api_version": {
                        "description": "'api-version' for Azure OpenAI instances.",
                        "type": "string",
                        "default": "2023-05-15"
                      },
                      "azure_deployment_id": {
                        "description": "Deployment ID for Azure OpenAI instances.",
                        "type": "string"
                      },
                      "azure_instance": {
                        "description": "Instance name for Azure OpenAI hosted models.",
                        "type": "string"
                      },
                      "bedrock": {
                        "type": "object",
                        "properties": {
                          "aws_assume_role_arn": {
                            "description": "If using AWS providers (Bedrock) you can assume a different role after authentication with the current IAM context is successful.",
                            "type": "string"
                          },
                          "aws_region": {
                            "description": "If using AWS providers (Bedrock) you can override the `AWS_REGION` environment variable by setting this option.",
                            "type": "string"
                          },
                          "aws_role_session_name": {
                            "description": "If using AWS providers (Bedrock), set the identifier of the assumed role session.",
                            "type": "string"
                          },
                          "aws_sts_endpoint_url": {
                            "description": "If using AWS providers (Bedrock), override the STS endpoint URL when assuming a different role.",
                            "type": "string"
                          },
                          "batch_bucket_prefix": {
                            "description": "S3 URI prefix (s3://bucket/prefix/) where Bedrock will get input files from and store results to for native batch API.",
                            "type": "string"
                          },
                          "batch_role_arn": {
                            "description": "AWS role arn used for calling batch API. Try to get the value from request if ommited.",
                            "type": "string"
                          },
                          "embeddings_normalize": {
                            "description": "If using AWS providers (Bedrock), set to true to normalize the embeddings.",
                            "type": "boolean",
       

# --- truncated at 32 KB (59 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/kong/refs/heads/main/json-schema/kong-aiproxyadvancedpluginconfig-schema.json