Inference Request

Open Inference Protocol V2 inference request submitted to a model serving endpoint via HTTP POST. Compatible with KServe, NVIDIA Triton, BentoML, and other OIP-compliant servers.

AI · CNCF · Deployment · Inference · Kubernetes · LLM · Machine Learning · Model Serving · MLOps · Scalability

Properties

Name Type Description
id string Optional request identifier that will be echoed back in the response for correlation.
parameters object Optional key/value parameters passed to the model's pre/post-processing pipeline.
inputs array Input tensors for the inference request. Each tensor specifies its name, shape, datatype, and data.
outputs array Optional list of output tensors to return. If omitted, all model outputs are returned.
View JSON Schema on GitHub

JSON Schema

kserve-inference-request-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/scalable-inference-serving/main/json-schema/kserve-inference-request-schema.json",
  "title": "Inference Request",
  "description": "Open Inference Protocol V2 inference request submitted to a model serving endpoint via HTTP POST. Compatible with KServe, NVIDIA Triton, BentoML, and other OIP-compliant servers.",
  "type": "object",
  "required": ["inputs"],
  "properties": {
    "id": {
      "type": "string",
      "description": "Optional request identifier that will be echoed back in the response for correlation.",
      "examples": ["req-a1b2c3d4-e5f6-7890-abcd-ef1234567890"]
    },
    "parameters": {
      "type": "object",
      "description": "Optional key/value parameters passed to the model's pre/post-processing pipeline.",
      "additionalProperties": true
    },
    "inputs": {
      "type": "array",
      "description": "Input tensors for the inference request. Each tensor specifies its name, shape, datatype, and data.",
      "minItems": 1,
      "items": {
        "$ref": "#/$defs/RequestInput"
      }
    },
    "outputs": {
      "type": "array",
      "description": "Optional list of output tensors to return. If omitted, all model outputs are returned.",
      "items": {
        "$ref": "#/$defs/RequestOutput"
      }
    }
  },
  "$defs": {
    "RequestInput": {
      "type": "object",
      "title": "Request Input",
      "description": "A single named input tensor for an inference request.",
      "required": ["name", "shape", "datatype", "data"],
      "properties": {
        "name": {
          "type": "string",
          "description": "Tensor name as defined in the model's input specification."
        },
        "shape": {
          "type": "array",
          "description": "Shape of the tensor. Use -1 for variable-length or batch dimensions.",
          "items": {"type": "integer"},
          "examples": [[1, 128]]
        },
        "datatype": {
          "$ref": "#/$defs/TensorDatatype"
        },
        "parameters": {
          "type": "object",
          "additionalProperties": true,
          "description": "Optional tensor-level parameters."
        },
        "data": {
          "description": "Tensor data in row-major order. May be given as nested arrays or as a flat array.",
          "oneOf": [
            {"type": "array", "items": {}},
            {"type": "string", "description": "Base64-encoded binary data for the binary tensor data extension."}
          ]
        }
      }
    },
    "RequestOutput": {
      "type": "object",
      "title": "Request Output",
      "description": "Specifies which model output tensor to include in the response.",
      "required": ["name"],
      "properties": {
        "name": {
          "type": "string",
          "description": "Name of the output tensor to include in the response."
        },
        "parameters": {
          "type": "object",
          "description": "Optional key/value parameters for this requested output tensor.",
          "additionalProperties": true
        }
      }
    },
    "TensorDatatype": {
      "type": "string",
      "title": "Tensor Datatype",
      "description": "Data type of a tensor per the Open Inference Protocol specification.",
      "enum": ["BOOL", "UINT8", "UINT16", "UINT32", "UINT64", "INT8", "INT16", "INT32", "INT64", "FP16", "FP32", "FP64", "BYTES", "STRING"]
    }
  }
}