Triton Inference Response

An inference response returned by NVIDIA Triton Inference Server following the KServe V2 inference protocol. Contains the model identification, request correlation ID, and output tensors with their data.

Tags: AI · Deep Learning · Inference · Machine Learning · Model Serving · NVIDIA · Open Source

Properties

| Name | Type | Description |
| --- | --- | --- |
| `id` | string | Unique identifier for the response, matching the corresponding request ID |
| `model_name` | string | Name of the model that produced the inference results |
| `model_version` | string | Version of the model that produced the inference results |
| `parameters` | object | Response-level parameters returned by the server |
| `outputs` | array | Output tensors produced by the inference |
View JSON Schema on GitHub

JSON Schema

triton-inference-response-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://developer.nvidia.com/schemas/triton/inference-response.json",
  "title": "Triton Inference Response",
  "description": "An inference response returned by NVIDIA Triton Inference Server following the KServe V2 inference protocol. Contains the model identification, request correlation ID, and output tensors with their data.",
  "type": "object",
  "required": ["model_name", "outputs"],
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique identifier for the response, matching the corresponding request ID"
    },
    "model_name": {
      "type": "string",
      "description": "Name of the model that produced the inference results"
    },
    "model_version": {
      "type": "string",
      "description": "Version of the model that produced the inference results"
    },
    "parameters": {
      "type": "object",
      "description": "Response-level parameters returned by the server",
      "properties": {
        "sequence_id": {
          "oneOf": [
            { "type": "integer" },
            { "type": "string" }
          ],
          "description": "Sequence identifier if this response is part of a sequence"
        },
        "sequence_start": {
          "type": "boolean",
          "description": "Indicates this is the first response in a sequence"
        },
        "sequence_end": {
          "type": "boolean",
          "description": "Indicates this is the last response in a sequence"
        }
      },
      "additionalProperties": {
        "description": "Custom parameter value: a string, a boolean, or a number (integer or floating-point)",
        "oneOf": [
          { "type": "string" },
          { "type": "boolean" },
          { "type": "number" }
        ]
      }
    },
    "outputs": {
      "type": "array",
      "description": "Output tensors produced by the inference",
      "items": {
        "$ref": "#/$defs/OutputTensor"
      }
    }
  },
  "$defs": {
    "OutputTensor": {
      "type": "object",
      "description": "An output tensor returned by the model after inference",
      "required": ["name", "shape", "datatype"],
      "properties": {
        "name": {
          "type": "string",
          "description": "Name of the output tensor"
        },
        "shape": {
          "type": "array",
          "description": "Shape of the output tensor",
          "items": {
            "type": "integer",
            "minimum": 0
          }
        },
        "datatype": {
          "type": "string",
          "description": "Data type of the tensor elements",
          "enum": [
            "BOOL",
            "UINT8",
            "UINT16",
            "UINT32",
            "UINT64",
            "INT8",
            "INT16",
            "INT32",
            "INT64",
            "FP16",
            "FP32",
            "FP64",
            "BYTES",
            "BF16"
          ]
        },
        "parameters": {
          "type": "object",
          "description": "Per-output parameters",
          "properties": {
            "binary_data_size": {
              "type": "integer",
              "minimum": 0,
              "description": "Size in bytes of binary data appended after the JSON response body"
            }
          },
          "additionalProperties": {
            "description": "Custom parameter value: a string, a boolean, or a number (integer or floating-point)",
            "oneOf": [
              { "type": "string" },
              { "type": "boolean" },
              { "type": "number" }
            ]
          }
        },
        "data": {
          "type": "array",
          "description": "Tensor data as a flattened row-major array of values"
        }
      }
    }
  }
}