Hugging Face · Schema

Hugging Face Inference Endpoint

Schema for a dedicated Inference Endpoint deployment on the Hugging Face platform, including model configuration, compute resources, scaling settings, and runtime status.

Properties

| Name | Type | Description |
| --- | --- | --- |
| name | string | Unique name of the Inference Endpoint |
| accountId | string | Account or organization ID that owns the endpoint |
| type | string | Security type controlling endpoint access |
| provider | object | Cloud provider configuration |
| compute | object | Compute resources configuration |
| model | object | Model deployment configuration |
| status | object | Current status of the endpoint |
View JSON Schema on GitHub

JSON Schema

hugging-face-inference-endpoint-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://huggingface.co/schemas/inference-endpoint.json",
  "title": "Hugging Face Inference Endpoint",
  "description": "Schema for a dedicated Inference Endpoint deployment on the Hugging Face platform, including model configuration, compute resources, scaling settings, and runtime status.",
  "type": "object",
  "required": [
    "name",
    "type",
    "provider",
    "compute",
    "model"
  ],
  "properties": {
    "name": {
      "type": "string",
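      "description": "Unique name of the Inference Endpoint (2-32 characters; lowercase letters, digits, and hyphens; must start and end with a letter or digit)",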
      "pattern": "^[a-z0-9][a-z0-9-]{0,30}[a-z0-9]$",
      "examples": [
        "my-text-gen-endpoint",
        "prod-llama-chat",
        "staging-bert-classifier"
      ]
    },
    "accountId": {
      "type": "string",
      "description": "Account or organization ID that owns the endpoint"
    },
    "type": {
      "type": "string",
      "description": "Security type controlling endpoint access",
      "enum": [
        "public",
        "protected",
        "private"
      ],
      "default": "protected"
    },
    "provider": {
      "type": "object",
      "description": "Cloud provider configuration",
      "required": [
        "vendor",
        "region"
      ],
      "properties": {
        "vendor": {
          "type": "string",
          "description": "Cloud provider vendor",
          "enum": [
            "aws",
            "azure",
            "gcp"
          ]
        },
        "region": {
          "type": "string",
          "description": "Cloud region for deployment",
          "examples": [
            "us-east-1",
            "eu-west-1",
            "us-west-2",
            "ap-southeast-1"
          ]
        }
      }
    },
    "compute": {
      "type": "object",
      "description": "Compute resources configuration",
      "required": [
        "accelerator",
        "instanceType",
        "instanceSize",
        "scaling"
      ],
      "properties": {
        "accelerator": {
          "type": "string",
          "description": "Type of compute accelerator",
          "enum": [
            "cpu",
            "gpu"
          ]
        },
        "instanceType": {
          "type": "string",
          "description": "GPU or instance type",
          "examples": [
            "nvidia-a10g",
            "nvidia-t4",
            "nvidia-a100",
            "nvidia-l4",
            "nvidia-h100",
            "intel-icl",
            "intel-spr",
            "aws-inf2"
          ]
        },
        "instanceSize": {
          "type": "string",
          "description": "Instance size determining memory and compute",
          "examples": [
            "x1",
            "x2",
            "x4",
            "x8"
          ]
        },
        "scaling": {
          "type": "object",
          "description": "Autoscaling configuration",
          "required": [
            "minReplica",
            "maxReplica"
          ],
          "properties": {
            "minReplica": {
              "type": "integer",
              "description": "Minimum number of replicas (0 enables scale-to-zero)",
              "minimum": 0,
              "default": 0
            },
            "maxReplica": {
              "type": "integer",
              "description": "Maximum number of replicas",
              "minimum": 1,
              "default": 1
            },
            "scaleToZeroTimeout": {
              "type": "integer",
              "description": "Minutes of inactivity before scaling to zero",
              "minimum": 1,
              "default": 15
            }
          }
        }
      }
    },
    "model": {
      "type": "object",
      "description": "Model deployment configuration",
      "required": [
        "repository",
        "task"
      ],
      "properties": {
        "repository": {
          "type": "string",
          "description": "Hugging Face model repository ID",
          "examples": [
            "meta-llama/Llama-3-70b-chat-hf",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "sentence-transformers/all-MiniLM-L6-v2"
          ]
        },
        "revision": {
          "type": "string",
          "description": "Git revision (branch, tag, or commit SHA)",
          "default": "main"
        },
        "task": {
          "type": "string",
          "description": "Inference task type",
          "enum": [
            "text-generation",
            "text-classification",
            "token-classification",
            "question-answering",
            "summarization",
            "translation",
            "fill-mask",
            "feature-extraction",
            "sentence-similarity",
            "image-classification",
            "object-detection",
            "automatic-speech-recognition",
            "text-to-image",
            "custom"
          ]
        },
        "framework": {
          "type": "string",
          "description": "Serving framework",
          "enum": [
            "pytorch",
            "custom"
          ],
          "default": "pytorch"
        },
        "image": {
          "type": "object",
          "description": "Container image configuration",
          "properties": {
            "huggingface": {
              "type": "object",
              "description": "Hugging Face optimized container settings"
            },
            "custom": {
              "type": "object",
              "description": "Custom container settings",
              "properties": {
                "url": {
                  "type": "string",
                  "minLength": 1,
                  "description": "Custom container image URL (an image reference such as registry/repository:tag; a URI scheme is not required)",
                  "examples": [
                    "ghcr.io/huggingface/text-generation-inference:latest"
                  ]
                },
                "health_route": {
                  "type": "string",
                  "description": "Health check endpoint path"
                },
                "port": {
                  "type": "integer",
                  "description": "Container port",
                  "minimum": 1,
                  "maximum": 65535
                },
                "env": {
                  "type": "object",
                  "additionalProperties": {
                    "type": "string"
                  },
                  "description": "Environment variables for the container"
                }
              }
            }
          }
        }
      }
    },
    "status": {
      "type": "object",
      "description": "Current status of the endpoint",
      "properties": {
        "state": {
          "type": "string",
          "description": "Current operational state",
          "enum": [
            "pending",
            "initializing",
            "running",
            "updating",
            "paused",
            "scaledToZero",
            "failed"
          ]
        },
        "message": {
          "type": "string",
          "description": "Human-readable status message"
        },
        "createdAt": {
          "type": "string",
          "format": "date-time",
          "description": "When the endpoint was created"
        },
        "updatedAt": {
          "type": "string",
          "format": "date-time",
          "description": "When the endpoint was last updated"
        },
        "url": {
          "type": "string",
          "format": "uri",
          "description": "Inference URL when the endpoint is running"
        },
        "readyReplica": {
          "type": "integer",
          "description": "Number of replicas currently ready",
          "minimum": 0
        },
        "targetReplica": {
          "type": "integer",
          "description": "Target number of replicas",
          "minimum": 0
        },
        "errorMessage": {
          "type": "string",
          "description": "Error message if the endpoint is in a failed state"
        }
      }
    }
  }
}