Hugging Face · Schema
Hugging Face Inference Endpoint

Schema for a dedicated Inference Endpoint deployment on the Hugging Face platform, including model configuration, compute resources, scaling settings, and runtime status.
Properties

Name	Type	Description
name	string	Unique name of the Inference Endpoint
accountId	string	Account or organization ID that owns the endpoint
type	string	Security type controlling endpoint access
provider	object	Cloud provider configuration
compute	object	Compute resources configuration
model	object	Model deployment configuration
status	object	Current status of the endpoint
View JSON Schema on GitHub
JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://huggingface.co/schemas/inference-endpoint.json",
  "title": "Hugging Face Inference Endpoint",
  "description": "Schema for a dedicated Inference Endpoint deployment on the Hugging Face platform, including model configuration, compute resources, scaling settings, and runtime status.",
  "type": "object",
  "required": [
    "name",
    "type",
    "provider",
    "compute",
    "model"
  ],
  "properties": {
    "name": {
      "type": "string",
      "description": "Unique name of the Inference Endpoint",
      "pattern": "^[a-z0-9][a-z0-9-]{0,30}[a-z0-9]$",
      "examples": [
        "my-text-gen-endpoint",
        "prod-llama-chat",
        "staging-bert-classifier"
      ]
    },
    "accountId": {
      "type": "string",
      "description": "Account or organization ID that owns the endpoint"
    },
    "type": {
      "type": "string",
      "description": "Security type controlling endpoint access",
      "enum": [
        "public",
        "protected",
        "private"
      ],
      "default": "protected"
    },
    "provider": {
      "type": "object",
      "description": "Cloud provider configuration",
      "required": [
        "vendor",
        "region"
      ],
      "properties": {
        "vendor": {
          "type": "string",
          "description": "Cloud provider vendor",
          "enum": [
            "aws",
            "azure",
            "gcp"
          ]
        },
        "region": {
          "type": "string",
          "description": "Cloud region for deployment",
          "examples": [
            "us-east-1",
            "eu-west-1",
            "us-west-2",
            "ap-southeast-1"
          ]
        }
      }
    },
    "compute": {
      "type": "object",
      "description": "Compute resources configuration",
      "required": [
        "accelerator",
        "instanceType",
        "instanceSize",
        "scaling"
      ],
      "properties": {
        "accelerator": {
          "type": "string",
          "description": "Type of compute accelerator",
          "enum": [
            "cpu",
            "gpu"
          ]
        },
        "instanceType": {
          "type": "string",
          "description": "GPU or instance type",
          "examples": [
            "nvidia-a10g",
            "nvidia-t4",
            "nvidia-a100",
            "nvidia-l4",
            "nvidia-h100",
            "intel-icl",
            "intel-spr",
            "aws-inf2"
          ]
        },
        "instanceSize": {
          "type": "string",
          "description": "Instance size determining memory and compute",
          "examples": [
            "x1",
            "x2",
            "x4",
            "x8"
          ]
        },
        "scaling": {
          "type": "object",
          "description": "Autoscaling configuration",
          "required": [
            "minReplica",
            "maxReplica"
          ],
          "properties": {
            "minReplica": {
              "type": "integer",
              "description": "Minimum number of replicas (0 enables scale-to-zero)",
              "minimum": 0,
              "default": 0
            },
            "maxReplica": {
              "type": "integer",
              "description": "Maximum number of replicas",
              "minimum": 1,
              "default": 1
            },
            "scaleToZeroTimeout": {
              "type": "integer",
              "description": "Minutes of inactivity before scaling to zero",
              "minimum": 1,
              "default": 15
            }
          }
        }
      }
    },
    "model": {
      "type": "object",
      "description": "Model deployment configuration",
      "required": [
        "repository",
        "task"
      ],
      "properties": {
        "repository": {
          "type": "string",
          "description": "Hugging Face model repository ID",
          "examples": [
            "meta-llama/Llama-3-70b-chat-hf",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "sentence-transformers/all-MiniLM-L6-v2"
          ]
        },
        "revision": {
          "type": "string",
          "description": "Git revision (branch, tag, or commit SHA)",
          "default": "main"
        },
        "task": {
          "type": "string",
          "description": "Inference task type",
          "enum": [
            "text-generation",
            "text-classification",
            "token-classification",
            "question-answering",
            "summarization",
            "translation",
            "fill-mask",
            "feature-extraction",
            "sentence-similarity",
            "image-classification",
            "object-detection",
            "automatic-speech-recognition",
            "text-to-image",
            "custom"
          ]
        },
        "framework": {
          "type": "string",
          "description": "Serving framework",
          "enum": [
            "pytorch",
            "custom"
          ],
          "default": "pytorch"
        },
        "image": {
          "type": "object",
          "description": "Container image configuration",
          "properties": {
            "huggingface": {
              "type": "object",
              "description": "Hugging Face optimized container settings"
            },
            "custom": {
              "type": "object",
              "description": "Custom container settings",
              "properties": {
                "url": {
                  "type": "string",
                  "format": "uri",
                  "description": "Custom container image URL"
                },
                "health_route": {
                  "type": "string",
                  "description": "Health check endpoint path"
                },
                "port": {
                  "type": "integer",
                  "description": "Container port"
                },
                "env": {
                  "type": "object",
                  "additionalProperties": {
                    "type": "string"
                  },
                  "description": "Environment variables for the container"
                }
              }
            }
          }
        }
      }
    },
    "status": {
      "type": "object",
      "description": "Current status of the endpoint",
      "properties": {
        "state": {
          "type": "string",
          "description": "Current operational state",
          "enum": [
            "pending",
            "initializing",
            "running",
            "updating",
            "paused",
            "scaledToZero",
            "failed"
          ]
        },
        "message": {
          "type": "string",
          "description": "Human-readable status message"
        },
        "createdAt": {
          "type": "string",
          "format": "date-time",
          "description": "When the endpoint was created"
        },
        "updatedAt": {
          "type": "string",
          "format": "date-time",
          "description": "When the endpoint was last updated"
        },
        "url": {
          "type": "string",
          "format": "uri",
          "description": "Inference URL when the endpoint is running"
        },
        "readyReplica": {
          "type": "integer",
          "description": "Number of replicas currently ready"
        },
        "targetReplica": {
          "type": "integer",
          "description": "Target number of replicas"
        },
        "errorMessage": {
          "type": "string",
          "description": "Error message if the endpoint is in a failed state"
        }
      }
    }
  }
}