Hugging Face · Schema
Hugging Face Inference Endpoint
Schema for a dedicated Inference Endpoint deployment on the Hugging Face platform, including model configuration, compute resources, scaling settings, and runtime status.
Properties
| Name | Type | Description |
|---|---|---|
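| name | string | Unique name of the Inference Endpoint (2-32 characters: lowercase letters, digits, and hyphens; must start and end with a letter or digit) |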
| accountId | string | Account or organization ID that owns the endpoint |
| type | string | Security type controlling endpoint access: public, protected (default), or private |
| provider | object | Cloud provider configuration |
| compute | object | Compute resources configuration |
| model | object | Model deployment configuration |
| status | object | Current status of the endpoint |
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://huggingface.co/schemas/inference-endpoint.json",
"title": "Hugging Face Inference Endpoint",
"description": "Schema for a dedicated Inference Endpoint deployment on the Hugging Face platform, including model configuration, compute resources, scaling settings, and runtime status.",
"type": "object",
"required": [
"name",
"type",
"provider",
"compute",
"model"
],
"properties": {
"name": {
"type": "string",
"description": "Unique name of the Inference Endpoint",
"pattern": "^[a-z0-9][a-z0-9-]{0,30}[a-z0-9]$",
"examples": [
"my-text-gen-endpoint",
"prod-llama-chat",
"staging-bert-classifier"
]
},
"accountId": {
"type": "string",
"description": "Account or organization ID that owns the endpoint"
},
"type": {
"type": "string",
"description": "Security type controlling endpoint access",
"enum": [
"public",
"protected",
"private"
],
"default": "protected"
},
"provider": {
"type": "object",
"description": "Cloud provider configuration",
"required": [
"vendor",
"region"
],
"properties": {
"vendor": {
"type": "string",
"description": "Cloud provider vendor",
"enum": [
"aws",
"azure",
"gcp"
]
},
"region": {
"type": "string",
"description": "Cloud region for deployment",
"examples": [
"us-east-1",
"eu-west-1",
"us-west-2",
"ap-southeast-1"
]
}
}
},
"compute": {
"type": "object",
"description": "Compute resources configuration",
"required": [
"accelerator",
"instanceType",
"instanceSize",
"scaling"
],
"properties": {
"accelerator": {
"type": "string",
"description": "Type of compute accelerator",
"enum": [
"cpu",
"gpu"
]
},
"instanceType": {
"type": "string",
"description": "Hardware instance type, such as a GPU model or CPU family (e.g. nvidia-a10g, intel-icl)",
"examples": [
"nvidia-a10g",
"nvidia-t4",
"nvidia-a100",
"nvidia-l4",
"nvidia-h100",
"intel-icl",
"intel-spr",
"aws-inf2"
]
},
"instanceSize": {
"type": "string",
"description": "Instance size determining memory and compute",
"examples": [
"x1",
"x2",
"x4",
"x8"
]
},
"scaling": {
"type": "object",
"description": "Autoscaling configuration",
"required": [
"minReplica",
"maxReplica"
],
"properties": {
"minReplica": {
"type": "integer",
"description": "Minimum number of replicas (0 enables scale-to-zero)",
"minimum": 0,
"default": 0
},
"maxReplica": {
"type": "integer",
"description": "Maximum number of replicas",
"minimum": 1,
"default": 1
},
"scaleToZeroTimeout": {
"type": "integer",
"description": "Minutes of inactivity before the endpoint scales down to zero replicas (relevant when minReplica is 0)",
"minimum": 1,
"default": 15
}
}
}
}
},
"model": {
"type": "object",
"description": "Model deployment configuration",
"required": [
"repository",
"task"
],
"properties": {
"repository": {
"type": "string",
"description": "Hugging Face model repository ID",
"examples": [
"meta-llama/Llama-3-70b-chat-hf",
"mistralai/Mistral-7B-Instruct-v0.3",
"sentence-transformers/all-MiniLM-L6-v2"
]
},
"revision": {
"type": "string",
"description": "Git revision (branch, tag, or commit SHA)",
"default": "main"
},
"task": {
"type": "string",
"description": "Inference task type",
"enum": [
"text-generation",
"text-classification",
"token-classification",
"question-answering",
"summarization",
"translation",
"fill-mask",
"feature-extraction",
"sentence-similarity",
"image-classification",
"object-detection",
"automatic-speech-recognition",
"text-to-image",
"custom"
]
},
"framework": {
"type": "string",
"description": "Serving framework",
"enum": [
"pytorch",
"custom"
],
"default": "pytorch"
},
"image": {
"type": "object",
"description": "Container image configuration",
"properties": {
"huggingface": {
"type": "object",
"description": "Hugging Face optimized container settings"
},
"custom": {
"type": "object",
"description": "Custom container settings",
"properties": {
"url": {
"type": "string",
"format": "uri",
"description": "Custom container image URL"
},
"health_route": {
"type": "string",
"description": "Health check endpoint path"
},
"port": {
"type": "integer",
"description": "Container port"
},
"env": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Environment variables for the container"
}
}
}
}
}
}
},
"status": {
"type": "object",
"description": "Current status of the endpoint",
"properties": {
"state": {
"type": "string",
"description": "Current operational state",
"enum": [
"pending",
"initializing",
"running",
"updating",
"paused",
"scaledToZero",
"failed"
]
},
"message": {
"type": "string",
"description": "Human-readable status message"
},
"createdAt": {
"type": "string",
"format": "date-time",
"description": "When the endpoint was created"
},
"updatedAt": {
"type": "string",
"format": "date-time",
"description": "When the endpoint was last updated"
},
"url": {
"type": "string",
"format": "uri",
"description": "Inference URL when the endpoint is running"
},
"readyReplica": {
"type": "integer",
"description": "Number of replicas currently ready"
},
"targetReplica": {
"type": "integer",
"description": "Target number of replicas"
},
"errorMessage": {
"type": "string",
"description": "Error message if the endpoint is in a failed state"
}
}
}
}
}