Hugging Face · Schema
Endpoint

Properties

Name	Type	Description
name	string	Endpoint name
type	string	Endpoint type
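accountId	string	Account identifier
provider	object	Cloud vendor and region
compute	object	Accelerator, instance type, instance size, and scaling settings
model	object	Model repository, revision, task, framework, and container image
status	object	Endpoint state, status message, timestamps, and inference URL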
url	string	Inference URL for the endpoint
View JSON Schema on GitHub
JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/Endpoint",
  "title": "Endpoint",
  "type": "object",
  "properties": {
    "name": {
      "type": "string",
      "description": "Endpoint name",
      "example": "my-text-gen-endpoint"
    },
    "type": {
      "type": "string",
      "description": "Endpoint type",
      "enum": [
        "public",
        "protected",
        "private"
      ],
      "example": "public"
    },
    "accountId": {
      "type": "string",
      "example": "500123"
    },
    "provider": {
      "type": "object",
      "properties": {
        "vendor": {
          "type": "string",
          "description": "Cloud vendor",
          "enum": [
            "aws",
            "azure",
            "gcp"
          ]
        },
        "region": {
          "type": "string",
          "description": "Cloud region",
          "example": "us-east-1"
        }
      },
      "example": {
        "vendor": "aws",
        "region": "us-east-1"
      }
    },
    "compute": {
      "type": "object",
      "properties": {
        "accelerator": {
          "type": "string",
          "description": "GPU or accelerator type",
          "example": "gpu"
        },
        "instanceType": {
          "type": "string",
          "description": "Instance type identifier",
          "example": "nvidia-a10g"
        },
        "instanceSize": {
          "type": "string",
          "description": "Instance size",
          "example": "x1"
        },
        "scaling": {
          "type": "object",
          "properties": {
            "minReplica": {
              "type": "integer",
              "description": "Minimum number of replicas",
              "example": 0
            },
            "maxReplica": {
              "type": "integer",
              "description": "Maximum number of replicas",
              "example": 2
            },
            "scaleToZeroTimeout": {
              "type": "integer",
              "description": "Minutes of inactivity before scaling to zero",
              "example": 15
            }
          }
        }
      },
      "example": {
        "accelerator": "gpu",
        "instanceType": "nvidia-a10g",
        "instanceSize": "x1",
        "scaling": {
          "minReplica": 0,
          "maxReplica": 2,
          "scaleToZeroTimeout": 15
        }
      }
    },
    "model": {
      "type": "object",
      "properties": {
        "repository": {
          "type": "string",
          "description": "Model repository ID on the Hub",
          "example": "meta-llama/Llama-2-7b-chat-hf"
        },
        "revision": {
          "type": "string",
          "description": "Model revision or branch",
          "example": "main"
        },
        "task": {
          "type": "string",
          "description": "Inference task",
          "example": "text-generation"
        },
        "framework": {
          "type": "string",
          "description": "Serving framework",
          "enum": [
            "pytorch",
            "custom"
          ]
        },
        "image": {
          "type": "object",
          "properties": {
            "huggingface": {
              "type": "object",
              "description": "Hugging Face optimized container settings"
            },
            "custom": {
              "type": "object",
              "description": "Custom container settings",
              "properties": {
                "url": {
                  "type": "string",
                  "format": "uri"
                },
                "port": {
                  "type": "integer"
                }
              }
            }
          }
        }
      },
      "example": {
        "repository": "meta-llama/Llama-2-7b-chat-hf",
        "revision": "main",
        "task": "text-generation",
        "framework": "pytorch",
        "image": {
          "huggingface": {}
        }
      }
    },
    "status": {
      "type": "object",
      "properties": {
        "state": {
          "type": "string",
          "description": "Current endpoint state",
          "enum": [
            "pending",
            "initializing",
            "running",
            "updating",
            "paused",
            "scaledToZero",
            "failed"
          ]
        },
        "message": {
          "type": "string",
          "description": "Human-readable status message"
        },
        "createdAt": {
          "type": "string",
          "format": "date-time"
        },
        "updatedAt": {
          "type": "string",
          "format": "date-time"
        },
        "url": {
          "type": "string",
          "format": "uri",
          "description": "Inference URL for the running endpoint"
        }
      },
      "example": {
        "state": "running",
        "message": "Endpoint is running",
        "createdAt": "2024-01-15T10:30:00Z",
        "updatedAt": "2024-01-15T10:45:00Z",
        "url": "https://www.example.com"
      }
    },
    "url": {
      "type": "string",
      "format": "uri",
      "description": "Inference URL for the endpoint",
      "example": "https://www.example.com"
    }
  }
}