Chroma · Schema

Chroma Collection

A Chroma collection stores embeddings, documents, and associated metadata. Collections are the primary unit for organizing and searching vector data within a database.

AI · AI Native · Apache 2.0 · Cloud · Embeddings · Hybrid Search · JavaScript · LLM · Machine Learning · Multi-Modal · Open Source · Python · RAG · Retrieval · SDK · Search · Serverless · TypeScript · Vector Database

Properties

Name Type Description
id string The unique identifier of the collection, assigned by the server on creation
name string The name of the collection, used as a human-readable identifier within a database
metadata object | null Arbitrary key-value metadata associated with the collection, used for organizing and describing the collection's purpose and configuration
tenant string The name of the tenant this collection belongs to
database string The name of the database this collection belongs to
View JSON Schema on GitHub

JSON Schema

chroma-collection-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://trychroma.com/schemas/chroma/collection.json",
  "title": "Chroma Collection",
  "description": "A Chroma collection stores embeddings, documents, and associated metadata. Collections are the primary unit for organizing and searching vector data within a database.",
  "type": "object",
  "required": ["id", "name"],
  "properties": {
    "id": {
      "type": "string",
      "format": "uuid",
      "description": "The unique identifier of the collection, assigned by the server on creation"
    },
    "name": {
      "$comment": "Follows Chroma's collection naming rule: 3-512 characters from [a-zA-Z0-9._-], starting and ending with an alphanumeric character. Chroma's other documented naming rules (no two consecutive dots, not a valid IP address) are not enforced by this pattern.",
      "type": "string",
      "minLength": 3,
      "maxLength": 512,
      "pattern": "^[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]$",
      "description": "The name of the collection, used as a human-readable identifier within a database"
    },
    "metadata": {
      "type": ["object", "null"],
      "additionalProperties": true,
      "description": "Arbitrary key-value metadata associated with the collection, used for organizing and describing the collection's purpose and configuration"
    },
    "tenant": {
      "type": "string",
      "description": "The name of the tenant this collection belongs to"
    },
    "database": {
      "type": "string",
      "description": "The name of the database this collection belongs to"
    }
  },
  "$defs": {
    "CollectionConfiguration": {
      "type": "object",
      "description": "Configuration options that can be set in collection metadata to control embedding and indexing behavior",
      "properties": {
        "hnsw:space": {
          "type": "string",
          "enum": ["l2", "ip", "cosine"],
          "default": "l2",
          "description": "The distance function used for nearest neighbor search. l2 is Euclidean distance, ip is inner product, cosine is cosine similarity."
        },
        "hnsw:construction_ef": {
          "type": "integer",
          "minimum": 1,
          "default": 100,
          "description": "The size of the dynamic candidate list during HNSW index construction. Higher values improve recall at the cost of indexing speed."
        },
        "hnsw:search_ef": {
          "type": "integer",
          "minimum": 1,
          "default": 10,
          "description": "The size of the dynamic candidate list during search. Higher values improve recall at the cost of search speed."
        },
        "hnsw:M": {
          "type": "integer",
          "minimum": 2,
          "default": 16,
          "description": "The maximum number of bi-directional links per element in the HNSW graph. Higher values improve recall at the cost of memory."
        },
        "hnsw:num_threads": {
          "type": "integer",
          "minimum": 1,
          "default": 4,
          "description": "Number of threads to use during HNSW index construction"
        }
      }
    }
  }
}