Opik · Schema

Experiment

An evaluation run that executes an LLM application against a Dataset and collects feedback scores for each DatasetItem. Experiments support multiple types and evaluation methods.

LLM · Evaluation · Observability · Tracing · Open Source · LLMOps · RAG · AI · Machine Learning · Monitoring

Properties

Name	Type	Description
id	string	Unique identifier for the experiment.
dataset_name	string	Name of the Dataset this experiment runs against.
dataset_id	string	UUID of the Dataset this experiment runs against.
project_id	string	Project ID. Takes precedence over project_name when both are provided.
project_name	string	Project name. Creates project if it does not exist. Ignored when project_id is provided.
name	string	Human-readable name for the experiment.
metadata	object	Additional metadata associated with the experiment.
tags	array	Labels for categorizing the experiment.
type	string	Type of experiment run.
evaluation_method	string	Method used to drive the evaluation (standard dataset or evaluation suite).
optimization_id	string	UUID of the Optimization workflow this experiment belongs to, if any.
feedback_scores	array	Aggregated average feedback scores across all experiment items.
comments	array	User comments on this experiment.
created_at	string	Timestamp when the experiment was created.
last_updated_at	string	Timestamp when the experiment was last updated.
created_by	string	Username or identifier of the user who created the experiment.
last_updated_by	string	Username or identifier of the user who last updated the experiment.
trace_count	integer	Number of traces (experiment items) in this experiment.
duration	number	Duration of the experiment run in milliseconds.
total_estimated_cost	number	Estimated total cost of LLM API calls made during this experiment.

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://raw.githubusercontent.com/api-evangelist/opik/main/json-schema/opik-experiment.json",
  "title": "Experiment",
  "description": "An evaluation run that executes an LLM application against a Dataset and collects feedback scores for each DatasetItem. Experiments support multiple types and evaluation methods.",
  "type": "object",
  "required": ["dataset_name"],
  "properties": {
    "id": {
      "type": "string",
      "format": "uuid",
      "description": "Unique identifier for the experiment."
    },
    "dataset_name": {
      "type": "string",
      "minLength": 1,
      "description": "Name of the Dataset this experiment runs against."
    },
    "dataset_id": {
      "type": "string",
      "format": "uuid",
      "description": "UUID of the Dataset this experiment runs against.",
      "readOnly": true
    },
    "project_id": {
      "type": "string",
      "format": "uuid",
      "description": "Project ID. Takes precedence over project_name when both are provided."
    },
    "project_name": {
      "type": "string",
      "$comment": "Requires at least one non-whitespace character. Uses [\\s\\S] rather than a leading (?s) flag, which ECMA 262 regular expressions do not accept.",
      "pattern": "^\\s*(\\S[\\s\\S]*\\S|\\S)\\s*$",
      "description": "Project name. Creates project if it does not exist. Ignored when project_id is provided."
    },
    "name": {
      "type": "string",
      "description": "Human-readable name for the experiment."
    },
    "metadata": {
      "description": "Additional metadata associated with the experiment."
    },
    "tags": {
      "type": "array",
      "uniqueItems": true,
      "minItems": 0,
      "maxItems": 50,
      "description": "Labels for categorizing the experiment.",
      "items": {
        "type": "string",
        "minLength": 0,
        "maxLength": 100
      }
    },
    "type": {
      "type": "string",
      "enum": ["regular", "trial", "mini-batch", "mutation"],
      "description": "Type of experiment run."
    },
    "evaluation_method": {
      "type": "string",
      "enum": ["dataset", "evaluation_suite"],
      "description": "Method used to drive the evaluation (standard dataset or evaluation suite)."
    },
    "optimization_id": {
      "type": "string",
      "format": "uuid",
      "description": "UUID of the Optimization workflow this experiment belongs to, if any."
    },
    "feedback_scores": {
      "type": "array",
      "description": "Aggregated average feedback scores across all experiment items.",
      "readOnly": true,
      "items": {
        "type": "object",
        "properties": {
          "name": { "type": "string" },
          "value": { "type": "number" },
          "count": { "type": "integer" }
        }
      }
    },
    "comments": {
      "type": "array",
      "description": "User comments on this experiment.",
      "readOnly": true,
      "items": {
        "type": "object",
        "properties": {
          "id": { "type": "string", "format": "uuid" },
          "text": { "type": "string" },
          "created_at": { "type": "string", "format": "date-time" },
          "created_by": { "type": "string" }
        }
      }
    },
    "created_at": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp when the experiment was created.",
      "readOnly": true
    },
    "last_updated_at": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp when the experiment was last updated.",
      "readOnly": true
    },
    "created_by": {
      "type": "string",
      "description": "Username or identifier of the user who created the experiment.",
      "readOnly": true
    },
    "last_updated_by": {
      "type": "string",
      "description": "Username or identifier of the user who last updated the experiment.",
      "readOnly": true
    },
    "trace_count": {
      "type": "integer",
      "description": "Number of traces (experiment items) in this experiment.",
      "readOnly": true
    },
    "duration": {
      "type": "number",
      "description": "Duration of the experiment run in milliseconds.",
      "readOnly": true
    },
    "total_estimated_cost": {
      "type": "number",
      "description": "Estimated total cost of LLM API calls made during this experiment.",
      "readOnly": true
    }
  }
}