cohere · Schema

Cohere Dataset

Represents a dataset managed through the Cohere Datasets API, used for embed jobs, fine-tuning, and other batch processing tasks.

Properties

| Name | Type | Description |
| --- | --- | --- |
| id | string | The unique identifier of the dataset. |
| name | string | The human-readable name of the dataset. |
| dataset_type | string | The type of dataset, which determines its schema and compatible operations. |
| validation_status | string | The current validation status of the dataset after upload. |
| created_at | string | The ISO 8601 timestamp when the dataset was created. |
| updated_at | string | The ISO 8601 timestamp when the dataset was last updated. |
| schema | string | The expected schema definition for the dataset records. |
| required_fields | array | The field names required in each record of the dataset. |
| preserve_fields | array | The field names that are preserved through processing. |
| validation_error | string | The error message if dataset validation failed. |
| validation_warnings | array | Warning messages for rows that were dropped during validation. |
View JSON Schema on GitHub

JSON Schema

cohere-dataset-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://api.cohere.com/schemas/cohere/dataset.json",
  "title": "Cohere Dataset",
  "description": "Represents a dataset managed through the Cohere Datasets API, used for embed jobs, fine-tuning, and other batch processing tasks.",
  "type": "object",
  "required": ["id", "name", "dataset_type"],
  "properties": {
    "id": {
      "type": "string",
      "description": "The unique identifier of the dataset."
    },
    "name": {
      "type": "string",
      "description": "The human-readable name of the dataset."
    },
    "dataset_type": {
      "type": "string",
      "description": "The type of dataset, which determines its schema and compatible operations.",
      "$comment": "'embed-result', 'cluster-result', 'cluster-outliers' and 'multi-label-classification-finetune-input' are dataset types listed in the Cohere API reference. 'embed-output' and 'prompt-completion-finetune-input' are retained so documents accepted by earlier versions of this schema still validate.",
      "enum": [
        "embed-input",
        "embed-output",
        "embed-result",
        "cluster-result",
        "cluster-outliers",
        "reranker-finetune-input",
        "prompt-completion-finetune-input",
        "single-label-classification-finetune-input",
        "multi-label-classification-finetune-input",
        "chat-finetune-input"
      ]
    },
    "validation_status": {
      "type": "string",
      "description": "The current validation status of the dataset after upload.",
      "$comment": "The lowercase values are the ones listed in the Cohere API reference. The capitalized forms are retained so documents accepted by earlier versions of this schema still validate.",
      "enum": [
        "unknown",
        "queued",
        "processing",
        "validated",
        "skipped",
        "failed",
        "Unknown",
        "Queued",
        "Processing",
        "Validated",
        "Skipped",
        "Failed"
      ]
    },
    "created_at": {
      "type": "string",
      "format": "date-time",
      "description": "The ISO 8601 timestamp when the dataset was created."
    },
    "updated_at": {
      "type": "string",
      "format": "date-time",
      "description": "The ISO 8601 timestamp when the dataset was last updated."
    },
    "schema": {
      "type": "string",
      "description": "The expected schema definition for the dataset records."
    },
    "required_fields": {
      "type": "array",
      "description": "The field names required in each record of the dataset.",
      "items": {
        "type": "string"
      }
    },
    "preserve_fields": {
      "type": "array",
      "description": "The field names that are preserved through processing.",
      "items": {
        "type": "string"
      }
    },
    "validation_error": {
      "type": "string",
      "description": "The error message if dataset validation failed."
    },
    "validation_warnings": {
      "type": "array",
      "description": "Warning messages for rows that were dropped during validation.",
      "items": {
        "type": "string"
      }
    }
  }
}