cohere · Schema
Cohere Dataset
Represents a dataset managed through the Cohere Datasets API, used for embed jobs, fine-tuning, and other batch processing tasks.
Properties
| Name | Type | Description |
|---|---|---|
| id | string | The unique identifier of the dataset. |
| name | string | The human-readable name of the dataset. |
| dataset_type | string | The type of dataset, which determines its schema and compatible operations. |
| validation_status | string | The current validation status of the dataset after upload. |
| created_at | string | The ISO 8601 timestamp when the dataset was created. |
| updated_at | string | The ISO 8601 timestamp when the dataset was last updated. |
| schema | string | The expected schema definition for the dataset records. |
| required_fields | array | The field names required in each record of the dataset. |
| preserve_fields | array | The field names that are preserved through processing. |
| validation_error | string | The error message if dataset validation failed. |
| validation_warnings | array | Warning messages for rows that were dropped during validation. |
JSON Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://api.cohere.com/schemas/cohere/dataset.json",
  "title": "Cohere Dataset",
  "description": "Represents a dataset managed through the Cohere Datasets API, used for embed jobs, fine-tuning, and other batch processing tasks.",
  "type": "object",
  "required": ["id", "name", "dataset_type"],
  "properties": {
    "id": {
      "type": "string",
      "description": "The unique identifier of the dataset."
    },
    "name": {
      "type": "string",
      "description": "The human-readable name of the dataset."
    },
    "dataset_type": {
      "type": "string",
      "description": "The type of dataset, which determines its schema and compatible operations.",
      "enum": [
        "embed-input",
        "embed-output",
        "embed-result",
        "cluster-result",
        "cluster-outliers",
        "reranker-finetune-input",
        "prompt-completion-finetune-input",
        "single-label-classification-finetune-input",
        "multi-label-classification-finetune-input",
        "chat-finetune-input"
      ]
    },
    "validation_status": {
      "type": "string",
      "description": "The current validation status of the dataset after upload.",
      "enum": [
        "Unknown",
        "Queued",
        "Processing",
        "Validated",
        "Skipped",
        "Failed",
        "unknown",
        "queued",
        "processing",
        "validated",
        "skipped",
        "failed"
      ]
    },
    "created_at": {
      "type": "string",
      "format": "date-time",
      "description": "The ISO 8601 timestamp when the dataset was created."
    },
    "updated_at": {
      "type": "string",
      "format": "date-time",
      "description": "The ISO 8601 timestamp when the dataset was last updated."
    },
    "schema": {
      "type": "string",
      "description": "The expected schema definition for the dataset records."
    },
    "required_fields": {
      "type": "array",
      "description": "The field names required in each record of the dataset.",
      "items": {
        "type": "string"
      }
    },
    "preserve_fields": {
      "type": "array",
      "description": "The field names that are preserved through processing.",
      "items": {
        "type": "string"
      }
    },
    "validation_error": {
      "type": "string",
      "description": "The error message if dataset validation failed."
    },
    "validation_warnings": {
      "type": "array",
      "description": "Warning messages for rows that were dropped during validation.",
      "items": {
        "type": "string"
      }
    }
  }
}