llamaindex · Schema
LlamaIndex Parse Job
A parse job in LlamaParse representing the asynchronous processing of a document through AI-powered parsing with configurable tiers for different quality and speed trade-offs.
Properties
| Name | Type | Description |
|---|---|---|
| id | string | Unique identifier of the parse job. |
| status | string | Current processing status of the parse job. |
| tier | string | Parsing tier that determines quality and speed trade-off. Fast outputs spatial text only. Cost-effective is optimized for text-heavy documents. Agentic handles images and diagrams. Agentic-plus provides maximum fidelity for complex layouts. |
| version | string | API version used for parsing. Use 'latest' for most recent or a specific date string for production stability. |
| file_id | string | Identifier of the uploaded file being parsed. |
| source_url | string | URL of the document fetched for parsing, if provided instead of file_id. |
| file_name | string | Name of the parsed file. |
| num_pages | integer | Total number of pages in the document. |
| target_pages | string | Page range to parse using 1-based indexing (e.g., '1-5', '3,7,10'). |
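| result | object | Parsed output of the document, containing extracted content in various formats (text, markdown, JSON, per-page results, metadata). |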
| created_at | string | Timestamp when the parse job was created. |
| completed_at | string | Timestamp when the parse job completed. |
JSON Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://llamaindex.ai/schemas/llamaindex/parse-job.json",
  "title": "LlamaIndex Parse Job",
  "description": "A parse job in LlamaParse representing the asynchronous processing of a document through AI-powered parsing with configurable tiers for different quality and speed trade-offs.",
  "type": "object",
  "required": ["id", "status", "tier"],
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique identifier of the parse job."
    },
    "status": {
      "type": "string",
      "enum": ["pending", "processing", "completed", "failed"],
      "description": "Current processing status of the parse job."
    },
    "tier": {
      "type": "string",
      "enum": ["fast", "cost_effective", "agentic", "agentic_plus"],
      "description": "Parsing tier that determines quality and speed trade-off. Fast outputs spatial text only. Cost-effective is optimized for text-heavy documents. Agentic handles images and diagrams. Agentic-plus provides maximum fidelity for complex layouts."
    },
    "version": {
      "type": "string",
      "description": "API version used for parsing. Use 'latest' for most recent or a specific date string for production stability.",
      "pattern": "^(latest|\\d{4}-\\d{2}-\\d{2})$"
    },
    "file_id": {
      "type": "string",
      "description": "Identifier of the uploaded file being parsed."
    },
    "source_url": {
      "type": "string",
      "format": "uri",
      "description": "URL of the document fetched for parsing, if provided instead of file_id."
    },
    "file_name": {
      "type": "string",
      "description": "Name of the parsed file."
    },
    "num_pages": {
      "type": "integer",
      "minimum": 0,
      "description": "Total number of pages in the document."
    },
    "target_pages": {
      "type": "string",
      "description": "Page range to parse using 1-based indexing (e.g., '1-5', '3,7,10').",
      "pattern": "^[\\d,\\-\\s]+$"
    },
    "result": {
      "$ref": "#/$defs/ParseResult",
      "description": "Parsed output of the document, containing extracted content in various formats."
    },
    "created_at": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp when the parse job was created."
    },
    "completed_at": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp when the parse job completed."
    }
  },
  "$defs": {
    "ParseResult": {
      "type": "object",
      "description": "The parsed output of a document containing extracted content in various formats.",
      "properties": {
        "text": {
          "type": "string",
          "description": "Plain text representation of the parsed content."
        },
        "markdown": {
          "type": "string",
          "description": "Markdown-formatted representation of the parsed content."
        },
        "json": {
          "type": "object",
          "additionalProperties": true,
          "description": "Structured JSON representation of the parsed content."
        },
        "pages": {
          "type": "array",
          "description": "Per-page parsing results.",
          "items": {
            "$ref": "#/$defs/ParsePage"
          }
        },
        "metadata": {
          "type": "object",
          "additionalProperties": true,
          "description": "Metadata extracted from the document."
        }
      }
    },
    "ParsePage": {
      "type": "object",
      "description": "Parsed content for a single page of a document.",
      "required": ["page_number"],
      "properties": {
        "page_number": {
          "type": "integer",
          "minimum": 1,
          "description": "1-based page number."
        },
        "text": {
          "type": "string",
          "description": "Plain text content of the page."
        },
        "markdown": {
          "type": "string",
          "description": "Markdown-formatted content of the page."
        }
      }
    }
  }
}