llamaindex · Schema

LlamaIndex Parse Job

A parse job in LlamaParse representing the asynchronous processing of a document through AI-powered parsing with configurable tiers for different quality and speed trade-offs.

Properties

Name	Type	Description
id	string	Unique identifier of the parse job.
status	string	Current processing status of the parse job.
tier	string	Parsing tier that determines quality and speed trade-off. Fast outputs spatial text only. Cost-effective is optimized for text-heavy documents. Agentic handles images and diagrams. Agentic-plus provides maximum fidelity for complex layouts.
version	string	API version used for parsing. Use 'latest' for most recent or a specific date string for production stability.
file_id	string	Identifier of the uploaded file being parsed.
source_url	string	URL of the document fetched for parsing, if provided instead of file_id.
file_name	string	Name of the parsed file.
num_pages	integer	Total number of pages in the document.
target_pages	string	Page range to parse using 1-based indexing (e.g., '1-5', '3,7,10').
result	object	The parsed output of the document, containing extracted content in various formats.
created_at	string	Timestamp when the parse job was created.
completed_at	string	Timestamp when the parse job completed.

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://llamaindex.ai/schemas/llamaindex/parse-job.json",
  "title": "LlamaIndex Parse Job",
  "description": "A parse job in LlamaParse representing the asynchronous processing of a document through AI-powered parsing with configurable tiers for different quality and speed trade-offs.",
  "type": "object",
  "required": ["id", "status", "tier"],
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique identifier of the parse job."
    },
    "status": {
      "type": "string",
      "enum": ["pending", "processing", "completed", "failed"],
      "description": "Current processing status of the parse job."
    },
    "tier": {
      "type": "string",
      "enum": ["fast", "cost_effective", "agentic", "agentic_plus"],
      "description": "Parsing tier that determines quality and speed trade-off. Fast outputs spatial text only. Cost-effective is optimized for text-heavy documents. Agentic handles images and diagrams. Agentic-plus provides maximum fidelity for complex layouts."
    },
    "version": {
      "type": "string",
      "description": "API version used for parsing. Use 'latest' for most recent or a specific date string for production stability.",
      "pattern": "^(latest|\\d{4}-\\d{2}-\\d{2})$"
    },
    "file_id": {
      "type": "string",
      "description": "Identifier of the uploaded file being parsed."
    },
    "source_url": {
      "type": "string",
      "format": "uri",
      "description": "URL of the document fetched for parsing, if provided instead of file_id."
    },
    "file_name": {
      "type": "string",
      "description": "Name of the parsed file."
    },
    "num_pages": {
      "type": "integer",
      "minimum": 0,
      "description": "Total number of pages in the document."
    },
    "target_pages": {
      "type": "string",
      "description": "Page range to parse using 1-based indexing (e.g., '1-5', '3,7,10').",
      "pattern": "^\\s*[1-9]\\d*(?:\\s*-\\s*[1-9]\\d*)?(?:\\s*,\\s*[1-9]\\d*(?:\\s*-\\s*[1-9]\\d*)?)*\\s*$"
    },
    "result": {
      "$ref": "#/$defs/ParseResult",
      "description": "Parsed output of the document, containing the extracted content in various formats."
    },
    "created_at": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp when the parse job was created."
    },
    "completed_at": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp when the parse job completed."
    }
  },
  "$defs": {
    "ParseResult": {
      "type": "object",
      "description": "The parsed output of a document containing extracted content in various formats.",
      "properties": {
        "text": {
          "type": "string",
          "description": "Plain text representation of the parsed content."
        },
        "markdown": {
          "type": "string",
          "description": "Markdown-formatted representation of the parsed content."
        },
        "json": {
          "type": "object",
          "additionalProperties": true,
          "description": "Structured JSON representation of the parsed content."
        },
        "pages": {
          "type": "array",
          "description": "Per-page parsing results.",
          "items": {
            "$ref": "#/$defs/ParsePage"
          }
        },
        "metadata": {
          "type": "object",
          "additionalProperties": true,
          "description": "Metadata extracted from the document."
        }
      }
    },
    "ParsePage": {
      "type": "object",
      "description": "Parsed content for a single page of a document.",
      "required": ["page_number"],
      "properties": {
        "page_number": {
          "type": "integer",
          "minimum": 1,
          "description": "1-based page number."
        },
        "text": {
          "type": "string",
          "description": "Plain text content of the page."
        },
        "markdown": {
          "type": "string",
          "description": "Markdown-formatted content of the page."
        }
      }
    }
  }
}