llamaindex · Schema

LlamaIndex Parse Job

A parse job in LlamaParse representing the asynchronous processing of a document through AI-powered parsing with configurable tiers for different quality and speed trade-offs.

Properties

Name Type Description
id string Unique identifier of the parse job.
status string Current processing status of the parse job.
tier string Parsing tier that determines quality and speed trade-off. Fast outputs spatial text only. Cost-effective is optimized for text-heavy documents. Agentic handles images and diagrams. Agentic-plus provides maximum fidelity for complex layouts.
version string API version used for parsing. Use 'latest' for most recent or a specific date string for production stability.
file_id string Identifier of the uploaded file being parsed.
source_url string URL of the document fetched for parsing, if provided instead of file_id.
file_name string Name of the parsed file.
num_pages integer Total number of pages in the document.
target_pages string Page range to parse using 1-based indexing (e.g., '1-5', '3,7,10').
result object Parsed output of the document, containing the extracted content in various formats.
created_at string Timestamp when the parse job was created.
completed_at string Timestamp when the parse job completed.
View JSON Schema on GitHub

JSON Schema

llamaindex-parse-job-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://llamaindex.ai/schemas/llamaindex/parse-job.json",
  "title": "LlamaIndex Parse Job",
  "description": "A parse job in LlamaParse representing the asynchronous processing of a document through AI-powered parsing with configurable tiers for different quality and speed trade-offs.",
  "type": "object",
  "required": ["id", "status", "tier"],
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique identifier of the parse job."
    },
    "status": {
      "type": "string",
      "enum": ["pending", "processing", "completed", "failed"],
      "description": "Current processing status of the parse job."
    },
    "tier": {
      "type": "string",
      "enum": ["fast", "cost_effective", "agentic", "agentic_plus"],
      "description": "Parsing tier that determines quality and speed trade-off. Fast outputs spatial text only. Cost-effective is optimized for text-heavy documents. Agentic handles images and diagrams. Agentic-plus provides maximum fidelity for complex layouts."
    },
    "version": {
      "type": "string",
      "description": "API version used for parsing. Use 'latest' for most recent or a specific date string for production stability.",
      "pattern": "^(latest|\\d{4}-\\d{2}-\\d{2})$"
    },
    "file_id": {
      "type": "string",
      "description": "Identifier of the uploaded file being parsed."
    },
    "source_url": {
      "type": "string",
      "format": "uri",
      "description": "URL of the document fetched for parsing, if provided instead of file_id."
    },
    "file_name": {
      "type": "string",
      "description": "Name of the parsed file."
    },
    "num_pages": {
      "type": "integer",
      "minimum": 0,
      "description": "Total number of pages in the document."
    },
    "target_pages": {
      "type": "string",
      "description": "Page range to parse using 1-based indexing (e.g., '1-5', '3,7,10').",
      "$comment": "Comma-separated list of items, each a positive page number or an 'N-M' range; whitespace around numbers and separators is tolerated. Strings made only of separators (',', '-') and page 0 are rejected.",
      "pattern": "^\\s*[1-9]\\d*(\\s*-\\s*[1-9]\\d*)?(\\s*,\\s*[1-9]\\d*(\\s*-\\s*[1-9]\\d*)?)*\\s*$"
    },
    "result": {
      "$ref": "#/$defs/ParseResult",
      "description": "Parsed output of the document, containing the extracted content in various formats."
    },
    "created_at": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp when the parse job was created."
    },
    "completed_at": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp when the parse job completed."
    }
  },
  "$defs": {
    "ParseResult": {
      "type": "object",
      "description": "The parsed output of a document containing extracted content in various formats.",
      "properties": {
        "text": {
          "type": "string",
          "description": "Plain text representation of the parsed content."
        },
        "markdown": {
          "type": "string",
          "description": "Markdown-formatted representation of the parsed content."
        },
        "json": {
          "type": "object",
          "additionalProperties": true,
          "description": "Structured JSON representation of the parsed content."
        },
        "pages": {
          "type": "array",
          "description": "Per-page parsing results.",
          "items": {
            "$ref": "#/$defs/ParsePage"
          }
        },
        "metadata": {
          "type": "object",
          "additionalProperties": true,
          "description": "Metadata extracted from the document."
        }
      }
    },
    "ParsePage": {
      "type": "object",
      "description": "Parsed content for a single page of a document.",
      "required": ["page_number"],
      "properties": {
        "page_number": {
          "type": "integer",
          "minimum": 1,
          "description": "1-based page number."
        },
        "text": {
          "type": "string",
          "description": "Plain text content of the page."
        },
        "markdown": {
          "type": "string",
          "description": "Markdown-formatted content of the page."
        }
      }
    }
  }
}