llamaindex · Schema

LlamaIndex Extraction Agent

An extraction agent in LlamaExtract configured with a specific data schema and extraction settings for transforming unstructured documents into structured JSON representations.

Properties

Name	Type	Description
id	string	Unique identifier of the extraction agent.
name	string	Human-readable name of the extraction agent.
project_id	string	Identifier of the project the agent belongs to.
description	string	Optional description of the extraction agent and its purpose.
data_schema	object	JSON Schema defining the structure of the data to extract from documents.
prompt	string	Optional prompt used for automatic schema inference from example documents.
extraction_jobs	array	Extraction jobs that have been run with this agent.
created_at	string	Timestamp when the extraction agent was created.
updated_at	string	Timestamp when the extraction agent was last updated.

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://llamaindex.ai/schemas/llamaindex/extraction-agent.json",
  "title": "LlamaIndex Extraction Agent",
  "description": "An extraction agent in LlamaExtract configured with a specific data schema and extraction settings for transforming unstructured documents into structured JSON representations.",
  "type": "object",
  "required": ["id", "name", "project_id"],
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique identifier of the extraction agent."
    },
    "name": {
      "type": "string",
      "description": "Human-readable name of the extraction agent.",
      "minLength": 1,
      "maxLength": 255
    },
    "project_id": {
      "type": "string",
      "description": "Identifier of the project the agent belongs to."
    },
    "description": {
      "type": "string",
      "description": "Optional description of the extraction agent and its purpose.",
      "maxLength": 1000
    },
    "data_schema": {
      "type": "object",
      "additionalProperties": true,
      "description": "JSON Schema defining the structure of the data to extract from documents."
    },
    "prompt": {
      "type": "string",
      "description": "Optional prompt used for automatic schema inference from example documents."
    },
    "extraction_jobs": {
      "type": "array",
      "description": "Extraction jobs that have been run with this agent.",
      "items": {
        "$ref": "#/$defs/ExtractionJob"
      }
    },
    "created_at": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp when the extraction agent was created."
    },
    "updated_at": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp when the extraction agent was last updated."
    }
  },
  "$defs": {
    "ExtractionJob": {
      "type": "object",
      "description": "An asynchronous extraction job that processes documents through an extraction agent.",
      "required": ["id", "extraction_agent_id", "status"],
      "properties": {
        "id": {
          "type": "string",
          "description": "Unique identifier of the extraction job."
        },
        "extraction_agent_id": {
          "type": "string",
          "description": "Identifier of the extraction agent used for this job."
        },
        "status": {
          "type": "string",
          "enum": ["pending", "processing", "completed", "failed"],
          "description": "Current status of the extraction job."
        },
        "file_ids": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Identifiers of the files being processed."
        },
        "results": {
          "type": "array",
          "description": "Extraction results for each processed file.",
          "items": {
            "$ref": "#/$defs/FileExtractionResult"
          }
        },
        "created_at": {
          "type": "string",
          "format": "date-time",
          "description": "Timestamp when the extraction job was created."
        },
        "completed_at": {
          "type": "string",
          "format": "date-time",
          "description": "Timestamp when the extraction job completed."
        }
      }
    },
    "FileExtractionResult": {
      "type": "object",
      "description": "Extraction result for a single processed file.",
      "required": ["file_id", "status"],
      "properties": {
        "file_id": {
          "type": "string",
          "description": "Identifier of the processed file."
        },
        "file_name": {
          "type": "string",
          "description": "Name of the processed file."
        },
        "data": {
          "type": "object",
          "additionalProperties": true,
          "description": "Structured data extracted from the file, conforming to the agent's data schema."
        },
        "status": {
          "type": "string",
          "enum": ["success", "error"],
          "description": "Extraction status for this specific file."
        },
        "error": {
          "type": "string",
          "description": "Error message if extraction failed for this file."
        }
      }
    }
  }
}