reducto-ai · Schema

Reducto Parse

Schema for the Reducto Parse API request and response. The request body (ParseRequest) corresponds to a SyncParseConfig for POST /parse or an AsyncParseConfig for /parse_async; the response is a ParseResponse (sync) or an AsyncParseResponse (async).

View JSON Schema on GitHub

JSON Schema

reducto-parse-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://api-evangelist.com/schemas/reducto/reducto-parse-schema.json",
  "title": "Reducto Parse",
  "description": "Schema for the Reducto Parse API request and response. POST /parse accepts either a SyncParseConfig or AsyncParseConfig; the response is a ParseResponse (sync) or AsyncParseResponse (async).",
  "type": "object",
  "definitions": {
    "ParseRequest": {
      "type": "object",
      "description": "Request body for POST /parse — synchronous or asynchronous parse.",
      "required": ["document_url"],
      "properties": {
        "document_url": {
          "type": "string",
          "description": "Source document. Accepts a public URL, presigned S3 URL, reducto:// reference returned by /upload, or jobid:// reference from a previous parse.",
          "format": "uri"
        },
        "options": {
          "type": "object",
          "description": "Optional parse configuration covering OCR, chunking, table format, page range, agentic enhancements, and figure summarization.",
          "properties": {
            "ocr_mode": {
              "type": "string",
              "enum": ["standard", "highest_quality", "disabled"],
              "description": "OCR engine mode. highest_quality uses agentic OCR with error correction; disabled is fastest."
            },
            "chunking": {
              "type": "object",
              "description": "Chunking strategy that controls how the parsed document is broken into retrieval-ready chunks.",
              "properties": {
                "chunk_mode": {
                  "type": "string",
                  "enum": ["variable", "section", "page", "block", "disabled"]
                },
                "chunk_size": { "type": "integer", "minimum": 1 }
              }
            },
            "table_output_format": {
              "type": "string",
              "enum": ["html", "json", "md", "csv", "ai_json"],
              "description": "Output format for detected tables."
            },
            "figure_summarization": { "type": "boolean" },
            "page_range": { "type": "string", "description": "Inclusive page range, e.g. '1-10,15'." },
            "embed_metadata": { "type": "boolean" }
          }
        },
        "advanced_options": {
          "type": "object",
          "description": "Experimental and agentic features such as deep parse, agentic table merge, and layout enrichment."
        },
        "experimental_options": {
          "type": "object",
          "description": "Bleeding-edge options that may change without notice."
        },
        "priority": { "type": "boolean", "description": "Schedule the request on the priority lane (Growth and Enterprise tiers)." }
      }
    },
    "ParseResponse": {
      "type": "object",
      "description": "Response from a synchronous /parse call. Contains structured document content, chunks, and usage metrics.",
      "required": ["job_id", "result"],
      "properties": {
        "job_id": { "type": "string" },
        "result": {
          "type": "object",
          "properties": {
            "type": { "type": "string", "enum": ["full", "url"] },
            "chunks": {
              "type": "array",
              "items": { "$ref": "#/definitions/ParseChunk" }
            },
            "ocr_pages_count": { "type": "integer" },
            "duration": { "type": "number" }
          }
        },
        "usage": { "$ref": "#/definitions/Usage" }
      }
    },
    "AsyncParseResponse": {
      "type": "object",
      "description": "Response from a /parse_async call. Returns a job_id that can be polled via /job/{job_id} or notified via webhook.",
      "required": ["job_id"],
      "properties": {
        "job_id": { "type": "string" },
        "status_url": { "type": "string", "format": "uri" }
      }
    },
    "ParseChunk": {
      "type": "object",
      "description": "A single chunk emitted by the Parse pipeline.",
      "properties": {
        "content": { "type": "string" },
        "embed": { "type": "string" },
        "enriched": { "type": "string" },
        "enrichment_success": { "type": "boolean" },
        "blocks": {
          "type": "array",
          "items": { "$ref": "#/definitions/ParseBlock" }
        }
      }
    },
    "ParseBlock": {
      "type": "object",
      "description": "An individual layout block (text, table, figure, list, equation, etc.) detected on a page.",
      "properties": {
        "type": { "type": "string", "enum": ["Text", "Title", "Section Header", "List Item", "Table", "Figure", "Caption", "Footer", "Header", "Equation", "Discard"] },
        "bbox": {
          "type": "object",
          "properties": {
            "top": { "type": "number" },
            "left": { "type": "number" },
            "height": { "type": "number" },
            "width": { "type": "number" },
            "page": { "type": "integer" }
          }
        },
        "content": { "type": "string" },
        "confidence": { "type": "string", "enum": ["high", "medium", "low"] }
      }
    },
    "Usage": {
      "type": "object",
      "description": "Billing usage emitted with sync responses.",
      "properties": {
        "num_pages": { "type": "integer" },
        "credits": { "type": "number" }
      }
    }
  }
}