reducto-ai · Schema
Reducto Parse
Schema for the Reducto Parse API request and response. POST /parse accepts a SyncParseConfig and POST /parse_async accepts an AsyncParseConfig, both modeled here as ParseRequest; the response is a ParseResponse (sync) or an AsyncParseResponse (async).
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://api-evangelist.com/schemas/reducto/reducto-parse-schema.json",
"title": "Reducto Parse",
"description": "Schema for the Reducto Parse API request and response. POST /parse accepts a SyncParseConfig and POST /parse_async accepts an AsyncParseConfig, both modeled here as ParseRequest; the response is a ParseResponse (sync) or an AsyncParseResponse (async).",
"type": "object",
"definitions": {
"ParseRequest": {
"type": "object",
"description": "Request body for POST /parse (synchronous) and POST /parse_async (asynchronous).",
"required": ["document_url"],
"properties": {
"document_url": {
"type": "string",
"description": "Source document. Accepts a public URL, presigned S3 URL, reducto:// reference returned by /upload, or jobid:// reference from a previous parse.",
"format": "uri"
},
"options": {
"type": "object",
"description": "Optional parse configuration covering OCR, chunking, table format, page range, agentic enhancements, and figure summarization.",
"properties": {
"ocr_mode": {
"type": "string",
"enum": ["standard", "highest_quality", "disabled"],
"description": "OCR engine mode. highest_quality uses agentic OCR with error correction; disabled is fastest."
},
"chunking": {
"type": "object",
"description": "Chunking strategy that controls how the parsed document is broken into retrieval-ready chunks.",
"properties": {
"chunk_mode": {
"type": "string",
"enum": ["variable", "section", "page", "block", "disabled"]
},
"chunk_size": { "type": "integer", "minimum": 1 }
}
},
"table_output_format": {
"type": "string",
"enum": ["html", "json", "md", "csv", "ai_json"],
"description": "Output format for detected tables."
},
"figure_summarization": { "type": "boolean" },
"page_range": { "type": "string", "description": "Inclusive page range, e.g. '1-10,15'." },
"embed_metadata": { "type": "boolean" }
}
},
"advanced_options": {
"type": "object",
"description": "Experimental and agentic features such as deep parse, agentic table merge, and layout enrichment."
},
"experimental_options": {
"type": "object",
"description": "Bleeding-edge options that may change without notice."
},
"priority": { "type": "boolean", "description": "Schedule the request on the priority lane (Growth and Enterprise tiers)." }
}
},
"ParseResponse": {
"type": "object",
"description": "Response from a synchronous /parse call. Contains structured document content, chunks, and usage metrics.",
"required": ["job_id", "result"],
"properties": {
"job_id": { "type": "string" },
"result": {
"type": "object",
"properties": {
"type": { "type": "string", "enum": ["full", "url"] },
"chunks": {
"type": "array",
"items": { "$ref": "#/definitions/ParseChunk" }
},
"ocr_pages_count": { "type": "integer" },
"duration": { "type": "number" }
}
},
"usage": { "$ref": "#/definitions/Usage" }
}
},
"AsyncParseResponse": {
"type": "object",
"description": "Response from a /parse_async call. Returns a job_id; the job can be polled via /job/{job_id}, or its completion can be delivered via webhook.",
"required": ["job_id"],
"properties": {
"job_id": { "type": "string" },
"status_url": { "type": "string", "format": "uri" }
}
},
"ParseChunk": {
"type": "object",
"description": "A single chunk emitted by the Parse pipeline.",
"properties": {
"content": { "type": "string" },
"embed": { "type": "string" },
"enriched": { "type": "string" },
"enrichment_success": { "type": "boolean" },
"blocks": {
"type": "array",
"items": { "$ref": "#/definitions/ParseBlock" }
}
}
},
"ParseBlock": {
"type": "object",
"description": "An individual layout block (text, table, figure, list, equation, etc.) detected on a page.",
"properties": {
"type": { "type": "string", "enum": ["Text", "Title", "Section Header", "List Item", "Table", "Figure", "Caption", "Footer", "Header", "Equation", "Discard"] },
"bbox": {
"type": "object",
"properties": {
"top": { "type": "number" },
"left": { "type": "number" },
"height": { "type": "number" },
"width": { "type": "number" },
"page": { "type": "integer" }
}
},
"content": { "type": "string" },
"confidence": { "type": "string", "enum": ["high", "medium", "low"] }
}
},
"Usage": {
"type": "object",
"description": "Billing usage emitted with sync responses.",
"properties": {
"num_pages": { "type": "integer" },
"credits": { "type": "number" }
}
}
}
}