Reducto · Schema
SyncSplitConfig
Reducto SyncSplitConfig schema
Document Parsing · PDF · OCR · Data Extraction · AI · Machine Learning · Document Intelligence · Structured Data
Properties
| Name | Type | Description |
|---|---|---|
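| input | string, array of strings, or UploadResponse | For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following: 1. A publicly available URL 2. A presigned S3 URL 3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document 4. A jobid:// prefixed URL obtained from a previous /parse invocation 5. A list of URLs (for multi-document pipelines, V3 API only). For edit pipelines, this should be a string containing the edit instructions. |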
| parsing | object | The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored. |
| split_description | array | An array of SplitCategory items giving the configuration options for processing the document. |
| split_rules | string | The prompt that describes rules for splitting the document. |
| settings | object | The settings for split processing. |
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/api-evangelist/reducto/refs/heads/main/json-schema/reducto-syncsplitconfig.json",
"title": "SyncSplitConfig",
"description": "Reducto SyncSplitConfig schema",
"properties": {
"input": {
"anyOf": [
{
"type": "string"
},
{
"items": {
"type": "string"
},
"type": "array"
},
{
"$ref": "#/components/schemas/UploadResponse"
}
],
"title": "Input",
"description": "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following:\n 1. A publicly available URL\n 2. A presigned S3 URL\n 3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n 4. A jobid:// prefixed URL obtained from a previous /parse invocation\n 5. A list of URLs (for multi-document pipelines, V3 API only)\n\n For edit pipelines, this should be a string containing the edit instructions."
},
"parsing": {
"$ref": "#/components/schemas/ParseOptions",
"description": "The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.",
"default": {
"enhance": {
"agentic": [],
"intelligent_ordering": false,
"summarize_figures": true
},
"retrieval": {
"chunking": {
"chunk_mode": "disabled",
"chunk_overlap": 0
},
"embedding_optimized": false,
"filter_blocks": []
},
"formatting": {
"add_page_markers": false,
"include": [],
"merge_tables": false,
"table_output_format": "dynamic"
},
"spreadsheet": {
"clustering": "accurate",
"exclude": [],
"include": [],
"split_large_tables": {
"enabled": true,
"size": 50
}
},
"settings": {
"embed_pdf_metadata": false,
"embed_pdf_metadata_dpi": 100,
"extraction_mode": "hybrid",
"force_url_result": false,
"hybrid_vpc": {},
"ocr_system": "standard",
"persist_results": false,
"return_images": [],
"return_ocr_data": false
}
}
},
"split_description": {
"items": {
"$ref": "#/components/schemas/SplitCategory"
},
"type": "array",
"title": "Split Description",
"description": "The configuration options for processing the document."
},
"split_rules": {
"type": "string",
"title": "Split Rules",
"description": "The prompt that describes rules for splitting the document.",
"default": "Split the document into the applicable sections. Sections may only overlap at their first and last page if at all."
},
"settings": {
"$ref": "#/components/schemas/SplitSettings",
"description": "The settings for split processing.",
"default": {
"table_cutoff": "truncate",
"allow_page_overlap": true,
"deep_split": false
}
}
},
"type": "object",
"required": [
"input",
"split_description"
]
}