Reducto · Schema

SyncSplitConfig

Reducto SyncSplitConfig schema

Document Parsing · PDF · OCR · Data Extraction · AI · Machine Learning · Document Intelligence · Structured Data

Properties

Name Type Description
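input string | array | UploadResponse For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following: 1. A publicly available URL 2. A presigned S3 URL 3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document 4. A jobid:// prefixed URL obtained from a previous /parse invocation 5. A list of URLs (for multi-document pipelines, V3 API only). For edit pipelines, this should be a string containing the edit instructions.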
parsing object The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.
split_description array The configuration options for processing the document.
split_rules string The prompt that describes rules for splitting the document.
settings object The settings for split processing.
View JSON Schema on GitHub

JSON Schema

reducto-syncsplitconfig.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/reducto/refs/heads/main/json-schema/reducto-syncsplitconfig.json",
  "title": "SyncSplitConfig",
  "description": "Reducto SyncSplitConfig schema",
  "properties": {
    "input": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "items": {
            "type": "string"
          },
          "type": "array"
        },
        {
          "$ref": "#/components/schemas/UploadResponse"
        }
      ],
      "title": "Input",
      "description": "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following:\n1. A publicly available URL\n2. A presigned S3 URL\n3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n4. A jobid:// prefixed URL obtained from a previous /parse invocation\n5. A list of URLs (for multi-document pipelines, V3 API only)\n\nFor edit pipelines, this should be a string containing the edit instructions."
    },
    "parsing": {
      "$ref": "#/components/schemas/ParseOptions",
      "description": "The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.",
      "default": {
        "enhance": {
          "agentic": [],
          "intelligent_ordering": false,
          "summarize_figures": true
        },
        "retrieval": {
          "chunking": {
            "chunk_mode": "disabled",
            "chunk_overlap": 0
          },
          "embedding_optimized": false,
          "filter_blocks": []
        },
        "formatting": {
          "add_page_markers": false,
          "include": [],
          "merge_tables": false,
          "table_output_format": "dynamic"
        },
        "spreadsheet": {
          "clustering": "accurate",
          "exclude": [],
          "include": [],
          "split_large_tables": {
            "enabled": true,
            "size": 50
          }
        },
        "settings": {
          "embed_pdf_metadata": false,
          "embed_pdf_metadata_dpi": 100,
          "extraction_mode": "hybrid",
          "force_url_result": false,
          "hybrid_vpc": {},
          "ocr_system": "standard",
          "persist_results": false,
          "return_images": [],
          "return_ocr_data": false
        }
      }
    },
    "split_description": {
      "items": {
        "$ref": "#/components/schemas/SplitCategory"
      },
      "type": "array",
      "title": "Split Description",
      "description": "The configuration options for processing the document."
    },
    "split_rules": {
      "type": "string",
      "title": "Split Rules",
      "description": "The prompt that describes rules for splitting the document.",
      "default": "Split the document into the applicable sections. Sections may only overlap at their first and last page if at all."
    },
    "settings": {
      "$ref": "#/components/schemas/SplitSettings",
      "description": "The settings for split processing.",
      "default": {
        "table_cutoff": "truncate",
        "allow_page_overlap": true,
        "deep_split": false
      }
    }
  },
  "type": "object",
  "required": [
    "input",
    "split_description"
  ]
}