Reducto · Schema

SyncExtractConfig

Reducto SyncExtractConfig schema

Document Parsing · PDF · OCR · Data Extraction · AI · Machine Learning · Document Intelligence · Structured Data

Properties

Name	Type	Description
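input	string | array | object	For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following: 1. A publicly available URL 2. A presigned S3 URL 3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document 4. A jobid:// prefixed URL obtained from a previous /parse invocation 5. A list of URLs (for multi-document pipelines, V3 API only). For edit pipelines, this should be a string containing the edit instructions.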
parsing	object	The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.
instructions	object	The instructions to use for the extraction.
settings	object	The settings to use for the extraction.

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/reducto/refs/heads/main/json-schema/reducto-syncextractconfig.json",
  "title": "SyncExtractConfig",
  "description": "Reducto SyncExtractConfig schema",
  "type": "object",
  "required": [
    "input"
  ],
  "properties": {
    "input": {
      "title": "Input",
      "description": "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following:\n1. A publicly available URL\n2. A presigned S3 URL\n3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n4. A jobid:// prefixed URL obtained from a previous /parse invocation\n5. A list of URLs (for multi-document pipelines, V3 API only)\n\nFor edit pipelines, this should be a string containing the edit instructions.",
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "array",
          "items": {
            "type": "string"
          }
        },
        {
          "$ref": "#/components/schemas/UploadResponse"
        }
      ]
    },
    "parsing": {
      "$ref": "#/components/schemas/ParseOptions",
      "description": "The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.",
      "default": {
        "enhance": {
          "agentic": [],
          "intelligent_ordering": false,
          "summarize_figures": true
        },
        "retrieval": {
          "chunking": {
            "chunk_mode": "disabled",
            "chunk_overlap": 0
          },
          "embedding_optimized": false,
          "filter_blocks": []
        },
        "formatting": {
          "add_page_markers": false,
          "include": [],
          "merge_tables": false,
          "table_output_format": "dynamic"
        },
        "spreadsheet": {
          "clustering": "accurate",
          "exclude": [],
          "include": [],
          "split_large_tables": {
            "enabled": true,
            "size": 50
          }
        },
        "settings": {
          "embed_pdf_metadata": false,
          "embed_pdf_metadata_dpi": 100,
          "extraction_mode": "hybrid",
          "force_url_result": false,
          "hybrid_vpc": {},
          "ocr_system": "standard",
          "persist_results": false,
          "return_images": [],
          "return_ocr_data": false
        }
      }
    },
    "instructions": {
      "$ref": "#/components/schemas/Instructions",
      "description": "The instructions to use for the extraction.",
      "default": {
        "schema": {},
        "system_prompt": "Be precise and thorough."
      }
    },
    "settings": {
      "$ref": "#/components/schemas/ExtractSettings",
      "description": "The settings to use for the extraction.",
      "default": {
        "include_images": false,
        "optimize_for_latency": false,
        "array_extract": false,
        "deep_extract": false,
        "citations": {
          "enabled": false,
          "numerical_confidence": true
        }
      }
    }
  }
}