Reducto · Schema

SyncExtractConfig

Reducto SyncExtractConfig schema

Document Parsing · PDF · OCR · Data Extraction · AI · Machine Learning · Document Intelligence · Structured Data

Properties

Name Type Description
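input string | array | object For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following: 1. A publicly available URL 2. A presigned S3 URL 3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document 4. A jobid:// prefixed URL obtained from a previous /parse invocation 5. A list of URLs (for multi-document pipelines, V3 API only). For edit pipelines, this should be a string containing the edit instructions.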
parsing object The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.
instructions object The instructions to use for the extraction.
settings object The settings to use for the extraction.
View JSON Schema on GitHub

JSON Schema

reducto-syncextractconfig.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/reducto/refs/heads/main/json-schema/reducto-syncextractconfig.json",
  "title": "SyncExtractConfig",
  "description": "Reducto SyncExtractConfig schema",
  "properties": {
    "input": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "items": {
            "type": "string"
          },
          "type": "array"
        },
        {
          "$ref": "#/components/schemas/UploadResponse"
        }
      ],
      "title": "Input",
      "description": "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following:\n1. A publicly available URL\n2. A presigned S3 URL\n3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n4. A jobid:// prefixed URL obtained from a previous /parse invocation\n5. A list of URLs (for multi-document pipelines, V3 API only)\n\nFor edit pipelines, this should be a string containing the edit instructions."
    },
    "parsing": {
      "$ref": "#/components/schemas/ParseOptions",
      "description": "The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.",
      "default": {
        "enhance": {
          "agentic": [],
          "intelligent_ordering": false,
          "summarize_figures": true
        },
        "retrieval": {
          "chunking": {
            "chunk_mode": "disabled",
            "chunk_overlap": 0
          },
          "embedding_optimized": false,
          "filter_blocks": []
        },
        "formatting": {
          "add_page_markers": false,
          "include": [],
          "merge_tables": false,
          "table_output_format": "dynamic"
        },
        "spreadsheet": {
          "clustering": "accurate",
          "exclude": [],
          "include": [],
          "split_large_tables": {
            "enabled": true,
            "size": 50
          }
        },
        "settings": {
          "embed_pdf_metadata": false,
          "embed_pdf_metadata_dpi": 100,
          "extraction_mode": "hybrid",
          "force_url_result": false,
          "hybrid_vpc": {},
          "ocr_system": "standard",
          "persist_results": false,
          "return_images": [],
          "return_ocr_data": false
        }
      }
    },
    "instructions": {
      "$ref": "#/components/schemas/Instructions",
      "description": "The instructions to use for the extraction.",
      "default": {
        "schema": {},
        "system_prompt": "Be precise and thorough."
      }
    },
    "settings": {
      "$ref": "#/components/schemas/ExtractSettings",
      "description": "The settings to use for the extraction.",
      "default": {
        "include_images": false,
        "optimize_for_latency": false,
        "array_extract": false,
        "deep_extract": false,
        "citations": {
          "enabled": false,
          "numerical_confidence": true
        }
      }
    }
  },
  "type": "object",
  "required": [
    "input"
  ]
}