Reducto · Schema

AsyncExtractConfig

Reducto AsyncExtractConfig schema

Document Parsing · PDF · OCR · Data Extraction · AI · Machine Learning · Document Intelligence · Structured Data

Properties

Name	Type	Description
async	object	The configuration options for asynchronous processing (default synchronous).
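input	string | array of strings | UploadResponse	For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following: 1. A publicly available URL 2. A presigned S3 URL 3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document 4. A jobid:// prefixed URL obtained from a previous /parse invocation 5. A list of URLs (for multi-document pipelines, V3 API only). For edit pipelines, this should be a string containing the edit instructions.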
parsing	object	The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.
instructions	object	The instructions to use for the extraction.
settings	object	The settings to use for the extraction.

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/reducto/refs/heads/main/json-schema/reducto-asyncextractconfig.json",
  "title": "AsyncExtractConfig",
  "description": "Reducto AsyncExtractConfig schema",
  "properties": {
    "async": {
      "$ref": "#/components/schemas/config__v3__AsyncConfig",
      "description": "The configuration options for asynchronous processing (default synchronous).",
      "default": {
        "priority": false
      }
    },
    "input": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "items": {
            "type": "string"
          },
          "type": "array"
        },
        {
          "$ref": "#/components/schemas/UploadResponse"
        }
      ],
      "title": "Input",
      "description": "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following:\n1. A publicly available URL\n2. A presigned S3 URL\n3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n4. A jobid:// prefixed URL obtained from a previous /parse invocation\n5. A list of URLs (for multi-document pipelines, V3 API only)\n\nFor edit pipelines, this should be a string containing the edit instructions."
    },
    "parsing": {
      "$ref": "#/components/schemas/ParseOptions",
      "description": "The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.",
      "default": {
        "enhance": {
          "agentic": [],
          "intelligent_ordering": false,
          "summarize_figures": true
        },
        "retrieval": {
          "chunking": {
            "chunk_mode": "disabled",
            "chunk_overlap": 0
          },
          "embedding_optimized": false,
          "filter_blocks": []
        },
        "formatting": {
          "add_page_markers": false,
          "include": [],
          "merge_tables": false,
          "table_output_format": "dynamic"
        },
        "spreadsheet": {
          "clustering": "accurate",
          "exclude": [],
          "include": [],
          "split_large_tables": {
            "enabled": true,
            "size": 50
          }
        },
        "settings": {
          "embed_pdf_metadata": false,
          "embed_pdf_metadata_dpi": 100,
          "extraction_mode": "hybrid",
          "force_url_result": false,
          "hybrid_vpc": {},
          "ocr_system": "standard",
          "persist_results": false,
          "return_images": [],
          "return_ocr_data": false
        }
      }
    },
    "instructions": {
      "$ref": "#/components/schemas/Instructions",
      "description": "The instructions to use for the extraction.",
      "default": {
        "schema": {},
        "system_prompt": "Be precise and thorough."
      }
    },
    "settings": {
      "$ref": "#/components/schemas/ExtractSettings",
      "description": "The settings to use for the extraction.",
      "default": {
        "include_images": false,
        "optimize_for_latency": false,
        "array_extract": false,
        "deep_extract": false,
        "citations": {
          "enabled": false,
          "numerical_confidence": true
        }
      }
    }
  },
  "type": "object",
  "required": [
    "input"
  ]
}