Reducto · Schema

AsyncExtractConfig

Reducto AsyncExtractConfig schema

Document Parsing · PDF · OCR · Data Extraction · AI · Machine Learning · Document Intelligence · Structured Data

Properties

Name Type Description
async object The configuration options for asynchronous processing (default synchronous).
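input object For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following: 1. A publicly available URL 2. A presigned S3 URL 3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document 4. A jobid:// prefixed URL obtained from a previous /parse invocation 5. A list of URLs (for multi-document pipelines, V3 API only). For edit pipelines, this should be a string containing the edit instructions.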
parsing object The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.
instructions object The instructions to use for the extraction.
settings object The settings to use for the extraction.
View JSON Schema on GitHub

JSON Schema

reducto-asyncextractconfig.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/reducto/refs/heads/main/json-schema/reducto-asyncextractconfig.json",
  "title": "AsyncExtractConfig",
  "description": "Reducto AsyncExtractConfig schema",
  "properties": {
    "async": {
      "$ref": "#/components/schemas/config__v3__AsyncConfig",
      "description": "The configuration options for asynchronous processing (default synchronous).",
      "default": {
        "priority": false
      }
    },
    "input": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "items": {
            "type": "string"
          },
          "type": "array"
        },
        {
          "$ref": "#/components/schemas/UploadResponse"
        }
      ],
      "title": "Input",
      "description": "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following:\n1. A publicly available URL\n2. A presigned S3 URL\n3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n4. A jobid:// prefixed URL obtained from a previous /parse invocation\n5. A list of URLs (for multi-document pipelines, V3 API only)\n\nFor edit pipelines, this should be a string containing the edit instructions."
    },
    "parsing": {
      "$ref": "#/components/schemas/ParseOptions",
      "description": "The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.",
      "default": {
        "enhance": {
          "agentic": [],
          "intelligent_ordering": false,
          "summarize_figures": true
        },
        "retrieval": {
          "chunking": {
            "chunk_mode": "disabled",
            "chunk_overlap": 0
          },
          "embedding_optimized": false,
          "filter_blocks": []
        },
        "formatting": {
          "add_page_markers": false,
          "include": [],
          "merge_tables": false,
          "table_output_format": "dynamic"
        },
        "spreadsheet": {
          "clustering": "accurate",
          "exclude": [],
          "include": [],
          "split_large_tables": {
            "enabled": true,
            "size": 50
          }
        },
        "settings": {
          "embed_pdf_metadata": false,
          "embed_pdf_metadata_dpi": 100,
          "extraction_mode": "hybrid",
          "force_url_result": false,
          "hybrid_vpc": {},
          "ocr_system": "standard",
          "persist_results": false,
          "return_images": [],
          "return_ocr_data": false
        }
      }
    },
    "instructions": {
      "$ref": "#/components/schemas/Instructions",
      "description": "The instructions to use for the extraction.",
      "default": {
        "schema": {},
        "system_prompt": "Be precise and thorough."
      }
    },
    "settings": {
      "$ref": "#/components/schemas/ExtractSettings",
      "description": "The settings to use for the extraction.",
      "default": {
        "include_images": false,
        "optimize_for_latency": false,
        "array_extract": false,
        "deep_extract": false,
        "citations": {
          "enabled": false,
          "numerical_confidence": true
        }
      }
    }
  },
  "type": "object",
  "required": [
    "input"
  ]
}