UiPath · Schema

UiPath Document Understanding Entities

JSON Schema for UiPath Document Understanding entities including digitization results, classification results, extraction results, and field value structures used in intelligent document processing workflows.

Automation · Robotic Process Automation · RPA · Artificial Intelligence · Document Processing · Enterprise Automation · Orchestration · Testing
View JSON Schema on GitHub

JSON Schema

uipath-document-understanding-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://uipath.com/schemas/document-understanding/document-understanding.json",
  "title": "UiPath Document Understanding Entities",
  "description": "JSON Schema for UiPath Document Understanding entities including digitization results, classification results, extraction results, and field value structures used in intelligent document processing workflows.",
  "type": "object",
  "$defs": {
    "DigitizationResult": {
      "type": "object",
      "title": "Digitization Result",
      "description": "The result of submitting a document to the Document Understanding digitization endpoint. The documentId is used in all subsequent processing steps.",
      "required": ["documentId", "status"],
      "properties": {
        "documentId": {
          "type": "string",
          "description": "Unique identifier assigned to the digitized document. Must be passed to classification, extraction, and validation endpoints. Retained for seven days."
        },
        "status": {
          "type": "string",
          "enum": ["Succeeded", "Failed"],
          "description": "Outcome of the digitization operation"
        },
        "pageCount": {
          "type": "integer",
          "minimum": 1,
          "description": "Number of pages detected and processed in the submitted document"
        }
      }
    },
    "ClassificationResultItem": {
      "type": "object",
      "title": "Classification Result Item",
      "description": "A single classification result produced by a classifier model for a document or page range.",
      "required": ["documentTypeId", "confidence"],
      "properties": {
        "classifierId": {
          "type": "string",
          "description": "Identifier of the classifier model that produced this result"
        },
        "documentTypeId": {
          "type": "string",
          "description": "Identifier of the document type that was identified by the classifier"
        },
        "documentTypeName": {
          "type": "string",
          "description": "Human-readable name of the identified document type"
        },
        "confidence": {
          "type": "number",
          "format": "float",
          "minimum": 0,
          "maximum": 1,
          "description": "Confidence score for this classification (0.0 = no confidence, 1.0 = full confidence)"
        },
        "startPage": {
          "type": "integer",
          "minimum": 1,
          "description": "First page (1-indexed) of the document section assigned this classification"
        },
        "endPage": {
          "type": "integer",
          "minimum": 1,
          "description": "Last page (1-indexed) of the document section assigned this classification"
        }
      }
    },
    "ExtractionResultData": {
      "type": "object",
      "title": "Extraction Result Data",
      "description": "The structured data extraction output containing all extracted field values and their confidence metadata.",
      "required": ["DocumentId", "Fields"],
      "properties": {
        "ResultsVersion": {
          "type": "integer",
          "description": "Version number of the extraction results format schema"
        },
        "DocumentId": {
          "type": "string",
          "description": "Document identifier of the document from which data was extracted"
        },
        "Fields": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/ExtractedField"
          },
          "description": "Array of extracted field values with confidence scores and metadata"
        }
      }
    },
    "ExtractedField": {
      "type": "object",
      "title": "Extracted Field",
      "description": "A single field extracted from a document, including its value, confidence score, and OCR metadata.",
      "required": ["FieldId", "FieldName"],
      "properties": {
        "FieldId": {
          "type": "string",
          "description": "Identifier of the field as defined in the extractor's schema configuration"
        },
        "FieldName": {
          "type": "string",
          "description": "Display name of the extracted field"
        },
        "IsMissing": {
          "type": "boolean",
          "description": "True when the field was not found in the document; Value will be null"
        },
        "Value": {
          "$ref": "#/$defs/FieldValue",
          "description": "The extracted value with confidence score and page reference"
        },
        "Values": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/FieldValue"
          },
          "description": "For multi-value fields, all extracted occurrences of this field"
        }
      }
    },
    "FieldValue": {
      "type": "object",
      "title": "Field Value",
      "description": "An extracted field value with associated confidence score, OCR metadata, and page reference.",
      "properties": {
        "Value": {
          "description": "The extracted value; type varies by field definition (string, number, boolean, or structured object)"
        },
        "Reference": {
          "$ref": "#/$defs/FieldReference",
          "description": "Reference to the location in the document from which the value was extracted"
        },
        "Confidence": {
          "type": "number",
          "format": "float",
          "minimum": 0,
          "maximum": 1,
          "description": "Model confidence score for this extracted value (0.0 to 1.0)"
        },
        "OcrConfidence": {
          "type": "number",
          "format": "float",
          "minimum": 0,
          "maximum": 1,
          "description": "OCR engine confidence score for the text underlying this extracted value"
        },
        "TextType": {
          "type": "string",
          "enum": ["Printed", "Handwritten"],
          "description": "Whether the source text was printed or handwritten"
        },
        "IsManual": {
          "type": "boolean",
          "description": "Whether this value was manually provided during human validation"
        }
      }
    },
    "FieldReference": {
      "type": "object",
      "title": "Field Reference",
      "description": "A reference to the location within a document where a field value was found.",
      "properties": {
        "TextStartIndex": {
          "type": "integer",
          "minimum": 0,
          "description": "Character start index in the document text of the extracted value"
        },
        "TextLength": {
          "type": "integer",
          "minimum": 0,
          "description": "Length in characters of the extracted text"
        },
        "Tokens": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/FieldToken"
          },
          "description": "Individual token references with their bounding box coordinates on the page"
        }
      }
    },
    "FieldToken": {
      "type": "object",
      "title": "Field Token",
      "description": "A single token (word or text unit) contributing to an extracted field value, with page and position information.",
      "properties": {
        "TextStartIndex": {
          "type": "integer",
          "minimum": 0,
          "description": "Character start index of this token in the full document text"
        },
        "TextLength": {
          "type": "integer",
          "minimum": 1,
          "description": "Length in characters of this token"
        },
        "Page": {
          "type": "integer",
          "minimum": 1,
          "description": "Page number (1-indexed) where this token appears"
        },
        "PageWidth": {
          "type": "number",
          "description": "Width of the page in document units"
        },
        "PageHeight": {
          "type": "number",
          "description": "Height of the page in document units"
        },
        "Left": {
          "type": "number",
          "description": "Left coordinate of the token bounding box"
        },
        "Top": {
          "type": "number",
          "description": "Top coordinate of the token bounding box"
        },
        "Width": {
          "type": "number",
          "description": "Width of the token bounding box"
        },
        "Height": {
          "type": "number",
          "description": "Height of the token bounding box"
        }
      }
    },
    "AsyncJobStatus": {
      "type": "object",
      "title": "Async Job Status",
      "description": "Status of an asynchronous Document Understanding processing job.",
      "required": ["requestId", "status"],
      "properties": {
        "requestId": {
          "type": "string",
          "description": "Unique identifier of the asynchronous processing job"
        },
        "status": {
          "type": "string",
          "enum": ["NotStarted", "Running", "Failed", "Succeeded"],
          "description": "Current execution status of the asynchronous job"
        },
        "errorMessage": {
          "type": "string",
          "description": "Error message populated when status is Failed"
        }
      }
    }
  }
}