Amazon Textract · Schema

DocumentAnalysis

Response object containing the results of document text detection or analysis, including detected blocks of text, tables, and forms.

Document Processing · Machine Learning · OCR

Properties

Name Type Description
DocumentMetadata object Metadata about the document.
Blocks array The items detected in the document.
AnalyzeDocumentModelVersion string The version of the model used to analyze the document.
View JSON Schema on GitHub

JSON Schema

amazon-textract-document-analysis-schema.json Raw ↑
{
  "type": "object",
  "description": "Response object containing the results of document text detection or analysis, including detected blocks of text, tables, and forms.",
  "properties": {
    "DocumentMetadata": {
      "type": "object",
      "description": "Metadata about the document.",
      "properties": {
        "Pages": {
          "type": "integer",
          "description": "The number of pages detected in the document."
        }
      }
    },
    "Blocks": {
      "type": "array",
      "description": "The items detected in the document.",
      "items": {
        "type": "object",
        "properties": {
          "BlockType": {
            "type": "string",
            "description": "The type of item detected in the document, such as a page, line, word, table, or key-value set.",
            "enum": [
              "KEY_VALUE_SET",
              "PAGE",
              "LINE",
              "WORD",
              "TABLE",
              "CELL",
              "SELECTION_ELEMENT",
              "MERGED_CELL",
              "TITLE",
              "QUERY",
              "QUERY_RESULT",
              "SIGNATURE",
              "TABLE_TITLE",
              "TABLE_FOOTER",
              "LAYOUT_TEXT",
              "LAYOUT_TITLE",
              "LAYOUT_HEADER",
              "LAYOUT_FOOTER",
              "LAYOUT_SECTION_HEADER",
              "LAYOUT_PAGE_NUMBER",
              "LAYOUT_LIST",
              "LAYOUT_FIGURE",
              "LAYOUT_TABLE",
              "LAYOUT_KEY_VALUE"
            ]
          },
          "Confidence": {
            "type": "number",
            "format": "float",
            "description": "The confidence score, from 0 to 100, that Amazon Textract has in the accuracy of the detected block."
          },
          "Text": {
            "type": "string",
            "description": "The word or line of text recognized."
          },
          "Geometry": {
            "type": "object",
            "properties": {
              "BoundingBox": {
                "type": "object",
                "properties": {
                  "Width": {
                    "type": "number"
                  },
                  "Height": {
                    "type": "number"
                  },
                  "Left": {
                    "type": "number"
                  },
                  "Top": {
                    "type": "number"
                  }
                }
              },
              "Polygon": {
                "type": "array",
                "items": {
                  "type": "object",
                  "properties": {
                    "X": {
                      "type": "number"
                    },
                    "Y": {
                      "type": "number"
                    }
                  }
                }
              }
            }
          },
          "Id": {
            "type": "string",
            "description": "The identifier for the recognized text block."
          },
          "Relationships": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "Type": {
                  "type": "string",
                  "enum": [
                    "VALUE",
                    "CHILD",
                    "COMPLEX_FEATURES",
                    "MERGED_CELL",
                    "TITLE",
                    "ANSWER",
                    "TABLE",
                    "TABLE_TITLE",
                    "TABLE_FOOTER"
                  ]
                },
                "Ids": {
                  "type": "array",
                  "items": {
                    "type": "string"
                  }
                }
              }
            }
          },
          "Page": {
            "type": "integer",
            "description": "The page on which the block was detected."
          }
        }
      }
    },
    "AnalyzeDocumentModelVersion": {
      "type": "string",
      "description": "The version of the model used to analyze the document."
    }
  },
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "DocumentAnalysis"
}