Amazon Textract · Schema
DocumentAnalysis
Schema for an Amazon Textract document analysis response containing detected blocks of text, tables, forms, and layout elements extracted from a document.
Document Processing · Machine Learning · OCR
Properties
| Name | Type | Description |
|---|---|---|
| DocumentMetadata | object | Metadata about the analyzed document. |
| Blocks | array | The items detected in the document, including text lines, words, tables, and form elements. |
| AnalyzeDocumentModelVersion | string | The version of the model used to analyze the document. |
JSON Schema
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://aws.amazon.com/textract/schemas/documentanalysis",
"title": "DocumentAnalysis",
"description": "Schema for an Amazon Textract document analysis response containing detected blocks of text, tables, forms, and layout elements extracted from a document.",
"type": "object",
"properties": {
"DocumentMetadata": {
"type": "object",
"description": "Metadata about the analyzed document.",
"properties": {
"Pages": {
"type": "integer",
"description": "The number of pages detected in the document.",
"minimum": 1
}
},
"required": ["Pages"]
},
"Blocks": {
"type": "array",
"description": "The items detected in the document, including text lines, words, tables, and form elements.",
"items": {
"type": "object",
"properties": {
"BlockType": {
"type": "string",
"description": "The type of text item detected.",
"enum": [
"KEY_VALUE_SET",
"PAGE",
"LINE",
"WORD",
"TABLE",
"CELL",
"SELECTION_ELEMENT",
"MERGED_CELL",
"TITLE",
"QUERY",
"QUERY_RESULT",
"SIGNATURE",
"TABLE_TITLE",
"TABLE_FOOTER",
"LAYOUT_TEXT",
"LAYOUT_TITLE",
"LAYOUT_HEADER",
"LAYOUT_FOOTER",
"LAYOUT_SECTION_HEADER",
"LAYOUT_PAGE_NUMBER",
"LAYOUT_LIST",
"LAYOUT_FIGURE",
"LAYOUT_TABLE",
"LAYOUT_KEY_VALUE"
]
},
"Confidence": {
"type": "number",
"description": "The confidence that Amazon Textract has in the accuracy of the detected block.",
"minimum": 0,
"maximum": 100
},
"Text": {
"type": "string",
"description": "The word or line of text that is recognized by Amazon Textract."
},
"TextType": {
"type": "string",
"description": "The kind of text detected.",
"enum": ["HANDWRITING", "PRINTED"]
},
"RowIndex": {
"type": "integer",
"description": "The row in which a table cell is located.",
"minimum": 1
},
"ColumnIndex": {
"type": "integer",
"description": "The column in which a table cell appears.",
"minimum": 1
},
"RowSpan": {
"type": "integer",
"description": "The number of rows that a table cell spans.",
"minimum": 1
},
"ColumnSpan": {
"type": "integer",
"description": "The number of columns that a table cell spans.",
"minimum": 1
},
"Geometry": {
"type": "object",
"description": "The location of the detected block on the document page.",
"properties": {
"BoundingBox": {
"type": "object",
"description": "An axis-aligned bounding box for the detected block.",
"properties": {
"Width": {
"type": "number",
"description": "The width of the bounding box as a ratio of the overall document page width."
},
"Height": {
"type": "number",
"description": "The height of the bounding box as a ratio of the overall document page height."
},
"Left": {
"type": "number",
"description": "The left coordinate of the bounding box as a ratio of the overall document page width."
},
"Top": {
"type": "number",
"description": "The top coordinate of the bounding box as a ratio of the overall document page height."
}
},
"required": ["Width", "Height", "Left", "Top"]
},
"Polygon": {
"type": "array",
"description": "A fine-grained polygon around the detected block.",
"items": {
"type": "object",
"properties": {
"X": {
"type": "number"
},
"Y": {
"type": "number"
}
},
"required": ["X", "Y"]
}
}
}
},
"Id": {
"type": "string",
"description": "The identifier for the recognized text block."
},
"Relationships": {
"type": "array",
"description": "A list of relationship objects that describe how blocks are related to each other.",
"items": {
"type": "object",
"properties": {
"Type": {
"type": "string",
"description": "The type of relationship.",
"enum": ["VALUE", "CHILD", "COMPLEX_FEATURES", "MERGED_CELL", "TITLE", "ANSWER", "TABLE", "TABLE_TITLE", "TABLE_FOOTER"]
},
"Ids": {
"type": "array",
"description": "An array of IDs for related blocks.",
"items": {
"type": "string"
}
}
},
"required": ["Type", "Ids"]
}
},
"EntityTypes": {
"type": "array",
"description": "The type of entity such as KEY or VALUE.",
"items": {
"type": "string",
"enum": ["KEY", "VALUE", "COLUMN_HEADER", "TABLE_TITLE", "TABLE_FOOTER", "TABLE_SECTION_TITLE", "TABLE_SUMMARY", "STRUCTURED_TABLE", "SEMI_STRUCTURED_TABLE"]
}
},
"SelectionStatus": {
"type": "string",
"description": "The selection status of a selection element.",
"enum": ["SELECTED", "NOT_SELECTED"]
},
"Page": {
"type": "integer",
"description": "The page on which the block was detected.",
"minimum": 1
},
"Query": {
"type": "object",
"description": "The query that was used for query-based analysis.",
"properties": {
"Text": {
"type": "string"
},
"Alias": {
"type": "string"
},
"Pages": {
"type": "array",
"items": {
"type": "string"
}
}
},
"required": ["Text"]
}
},
"required": ["BlockType"]
}
},
"AnalyzeDocumentModelVersion": {
"type": "string",
"description": "The version of the model used to analyze the document."
}
},
"required": ["DocumentMetadata", "Blocks"]
}