Docling · Schema
DoclingDocument
Lossless representation of a parsed document produced by Docling. Captures structural elements (texts, tables, pictures, key-value items), provenance, layout, and hierarchy across pages.
Documents · Parsing · PDF · OCR · Layout · Tables · RAG · LLM · Open Source · IBM Research · LF AI and Data · MCP · Knowledge Graph · Generative AI
Properties
| Name | Type | Description |
|---|---|---|
| schema_name | string | Schema identifier; always DoclingDocument. |
| version | string | Schema version, e.g. 1.4.0. |
| name | string | Logical document name (typically the source filename without extension). |
| origin | object | Provenance of the source artifact. |
| furniture | array | Non-content elements (headers, footers, page numbers). |
| body | object | Root of the structural hierarchy. |
| groups | array | Grouping nodes (sections, lists). |
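| texts | array | Text elements (titles, section headers, paragraphs, list items, captions, footnotes, code, formulas). |
| tables | array | Table elements with their cell grids. |
| pictures | array | Picture elements with image data, captions, and annotations. |
| key_value_items | array | Key-value regions expressed as cells and the links between them. |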
| pages | object | Per-page metadata keyed by page number. |
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/api-evangelist/docling/refs/heads/main/json-schema/docling-document-schema.json",
"title": "DoclingDocument",
"description": "Lossless representation of a parsed document produced by Docling. Captures structural elements (texts, tables, pictures, key-value items), provenance, layout, and hierarchy across pages.",
"type": "object",
"required": ["schema_name", "version", "name"],
"properties": {
"schema_name": {
"type": "string",
"const": "DoclingDocument"
},
"version": {
"type": "string",
"description": "Schema version, e.g. 1.4.0."
},
"name": {
"type": "string",
"description": "Logical document name (typically the source filename without extension)."
},
"origin": {
"type": "object",
"description": "Provenance of the source artifact.",
"properties": {
"mimetype": {"type": "string"},
"binary_hash": {"type": "string"},
"filename": {"type": "string"},
"uri": {"type": "string", "format": "uri"}
}
},
"furniture": {
"type": "array",
"description": "Non-content elements (headers, footers, page numbers).",
"items": {"$ref": "#/$defs/RefItem"}
},
"body": {
"type": "object",
"description": "Root of the structural hierarchy.",
"properties": {
"self_ref": {"type": "string"},
"children": {
"type": "array",
"items": {"$ref": "#/$defs/RefItem"}
}
}
},
"groups": {
"type": "array",
"description": "Grouping nodes (sections, lists).",
"items": {"$ref": "#/$defs/GroupItem"}
},
"texts": {
"type": "array",
"items": {"$ref": "#/$defs/TextItem"}
},
"tables": {
"type": "array",
"items": {"$ref": "#/$defs/TableItem"}
},
"pictures": {
"type": "array",
"items": {"$ref": "#/$defs/PictureItem"}
},
"key_value_items": {
"type": "array",
"items": {"$ref": "#/$defs/KeyValueItem"}
},
"pages": {
"type": "object",
"description": "Per-page metadata keyed by page number.",
"additionalProperties": {"$ref": "#/$defs/PageItem"}
}
},
"$defs": {
"RefItem": {
"type": "object",
"properties": {
"$ref": {"type": "string", "description": "JSON pointer reference to another element."}
}
},
"BoundingBox": {
"type": "object",
"required": ["l", "t", "r", "b"],
"properties": {
"l": {"type": "number"},
"t": {"type": "number"},
"r": {"type": "number"},
"b": {"type": "number"},
"coord_origin": {"type": "string", "enum": ["TOPLEFT", "BOTTOMLEFT"]}
}
},
"Provenance": {
"type": "object",
"properties": {
"page_no": {"type": "integer"},
"bbox": {"$ref": "#/$defs/BoundingBox"},
"charspan": {
"type": "array",
"items": {"type": "integer"},
"minItems": 2,
"maxItems": 2
}
}
},
"TextItem": {
"type": "object",
"required": ["self_ref", "label", "text"],
"properties": {
"self_ref": {"type": "string"},
"parent": {"$ref": "#/$defs/RefItem"},
"children": {
"type": "array",
"items": {"$ref": "#/$defs/RefItem"}
},
"label": {
"type": "string",
"enum": [
"title",
"section_header",
"paragraph",
"list_item",
"caption",
"footnote",
"page_header",
"page_footer",
"code",
"formula",
"text"
]
},
"text": {"type": "string"},
"orig": {"type": "string"},
"prov": {
"type": "array",
"items": {"$ref": "#/$defs/Provenance"}
},
"level": {"type": "integer"}
}
},
"TableItem": {
"type": "object",
"required": ["self_ref", "data"],
"properties": {
"self_ref": {"type": "string"},
"label": {"type": "string", "const": "table"},
"captions": {
"type": "array",
"items": {"$ref": "#/$defs/RefItem"}
},
"data": {
"type": "object",
"properties": {
"num_rows": {"type": "integer"},
"num_cols": {"type": "integer"},
"grid": {
"type": "array",
"items": {
"type": "array",
"items": {"$ref": "#/$defs/TableCell"}
}
}
}
},
"prov": {
"type": "array",
"items": {"$ref": "#/$defs/Provenance"}
}
}
},
"TableCell": {
"type": "object",
"properties": {
"text": {"type": "string"},
"row_span": {"type": "integer"},
"col_span": {"type": "integer"},
"start_row_offset_idx": {"type": "integer"},
"end_row_offset_idx": {"type": "integer"},
"start_col_offset_idx": {"type": "integer"},
"end_col_offset_idx": {"type": "integer"},
"column_header": {"type": "boolean"},
"row_header": {"type": "boolean"},
"row_section": {"type": "boolean"}
}
},
"PictureItem": {
"type": "object",
"required": ["self_ref"],
"properties": {
"self_ref": {"type": "string"},
"label": {"type": "string", "const": "picture"},
"image": {
"type": "object",
"properties": {
"mimetype": {"type": "string"},
"dpi": {"type": "integer"},
"size": {
"type": "object",
"properties": {"width": {"type": "number"}, "height": {"type": "number"}}
},
"uri": {"type": "string"}
}
},
"captions": {
"type": "array",
"items": {"$ref": "#/$defs/RefItem"}
},
"annotations": {
"type": "array",
"items": {
"type": "object",
"properties": {
"kind": {"type": "string", "enum": ["classification", "description"]},
"text": {"type": "string"},
"predicted_classes": {
"type": "array",
"items": {
"type": "object",
"properties": {
"class_name": {"type": "string"},
"confidence": {"type": "number"}
}
}
}
}
}
},
"prov": {
"type": "array",
"items": {"$ref": "#/$defs/Provenance"}
}
}
},
"KeyValueItem": {
"type": "object",
"required": ["self_ref"],
"properties": {
"self_ref": {"type": "string"},
"label": {"type": "string", "const": "key_value_region"},
"graph": {
"type": "object",
"properties": {
"cells": {
"type": "array",
"items": {
"type": "object",
"properties": {
"cell_id": {"type": "integer"},
"text": {"type": "string"},
"label": {"type": "string", "enum": ["key", "value"]}
}
}
},
"links": {
"type": "array",
"items": {
"type": "object",
"properties": {
"source_cell_id": {"type": "integer"},
"target_cell_id": {"type": "integer"},
"label": {"type": "string"}
}
}
}
}
}
}
},
"GroupItem": {
"type": "object",
"properties": {
"self_ref": {"type": "string"},
"label": {"type": "string", "enum": ["section", "list", "ordered_list", "unordered_list"]},
"name": {"type": "string"},
"children": {
"type": "array",
"items": {"$ref": "#/$defs/RefItem"}
}
}
},
"PageItem": {
"type": "object",
"properties": {
"page_no": {"type": "integer"},
"size": {
"type": "object",
"properties": {"width": {"type": "number"}, "height": {"type": "number"}}
},
"image": {
"type": "object",
"properties": {"uri": {"type": "string"}, "dpi": {"type": "integer"}}
}
}
}
}
}