Apache Iceberg · Schema

DataFile

DataFile schema from Apache Iceberg REST Catalog API

ACID · Analytics · Apache · Data Lake · Lakehouse · Open Source · Table Format

Properties

Name Type Description
content string
first-row-id integer The first row ID assigned to the first row in the data file
column-sizes object Map of column id to total size on disk
value-counts object Map of column id to total count, including null and NaN
null-value-counts object Map of column id to null value count
nan-value-counts object Map of column id to number of NaN values in the column
lower-bounds object Map of column id to lower bound primitive type values
upper-bounds object Map of column id to upper bound primitive type values
View JSON Schema on GitHub

JSON Schema

rest-catalog-open-api-data-file-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/apache-iceberg/refs/heads/main/json-schema/rest-catalog-open-api-data-file-schema.json",
  "title": "DataFile",
  "description": "DataFile schema from Apache Iceberg REST Catalog API",
  "type": "object",
  "properties": {
    "content": {
      "type": "string",
      "const": "data"
    },
    "first-row-id": {
      "type": "integer",
      "format": "int64",
      "description": "The first row ID assigned to the first row in the data file"
    },
    "column-sizes": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CountMap"
        }
      ],
      "description": "Map of column id to total size on disk"
    },
    "value-counts": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CountMap"
        }
      ],
      "description": "Map of column id to total count, including null and NaN"
    },
    "null-value-counts": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CountMap"
        }
      ],
      "description": "Map of column id to null value count"
    },
    "nan-value-counts": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CountMap"
        }
      ],
      "description": "Map of column id to number of NaN values in the column"
    },
    "lower-bounds": {
      "allOf": [
        {
          "$ref": "#/components/schemas/ValueMap"
        }
      ],
      "description": "Map of column id to lower bound primitive type values"
    },
    "upper-bounds": {
      "allOf": [
        {
          "$ref": "#/components/schemas/ValueMap"
        }
      ],
      "description": "Map of column id to upper bound primitive type values"
    }
  },
  "required": [
    "content"
  ],
  "allOf": [
    {
      "$ref": "#/components/schemas/ContentFile"
    }
  ]
}