AWS Kinesis · Schema

ParquetSerDe

A serializer to use for converting data to the Parquet format before storing it in Amazon S3. For more information, see Apache Parquet (https://parquet.apache.org/documentation/latest/).

Analytics · Apache Flink · Big Data · Data Processing · Real-Time · Streaming · Video

Properties

Name Type Description
BlockSizeBytes object
PageSizeBytes object
Compression object
EnableDictionaryCompression object
MaxPaddingBytes object
WriterVersion object
View JSON Schema on GitHub

JSON Schema

kinesis-parquetserde-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/ParquetSerDe",
  "title": "ParquetSerDe",
  "type": "object",
  "properties": {
    "BlockSizeBytes": {
      "allOf": [
        {
          "$ref": "#/components/schemas/BlockSizeBytes"
        },
        {
          "description": "The Hadoop Distributed File System (HDFS) block size. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. The default is 256 MiB and the minimum is 64 MiB. Kinesis Data Firehose uses this value for padding calculations."
        }
      ]
    },
    "PageSizeBytes": {
      "allOf": [
        {
          "$ref": "#/components/schemas/ParquetPageSizeBytes"
        },
        {
          "description": "The Parquet page size. Column chunks are divided into pages. A page is conceptually an indivisible unit (in terms of compression and encoding). The minimum value is 64 KiB and the default is 1 MiB."
        }
      ]
    },
    "Compression": {
      "allOf": [
        {
          "$ref": "#/components/schemas/ParquetCompression"
        },
        {
          "description": "The compression code to use over data blocks. The possible values are <code>UNCOMPRESSED</code>, <code>SNAPPY</code>, and <code>GZIP</code>, with the default being <code>SNAPPY</code>. Use <code>SNAPPY</code> for higher decompression speed. Use <code>GZIP</code> if the compression ratio is more important than speed."
        }
      ]
    },
    "EnableDictionaryCompression": {
      "allOf": [
        {
          "$ref": "#/components/schemas/BooleanObject"
        },
        {
          "description": "Indicates whether to enable dictionary compression."
        }
      ]
    },
    "MaxPaddingBytes": {
      "allOf": [
        {
          "$ref": "#/components/schemas/NonNegativeIntegerObject"
        },
        {
          "description": "The maximum amount of padding to apply. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. The default is 0."
        }
      ]
    },
    "WriterVersion": {
      "allOf": [
        {
          "$ref": "#/components/schemas/ParquetWriterVersion"
        },
        {
          "description": "Indicates the version of row format to output. The possible values are <code>V1</code> and <code>V2</code>. The default is <code>V1</code>."
        }
      ]
    }
  },
  "description": "A serializer to use for converting data to the Parquet format before storing it in Amazon S3. For more information, see <a href=\"https://parquet.apache.org/documentation/latest/\">Apache Parquet</a>."
}