Sketches · Schema
Probabilistic Sketch
Schema representing a probabilistic data structure (sketch) configuration and result
Data Structures · Probabilistic Algorithms · Streaming Analytics · Approximate Query Processing · Big Data · Real-Time Analytics
Properties
| Name | Type | Description |
|---|---|---|
| id | string | Unique identifier for the sketch instance |
| type | string | The type of probabilistic sketch |
| name | string | Human-readable name for this sketch instance |
| description | string | Description of what this sketch is measuring |
| configuration | object | Sketch configuration parameters |
| result | object | Computed result from the sketch |
| memoryUsage | integer | Memory used by this sketch in bytes |
| itemCount | integer | Number of items that have been added to the sketch |
| createdAt | string | Timestamp when the sketch was created |
| updatedAt | string | Timestamp of the last update to the sketch |
JSON Schema
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://github.com/api-evangelist/sketches/blob/main/json-schema/sketches-sketch-schema.json",
  "title": "Probabilistic Sketch",
  "description": "Schema representing a probabilistic data structure (sketch) configuration and result",
  "type": "object",
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique identifier for the sketch instance"
    },
    "type": {
      "type": "string",
      "description": "The type of probabilistic sketch",
      "enum": [
        "HyperLogLog",
        "Count-Min Sketch",
        "Bloom Filter",
        "Cuckoo Filter",
        "T-Digest",
        "Theta Sketch",
        "Quantiles Sketch",
        "Top-K",
        "CPC Sketch"
      ]
    },
    "name": {
      "type": "string",
      "description": "Human-readable name for this sketch instance"
    },
    "description": {
      "type": "string",
      "description": "Description of what this sketch is measuring"
    },
    "configuration": {
      "type": "object",
      "description": "Sketch configuration parameters",
      "properties": {
        "errorRate": {
          "type": "number",
          "minimum": 0,
          "maximum": 1,
          "description": "Target false positive or error rate (e.g., 0.01 for 1%)"
        },
        "confidence": {
          "type": "number",
          "minimum": 0,
          "maximum": 1,
          "description": "Confidence level for the error bound (e.g., 0.99 for 99%)"
        },
        "precision": {
          "type": "integer",
          "minimum": 1,
          "description": "Precision parameter (e.g., HyperLogLog precision bits 4-18)"
        },
        "width": {
          "type": "integer",
          "minimum": 1,
          "description": "Width of the sketch matrix (Count-Min Sketch)"
        },
        "depth": {
          "type": "integer",
          "minimum": 1,
          "description": "Depth (number of hash functions) of the sketch matrix"
        },
        "capacity": {
          "type": "integer",
          "minimum": 1,
          "description": "Expected capacity (number of distinct items) for Bloom Filter"
        },
        "k": {
          "type": "integer",
          "minimum": 1,
          "description": "Parameter k (e.g., top-k items for Top-K sketch, or nominal entries for Theta Sketch)"
        }
      }
    },
    "result": {
      "type": "object",
      "description": "Computed result from the sketch",
      "properties": {
        "estimate": {
          "type": "number",
          "description": "The primary estimated value (cardinality, frequency, quantile, etc.)"
        },
        "upperBound": {
          "type": "number",
          "description": "Upper bound of the estimate at the configured confidence level"
        },
        "lowerBound": {
          "type": "number",
          "description": "Lower bound of the estimate at the configured confidence level"
        },
        "exactResult": {
          "type": "boolean",
          "description": "Whether this result is exact (below sampling threshold) or estimated"
        }
      }
    },
    "memoryUsage": {
      "type": "integer",
      "minimum": 0,
      "description": "Memory used by this sketch in bytes"
    },
    "itemCount": {
      "type": "integer",
      "minimum": 0,
      "description": "Number of items that have been added to the sketch"
    },
    "createdAt": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp when the sketch was created"
    },
    "updatedAt": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp of the last update to the sketch"
    }
  },
  "required": ["type"],
  "additionalProperties": false
}