Google Cloud Dataflow · Schema
Google Cloud Dataflow Pipeline Description
A descriptive representation of a Dataflow pipeline, providing structural information about the stages and transforms in the pipeline graph.
Apache Beam · Batch Processing · Big Data · Data Processing · ETL · Stream Processing
Properties
| Name | Type | Description |
|---|---|---|
| originalPipelineTransform | array | Description of each transform in the pipeline as provided by the user. |
| executionPipelineStage | array | Description of each stage of execution after the pipeline has been optimized by the service. |
| displayData | array | Pipeline level display data. |
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://github.com/api-search/google-cloud-dataflow/json-schema/google-cloud-dataflow-pipeline-schema.json",
"title": "Google Cloud Dataflow Pipeline Description",
"description": "A descriptive representation of a Dataflow pipeline, providing structural information about the stages and transforms in the pipeline graph.",
"type": "object",
"properties": {
"originalPipelineTransform": {
"type": "array",
"description": "Description of each transform in the pipeline as provided by the user.",
"items": {
"$ref": "#/$defs/TransformSummary"
}
},
"executionPipelineStage": {
"type": "array",
"description": "Description of each stage of execution after the pipeline has been optimized by the service.",
"items": {
"$ref": "#/$defs/ExecutionStageSummary"
}
},
"displayData": {
"type": "array",
"description": "Pipeline level display data.",
"items": {
"$ref": "#/$defs/DisplayData"
}
}
},
"$defs": {
"TransformSummary": {
"type": "object",
"description": "Description of a transform executed as part of a Dataflow job.",
"properties": {
"kind": {
"type": "string",
"description": "The type of transform.",
"enum": [
"UNKNOWN_KIND",
"PAR_DO_KIND",
"GROUP_BY_KEY_KIND",
"FLATTEN_KIND",
"READ_KIND",
"WRITE_KIND",
"CONSTANT_KIND",
"SINGLETON_KIND",
"SHUFFLE_KIND"
]
},
"id": {
"type": "string",
"description": "SDK-generated unique identifier of the transform."
},
"name": {
"type": "string",
"description": "User-provided name of the transform."
},
"displayData": {
"type": "array",
"description": "Transform-specific display data.",
"items": {
"$ref": "#/$defs/DisplayData"
}
},
"outputCollectionName": {
"type": "array",
"description": "User names for the output collections of this transform.",
"items": {
"type": "string"
}
},
"inputCollectionName": {
"type": "array",
"description": "User names for the input collections of this transform.",
"items": {
"type": "string"
}
}
}
},
"ExecutionStageSummary": {
"type": "object",
"description": "Description of a stage of execution after pipeline optimization.",
"properties": {
"name": {
"type": "string",
"description": "Dataflow service generated name for this stage."
},
"id": {
"type": "string",
"description": "Dataflow service generated unique ID for this stage."
},
"kind": {
"type": "string",
"description": "The type of execution stage.",
"enum": [
"UNKNOWN_KIND",
"PAR_DO_KIND",
"GROUP_BY_KEY_KIND",
"FLATTEN_KIND",
"READ_KIND",
"WRITE_KIND",
"CONSTANT_KIND",
"SINGLETON_KIND",
"SHUFFLE_KIND"
]
},
"inputSource": {
"type": "array",
"description": "Input sources for this stage.",
"items": {
"$ref": "#/$defs/StageSource"
}
},
"outputSource": {
"type": "array",
"description": "Output sources for this stage.",
"items": {
"$ref": "#/$defs/StageSource"
}
},
"componentTransform": {
"type": "array",
"description": "Transforms that comprise this execution stage.",
"items": {
"type": "object",
"properties": {
"userName": {
"type": "string",
"description": "Human-readable name for this transform."
},
"name": {
"type": "string",
"description": "Dataflow service generated name for this transform."
},
"originalTransform": {
"type": "string",
"description": "User name for the original user transform."
}
}
}
},
"componentSource": {
"type": "array",
"description": "Collections produced and consumed by component transforms.",
"items": {
"type": "object",
"properties": {
"userName": {
"type": "string",
"description": "Human-readable name for this source."
},
"name": {
"type": "string",
"description": "Dataflow service generated name for this source."
},
"originalTransformOrCollection": {
"type": "string",
"description": "User name for the original transform or collection."
}
}
}
},
"prerequisiteStage": {
"type": "array",
"description": "Other stages that must complete before this stage can run.",
"items": {
"type": "string"
}
}
}
},
"StageSource": {
"type": "object",
"description": "Describes a stream of data that flows in or out of a stage.",
"properties": {
"userName": {
"type": "string",
"description": "Human-readable name for this source."
},
"name": {
"type": "string",
"description": "Dataflow service generated name for this source."
},
"originalTransformOrCollection": {
"type": "string",
"description": "User name for the original transform or collection."
},
"sizeBytes": {
"type": "string",
"format": "int64",
"description": "Size of the source in bytes, if known."
}
}
},
"DisplayData": {
"type": "object",
"description": "Data provided with a pipeline or transform for descriptive information.",
"properties": {
"key": {
"type": "string",
"description": "The key identifying the display data."
},
"namespace": {
"type": "string",
"description": "The namespace for the key, usually a class name."
},
"strValue": {
"type": "string",
"description": "Contains value if the data is of string type."
},
"int64Value": {
"type": "string",
"format": "int64",
"description": "Contains value if the data is of int64 type."
},
"floatValue": {
"type": "number",
"format": "float",
"description": "Contains value if the data is of float type."
},
"javaClassValue": {
"type": "string",
"description": "Contains value if the data is of Java class type."
},
"timestampValue": {
"type": "string",
"format": "date-time",
"description": "Contains value if the data is of timestamp type."
},
"durationValue": {
"type": "string",
"description": "Contains value if the data is of duration type."
},
"boolValue": {
"type": "boolean",
"description": "Contains value if the data is of bool type."
},
"shortStrValue": {
"type": "string",
"description": "A possible additional shorter value to display."
},
"url": {
"type": "string",
"description": "An optional full URL."
},
"label": {
"type": "string",
"description": "An optional label to display with the value."
}
}
}
}
}