Google Cloud Dataflow Pipeline Description

A descriptive representation of a Dataflow pipeline, providing structural information about the stages and transforms in the pipeline graph.

Apache Beam, Batch Processing, Big Data, Data Processing, ETL, Stream Processing

Properties

| Name | Type | Description |
| --- | --- | --- |
| originalPipelineTransform | array | Description of each transform in the pipeline as provided by the user. |
| executionPipelineStage | array | Description of each stage of execution after the pipeline has been optimized by the service. |
| displayData | array | Pipeline level display data. |
View JSON Schema on GitHub

JSON Schema

google-cloud-dataflow-pipeline-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://github.com/api-search/google-cloud-dataflow/json-schema/google-cloud-dataflow-pipeline-schema.json",
  "title": "Google Cloud Dataflow Pipeline Description",
  "description": "A descriptive representation of a Dataflow pipeline, providing structural information about the stages and transforms in the pipeline graph.",
  "type": "object",
  "properties": {
    "originalPipelineTransform": {
      "type": "array",
      "description": "Description of each transform in the pipeline as provided by the user.",
      "items": {
        "$ref": "#/$defs/TransformSummary"
      }
    },
    "executionPipelineStage": {
      "type": "array",
      "description": "Description of each stage of execution after the pipeline has been optimized by the service.",
      "items": {
        "$ref": "#/$defs/ExecutionStageSummary"
      }
    },
    "displayData": {
      "type": "array",
      "description": "Pipeline level display data.",
      "items": {
        "$ref": "#/$defs/DisplayData"
      }
    }
  },
  "$defs": {
    "TransformSummary": {
      "type": "object",
      "description": "Description of a transform executed as part of a Dataflow job.",
      "properties": {
        "kind": {
          "type": "string",
          "description": "The type of transform.",
          "enum": [
            "UNKNOWN_KIND",
            "PAR_DO_KIND",
            "GROUP_BY_KEY_KIND",
            "FLATTEN_KIND",
            "READ_KIND",
            "WRITE_KIND",
            "CONSTANT_KIND",
            "SINGLETON_KIND",
            "SHUFFLE_KIND"
          ]
        },
        "id": {
          "type": "string",
          "description": "SDK-generated unique identifier of the transform."
        },
        "name": {
          "type": "string",
          "description": "User-provided name of the transform."
        },
        "displayData": {
          "type": "array",
          "description": "Transform-specific display data.",
          "items": {
            "$ref": "#/$defs/DisplayData"
          }
        },
        "outputCollectionName": {
          "type": "array",
          "description": "User names for the output collections of this transform.",
          "items": {
            "type": "string"
          }
        },
        "inputCollectionName": {
          "type": "array",
          "description": "User names for the input collections of this transform.",
          "items": {
            "type": "string"
          }
        }
      }
    },
    "ExecutionStageSummary": {
      "type": "object",
      "description": "Description of a stage of execution after pipeline optimization.",
      "properties": {
        "name": {
          "type": "string",
          "description": "Dataflow service generated name for this stage."
        },
        "id": {
          "type": "string",
          "description": "Dataflow service generated unique ID for this stage."
        },
        "kind": {
          "type": "string",
          "description": "The type of transform this stage is executing.",
          "enum": [
            "UNKNOWN_KIND",
            "PAR_DO_KIND",
            "GROUP_BY_KEY_KIND",
            "FLATTEN_KIND",
            "READ_KIND",
            "WRITE_KIND",
            "CONSTANT_KIND",
            "SINGLETON_KIND",
            "SHUFFLE_KIND"
          ]
        },
        "inputSource": {
          "type": "array",
          "description": "Input sources for this stage.",
          "items": {
            "$ref": "#/$defs/StageSource"
          }
        },
        "outputSource": {
          "type": "array",
          "description": "Output sources for this stage.",
          "items": {
            "$ref": "#/$defs/StageSource"
          }
        },
        "componentTransform": {
          "type": "array",
          "description": "Transforms that comprise this execution stage.",
          "items": {
            "type": "object",
            "properties": {
              "userName": {
                "type": "string",
                "description": "Human-readable name for this transform."
              },
              "name": {
                "type": "string",
                "description": "Dataflow service generated name for this transform."
              },
              "originalTransform": {
                "type": "string",
                "description": "User name for the original user transform."
              }
            }
          }
        },
        "componentSource": {
          "type": "array",
          "description": "Collections produced and consumed by component transforms.",
          "items": {
            "type": "object",
            "properties": {
              "userName": {
                "type": "string",
                "description": "Human-readable name for this source."
              },
              "name": {
                "type": "string",
                "description": "Dataflow service generated name for this source."
              },
              "originalTransformOrCollection": {
                "type": "string",
                "description": "User name for the original transform or collection."
              }
            }
          }
        },
        "prerequisiteStage": {
          "type": "array",
          "description": "Other stages that must complete before this stage can run.",
          "items": {
            "type": "string"
          }
        }
      }
    },
    "StageSource": {
      "type": "object",
      "description": "Describes a stream of data that flows in or out of a stage.",
      "properties": {
        "userName": {
          "type": "string",
          "description": "Human-readable name for this source."
        },
        "name": {
          "type": "string",
          "description": "Dataflow service generated name for this source."
        },
        "originalTransformOrCollection": {
          "type": "string",
          "description": "User name for the original transform or collection."
        },
        "sizeBytes": {
          "type": "string",
          "format": "int64",
          "description": "Size of the source in bytes, if known."
        }
      }
    },
    "DisplayData": {
      "type": "object",
      "description": "Data provided with a pipeline or transform for descriptive information.",
      "properties": {
        "key": {
          "type": "string",
          "description": "The key identifying the display data."
        },
        "namespace": {
          "type": "string",
          "description": "The namespace for the key, usually a class name."
        },
        "strValue": {
          "type": "string",
          "description": "Contains value if the data is of string type."
        },
        "int64Value": {
          "type": "string",
          "format": "int64",
          "description": "Contains value if the data is of int64 type."
        },
        "floatValue": {
          "type": "number",
          "format": "float",
          "description": "Contains value if the data is of float type."
        },
        "javaClassValue": {
          "type": "string",
          "description": "Contains value if the data is of Java class type."
        },
        "timestampValue": {
          "type": "string",
          "format": "date-time",
          "description": "Contains value if the data is of timestamp type."
        },
        "durationValue": {
          "type": "string",
          "description": "Contains value if the data is of duration type: a duration in seconds with up to nine fractional digits, ending with 's' (for example, 3.5s)."
        },
        "boolValue": {
          "type": "boolean",
          "description": "Contains value if the data is of bool type."
        },
        "shortStrValue": {
          "type": "string",
          "description": "A possible additional shorter value to display."
        },
        "url": {
          "type": "string",
          "description": "An optional full URL."
        },
        "label": {
          "type": "string",
          "description": "An optional label to display with the value."
        }
      }
    }
  }
}