Google Cloud Dataflow Job

Defines a Dataflow job representing a pipeline execution. A job encapsulates the pipeline configuration, environment, execution state, and metadata for batch or streaming workloads on Google Cloud Platform.

Tags: Apache Beam, Batch Processing, Big Data, Data Processing, ETL, Stream Processing

Properties

Name Type Description
id string The unique identifier of the job, assigned by the server and immutable once set.
projectId string The ID of the Google Cloud project that owns this job.
name string The user-assigned name of the job. Only one active job with a given name can exist in a project within a single region; jobs in different regions may share a name.
type string The type of Dataflow job, indicating batch or streaming execution.
currentState string The current state of the job, representing its lifecycle position from creation through completion or cancellation.
currentStateTime string The timestamp of the most recent state transition.
requestedState string The state requested for the job, such as cancelling or draining.
createTime string The timestamp when the job was initially created.
startTime string The timestamp when the job began executing.
environment object The execution environment configuration for the job.
steps array The pipeline processing steps that define the job.
stepsLocation string The Cloud Storage location where step information is stored.
stageStates array The per-stage execution state information for the job.
pipelineDescription object A description of the pipeline structure.
labels object User-defined labels for the job as key-value string pairs.
location string The regional endpoint where this job runs, such as us-central1.
createdFromSnapshotId string If this job was created from a snapshot, the ID of that snapshot.
replacedByJobId string If this job has been replaced by another job, the ID of the replacement.
replaceJobId string If this job is replacing another job, the ID of the job being replaced.
clientRequestId string A unique client-generated idempotency key for preventing duplicate job creation.
tempFiles array A set of Cloud Storage files used for temporary storage.
jobMetadata object Metadata about the job for filtering and discovery.
runtimeUpdatableParams object Parameters that can be updated during execution without stopping the job.
serviceResources object Resources allocated by the Dataflow service for the job.
View JSON Schema on GitHub

JSON Schema

google-cloud-dataflow-job-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://github.com/api-search/google-cloud-dataflow/json-schema/google-cloud-dataflow-job-schema.json",
  "title": "Google Cloud Dataflow Job",
  "description": "Defines a Dataflow job representing a pipeline execution. A job encapsulates the pipeline configuration, environment, execution state, and metadata for batch or streaming workloads on Google Cloud Platform.",
  "type": "object",
  "properties": {
    "id": {
      "type": "string",
      "description": "The unique identifier of the job, assigned by the server and immutable once set.",
      "readOnly": true
    },
    "projectId": {
      "type": "string",
      "description": "The ID of the Google Cloud project that owns this job."
    },
    "name": {
      "type": "string",
      "description": "The user-assigned name of the job. Only one active job with a given name can exist in a project within a single region; jobs in different regions may share a name."
    },
    "type": {
      "type": "string",
      "description": "The type of Dataflow job, indicating batch or streaming execution.",
      "enum": [
        "JOB_TYPE_UNKNOWN",
        "JOB_TYPE_BATCH",
        "JOB_TYPE_STREAMING"
      ]
    },
    "currentState": {
      "$ref": "#/$defs/jobState",
      "description": "The current state of the job, representing its lifecycle position from creation through completion or cancellation.",
      "readOnly": true
    },
    "currentStateTime": {
      "type": "string",
      "format": "date-time",
      "description": "The timestamp of the most recent state transition.",
      "readOnly": true
    },
    "requestedState": {
      "$ref": "#/$defs/jobState",
      "description": "The state requested for the job, such as cancelling or draining."
    },
    "createTime": {
      "type": "string",
      "format": "date-time",
      "description": "The timestamp when the job was initially created.",
      "readOnly": true
    },
    "startTime": {
      "type": "string",
      "format": "date-time",
      "description": "The timestamp when the job began executing.",
      "readOnly": true
    },
    "environment": {
      "$ref": "google-cloud-dataflow-environment-schema.json",
      "description": "The execution environment configuration for the job."
    },
    "steps": {
      "type": "array",
      "description": "The pipeline processing steps that define the job.",
      "items": {
        "type": "object",
        "properties": {
          "kind": {
            "type": "string",
            "description": "The type of transform this step represents."
          },
          "name": {
            "type": "string",
            "description": "The unique name of this step within the job."
          },
          "properties": {
            "type": "object",
            "description": "Named properties associated with the step.",
            "additionalProperties": true
          }
        }
      }
    },
    "stepsLocation": {
      "type": "string",
      "description": "The Cloud Storage location where step information is stored."
    },
    "stageStates": {
      "type": "array",
      "description": "The per-stage execution state information for the job.",
      "readOnly": true,
      "items": {
        "type": "object",
        "properties": {
          "executionStageName": {
            "type": "string",
            "description": "The name of the execution stage."
          },
          "executionStageState": {
            "$ref": "#/$defs/jobState",
            "description": "The state of the execution stage."
          },
          "currentStateTime": {
            "type": "string",
            "format": "date-time",
            "description": "The time at which the stage entered its current state."
          }
        }
      }
    },
    "pipelineDescription": {
      "$ref": "google-cloud-dataflow-pipeline-schema.json",
      "description": "A description of the pipeline structure."
    },
    "labels": {
      "type": "object",
      "description": "User-defined labels for the job as key-value string pairs.",
      "additionalProperties": {
        "type": "string"
      }
    },
    "location": {
      "type": "string",
      "description": "The regional endpoint where this job runs, such as us-central1."
    },
    "createdFromSnapshotId": {
      "type": "string",
      "description": "If this job was created from a snapshot, the ID of that snapshot.",
      "readOnly": true
    },
    "replacedByJobId": {
      "type": "string",
      "description": "If this job has been replaced by another job, the ID of the replacement.",
      "readOnly": true
    },
    "replaceJobId": {
      "type": "string",
      "description": "If this job is replacing another job, the ID of the job being replaced."
    },
    "clientRequestId": {
      "type": "string",
      "description": "A unique client-generated idempotency key for preventing duplicate job creation."
    },
    "tempFiles": {
      "type": "array",
      "description": "A set of Cloud Storage files used for temporary storage.",
      "items": {
        "type": "string"
      }
    },
    "jobMetadata": {
      "type": "object",
      "description": "Metadata about the job for filtering and discovery.",
      "properties": {
        "sdkVersion": {
          "type": "object",
          "description": "The version of the SDK used to run the job.",
          "properties": {
            "version": {
              "type": "string",
              "description": "The version string."
            },
            "versionDisplayName": {
              "type": "string",
              "description": "A human-readable version name."
            },
            "sdkSupportStatus": {
              "type": "string",
              "description": "The support status for this SDK version.",
              "enum": ["UNKNOWN", "SUPPORTED", "STALE", "DEPRECATED", "UNSUPPORTED"]
            }
          }
        },
        "spannerDetails": {
          "type": "array",
          "description": "Cloud Spanner sources used by this job.",
          "items": {
            "type": "object",
            "properties": {
              "projectId": {
                "type": "string",
                "description": "The project ID accessed in the connection."
              },
              "instanceId": {
                "type": "string",
                "description": "The instance ID accessed in the connection."
              },
              "databaseId": {
                "type": "string",
                "description": "The database ID accessed in the connection."
              }
            }
          }
        },
        "bigqueryDetails": {
          "type": "array",
          "description": "BigQuery sources used by this job.",
          "items": {
            "type": "object",
            "properties": {
              "table": {
                "type": "string",
                "description": "The table accessed in the connection."
              },
              "dataset": {
                "type": "string",
                "description": "The dataset accessed in the connection."
              },
              "projectId": {
                "type": "string",
                "description": "The project accessed in the connection."
              },
              "query": {
                "type": "string",
                "description": "The query used to access data in the connection."
              }
            }
          }
        },
        "bigTableDetails": {
          "type": "array",
          "description": "Cloud Bigtable sources used by this job.",
          "items": {
            "type": "object",
            "properties": {
              "projectId": {
                "type": "string",
                "description": "The project ID accessed in the connection."
              },
              "instanceId": {
                "type": "string",
                "description": "The instance ID accessed in the connection."
              },
              "tableId": {
                "type": "string",
                "description": "The table ID accessed in the connection."
              }
            }
          }
        },
        "pubsubDetails": {
          "type": "array",
          "description": "Pub/Sub sources used by this job.",
          "items": {
            "type": "object",
            "properties": {
              "topic": {
                "type": "string",
                "description": "The topic accessed in the connection."
              },
              "subscription": {
                "type": "string",
                "description": "The subscription used in the connection."
              }
            }
          }
        },
        "fileDetails": {
          "type": "array",
          "description": "File-based sources used by this job.",
          "items": {
            "type": "object",
            "properties": {
              "filePattern": {
                "type": "string",
                "description": "The file pattern used to access files by the connector."
              }
            }
          }
        },
        "datastoreDetails": {
          "type": "array",
          "description": "Datastore sources used by this job.",
          "items": {
            "type": "object",
            "properties": {
              "namespace": {
                "type": "string",
                "description": "The namespace used in the connection."
              },
              "projectId": {
                "type": "string",
                "description": "The project ID accessed in the connection."
              }
            }
          }
        },
        "userDisplayProperties": {
          "type": "object",
          "description": "User-supplied properties for display.",
          "additionalProperties": { "type": "string" }
        }
      }
    },
    "runtimeUpdatableParams": {
      "type": "object",
      "description": "Parameters that can be updated during execution without stopping the job.",
      "properties": {
        "maxNumWorkers": {
          "type": "integer",
          "format": "int32",
          "description": "The maximum number of workers for autoscaling."
        },
        "minNumWorkers": {
          "type": "integer",
          "format": "int32",
          "description": "The minimum number of workers for autoscaling."
        },
        "workerUtilizationHint": {
          "type": "number",
          "format": "double",
          "description": "Target worker utilization between 0.1 and 0.9.",
          "minimum": 0.1,
          "maximum": 0.9
        }
      }
    },
    "serviceResources": {
      "type": "object",
      "description": "Resources allocated by the Dataflow service for the job.",
      "properties": {
        "zones": {
          "type": "array",
          "description": "The Cloud zones from which resources are allocated.",
          "items": { "type": "string" }
        }
      }
    }
  },
  "required": ["name"],
  "$defs": {
    "jobState": {
      "type": "string",
      "description": "A lifecycle state of a Dataflow job or of one of its execution stages.",
      "enum": [
        "JOB_STATE_UNKNOWN",
        "JOB_STATE_STOPPED",
        "JOB_STATE_RUNNING",
        "JOB_STATE_DONE",
        "JOB_STATE_FAILED",
        "JOB_STATE_CANCELLED",
        "JOB_STATE_UPDATED",
        "JOB_STATE_DRAINING",
        "JOB_STATE_DRAINED",
        "JOB_STATE_PENDING",
        "JOB_STATE_CANCELLING",
        "JOB_STATE_QUEUED",
        "JOB_STATE_RESOURCE_CLEANING_UP"
      ]
    }
  }
}