Google Cloud Dataflow Environment

Describes the environment in which a Dataflow job runs, including worker pool configuration, networking, encryption, and runtime settings.

Apache Beam, Batch Processing, Big Data, Data Processing, ETL, Stream Processing

Properties

Name Type Description
tempStoragePrefix string The prefix of the Cloud Storage path for temporary storage used during job execution.
clusterManagerApiService string The type of cluster manager API to use for managing workers.
experiments array A list of experiment flags passed to the SDK and Dataflow service for enabling experimental features.
serviceOptions array A list of service-level feature flags for the Dataflow service.
serviceKmsKeyName string The Cloud KMS key used for encrypting data at rest. Format: projects/{project}/locations/{location}/keyRings/{keyRing}/cryptoKeys/{key}.
workerPools array The worker pool configuration for the job, defining machine type, disk, network, and autoscaling settings.
userAgent object A structure describing the SDK and its version used by the job.
version object A structure describing which version of the Dataflow service the job requires.
dataset string The BigQuery dataset for workflow logging tables. Format: bigquery.googleapis.com/{dataset}.
sdkPipelineOptions object The Cloud Dataflow SDK pipeline options specified by the user.
serviceAccountEmail string The email address of the service account to run the workers as.
flexResourceSchedulingGoal string Which Flexible Resource Scheduling mode to run in for Flex RS jobs.
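workerRegion string The Compute Engine region where workers should be created, e.g. "us-west1". Mutually exclusive with workerZone.
workerZone string The specific Compute Engine zone where workers should be created, e.g. "us-west1-a". Mutually exclusive with workerRegion.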
shuffleMode string The shuffle mode for the job, set by the service.
debugOptions object Debugging options for the job.
streamingMode string The streaming mode for the job, specifying the message processing guarantee.
View JSON Schema on GitHub

JSON Schema

google-cloud-dataflow-environment-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://github.com/api-search/google-cloud-dataflow/json-schema/google-cloud-dataflow-environment-schema.json",
  "title": "Google Cloud Dataflow Environment",
  "description": "Describes the environment in which a Dataflow job runs, including worker pool configuration, networking, encryption, and runtime settings.",
  "type": "object",
  "properties": {
    "tempStoragePrefix": {
      "type": "string",
      "description": "The prefix of the Cloud Storage path for temporary storage used during job execution."
    },
    "clusterManagerApiService": {
      "type": "string",
      "description": "The type of cluster manager API to use for managing workers."
    },
    "experiments": {
      "type": "array",
      "description": "A list of experiment flags passed to the SDK and Dataflow service for enabling experimental features.",
      "items": {
        "type": "string"
      }
    },
    "serviceOptions": {
      "type": "array",
      "description": "A list of service-level feature flags for the Dataflow service.",
      "items": {
        "type": "string"
      }
    },
    "serviceKmsKeyName": {
      "type": "string",
      "description": "The Cloud KMS key used for encrypting data at rest. Format: projects/{project}/locations/{location}/keyRings/{keyRing}/cryptoKeys/{key}."
    },
    "workerPools": {
      "type": "array",
      "description": "The worker pool configuration for the job, defining machine type, disk, network, and autoscaling settings.",
      "items": {
        "$ref": "google-cloud-dataflow-worker-pool-schema.json"
      }
    },
    "userAgent": {
      "type": "object",
      "description": "A structure describing the SDK and its version used by the job.",
      "additionalProperties": true
    },
    "version": {
      "type": "object",
      "description": "A structure describing which version of the Dataflow service the job requires.",
      "additionalProperties": true
    },
    "dataset": {
      "type": "string",
      "description": "The BigQuery dataset for workflow logging tables. Format: bigquery.googleapis.com/{dataset}."
    },
    "sdkPipelineOptions": {
      "type": "object",
      "description": "The Cloud Dataflow SDK pipeline options specified by the user.",
      "additionalProperties": true
    },
    "serviceAccountEmail": {
      "type": "string",
      "format": "email",
      "description": "The email address of the service account to run the workers as."
    },
    "flexResourceSchedulingGoal": {
      "type": "string",
      "description": "Which Flexible Resource Scheduling mode to run in for Flex RS jobs.",
      "enum": [
        "FLEXRS_UNSPECIFIED",
        "FLEXRS_SPEED_OPTIMIZED",
        "FLEXRS_COST_OPTIMIZED"
      ]
    },
    "workerRegion": {
      "type": "string",
      "description": "The Compute Engine region where workers should be created, e.g. \"us-west1\". Mutually exclusive with workerZone."
    },
    "workerZone": {
      "type": "string",
      "description": "The specific Compute Engine zone where workers should be created, e.g. \"us-west1-a\". Mutually exclusive with workerRegion."
    },
    "shuffleMode": {
      "type": "string",
      "description": "The shuffle mode for the job, set by the service.",
      "readOnly": true,
      "enum": [
        "SHUFFLE_MODE_UNSPECIFIED",
        "VM_BASED",
        "SERVICE_BASED"
      ]
    },
    "debugOptions": {
      "type": "object",
      "description": "Debugging options for the job.",
      "properties": {
        "enableHotKeyLogging": {
          "type": "boolean",
          "description": "When true, enables logging of hot key detections during job execution."
        }
      }
    },
    "streamingMode": {
      "type": "string",
      "description": "The streaming mode for the job, specifying the message processing guarantee.",
      "enum": [
        "STREAMING_MODE_UNSPECIFIED",
        "STREAMING_MODE_EXACTLY_ONCE",
        "STREAMING_MODE_AT_LEAST_ONCE"
      ]
    }
  },
  "not": {
    "$comment": "workerRegion and workerZone are mutually exclusive: an environment may set at most one of them.",
    "required": [
      "workerRegion",
      "workerZone"
    ]
  }
}