Google Cloud Dataproc Cluster

A Dataproc cluster resource representing a managed Apache Hadoop and Spark cluster on Google Cloud.

Big Data, Data Processing, Google Cloud, Hadoop, Spark

Properties

Name Type Description
projectId string The Google Cloud Platform project ID that the cluster belongs to.
clusterName string The cluster name, which must be unique within a project.
config object The cluster config.
labels object The labels to associate with this cluster.
status object The cluster status (state, state start time, and detail).
clusterUuid string A cluster UUID generated by the Dataproc service.
View JSON Schema on GitHub

JSON Schema

cluster-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/google-cloud-dataproc/refs/heads/main/json-schema/cluster-schema.json",
  "title": "Google Cloud Dataproc Cluster",
  "description": "A Dataproc cluster resource representing a managed Apache Hadoop and Spark cluster on Google Cloud.",
  "type": "object",
  "properties": {
    "projectId": {
      "type": "string",
      "description": "The Google Cloud Platform project ID that the cluster belongs to."
    },
    "clusterName": {
      "type": "string",
      "description": "The cluster name, which must be unique within a project.",
      "pattern": "^[a-z][-a-z0-9]{0,53}$"
    },
    "config": {
      "type": "object",
      "description": "The cluster config.",
      "properties": {
        "configBucket": {
          "type": "string",
          "description": "Cloud Storage bucket used for staging dependencies and config files."
        },
        "tempBucket": {
          "type": "string",
          "description": "Cloud Storage bucket used for temporary data."
        },
        "gceClusterConfig": {
          "type": "object",
          "properties": {
            "zoneUri": { "type": "string" },
            "networkUri": { "type": "string" },
            "subnetworkUri": { "type": "string" },
            "internalIpOnly": { "type": "boolean" },
            "serviceAccountScopes": {
              "type": "array",
              "items": { "type": "string" }
            },
            "tags": {
              "type": "array",
              "items": { "type": "string" }
            }
          }
        },
        "masterConfig": {
          "$ref": "#/$defs/InstanceGroupConfig"
        },
        "workerConfig": {
          "$ref": "#/$defs/InstanceGroupConfig"
        },
        "secondaryWorkerConfig": {
          "$ref": "#/$defs/InstanceGroupConfig"
        },
        "softwareConfig": {
          "type": "object",
          "properties": {
            "imageVersion": {
              "type": "string",
              "description": "The version of software inside the cluster (e.g., 2.1-debian11)."
            },
            "properties": {
              "type": "object",
              "additionalProperties": { "type": "string" }
            },
            "optionalComponents": {
              "type": "array",
              "items": {
                "type": "string",
                "examples": ["JUPYTER", "ZEPPELIN", "DOCKER", "FLINK"]
              }
            }
          }
        },
        "initializationActions": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "executableFile": { "type": "string" },
              "executionTimeout": { "type": "string" }
            },
            "required": ["executableFile"]
          }
        }
      }
    },
    "labels": {
      "type": "object",
      "additionalProperties": {
        "type": "string"
      },
      "description": "The labels to associate with this cluster."
    },
    "status": {
      "type": "object",
      "properties": {
        "state": {
          "type": "string",
          "enum": ["UNKNOWN", "CREATING", "RUNNING", "ERROR", "ERROR_DUE_TO_UPDATE", "DELETING", "UPDATING", "STOPPING", "STOPPED", "STARTING", "REPAIRING"]
        },
        "stateStartTime": {
          "type": "string",
          "format": "date-time"
        },
        "detail": {
          "type": "string"
        }
      }
    },
    "clusterUuid": {
      "type": "string",
      "description": "A cluster UUID generated by the Dataproc service."
    }
  },
  "required": ["projectId", "clusterName", "config"],
  "$defs": {
    "InstanceGroupConfig": {
      "type": "object",
      "properties": {
        "numInstances": {
          "type": "integer",
          "minimum": 0,
          "description": "The number of VM instances in the instance group."
        },
        "machineTypeUri": {
          "type": "string",
          "description": "The Compute Engine machine type (e.g., n1-standard-4)."
        },
        "diskConfig": {
          "type": "object",
          "properties": {
            "bootDiskType": {
              "type": "string",
              "enum": ["pd-standard", "pd-ssd", "pd-balanced"]
            },
            "bootDiskSizeGb": {
              "type": "integer",
              "minimum": 10
            },
            "numLocalSsds": {
              "type": "integer",
              "minimum": 0
            }
          }
        },
        "imageUri": {
          "type": "string"
        },
        "preemptibility": {
          "type": "string",
          "enum": ["PREEMPTIBILITY_UNSPECIFIED", "NON_PREEMPTIBLE", "PREEMPTIBLE", "SPOT"]
        }
      }
    }
  }
}