Google Cloud Dataproc · Schema
Google Cloud Dataproc Cluster
A Dataproc cluster resource representing a managed Apache Hadoop and Spark cluster on Google Cloud.
Big Data · Data Processing · Google Cloud · Hadoop · Spark
Properties
| Name | Type | Description |
|---|---|---|
| projectId | string | The Google Cloud Platform project ID that the cluster belongs to. |
| clusterName | string | The cluster name, which must be unique within a project. |
| config | object | The cluster config. |
| labels | object | The labels to associate with this cluster. |
| status | object | The cluster status: its current state, the time that state began, and optional detail. |
| clusterUuid | string | A cluster UUID generated by the Dataproc service. |
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/api-evangelist/google-cloud-dataproc/refs/heads/main/json-schema/cluster-schema.json",
"title": "Google Cloud Dataproc Cluster",
"description": "A Dataproc cluster resource representing a managed Apache Hadoop and Spark cluster on Google Cloud.",
"type": "object",
"properties": {
"projectId": {
"type": "string",
"description": "The Google Cloud Platform project ID that the cluster belongs to."
},
"clusterName": {
"type": "string",
"description": "The cluster name, which must be unique within a project.",
"pattern": "^[a-z][-a-z0-9]{0,53}$"
},
"config": {
"type": "object",
"description": "The cluster config.",
"properties": {
"configBucket": {
"type": "string",
"description": "Cloud Storage bucket used for staging dependencies and config files."
},
"tempBucket": {
"type": "string",
"description": "Cloud Storage bucket used for temporary data."
},
"gceClusterConfig": {
"type": "object",
"properties": {
"zoneUri": { "type": "string" },
"networkUri": { "type": "string" },
"subnetworkUri": { "type": "string" },
"internalIpOnly": { "type": "boolean" },
"serviceAccountScopes": {
"type": "array",
"items": { "type": "string" }
},
"tags": {
"type": "array",
"items": { "type": "string" }
}
}
},
"masterConfig": {
"$ref": "#/$defs/InstanceGroupConfig"
},
"workerConfig": {
"$ref": "#/$defs/InstanceGroupConfig"
},
"secondaryWorkerConfig": {
"$ref": "#/$defs/InstanceGroupConfig"
},
"softwareConfig": {
"type": "object",
"properties": {
"imageVersion": {
"type": "string",
"description": "The version of software inside the cluster (e.g., 2.1-debian11)."
},
"properties": {
"type": "object",
"additionalProperties": { "type": "string" }
},
"optionalComponents": {
"type": "array",
"items": {
"type": "string",
"examples": ["JUPYTER", "ZEPPELIN", "DOCKER", "FLINK"]
}
}
}
},
"initializationActions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"executableFile": { "type": "string" },
"executionTimeout": { "type": "string" }
},
"required": ["executableFile"]
}
}
}
},
"labels": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "The labels to associate with this cluster."
},
"status": {
"type": "object",
"properties": {
"state": {
"type": "string",
"enum": ["UNKNOWN", "CREATING", "RUNNING", "ERROR", "ERROR_DUE_TO_UPDATE", "DELETING", "UPDATING", "STOPPING", "STOPPED", "STARTING", "REPAIRING"]
},
"stateStartTime": {
"type": "string",
"format": "date-time"
},
"detail": {
"type": "string"
}
}
},
"clusterUuid": {
"type": "string",
"description": "A cluster UUID generated by the Dataproc service."
}
},
"required": ["projectId", "clusterName", "config"],
"$defs": {
"InstanceGroupConfig": {
"type": "object",
"properties": {
"numInstances": {
"type": "integer",
"minimum": 0,
"description": "The number of VM instances in the instance group."
},
"machineTypeUri": {
"type": "string",
"description": "The Compute Engine machine type (e.g., n1-standard-4)."
},
"diskConfig": {
"type": "object",
"properties": {
"bootDiskType": {
"type": "string",
"enum": ["pd-standard", "pd-ssd", "pd-balanced"]
},
"bootDiskSizeGb": {
"type": "integer",
"minimum": 10
},
"numLocalSsds": {
"type": "integer",
"minimum": 0
}
}
},
"imageUri": {
"type": "string"
},
"preemptibility": {
"type": "string",
"enum": ["PREEMPTIBILITY_UNSPECIFIED", "NON_PREEMPTIBLE", "PREEMPTIBLE", "SPOT"]
}
}
}
}
}