Databricks · Schema
TaskSettings
AI · Analytics · Apache Spark · Big Data · Clean Rooms · Cloud Computing · Data · Data Analytics · Data Engineering · Data Governance · Delta Lake · Delta Sharing · ETL · Identity Management · Lakehouse · Machine Learning · MLflow · Model Serving · Security · SQL · Unity Catalog · Vector Search · Visualize
Properties
| Name | Type | Description |
|---|---|---|
| task_key | string | A unique key for the task within the job. Used to reference the task in dependencies and logging. |
| description | string | A description of the task. |
| depends_on | array | An array of objects specifying the task dependencies. Each dependency is identified by its task_key. |
| existing_cluster_id | string | An existing cluster to run the task on. |
| new_cluster | object | Cluster specification for this task; see the CreateClusterRequest schema. |
| job_cluster_key | string | Reference to a job_clusters entry. |
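| notebook_task | object | Notebook task settings: notebook_path, source (WORKSPACE or GIT), base_parameters. |
| spark_jar_task | object | Spark JAR task settings: main_class_name, parameters, jar_uri. |
| spark_python_task | object | Spark Python task settings: python_file, parameters, source (WORKSPACE or GIT). |
| spark_submit_task | object | Spark submit task settings: parameters. |
| pipeline_task | object | Pipeline task settings: pipeline_id, full_refresh. |
| python_wheel_task | object | Python wheel task settings: package_name, entry_point, parameters, named_parameters. |
| sql_task | object | SQL task settings: query, dashboard, alert, warehouse_id, parameters. |
| dbt_task | object | dbt task settings: project_directory, commands, schema, warehouse_id, catalog, profiles_directory. |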
| run_if | string | Condition to run this task. One of ALL_SUCCESS, AT_LEAST_ONE_SUCCESS, NONE_FAILED, ALL_DONE, AT_LEAST_ONE_FAILED, ALL_FAILED. |
| timeout_seconds | integer | Timeout for this individual task, in seconds. |
| max_retries | integer | Maximum number of retries for a failed task. |
| min_retry_interval_millis | integer | Minimum interval between retry attempts, in milliseconds. |
| retry_on_timeout | boolean | Whether to retry when the task times out. |
| email_notifications | object | Email notification settings for this task; see the JobEmailNotifications schema. |
| libraries | array | Libraries to install on the cluster running this task. |
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "#/components/schemas/TaskSettings",
"title": "TaskSettings",
"type": "object",
"required": [
"task_key"
],
"properties": {
"task_key": {
"type": "string",
"description": "A unique key for the task within the job. Used to reference the task in dependencies and logging.",
"examples": [
"etl_task_1"
]
},
"description": {
"type": "string",
"description": "A description of the task.",
"example": "A sample description."
},
"depends_on": {
"type": "array",
"items": {
"type": "object",
"properties": {
"task_key": {
"type": "string"
},
"outcome": {
"type": "string"
}
}
},
"description": "An array of objects specifying the task dependencies. Each dependency is identified by its task_key.",
"example": []
},
"existing_cluster_id": {
"type": "string",
"description": "An existing cluster to run the task on.",
"example": "500123"
},
"new_cluster": {
"$ref": "#/components/schemas/CreateClusterRequest"
},
"job_cluster_key": {
"type": "string",
"description": "Reference to a job_clusters entry.",
"example": "example_value"
},
"notebook_task": {
"type": "object",
"properties": {
"notebook_path": {
"type": "string",
"description": "The absolute path of the notebook in the workspace."
},
"source": {
"type": "string",
"enum": [
"WORKSPACE",
"GIT"
],
"description": "The source of the notebook."
},
"base_parameters": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Base parameters to pass to the notebook. These can be overridden at run time."
}
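},
"example": {
"notebook_path": "/Workspace/Shared/etl_notebook",
"source": "WORKSPACE",
"base_parameters": {
"run_date": "2024-01-01"
}
}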
},
"spark_jar_task": {
"type": "object",
"properties": {
"main_class_name": {
"type": "string"
},
"parameters": {
"type": "array",
"items": {
"type": "string"
}
},
"jar_uri": {
"type": "string"
}
},
"example": {
"main_class_name": "com.example.etl.Main",
"parameters": [
"--date",
"2024-01-01"
]
}
},
"spark_python_task": {
"type": "object",
"properties": {
"python_file": {
"type": "string",
"description": "URI of the Python file to execute."
},
"parameters": {
"type": "array",
"items": {
"type": "string"
}
},
"source": {
"type": "string",
"enum": [
"WORKSPACE",
"GIT"
]
}
},
"example": {
"python_file": "/Workspace/Shared/etl_job.py",
"parameters": [
"--date",
"2024-01-01"
],
"source": "WORKSPACE"
}
},
"spark_submit_task": {
"type": "object",
"properties": {
"parameters": {
"type": "array",
"items": {
"type": "string"
}
}
},
"example": {
"parameters": [
"--class",
"com.example.etl.Main",
"dbfs:/jars/etl.jar"
]
}
},
"pipeline_task": {
"type": "object",
"properties": {
"pipeline_id": {
"type": "string"
},
"full_refresh": {
"type": "boolean"
}
},
"example": {
"pipeline_id": "example_pipeline_id",
"full_refresh": false
}
},
"python_wheel_task": {
"type": "object",
"properties": {
"package_name": {
"type": "string"
},
"entry_point": {
"type": "string"
},
"parameters": {
"type": "array",
"items": {
"type": "string"
}
},
"named_parameters": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"example": {
"package_name": "example_package",
"entry_point": "main",
"named_parameters": {
"run_date": "2024-01-01"
}
}
},
"sql_task": {
"type": "object",
"properties": {
"query": {
"type": "object",
"properties": {
"query_id": {
"type": "string"
}
}
},
"dashboard": {
"type": "object",
"properties": {
"dashboard_id": {
"type": "string"
}
}
},
"alert": {
"type": "object",
"properties": {
"alert_id": {
"type": "string"
}
}
},
"warehouse_id": {
"type": "string"
},
"parameters": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"example": {
"query": {
"query_id": "example_query_id"
},
"warehouse_id": "example_warehouse_id"
}
},
"dbt_task": {
"type": "object",
"properties": {
"project_directory": {
"type": "string"
},
"commands": {
"type": "array",
"items": {
"type": "string"
}
},
"schema": {
"type": "string"
},
"warehouse_id": {
"type": "string"
},
"catalog": {
"type": "string"
},
"profiles_directory": {
"type": "string"
}
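},
"example": {
"project_directory": "example_dbt_project",
"commands": [
"dbt deps",
"dbt run"
],
"warehouse_id": "example_warehouse_id"
}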
},
"run_if": {
"type": "string",
"enum": [
"ALL_SUCCESS",
"AT_LEAST_ONE_SUCCESS",
"NONE_FAILED",
"ALL_DONE",
"AT_LEAST_ONE_FAILED",
"ALL_FAILED"
],
"description": "Condition to run this task.",
"example": "ALL_SUCCESS"
},
"timeout_seconds": {
"type": "integer",
"description": "Timeout for this individual task, in seconds.",
"example": 10
},
"max_retries": {
"type": "integer",
"description": "Maximum number of retries for a failed task.",
"example": 10
},
"min_retry_interval_millis": {
"type": "integer",
"description": "Minimum interval between retry attempts, in milliseconds.",
"example": 10
},
"retry_on_timeout": {
"type": "boolean",
"description": "Whether to retry when the task times out.",
"example": true
},
"email_notifications": {
"$ref": "#/components/schemas/JobEmailNotifications"
},
"libraries": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Library"
},
"description": "Libraries to install on the cluster running this task.",
"example": []
}
}
}