Amazon Glue DataBrew · Schema
Rule
Represents a single data quality requirement that should be validated in the scope of this dataset.
Data Analytics · Data Preparation · ETL · Machine Learning
Properties
| Name | Type | Description |
|---|---|---|
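| Name | object | The name of the rule. |
| Disabled | object | Whether the rule is disabled; a disabled rule is not validated during a profile job run. Default value is false. |
| CheckExpression | object | The check expression, built from column references, condition names, and variable references defined in the SubstitutionMap. |
| SubstitutionMap | object | The map of substitution variable names (starting with ':') to their values used in the check expression. |
| Threshold | object | The threshold used with a non-aggregate check expression to determine whether the validation succeeds. |
| ColumnSelectors | object | List of column selectors (by name or regular expression); the rule is applied to the selected columns. |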
JSON Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/amazon-glue-databrew/refs/heads/main/json-schema/glue-databrew-rule-schema.json",
  "title": "Rule",
  "description": "Represents a single data quality requirement that should be validated in the scope of this dataset.",
  "type": "object",
  "properties": {
    "Name": {
      "$ref": "#/$defs/RuleName",
      "description": "The name of the rule."
    },
    "Disabled": {
      "$ref": "#/$defs/Disabled",
      "description": "A value that specifies whether the rule is disabled. Once a rule is disabled, a profile job will not validate it during a job run. Default value is false."
    },
    "CheckExpression": {
      "$ref": "#/$defs/Expression",
      "description": "<p>The expression which includes column references, condition names followed by variable references, possibly grouped and combined with other conditions. For example, <code>(:col1 starts_with :prefix1 or :col1 starts_with :prefix2) and (:col1 ends_with :suffix1 or :col1 ends_with :suffix2)</code>. Column and value references are substitution variables that should start with the ':' symbol. Depending on the context, substitution variables' values can be either an actual value or a column name. These values are defined in the SubstitutionMap. If a CheckExpression starts with a column reference, then ColumnSelectors in the rule should be null. If ColumnSelectors has been defined, then there should be no column reference in the left side of a condition, for example, <code>is_between :val1 and :val2</code>.</p> <p>For more information, see <a href=\"https://docs.aws.amazon.com/databrew/latest/dg/profile.data-quality-available-checks.html\">Available checks</a> </p>"
    },
    "SubstitutionMap": {
      "$ref": "#/$defs/ValuesMap",
      "description": "The map of substitution variable names to their values used in a check expression. Variable names should start with a ':' (colon). Variable values can either be actual values or column names. To differentiate between the two, column names should be enclosed in backticks, for example, <code>\":col1\": \"`Column A`\".</code> "
    },
    "Threshold": {
      "$ref": "#/$defs/Threshold",
      "description": "The threshold used with a non-aggregate check expression. Non-aggregate check expressions will be applied to each row in a specific column, and the threshold will be used to determine whether the validation succeeds."
    },
    "ColumnSelectors": {
      "$ref": "#/$defs/ColumnSelectorList",
      "description": "List of column selectors. Selectors can be used to select columns using a name or regular expression from the dataset. Rule will be applied to selected columns."
    }
  },
  "required": [
    "Name",
    "CheckExpression"
  ],
  "$defs": {
    "RuleName": {
      "description": "Name of a data quality rule.",
      "type": "string",
      "minLength": 1,
      "maxLength": 128
    },
    "Disabled": {
      "description": "Flag indicating whether a rule is disabled.",
      "type": "boolean"
    },
    "Expression": {
      "description": "A check expression made of substitution variables, condition names, and grouping/comparison symbols.",
      "type": "string",
      "minLength": 4,
      "maxLength": 1024,
      "pattern": "^[><0-9A-Za-z_.,:)(!= ]+$"
    },
    "ValuesMap": {
      "description": "Map from substitution variable names (each starting with ':') to string values.",
      "type": "object",
      "propertyNames": {
        "minLength": 2,
        "maxLength": 128,
        "pattern": "^:[A-Za-z0-9_]+$"
      },
      "additionalProperties": {
        "type": "string",
        "maxLength": 1024
      }
    },
    "Threshold": {
      "description": "The threshold used with a non-aggregate check expression.",
      "type": "object",
      "properties": {
        "Value": {
          "description": "The value of the threshold.",
          "type": "number",
          "minimum": 0
        },
        "Type": {
          "description": "The type of comparison applied against the threshold value.",
          "type": "string",
          "enum": [
            "GREATER_THAN_OR_EQUAL",
            "LESS_THAN_OR_EQUAL",
            "GREATER_THAN",
            "LESS_THAN"
          ]
        },
        "Unit": {
          "description": "Whether the threshold value is a row count or a percentage of rows.",
          "type": "string",
          "enum": [
            "COUNT",
            "PERCENTAGE"
          ]
        }
      },
      "required": [
        "Value"
      ]
    },
    "ColumnSelector": {
      "description": "Selects dataset columns either by exact name or by regular expression.",
      "type": "object",
      "properties": {
        "Regex": {
          "description": "A regular expression for selecting columns from the dataset.",
          "type": "string",
          "minLength": 1,
          "maxLength": 255
        },
        "Name": {
          "description": "The name of a column from the dataset.",
          "type": "string",
          "minLength": 1,
          "maxLength": 255
        }
      }
    },
    "ColumnSelectorList": {
      "description": "A non-empty list of column selectors.",
      "type": "array",
      "minItems": 1,
      "items": {
        "$ref": "#/$defs/ColumnSelector"
      }
    }
  }
}