Amazon Glue · Schema

CreateCrawlerRequest

CreateCrawlerRequest schema from Amazon Glue API

Analytics · Data Catalog · Data Integration · Data Pipeline · ETL · Serverless

Properties

Name Type Description
Name object
Role object
DatabaseName object
Description object
Targets object
Schedule object
Classifiers object
TablePrefix object
SchemaChangePolicy object
RecrawlPolicy object
LineageConfiguration object
LakeFormationConfiguration object
Configuration object
CrawlerSecurityConfiguration object
Tags object
View JSON Schema on GitHub

JSON Schema

glue-create-crawler-request-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/amazon-glue/refs/heads/main/json-schema/glue-create-crawler-request-schema.json",
  "title": "CreateCrawlerRequest",
  "description": "CreateCrawlerRequest schema from Amazon Glue API",
  "type": "object",
  "properties": {
    "Name": {
      "allOf": [
        {
          "$ref": "#/components/schemas/NameString"
        },
        {
          "description": "Name of the new crawler."
        }
      ]
    },
    "Role": {
      "allOf": [
        {
          "$ref": "#/components/schemas/Role"
        },
        {
          "description": "The IAM role or Amazon Resource Name (ARN) of an IAM role used by the new crawler to access customer resources."
        }
      ]
    },
    "DatabaseName": {
      "allOf": [
        {
          "$ref": "#/components/schemas/DatabaseName"
        },
        {
          "description": "The Glue database where results are written, such as: <code>arn:aws:daylight:us-east-1::database/sometable/*</code>."
        }
      ]
    },
    "Description": {
      "allOf": [
        {
          "$ref": "#/components/schemas/DescriptionString"
        },
        {
          "description": "A description of the new crawler."
        }
      ]
    },
    "Targets": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CrawlerTargets"
        },
        {
          "description": "A collection of targets to crawl."
        }
      ]
    },
    "Schedule": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CronExpression"
        },
        {
          "description": "A <code>cron</code> expression used to specify the schedule (see <a href=\"https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html\">Time-Based Schedules for Jobs and Crawlers</a>). For example, to run something every day at 12:15 UTC, you would specify: <code>cron(15 12 * * ? *)</code>."
        }
      ]
    },
    "Classifiers": {
      "allOf": [
        {
          "$ref": "#/components/schemas/ClassifierNameList"
        },
        {
          "description": "A list of custom classifiers that the user has registered. By default, all built-in classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification."
        }
      ]
    },
    "TablePrefix": {
      "allOf": [
        {
          "$ref": "#/components/schemas/TablePrefix"
        },
        {
          "description": "The table prefix used for catalog tables that are created."
        }
      ]
    },
    "SchemaChangePolicy": {
      "allOf": [
        {
          "$ref": "#/components/schemas/SchemaChangePolicy"
        },
        {
          "description": "The policy for the crawler's update and deletion behavior."
        }
      ]
    },
    "RecrawlPolicy": {
      "allOf": [
        {
          "$ref": "#/components/schemas/RecrawlPolicy"
        },
        {
          "description": "A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run."
        }
      ]
    },
    "LineageConfiguration": {
      "allOf": [
        {
          "$ref": "#/components/schemas/LineageConfiguration"
        },
        {
          "description": "Specifies data lineage configuration settings for the crawler."
        }
      ]
    },
    "LakeFormationConfiguration": {
      "allOf": [
        {
          "$ref": "#/components/schemas/LakeFormationConfiguration"
        },
        {
          "description": "Specifies Lake Formation configuration settings for the crawler."
        }
      ]
    },
    "Configuration": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CrawlerConfiguration"
        },
        {
          "description": "Crawler configuration information. This versioned JSON string allows users to specify aspects of a crawler's behavior. For more information, see <a href=\"https://docs.aws.amazon.com/glue/latest/dg/crawler-configuration.html\">Setting crawler configuration options</a>."
        }
      ]
    },
    "CrawlerSecurityConfiguration": {
      "allOf": [
        {
          "$ref": "#/components/schemas/CrawlerSecurityConfiguration"
        },
        {
          "description": "The name of the <code>SecurityConfiguration</code> structure to be used by this crawler."
        }
      ]
    },
    "Tags": {
      "allOf": [
        {
          "$ref": "#/components/schemas/TagsMap"
        },
        {
          "description": "The tags to use with this crawler request. You may use tags to limit access to the crawler. For more information about tags in Glue, see <a href=\"https://docs.aws.amazon.com/glue/latest/dg/monitor-tags.html\">Amazon Web Services Tags in Glue</a> in the developer guide."
        }
      ]
    }
  },
  "required": [
    "Name",
    "Role",
    "Targets"
  ]
}