Amazon Neptune · Schema
Amazon Neptune Loader Job
Represents a Neptune bulk loader job configuration and its status. The loader ingests data from Amazon S3 into a Neptune DB instance, supporting CSV (Gremlin and openCypher formats) for property graphs and N-Triples, N-Quads, RDF/XML, and Turtle for RDF data.
Database, Graph Database, Gremlin, Neptune, Property Graph, RDF, SPARQL
Properties
| Name | Type | Description |
|---|---|---|
| source | string | Amazon S3 URI identifying the data file(s), folder, or multiple folders to load. Supported URI formats: s3://bucket/key, https://s3.amazonaws.com/bucket/key. |
| format | string | The data format of the source files to be loaded. |
| iamRoleArn | string | The ARN of the IAM role that provides Neptune access to the S3 bucket. Can be a comma-separated list of role ARNs for cross-account access. |
| region | string | The AWS Region of the S3 bucket containing the data to load. |
| mode | string | The load mode. NEW creates a new load request regardless of any previous loads. RESUME resumes a previous load job from the same source and stops if none is found. AUTO resumes a previous load job if one exists, otherwise starts a new load. |
| failOnError | string | Whether to stop the entire load job when an error is encountered. |
| parallelism | string | The degree of parallelism for loading. LOW uses num_vCPU/8 threads, MEDIUM uses num_vCPU/2, HIGH uses num_vCPU, OVERSUBSCRIBE uses twice num_vCPU and takes up all available resources. |
| parserConfiguration | object | Optional parser configuration settings for RDF data. |
| updateSingleCardinalityProperties | string | Whether to update existing single-cardinality vertex properties with new values. Not supported for openCypher format. |
| queueRequest | string | Whether to queue the request if a load job is already running. Neptune queues up to 64 jobs in FIFO order. |
| dependencies | array | An array of load job IDs that must complete successfully before this job runs. |
| userProvidedEdgeIds | string | For openCypher format only. TRUE means edge files contain an :ID column. FALSE means Neptune auto-generates edge IDs. |
JSON Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://github.com/api-search/amazon-neptune/json-schema/amazon-neptune-loader-job-schema.json",
  "title": "Amazon Neptune Loader Job",
  "description": "Represents a Neptune bulk loader job configuration and its status. The loader ingests data from Amazon S3 into a Neptune DB instance, supporting CSV (Gremlin and openCypher formats) for property graphs and N-Triples, N-Quads, RDF/XML, and Turtle for RDF data.",
  "type": "object",
  "required": [
    "source",
    "format",
    "iamRoleArn",
    "region"
  ],
  "properties": {
    "source": {
      "type": "string",
      "description": "Amazon S3 URI identifying the data file(s), folder, or multiple folders to load. Supported URI formats: s3://bucket/key, https://s3.amazonaws.com/bucket/key.",
      "examples": [
        "s3://my-bucket/graph-data/",
        "s3://my-bucket/data/vertices.csv"
      ]
    },
    "format": {
      "type": "string",
      "description": "The data format of the source files to be loaded: csv (Gremlin CSV) and opencypher (openCypher CSV) for property graphs; ntriples, nquads, rdfxml, and turtle for RDF.",
      "enum": [
        "csv",
        "opencypher",
        "ntriples",
        "nquads",
        "rdfxml",
        "turtle"
      ]
    },
    "iamRoleArn": {
      "type": "string",
      "description": "The ARN of the IAM role that provides Neptune access to the S3 bucket. Can be a comma-separated list of role ARNs for cross-account access.",
      "pattern": "^arn:aws[a-z-]*:iam::[0-9]+:role/"
    },
    "region": {
      "type": "string",
      "description": "The AWS Region of the S3 bucket containing the data to load.",
      "examples": [
        "us-east-1",
        "eu-west-1"
      ]
    },
    "mode": {
      "type": "string",
      "description": "The load mode. NEW creates a new load request regardless of any previous loads. RESUME resumes a previous load job from the same source and stops if none is found. AUTO resumes a previous load job if one exists, otherwise starts a new load.",
      "enum": [
        "NEW",
        "RESUME",
        "AUTO"
      ],
      "default": "AUTO"
    },
    "failOnError": {
      "type": "string",
      "description": "Whether to stop the entire load job when an error is encountered.",
      "enum": [
        "TRUE",
        "FALSE"
      ],
      "default": "TRUE"
    },
    "parallelism": {
      "type": "string",
      "description": "The degree of parallelism for loading. LOW uses num_vCPU/8 threads, MEDIUM uses num_vCPU/2, HIGH uses num_vCPU, OVERSUBSCRIBE uses twice num_vCPU and takes up all available resources.",
      "enum": [
        "LOW",
        "MEDIUM",
        "HIGH",
        "OVERSUBSCRIBE"
      ],
      "default": "HIGH"
    },
    "parserConfiguration": {
      "type": "object",
      "description": "Optional parser configuration settings. baseUri and namedGraphUri apply to RDF data; allowEmptyStrings applies to CSV property values.",
      "properties": {
        "baseUri": {
          "type": "string",
          "description": "The base URI for resolving relative URIs in the data."
        },
        "namedGraphUri": {
          "type": "string",
          "description": "The default named graph URI for loaded triples when no graph is specified."
        },
        "allowEmptyStrings": {
          "type": "boolean",
          "description": "Whether to load empty string values for properties in CSV data. When false, empty strings are treated as nulls and are not loaded.",
          "default": false
        }
      }
    },
    "updateSingleCardinalityProperties": {
      "type": "string",
      "description": "Whether to update existing single-cardinality vertex properties with new values. Not supported for openCypher format.",
      "enum": [
        "TRUE",
        "FALSE"
      ],
      "default": "FALSE"
    },
    "queueRequest": {
      "type": "string",
      "description": "Whether to queue the request if a load job is already running. Neptune queues up to 64 jobs in FIFO order.",
      "enum": [
        "TRUE",
        "FALSE"
      ],
      "default": "FALSE"
    },
    "dependencies": {
      "type": "array",
      "description": "An array of load job IDs that must complete successfully before this job runs.",
      "items": {
        "type": "string"
      }
    },
    "userProvidedEdgeIds": {
      "type": "string",
      "description": "For openCypher format only. TRUE (also the behavior when the parameter is absent) means edge files contain an :ID column. FALSE means Neptune auto-generates edge IDs.",
      "enum": [
        "TRUE",
        "FALSE"
      ],
      "default": "TRUE"
    }
  },
  "$defs": {
    "LoaderJobStatus": {
      "type": "object",
      "title": "Loader Job Status",
      "description": "The status of a Neptune bulk loader job.",
      "properties": {
        "loadId": {
          "type": "string",
          "description": "The unique identifier for the load job."
        },
        "overallStatus": {
          "type": "object",
          "description": "Aggregate state, timing, and record/error counts for the load job.",
          "properties": {
            "fullUri": {
              "type": "string",
              "description": "The S3 URI of the data source."
            },
            "runNumber": {
              "type": "integer",
              "description": "The run number for this load."
            },
            "retryNumber": {
              "type": "integer",
              "description": "The number of retries."
            },
            "status": {
              "type": "string",
              "description": "The current status of the load job. Includes the queued and dependency-failure states that can occur when queueRequest or dependencies are used.",
              "enum": [
                "LOAD_NOT_STARTED",
                "LOAD_IN_QUEUE",
                "LOAD_IN_PROGRESS",
                "LOAD_COMPLETED",
                "LOAD_CANCELLED_BY_USER",
                "LOAD_CANCELLED_DUE_TO_ERRORS",
                "LOAD_FAILED",
                "LOAD_FAILED_BECAUSE_DEPENDENCY_NOT_SATISFIED",
                "LOAD_FAILED_INVALID_REQUEST",
                "LOAD_UNEXPECTED_ERROR",
                "LOAD_DATA_DEADLOCK",
                "LOAD_DATA_FAILED_DUE_TO_FEED_MODIFIED_OR_DELETED",
                "LOAD_S3_READ_ERROR",
                "LOAD_S3_ACCESS_DENIED_ERROR",
                "LOAD_COMMITTED_W_WRITE_CONFLICTS"
              ]
            },
            "totalTimeSpent": {
              "type": "integer",
              "description": "Total time spent on the load in seconds."
            },
            "startTime": {
              "type": "integer",
              "description": "The start time as a Unix timestamp."
            },
            "totalRecords": {
              "type": "integer",
              "description": "Total number of records processed."
            },
            "totalDuplicates": {
              "type": "integer",
              "description": "Total number of duplicate records encountered."
            },
            "parsingErrors": {
              "type": "integer",
              "description": "Total number of parsing errors."
            },
            "datatypeMismatchErrors": {
              "type": "integer",
              "description": "Total number of datatype mismatch errors."
            },
            "insertErrors": {
              "type": "integer",
              "description": "Total number of insert errors."
            }
          }
        }
      }
    }
  }
}