Amazon Neptune · Schema

Amazon Neptune Loader Job

Represents a Neptune bulk loader job configuration and its status. The loader ingests data from Amazon S3 into a Neptune DB instance, supporting Gremlin CSV and openCypher CSV for property graphs, and N-Triples, N-Quads, RDF/XML, and Turtle for RDF data.

Database · Graph Database · Gremlin · Neptune · Property Graph · RDF · SPARQL

Properties

Name Type Description
source string Amazon S3 URI identifying the data file(s), folder, or multiple folders to load. Supported URI formats: s3://bucket/key, https://s3.amazonaws.com/bucket/key.
format string The data format of the source files to be loaded.
iamRoleArn string The ARN of the IAM role that provides Neptune access to the S3 bucket. Can be a comma-separated list of role ARNs for cross-account access.
region string The AWS Region of the S3 bucket containing the data to load.
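mode string The load mode. NEW creates a new load request regardless of any previous loads from the same source. RESUME looks for a previous load from the same source and resumes it, processing only the files that failed; if no previous load is found, the loader stops. AUTO resumes a previous load from the same source if one exists, otherwise loads all data as in NEW mode.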
failOnError string Whether to stop the entire load job when an error is encountered.
parallelism string The degree of parallelism for loading, expressed as the number of loader threads. LOW uses num_vCPU/8, MEDIUM uses num_vCPU/2, HIGH uses num_vCPU, and OVERSUBSCRIBE uses num_vCPU*2, taking up all available resources.
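parserConfiguration object Optional parser configuration settings. baseUri and namedGraphUri apply to RDF data; allowEmptyStrings controls whether empty string property values are loaded (Gremlin CSV data).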
updateSingleCardinalityProperties string Whether to update existing single-cardinality vertex properties with new values. Not supported for openCypher format.
queueRequest string Whether to queue the request if a load job is already running. Neptune queues up to 64 jobs in FIFO order.
dependencies array An array of load job IDs that must complete successfully before this job runs.
userProvidedEdgeIds string For openCypher format only. TRUE means edge files contain an :ID column. FALSE means Neptune auto-generates edge IDs.
View JSON Schema on GitHub

JSON Schema

amazon-neptune-loader-job-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://github.com/api-search/amazon-neptune/json-schema/amazon-neptune-loader-job-schema.json",
  "title": "Amazon Neptune Loader Job",
  "description": "Represents a Neptune bulk loader job configuration and its status. The loader ingests data from Amazon S3 into a Neptune DB instance, supporting Gremlin CSV and openCypher CSV for property graphs, and N-Triples, N-Quads, RDF/XML, and Turtle for RDF data.",
  "type": "object",
  "required": [
    "source",
    "format",
    "iamRoleArn",
    "region"
  ],
  "properties": {
    "source": {
      "type": "string",
      "description": "Amazon S3 URI identifying the data file(s), folder, or multiple folders to load. Supported URI formats: s3://bucket/key, https://s3.amazonaws.com/bucket/key.",
      "examples": [
        "s3://my-bucket/graph-data/",
        "s3://my-bucket/data/vertices.csv"
      ]
    },
    "format": {
      "type": "string",
      "description": "The data format of the source files to be loaded.",
      "enum": [
        "csv",
        "opencypher",
        "ntriples",
        "nquads",
        "rdfxml",
        "turtle"
      ]
    },
    "iamRoleArn": {
      "type": "string",
      "description": "The ARN of the IAM role that provides Neptune access to the S3 bucket. Can be a comma-separated list of role ARNs for cross-account access.",
      "pattern": "^arn:aws[a-z-]*:iam::[0-9]+:role/"
    },
    "region": {
      "type": "string",
      "description": "The AWS Region of the S3 bucket containing the data to load.",
      "examples": [
        "us-east-1",
        "eu-west-1"
      ]
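    },
    "mode": {
      "type": "string",
      "description": "The load mode. NEW creates a new load request regardless of any previous loads from the same source. RESUME looks for a previous load from the same source and resumes it, processing only the files that failed; if no previous load is found, the loader stops. AUTO resumes a previous load from the same source if one exists, otherwise loads all data as in NEW mode.",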
      "enum": [
        "NEW",
        "RESUME",
        "AUTO"
      ],
      "default": "AUTO"
    },
    "failOnError": {
      "type": "string",
      "description": "Whether to stop the entire load job when an error is encountered.",
      "enum": [
        "TRUE",
        "FALSE"
      ],
      "default": "TRUE"
    },
    "parallelism": {
      "type": "string",
      "description": "The degree of parallelism for loading, expressed as the number of loader threads. LOW uses num_vCPU/8, MEDIUM uses num_vCPU/2, HIGH uses num_vCPU, and OVERSUBSCRIBE uses num_vCPU*2, taking up all available resources.",
      "enum": [
        "LOW",
        "MEDIUM",
        "HIGH",
        "OVERSUBSCRIBE"
      ],
      "default": "HIGH"
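    },
    "parserConfiguration": {
      "type": "object",
      "description": "Optional parser configuration settings. baseUri and namedGraphUri apply to RDF data; allowEmptyStrings controls whether empty string property values are loaded (Gremlin CSV data).",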
      "properties": {
        "baseUri": {
          "type": "string",
          "description": "The base URI for resolving relative URIs in the data."
        },
        "namedGraphUri": {
          "type": "string",
          "description": "The default named graph URI for loaded triples."
        },
        "allowEmptyStrings": {
          "type": "boolean",
          "description": "Whether to allow empty string values for properties."
        }
      }
    },
    "updateSingleCardinalityProperties": {
      "type": "string",
      "description": "Whether to update existing single-cardinality vertex properties with new values. Not supported for openCypher format.",
      "enum": [
        "TRUE",
        "FALSE"
      ],
      "default": "FALSE"
    },
    "queueRequest": {
      "type": "string",
      "description": "Whether to queue the request if a load job is already running. Neptune queues up to 64 jobs in FIFO order.",
      "enum": [
        "TRUE",
        "FALSE"
      ],
      "default": "FALSE"
    },
    "dependencies": {
      "type": "array",
      "description": "An array of load job IDs that must complete successfully before this job runs.",
      "items": {
        "type": "string"
      }
    },
    "userProvidedEdgeIds": {
      "type": "string",
      "description": "For openCypher format only. TRUE means edge files contain an :ID column. FALSE means Neptune auto-generates edge IDs.",
      "enum": [
        "TRUE",
        "FALSE"
      ]
    }
  },
  "$defs": {
    "LoaderJobStatus": {
      "type": "object",
      "title": "Loader Job Status",
      "description": "The status of a Neptune bulk loader job.",
      "properties": {
        "loadId": {
          "type": "string",
          "description": "The unique identifier for the load job."
        },
        "overallStatus": {
          "type": "object",
          "properties": {
            "fullUri": {
              "type": "string",
              "description": "The S3 URI of the data source."
            },
            "runNumber": {
              "type": "integer",
              "description": "The run number for this load."
            },
            "retryNumber": {
              "type": "integer",
              "description": "The number of retries."
            },
            "status": {
              "type": "string",
              "description": "The current status of the load job.",
              "enum": [
                "LOAD_NOT_STARTED",
                "LOAD_IN_PROGRESS",
                "LOAD_COMPLETED",
                "LOAD_CANCELLED_BY_USER",
                "LOAD_CANCELLED_DUE_TO_ERRORS",
                "LOAD_FAILED",
                "LOAD_UNEXPECTED_ERROR",
                "LOAD_DATA_DEADLOCK",
                "LOAD_DATA_FAILED_DUE_TO_FEED_MODIFIED_OR_DELETED",
                "LOAD_S3_READ_ERROR",
                "LOAD_S3_ACCESS_DENIED_ERROR",
                "LOAD_COMMITTED_W_WRITE_CONFLICTS"
              ]
            },
            "totalTimeSpent": {
              "type": "integer",
              "description": "Total time spent on the load in seconds."
            },
            "startTime": {
              "type": "integer",
              "description": "The start time as a Unix timestamp."
            },
            "totalRecords": {
              "type": "integer",
              "description": "Total number of records processed."
            },
            "totalDuplicates": {
              "type": "integer",
              "description": "Total number of duplicate records encountered."
            },
            "parsingErrors": {
              "type": "integer",
              "description": "Total number of parsing errors."
            },
            "datatypeMismatchErrors": {
              "type": "integer",
              "description": "Total number of datatype mismatch errors."
            },
            "insertErrors": {
              "type": "integer",
              "description": "Total number of insert errors."
            }
          }
        }
      }
    }
  }
}