Amazon Neptune · Schema

StartMLDataProcessingJobInput

StartMLDataProcessingJobInput schema from Neptune

Database, Graph Database, Gremlin, Neptune, Property Graph, RDF, SPARQL

Properties

Name Type Description
id string Unique identifier for the job (auto-generated if omitted).
inputDataS3Location string S3 URI for input data.
processedDataS3Location string S3 URI for output results.
previousDataProcessingJobId string Job ID of a previous job for incremental processing.
sagemakerIamRoleArn string IAM role ARN for SageMaker execution.
neptuneIamRoleArn string IAM role ARN for Neptune access.
processingInstanceType string ML instance type (default auto-selected ml.r5 type).
processingInstanceVolumeSizeInGB integer Disk volume size in GB (default 0 = auto-selected).
processingTimeOutInSeconds integer Timeout in seconds (default 86400).
modelType string Model type selection.
configFileName string Data specification file name.
subnets array Subnet IDs in Neptune VPC.
securityGroupIds array VPC security group IDs.
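volumeEncryptionKMSKey string KMS key used to encrypt data on the storage volume attached to the ML compute instances.
s3OutputEncryptionKMSKey string KMS key used to encrypt the processing job output written to S3.
enableInterContainerTrafficEncryption boolean Whether to encrypt inter-container traffic (default true).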
View JSON Schema on GitHub

JSON Schema

data-start-ml-data-processing-job-input-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/amazon-neptune/refs/heads/main/json-schema/data-start-ml-data-processing-job-input-schema.json",
  "title": "StartMLDataProcessingJobInput",
  "description": "StartMLDataProcessingJobInput schema from Neptune",
  "type": "object",
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique identifier for the job (auto-generated if omitted)."
    },
    "inputDataS3Location": {
      "type": "string",
      "description": "S3 URI for input data."
    },
    "processedDataS3Location": {
      "type": "string",
      "description": "S3 URI for output results."
    },
    "previousDataProcessingJobId": {
      "type": "string",
      "description": "Job ID of a previous job for incremental processing."
    },
    "sagemakerIamRoleArn": {
      "type": "string",
      "description": "IAM role ARN for SageMaker execution."
    },
    "neptuneIamRoleArn": {
      "type": "string",
      "description": "IAM role ARN for Neptune access."
    },
    "processingInstanceType": {
      "type": "string",
      "description": "ML instance type (default auto-selected ml.r5 type)."
    },
    "processingInstanceVolumeSizeInGB": {
      "type": "integer",
      "description": "Disk volume size in GB (default 0 = auto-selected)."
    },
    "processingTimeOutInSeconds": {
      "type": "integer",
      "description": "Timeout in seconds (default 86400)."
    },
    "modelType": {
      "type": "string",
      "description": "Model type selection.",
      "enum": [
        "heterogeneous",
        "kge"
      ]
    },
    "configFileName": {
      "type": "string",
      "description": "Data specification file name.",
      "default": "training-data-configuration.json"
    },
    "subnets": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "Subnet IDs in Neptune VPC."
    },
    "securityGroupIds": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "VPC security group IDs."
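    },
    "volumeEncryptionKMSKey": {
      "type": "string",
      "description": "KMS key used to encrypt data on the storage volume attached to the ML compute instances."
    },
    "s3OutputEncryptionKMSKey": {
      "type": "string",
      "description": "KMS key used to encrypt the processing job output written to S3."
    },
    "enableInterContainerTrafficEncryption": {
      "type": "boolean",
      "description": "Whether to encrypt inter-container traffic.",
      "default": true
    }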
  },
  "required": [
    "inputDataS3Location",
    "processedDataS3Location"
  ]
}