Amazon Neptune · Schema
CreateDataProcessingJobRequest
Database · Graph Database · Gremlin · Neptune · Property Graph · RDF · SPARQL
Properties
| Name | Type | Description |
|---|---|---|
| id | string | Unique identifier for the job (auto-generated UUID if omitted). |
| inputDataS3Location | string | S3 URI for the input data. |
| processedDataS3Location | string | S3 URI where processed output is written. |
| previousDataProcessingJobId | string | Job ID of a previous job for incremental processing. |
| sagemakerIamRoleArn | string | IAM role ARN for SageMaker execution. |
| neptuneIamRoleArn | string | IAM role ARN for Neptune access. |
| processingInstanceType | string | ML instance type (default auto-selected ml.r5 type). |
| processingInstanceVolumeSizeInGB | integer | Disk volume size in GB (default 0 means auto-selected). |
| processingTimeOutInSeconds | integer | Timeout in seconds (default 86400, i.e., 1 day). |
| modelType | string | The type of model to prepare data for: heterogeneous or kge. |
| configFileName | string | The data specification configuration file name (default training-data-configuration.json). |
| subnets | array | VPC subnet IDs for SageMaker processing. |
| securityGroupIds | array | VPC security group IDs. |
| volumeEncryptionKMSKey | string | KMS key for storage volume encryption. |
| s3OutputEncryptionKMSKey | string | KMS key for S3 output encryption. |
| enableInterContainerTrafficEncryption | boolean | Whether to enable inter-container traffic encryption (default true). |
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "CreateDataProcessingJobRequest",
"title": "CreateDataProcessingJobRequest",
"type": "object",
"required": [
"inputDataS3Location",
"processedDataS3Location"
],
"properties": {
"id": {
"type": "string",
"description": "Unique identifier for the job (auto-generated UUID if omitted)."
},
"inputDataS3Location": {
"type": "string",
"description": "S3 URI for the input data."
},
"processedDataS3Location": {
"type": "string",
"description": "S3 URI where processed output is written."
},
"previousDataProcessingJobId": {
"type": "string",
"description": "Job ID of a previous job for incremental processing."
},
"sagemakerIamRoleArn": {
"type": "string",
"description": "IAM role ARN for SageMaker execution."
},
"neptuneIamRoleArn": {
"type": "string",
"description": "IAM role ARN for Neptune access."
},
"processingInstanceType": {
"type": "string",
"description": "ML instance type (default auto-selected ml.r5 type)."
},
"processingInstanceVolumeSizeInGB": {
"type": "integer",
"description": "Disk volume size in GB (default 0 means auto-selected).",
"default": 0
},
"processingTimeOutInSeconds": {
"type": "integer",
"description": "Timeout in seconds (default 86400, i.e., 1 day).",
"default": 86400
},
"modelType": {
"type": "string",
"description": "The type of model to prepare data for.",
"enum": [
"heterogeneous",
"kge"
]
},
"configFileName": {
"type": "string",
"description": "The data specification configuration file name.",
"default": "training-data-configuration.json"
},
"subnets": {
"type": "array",
"description": "VPC subnet IDs for SageMaker processing.",
"items": {
"type": "string"
}
},
"securityGroupIds": {
"type": "array",
"description": "VPC security group IDs.",
"items": {
"type": "string"
}
},
"volumeEncryptionKMSKey": {
"type": "string",
"description": "KMS key for storage volume encryption."
},
"s3OutputEncryptionKMSKey": {
"type": "string",
"description": "KMS key for S3 output encryption."
},
"enableInterContainerTrafficEncryption": {
"type": "boolean",
"description": "Whether to enable inter-container traffic encryption.",
"default": true
}
}
}