Apache Nutch · Schema
JobInfo
Information about a crawl job.
Web Crawler · Indexing · Search · Apache · Java · Hadoop · Open Source
Properties
| Name | Type | Description |
|---|---|---|
| id | string | The unique job identifier. |
| type | string | The type of Nutch crawl job. |
| confId | string | The configuration ID used for this job. |
| args | object | Arguments passed to the job. |
| result | object | Result data returned after job completion. |
| state | string | The current state of a job. |
| msg | string | A human-readable status or error message. |
| crawlId | string | The crawl identifier associated with this job. |
JSON Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/apache-nutch/refs/heads/main/json-schema/apache-nutch-job-info-schema.json",
  "title": "JobInfo",
  "description": "Information about a crawl job.",
  "type": "object",
  "properties": {
    "id": {
      "type": "string",
      "description": "The unique job identifier."
    },
    "type": {
      "type": "string",
      "description": "The type of Nutch crawl job.",
      "enum": [
        "INJECT",
        "GENERATE",
        "FETCH",
        "PARSE",
        "UPDATEDB",
        "INDEX",
        "READDB",
        "CLASS",
        "INVERTLINKS",
        "DEDUP"
      ]
    },
    "confId": {
      "type": "string",
      "description": "The configuration ID used for this job."
    },
    "args": {
      "type": "object",
      "additionalProperties": true,
      "description": "Arguments passed to the job."
    },
    "result": {
      "type": "object",
      "additionalProperties": true,
      "description": "Result data returned after job completion."
    },
    "state": {
      "type": "string",
      "description": "The current state of a job.",
      "enum": [
        "IDLE",
        "RUNNING",
        "FINISHED",
        "FAILED",
        "KILLED",
        "STOPPING",
        "KILLING",
        "ANY"
      ]
    },
    "msg": {
      "type": "string",
      "description": "A human-readable status or error message."
    },
    "crawlId": {
      "type": "string",
      "description": "The crawl identifier associated with this job."
    }
  },
  "required": ["type", "state"]
}