Apache Nutch · Schema

JobInfo

Information about a crawl job.

Web Crawler · Indexing · Search · Apache · Java · Hadoop · Open Source

Properties

| Name | Type | Description |
| --- | --- | --- |
| id | string | The unique job identifier. |
| type | string | The type of Nutch crawl job. |
| confId | string | The configuration ID used for this job. |
| args | object | Arguments passed to the job. |
| result | object | Result data returned after job completion. |
| state | string | The current state of a job. |
| msg | string | A human-readable status or error message. |
| crawlId | string | The crawl identifier associated with this job. |
View JSON Schema on GitHub

JSON Schema

apache-nutch-job-info-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/apache-nutch/refs/heads/main/json-schema/apache-nutch-job-info-schema.json",
  "title": "JobInfo",
  "description": "Information about a crawl job.",
  "type": "object",
  "required": [
    "type",
    "state"
  ],
  "properties": {
    "id": {
      "description": "The unique job identifier.",
      "type": "string"
    },
    "type": {
      "description": "The type of Nutch crawl job.",
      "type": "string",
      "enum": [
        "INJECT",
        "GENERATE",
        "FETCH",
        "PARSE",
        "UPDATEDB",
        "INDEX",
        "READDB",
        "CLASS",
        "INVERTLINKS",
        "DEDUP"
      ]
    },
    "confId": {
      "description": "The configuration ID used for this job.",
      "type": "string"
    },
    "args": {
      "description": "Arguments passed to the job.",
      "type": "object",
      "additionalProperties": true
    },
    "result": {
      "description": "Result data returned after job completion.",
      "type": "object",
      "additionalProperties": true
    },
    "state": {
      "description": "The current state of a job.",
      "type": "string",
      "enum": [
        "IDLE",
        "RUNNING",
        "FINISHED",
        "FAILED",
        "KILLED",
        "STOPPING",
        "KILLING",
        "ANY"
      ]
    },
    "msg": {
      "description": "A human-readable status or error message.",
      "type": "string"
    },
    "crawlId": {
      "description": "The crawl identifier associated with this job.",
      "type": "string"
    }
  }
}