Apache Nutch · Schema

NutchServerInfo

Status information about the running Nutch server.

Web Crawler · Indexing · Search · Apache · Java · Hadoop · Open Source

Properties

| Name | Type | Description |
| --- | --- | --- |
| startDate | string | The date and time the server was started. |
| configuration | array | Set of known configuration IDs. |
| jobs | array | All jobs (any state). |
| runningJobs | array | Currently running jobs. |
View JSON Schema on GitHub

JSON Schema

apache-nutch-nutch-server-info-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/apache-nutch/refs/heads/main/json-schema/apache-nutch-nutch-server-info-schema.json",
  "title": "NutchServerInfo",
  "description": "Status information about the running Nutch server.",
  "type": "object",
  "properties": {
    "startDate": {
      "type": "string",
      "format": "date-time",
      "description": "The date and time the server was started."
    },
    "configuration": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "uniqueItems": true,
      "description": "Set of known configuration IDs."
    },
    "jobs": {
      "type": "array",
      "items": {
        "$ref": "#/$defs/JobInfo"
      },
      "description": "All jobs (any state)."
    },
    "runningJobs": {
      "type": "array",
      "items": {
        "$ref": "#/$defs/JobInfo"
      },
      "description": "Currently running jobs."
    }
  },
  "required": [
    "configuration",
    "jobs",
    "runningJobs"
  ],
  "$defs": {
    "JobInfo": {
      "title": "JobInfo",
      "type": "object",
      "description": "Information about a crawl job.",
      "required": [
        "type",
        "state"
      ],
      "properties": {
        "id": {
          "type": "string",
          "description": "The unique job identifier."
        },
        "type": {
          "type": "string",
          "description": "The type of Nutch crawl job.",
          "enum": [
            "INJECT",
            "GENERATE",
            "FETCH",
            "PARSE",
            "UPDATEDB",
            "INDEX",
            "READDB",
            "CLASS",
            "INVERTLINKS",
            "DEDUP"
          ]
        },
        "confId": {
          "type": "string",
          "description": "The configuration ID used for this job."
        },
        "args": {
          "type": "object",
          "additionalProperties": true,
          "description": "Arguments passed to the job."
        },
        "result": {
          "type": "object",
          "additionalProperties": true,
          "description": "Result data returned after job completion."
        },
        "state": {
          "type": "string",
          "description": "The current state of a job.",
          "enum": [
            "IDLE",
            "RUNNING",
            "FINISHED",
            "FAILED",
            "KILLED",
            "STOPPING",
            "KILLING",
            "ANY"
          ]
        },
        "msg": {
          "type": "string",
          "description": "A human-readable status or error message."
        },
        "crawlId": {
          "type": "string",
          "description": "The crawl identifier associated with this job."
        }
      }
    }
  }
}