Apache Nutch · Schema

DbQuery

Parameters for a CrawlDB query.

Web Crawler · Indexing · Search · Apache · Java · Hadoop · Open Source

Properties

| Name | Type | Description |
| --- | --- | --- |
| confId | string | Configuration ID. Falls back to "default" if not provided. |
| type | string | The type of CrawlDB query to execute. |
| args | object | Additional arguments for the query. |
| crawlId | string | The crawl identifier. |
View JSON Schema on GitHub

JSON Schema

apache-nutch-db-query-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/apache-nutch/refs/heads/main/json-schema/apache-nutch-db-query-schema.json",
  "title": "DbQuery",
  "description": "Parameters for a CrawlDB query.",
  "type": "object",
  "properties": {
    "confId": {
      "type": "string",
      "description": "Configuration ID. Falls back to \"default\" if not provided.",
      "default": "default"
    },
    "type": {
      "type": "string",
      "description": "The type of CrawlDB query to execute.",
      "enum": [
        "stats",
        "dump",
        "topN",
        "url"
      ]
    },
    "args": {
      "type": "object",
      "additionalProperties": {
        "type": "string"
      },
      "description": "Additional arguments for the query."
    },
    "crawlId": {
      "type": "string",
      "description": "The crawl identifier."
    }
  },
  "required": [
    "crawlId",
    "type"
  ],
  "examples": [
    {
      "confId": "default",
      "type": "stats",
      "crawlId": "crawl-01",
      "args": {}
    }
  ]
}