Apache Nutch · Schema
DbQuery
Parameters for a CrawlDB query.
Web Crawler · Indexing · Search · Apache · Java · Hadoop · Open Source
Properties
| Name | Type | Description |
|---|---|---|
| confId | string | Configuration ID. Falls back to "default" if not provided. |
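| type | string | Required. The type of CrawlDB query to execute; one of `stats`, `dump`, `topN`, or `url`. |
| args | object | Additional arguments for the query, given as an object whose values are all strings. |
| crawlId | string | Required. The crawl identifier. |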
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/api-evangelist/apache-nutch/refs/heads/main/json-schema/apache-nutch-db-query-schema.json",
"title": "DbQuery",
"description": "Parameters for a CrawlDB query.",
"type": "object",
"properties": {
"confId": {
"type": "string",
"description": "Configuration ID. Falls back to \"default\" if not provided."
},
"type": {
"type": "string",
"description": "The type of CrawlDB query to execute.",
"enum": [
"stats",
"dump",
"topN",
"url"
]
},
"args": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Additional arguments for the query."
},
"crawlId": {
"type": "string",
"description": "The crawl identifier."
}
},
"required": [
"crawlId",
"type"
],
"example": {
"confId": "default",
"type": "stats",
"crawlId": "crawl-01",
"args": {}
}
}