Apache Nutch · Schema
SeedList
A named list of seed URLs.
Web Crawler · Indexing · Search · Apache · Java · Hadoop · Open Source
Properties
| Name | Type | Description |
|---|---|---|
| id | integer | The seed list identifier. |
| name | string | A human-readable name for this seed list. |
| seedFilePath | string | The HDFS path where the seed file is stored. Populated after creation. |
| seedUrls | array | The collection of seed URLs in this list. Required; each entry is an object with a `url` string. |
JSON Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/apache-nutch/refs/heads/main/json-schema/apache-nutch-seed-list-schema.json",
  "title": "SeedList",
  "description": "A named list of seed URLs.",
  "type": "object",
  "required": [
    "seedUrls"
  ],
  "properties": {
    "id": {
      "description": "The seed list identifier.",
      "type": "integer",
      "format": "int64",
      "minimum": 0,
      "maximum": 9007199254740991,
      "readOnly": true
    },
    "name": {
      "description": "A human-readable name for this seed list.",
      "type": "string"
    },
    "seedFilePath": {
      "description": "The HDFS path where the seed file is stored. Populated after creation.",
      "type": "string",
      "readOnly": true
    },
    "seedUrls": {
      "description": "The collection of seed URLs in this list.",
      "type": "array",
      "items": {
        "description": "A single seed URL entry.",
        "type": "object",
        "properties": {
          "id": {
            "description": "The seed URL identifier.",
            "type": "integer",
            "format": "int64",
            "minimum": 0,
            "maximum": 9007199254740991,
            "readOnly": true
          },
          "url": {
            "description": "The seed URL.",
            "type": "string"
          }
        },
        "example": {
          "url": "https://example.com"
        }
      }
    }
  },
  "example": {
    "name": "my-seeds",
    "seedUrls": [
      {
        "url": "https://example.com"
      },
      {
        "url": "https://nutch.apache.org"
      }
    ]
  }
}