Scrapfly · Schema

ScrapeRequest

AI · Data Extraction · Screenshots · Web Scraping · Proxies · Browser Automation

Properties

Name Type Description
key string API key for authentication
url string Target URL to scrape
render_js boolean Enable JavaScript rendering
asp boolean Enable Anti Scraping Protection bypass
country string Proxy country (ISO 3166-1 alpha-2)
proxy_pool string Proxy network selection
format string Response content format
headers object Custom HTTP headers
timeout integer Request timeout in milliseconds
retry boolean Enable automatic retry
session string Session name for persistent cookies/fingerprint
cache boolean Enable response caching
cache_ttl integer Cache TTL in seconds
extraction_template string Structured data extraction template
extraction_prompt string LLM prompt for data extraction
debug boolean Store results for debugging
correlation_id string Group related scrapes
tags array Tags for categorizing scrapes
webhook_name string Webhook for async response delivery
View JSON Schema on GitHub

JSON Schema

scrapfly-scraperequest-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "scrapfly-scraperequest-schema.json",
  "title": "ScrapeRequest",
  "description": "Parameters of a Scrapfly scrape request; `key` and `url` are required.",
  "type": "object",
  "required": [
    "key",
    "url"
  ],
  "properties": {
    "key": {
      "type": "string",
      "description": "API key for authentication"
    },
    "url": {
      "type": "string",
      "format": "uri",
      "description": "Target URL to scrape"
    },
    "render_js": {
      "type": "boolean",
      "default": false,
      "description": "Enable JavaScript rendering"
    },
    "asp": {
      "type": "boolean",
      "default": false,
      "description": "Enable Anti Scraping Protection bypass"
    },
    "country": {
      "type": "string",
      "description": "Proxy country (ISO 3166-1 alpha-2)"
    },
    "proxy_pool": {
      "type": "string",
      "enum": [
        "public_datacenter_pool",
        "residential_pool"
      ],
      "description": "Proxy network selection"
    },
    "format": {
      "type": "string",
      "enum": [
        "raw",
        "clean_html",
        "json",
        "markdown",
        "text"
      ],
      "default": "raw",
      "description": "Response content format"
    },
    "headers": {
      "type": "object",
      "additionalProperties": {
        "type": "string"
      },
      "description": "Custom HTTP headers"
    },
    "timeout": {
      "type": "integer",
      "default": 150000,
      "description": "Request timeout in milliseconds"
    },
    "retry": {
      "type": "boolean",
      "default": true,
      "description": "Enable automatic retry"
    },
    "session": {
      "type": "string",
      "description": "Session name for persistent cookies/fingerprint"
    },
    "cache": {
      "type": "boolean",
      "description": "Enable response caching"
    },
    "cache_ttl": {
      "type": "integer",
      "description": "Cache TTL in seconds"
    },
    "extraction_template": {
      "type": "string",
      "description": "Structured data extraction template"
    },
    "extraction_prompt": {
      "type": "string",
      "description": "LLM prompt for data extraction"
    },
    "debug": {
      "type": "boolean",
      "description": "Store results for debugging"
    },
    "correlation_id": {
      "type": "string",
      "description": "Group related scrapes"
    },
    "tags": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "Tags for categorizing scrapes"
    },
    "webhook_name": {
      "type": "string",
      "description": "Webhook for async response delivery"
    }
  }
}