TextExtractionResult schema from Apache PDFBox
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/apache-pdfbox/refs/heads/main/json-schema/apache-pdfbox-text-extraction-result-schema.json",
  "title": "TextExtractionResult",
  "description": "TextExtractionResult schema from Apache PDFBox",
  "type": "object",
  "properties": {
    "documentId": {
      "type": "string",
      "examples": ["doc-abc123"]
    },
    "text": {
      "type": "string",
      "examples": ["This is extracted text from the PDF document."]
    },
    "pageCount": {
      "type": "integer",
      "examples": [5]
    },
    "wordCount": {
      "type": "integer",
      "examples": [1234]
    }
  }
}