Apache OpenNLP · Schema

TokenizationResult

TokenizationResult schema from Apache OpenNLP

Machine Learning · Natural Language Processing · NLP · Text Processing · Apache · Open Source · Java

Properties

Name Type Description
tokens array Extracted tokens
spans array Character offset span of each token in the input text
probabilities array Confidence for each token boundary
View JSON Schema on GitHub

JSON Schema

apache-opennlp-tokenization-result-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/apache-opennlp/refs/heads/main/json-schema/apache-opennlp-tokenization-result-schema.json",
  "title": "TokenizationResult",
  "description": "TokenizationResult schema from Apache OpenNLP",
  "type": "object",
  "properties": {
    "tokens": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "Extracted tokens",
      "examples": [
        [
          "Pierre",
          "Vinken",
          ",",
          "61",
          "years",
          "old"
        ]
      ]
    },
    "spans": {
      "type": "array",
      "items": {
        "$ref": "#/$defs/Span"
      },
      "description": "Character offset span of each token in the input text"
    },
    "probabilities": {
      "type": "array",
      "items": {
        "type": "number"
      },
      "description": "Confidence for each token boundary"
    }
  },
  "$defs": {
    "Span": {
      "title": "Span",
      "description": "Half-open character range [start, end) within the input text, modelled on opennlp.tools.util.Span",
      "type": "object",
      "properties": {
        "start": {
          "type": "integer",
          "minimum": 0,
          "description": "Zero-based index of the first character of the span (inclusive)"
        },
        "end": {
          "type": "integer",
          "minimum": 0,
          "description": "Zero-based index one past the last character of the span (exclusive)"
        },
        "type": {
          "type": ["string", "null"],
          "description": "Optional label attached to the span; null or absent when the span is untyped"
        }
      },
      "required": ["start", "end"]
    }
  }
}