Weaviate · Schema

TokenizeRequest

Request body for the generic tokenize endpoint.

Vector Database · AI · Machine Learning · Semantic Search · Open Source · GraphQL · Kubernetes

Properties

Name	Type	Description
text	string	The text to tokenize.
tokenization	string	The tokenization method to apply.
analyzerConfig	object	See the TextAnalyzerConfig schema.
stopwords	object	See the StopwordConfig schema.
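stopwordPresets	object	Optional user-defined named stopword presets. Shape matches InvertedIndexConfig.stopwordPresets on a collection: each key is a preset name, each value is a plain list of stopwords. A preset name that matches a built-in ('en', 'none') fully replaces the built-in. Preset names must not be empty or whitespace-only; each word list must contain at least one word; individual words must not be empty or whitespace-only. Mutually exclusive with stopwords — pass one or the other, not both.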

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://api-evangelist.github.io/weaviate/json-schema/weaviate-tokenize-request-schema.json",
  "title": "TokenizeRequest",
  "description": "Request body for the generic tokenize endpoint.",
  "$comment": "The 'not' clause enforces that stopwords and stopwordPresets are never supplied together. The pattern \\S requires at least one non-whitespace character, which rejects both empty and whitespace-only strings.",
  "type": "object",
  "properties": {
    "text": {
      "type": "string",
      "description": "The text to tokenize."
    },
    "tokenization": {
      "type": "string",
      "description": "The tokenization method to apply.",
      "enum": [
        "word",
        "lowercase",
        "whitespace",
        "field",
        "trigram",
        "gse",
        "kagome_kr",
        "kagome_ja",
        "gse_ch"
      ]
    },
    "analyzerConfig": {
      "$ref": "#/components/schemas/TextAnalyzerConfig"
    },
    "stopwords": {
      "$ref": "#/components/schemas/StopwordConfig"
    },
    "stopwordPresets": {
      "type": "object",
      "description": "Optional user-defined named stopword presets. Shape matches InvertedIndexConfig.stopwordPresets on a collection: each key is a preset name, each value is a plain list of stopwords. A preset name that matches a built-in ('en', 'none') fully replaces the built-in. Preset names must not be empty or whitespace-only; each word list must contain at least one word; individual words must not be empty or whitespace-only. Mutually exclusive with stopwords \u2014 pass one or the other, not both.",
      "propertyNames": {
        "pattern": "\\S"
      },
      "additionalProperties": {
        "type": "array",
        "minItems": 1,
        "items": {
          "type": "string",
          "pattern": "\\S"
        }
      }
    }
  },
  "required": [
    "text",
    "tokenization"
  ],
  "not": {
    "required": [
      "stopwords",
      "stopwordPresets"
    ]
  }
}