Qdrant · Schema

Bm25Config

Configuration of the local bm25 models.

AI · Artificial Intelligence · Vector Databases

Properties

Name	Type	Description
k	number	Controls term frequency saturation. Higher values mean term frequency has more impact. Default is 1.2.
b	number	Controls document length normalization. Ranges from 0 (no normalization) to 1 (full normalization). Higher values mean longer documents have less impact. Default is 0.75.
avg_len	number	Expected average document length in the collection. Default is 256.
tokenizer	object
language	string	Defines which language to use for text preprocessing. This parameter is used to construct default stopwords filter and stemmer. To disable language-specific processing, set this to `"language": "none"`. If not specified, English is assumed.
lowercase	boolean	Lowercase the text before tokenization. Default is `true`.
ascii_folding	boolean	If true, normalize tokens by folding accented characters to ASCII (e.g., "ação" -> "acao"). Default is `false`.
stopwords	object	Configuration of the stopwords filter. Supports list of pre-defined languages and custom stopwords. Default: initialized for specified `language` or English if not specified.
stemmer	object	Configuration of the stemmer. Processes tokens to their root form. Default: initialized Snowball stemmer for specified `language` or English if not specified.
min_token_len	integer	Minimum token length to keep. If token is shorter than this, it will be discarded. Default is `None`, which means no minimum length.
max_token_len	integer	Maximum token length to keep. If token is longer than this, it will be discarded. Default is `None`, which means no maximum length.

View JSON Schema on GitHub

JSON Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/Bm25Config",
  "title": "Bm25Config",
  "description": "Configuration of the local bm25 models.",
  "type": "object",
  "properties": {
    "k": {
      "description": "Controls term frequency saturation. Higher values mean term frequency has more impact. Default is 1.2",
      "default": 1.2,
      "type": "number",
      "format": "double"
    },
    "b": {
      "description": "Controls document length normalization. Ranges from 0 (no normalization) to 1 (full normalization). Higher values mean longer documents have less impact. Default is 0.75.",
      "default": 0.75,
      "type": "number",
      "format": "double"
    },
    "avg_len": {
      "description": "Expected average document length in the collection. Default is 256.",
      "default": 256,
      "type": "number",
      "format": "double"
    },
    "tokenizer": {
      "$ref": "#/components/schemas/TokenizerType"
    },
    "language": {
      "description": "Defines which language to use for text preprocessing. This parameter is used to construct default stopwords filter and stemmer. To disable language-specific processing, set this to `\"language\": \"none\"`. If not specified, English is assumed.",
      "type": ["string", "null"]
    },
    "lowercase": {
      "description": "Lowercase the text before tokenization. Default is `true`.",
      "type": ["boolean", "null"]
    },
    "ascii_folding": {
      "description": "If true, normalize tokens by folding accented characters to ASCII (e.g., \"a\u00e7\u00e3o\" -> \"acao\"). Default is `false`.",
      "type": ["boolean", "null"]
    },
    "stopwords": {
      "description": "Configuration of the stopwords filter. Supports list of pre-defined languages and custom stopwords. Default: initialized for specified `language` or English if not specified.",
      "anyOf": [
        {
          "$ref": "#/components/schemas/StopwordsInterface"
        },
        {
          "type": "null"
        }
      ]
    },
    "stemmer": {
      "description": "Configuration of the stemmer. Processes tokens to their root form. Default: initialized Snowball stemmer for specified `language` or English if not specified.",
      "anyOf": [
        {
          "$ref": "#/components/schemas/StemmingAlgorithm"
        },
        {
          "type": "null"
        }
      ]
    },
    "min_token_len": {
      "description": "Minimum token length to keep. If token is shorter than this, it will be discarded. Default is `None`, which means no minimum length.",
      "type": ["integer", "null"],
      "format": "uint",
      "minimum": 0
    },
    "max_token_len": {
      "description": "Maximum token length to keep. If token is longer than this, it will be discarded. Default is `None`, which means no maximum length.",
      "type": ["integer", "null"],
      "format": "uint",
      "minimum": 0
    }
  }
}