diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 396a0d02ae2..2c5677ec683 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -2337,7 +2337,6 @@ def create_scalar_index( Literal["BITMAP"], Literal["LABEL_LIST"], Literal["INVERTED"], - Literal["FTS"], Literal["NGRAM"], Literal["ZONEMAP"], Literal["BLOOMFILTER"], @@ -2406,7 +2405,7 @@ def create_scalar_index( called zones and stores summary statistics for each zone (min, max, null_count, nan_count, fragment_id, local_row_offset). It's very small but only effective if the column is at least approximately in sorted order. - * ``FTS/INVERTED``. It is used to index document columns. This index + * ``INVERTED``. It is used to index document columns. This index can conduct full-text searches. For example, a column that contains any word of query string "hello world". The results will be ranked by BM25. * ``BLOOMFILTER``. This inexact index uses a bloom filter. It is small @@ -2426,8 +2425,8 @@ def create_scalar_index( or string column. index_type : str The type of the index. One of ``"BTREE"``, ``"BITMAP"``, - ``"LABEL_LIST"``, ``"NGRAM"``, ``"ZONEMAP"``, ``"FTS"``, - ``"INVERTED"`` or ``"BLOOMFILTER"``. + ``"LABEL_LIST"``, ``"NGRAM"``, ``"ZONEMAP"``, ``"INVERTED"``, or + ``"BLOOMFILTER"``. name : str, optional The index name. If not provided, it will be generated from the column name. @@ -2456,8 +2455,8 @@ def create_scalar_index( It won't impact the performance of non-phrase queries even if it is set to True. base_tokenizer: str, default "simple" - This is for the ``INVERTED`` index. The base tokenizer to use. The value - can be: + This is for the ``INVERTED`` index. The base tokenizer to use. The + value can be: * "simple": splits tokens on whitespace and punctuation. * "whitespace": splits tokens on whitespace. * "raw": no tokenization. @@ -2548,7 +2547,7 @@ def create_scalar_index( raise NotImplementedError( ( 'Only "BTREE", "BITMAP", "NGRAM", "ZONEMAP", "LABEL_LIST", ' - 'or "INVERTED" or "BLOOMFILTER" are supported for ' + '"INVERTED", or "BLOOMFILTER" are supported for ' f"scalar columns. Received {index_type}", ) ) @@ -2581,7 +2580,7 @@ def create_scalar_index( field_type ): raise TypeError(f"NGRAM index column {column} must be a string") - elif index_type in ["INVERTED", "FTS"]: + elif index_type in ["INVERTED"]: value_type = field_type if pa.types.is_list(field_type) or pa.types.is_large_list(field_type): value_type = field_type.value_type