diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 59b6b40cc61..f0b95cb8cb0 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -2338,6 +2338,7 @@ def create_scalar_index( Literal["BITMAP"], Literal["LABEL_LIST"], Literal["INVERTED"], + Literal["FTS"], Literal["NGRAM"], Literal["ZONEMAP"], Literal["BLOOMFILTER"], @@ -2407,8 +2408,9 @@ def create_scalar_index( called zones and stores summary statistics for each zone (min, max, null_count, nan_count, fragment_id, local_row_offset). It's very small but only effective if the column is at least approximately in sorted order. - * ``INVERTED``. It is used to index document columns. This index - can conduct full-text searches. For example, a column that contains any word + * ``INVERTED`` (alias: ``FTS``). It is used to index document columns. This + index can conduct full-text searches. For example, a column that contains any + word of query string "hello world". The results will be ranked by BM25. * ``BLOOMFILTER``. This inexact index uses a bloom filter. It is small but can only handle filters with equals and not equals and may require @@ -2428,7 +2430,7 @@ def create_scalar_index( index_type : str The type of the index. One of ``"BTREE"``, ``"BITMAP"``, ``"LABEL_LIST"``, ``"NGRAM"``, ``"ZONEMAP"``, ``"INVERTED"``, - ``"BLOOMFILTER"``, ``"RTREE"``. + ``"FTS"``, ``"BLOOMFILTER"``, ``"RTREE"``. name : str, optional The index name. If not provided, it will be generated from the column name. @@ -2548,6 +2550,7 @@ def create_scalar_index( "ZONEMAP", "LABEL_LIST", "INVERTED", + "FTS", "BLOOMFILTER", "RTREE", ]: @@ -2587,7 +2590,7 @@ def create_scalar_index( field_type ): raise TypeError(f"NGRAM index column {column} must be a string") - elif index_type in ["INVERTED"]: + elif index_type in ["INVERTED", "FTS"]: value_type = field_type if pa.types.is_list(field_type) or pa.types.is_large_list(field_type): value_type = field_type.value_type diff --git a/python/python/tests/test_scalar_index.py b/python/python/tests/test_scalar_index.py index 90b2fa02ec4..443172db13e 100644 --- a/python/python/tests/test_scalar_index.py +++ b/python/python/tests/test_scalar_index.py @@ -663,6 +663,11 @@ def test_filter_with_fts_index(dataset): assert query == row.as_py() +def test_create_scalar_index_fts_alias(dataset): + dataset.create_scalar_index("doc", index_type="FTS", with_position=False) + assert any(idx["type"] == "Inverted" for idx in dataset.list_indices()) + + def test_multi_index_create(tmp_path): dataset = lance.write_dataset( pa.table({"ints": range(1024)}), tmp_path, max_rows_per_file=100 diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 715b9fa5e3b..dc88b6d72d9 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -1822,7 +1822,7 @@ impl Dataset { "BLOOMFILTER" => IndexType::BloomFilter, "LABEL_LIST" => IndexType::LabelList, "RTREE" => IndexType::RTree, - "INVERTED" => IndexType::Inverted, + "INVERTED" | "FTS" => IndexType::Inverted, "IVF_FLAT" | "IVF_PQ" | "IVF_SQ" | "IVF_RQ" | "IVF_HNSW_FLAT" | "IVF_HNSW_PQ" | "IVF_HNSW_SQ" => IndexType::Vector, _ => { @@ -1879,7 +1879,7 @@ impl Dataset { params: Some(config.config.clone()), }) } - "INVERTED" => { + "INVERTED" | "FTS" => { let mut params = InvertedIndexParams::default(); if let Some(kwargs) = kwargs { if let Some(with_position) = kwargs.get_item("with_position")? { @@ -2031,7 +2031,7 @@ impl Dataset { index_type_up ); match index_type_up.as_str() { - "INVERTED" => { + "INVERTED" | "FTS" => { // Call merge_index_files function for inverted index lance_index::scalar::inverted::builder::merge_index_files( self.ds.object_store(),