diff --git a/faiss/Index.h b/faiss/Index.h index 0af35cfce2..f189bf3af0 100644 --- a/faiss/Index.h +++ b/faiss/Index.h @@ -62,6 +62,7 @@ enum NumericType { Float32, Float16, UInt8, + Int8, }; inline size_t get_numeric_type_size(NumericType numeric_type) { @@ -70,6 +71,9 @@ inline size_t get_numeric_type_size(NumericType numeric_type) { return 4; case NumericType::Float16: return 2; + case NumericType::UInt8: + case NumericType::Int8: + return 1; default: FAISS_THROW_MSG( "Unknown Numeric Type. Only supports Float32, Float16"); diff --git a/faiss/gpu/GpuCloner.cpp b/faiss/gpu/GpuCloner.cpp index 794b4fba2d..75c574694e 100644 --- a/faiss/gpu/GpuCloner.cpp +++ b/faiss/gpu/GpuCloner.cpp @@ -94,7 +94,7 @@ Index* ToCPUCloner::clone_Index(const Index* index) { #if defined USE_NVIDIA_CUVS else if (auto icg = dynamic_cast(index)) { IndexHNSWCagra* res = new IndexHNSWCagra(); - if (icg->get_numeric_type() == faiss::NumericType::Float16) { + if (icg->get_numeric_type() != faiss::NumericType::Float32) { res->base_level_only = true; } icg->copyTo(res); diff --git a/faiss/gpu/GpuIndex.cu b/faiss/gpu/GpuIndex.cu index 31c1bcddd1..9f824ca6a4 100644 --- a/faiss/gpu/GpuIndex.cu +++ b/faiss/gpu/GpuIndex.cu @@ -194,6 +194,8 @@ void GpuIndex::addPaged_( dispatch(float{}); } else if (numeric_type == NumericType::Float16) { dispatch(half{}); + } else if (numeric_type == NumericType::Int8) { + dispatch(int8_t{}); } else { FAISS_THROW_MSG("GpuIndex::addPaged_: Unsupported numeric type"); } @@ -251,6 +253,8 @@ void GpuIndex::addPage_( dispatch(float{}); } else if (numeric_type == NumericType::Float16) { dispatch(half{}); + } else if (numeric_type == NumericType::Int8) { + dispatch(int8_t{}); } else { FAISS_THROW_MSG("GpuIndex::addPage_: Unsupported numeric type"); } @@ -411,6 +415,22 @@ void GpuIndex::searchNonPaged_( stream, {n, this->d}); + searchImpl_( + n, + static_cast(vecs.data()), + numeric_type, + k, + outDistancesData, + outIndicesData, + params); + } else if (numeric_type == 
NumericType::Int8) { + auto vecs = toDeviceTemporary( + resources_.get(), + config_.device, + const_cast(static_cast(x)), + stream, + {n, this->d}); + searchImpl_( n, static_cast(vecs.data()), @@ -489,6 +509,16 @@ void GpuIndex::searchFromCpuPaged_( outDistancesSlice.data(), outIndicesSlice.data(), params); + } else if (numeric_type == NumericType::Int8) { + searchNonPaged_( + num, + static_cast( + static_cast(x) + cur * this->d), + numeric_type, + k, + outDistancesSlice.data(), + outIndicesSlice.data(), + params); } } @@ -645,6 +675,8 @@ void GpuIndex::searchFromCpuPaged_( dispatch(float{}); } else if (numeric_type == NumericType::Float16) { dispatch(half{}); + } else if (numeric_type == NumericType::Int8) { + dispatch(int8_t{}); } else { FAISS_THROW_MSG( "GpuIndex::searchFromCpuPaged_: Unsupported numeric type"); diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index eada5936ff..19aa78b21e 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -124,6 +124,24 @@ void GpuIndexCagra::train(idx_t n, const void* x, NumericType numeric_type) { cagraConfig_.guarantee_connectivity); std::get>>(index_)->train( n, static_cast(x)); + } else if (numeric_type == NumericType::Int8) { + index_ = std::make_shared>( + this->resources_.get(), + this->d, + cagraConfig_.intermediate_graph_degree, + cagraConfig_.graph_degree, + static_cast(cagraConfig_.build_algo), + cagraConfig_.nn_descent_niter, + cagraConfig_.store_dataset, + this->metric_type, + this->metric_arg, + INDICES_64_BIT, + ivf_pq_params, + ivf_pq_search_params, + cagraConfig_.refine_rate, + cagraConfig_.guarantee_connectivity); + std::get>>(index_)->train( + n, static_cast(x)); } else { FAISS_THROW_MSG("GpuIndexCagra::train unsupported data type"); } @@ -232,6 +250,29 @@ void GpuIndexCagra::searchImpl_( params->hashmap_max_fill_rate, params->num_random_samplings, params->seed); + } else if (numeric_type == NumericType::Int8) { + Tensor queries( + const_cast(static_cast(x)), + {n, 
this->d}); + + std::get>>(index_)->search( + queries, + k, + outDistances, + outLabels, + params->max_queries, + params->itopk_size, + params->max_iterations, + static_cast(params->algo), + params->team_size, + params->search_width, + params->min_iterations, + params->thread_block_size, + static_cast(params->hashmap_mode), + params->hashmap_min_bitlen, + params->hashmap_max_fill_rate, + params->num_random_samplings, + params->seed); } else { FAISS_THROW_MSG("GpuIndexCagra::searchImpl_ unsupported data type"); } @@ -314,6 +355,29 @@ void GpuIndexCagra::copyFrom( this->metric_type, this->metric_arg, INDICES_64_BIT); + } else if (numeric_type == NumericType::Int8) { + auto base_index = dynamic_cast(index->storage); + FAISS_ASSERT(base_index); + auto dataset = (uint8_t*)base_index->codes.data(); + + // decode what was encoded by Quantizer8bitDirectSigned in + // ScalarQuantizer + int8_t* decoded_train_dataset = new int8_t[index->ntotal * index->d]; + for (int i = 0; i < index->ntotal * this->d; i++) { + decoded_train_dataset[i] = dataset[i] - 128; + } + + index_ = std::make_shared>( + this->resources_.get(), + this->d, + index->ntotal, + hnsw.nb_neighbors(0), + decoded_train_dataset, + knn_graph.data(), + this->metric_type, + this->metric_arg, + INDICES_64_BIT); + delete[] decoded_train_dataset; } else { FAISS_THROW_MSG("GpuIndexCagra::copyFrom unsupported data type"); } @@ -348,6 +412,9 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { } else if (numeric_type_ == NumericType::Float16) { graph_degree = std::get>>(index_) ->get_knngraph_degree(); + } else if (numeric_type_ == NumericType::Int8) { + graph_degree = std::get>>(index_) + ->get_knngraph_degree(); } else { FAISS_THROW_MSG("GpuIndexCagra::copyTo unsupported data type"); } @@ -368,6 +435,10 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { auto qtype = ScalarQuantizer::QT_fp16; index->storage = new IndexScalarQuantizer(index->d, qtype, this->metric_type); + } else if 
(numeric_type_ == NumericType::Int8) { + auto qtype = ScalarQuantizer::QT_8bit_direct_signed; + index->storage = + new IndexScalarQuantizer(index->d, qtype, this->metric_type); } index->own_fields = true; @@ -438,6 +509,44 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { index->ntotal = n_train; } + if (allocation) { + delete[] train_dataset; + } + } else if (numeric_type_ == NumericType::Int8) { + int8_t* train_dataset; + const int8_t* dataset = + std::get>>(index_) + ->get_training_dataset(); + if (getDeviceForAddress(dataset) >= 0) { + train_dataset = new int8_t[n_train * index->d]; + allocation = true; + raft::copy( + train_dataset, + dataset, + n_train * index->d, + this->resources_->getRaftHandleCurrentDevice() + .get_stream()); + } else { + train_dataset = const_cast(dataset); + } + + index->init_level0 = false; + if (!index->base_level_only) { + FAISS_THROW_MSG( + "Only base level copy is supported for Int8 types in GpuIndexCagra::copyTo"); + } else { + index->hnsw.prepare_level_tab(n_train, false); + // applying encoding logic of Quantizer8bitDirectSigned + uint8_t* encoded_train_dataset = new uint8_t[n_train * index->d]; + for (int i = 0; i < n_train * index->d; i++) { + encoded_train_dataset[i] = train_dataset[i] + 128; + } + index->storage->add_sa_codes( + n_train, encoded_train_dataset, nullptr); + delete[] encoded_train_dataset; + index->ntotal = n_train; + } + if (allocation) { delete[] train_dataset; } diff --git a/faiss/gpu/GpuIndexCagra.h b/faiss/gpu/GpuIndexCagra.h index 49e2e0e800..0e4a5b252b 100644 --- a/faiss/gpu/GpuIndexCagra.h +++ b/faiss/gpu/GpuIndexCagra.h @@ -317,7 +317,8 @@ struct GpuIndexCagra : public GpuIndex { std::variant< std::monostate, std::shared_ptr>, - std::shared_ptr>> + std::shared_ptr>, + std::shared_ptr>> index_; }; diff --git a/faiss/gpu/impl/CuvsCagra.cu b/faiss/gpu/impl/CuvsCagra.cu index 355eeaecb8..482e4d6727 100644 --- a/faiss/gpu/impl/CuvsCagra.cu +++ b/faiss/gpu/impl/CuvsCagra.cu @@ -337,5 +337,6 
@@ const data_t* CuvsCagra::get_training_dataset() const { template class CuvsCagra; template class CuvsCagra; +template class CuvsCagra; } // namespace gpu } // namespace faiss diff --git a/faiss/gpu/test/test_cagra.py b/faiss/gpu/test/test_cagra.py index 0c26cc696b..1e997548fc 100644 --- a/faiss/gpu/test/test_cagra.py +++ b/faiss/gpu/test/test_cagra.py @@ -18,20 +18,30 @@ class TestComputeGT(unittest.TestCase): def do_compute_GT(self, metric, numeric_type): d = 64 k = 12 - ds = datasets.SyntheticDataset(d, 0, 10000, 100) - - # Get the data - database = ds.get_database() - queries = ds.get_queries() - - # Normalize for inner product to avoid duplicate neighbors - if metric == faiss.METRIC_INNER_PRODUCT: - # Normalize database vectors - database = database / np.linalg.norm(database, axis=1, keepdims=True) - # Normalize query vectors - queries = queries / np.linalg.norm(queries, axis=1, keepdims=True) - - Dref, Iref = faiss.knn(queries, database, k, metric) + + if numeric_type == faiss.Int8: + data_base_nt = np.random.randint(-128, 128, size=(10000, d), dtype=np.int8) + data_query_nt = np.random.randint(-128, 128, size=(100, d), dtype=np.int8) + data_base = data_base_nt.astype(np.float32) + data_query = data_query_nt.astype(np.float32) + else: + ds = datasets.SyntheticDataset(d, 0, 10000, 100) + data_base = ds.get_database() #fp32 + data_query = ds.get_queries() #fp32 + # Normalize for inner product to avoid duplicate neighbors + if metric == faiss.METRIC_INNER_PRODUCT: + # Normalize database vectors + data_base = data_base / np.linalg.norm(data_base, axis=1, keepdims=True) + # Normalize query vectors + data_query = data_query / np.linalg.norm(data_query, axis=1, keepdims=True) + if numeric_type == faiss.Float16: + data_base_nt = data_base.astype(np.float16) + data_query_nt = data_query.astype(np.float16) + elif numeric_type == faiss.Float32: + data_base_nt = data_base + data_query_nt = data_query + + Dref, Iref = faiss.knn(data_query, data_base, k, metric) res = 
faiss.StandardGpuResources() @@ -45,10 +55,8 @@ def do_compute_GT(self, metric, numeric_type): cagraIndexConfig.build_algo = faiss.graph_build_algo_IVF_PQ index = faiss.GpuIndexCagra(res, d, metric, cagraIndexConfig) - database = ds.get_database().astype(np.float16) if numeric_type == faiss.Float16 else ds.get_database() - index.train(database, numeric_type=numeric_type) - queries = ds.get_queries().astype(np.float16) if numeric_type == faiss.Float16 else ds.get_queries() - Dnew, Inew = index.search(queries, k, numeric_type=numeric_type) + index.train(data_base_nt, numeric_type=numeric_type) + Dnew, Inew = index.search(data_query_nt, k, numeric_type=numeric_type) evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k) @@ -64,6 +72,12 @@ def test_compute_GT_L2_FP16(self): def test_compute_GT_IP_FP16(self): self.do_compute_GT(faiss.METRIC_INNER_PRODUCT, faiss.Float16) + def test_compute_GT_L2_Int8(self): + self.do_compute_GT(faiss.METRIC_L2, faiss.Int8) + + def test_compute_GT_IP_Int8(self): + self.do_compute_GT(faiss.METRIC_INNER_PRODUCT, faiss.Int8) + @unittest.skipIf( "CUVS" not in faiss.get_compile_options(), "only if cuVS is compiled in") @@ -72,27 +86,31 @@ class TestInterop(unittest.TestCase): def do_interop(self, metric, numeric_type): d = 64 k = 12 - ds = datasets.SyntheticDataset(d, 0, 10000, 100) + if numeric_type == faiss.Int8: + data_base_nt = np.random.randint(-128, 128, size=(10000, d), dtype=np.int8) + data_query_nt = np.random.randint(-128, 128, size=(100, d), dtype=np.int8) + data_base = data_base_nt.astype(np.float32) + data_query = data_query_nt.astype(np.float32) + else: + ds = datasets.SyntheticDataset(d, 0, 10000, 100) + data_base = ds.get_database() #fp32 + data_query = ds.get_queries() #fp32 + if numeric_type == faiss.Float16: + data_base_nt = data_base.astype(np.float16) + data_query_nt = data_query.astype(np.float16) + elif numeric_type == faiss.Float32: + data_base_nt = data_base + data_query_nt = data_query res = 
faiss.StandardGpuResources() index = faiss.GpuIndexCagra(res, d, metric) - database = ( - ds.get_database().astype(np.float16) - if numeric_type == faiss.Float16 - else ds.get_database() - ) - index.train(database, numeric_type=numeric_type) - queries = ( - ds.get_queries().astype(np.float16) - if numeric_type == faiss.Float16 - else ds.get_queries() - ) - Dnew, Inew = index.search(queries, k, numeric_type=numeric_type) + index.train(data_base_nt, numeric_type=numeric_type) + Dnew, Inew = index.search(data_query_nt, k, numeric_type=numeric_type) cpu_index = faiss.index_gpu_to_cpu(index) # cpu index always search in fp32 - Dref, Iref = cpu_index.search(ds.get_queries(), k) + Dref, Iref = cpu_index.search(data_query, k) evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k) @@ -100,7 +118,7 @@ def do_interop(self, metric, numeric_type): faiss.serialize_index(cpu_index)) gpu_index = faiss.index_cpu_to_gpu(res, 0, deserialized_index) - Dnew2, Inew2 = gpu_index.search(queries, k, numeric_type=numeric_type) + Dnew2, Inew2 = gpu_index.search(data_query_nt, k, numeric_type=numeric_type) evaluation.check_ref_knn_with_draws(Dnew2, Inew2, Dnew, Inew, k) @@ -116,6 +134,12 @@ def test_interop_L2_FP16(self): def test_interop_IP_FP16(self): self.do_interop(faiss.METRIC_INNER_PRODUCT, faiss.Float16) + def test_interop_L2_Int8(self): + self.do_interop(faiss.METRIC_L2, faiss.Int8) + + def test_interop_IP_Int8(self): + self.do_interop(faiss.METRIC_INNER_PRODUCT, faiss.Int8) + @unittest.skipIf( "CUVS" not in faiss.get_compile_options(), @@ -125,19 +149,32 @@ class TestIDMapCagra(unittest.TestCase): def do_IDMapCagra(self, metric, numeric_type): d = 64 k = 12 - ds = datasets.SyntheticDataset(d, 0, 10000, 100) - Dref, Iref = faiss.knn(ds.get_queries(), ds.get_database(), k, metric) + if numeric_type == faiss.Int8: + data_base_nt = np.random.randint(-128, 128, size=(10000, d), dtype=np.int8) + data_query_nt = np.random.randint(-128, 128, size=(100, d), dtype=np.int8) + data_base 
= data_base_nt.astype(np.float32) + data_query = data_query_nt.astype(np.float32) + else: + ds = datasets.SyntheticDataset(d, 0, 10000, 100) + data_base = ds.get_database() #fp32 + data_query = ds.get_queries() #fp32 + if numeric_type == faiss.Float16: + data_base_nt = data_base.astype(np.float16) + data_query_nt = data_query.astype(np.float16) + elif numeric_type == faiss.Float32: + data_base_nt = data_base + data_query_nt = data_query + + Dref, Iref = faiss.knn(data_query, data_base, k, metric) res = faiss.StandardGpuResources() index = faiss.GpuIndexCagra(res, d, metric) idMapIndex = faiss.IndexIDMap(index) - database = ds.get_database().astype(np.float16) if numeric_type == faiss.Float16 else ds.get_database() - idMapIndex.train(database, numeric_type=numeric_type) + idMapIndex.train(data_base_nt, numeric_type=numeric_type) ids = np.array([i for i in range(10000)]) - idMapIndex.add_with_ids(database, ids, numeric_type=numeric_type) - queries = ds.get_queries().astype(np.float16) if numeric_type == faiss.Float16 else ds.get_queries() - Dnew, Inew = idMapIndex.search(queries, k, numeric_type=numeric_type) + idMapIndex.add_with_ids(data_base_nt, ids, numeric_type=numeric_type) + Dnew, Inew = idMapIndex.search(data_query_nt, k, numeric_type=numeric_type) evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k) @@ -152,3 +189,9 @@ def test_IDMapCagra_L2_FP16(self): def test_IDMapCagra_IP_FP16(self): self.do_IDMapCagra(faiss.METRIC_INNER_PRODUCT, faiss.Float16) + + def test_IDMapCagra_L2_Int8(self): + self.do_IDMapCagra(faiss.METRIC_L2, faiss.Int8) + + def test_IDMapCagra_IP_Int8(self): + self.do_IDMapCagra(faiss.METRIC_INNER_PRODUCT, faiss.Int8) diff --git a/faiss/python/class_wrappers.py b/faiss/python/class_wrappers.py index 848b84e190..394412b483 100644 --- a/faiss/python/class_wrappers.py +++ b/faiss/python/class_wrappers.py @@ -47,8 +47,10 @@ def _numeric_to_str(numeric_type): return 'float32' elif numeric_type == faiss.Float16: return 'float16' + elif 
numeric_type == faiss.Int8: + return 'int8' else: - raise ValueError("numeric type must be either faiss.Float32 or faiss.Float16 ") + raise ValueError("numeric type must be either faiss.Float32, faiss.Float16, or faiss.Int8") def replace_method(the_class, name, replacement, ignore_missing=False): """ Replaces a method in a class with another version. The old method