From 36aecf85b1ee1a6ee9aefed278b1c0069119ab17 Mon Sep 17 00:00:00 2001 From: jinsolp Date: Thu, 17 Jul 2025 23:14:08 +0000 Subject: [PATCH 1/4] int8 support --- faiss/Index.h | 5 + faiss/gpu/GpuCloner.cpp | 2 +- faiss/gpu/GpuIndex.cu | 32 +++++++ faiss/gpu/GpuIndexCagra.cu | 95 +++++++++++++++++++ faiss/gpu/GpuIndexCagra.h | 3 +- faiss/gpu/impl/CuvsCagra.cu | 1 + faiss/gpu/test/test_cagra.py | 167 ++++++++++++++++++++------------- faiss/python/class_wrappers.py | 43 ++++----- 8 files changed, 259 insertions(+), 89 deletions(-) diff --git a/faiss/Index.h b/faiss/Index.h index 95af05df74..f189bf3af0 100644 --- a/faiss/Index.h +++ b/faiss/Index.h @@ -61,6 +61,8 @@ struct DistanceComputer; enum NumericType { Float32, Float16, + UInt8, + Int8, }; inline size_t get_numeric_type_size(NumericType numeric_type) { @@ -69,6 +71,9 @@ inline size_t get_numeric_type_size(NumericType numeric_type) { return 4; case NumericType::Float16: return 2; + case NumericType::UInt8: + case NumericType::Int8: + return 1; default: FAISS_THROW_MSG( "Unknown Numeric Type. 
Only supports Float32, Float16"); diff --git a/faiss/gpu/GpuCloner.cpp b/faiss/gpu/GpuCloner.cpp index 8b0b6fa942..4cc463d145 100644 --- a/faiss/gpu/GpuCloner.cpp +++ b/faiss/gpu/GpuCloner.cpp @@ -94,7 +94,7 @@ Index* ToCPUCloner::clone_Index(const Index* index) { #if defined USE_NVIDIA_CUVS else if (auto icg = dynamic_cast(index)) { IndexHNSWCagra* res = new IndexHNSWCagra(); - if (icg->get_numeric_type() == faiss::NumericType::Float16) { + if (icg->get_numeric_type() != faiss::NumericType::Float32) { res->base_level_only = true; } icg->copyTo(res); diff --git a/faiss/gpu/GpuIndex.cu b/faiss/gpu/GpuIndex.cu index 31c1bcddd1..4ef96b1d2b 100644 --- a/faiss/gpu/GpuIndex.cu +++ b/faiss/gpu/GpuIndex.cu @@ -194,6 +194,8 @@ void GpuIndex::addPaged_( dispatch(float{}); } else if (numeric_type == NumericType::Float16) { dispatch(half{}); + } else if (numeric_type == NumericType::Int8) { + dispatch(int8_t{}); } else { FAISS_THROW_MSG("GpuIndex::addPaged_: Unsupported numeric type"); } @@ -251,6 +253,8 @@ void GpuIndex::addPage_( dispatch(float{}); } else if (numeric_type == NumericType::Float16) { dispatch(half{}); + } else if (numeric_type == NumericType::Int8) { + dispatch(int8_t{}); } else { FAISS_THROW_MSG("GpuIndex::addPage_: Unsupported numeric type"); } @@ -419,6 +423,22 @@ void GpuIndex::searchNonPaged_( outDistancesData, outIndicesData, params); + } else if (numeric_type == NumericType::Int8) { + auto vecs = toDeviceTemporary( + resources_.get(), + config_.device, + const_cast(static_cast(x)), + stream, + {n, this->d}); + + searchImplEx_( + n, + static_cast(vecs.data()), + numeric_type, + k, + outDistancesData, + outIndicesData, + params); } else { FAISS_THROW_MSG("GpuIndex::search: Unsupported numeric type"); } @@ -489,6 +509,16 @@ void GpuIndex::searchFromCpuPaged_( outDistancesSlice.data(), outIndicesSlice.data(), params); + } else if (numeric_type == NumericType::Int8) { + searchNonPaged_( + num, + static_cast( + static_cast(x) + cur * this->d), + numeric_type, 
+ k, + outDistancesSlice.data(), + outIndicesSlice.data(), + params); } } @@ -645,6 +675,8 @@ void GpuIndex::searchFromCpuPaged_( dispatch(float{}); } else if (numeric_type == NumericType::Float16) { dispatch(half{}); + } else if (numeric_type == NumericType::Int8) { + dispatch(int8_t{}); } else { FAISS_THROW_MSG( "GpuIndex::searchFromCpuPaged_: Unsupported numeric type"); diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index 6bc4bc1cf5..8207225618 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -124,6 +124,24 @@ void GpuIndexCagra::train(idx_t n, const void* x, NumericType numeric_type) { cagraConfig_.guarantee_connectivity); std::get>>(index_)->train( n, static_cast(x)); + } else if (numeric_type == NumericType::Int8) { + index_ = std::make_shared>( + this->resources_.get(), + this->d, + cagraConfig_.intermediate_graph_degree, + cagraConfig_.graph_degree, + static_cast(cagraConfig_.build_algo), + cagraConfig_.nn_descent_niter, + cagraConfig_.store_dataset, + this->metric_type, + this->metric_arg, + INDICES_64_BIT, + ivf_pq_params, + ivf_pq_search_params, + cagraConfig_.refine_rate, + cagraConfig_.guarantee_connectivity); + std::get>>(index_)->train( + n, static_cast(x)); } else { FAISS_THROW_MSG("GpuIndexCagra::train unsupported data type"); } @@ -224,6 +242,29 @@ void GpuIndexCagra::searchImpl_( params->hashmap_max_fill_rate, params->num_random_samplings, params->seed); + } else if (numeric_type == NumericType::Int8) { + Tensor queries( + const_cast(static_cast(x)), + {n, this->d}); + + std::get>>(index_)->search( + queries, + k, + outDistances, + outLabels, + params->max_queries, + params->itopk_size, + params->max_iterations, + static_cast(params->algo), + params->team_size, + params->search_width, + params->min_iterations, + params->thread_block_size, + static_cast(params->hashmap_mode), + params->hashmap_min_bitlen, + params->hashmap_max_fill_rate, + params->num_random_samplings, + params->seed); } else { 
FAISS_THROW_MSG("GpuIndexCagra::searchImpl_ unsupported data type"); } @@ -306,6 +347,21 @@ void GpuIndexCagra::copyFrom( this->metric_type, this->metric_arg, INDICES_64_BIT); + } else if (numeric_type == NumericType::Int8) { + auto base_index = dynamic_cast(index->storage); + FAISS_ASSERT(base_index); + auto dataset = (int8_t*)base_index->codes.data(); + + index_ = std::make_shared>( + this->resources_.get(), + this->d, + index->ntotal, + hnsw.nb_neighbors(0), + dataset, + knn_graph.data(), + this->metric_type, + this->metric_arg, + INDICES_64_BIT); } else { FAISS_THROW_MSG("GpuIndexCagra::copyFrom unsupported data type"); } @@ -340,6 +396,9 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { } else if (numeric_type_ == NumericType::Float16) { graph_degree = std::get>>(index_) ->get_knngraph_degree(); + } else if (numeric_type_ == NumericType::Int8) { + graph_degree = std::get>>(index_) + ->get_knngraph_degree(); } else { FAISS_THROW_MSG("GpuIndexCagra::copyTo unsupported data type"); } @@ -360,6 +419,10 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { auto qtype = ScalarQuantizer::QT_fp16; index->storage = new IndexScalarQuantizer(index->d, qtype, this->metric_type); + } else if (numeric_type_ == NumericType::Int8) { + auto qtype = ScalarQuantizer::QT_8bit_direct_signed; + index->storage = + new IndexScalarQuantizer(index->d, qtype, this->metric_type); } index->own_fields = true; @@ -430,6 +493,38 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { index->ntotal = n_train; } + if (allocation) { + delete[] train_dataset; + } + } else if (numeric_type_ == NumericType::Int8) { + int8_t* train_dataset; + const int8_t* dataset = + std::get>>(index_) + ->get_training_dataset(); + if (getDeviceForAddress(dataset) >= 0) { + train_dataset = new int8_t[n_train * index->d]; + allocation = true; + raft::copy( + train_dataset, + dataset, + n_train * index->d, + this->resources_->getRaftHandleCurrentDevice() + .get_stream()); 
+ } else { + train_dataset = const_cast(dataset); + } + + index->init_level0 = false; + if (!index->base_level_only) { + FAISS_THROW_MSG( + "Only base level copy is supported for Int8 types in GpuIndexCagra::copyTo"); + } else { + index->hnsw.prepare_level_tab(n_train, false); + index->storage->add_sa_codes( + n_train, (uint8_t*)train_dataset, nullptr); + index->ntotal = n_train; + } + if (allocation) { delete[] train_dataset; } diff --git a/faiss/gpu/GpuIndexCagra.h b/faiss/gpu/GpuIndexCagra.h index cf4a706e7d..2ce4818bb6 100644 --- a/faiss/gpu/GpuIndexCagra.h +++ b/faiss/gpu/GpuIndexCagra.h @@ -312,7 +312,8 @@ struct GpuIndexCagra : public GpuIndex { std::variant< std::monostate, std::shared_ptr>, - std::shared_ptr>> + std::shared_ptr>, + std::shared_ptr>> index_; }; diff --git a/faiss/gpu/impl/CuvsCagra.cu b/faiss/gpu/impl/CuvsCagra.cu index acac6bcbbb..9a21f36145 100644 --- a/faiss/gpu/impl/CuvsCagra.cu +++ b/faiss/gpu/impl/CuvsCagra.cu @@ -335,5 +335,6 @@ const data_t* CuvsCagra::get_training_dataset() const { template class CuvsCagra; template class CuvsCagra; +template class CuvsCagra; } // namespace gpu } // namespace faiss diff --git a/faiss/gpu/test/test_cagra.py b/faiss/gpu/test/test_cagra.py index 9c9297c888..037d35cc4d 100644 --- a/faiss/gpu/test/test_cagra.py +++ b/faiss/gpu/test/test_cagra.py @@ -15,11 +15,26 @@ "only if cuVS is compiled in") class TestComputeGT(unittest.TestCase): - def do_compute_GT(self, metric): + def do_compute_GT(self, metric, numeric_type): d = 64 k = 12 - ds = datasets.SyntheticDataset(d, 0, 10000, 100) - Dref, Iref = faiss.knn(ds.get_queries(), ds.get_database(), k, metric) + if numeric_type == faiss.Int8: + data_base_nt = np.random.randint(-128, 128, size=(10000, d), dtype=np.int8) + data_query_nt = np.random.randint(-128, 128, size=(100, d), dtype=np.int8) + data_base = data_base_nt.astype(np.float32) + data_query = data_query_nt.astype(np.float32) + else: + ds = datasets.SyntheticDataset(d, 0, 10000, 100) + data_base = 
ds.get_database() #fp32 + data_query = ds.get_queries() #fp32 + if numeric_type == faiss.Float16: + data_base_nt = data_base.astype(np.float16) + data_query_nt = data_query.astype(np.float16) + elif numeric_type == faiss.Float32: + data_base_nt = data_base + data_query_nt = data_query + + Dref, Iref = faiss.knn(data_query, data_base, k, metric) res = faiss.StandardGpuResources() @@ -31,69 +46,62 @@ def do_compute_GT(self, metric): cagraIndexConfig.build_algo = faiss.graph_build_algo_IVF_PQ index = faiss.GpuIndexCagra(res, d, metric, cagraIndexConfig) - index.train(ds.get_database()) - Dnew, Inew = index.search(ds.get_queries(), k) + index.train(data_base_nt, numeric_type=numeric_type) + Dnew, Inew = index.search(data_query_nt, k, numeric_type=numeric_type) evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k) def test_compute_GT_L2(self): - self.do_compute_GT(faiss.METRIC_L2) + self.do_compute_GT(faiss.METRIC_L2, faiss.Float32) def test_compute_GT_IP(self): - self.do_compute_GT(faiss.METRIC_INNER_PRODUCT) + self.do_compute_GT(faiss.METRIC_INNER_PRODUCT, faiss.Float32) -@unittest.skipIf( - "CUVS" not in faiss.get_compile_options(), - "only if cuVS is compiled in") -class TestComputeGTFP16(unittest.TestCase): + def test_compute_GT_L2_FP16(self): + self.do_compute_GT(faiss.METRIC_L2, faiss.Float16) - def do_compute_GT(self, metric): - d = 64 - k = 12 - ds = datasets.SyntheticDataset(d, 0, 10000, 100) - Dref, Iref = faiss.knn(ds.get_queries(), ds.get_database(), k, metric) - - res = faiss.StandardGpuResources() - - # attempt to set custom IVF-PQ params - cagraIndexConfig = faiss.GpuIndexCagraConfig() - cagraIndexIVFPQConfig = faiss.IVFPQBuildCagraConfig() - cagraIndexIVFPQConfig.kmeans_trainset_fraction = 0.1 - cagraIndexConfig.ivf_pq_params = cagraIndexIVFPQConfig - cagraIndexConfig.build_algo = faiss.graph_build_algo_IVF_PQ - - index = faiss.GpuIndexCagra(res, d, metric, cagraIndexConfig) - fp16_data = ds.get_database().astype(np.float16) - 
index.train(fp16_data, faiss.Float16) - fp16_queries = ds.get_queries().astype(np.float16) - Dnew, Inew = index.search(fp16_queries, k, numeric_type=faiss.Float16) - - evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k) + def test_compute_GT_IP_FP16(self): + self.do_compute_GT(faiss.METRIC_INNER_PRODUCT, faiss.Float16) - def test_compute_GT_L2(self): - self.do_compute_GT(faiss.METRIC_L2) + def test_compute_GT_L2_Int8(self): + self.do_compute_GT(faiss.METRIC_L2, faiss.Int8) - def test_compute_GT_IP(self): - self.do_compute_GT(faiss.METRIC_INNER_PRODUCT) + def test_compute_GT_IP_Int8(self): + self.do_compute_GT(faiss.METRIC_INNER_PRODUCT, faiss.Int8) @unittest.skipIf( "CUVS" not in faiss.get_compile_options(), "only if cuVS is compiled in") class TestInterop(unittest.TestCase): - def do_interop(self, metric): + def do_interop(self, metric, numeric_type): d = 64 k = 12 - ds = datasets.SyntheticDataset(d, 0, 10000, 100) + if numeric_type == faiss.Int8: + data_base_nt = np.random.randint(-128, 128, size=(10000, d), dtype=np.int8) + data_query_nt = np.random.randint(-128, 128, size=(100, d), dtype=np.int8) + data_base = data_base_nt.astype(np.float32) + data_query = data_query_nt.astype(np.float32) + else: + ds = datasets.SyntheticDataset(d, 0, 10000, 100) + data_base = ds.get_database() #fp32 + data_query = ds.get_queries() #fp32 + if numeric_type == faiss.Float16: + data_base_nt = data_base.astype(np.float16) + data_query_nt = data_query.astype(np.float16) + elif numeric_type == faiss.Float32: + data_base_nt = data_base + data_query_nt = data_query res = faiss.StandardGpuResources() index = faiss.GpuIndexCagra(res, d, metric) - index.train(ds.get_database()) - Dnew, Inew = index.search(ds.get_queries(), k) + index.train(data_base_nt, numeric_type=numeric_type) + Dnew, Inew = index.search(data_query_nt, k, numeric_type=numeric_type) cpu_index = faiss.index_gpu_to_cpu(index) - Dref, Iref = cpu_index.search(ds.get_queries(), k) + # cpu index always search in 
fp32 + Dref, Iref = cpu_index.search(data_query, k) evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k) @@ -101,49 +109,80 @@ def do_interop(self, metric): faiss.serialize_index(cpu_index)) gpu_index = faiss.index_cpu_to_gpu(res, 0, deserialized_index) - Dnew2, Inew2 = gpu_index.search(ds.get_queries(), k) + Dnew2, Inew2 = gpu_index.search(data_query_nt, k, numeric_type=numeric_type) evaluation.check_ref_knn_with_draws(Dnew2, Inew2, Dnew, Inew, k) def test_interop_L2(self): - self.do_interop(faiss.METRIC_L2) + self.do_interop(faiss.METRIC_L2, faiss.Float32) def test_interop_IP(self): - self.do_interop(faiss.METRIC_INNER_PRODUCT) + self.do_interop(faiss.METRIC_INNER_PRODUCT, faiss.Float32) + + def test_interop_L2_FP16(self): + self.do_interop(faiss.METRIC_L2, faiss.Float16) + + def test_interop_IP_FP16(self): + self.do_interop(faiss.METRIC_INNER_PRODUCT, faiss.Float16) + + def test_interop_L2_Int8(self): + self.do_interop(faiss.METRIC_L2, faiss.Int8) + + def test_interop_IP_Int8(self): + self.do_interop(faiss.METRIC_INNER_PRODUCT, faiss.Int8) + @unittest.skipIf( "CUVS" not in faiss.get_compile_options(), "only if cuVS is compiled in") -class TestInteropFP16(unittest.TestCase): +class TestIDMapCagra(unittest.TestCase): - def do_interop(self, metric): + def do_IDMapCagra(self, metric, numeric_type): d = 64 k = 12 - ds = datasets.SyntheticDataset(d, 0, 10000, 100) + if numeric_type == faiss.Int8: + data_base_nt = np.random.randint(-128, 128, size=(10000, d), dtype=np.int8) + data_query_nt = np.random.randint(-128, 128, size=(100, d), dtype=np.int8) + data_base = data_base_nt.astype(np.float32) + data_query = data_query_nt.astype(np.float32) + else: + ds = datasets.SyntheticDataset(d, 0, 10000, 100) + data_base = ds.get_database() #fp32 + data_query = ds.get_queries() #fp32 + if numeric_type == faiss.Float16: + data_base_nt = data_base.astype(np.float16) + data_query_nt = data_query.astype(np.float16) + elif numeric_type == faiss.Float32: + data_base_nt = 
data_base + data_query_nt = data_query + + Dref, Iref = faiss.knn(data_query, data_base, k, metric) res = faiss.StandardGpuResources() index = faiss.GpuIndexCagra(res, d, metric) - fp16_data = ds.get_database().astype(np.float16) - index.train(fp16_data, faiss.Float16) - fp16_queries = ds.get_queries().astype(np.float16) - Dnew, Inew = index.search(fp16_queries, k, numeric_type=faiss.Float16) + idMapIndex = faiss.IndexIDMap(index) + idMapIndex.train(data_base_nt, numeric_type=numeric_type) + ids = np.array([i for i in range(10000)]) + idMapIndex.add_with_ids(data_base_nt, ids, numeric_type=numeric_type) + Dnew, Inew = idMapIndex.search(data_query_nt, k, numeric_type=numeric_type) - cpu_index = faiss.index_gpu_to_cpu(index) - Dref, Iref = cpu_index.search(ds.get_queries(), k) - evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k) - deserialized_index = faiss.deserialize_index( - faiss.serialize_index(cpu_index)) + def test_IDMapCagra_L2(self): + self.do_IDMapCagra(faiss.METRIC_L2, faiss.Float32) - gpu_index = faiss.index_cpu_to_gpu(res, 0, deserialized_index) - Dnew2, Inew2 = gpu_index.search(fp16_queries, k, numeric_type=faiss.Float16) + def test_IDMapCagra_IP(self): + self.do_IDMapCagra(faiss.METRIC_INNER_PRODUCT, faiss.Float32) - evaluation.check_ref_knn_with_draws(Dnew2, Inew2, Dnew, Inew, k) + def test_IDMapCagra_L2_FP16(self): + self.do_IDMapCagra(faiss.METRIC_L2, faiss.Float16) - def test_interop_L2(self): - self.do_interop(faiss.METRIC_L2) + def test_IDMapCagra_IP_FP16(self): + self.do_IDMapCagra(faiss.METRIC_INNER_PRODUCT, faiss.Float16) - def test_interop_IP(self): - self.do_interop(faiss.METRIC_INNER_PRODUCT) + def test_IDMapCagra_L2_Int8(self): + self.do_IDMapCagra(faiss.METRIC_L2, faiss.Int8) + + def test_IDMapCagra_IP_Int8(self): + self.do_IDMapCagra(faiss.METRIC_INNER_PRODUCT, faiss.Int8) diff --git a/faiss/python/class_wrappers.py b/faiss/python/class_wrappers.py index 51d8f570cb..3b2ce37103 100644 --- a/faiss/python/class_wrappers.py +++ 
b/faiss/python/class_wrappers.py @@ -42,6 +42,15 @@ def _check_dtype_uint8(codes): " uint8, but found %s" % ("codes", codes.dtype)) return np.ascontiguousarray(codes) +def _numeric_to_str(numeric_type): + if numeric_type == faiss.Float32: + return 'float32' + elif numeric_type == faiss.Float16: + return 'float16' + elif numeric_type == faiss.Int8: + return 'int8' + else: + raise ValueError("numeric type must be either faiss.Float32, faiss.Float16, or faiss.Int8") def replace_method(the_class, name, replacement, ignore_missing=False): """ Replaces a method in a class with another version. The old method @@ -226,13 +235,10 @@ def replacement_add(self, x, numeric_type = faiss.Float32): n, d = x.shape assert d == self.d - if numeric_type == faiss.Float32: - x = np.ascontiguousarray(x, dtype='float32') - else: - x = np.ascontiguousarray(x, dtype='float16') - self.add_c(n, swig_ptr(x)) + x = np.ascontiguousarray(x, dtype=_numeric_to_str(numeric_type)) + self.addEx(n, swig_ptr(x), numeric_type) - def replacement_add_with_ids(self, x, ids): + def replacement_add_with_ids(self, x, ids, numeric_type = faiss.Float32): """Adds vectors with arbitrary ids to the index (not all indexes support this). The index must be trained before vectors can be added to it. Vector `i` is stored in `x[i]` and has id `ids[i]`. @@ -248,10 +254,11 @@ def replacement_add_with_ids(self, x, ids): """ n, d = x.shape assert d == self.d - x = np.ascontiguousarray(x, dtype='float32') - ids = np.ascontiguousarray(ids, dtype='int64') assert ids.shape == (n, ), 'not same nb of vectors as ids' - self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids)) + x = np.ascontiguousarray(x, dtype=_numeric_to_str(numeric_type)) + ids = np.ascontiguousarray(ids, dtype='int64') + self.add_with_idsEx(n, swig_ptr(x), numeric_type, swig_ptr(ids)) + def replacement_assign(self, x, k, labels=None): """Find the k nearest neighbors of the set of vectors x in the index. 
@@ -297,12 +304,8 @@ def replacement_train(self, x, numeric_type = faiss.Float32): """ n, d = x.shape assert d == self.d - if numeric_type == faiss.Float32: - x = np.ascontiguousarray(x, dtype='float32') - self.train_c(n, swig_ptr(x)) - else: - x = np.ascontiguousarray(x, dtype='float16') - self.train_c(n, swig_ptr(x), faiss.Float16) + x = np.ascontiguousarray(x, dtype=_numeric_to_str(numeric_type)) + self.trainEx(n, swig_ptr(x), numeric_type) def replacement_search(self, x, k, *, params=None, D=None, I=None, numeric_type = faiss.Float32): @@ -333,10 +336,7 @@ def replacement_search(self, x, k, *, params=None, D=None, I=None, numeric_type """ n, d = x.shape - if numeric_type == faiss.Float32: - x = np.ascontiguousarray(x, dtype='float32') - else: - x = np.ascontiguousarray(x, dtype='float16') + x = np.ascontiguousarray(x, _numeric_to_str(numeric_type)) assert d == self.d assert k > 0 @@ -351,10 +351,7 @@ def replacement_search(self, x, k, *, params=None, D=None, I=None, numeric_type else: assert I.shape == (n, k) - if numeric_type == faiss.Float32: - self.search_c(n, swig_ptr(x), k, swig_ptr(D), swig_ptr(I), params) - else: - self.search_c(n, swig_ptr(x), faiss.Float16, k, swig_ptr(D), swig_ptr(I), params) + self.searchEx(n, swig_ptr(x), numeric_type, k, swig_ptr(D), swig_ptr(I), params) return D, I def replacement_search_and_reconstruct(self, x, k, *, params=None, D=None, I=None, R=None): From 3fb1799c0419cd5ada6e321bdba48814613ee645 Mon Sep 17 00:00:00 2001 From: jinsolp Date: Tue, 22 Jul 2025 16:26:27 +0000 Subject: [PATCH 2/4] merge commit --- faiss/gpu/GpuIndex.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/faiss/gpu/GpuIndex.cu b/faiss/gpu/GpuIndex.cu index 4ef96b1d2b..9f824ca6a4 100644 --- a/faiss/gpu/GpuIndex.cu +++ b/faiss/gpu/GpuIndex.cu @@ -431,7 +431,7 @@ void GpuIndex::searchNonPaged_( stream, {n, this->d}); - searchImplEx_( + searchImpl_( n, static_cast(vecs.data()), numeric_type, From 2f577aef3be6706a2d0527f5ca403f44b740624a Mon 
Sep 17 00:00:00 2001 From: jinsolp Date: Wed, 23 Jul 2025 23:51:32 +0000 Subject: [PATCH 3/4] apply appropriate encode/decode --- faiss/gpu/GpuIndexCagra.cu | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index e19720f9f9..19aa78b21e 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -358,18 +358,26 @@ void GpuIndexCagra::copyFrom( } else if (numeric_type == NumericType::Int8) { auto base_index = dynamic_cast(index->storage); FAISS_ASSERT(base_index); - auto dataset = (int8_t*)base_index->codes.data(); + auto dataset = (uint8_t*)base_index->codes.data(); + + // decode what was encoded by Quantizer8bitDirectSigned in + // ScalarQuantizer + int8_t* decoded_train_dataset = new int8_t[index->ntotal * index->d]; + for (int i = 0; i < index->ntotal * this->d; i++) { + decoded_train_dataset[i] = dataset[i] - 128; + } index_ = std::make_shared>( this->resources_.get(), this->d, index->ntotal, hnsw.nb_neighbors(0), - dataset, + decoded_train_dataset, knn_graph.data(), this->metric_type, this->metric_arg, INDICES_64_BIT); + delete[] decoded_train_dataset; } else { FAISS_THROW_MSG("GpuIndexCagra::copyFrom unsupported data type"); } @@ -528,8 +536,14 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { "Only base level copy is supported for Int8 types in GpuIndexCagra::copyTo"); } else { index->hnsw.prepare_level_tab(n_train, false); + // applying encoding logic of Quantizer8bitDirectSigned + uint8_t* encoded_train_dataset = new uint8_t[n_train * index->d]; + for (int i = 0; i < n_train * index->d; i++) { + encoded_train_dataset[i] = train_dataset[i] + 128; + } index->storage->add_sa_codes( - n_train, (uint8_t*)train_dataset, nullptr); + n_train, encoded_train_dataset, nullptr); + delete[] encoded_train_dataset; index->ntotal = n_train; } From a58872ee155911a42a2f5e888abe928b161719fb Mon Sep 17 00:00:00 2001 From: jinsolp Date: Thu, 24 Jul 2025 
17:48:20 +0000 Subject: [PATCH 4/4] proper merge conflict solve --- faiss/gpu/test/test_cagra.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/faiss/gpu/test/test_cagra.py b/faiss/gpu/test/test_cagra.py index e4f91a8736..bbfa056195 100644 --- a/faiss/gpu/test/test_cagra.py +++ b/faiss/gpu/test/test_cagra.py @@ -31,9 +31,9 @@ def do_compute_GT(self, metric, numeric_type): # Normalize for inner product to avoid duplicate neighbors if metric == faiss.METRIC_INNER_PRODUCT: # Normalize database vectors - database = database / np.linalg.norm(database, axis=1, keepdims=True) + data_base = data_base / np.linalg.norm(data_base, axis=1, keepdims=True) # Normalize query vectors - queries = queries / np.linalg.norm(queries, axis=1, keepdims=True) + data_query = data_query / np.linalg.norm(data_query, axis=1, keepdims=True) if numeric_type == faiss.Float16: data_base_nt = data_base.astype(np.float16) data_query_nt = data_query.astype(np.float16)