From 36aecf85b1ee1a6ee9aefed278b1c0069119ab17 Mon Sep 17 00:00:00 2001 From: jinsolp Date: Thu, 17 Jul 2025 23:14:08 +0000 Subject: [PATCH 1/4] int8 support --- faiss/Index.h | 5 + faiss/gpu/GpuCloner.cpp | 2 +- faiss/gpu/GpuIndex.cu | 32 +++++++ faiss/gpu/GpuIndexCagra.cu | 95 +++++++++++++++++++ faiss/gpu/GpuIndexCagra.h | 3 +- faiss/gpu/impl/CuvsCagra.cu | 1 + faiss/gpu/test/test_cagra.py | 167 ++++++++++++++++++++------------- faiss/python/class_wrappers.py | 43 ++++----- 8 files changed, 259 insertions(+), 89 deletions(-) diff --git a/faiss/Index.h b/faiss/Index.h index 95af05df74..f189bf3af0 100644 --- a/faiss/Index.h +++ b/faiss/Index.h @@ -61,6 +61,8 @@ struct DistanceComputer; enum NumericType { Float32, Float16, + UInt8, + Int8, }; inline size_t get_numeric_type_size(NumericType numeric_type) { @@ -69,6 +71,9 @@ inline size_t get_numeric_type_size(NumericType numeric_type) { return 4; case NumericType::Float16: return 2; + case NumericType::UInt8: + case NumericType::Int8: + return 1; default: FAISS_THROW_MSG( "Unknown Numeric Type. 
Only supports Float32, Float16"); diff --git a/faiss/gpu/GpuCloner.cpp b/faiss/gpu/GpuCloner.cpp index 8b0b6fa942..4cc463d145 100644 --- a/faiss/gpu/GpuCloner.cpp +++ b/faiss/gpu/GpuCloner.cpp @@ -94,7 +94,7 @@ Index* ToCPUCloner::clone_Index(const Index* index) { #if defined USE_NVIDIA_CUVS else if (auto icg = dynamic_cast(index)) { IndexHNSWCagra* res = new IndexHNSWCagra(); - if (icg->get_numeric_type() == faiss::NumericType::Float16) { + if (icg->get_numeric_type() != faiss::NumericType::Float32) { res->base_level_only = true; } icg->copyTo(res); diff --git a/faiss/gpu/GpuIndex.cu b/faiss/gpu/GpuIndex.cu index 31c1bcddd1..4ef96b1d2b 100644 --- a/faiss/gpu/GpuIndex.cu +++ b/faiss/gpu/GpuIndex.cu @@ -194,6 +194,8 @@ void GpuIndex::addPaged_( dispatch(float{}); } else if (numeric_type == NumericType::Float16) { dispatch(half{}); + } else if (numeric_type == NumericType::Int8) { + dispatch(int8_t{}); } else { FAISS_THROW_MSG("GpuIndex::addPaged_: Unsupported numeric type"); } @@ -251,6 +253,8 @@ void GpuIndex::addPage_( dispatch(float{}); } else if (numeric_type == NumericType::Float16) { dispatch(half{}); + } else if (numeric_type == NumericType::Int8) { + dispatch(int8_t{}); } else { FAISS_THROW_MSG("GpuIndex::addPage_: Unsupported numeric type"); } @@ -419,6 +423,22 @@ void GpuIndex::searchNonPaged_( outDistancesData, outIndicesData, params); + } else if (numeric_type == NumericType::Int8) { + auto vecs = toDeviceTemporary( + resources_.get(), + config_.device, + const_cast(static_cast(x)), + stream, + {n, this->d}); + + searchImplEx_( + n, + static_cast(vecs.data()), + numeric_type, + k, + outDistancesData, + outIndicesData, + params); } else { FAISS_THROW_MSG("GpuIndex::search: Unsupported numeric type"); } @@ -489,6 +509,16 @@ void GpuIndex::searchFromCpuPaged_( outDistancesSlice.data(), outIndicesSlice.data(), params); + } else if (numeric_type == NumericType::Int8) { + searchNonPaged_( + num, + static_cast( + static_cast(x) + cur * this->d), + numeric_type, 
+ k, + outDistancesSlice.data(), + outIndicesSlice.data(), + params); } } @@ -645,6 +675,8 @@ void GpuIndex::searchFromCpuPaged_( dispatch(float{}); } else if (numeric_type == NumericType::Float16) { dispatch(half{}); + } else if (numeric_type == NumericType::Int8) { + dispatch(int8_t{}); } else { FAISS_THROW_MSG( "GpuIndex::searchFromCpuPaged_: Unsupported numeric type"); diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index 6bc4bc1cf5..8207225618 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -124,6 +124,24 @@ void GpuIndexCagra::train(idx_t n, const void* x, NumericType numeric_type) { cagraConfig_.guarantee_connectivity); std::get>>(index_)->train( n, static_cast(x)); + } else if (numeric_type == NumericType::Int8) { + index_ = std::make_shared>( + this->resources_.get(), + this->d, + cagraConfig_.intermediate_graph_degree, + cagraConfig_.graph_degree, + static_cast(cagraConfig_.build_algo), + cagraConfig_.nn_descent_niter, + cagraConfig_.store_dataset, + this->metric_type, + this->metric_arg, + INDICES_64_BIT, + ivf_pq_params, + ivf_pq_search_params, + cagraConfig_.refine_rate, + cagraConfig_.guarantee_connectivity); + std::get>>(index_)->train( + n, static_cast(x)); } else { FAISS_THROW_MSG("GpuIndexCagra::train unsupported data type"); } @@ -224,6 +242,29 @@ void GpuIndexCagra::searchImpl_( params->hashmap_max_fill_rate, params->num_random_samplings, params->seed); + } else if (numeric_type == NumericType::Int8) { + Tensor queries( + const_cast(static_cast(x)), + {n, this->d}); + + std::get>>(index_)->search( + queries, + k, + outDistances, + outLabels, + params->max_queries, + params->itopk_size, + params->max_iterations, + static_cast(params->algo), + params->team_size, + params->search_width, + params->min_iterations, + params->thread_block_size, + static_cast(params->hashmap_mode), + params->hashmap_min_bitlen, + params->hashmap_max_fill_rate, + params->num_random_samplings, + params->seed); } else { 
FAISS_THROW_MSG("GpuIndexCagra::searchImpl_ unsupported data type"); } @@ -306,6 +347,21 @@ void GpuIndexCagra::copyFrom( this->metric_type, this->metric_arg, INDICES_64_BIT); + } else if (numeric_type == NumericType::Int8) { + auto base_index = dynamic_cast(index->storage); + FAISS_ASSERT(base_index); + auto dataset = (int8_t*)base_index->codes.data(); + + index_ = std::make_shared>( + this->resources_.get(), + this->d, + index->ntotal, + hnsw.nb_neighbors(0), + dataset, + knn_graph.data(), + this->metric_type, + this->metric_arg, + INDICES_64_BIT); } else { FAISS_THROW_MSG("GpuIndexCagra::copyFrom unsupported data type"); } @@ -340,6 +396,9 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { } else if (numeric_type_ == NumericType::Float16) { graph_degree = std::get>>(index_) ->get_knngraph_degree(); + } else if (numeric_type_ == NumericType::Int8) { + graph_degree = std::get>>(index_) + ->get_knngraph_degree(); } else { FAISS_THROW_MSG("GpuIndexCagra::copyTo unsupported data type"); } @@ -360,6 +419,10 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { auto qtype = ScalarQuantizer::QT_fp16; index->storage = new IndexScalarQuantizer(index->d, qtype, this->metric_type); + } else if (numeric_type_ == NumericType::Int8) { + auto qtype = ScalarQuantizer::QT_8bit_direct_signed; + index->storage = + new IndexScalarQuantizer(index->d, qtype, this->metric_type); } index->own_fields = true; @@ -430,6 +493,38 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { index->ntotal = n_train; } + if (allocation) { + delete[] train_dataset; + } + } else if (numeric_type_ == NumericType::Int8) { + int8_t* train_dataset; + const int8_t* dataset = + std::get>>(index_) + ->get_training_dataset(); + if (getDeviceForAddress(dataset) >= 0) { + train_dataset = new int8_t[n_train * index->d]; + allocation = true; + raft::copy( + train_dataset, + dataset, + n_train * index->d, + this->resources_->getRaftHandleCurrentDevice() + .get_stream()); 
+ } else { + train_dataset = const_cast(dataset); + } + + index->init_level0 = false; + if (!index->base_level_only) { + FAISS_THROW_MSG( + "Only base level copy is supported for Int8 types in GpuIndexCagra::copyTo"); + } else { + index->hnsw.prepare_level_tab(n_train, false); + index->storage->add_sa_codes( + n_train, (uint8_t*)train_dataset, nullptr); + index->ntotal = n_train; + } + if (allocation) { delete[] train_dataset; } diff --git a/faiss/gpu/GpuIndexCagra.h b/faiss/gpu/GpuIndexCagra.h index cf4a706e7d..2ce4818bb6 100644 --- a/faiss/gpu/GpuIndexCagra.h +++ b/faiss/gpu/GpuIndexCagra.h @@ -312,7 +312,8 @@ struct GpuIndexCagra : public GpuIndex { std::variant< std::monostate, std::shared_ptr>, - std::shared_ptr>> + std::shared_ptr>, + std::shared_ptr>> index_; }; diff --git a/faiss/gpu/impl/CuvsCagra.cu b/faiss/gpu/impl/CuvsCagra.cu index acac6bcbbb..9a21f36145 100644 --- a/faiss/gpu/impl/CuvsCagra.cu +++ b/faiss/gpu/impl/CuvsCagra.cu @@ -335,5 +335,6 @@ const data_t* CuvsCagra::get_training_dataset() const { template class CuvsCagra; template class CuvsCagra; +template class CuvsCagra; } // namespace gpu } // namespace faiss diff --git a/faiss/gpu/test/test_cagra.py b/faiss/gpu/test/test_cagra.py index 9c9297c888..037d35cc4d 100644 --- a/faiss/gpu/test/test_cagra.py +++ b/faiss/gpu/test/test_cagra.py @@ -15,11 +15,26 @@ "only if cuVS is compiled in") class TestComputeGT(unittest.TestCase): - def do_compute_GT(self, metric): + def do_compute_GT(self, metric, numeric_type): d = 64 k = 12 - ds = datasets.SyntheticDataset(d, 0, 10000, 100) - Dref, Iref = faiss.knn(ds.get_queries(), ds.get_database(), k, metric) + if numeric_type == faiss.Int8: + data_base_nt = np.random.randint(-128, 128, size=(10000, d), dtype=np.int8) + data_query_nt = np.random.randint(-128, 128, size=(100, d), dtype=np.int8) + data_base = data_base_nt.astype(np.float32) + data_query = data_query_nt.astype(np.float32) + else: + ds = datasets.SyntheticDataset(d, 0, 10000, 100) + data_base = 
ds.get_database() #fp32 + data_query = ds.get_queries() #fp32 + if numeric_type == faiss.Float16: + data_base_nt = data_base.astype(np.float16) + data_query_nt = data_query.astype(np.float16) + elif numeric_type == faiss.Float32: + data_base_nt = data_base + data_query_nt = data_query + + Dref, Iref = faiss.knn(data_query, data_base, k, metric) res = faiss.StandardGpuResources() @@ -31,69 +46,62 @@ def do_compute_GT(self, metric): cagraIndexConfig.build_algo = faiss.graph_build_algo_IVF_PQ index = faiss.GpuIndexCagra(res, d, metric, cagraIndexConfig) - index.train(ds.get_database()) - Dnew, Inew = index.search(ds.get_queries(), k) + index.train(data_base_nt, numeric_type=numeric_type) + Dnew, Inew = index.search(data_query_nt, k, numeric_type=numeric_type) evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k) def test_compute_GT_L2(self): - self.do_compute_GT(faiss.METRIC_L2) + self.do_compute_GT(faiss.METRIC_L2, faiss.Float32) def test_compute_GT_IP(self): - self.do_compute_GT(faiss.METRIC_INNER_PRODUCT) + self.do_compute_GT(faiss.METRIC_INNER_PRODUCT, faiss.Float32) -@unittest.skipIf( - "CUVS" not in faiss.get_compile_options(), - "only if cuVS is compiled in") -class TestComputeGTFP16(unittest.TestCase): + def test_compute_GT_L2_FP16(self): + self.do_compute_GT(faiss.METRIC_L2, faiss.Float16) - def do_compute_GT(self, metric): - d = 64 - k = 12 - ds = datasets.SyntheticDataset(d, 0, 10000, 100) - Dref, Iref = faiss.knn(ds.get_queries(), ds.get_database(), k, metric) - - res = faiss.StandardGpuResources() - - # attempt to set custom IVF-PQ params - cagraIndexConfig = faiss.GpuIndexCagraConfig() - cagraIndexIVFPQConfig = faiss.IVFPQBuildCagraConfig() - cagraIndexIVFPQConfig.kmeans_trainset_fraction = 0.1 - cagraIndexConfig.ivf_pq_params = cagraIndexIVFPQConfig - cagraIndexConfig.build_algo = faiss.graph_build_algo_IVF_PQ - - index = faiss.GpuIndexCagra(res, d, metric, cagraIndexConfig) - fp16_data = ds.get_database().astype(np.float16) - 
index.train(fp16_data, faiss.Float16) - fp16_queries = ds.get_queries().astype(np.float16) - Dnew, Inew = index.search(fp16_queries, k, numeric_type=faiss.Float16) - - evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k) + def test_compute_GT_IP_FP16(self): + self.do_compute_GT(faiss.METRIC_INNER_PRODUCT, faiss.Float16) - def test_compute_GT_L2(self): - self.do_compute_GT(faiss.METRIC_L2) + def test_compute_GT_L2_Int8(self): + self.do_compute_GT(faiss.METRIC_L2, faiss.Int8) - def test_compute_GT_IP(self): - self.do_compute_GT(faiss.METRIC_INNER_PRODUCT) + def test_compute_GT_IP_Int8(self): + self.do_compute_GT(faiss.METRIC_INNER_PRODUCT, faiss.Int8) @unittest.skipIf( "CUVS" not in faiss.get_compile_options(), "only if cuVS is compiled in") class TestInterop(unittest.TestCase): - def do_interop(self, metric): + def do_interop(self, metric, numeric_type): d = 64 k = 12 - ds = datasets.SyntheticDataset(d, 0, 10000, 100) + if numeric_type == faiss.Int8: + data_base_nt = np.random.randint(-128, 128, size=(10000, d), dtype=np.int8) + data_query_nt = np.random.randint(-128, 128, size=(100, d), dtype=np.int8) + data_base = data_base_nt.astype(np.float32) + data_query = data_query_nt.astype(np.float32) + else: + ds = datasets.SyntheticDataset(d, 0, 10000, 100) + data_base = ds.get_database() #fp32 + data_query = ds.get_queries() #fp32 + if numeric_type == faiss.Float16: + data_base_nt = data_base.astype(np.float16) + data_query_nt = data_query.astype(np.float16) + elif numeric_type == faiss.Float32: + data_base_nt = data_base + data_query_nt = data_query res = faiss.StandardGpuResources() index = faiss.GpuIndexCagra(res, d, metric) - index.train(ds.get_database()) - Dnew, Inew = index.search(ds.get_queries(), k) + index.train(data_base_nt, numeric_type=numeric_type) + Dnew, Inew = index.search(data_query_nt, k, numeric_type=numeric_type) cpu_index = faiss.index_gpu_to_cpu(index) - Dref, Iref = cpu_index.search(ds.get_queries(), k) + # cpu index always search in 
fp32 + Dref, Iref = cpu_index.search(data_query, k) evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k) @@ -101,49 +109,80 @@ def do_interop(self, metric): faiss.serialize_index(cpu_index)) gpu_index = faiss.index_cpu_to_gpu(res, 0, deserialized_index) - Dnew2, Inew2 = gpu_index.search(ds.get_queries(), k) + Dnew2, Inew2 = gpu_index.search(data_query_nt, k, numeric_type=numeric_type) evaluation.check_ref_knn_with_draws(Dnew2, Inew2, Dnew, Inew, k) def test_interop_L2(self): - self.do_interop(faiss.METRIC_L2) + self.do_interop(faiss.METRIC_L2, faiss.Float32) def test_interop_IP(self): - self.do_interop(faiss.METRIC_INNER_PRODUCT) + self.do_interop(faiss.METRIC_INNER_PRODUCT, faiss.Float32) + + def test_interop_L2_FP16(self): + self.do_interop(faiss.METRIC_L2, faiss.Float16) + + def test_interop_IP_FP16(self): + self.do_interop(faiss.METRIC_INNER_PRODUCT, faiss.Float16) + + def test_interop_L2_Int8(self): + self.do_interop(faiss.METRIC_L2, faiss.Int8) + + def test_interop_IP_Int8(self): + self.do_interop(faiss.METRIC_INNER_PRODUCT, faiss.Int8) + @unittest.skipIf( "CUVS" not in faiss.get_compile_options(), "only if cuVS is compiled in") -class TestInteropFP16(unittest.TestCase): +class TestIDMapCagra(unittest.TestCase): - def do_interop(self, metric): + def do_IDMapCagra(self, metric, numeric_type): d = 64 k = 12 - ds = datasets.SyntheticDataset(d, 0, 10000, 100) + if numeric_type == faiss.Int8: + data_base_nt = np.random.randint(-128, 128, size=(10000, d), dtype=np.int8) + data_query_nt = np.random.randint(-128, 128, size=(100, d), dtype=np.int8) + data_base = data_base_nt.astype(np.float32) + data_query = data_query_nt.astype(np.float32) + else: + ds = datasets.SyntheticDataset(d, 0, 10000, 100) + data_base = ds.get_database() #fp32 + data_query = ds.get_queries() #fp32 + if numeric_type == faiss.Float16: + data_base_nt = data_base.astype(np.float16) + data_query_nt = data_query.astype(np.float16) + elif numeric_type == faiss.Float32: + data_base_nt = 
data_base + data_query_nt = data_query + + Dref, Iref = faiss.knn(data_query, data_base, k, metric) res = faiss.StandardGpuResources() index = faiss.GpuIndexCagra(res, d, metric) - fp16_data = ds.get_database().astype(np.float16) - index.train(fp16_data, faiss.Float16) - fp16_queries = ds.get_queries().astype(np.float16) - Dnew, Inew = index.search(fp16_queries, k, numeric_type=faiss.Float16) + idMapIndex = faiss.IndexIDMap(index) + idMapIndex.train(data_base_nt, numeric_type=numeric_type) + ids = np.array([i for i in range(10000)]) + idMapIndex.add_with_ids(data_base_nt, ids, numeric_type=numeric_type) + Dnew, Inew = idMapIndex.search(data_query_nt, k, numeric_type=numeric_type) - cpu_index = faiss.index_gpu_to_cpu(index) - Dref, Iref = cpu_index.search(ds.get_queries(), k) - evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k) - deserialized_index = faiss.deserialize_index( - faiss.serialize_index(cpu_index)) + def test_IDMapCagra_L2(self): + self.do_IDMapCagra(faiss.METRIC_L2, faiss.Float32) - gpu_index = faiss.index_cpu_to_gpu(res, 0, deserialized_index) - Dnew2, Inew2 = gpu_index.search(fp16_queries, k, numeric_type=faiss.Float16) + def test_IDMapCagra_IP(self): + self.do_IDMapCagra(faiss.METRIC_INNER_PRODUCT, faiss.Float32) - evaluation.check_ref_knn_with_draws(Dnew2, Inew2, Dnew, Inew, k) + def test_IDMapCagra_L2_FP16(self): + self.do_IDMapCagra(faiss.METRIC_L2, faiss.Float16) - def test_interop_L2(self): - self.do_interop(faiss.METRIC_L2) + def test_IDMapCagra_IP_FP16(self): + self.do_IDMapCagra(faiss.METRIC_INNER_PRODUCT, faiss.Float16) - def test_interop_IP(self): - self.do_interop(faiss.METRIC_INNER_PRODUCT) + def test_IDMapCagra_L2_Int8(self): + self.do_IDMapCagra(faiss.METRIC_L2, faiss.Int8) + + def test_IDMapCagra_IP_Int8(self): + self.do_IDMapCagra(faiss.METRIC_INNER_PRODUCT, faiss.Int8) diff --git a/faiss/python/class_wrappers.py b/faiss/python/class_wrappers.py index 51d8f570cb..3b2ce37103 100644 --- a/faiss/python/class_wrappers.py +++ 
b/faiss/python/class_wrappers.py @@ -42,6 +42,15 @@ def _check_dtype_uint8(codes): " uint8, but found %s" % ("codes", codes.dtype)) return np.ascontiguousarray(codes) +def _numeric_to_str(numeric_type): + if numeric_type == faiss.Float32: + return 'float32' + elif numeric_type == faiss.Float16: + return 'float16' + elif numeric_type == faiss.Int8: + return 'int8' + else: + raise ValueError("numeric type must be either faiss.Float32, faiss.Float16, or faiss.Int8") def replace_method(the_class, name, replacement, ignore_missing=False): """ Replaces a method in a class with another version. The old method @@ -226,13 +235,10 @@ def replacement_add(self, x, numeric_type = faiss.Float32): n, d = x.shape assert d == self.d - if numeric_type == faiss.Float32: - x = np.ascontiguousarray(x, dtype='float32') - else: - x = np.ascontiguousarray(x, dtype='float16') - self.add_c(n, swig_ptr(x)) + x = np.ascontiguousarray(x, dtype=_numeric_to_str(numeric_type)) + self.addEx(n, swig_ptr(x), numeric_type) - def replacement_add_with_ids(self, x, ids): + def replacement_add_with_ids(self, x, ids, numeric_type = faiss.Float32): """Adds vectors with arbitrary ids to the index (not all indexes support this). The index must be trained before vectors can be added to it. Vector `i` is stored in `x[i]` and has id `ids[i]`. @@ -248,10 +254,11 @@ def replacement_add_with_ids(self, x, ids): """ n, d = x.shape assert d == self.d - x = np.ascontiguousarray(x, dtype='float32') - ids = np.ascontiguousarray(ids, dtype='int64') assert ids.shape == (n, ), 'not same nb of vectors as ids' - self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids)) + x = np.ascontiguousarray(x, dtype=_numeric_to_str(numeric_type)) + ids = np.ascontiguousarray(ids, dtype='int64') + self.add_with_idsEx(n, swig_ptr(x), numeric_type, swig_ptr(ids)) + def replacement_assign(self, x, k, labels=None): """Find the k nearest neighbors of the set of vectors x in the index. 
@@ -297,12 +304,8 @@ def replacement_train(self, x, numeric_type = faiss.Float32): """ n, d = x.shape assert d == self.d - if numeric_type == faiss.Float32: - x = np.ascontiguousarray(x, dtype='float32') - self.train_c(n, swig_ptr(x)) - else: - x = np.ascontiguousarray(x, dtype='float16') - self.train_c(n, swig_ptr(x), faiss.Float16) + x = np.ascontiguousarray(x, dtype=_numeric_to_str(numeric_type)) + self.trainEx(n, swig_ptr(x), numeric_type) def replacement_search(self, x, k, *, params=None, D=None, I=None, numeric_type = faiss.Float32): @@ -333,10 +336,7 @@ def replacement_search(self, x, k, *, params=None, D=None, I=None, numeric_type """ n, d = x.shape - if numeric_type == faiss.Float32: - x = np.ascontiguousarray(x, dtype='float32') - else: - x = np.ascontiguousarray(x, dtype='float16') + x = np.ascontiguousarray(x, _numeric_to_str(numeric_type)) assert d == self.d assert k > 0 @@ -351,10 +351,7 @@ def replacement_search(self, x, k, *, params=None, D=None, I=None, numeric_type else: assert I.shape == (n, k) - if numeric_type == faiss.Float32: - self.search_c(n, swig_ptr(x), k, swig_ptr(D), swig_ptr(I), params) - else: - self.search_c(n, swig_ptr(x), faiss.Float16, k, swig_ptr(D), swig_ptr(I), params) + self.searchEx(n, swig_ptr(x), numeric_type, k, swig_ptr(D), swig_ptr(I), params) return D, I def replacement_search_and_reconstruct(self, x, k, *, params=None, D=None, I=None, R=None): From 3fb1799c0419cd5ada6e321bdba48814613ee645 Mon Sep 17 00:00:00 2001 From: jinsolp Date: Tue, 22 Jul 2025 16:26:27 +0000 Subject: [PATCH 2/4] merge commit --- faiss/gpu/GpuIndex.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/faiss/gpu/GpuIndex.cu b/faiss/gpu/GpuIndex.cu index 4ef96b1d2b..9f824ca6a4 100644 --- a/faiss/gpu/GpuIndex.cu +++ b/faiss/gpu/GpuIndex.cu @@ -431,7 +431,7 @@ void GpuIndex::searchNonPaged_( stream, {n, this->d}); - searchImplEx_( + searchImpl_( n, static_cast(vecs.data()), numeric_type, From 2f577aef3be6706a2d0527f5ca403f44b740624a Mon 
Sep 17 00:00:00 2001 From: jinsolp Date: Wed, 23 Jul 2025 23:51:32 +0000 Subject: [PATCH 3/4] apply appropriate encode/decode --- faiss/gpu/GpuIndexCagra.cu | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index e19720f9f9..19aa78b21e 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -358,18 +358,26 @@ void GpuIndexCagra::copyFrom( } else if (numeric_type == NumericType::Int8) { auto base_index = dynamic_cast(index->storage); FAISS_ASSERT(base_index); - auto dataset = (int8_t*)base_index->codes.data(); + auto dataset = (uint8_t*)base_index->codes.data(); + + // decode what was encoded by Quantizer8bitDirectSigned in + // ScalarQuantizer + int8_t* decoded_train_dataset = new int8_t[index->ntotal * index->d]; + for (int i = 0; i < index->ntotal * this->d; i++) { + decoded_train_dataset[i] = dataset[i] - 128; + } index_ = std::make_shared>( this->resources_.get(), this->d, index->ntotal, hnsw.nb_neighbors(0), - dataset, + decoded_train_dataset, knn_graph.data(), this->metric_type, this->metric_arg, INDICES_64_BIT); + delete[] decoded_train_dataset; } else { FAISS_THROW_MSG("GpuIndexCagra::copyFrom unsupported data type"); } @@ -528,8 +536,14 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { "Only base level copy is supported for Int8 types in GpuIndexCagra::copyTo"); } else { index->hnsw.prepare_level_tab(n_train, false); + // applying encoding logic of Quantizer8bitDirectSigned + uint8_t* encoded_train_dataset = new uint8_t[n_train * index->d]; + for (int i = 0; i < n_train * index->d; i++) { + encoded_train_dataset[i] = train_dataset[i] + 128; + } index->storage->add_sa_codes( - n_train, (uint8_t*)train_dataset, nullptr); + n_train, encoded_train_dataset, nullptr); + delete[] encoded_train_dataset; index->ntotal = n_train; } From a58872ee155911a42a2f5e888abe928b161719fb Mon Sep 17 00:00:00 2001 From: jinsolp Date: Thu, 24 Jul 2025 
17:48:20 +0000 Subject: [PATCH 4/4] proper merge conflict solve --- faiss/gpu/test/test_cagra.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/faiss/gpu/test/test_cagra.py b/faiss/gpu/test/test_cagra.py index e4f91a8736..bbfa056195 100644 --- a/faiss/gpu/test/test_cagra.py +++ b/faiss/gpu/test/test_cagra.py @@ -31,9 +31,9 @@ def do_compute_GT(self, metric, numeric_type): # Normalize for inner product to avoid duplicate neighbors if metric == faiss.METRIC_INNER_PRODUCT: # Normalize database vectors - database = database / np.linalg.norm(database, axis=1, keepdims=True) + data_base = data_base / np.linalg.norm(data_base, axis=1, keepdims=True) # Normalize query vectors - queries = queries / np.linalg.norm(queries, axis=1, keepdims=True) + data_query = data_query / np.linalg.norm(data_query, axis=1, keepdims=True) if numeric_type == faiss.Float16: data_base_nt = data_base.astype(np.float16) data_query_nt = data_query.astype(np.float16)