diff --git a/faiss/Index.h b/faiss/Index.h
index 0af35cfce2..f189bf3af0 100644
--- a/faiss/Index.h
+++ b/faiss/Index.h
@@ -62,6 +62,7 @@ enum NumericType {
     Float32,
     Float16,
     UInt8,
+    Int8,
 };
 
 inline size_t get_numeric_type_size(NumericType numeric_type) {
@@ -70,6 +71,10 @@ inline size_t get_numeric_type_size(NumericType numeric_type) {
             return 4;
         case NumericType::Float16:
             return 2;
+        // 8-bit integer element types occupy one byte each.
+        case NumericType::UInt8:
+        case NumericType::Int8:
+            return 1;
         default:
             FAISS_THROW_MSG(
-                    "Unknown Numeric Type. Only supports Float32, Float16");
+                    "Unknown Numeric Type. Only supports Float32, Float16, UInt8, Int8");
diff --git a/faiss/gpu/GpuCloner.cpp b/faiss/gpu/GpuCloner.cpp
index 794b4fba2d..75c574694e 100644
--- a/faiss/gpu/GpuCloner.cpp
+++ b/faiss/gpu/GpuCloner.cpp
@@ -94,7 +94,7 @@ Index* ToCPUCloner::clone_Index(const Index* index) {
 #if defined USE_NVIDIA_CUVS
     else if (auto icg = dynamic_cast<const GpuIndexCagra*>(index)) {
         IndexHNSWCagra* res = new IndexHNSWCagra();
-        if (icg->get_numeric_type() == faiss::NumericType::Float16) {
+        if (icg->get_numeric_type() != faiss::NumericType::Float32) {
             res->base_level_only = true;
         }
         icg->copyTo(res);
diff --git a/faiss/gpu/GpuIndex.cu b/faiss/gpu/GpuIndex.cu
index 31c1bcddd1..9f824ca6a4 100644
--- a/faiss/gpu/GpuIndex.cu
+++ b/faiss/gpu/GpuIndex.cu
@@ -194,6 +194,8 @@ void GpuIndex::addPaged_(
         dispatch(float{});
     } else if (numeric_type == NumericType::Float16) {
         dispatch(half{});
+    } else if (numeric_type == NumericType::Int8) {
+        dispatch(int8_t{});
     } else {
         FAISS_THROW_MSG("GpuIndex::addPaged_: Unsupported numeric type");
     }
@@ -251,6 +253,8 @@ void GpuIndex::addPage_(
         dispatch(float{});
     } else if (numeric_type == NumericType::Float16) {
         dispatch(half{});
+    } else if (numeric_type == NumericType::Int8) {
+        dispatch(int8_t{});
     } else {
         FAISS_THROW_MSG("GpuIndex::addPage_: Unsupported numeric type");
     }
@@ -411,6 +415,22 @@ void GpuIndex::searchNonPaged_(
                 stream,
                 {n, this->d});
 
+        searchImpl_(
+                n,
+                static_cast<const void*>(vecs.data()),
+                numeric_type,
+                k,
+                outDistancesData,
+                outIndicesData,
+                params);
+    } else if (numeric_type == NumericType::Int8) {
+        auto vecs = toDeviceTemporary<int8_t, 2>(
+                resources_.get(),
+                config_.device,
+                const_cast<int8_t*>(static_cast<const int8_t*>(x)),
+                stream,
+                {n, this->d});
+
         searchImpl_(
                 n,
                 static_cast<const void*>(vecs.data()),
@@ -489,6 +509,16 @@ void GpuIndex::searchFromCpuPaged_(
                         outDistancesSlice.data(),
                         outIndicesSlice.data(),
                         params);
+            } else if (numeric_type == NumericType::Int8) {
+                searchNonPaged_(
+                        num,
+                        static_cast<const void*>(
+                                static_cast<const int8_t*>(x) + cur * this->d),
+                        numeric_type,
+                        k,
+                        outDistancesSlice.data(),
+                        outIndicesSlice.data(),
+                        params);
             }
         }
 
@@ -645,6 +675,8 @@ void GpuIndex::searchFromCpuPaged_(
         dispatch(float{});
     } else if (numeric_type == NumericType::Float16) {
         dispatch(half{});
+    } else if (numeric_type == NumericType::Int8) {
+        dispatch(int8_t{});
     } else {
         FAISS_THROW_MSG(
                 "GpuIndex::searchFromCpuPaged_: Unsupported numeric type");
diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu
index eada5936ff..19aa78b21e 100644
--- a/faiss/gpu/GpuIndexCagra.cu
+++ b/faiss/gpu/GpuIndexCagra.cu
@@ -124,6 +124,24 @@ void GpuIndexCagra::train(idx_t n, const void* x, NumericType numeric_type) {
                 cagraConfig_.guarantee_connectivity);
         std::get<std::shared_ptr<CuvsCagra<half>>>(index_)->train(
                 n, static_cast<const half*>(x));
+    } else if (numeric_type == NumericType::Int8) {
+        index_ = std::make_shared<CuvsCagra<int8_t>>(
+                this->resources_.get(),
+                this->d,
+                cagraConfig_.intermediate_graph_degree,
+                cagraConfig_.graph_degree,
+                static_cast<faiss::cagra_build_algo>(cagraConfig_.build_algo),
+                cagraConfig_.nn_descent_niter,
+                cagraConfig_.store_dataset,
+                this->metric_type,
+                this->metric_arg,
+                INDICES_64_BIT,
+                ivf_pq_params,
+                ivf_pq_search_params,
+                cagraConfig_.refine_rate,
+                cagraConfig_.guarantee_connectivity);
+        std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)->train(
+                n, static_cast<const int8_t*>(x));
     } else {
         FAISS_THROW_MSG("GpuIndexCagra::train unsupported data type");
     }
@@ -232,6 +250,29 @@ void GpuIndexCagra::searchImpl_(
                 params->hashmap_max_fill_rate,
                 params->num_random_samplings,
                 params->seed);
+    } else if (numeric_type == NumericType::Int8) {
+        Tensor<int8_t, 2, true> queries(
+                const_cast<int8_t*>(static_cast<const int8_t*>(x)),
+                {n, this->d});
+
+        std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)->search(
+                queries,
+                k,
+                outDistances,
+                outLabels,
+                params->max_queries,
+                params->itopk_size,
+                params->max_iterations,
+                static_cast<faiss::cagra_search_algo>(params->algo),
+                params->team_size,
+                params->search_width,
+                params->min_iterations,
+                params->thread_block_size,
+                static_cast<faiss::cagra_hash_mode>(params->hashmap_mode),
+                params->hashmap_min_bitlen,
+                params->hashmap_max_fill_rate,
+                params->num_random_samplings,
+                params->seed);
     } else {
         FAISS_THROW_MSG("GpuIndexCagra::searchImpl_ unsupported data type");
     }
@@ -314,6 +355,29 @@ void GpuIndexCagra::copyFrom(
                 this->metric_type,
                 this->metric_arg,
                 INDICES_64_BIT);
+    } else if (numeric_type == NumericType::Int8) {
+        auto base_index = dynamic_cast<IndexScalarQuantizer*>(index->storage);
+        FAISS_ASSERT(base_index);
+        const uint8_t* codes = base_index->codes.data();
+
+        // Decode what Quantizer8bitDirectSigned encoded (code = value + 128).
+        // size_t count: ntotal * d can overflow int; unique_ptr frees on throw.
+        const size_t n_values = static_cast<size_t>(index->ntotal) * this->d;
+        std::unique_ptr<int8_t[]> decoded_train_dataset(new int8_t[n_values]);
+        for (size_t i = 0; i < n_values; i++) {
+            decoded_train_dataset[i] = static_cast<int8_t>(codes[i] - 128);
+        }
+
+        index_ = std::make_shared<CuvsCagra<int8_t>>(
+                this->resources_.get(),
+                this->d,
+                index->ntotal,
+                hnsw.nb_neighbors(0),
+                decoded_train_dataset.get(),
+                knn_graph.data(),
+                this->metric_type,
+                this->metric_arg,
+                INDICES_64_BIT);
     } else {
         FAISS_THROW_MSG("GpuIndexCagra::copyFrom unsupported data type");
     }
@@ -348,6 +412,9 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const {
     } else if (numeric_type_ == NumericType::Float16) {
         graph_degree = std::get<std::shared_ptr<CuvsCagra<half>>>(index_)
                                ->get_knngraph_degree();
+    } else if (numeric_type_ == NumericType::Int8) {
+        graph_degree = std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)
+                               ->get_knngraph_degree();
     } else {
         FAISS_THROW_MSG("GpuIndexCagra::copyTo unsupported data type");
     }
@@ -368,6 +435,10 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const {
         auto qtype = ScalarQuantizer::QT_fp16;
         index->storage =
                 new IndexScalarQuantizer(index->d, qtype, this->metric_type);
+    } else if (numeric_type_ == NumericType::Int8) {
+        auto qtype = ScalarQuantizer::QT_8bit_direct_signed;
+        index->storage =
+                new IndexScalarQuantizer(index->d, qtype, this->metric_type);
     }
 
     index->own_fields = true;
@@ -438,6 +509,44 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const {
             index->ntotal = n_train;
         }
 
+        if (allocation) {
+            delete[] train_dataset;
+        }
+    } else if (numeric_type_ == NumericType::Int8) {
+        index->init_level0 = false;
+        // Fail before staging a host copy so the throw cannot leak it.
+        if (!index->base_level_only) {
+            FAISS_THROW_MSG(
+                    "Only base level copy is supported for Int8 types in GpuIndexCagra::copyTo");
+        }
+
+        // size_t element count: n_train * d can exceed the range of int.
+        const size_t n_values = static_cast<size_t>(n_train) * index->d;
+        int8_t* train_dataset;
+        const int8_t* dataset =
+                std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)
+                        ->get_training_dataset();
+        if (getDeviceForAddress(dataset) >= 0) {
+            train_dataset = new int8_t[n_values];
+            allocation = true;
+            raft::copy(
+                    train_dataset,
+                    dataset,
+                    n_values,
+                    this->resources_->getRaftHandleCurrentDevice()
+                            .get_stream());
+        } else {
+            train_dataset = const_cast<int8_t*>(dataset);
+        }
+
+        index->hnsw.prepare_level_tab(n_train, false);
+        // Apply the Quantizer8bitDirectSigned encoding (code = value + 128).
+        std::unique_ptr<uint8_t[]> encoded(new uint8_t[n_values]);
+        for (size_t i = 0; i < n_values; i++) {
+            encoded[i] = static_cast<uint8_t>(train_dataset[i] + 128);
+        }
+        index->storage->add_sa_codes(n_train, encoded.get(), nullptr);
+        index->ntotal = n_train;
         if (allocation) {
             delete[] train_dataset;
         }
diff --git a/faiss/gpu/GpuIndexCagra.h b/faiss/gpu/GpuIndexCagra.h
index 49e2e0e800..0e4a5b252b 100644
--- a/faiss/gpu/GpuIndexCagra.h
+++ b/faiss/gpu/GpuIndexCagra.h
@@ -317,7 +317,8 @@ struct GpuIndexCagra : public GpuIndex {
     std::variant<
             std::monostate,
             std::shared_ptr<CuvsCagra<float>>,
-            std::shared_ptr<CuvsCagra<half>>>
+            std::shared_ptr<CuvsCagra<half>>,
+            std::shared_ptr<CuvsCagra<int8_t>>>
             index_;
 };
 
diff --git a/faiss/gpu/impl/CuvsCagra.cu b/faiss/gpu/impl/CuvsCagra.cu
index 355eeaecb8..482e4d6727 100644
--- a/faiss/gpu/impl/CuvsCagra.cu
+++ b/faiss/gpu/impl/CuvsCagra.cu
@@ -337,5 +337,6 @@ const data_t* CuvsCagra<data_t>::get_training_dataset() const {
 
 template class CuvsCagra<float>;
 template class CuvsCagra<half>;
+template class CuvsCagra<int8_t>;
 } // namespace gpu
 } // namespace faiss
diff --git a/faiss/gpu/test/test_cagra.py b/faiss/gpu/test/test_cagra.py
index 0c26cc696b..1e997548fc 100644
--- a/faiss/gpu/test/test_cagra.py
+++ b/faiss/gpu/test/test_cagra.py
@@ -18,20 +18,30 @@ class TestComputeGT(unittest.TestCase):
     def do_compute_GT(self, metric, numeric_type):
         d = 64
         k = 12
-        ds = datasets.SyntheticDataset(d, 0, 10000, 100)
-        
-        # Get the data
-        database = ds.get_database()
-        queries = ds.get_queries()
-        
-        # Normalize for inner product to avoid duplicate neighbors
-        if metric == faiss.METRIC_INNER_PRODUCT:
-            # Normalize database vectors
-            database = database / np.linalg.norm(database, axis=1, keepdims=True)
-            # Normalize query vectors
-            queries = queries / np.linalg.norm(queries, axis=1, keepdims=True)
-        
-        Dref, Iref = faiss.knn(queries, database, k, metric)
+
+        if numeric_type == faiss.Int8:  # fixed seeds keep test data reproducible
+            data_base_nt = np.random.RandomState(123).randint(-128, 128, size=(10000, d), dtype=np.int8)
+            data_query_nt = np.random.RandomState(456).randint(-128, 128, size=(100, d), dtype=np.int8)
+            data_base = data_base_nt.astype(np.float32)
+            data_query = data_query_nt.astype(np.float32)
+        else:
+            ds = datasets.SyntheticDataset(d, 0, 10000, 100)
+            data_base = ds.get_database()  #fp32
+            data_query = ds.get_queries()   #fp32
+            # Normalize for inner product to avoid duplicate neighbors
+            if metric == faiss.METRIC_INNER_PRODUCT:
+                # Normalize database vectors
+                data_base = data_base / np.linalg.norm(data_base, axis=1, keepdims=True)
+                # Normalize query vectors
+                data_query = data_query / np.linalg.norm(data_query, axis=1, keepdims=True)
+            if numeric_type == faiss.Float16:
+                data_base_nt = data_base.astype(np.float16)
+                data_query_nt = data_query.astype(np.float16)
+            elif numeric_type == faiss.Float32:
+                data_base_nt = data_base
+                data_query_nt = data_query
+
+        Dref, Iref = faiss.knn(data_query, data_base, k, metric)
 
         res = faiss.StandardGpuResources()
 
@@ -45,10 +55,8 @@ def do_compute_GT(self, metric, numeric_type):
         cagraIndexConfig.build_algo = faiss.graph_build_algo_IVF_PQ
 
         index = faiss.GpuIndexCagra(res, d, metric, cagraIndexConfig)
-        database = ds.get_database().astype(np.float16) if numeric_type == faiss.Float16  else ds.get_database()
-        index.train(database, numeric_type=numeric_type)
-        queries = ds.get_queries().astype(np.float16) if numeric_type == faiss.Float16 else ds.get_queries()
-        Dnew, Inew = index.search(queries, k, numeric_type=numeric_type)
+        index.train(data_base_nt, numeric_type=numeric_type)
+        Dnew, Inew = index.search(data_query_nt, k, numeric_type=numeric_type)
 
         evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k)
 
@@ -64,6 +72,12 @@ def test_compute_GT_L2_FP16(self):
     def test_compute_GT_IP_FP16(self):
         self.do_compute_GT(faiss.METRIC_INNER_PRODUCT, faiss.Float16)
 
+    def test_compute_GT_L2_Int8(self):
+        self.do_compute_GT(faiss.METRIC_L2, faiss.Int8)
+
+    def test_compute_GT_IP_Int8(self):
+        self.do_compute_GT(faiss.METRIC_INNER_PRODUCT, faiss.Int8)
+
 @unittest.skipIf(
     "CUVS" not in faiss.get_compile_options(),
     "only if cuVS is compiled in")
@@ -72,27 +86,31 @@ class TestInterop(unittest.TestCase):
     def do_interop(self, metric, numeric_type):
         d = 64
         k = 12
-        ds = datasets.SyntheticDataset(d, 0, 10000, 100)
+        if numeric_type == faiss.Int8:  # fixed seeds keep test data reproducible
+            data_base_nt = np.random.RandomState(123).randint(-128, 128, size=(10000, d), dtype=np.int8)
+            data_query_nt = np.random.RandomState(456).randint(-128, 128, size=(100, d), dtype=np.int8)
+            data_base = data_base_nt.astype(np.float32)
+            data_query = data_query_nt.astype(np.float32)
+        else:
+            ds = datasets.SyntheticDataset(d, 0, 10000, 100)
+            data_base = ds.get_database()  #fp32
+            data_query = ds.get_queries()   #fp32
+            if numeric_type == faiss.Float16:
+                data_base_nt = data_base.astype(np.float16)
+                data_query_nt = data_query.astype(np.float16)
+            elif numeric_type == faiss.Float32:
+                data_base_nt = data_base
+                data_query_nt = data_query
 
         res = faiss.StandardGpuResources()
 
         index = faiss.GpuIndexCagra(res, d, metric)
-        database = (
-            ds.get_database().astype(np.float16)
-            if numeric_type == faiss.Float16
-            else ds.get_database()
-        )
-        index.train(database, numeric_type=numeric_type)
-        queries = (
-            ds.get_queries().astype(np.float16)
-            if numeric_type == faiss.Float16
-            else ds.get_queries()
-        )
-        Dnew, Inew = index.search(queries, k, numeric_type=numeric_type)
+        index.train(data_base_nt, numeric_type=numeric_type)
+        Dnew, Inew = index.search(data_query_nt, k, numeric_type=numeric_type)
 
         cpu_index = faiss.index_gpu_to_cpu(index)
         # cpu index always search in fp32
-        Dref, Iref = cpu_index.search(ds.get_queries(), k)
+        Dref, Iref = cpu_index.search(data_query, k)
 
         evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k)
 
@@ -100,7 +118,7 @@ def do_interop(self, metric, numeric_type):
             faiss.serialize_index(cpu_index))
 
         gpu_index = faiss.index_cpu_to_gpu(res, 0, deserialized_index)
-        Dnew2, Inew2 = gpu_index.search(queries, k, numeric_type=numeric_type)
+        Dnew2, Inew2 = gpu_index.search(data_query_nt, k, numeric_type=numeric_type)
 
         evaluation.check_ref_knn_with_draws(Dnew2, Inew2, Dnew, Inew, k)
 
@@ -116,6 +134,12 @@ def test_interop_L2_FP16(self):
     def test_interop_IP_FP16(self):
         self.do_interop(faiss.METRIC_INNER_PRODUCT, faiss.Float16)
 
+    def test_interop_L2_Int8(self):
+        self.do_interop(faiss.METRIC_L2, faiss.Int8)
+
+    def test_interop_IP_Int8(self):
+        self.do_interop(faiss.METRIC_INNER_PRODUCT, faiss.Int8)
+
 
 @unittest.skipIf(
     "CUVS" not in faiss.get_compile_options(),
@@ -125,19 +149,32 @@ class TestIDMapCagra(unittest.TestCase):
     def do_IDMapCagra(self, metric, numeric_type):
         d = 64
         k = 12
-        ds = datasets.SyntheticDataset(d, 0, 10000, 100)
-        Dref, Iref = faiss.knn(ds.get_queries(), ds.get_database(), k, metric)
+        if numeric_type == faiss.Int8:  # fixed seeds keep test data reproducible
+            data_base_nt = np.random.RandomState(123).randint(-128, 128, size=(10000, d), dtype=np.int8)
+            data_query_nt = np.random.RandomState(456).randint(-128, 128, size=(100, d), dtype=np.int8)
+            data_base = data_base_nt.astype(np.float32)
+            data_query = data_query_nt.astype(np.float32)
+        else:
+            ds = datasets.SyntheticDataset(d, 0, 10000, 100)
+            data_base = ds.get_database()  # fp32
+            data_query = ds.get_queries()  # fp32
+            if numeric_type == faiss.Float16:
+                data_base_nt = data_base.astype(np.float16)
+                data_query_nt = data_query.astype(np.float16)
+            elif numeric_type == faiss.Float32:
+                data_base_nt = data_base
+                data_query_nt = data_query
+
+        Dref, Iref = faiss.knn(data_query, data_base, k, metric)
 
         res = faiss.StandardGpuResources()
 
         index = faiss.GpuIndexCagra(res, d, metric)
         idMapIndex = faiss.IndexIDMap(index)
-        database = ds.get_database().astype(np.float16) if numeric_type == faiss.Float16 else ds.get_database()
-        idMapIndex.train(database, numeric_type=numeric_type)
+        idMapIndex.train(data_base_nt, numeric_type=numeric_type)
         ids = np.array([i for i in range(10000)])
-        idMapIndex.add_with_ids(database, ids, numeric_type=numeric_type)
-        queries = ds.get_queries().astype(np.float16) if numeric_type == faiss.Float16 else ds.get_queries()
-        Dnew, Inew = idMapIndex.search(queries, k, numeric_type=numeric_type)
+        idMapIndex.add_with_ids(data_base_nt, ids, numeric_type=numeric_type)
+        Dnew, Inew = idMapIndex.search(data_query_nt, k, numeric_type=numeric_type)
 
         evaluation.check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, k)
 
@@ -152,3 +189,9 @@ def test_IDMapCagra_L2_FP16(self):
 
     def test_IDMapCagra_IP_FP16(self):
         self.do_IDMapCagra(faiss.METRIC_INNER_PRODUCT, faiss.Float16)
+
+    def test_IDMapCagra_L2_Int8(self):
+        self.do_IDMapCagra(faiss.METRIC_L2, faiss.Int8)
+
+    def test_IDMapCagra_IP_Int8(self):
+        self.do_IDMapCagra(faiss.METRIC_INNER_PRODUCT, faiss.Int8)
diff --git a/faiss/python/class_wrappers.py b/faiss/python/class_wrappers.py
index 848b84e190..394412b483 100644
--- a/faiss/python/class_wrappers.py
+++ b/faiss/python/class_wrappers.py
@@ -47,8 +47,10 @@ def _numeric_to_str(numeric_type):
         return 'float32'
     elif numeric_type == faiss.Float16:
         return 'float16'
+    elif numeric_type == faiss.Int8:
+        return 'int8'
     else:
-        raise ValueError("numeric type must be either faiss.Float32 or faiss.Float16 ")
+        raise ValueError("numeric type must be either faiss.Float32, faiss.Float16, or faiss.Int8")
 
 def replace_method(the_class, name, replacement, ignore_missing=False):
     """ Replaces a method in a class with another version. The old method