Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions faiss/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ enum NumericType {
Float32,
Float16,
UInt8,
Int8,
};

inline size_t get_numeric_type_size(NumericType numeric_type) {
Expand All @@ -70,6 +71,9 @@ inline size_t get_numeric_type_size(NumericType numeric_type) {
return 4;
case NumericType::Float16:
return 2;
case NumericType::UInt8:
case NumericType::Int8:
return 1;
default:
FAISS_THROW_MSG(
"Unknown Numeric Type. Only supports Float32, Float16");
Expand Down
2 changes: 1 addition & 1 deletion faiss/gpu/GpuCloner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Index* ToCPUCloner::clone_Index(const Index* index) {
#if defined USE_NVIDIA_CUVS
else if (auto icg = dynamic_cast<const GpuIndexCagra*>(index)) {
IndexHNSWCagra* res = new IndexHNSWCagra();
if (icg->get_numeric_type() == faiss::NumericType::Float16) {
if (icg->get_numeric_type() != faiss::NumericType::Float32) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my learning, what happens in the 16 and int8 case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for copyTo where we copy from GpuIndexCagra to IndexHNSWCagra.
If base_level_only=false, we end up calling hnsw_add_vertices, which only supports fp32 computations on CPU, so we need it to be true.
When base_level_only=true, we can use the storage(which is IndexScalarQuantizer)'s add() instead. : )

res->base_level_only = true;
}
icg->copyTo(res);
Expand Down
32 changes: 32 additions & 0 deletions faiss/gpu/GpuIndex.cu
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ void GpuIndex::addPaged_(
dispatch(float{});
} else if (numeric_type == NumericType::Float16) {
dispatch(half{});
} else if (numeric_type == NumericType::Int8) {
dispatch(int8_t{});
} else {
FAISS_THROW_MSG("GpuIndex::addPaged_: Unsupported numeric type");
}
Expand Down Expand Up @@ -251,6 +253,8 @@ void GpuIndex::addPage_(
dispatch(float{});
} else if (numeric_type == NumericType::Float16) {
dispatch(half{});
} else if (numeric_type == NumericType::Int8) {
dispatch(int8_t{});
} else {
FAISS_THROW_MSG("GpuIndex::addPage_: Unsupported numeric type");
}
Expand Down Expand Up @@ -411,6 +415,22 @@ void GpuIndex::searchNonPaged_(
stream,
{n, this->d});

searchImpl_(
n,
static_cast<const void*>(vecs.data()),
numeric_type,
k,
outDistancesData,
outIndicesData,
params);
} else if (numeric_type == NumericType::Int8) {
auto vecs = toDeviceTemporary<int8_t, 2>(
resources_.get(),
config_.device,
const_cast<int8_t*>(static_cast<const int8_t*>(x)),
stream,
{n, this->d});

searchImpl_(
n,
static_cast<const void*>(vecs.data()),
Expand Down Expand Up @@ -489,6 +509,16 @@ void GpuIndex::searchFromCpuPaged_(
outDistancesSlice.data(),
outIndicesSlice.data(),
params);
} else if (numeric_type == NumericType::Int8) {
searchNonPaged_(
num,
static_cast<const void*>(
static_cast<const int8_t*>(x) + cur * this->d),
numeric_type,
k,
outDistancesSlice.data(),
outIndicesSlice.data(),
params);
}
}

Expand Down Expand Up @@ -645,6 +675,8 @@ void GpuIndex::searchFromCpuPaged_(
dispatch(float{});
} else if (numeric_type == NumericType::Float16) {
dispatch(half{});
} else if (numeric_type == NumericType::Int8) {
dispatch(int8_t{});
} else {
FAISS_THROW_MSG(
"GpuIndex::searchFromCpuPaged_: Unsupported numeric type");
Expand Down
109 changes: 109 additions & 0 deletions faiss/gpu/GpuIndexCagra.cu
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,24 @@ void GpuIndexCagra::train(idx_t n, const void* x, NumericType numeric_type) {
cagraConfig_.guarantee_connectivity);
std::get<std::shared_ptr<CuvsCagra<half>>>(index_)->train(
n, static_cast<const half*>(x));
} else if (numeric_type == NumericType::Int8) {
index_ = std::make_shared<CuvsCagra<int8_t>>(
this->resources_.get(),
this->d,
cagraConfig_.intermediate_graph_degree,
cagraConfig_.graph_degree,
static_cast<faiss::cagra_build_algo>(cagraConfig_.build_algo),
cagraConfig_.nn_descent_niter,
cagraConfig_.store_dataset,
this->metric_type,
this->metric_arg,
INDICES_64_BIT,
ivf_pq_params,
ivf_pq_search_params,
cagraConfig_.refine_rate,
cagraConfig_.guarantee_connectivity);
std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)->train(
n, static_cast<const int8_t*>(x));
} else {
FAISS_THROW_MSG("GpuIndexCagra::train unsupported data type");
}
Expand Down Expand Up @@ -232,6 +250,29 @@ void GpuIndexCagra::searchImpl_(
params->hashmap_max_fill_rate,
params->num_random_samplings,
params->seed);
} else if (numeric_type == NumericType::Int8) {
Tensor<int8_t, 2, true> queries(
const_cast<int8_t*>(static_cast<const int8_t*>(x)),
{n, this->d});

std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)->search(
queries,
k,
outDistances,
outLabels,
params->max_queries,
params->itopk_size,
params->max_iterations,
static_cast<faiss::cagra_search_algo>(params->algo),
params->team_size,
params->search_width,
params->min_iterations,
params->thread_block_size,
static_cast<faiss::cagra_hash_mode>(params->hashmap_mode),
params->hashmap_min_bitlen,
params->hashmap_max_fill_rate,
params->num_random_samplings,
params->seed);
} else {
FAISS_THROW_MSG("GpuIndexCagra::searchImpl_ unsupported data type");
}
Expand Down Expand Up @@ -314,6 +355,29 @@ void GpuIndexCagra::copyFrom(
this->metric_type,
this->metric_arg,
INDICES_64_BIT);
} else if (numeric_type == NumericType::Int8) {
auto base_index = dynamic_cast<IndexScalarQuantizer*>(index->storage);
FAISS_ASSERT(base_index);
auto dataset = (uint8_t*)base_index->codes.data();

// decode what was encoded by Quantizer8bitDirectSigned in
// ScalarQuantizer
int8_t* decoded_train_dataset = new int8_t[index->ntotal * index->d];
for (int i = 0; i < index->ntotal * this->d; i++) {
decoded_train_dataset[i] = dataset[i] - 128;
}

index_ = std::make_shared<CuvsCagra<int8_t>>(
this->resources_.get(),
this->d,
index->ntotal,
hnsw.nb_neighbors(0),
decoded_train_dataset,
knn_graph.data(),
this->metric_type,
this->metric_arg,
INDICES_64_BIT);
delete[] decoded_train_dataset;
} else {
FAISS_THROW_MSG("GpuIndexCagra::copyFrom unsupported data type");
}
Expand Down Expand Up @@ -348,6 +412,9 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const {
} else if (numeric_type_ == NumericType::Float16) {
graph_degree = std::get<std::shared_ptr<CuvsCagra<half>>>(index_)
->get_knngraph_degree();
} else if (numeric_type_ == NumericType::Int8) {
graph_degree = std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)
->get_knngraph_degree();
} else {
FAISS_THROW_MSG("GpuIndexCagra::copyTo unsupported data type");
}
Expand All @@ -368,6 +435,10 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const {
auto qtype = ScalarQuantizer::QT_fp16;
index->storage =
new IndexScalarQuantizer(index->d, qtype, this->metric_type);
} else if (numeric_type_ == NumericType::Int8) {
auto qtype = ScalarQuantizer::QT_8bit_direct_signed;
index->storage =
new IndexScalarQuantizer(index->d, qtype, this->metric_type);
}

index->own_fields = true;
Expand Down Expand Up @@ -438,6 +509,44 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const {
index->ntotal = n_train;
}

if (allocation) {
delete[] train_dataset;
}
} else if (numeric_type_ == NumericType::Int8) {
int8_t* train_dataset;
const int8_t* dataset =
std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)
->get_training_dataset();
if (getDeviceForAddress(dataset) >= 0) {
train_dataset = new int8_t[n_train * index->d];
allocation = true;
raft::copy(
train_dataset,
dataset,
n_train * index->d,
this->resources_->getRaftHandleCurrentDevice()
.get_stream());
} else {
train_dataset = const_cast<int8_t*>(dataset);
}

index->init_level0 = false;
if (!index->base_level_only) {
FAISS_THROW_MSG(
"Only base level copy is supported for Int8 types in GpuIndexCagra::copyTo");
} else {
index->hnsw.prepare_level_tab(n_train, false);
// applying encoding logic of Quantizer8bitDirectSigned
uint8_t* encoded_train_dataset = new uint8_t[n_train * index->d];
for (int i = 0; i < n_train * index->d; i++) {
encoded_train_dataset[i] = train_dataset[i] + 128;
}
index->storage->add_sa_codes(
n_train, encoded_train_dataset, nullptr);
delete[] encoded_train_dataset;
index->ntotal = n_train;
}

if (allocation) {
delete[] train_dataset;
}
Expand Down
3 changes: 2 additions & 1 deletion faiss/gpu/GpuIndexCagra.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,8 @@ struct GpuIndexCagra : public GpuIndex {
std::variant<
std::monostate,
std::shared_ptr<CuvsCagra<float>>,
std::shared_ptr<CuvsCagra<half>>>
std::shared_ptr<CuvsCagra<half>>,
std::shared_ptr<CuvsCagra<int8_t>>>
index_;
};

Expand Down
1 change: 1 addition & 0 deletions faiss/gpu/impl/CuvsCagra.cu
Original file line number Diff line number Diff line change
Expand Up @@ -337,5 +337,6 @@ const data_t* CuvsCagra<data_t>::get_training_dataset() const {

template class CuvsCagra<float>;
template class CuvsCagra<half>;
template class CuvsCagra<int8_t>;
} // namespace gpu
} // namespace faiss
Loading
Loading