From ea7827dbdada7ea8497368b086223c876e0da0fe Mon Sep 17 00:00:00 2001 From: QingyangYinUber Date: Thu, 3 Jul 2025 21:02:14 +0000 Subject: [PATCH 01/10] Copy distance matrix: work --- c_api/gpu/GpuAutoTune_c.cpp | 12 +++ c_api/gpu/GpuIndex_c.cpp | 63 +++++++++++- c_api/gpu/GpuIndex_c.h | 26 ++++- faiss/IndexHNSW.cpp | 2 + faiss/gpu/GpuCloner.cpp | 185 ++++++++++++++++++++++++++++++++++- faiss/gpu/GpuCloner.h | 5 + faiss/gpu/GpuIndexCagra.cu | 42 +++++++- faiss/gpu/impl/CuvsCagra.cu | 27 ++++- faiss/gpu/impl/CuvsCagra.cuh | 2 +- 9 files changed, 353 insertions(+), 11 deletions(-) diff --git a/c_api/gpu/GpuAutoTune_c.cpp b/c_api/gpu/GpuAutoTune_c.cpp index d5bc7e973c..a527890bf4 100644 --- a/c_api/gpu/GpuAutoTune_c.cpp +++ b/c_api/gpu/GpuAutoTune_c.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include "GpuClonerOptions_c.h" #include "macros_impl.h" @@ -22,10 +23,21 @@ using faiss::gpu::GpuClonerOptions; using faiss::gpu::GpuMultipleClonerOptions; using faiss::gpu::GpuResourcesProvider; + + int faiss_index_gpu_to_cpu(const FaissIndex* gpu_index, FaissIndex** p_out) { + if (gpu_index == nullptr) { + return -1; + } + + if (p_out == nullptr) { + return -1; + } + try { auto cpu_index = faiss::gpu::index_gpu_to_cpu( reinterpret_cast(gpu_index)); + *p_out = reinterpret_cast(cpu_index); } CATCH_AND_HANDLE diff --git a/c_api/gpu/GpuIndex_c.cpp b/c_api/gpu/GpuIndex_c.cpp index 92d675a2e8..f56edc54f7 100644 --- a/c_api/gpu/GpuIndex_c.cpp +++ b/c_api/gpu/GpuIndex_c.cpp @@ -8,9 +8,70 @@ // -*- c++ -*- #include "GpuIndex_c.h" -#include +#include "GpuAutoTune_c.h" #include "macros_impl.h" +#include +#include +#include +#include +#include +#include using faiss::gpu::GpuIndexConfig; DEFINE_GETTER(GpuIndexConfig, int, device) + +int faiss_index_gpu_to_cpu_new(const FaissIndex* gpu_index, FaissIndex** p_out) { + // Call the existing function from GpuAutoTune_c.cpp + int result = faiss_index_gpu_to_cpu(gpu_index, p_out); + return result; +} + +int 
faiss_index_cpu_to_gpu_new( + FaissGpuResourcesProvider* provider, + int device, + const FaissIndex* index, + FaissGpuIndex** p_out) { + // Call the existing function from GpuAutoTune_c.cpp + return faiss_index_cpu_to_gpu(provider, device, index, p_out); +} + +int faiss_GpuIndexCagra_new( + FaissIndex** p_index, + FaissStandardGpuResources* res, + int d, + FaissMetricType metric, + size_t graph_degree) { + try { + faiss::gpu::GpuIndexCagraConfig config; + config.graph_degree = graph_degree; + + auto gpu_res = + reinterpret_cast(res); + + auto cagra_index = new faiss::gpu::GpuIndexCagra( + gpu_res, + d, + static_cast(metric), + config); + + *p_index = reinterpret_cast(cagra_index); + return 0; + } + CATCH_AND_HANDLE +} + + + +int faiss_SearchParametersCagra_new( + FaissSearchParameters** p_params, + size_t itopk_size) { + try { + auto cagra_params = new faiss::gpu::SearchParametersCagra(); + cagra_params->itopk_size = itopk_size; + + *p_params = reinterpret_cast(cagra_params); + return 0; + } + CATCH_AND_HANDLE +} diff --git a/c_api/gpu/GpuIndex_c.h b/c_api/gpu/GpuIndex_c.h index 4b7aab061e..a0128d4117 100644 --- a/c_api/gpu/GpuIndex_c.h +++ b/c_api/gpu/GpuIndex_c.h @@ -10,18 +10,38 @@ #ifndef FAISS_GPU_INDEX_C_H #define FAISS_GPU_INDEX_C_H +#include "../Index_c.h" #include "../faiss_c.h" - +#include "StandardGpuResources_c.h" #ifdef __cplusplus extern "C" { #endif FAISS_DECLARE_CLASS(GpuIndexConfig) - FAISS_DECLARE_GETTER(GpuIndexConfig, int, device) - FAISS_DECLARE_CLASS_INHERITED(GpuIndex, Index) +FAISS_DECLARE_CLASS(SearchParameters) + +int faiss_GpuIndexCagra_new( + FaissIndex** p_index, + FaissStandardGpuResources* res, + int d, + FaissMetricType metric, + size_t graph_degree); + +int faiss_SearchParametersCagra_new( + FaissSearchParameters** p_params, + size_t itopk_size); + +int faiss_index_gpu_to_cpu_new(const FaissIndex* gpu_index, FaissIndex** p_out); + +int faiss_index_cpu_to_gpu_new( + FaissGpuResourcesProvider* provider, + int device, + const 
FaissIndex* index, + FaissGpuIndex** p_out); + #ifdef __cplusplus } #endif diff --git a/faiss/IndexHNSW.cpp b/faiss/IndexHNSW.cpp index 5983e9d831..2e6fbe873d 100644 --- a/faiss/IndexHNSW.cpp +++ b/faiss/IndexHNSW.cpp @@ -923,6 +923,8 @@ void IndexHNSWCagra::search( float* distances, idx_t* labels, const SearchParameters* params) const { + printf("🔍 C++: IndexHNSWCagra::search called\n"); + if (!base_level_only) { IndexHNSW::search(n, x, k, distances, labels, params); } else { diff --git a/faiss/gpu/GpuCloner.cpp b/faiss/gpu/GpuCloner.cpp index 575ee2e0a5..2c88d4585e 100644 --- a/faiss/gpu/GpuCloner.cpp +++ b/faiss/gpu/GpuCloner.cpp @@ -37,10 +37,17 @@ #include #include #include +#include +#include namespace faiss { namespace gpu { +// Function declarations +void test_converted_cpu_index_search(const faiss::Index* cpu_index); +void test_gpu_index_before_conversion(const faiss::Index* gpu_index); +void test_cpu_index_after_copyto(const faiss::Index* cpu_index); + /********************************************************** * Cloning to CPU **********************************************************/ @@ -69,22 +76,33 @@ void ToCPUCloner::merge_index(Index* dst, Index* src, bool successive_ids) { } Index* ToCPUCloner::clone_Index(const Index* index) { + printf("🔍 C++: ToCPUCloner::clone_Index called\n"); + printf(" - Input index: %p\n", (void*)index); + if (auto ifl = dynamic_cast(index)) { + printf(" - Converting GpuIndexFlat to IndexFlat\n"); IndexFlat* res = new IndexFlat(); ifl->copyTo(res); + printf(" - GpuIndexFlat conversion completed\n"); return res; } else if (auto ifl = dynamic_cast(index)) { + printf(" - Converting GpuIndexIVFFlat to IndexIVFFlat\n"); IndexIVFFlat* res = new IndexIVFFlat(); ifl->copyTo(res); + printf(" - GpuIndexIVFFlat conversion completed\n"); return res; } else if ( auto ifl = dynamic_cast(index)) { + printf(" - Converting GpuIndexIVFScalarQuantizer to IndexIVFScalarQuantizer\n"); IndexIVFScalarQuantizer* res = new 
IndexIVFScalarQuantizer(); ifl->copyTo(res); + printf(" - GpuIndexIVFScalarQuantizer conversion completed\n"); return res; } else if (auto ipq = dynamic_cast(index)) { + printf(" - Converting GpuIndexIVFPQ to IndexIVFPQ\n"); IndexIVFPQ* res = new IndexIVFPQ(); ipq->copyTo(res); + printf(" - GpuIndexIVFPQ conversion completed\n"); return res; // for IndexShards and IndexReplicas we assume that the @@ -94,12 +112,19 @@ Index* ToCPUCloner::clone_Index(const Index* index) { } #if defined USE_NVIDIA_CUVS else if (auto icg = dynamic_cast(index)) { + printf(" - Converting GpuIndexCagra to IndexHNSWCagra\n"); IndexHNSWCagra* res = new IndexHNSWCagra(); + icg->copyTo(res); + printf(" - GpuIndexCagra conversion completed\n"); + printf(" - base_level_only flag: %s\n", res->base_level_only ? "true" : "false"); + printf(" - num_base_level_search_entrypoints: %d\n", res->num_base_level_search_entrypoints); + return res; } #endif else if (auto ish = dynamic_cast(index)) { + printf(" - Converting IndexShards\n"); int nshard = ish->count(); FAISS_ASSERT(nshard > 0); Index* res = clone_Index(ish->at(0)); @@ -108,19 +133,53 @@ Index* ToCPUCloner::clone_Index(const Index* index) { merge_index(res, res_i, ish->successive_ids); delete res_i; } + printf(" - IndexShards conversion completed\n"); return res; } else if (auto ipr = dynamic_cast(index)) { + printf(" - Converting IndexReplicas\n"); // just clone one of the replicas FAISS_ASSERT(ipr->count() > 0); - return clone_Index(ipr->at(0)); + auto result = clone_Index(ipr->at(0)); + printf(" - IndexReplicas conversion completed\n"); + return result; } else { - return Cloner::clone_Index(index); + printf(" - Using default Cloner::clone_Index for unknown type\n"); + auto result = Cloner::clone_Index(index); + printf(" - Default conversion completed\n"); + return result; } } faiss::Index* index_gpu_to_cpu(const faiss::Index* gpu_index) { + printf("🔍 C++: index_gpu_to_cpu called\n"); + printf(" - Input gpu_index: %p\n", (void*)gpu_index); + 
printf(" - Input index ntotal: %ld\n", gpu_index->ntotal); + printf(" - Input index dimension: %d\n", gpu_index->d); + printf(" - Input index is_trained: %s\n", gpu_index->is_trained ? "true" : "false"); + ToCPUCloner cl; - return cl.clone_Index(gpu_index); + + // Test search on GPU index before conversion + printf("🔍 Testing GPU index before conversion in GpuCloner...\n"); + test_gpu_index_before_conversion(gpu_index); + + auto result = cl.clone_Index(gpu_index); + + printf(" - Conversion result: %p\n", (void*)result); + if (result != nullptr) { + printf(" - Result index ntotal: %ld\n", result->ntotal); + printf(" - Result index dimension: %d\n", result->d); + printf(" - Result index is_trained: %s\n", result->is_trained ? "true" : "false"); + + // Test search on the converted CPU index + printf("🔍 Testing converted CPU index in GpuCloner...\n"); + test_converted_cpu_index_search(result); + } else { + printf("❌ C++: ERROR - clone_Index returned null!\n"); + } + + printf("✅ C++: index_gpu_to_cpu completed\n"); + return result; } /********************************************************** @@ -588,5 +647,125 @@ faiss::IndexBinary* index_binary_cpu_to_gpu_multiple( } } +// Test function to verify CPU index immediately after copyTo +void test_cpu_index_after_copyto(const faiss::Index* cpu_index) { + try { + // Create a simple test query + std::vector query(128, 2.0f); // 128 dimensions, all 2.0f + + // Allocate result arrays + std::vector distances(20); + std::vector labels(20); + + // Perform search + cpu_index->search(1, query.data(), 20, distances.data(), labels.data()); + + printf(" - After copyTo CPU index search results:\n"); + printf(" Top 5 results: "); + for (int i = 0; i < 5 && i < 20; i++) { + printf("(%ld:%.1f) ", labels[i], distances[i]); + } + printf("\n"); + + // Check for any NaN or zero distances + bool has_nan_or_zero = false; + for (int i = 0; i < 20; i++) { + if (std::isnan(distances[i]) || distances[i] == 0.0f) { + has_nan_or_zero = true; + break; 
+ } + } + + if (has_nan_or_zero) { + printf(" - ⚠️ WARNING: Found NaN or zero distances after copyTo!\n"); + } else { + printf(" - ✅ After copyTo CPU index search results look valid\n"); + } + + } catch (const std::exception& e) { + printf(" - ❌ Error testing after copyTo CPU index search: %s\n", e.what()); + } +} + +// Test function to verify GPU index before conversion +void test_gpu_index_before_conversion(const faiss::Index* gpu_index) { + try { + // Create a simple test query + std::vector query(128, 2.0f); // 128 dimensions, all 2.0f + + // Allocate result arrays + std::vector distances(20); + std::vector labels(20); + + // Perform search + gpu_index->search(1, query.data(), 20, distances.data(), labels.data()); + + printf(" - GpuCloner GPU index search results:\n"); + printf(" Top 5 results: "); + for (int i = 0; i < 5 && i < 20; i++) { + printf("(%ld:%.1f) ", labels[i], distances[i]); + } + printf("\n"); + + // Check for any NaN or zero distances + bool has_nan_or_zero = false; + for (int i = 0; i < 20; i++) { + if (std::isnan(distances[i]) || distances[i] == 0.0f) { + has_nan_or_zero = true; + break; + } + } + + if (has_nan_or_zero) { + printf(" - ⚠️ WARNING: Found NaN or zero distances in GpuCloner GPU results!\n"); + } else { + printf(" - ✅ GpuCloner GPU index search results look valid\n"); + } + + } catch (const std::exception& e) { + printf(" - ❌ Error testing GpuCloner GPU index search: %s\n", e.what()); + } +} + +// Test function to verify converted CPU index search +void test_converted_cpu_index_search(const faiss::Index* cpu_index) { + try { + // Create a simple test query + std::vector query(128, 2.0f); // 128 dimensions, all 2.0f + + // Allocate result arrays + std::vector distances(20); + std::vector labels(20); + + // Perform search + cpu_index->search(1, query.data(), 20, distances.data(), labels.data()); + + printf(" - GpuCloner CPU index search results:\n"); + printf(" Top 5 results: "); + for (int i = 0; i < 5 && i < 20; i++) { + 
printf("(%ld:%.1f) ", labels[i], distances[i]); + } + printf("\n"); + + // Check for any NaN or zero distances + bool has_nan_or_zero = false; + for (int i = 0; i < 20; i++) { + if (std::isnan(distances[i]) || distances[i] == 0.0f) { + has_nan_or_zero = true; + break; + } + } + + if (has_nan_or_zero) { + printf(" - ⚠️ WARNING: Found NaN or zero distances in GpuCloner CPU results!\n"); + } else { + printf(" - ✅ GpuCloner CPU index search results look valid\n"); + } + + } catch (const std::exception& e) { + printf(" - ❌ Error testing GpuCloner CPU index search: %s\n", e.what()); + } +} + } // namespace gpu } // namespace faiss diff --git a/faiss/gpu/GpuCloner.h b/faiss/gpu/GpuCloner.h index d92d81ffe1..54c261200b 100644 --- a/faiss/gpu/GpuCloner.h +++ b/faiss/gpu/GpuCloner.h @@ -117,5 +117,10 @@ faiss::IndexBinary* index_binary_cpu_to_gpu_multiple( const faiss::IndexBinary* index, const GpuMultipleClonerOptions* options = nullptr); +// Test function declarations +void test_cpu_index_after_copyto(const faiss::Index* cpu_index); +void test_gpu_index_before_conversion(const faiss::Index* gpu_index); +void test_converted_cpu_index_search(const faiss::Index* cpu_index); + } // namespace gpu } // namespace faiss diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index 42a6092ddd..d15970e943 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -23,10 +23,13 @@ #include #include +#include #include #include #include #include +#include +#include namespace faiss { namespace gpu { @@ -207,6 +210,9 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { DeviceScope scope(config_.device); + printf("🔍 C++: GpuIndexCagra::copyTo called\n"); + printf(" - GPU index ntotal: %ld, d: %d\n", this->ntotal, this->d); + // // Index information // @@ -235,22 +241,38 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { auto n_train = this->ntotal; float* train_dataset; + auto dataset = index_->get_training_dataset(); bool 
allocation = false; + printf(" - Dataset pointer: %p\n", dataset); + printf(" - Dataset device: %d\n", getDeviceForAddress(dataset)); + + if (getDeviceForAddress(dataset) >= 0) { train_dataset = new float[n_train * index->d]; allocation = true; + printf(" - Copying %ld vectors from GPU to CPU\n", n_train); raft::copy( train_dataset, dataset, n_train * index->d, this->resources_->getRaftHandleCurrentDevice().get_stream()); + + // Debug: Check first few values after GPU->CPU copy + printf(" - After GPU->CPU copy, first 5 values: [%.3f, %.3f, %.3f, %.3f, %.3f]\n", + train_dataset[0], train_dataset[1], train_dataset[2], train_dataset[3], train_dataset[4]); + printf(" - After GPU->CPU copy, values at d, d+1, d+2, d+3, d+4: [%.3f, %.3f, %.3f, %.3f, %.3f]\n", + train_dataset[index->d], train_dataset[index->d+1], train_dataset[index->d+2], train_dataset[index->d+3], train_dataset[index->d+4]); } else { train_dataset = const_cast(dataset); + printf(" - Using CPU dataset directly\n"); + printf(" - CPU dataset first 5 values: [%.3f, %.3f, %.3f, %.3f, %.3f]\n", + train_dataset[0], train_dataset[1], train_dataset[2], train_dataset[3], train_dataset[4]); } - // turn off as level 0 is copied from CAGRA graph + // turn off as level 0 is copied from CAGRA graph index->init_level0 = false; + printf(" - Adding %ld vectors to CPU index\n", n_train); if (!index->base_level_only) { index->add(n_train, train_dataset); } else { @@ -258,6 +280,24 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { index->storage->add(n_train, train_dataset); index->ntotal = n_train; } + + // Debug: Check vectors in CPU index after adding + if (index->storage) { + auto flat_storage = dynamic_cast(index->storage); + if (flat_storage) { + const float* vectors = flat_storage->get_xb(); + printf(" - After adding to CPU index, first 5 values: [%.3f, %.3f, %.3f, %.3f, %.3f]\n", + vectors[0], vectors[1], vectors[2], vectors[3], vectors[4]); + printf(" - After adding to CPU index, values at d, d+1, 
d+2, d+3, d+4: [%.3f, %.3f, %.3f, %.3f, %.3f]\n", + vectors[index->d], vectors[index->d+1], vectors[index->d+2], vectors[index->d+3], vectors[index->d+4]); + } + } + + // Test search immediately after adding vectors to CPU index + printf("🔍 Testing CPU index immediately after adding vectors...\n"); + fflush(stdout); + test_cpu_index_after_copyto(index); + if (allocation) { delete[] train_dataset; } diff --git a/faiss/gpu/impl/CuvsCagra.cu b/faiss/gpu/impl/CuvsCagra.cu index f60e1e3ab5..f603716da3 100644 --- a/faiss/gpu/impl/CuvsCagra.cu +++ b/faiss/gpu/impl/CuvsCagra.cu @@ -30,6 +30,7 @@ #include #include #include +#include namespace faiss { namespace gpu { @@ -50,6 +51,8 @@ CuvsCagra::CuvsCagra( ivf_pq_search_params, float refine_rate) : resources_(resources), + storage_(nullptr), + n_(0), dim_(dim), graph_build_algo_(graph_build_algo), nn_descent_niter_(nn_descent_niter), @@ -91,6 +94,8 @@ CuvsCagra::CuvsCagra( float metricArg, IndicesOptions indicesOptions) : resources_(resources), + storage_(nullptr), + n_(0), dim_(dim), metric_(metric), metricArg_(metricArg) { @@ -106,7 +111,9 @@ CuvsCagra::CuvsCagra( FAISS_ASSERT(distances_on_gpu == knn_graph_on_gpu); - storage_ = distances; + // Copy the distances data instead of just storing a pointer + storage_ = new float[n * dim]; + std::copy(distances, distances + n * dim, storage_); n_ = n; const raft::device_resources& raft_handle = @@ -158,7 +165,17 @@ CuvsCagra::CuvsCagra( } void CuvsCagra::train(idx_t n, const float* x) { - storage_ = x; + // Copy the training data instead of just storing a pointer + // This is necessary because the input data might be temporary memory + // that gets deallocated after the training call + if (storage_ != nullptr && storage_ != x) { + // Free previous storage if it was allocated by us + delete[] storage_; + } + + // Allocate new storage and copy the data + storage_ = new float[n * dim_]; + std::copy(x, x + n * dim_, storage_); n_ = n; const raft::device_resources& raft_handle = @@ 
-290,6 +307,12 @@ void CuvsCagra::search( void CuvsCagra::reset() { cuvs_index.reset(); + // Free allocated storage + if (storage_ != nullptr) { + delete[] storage_; + storage_ = nullptr; + } + n_ = 0; } idx_t CuvsCagra::get_knngraph_degree() const { diff --git a/faiss/gpu/impl/CuvsCagra.cuh b/faiss/gpu/impl/CuvsCagra.cuh index 8e458d8be2..18d9ed3ead 100644 --- a/faiss/gpu/impl/CuvsCagra.cuh +++ b/faiss/gpu/impl/CuvsCagra.cuh @@ -111,7 +111,7 @@ class CuvsCagra { GpuResources* resources_; /// Training dataset - const float* storage_; + float* storage_; int n_; /// Expected dimensionality of the vectors From c9662c00608fa8315acd6981812d500ce3535149 Mon Sep 17 00:00:00 2001 From: QingyangYinUber Date: Thu, 3 Jul 2025 21:30:17 +0000 Subject: [PATCH 02/10] Achieve with shared arena --- faiss/gpu/impl/CuvsCagra.cu | 27 ++------------------------- faiss/gpu/impl/CuvsCagra.cuh | 2 +- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/faiss/gpu/impl/CuvsCagra.cu b/faiss/gpu/impl/CuvsCagra.cu index f603716da3..f60e1e3ab5 100644 --- a/faiss/gpu/impl/CuvsCagra.cu +++ b/faiss/gpu/impl/CuvsCagra.cu @@ -30,7 +30,6 @@ #include #include #include -#include namespace faiss { namespace gpu { @@ -51,8 +50,6 @@ CuvsCagra::CuvsCagra( ivf_pq_search_params, float refine_rate) : resources_(resources), - storage_(nullptr), - n_(0), dim_(dim), graph_build_algo_(graph_build_algo), nn_descent_niter_(nn_descent_niter), @@ -94,8 +91,6 @@ CuvsCagra::CuvsCagra( float metricArg, IndicesOptions indicesOptions) : resources_(resources), - storage_(nullptr), - n_(0), dim_(dim), metric_(metric), metricArg_(metricArg) { @@ -111,9 +106,7 @@ CuvsCagra::CuvsCagra( FAISS_ASSERT(distances_on_gpu == knn_graph_on_gpu); - // Copy the distances data instead of just storing a pointer - storage_ = new float[n * dim]; - std::copy(distances, distances + n * dim, storage_); + storage_ = distances; n_ = n; const raft::device_resources& raft_handle = @@ -165,17 +158,7 @@ CuvsCagra::CuvsCagra( } void 
CuvsCagra::train(idx_t n, const float* x) { - // Copy the training data instead of just storing a pointer - // This is necessary because the input data might be temporary memory - // that gets deallocated after the training call - if (storage_ != nullptr && storage_ != x) { - // Free previous storage if it was allocated by us - delete[] storage_; - } - - // Allocate new storage and copy the data - storage_ = new float[n * dim_]; - std::copy(x, x + n * dim_, storage_); + storage_ = x; n_ = n; const raft::device_resources& raft_handle = @@ -307,12 +290,6 @@ void CuvsCagra::search( void CuvsCagra::reset() { cuvs_index.reset(); - // Free allocated storage - if (storage_ != nullptr) { - delete[] storage_; - storage_ = nullptr; - } - n_ = 0; } idx_t CuvsCagra::get_knngraph_degree() const { diff --git a/faiss/gpu/impl/CuvsCagra.cuh b/faiss/gpu/impl/CuvsCagra.cuh index 18d9ed3ead..8e458d8be2 100644 --- a/faiss/gpu/impl/CuvsCagra.cuh +++ b/faiss/gpu/impl/CuvsCagra.cuh @@ -111,7 +111,7 @@ class CuvsCagra { GpuResources* resources_; /// Training dataset - float* storage_; + const float* storage_; int n_; /// Expected dimensionality of the vectors From 7809c5d785126c688e2bdfce2c4d6cf203764f12 Mon Sep 17 00:00:00 2001 From: QingyangYinUber Date: Mon, 7 Jul 2025 18:11:27 +0000 Subject: [PATCH 03/10] Clear output --- faiss/IndexHNSW.cpp | 2 - faiss/gpu/GpuCloner.cpp | 171 +------------------------------------ faiss/gpu/GpuCloner.h | 5 +- faiss/gpu/GpuIndexCagra.cu | 32 ------- 4 files changed, 2 insertions(+), 208 deletions(-) diff --git a/faiss/IndexHNSW.cpp b/faiss/IndexHNSW.cpp index 2e6fbe873d..5983e9d831 100644 --- a/faiss/IndexHNSW.cpp +++ b/faiss/IndexHNSW.cpp @@ -923,8 +923,6 @@ void IndexHNSWCagra::search( float* distances, idx_t* labels, const SearchParameters* params) const { - printf("🔍 C++: IndexHNSWCagra::search called\n"); - if (!base_level_only) { IndexHNSW::search(n, x, k, distances, labels, params); } else { diff --git a/faiss/gpu/GpuCloner.cpp 
b/faiss/gpu/GpuCloner.cpp index 2c88d4585e..f0dd162762 100644 --- a/faiss/gpu/GpuCloner.cpp +++ b/faiss/gpu/GpuCloner.cpp @@ -44,9 +44,7 @@ namespace faiss { namespace gpu { // Function declarations -void test_converted_cpu_index_search(const faiss::Index* cpu_index); -void test_gpu_index_before_conversion(const faiss::Index* gpu_index); -void test_cpu_index_after_copyto(const faiss::Index* cpu_index); + /********************************************************** * Cloning to CPU @@ -76,33 +74,22 @@ void ToCPUCloner::merge_index(Index* dst, Index* src, bool successive_ids) { } Index* ToCPUCloner::clone_Index(const Index* index) { - printf("🔍 C++: ToCPUCloner::clone_Index called\n"); - printf(" - Input index: %p\n", (void*)index); - if (auto ifl = dynamic_cast(index)) { - printf(" - Converting GpuIndexFlat to IndexFlat\n"); IndexFlat* res = new IndexFlat(); ifl->copyTo(res); - printf(" - GpuIndexFlat conversion completed\n"); return res; } else if (auto ifl = dynamic_cast(index)) { - printf(" - Converting GpuIndexIVFFlat to IndexIVFFlat\n"); IndexIVFFlat* res = new IndexIVFFlat(); ifl->copyTo(res); - printf(" - GpuIndexIVFFlat conversion completed\n"); return res; } else if ( auto ifl = dynamic_cast(index)) { - printf(" - Converting GpuIndexIVFScalarQuantizer to IndexIVFScalarQuantizer\n"); IndexIVFScalarQuantizer* res = new IndexIVFScalarQuantizer(); ifl->copyTo(res); - printf(" - GpuIndexIVFScalarQuantizer conversion completed\n"); return res; } else if (auto ipq = dynamic_cast(index)) { - printf(" - Converting GpuIndexIVFPQ to IndexIVFPQ\n"); IndexIVFPQ* res = new IndexIVFPQ(); ipq->copyTo(res); - printf(" - GpuIndexIVFPQ conversion completed\n"); return res; // for IndexShards and IndexReplicas we assume that the @@ -112,19 +99,12 @@ Index* ToCPUCloner::clone_Index(const Index* index) { } #if defined USE_NVIDIA_CUVS else if (auto icg = dynamic_cast(index)) { - printf(" - Converting GpuIndexCagra to IndexHNSWCagra\n"); IndexHNSWCagra* res = new IndexHNSWCagra(); 
- icg->copyTo(res); - printf(" - GpuIndexCagra conversion completed\n"); - printf(" - base_level_only flag: %s\n", res->base_level_only ? "true" : "false"); - printf(" - num_base_level_search_entrypoints: %d\n", res->num_base_level_search_entrypoints); - return res; } #endif else if (auto ish = dynamic_cast(index)) { - printf(" - Converting IndexShards\n"); int nshard = ish->count(); FAISS_ASSERT(nshard > 0); Index* res = clone_Index(ish->at(0)); @@ -133,52 +113,21 @@ Index* ToCPUCloner::clone_Index(const Index* index) { merge_index(res, res_i, ish->successive_ids); delete res_i; } - printf(" - IndexShards conversion completed\n"); return res; } else if (auto ipr = dynamic_cast(index)) { - printf(" - Converting IndexReplicas\n"); // just clone one of the replicas FAISS_ASSERT(ipr->count() > 0); auto result = clone_Index(ipr->at(0)); - printf(" - IndexReplicas conversion completed\n"); return result; } else { - printf(" - Using default Cloner::clone_Index for unknown type\n"); auto result = Cloner::clone_Index(index); - printf(" - Default conversion completed\n"); return result; } } faiss::Index* index_gpu_to_cpu(const faiss::Index* gpu_index) { - printf("🔍 C++: index_gpu_to_cpu called\n"); - printf(" - Input gpu_index: %p\n", (void*)gpu_index); - printf(" - Input index ntotal: %ld\n", gpu_index->ntotal); - printf(" - Input index dimension: %d\n", gpu_index->d); - printf(" - Input index is_trained: %s\n", gpu_index->is_trained ? "true" : "false"); - ToCPUCloner cl; - - // Test search on GPU index before conversion - printf("🔍 Testing GPU index before conversion in GpuCloner...\n"); - test_gpu_index_before_conversion(gpu_index); - auto result = cl.clone_Index(gpu_index); - - printf(" - Conversion result: %p\n", (void*)result); - if (result != nullptr) { - printf(" - Result index ntotal: %ld\n", result->ntotal); - printf(" - Result index dimension: %d\n", result->d); - printf(" - Result index is_trained: %s\n", result->is_trained ? 
"true" : "false"); - - // Test search on the converted CPU index - printf("🔍 Testing converted CPU index in GpuCloner...\n"); - test_converted_cpu_index_search(result); - } else { - printf("❌ C++: ERROR - clone_Index returned null!\n"); - } - - printf("✅ C++: index_gpu_to_cpu completed\n"); return result; } @@ -647,125 +596,7 @@ faiss::IndexBinary* index_binary_cpu_to_gpu_multiple( } } -// Test function to verify CPU index immediately after copyTo -void test_cpu_index_after_copyto(const faiss::Index* cpu_index) { - try { - // Create a simple test query - std::vector query(128, 2.0f); // 128 dimensions, all 2.0f - - // Allocate result arrays - std::vector distances(20); - std::vector labels(20); - - // Perform search - cpu_index->search(1, query.data(), 20, distances.data(), labels.data()); - - printf(" - After copyTo CPU index search results:\n"); - printf(" Top 5 results: "); - for (int i = 0; i < 5 && i < 20; i++) { - printf("(%ld:%.1f) ", labels[i], distances[i]); - } - printf("\n"); - - // Check for any NaN or zero distances - bool has_nan_or_zero = false; - for (int i = 0; i < 20; i++) { - if (std::isnan(distances[i]) || distances[i] == 0.0f) { - has_nan_or_zero = true; - break; - } - } - - if (has_nan_or_zero) { - printf(" - ⚠️ WARNING: Found NaN or zero distances after copyTo!\n"); - } else { - printf(" - ✅ After copyTo CPU index search results look valid\n"); - } - - } catch (const std::exception& e) { - printf(" - ❌ Error testing after copyTo CPU index search: %s\n", e.what()); - } -} - -// Test function to verify GPU index before conversion -void test_gpu_index_before_conversion(const faiss::Index* gpu_index) { - try { - // Create a simple test query - std::vector query(128, 2.0f); // 128 dimensions, all 2.0f - // Allocate result arrays - std::vector distances(20); - std::vector labels(20); - - // Perform search - gpu_index->search(1, query.data(), 20, distances.data(), labels.data()); - - printf(" - GpuCloner GPU index search results:\n"); - printf(" Top 
5 results: "); - for (int i = 0; i < 5 && i < 20; i++) { - printf("(%ld:%.1f) ", labels[i], distances[i]); - } - printf("\n"); - - // Check for any NaN or zero distances - bool has_nan_or_zero = false; - for (int i = 0; i < 20; i++) { - if (std::isnan(distances[i]) || distances[i] == 0.0f) { - has_nan_or_zero = true; - break; - } - } - - if (has_nan_or_zero) { - printf(" - ⚠️ WARNING: Found NaN or zero distances in GpuCloner GPU results!\n"); - } else { - printf(" - ✅ GpuCloner GPU index search results look valid\n"); - } - - } catch (const std::exception& e) { - printf(" - ❌ Error testing GpuCloner GPU index search: %s\n", e.what()); - } -} - -// Test function to verify converted CPU index search -void test_converted_cpu_index_search(const faiss::Index* cpu_index) { - try { - // Create a simple test query - std::vector query(128, 2.0f); // 128 dimensions, all 2.0f - - // Allocate result arrays - std::vector distances(20); - std::vector labels(20); - - // Perform search - cpu_index->search(1, query.data(), 20, distances.data(), labels.data()); - - printf(" - GpuCloner CPU index search results:\n"); - printf(" Top 5 results: "); - for (int i = 0; i < 5 && i < 20; i++) { - printf("(%ld:%.1f) ", labels[i], distances[i]); - } - printf("\n"); - - // Check for any NaN or zero distances - bool has_nan_or_zero = false; - for (int i = 0; i < 20; i++) { - if (std::isnan(distances[i]) || distances[i] == 0.0f) { - has_nan_or_zero = true; - break; - } - } - - if (has_nan_or_zero) { - printf(" - ⚠️ WARNING: Found NaN or zero distances in GpuCloner CPU results!\n"); - } else { - printf(" - ✅ GpuCloner CPU index search results look valid\n"); - } - - } catch (const std::exception& e) { - printf(" - ❌ Error testing GpuCloner CPU index search: %s\n", e.what()); - } -} } // namespace gpu } // namespace faiss diff --git a/faiss/gpu/GpuCloner.h b/faiss/gpu/GpuCloner.h index 54c261200b..5ae4a2ce4d 100644 --- a/faiss/gpu/GpuCloner.h +++ b/faiss/gpu/GpuCloner.h @@ -117,10 +117,7 @@ 
faiss::IndexBinary* index_binary_cpu_to_gpu_multiple( const faiss::IndexBinary* index, const GpuMultipleClonerOptions* options = nullptr); -// Test function declarations -void test_cpu_index_after_copyto(const faiss::Index* cpu_index); -void test_gpu_index_before_conversion(const faiss::Index* gpu_index); -void test_converted_cpu_index_search(const faiss::Index* cpu_index); + } // namespace gpu } // namespace faiss diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index d15970e943..09029e8355 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -210,9 +210,6 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { DeviceScope scope(config_.device); - printf("🔍 C++: GpuIndexCagra::copyTo called\n"); - printf(" - GPU index ntotal: %ld, d: %d\n", this->ntotal, this->d); - // // Index information // @@ -244,35 +241,21 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { auto dataset = index_->get_training_dataset(); bool allocation = false; - printf(" - Dataset pointer: %p\n", dataset); - printf(" - Dataset device: %d\n", getDeviceForAddress(dataset)); - if (getDeviceForAddress(dataset) >= 0) { train_dataset = new float[n_train * index->d]; allocation = true; - printf(" - Copying %ld vectors from GPU to CPU\n", n_train); raft::copy( train_dataset, dataset, n_train * index->d, this->resources_->getRaftHandleCurrentDevice().get_stream()); - - // Debug: Check first few values after GPU->CPU copy - printf(" - After GPU->CPU copy, first 5 values: [%.3f, %.3f, %.3f, %.3f, %.3f]\n", - train_dataset[0], train_dataset[1], train_dataset[2], train_dataset[3], train_dataset[4]); - printf(" - After GPU->CPU copy, values at d, d+1, d+2, d+3, d+4: [%.3f, %.3f, %.3f, %.3f, %.3f]\n", - train_dataset[index->d], train_dataset[index->d+1], train_dataset[index->d+2], train_dataset[index->d+3], train_dataset[index->d+4]); } else { train_dataset = const_cast(dataset); - printf(" - Using CPU dataset directly\n"); - printf(" - 
CPU dataset first 5 values: [%.3f, %.3f, %.3f, %.3f, %.3f]\n", - train_dataset[0], train_dataset[1], train_dataset[2], train_dataset[3], train_dataset[4]); } // turn off as level 0 is copied from CAGRA graph index->init_level0 = false; - printf(" - Adding %ld vectors to CPU index\n", n_train); if (!index->base_level_only) { index->add(n_train, train_dataset); } else { @@ -281,22 +264,7 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { index->ntotal = n_train; } - // Debug: Check vectors in CPU index after adding - if (index->storage) { - auto flat_storage = dynamic_cast(index->storage); - if (flat_storage) { - const float* vectors = flat_storage->get_xb(); - printf(" - After adding to CPU index, first 5 values: [%.3f, %.3f, %.3f, %.3f, %.3f]\n", - vectors[0], vectors[1], vectors[2], vectors[3], vectors[4]); - printf(" - After adding to CPU index, values at d, d+1, d+2, d+3, d+4: [%.3f, %.3f, %.3f, %.3f, %.3f]\n", - vectors[index->d], vectors[index->d+1], vectors[index->d+2], vectors[index->d+3], vectors[index->d+4]); - } - } - // Test search immediately after adding vectors to CPU index - printf("🔍 Testing CPU index immediately after adding vectors...\n"); - fflush(stdout); - test_cpu_index_after_copyto(index); if (allocation) { delete[] train_dataset; From d2b3fa1177313abba41b65e4bd40ba7e8525817e Mon Sep 17 00:00:00 2001 From: QingyangYinUber Date: Tue, 8 Jul 2025 20:43:02 +0000 Subject: [PATCH 04/10] Final code cleaning --- c_api/gpu/GpuIndex_c.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/c_api/gpu/GpuIndex_c.cpp b/c_api/gpu/GpuIndex_c.cpp index f56edc54f7..c4f094f5ee 100644 --- a/c_api/gpu/GpuIndex_c.cpp +++ b/c_api/gpu/GpuIndex_c.cpp @@ -22,7 +22,6 @@ using faiss::gpu::GpuIndexConfig; DEFINE_GETTER(GpuIndexConfig, int, device) int faiss_index_gpu_to_cpu_new(const FaissIndex* gpu_index, FaissIndex** p_out) { - // Call the existing function from GpuAutoTune_c.cpp int result = faiss_index_gpu_to_cpu(gpu_index, p_out); return result; } 
@@ -32,7 +31,6 @@ int faiss_index_cpu_to_gpu_new( int device, const FaissIndex* index, FaissGpuIndex** p_out) { - // Call the existing function from GpuAutoTune_c.cpp return faiss_index_cpu_to_gpu(provider, device, index, p_out); } @@ -61,8 +59,6 @@ int faiss_GpuIndexCagra_new( CATCH_AND_HANDLE } - - int faiss_SearchParametersCagra_new( FaissSearchParameters** p_params, size_t itopk_size) { From 1e206af00c11adc2cbfe506fc33b2cf6eec2fbb7 Mon Sep 17 00:00:00 2001 From: QingyangYinUber Date: Wed, 9 Jul 2025 21:53:13 +0000 Subject: [PATCH 05/10] Final code cleaning --- c_api/gpu/GpuAutoTune_c.cpp | 2 -- c_api/gpu/GpuIndex_c.cpp | 1 - faiss/gpu/GpuCloner.cpp | 6 ------ faiss/gpu/GpuCloner.h | 2 -- faiss/gpu/GpuIndexCagra.cu | 5 +---- 5 files changed, 1 insertion(+), 15 deletions(-) diff --git a/c_api/gpu/GpuAutoTune_c.cpp b/c_api/gpu/GpuAutoTune_c.cpp index a527890bf4..cd5c4efb69 100644 --- a/c_api/gpu/GpuAutoTune_c.cpp +++ b/c_api/gpu/GpuAutoTune_c.cpp @@ -14,7 +14,6 @@ #include #include #include -#include #include "GpuClonerOptions_c.h" #include "macros_impl.h" @@ -24,7 +23,6 @@ using faiss::gpu::GpuMultipleClonerOptions; using faiss::gpu::GpuResourcesProvider; - int faiss_index_gpu_to_cpu(const FaissIndex* gpu_index, FaissIndex** p_out) { if (gpu_index == nullptr) { return -1; diff --git a/c_api/gpu/GpuIndex_c.cpp b/c_api/gpu/GpuIndex_c.cpp index c4f094f5ee..588f6ab88c 100644 --- a/c_api/gpu/GpuIndex_c.cpp +++ b/c_api/gpu/GpuIndex_c.cpp @@ -15,7 +15,6 @@ #include #include #include -#include using faiss::gpu::GpuIndexConfig; diff --git a/faiss/gpu/GpuCloner.cpp b/faiss/gpu/GpuCloner.cpp index f0dd162762..772c85d2c9 100644 --- a/faiss/gpu/GpuCloner.cpp +++ b/faiss/gpu/GpuCloner.cpp @@ -38,14 +38,10 @@ #include #include #include -#include namespace faiss { namespace gpu { -// Function declarations - - /********************************************************** * Cloning to CPU **********************************************************/ @@ -596,7 +592,5 @@ 
faiss::IndexBinary* index_binary_cpu_to_gpu_multiple( } } - - } // namespace gpu } // namespace faiss diff --git a/faiss/gpu/GpuCloner.h b/faiss/gpu/GpuCloner.h index 5ae4a2ce4d..d92d81ffe1 100644 --- a/faiss/gpu/GpuCloner.h +++ b/faiss/gpu/GpuCloner.h @@ -117,7 +117,5 @@ faiss::IndexBinary* index_binary_cpu_to_gpu_multiple( const faiss::IndexBinary* index, const GpuMultipleClonerOptions* options = nullptr); - - } // namespace gpu } // namespace faiss diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index 09029e8355..26cb802f10 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -29,7 +29,6 @@ #include #include #include -#include namespace faiss { namespace gpu { @@ -254,7 +253,7 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { train_dataset = const_cast(dataset); } - // turn off as level 0 is copied from CAGRA graph + // turn off as level 0 is copied from CAGRA graph index->init_level0 = false; if (!index->base_level_only) { index->add(n_train, train_dataset); @@ -264,8 +263,6 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { index->ntotal = n_train; } - - if (allocation) { delete[] train_dataset; } From 0f4e1efd146733f8ef7df383c18e9a2006fb08fc Mon Sep 17 00:00:00 2001 From: QingyangYinUber Date: Thu, 10 Jul 2025 17:21:30 +0000 Subject: [PATCH 06/10] Remove unnecessary import and throw exception for null pointer --- c_api/gpu/GpuAutoTune_c.cpp | 14 +++++++------- faiss/gpu/GpuCloner.cpp | 10 +++------- faiss/gpu/GpuIndexCagra.cu | 3 --- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/c_api/gpu/GpuAutoTune_c.cpp b/c_api/gpu/GpuAutoTune_c.cpp index cd5c4efb69..4db8d68ce8 100644 --- a/c_api/gpu/GpuAutoTune_c.cpp +++ b/c_api/gpu/GpuAutoTune_c.cpp @@ -24,15 +24,15 @@ using faiss::gpu::GpuResourcesProvider; int faiss_index_gpu_to_cpu(const FaissIndex* gpu_index, FaissIndex** p_out) { - if (gpu_index == nullptr) { - return -1; - } + try { + if (gpu_index == nullptr) { + throw 
std::invalid_argument("gpu_index cannot be null"); + } - if (p_out == nullptr) { - return -1; - } + if (p_out == nullptr) { + throw std::invalid_argument("p_out cannot be null"); + } - try { auto cpu_index = faiss::gpu::index_gpu_to_cpu( reinterpret_cast(gpu_index)); diff --git a/faiss/gpu/GpuCloner.cpp b/faiss/gpu/GpuCloner.cpp index 772c85d2c9..575ee2e0a5 100644 --- a/faiss/gpu/GpuCloner.cpp +++ b/faiss/gpu/GpuCloner.cpp @@ -37,7 +37,6 @@ #include #include #include -#include namespace faiss { namespace gpu { @@ -113,18 +112,15 @@ Index* ToCPUCloner::clone_Index(const Index* index) { } else if (auto ipr = dynamic_cast(index)) { // just clone one of the replicas FAISS_ASSERT(ipr->count() > 0); - auto result = clone_Index(ipr->at(0)); - return result; + return clone_Index(ipr->at(0)); } else { - auto result = Cloner::clone_Index(index); - return result; + return Cloner::clone_Index(index); } } faiss::Index* index_gpu_to_cpu(const faiss::Index* gpu_index) { ToCPUCloner cl; - auto result = cl.clone_Index(gpu_index); - return result; + return cl.clone_Index(gpu_index); } /********************************************************** diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index 26cb802f10..ef240819bb 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -23,12 +23,10 @@ #include #include -#include #include #include #include #include -#include namespace faiss { namespace gpu { @@ -237,7 +235,6 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { auto n_train = this->ntotal; float* train_dataset; - auto dataset = index_->get_training_dataset(); bool allocation = false; From 8c26b9da393e1afbbda8b9e3c76f473bbb515390 Mon Sep 17 00:00:00 2001 From: QingyangYinUber Date: Thu, 10 Jul 2025 17:42:04 +0000 Subject: [PATCH 07/10] Remove unnecessary import --- c_api/gpu/GpuAutoTune_c.cpp | 2 -- c_api/gpu/GpuIndex_c.cpp | 1 - c_api/gpu/GpuIndex_c.h | 1 - faiss/gpu/GpuIndexCagra.cu | 2 -- 4 files changed, 6 deletions(-) 
diff --git a/c_api/gpu/GpuAutoTune_c.cpp b/c_api/gpu/GpuAutoTune_c.cpp index 4db8d68ce8..6f7c1249cf 100644 --- a/c_api/gpu/GpuAutoTune_c.cpp +++ b/c_api/gpu/GpuAutoTune_c.cpp @@ -22,7 +22,6 @@ using faiss::gpu::GpuClonerOptions; using faiss::gpu::GpuMultipleClonerOptions; using faiss::gpu::GpuResourcesProvider; - int faiss_index_gpu_to_cpu(const FaissIndex* gpu_index, FaissIndex** p_out) { try { if (gpu_index == nullptr) { @@ -35,7 +34,6 @@ int faiss_index_gpu_to_cpu(const FaissIndex* gpu_index, FaissIndex** p_out) { auto cpu_index = faiss::gpu::index_gpu_to_cpu( reinterpret_cast(gpu_index)); - *p_out = reinterpret_cast(cpu_index); } CATCH_AND_HANDLE diff --git a/c_api/gpu/GpuIndex_c.cpp b/c_api/gpu/GpuIndex_c.cpp index 588f6ab88c..d51b377633 100644 --- a/c_api/gpu/GpuIndex_c.cpp +++ b/c_api/gpu/GpuIndex_c.cpp @@ -14,7 +14,6 @@ #include #include #include -#include using faiss::gpu::GpuIndexConfig; diff --git a/c_api/gpu/GpuIndex_c.h b/c_api/gpu/GpuIndex_c.h index a0128d4117..42edd322a2 100644 --- a/c_api/gpu/GpuIndex_c.h +++ b/c_api/gpu/GpuIndex_c.h @@ -11,7 +11,6 @@ #define FAISS_GPU_INDEX_C_H #include "../Index_c.h" -#include "../faiss_c.h" #include "StandardGpuResources_c.h" #ifdef __cplusplus extern "C" { diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index ef240819bb..42a6092ddd 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -237,7 +237,6 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { float* train_dataset; auto dataset = index_->get_training_dataset(); bool allocation = false; - if (getDeviceForAddress(dataset) >= 0) { train_dataset = new float[n_train * index->d]; allocation = true; @@ -259,7 +258,6 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { index->storage->add(n_train, train_dataset); index->ntotal = n_train; } - if (allocation) { delete[] train_dataset; } From 9d9931958cea150c1dd0f5ed8251434f6ec0c3f1 Mon Sep 17 00:00:00 2001 From: QingyangYinUber Date: Thu, 10 
Jul 2025 17:48:04 +0000 Subject: [PATCH 08/10] Revert unnecessary changes --- c_api/gpu/GpuIndex_c.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/c_api/gpu/GpuIndex_c.h b/c_api/gpu/GpuIndex_c.h index 42edd322a2..eb9f0fc834 100644 --- a/c_api/gpu/GpuIndex_c.h +++ b/c_api/gpu/GpuIndex_c.h @@ -12,12 +12,15 @@ #include "../Index_c.h" #include "StandardGpuResources_c.h" + #ifdef __cplusplus extern "C" { #endif FAISS_DECLARE_CLASS(GpuIndexConfig) + FAISS_DECLARE_GETTER(GpuIndexConfig, int, device) + FAISS_DECLARE_CLASS_INHERITED(GpuIndex, Index) FAISS_DECLARE_CLASS(SearchParameters) From 1dca35d77dcfa5e3bcd497a4e37c6ded02e6bc63 Mon Sep 17 00:00:00 2001 From: QingyangYinUber Date: Tue, 5 Aug 2025 23:38:43 +0000 Subject: [PATCH 09/10] Add tests for using CAGRA in FAISS C API --- tests/CMakeLists.txt | 3 + tests/test_gpu_cagra_c_api.cpp | 222 +++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+) create mode 100644 tests/test_gpu_cagra_c_api.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 285b9090ed..af613a3ee6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -38,6 +38,7 @@ set(FAISS_TEST_SRC test_hamming.cpp test_mmap.cpp test_zerocopy.cpp + test_gpu_cagra_c_api.cpp ) add_executable(faiss_test ${FAISS_TEST_SRC}) @@ -84,9 +85,11 @@ find_package(GTest CONFIG REQUIRED) target_link_libraries(faiss_test PRIVATE OpenMP::OpenMP_CXX GTest::gtest_main + faiss_c $<$:hip::host> ) # Defines `gtest_discover_tests()`. include(GoogleTest) gtest_discover_tests(faiss_test) + diff --git a/tests/test_gpu_cagra_c_api.cpp b/tests/test_gpu_cagra_c_api.cpp new file mode 100644 index 0000000000..00144f7e29 --- /dev/null +++ b/tests/test_gpu_cagra_c_api.cpp @@ -0,0 +1,222 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include + +#include "c_api/AutoTune_c.h" +#include "c_api/Index_c.h" +#include "c_api/error_c.h" +#include "c_api/index_factory_c.h" +#include "c_api/index_io_c.h" +#include "c_api/gpu/DeviceUtils_c.h" +#include "c_api/gpu/GpuAutoTune_c.h" +#include "c_api/gpu/StandardGpuResources_c.h" +#include "c_api/gpu/GpuIndex_c.h" +#include "c_api/gpu/GpuResources_c.h" + +namespace { + +class GpuCagraCAPITest : public ::testing::Test { +protected: + void SetUp() override { + // Check GPU availability + int gpus = -1; + if (faiss_get_num_gpus(&gpus) != 0) { + GTEST_SKIP() << "Failed to get GPU count"; + } + + if (gpus <= 0) { + GTEST_SKIP() << "No GPUs available"; + } + + // Create GPU resources + if (faiss_StandardGpuResources_new(&gpu_res_) != 0) { + GTEST_SKIP() << "Failed to create GPU resources"; + } + } + + void TearDown() override { + if (gpu_res_) { + faiss_StandardGpuResources_free(gpu_res_); + gpu_res_ = nullptr; + } + } + + FaissStandardGpuResources* gpu_res_ = nullptr; + + // Helper function to generate random vectors + std::vector generateRandomVectors(int nb, int d) { + std::vector vectors(nb * d); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(-1.0f, 1.0f); + + for (int i = 0; i < nb * d; ++i) { + vectors[i] = dis(gen); + } + return vectors; + } +}; + +TEST_F(GpuCagraCAPITest, TestGpuIndexCagraCreation) { + // Test different dimensions and graph degrees + std::vector dimensions = {64, 128, 256}; + std::vector graph_degrees = {32, 64, 128}; + std::vector metrics = {METRIC_L2, METRIC_INNER_PRODUCT}; + + for (int d : dimensions) { + for (size_t graph_degree : graph_degrees) { + for (FaissMetricType metric : metrics) { + FaissIndex* index = nullptr; + + // Test index creation + EXPECT_EQ(faiss_GpuIndexCagra_new(&index, gpu_res_, d, metric, graph_degree), 0); + EXPECT_NE(index, nullptr); + + // Test basic properties + EXPECT_FALSE(faiss_Index_is_trained(index)); + + // Test dimension + 
EXPECT_EQ(faiss_Index_d(index), d); + + // Clean up + faiss_Index_free(index); + } + } + } +} + +TEST_F(GpuCagraCAPITest, TestSearchParametersCagraCreation) { + // Test different itopk sizes + std::vector itopk_sizes = {1, 5, 10, 20, 50}; + + for (size_t itopk_size : itopk_sizes) { + FaissSearchParameters* params = nullptr; + + // Test parameter creation + EXPECT_EQ(faiss_SearchParametersCagra_new(&params, itopk_size), 0); + EXPECT_NE(params, nullptr); + + // Clean up + faiss_SearchParameters_free(params); + } +} + +TEST_F(GpuCagraCAPITest, TestGpuToCpuConversion) { + // Create a GPU CAGRA index + int d = 128; + size_t graph_degree = 64; + FaissIndex* gpu_index = nullptr; + + EXPECT_EQ(faiss_GpuIndexCagra_new(&gpu_index, gpu_res_, d, METRIC_L2, graph_degree), 0); + EXPECT_NE(gpu_index, nullptr); + + // Add some vectors to train the index + int nb = 100; + auto xb = generateRandomVectors(nb, d); + EXPECT_EQ(faiss_Index_add(gpu_index, nb, xb.data()), 0); + + // Convert GPU index to CPU + FaissIndex* cpu_index = nullptr; + EXPECT_EQ(faiss_index_gpu_to_cpu(gpu_index, &cpu_index), 0); + EXPECT_NE(cpu_index, nullptr); + + // Test that both indices have the same basic properties + EXPECT_EQ(faiss_Index_d(gpu_index), faiss_Index_d(cpu_index)); + EXPECT_EQ(faiss_Index_is_trained(gpu_index), faiss_Index_is_trained(cpu_index)); + + // Clean up + faiss_Index_free(gpu_index); + faiss_Index_free(cpu_index); +} + +TEST_F(GpuCagraCAPITest, TestCpuToGpuConversion) { + // Create a simple CPU index first (use Flat for GPU conversion) + int d = 128; + FaissIndex* cpu_index = nullptr; + EXPECT_EQ(faiss_index_factory(&cpu_index, d, "Flat", METRIC_L2), 0); + EXPECT_NE(cpu_index, nullptr); + + // Add some vectors to the CPU index + int nb = 100; + auto xb = generateRandomVectors(nb, d); + EXPECT_EQ(faiss_Index_add(cpu_index, nb, xb.data()), 0); + + // Convert CPU index to GPU + FaissGpuIndex* gpu_index = nullptr; + EXPECT_EQ(faiss_index_cpu_to_gpu(reinterpret_cast(gpu_res_), 0, cpu_index,
&gpu_index), 0); + EXPECT_NE(gpu_index, nullptr); + + // Test that both indices have the same basic properties + EXPECT_EQ(faiss_Index_d(cpu_index), faiss_Index_d(gpu_index)); + EXPECT_EQ(faiss_Index_is_trained(cpu_index), faiss_Index_is_trained(gpu_index)); + + // Clean up + faiss_Index_free(cpu_index); + faiss_Index_free(gpu_index); +} + +TEST_F(GpuCagraCAPITest, TestEndToEndWorkflow) { + // Generate test data + int d = 128; + int nb = 1000; + int nq = 10; + int k = 5; + + auto xb = generateRandomVectors(nb, d); + auto xq = generateRandomVectors(nq, d); + + // Create GPU CAGRA index + FaissIndex* gpu_index = nullptr; + size_t graph_degree = 64; + EXPECT_EQ(faiss_GpuIndexCagra_new(&gpu_index, gpu_res_, d, METRIC_L2, graph_degree), 0); + EXPECT_NE(gpu_index, nullptr); + + // Add vectors to the index + EXPECT_EQ(faiss_Index_add(gpu_index, nb, xb.data()), 0); + + // Create search parameters + FaissSearchParameters* search_params = nullptr; + size_t itopk_size = 10; + EXPECT_EQ(faiss_SearchParametersCagra_new(&search_params, itopk_size), 0); + EXPECT_NE(search_params, nullptr); + + // Perform search + std::vector I(k * nq); + std::vector D(k * nq); + + EXPECT_EQ(faiss_Index_search(gpu_index, nq, xq.data(), k, D.data(), I.data()), 0); + + // Verify search results + for (int i = 0; i < nq; ++i) { + for (int j = 0; j < k; ++j) { + EXPECT_GE(I[i * k + j], 0); + EXPECT_LT(I[i * k + j], nb); + EXPECT_GE(D[i * k + j], 0.0f); + } + } + + // Convert to CPU index + FaissIndex* cpu_index = nullptr; + EXPECT_EQ(faiss_index_gpu_to_cpu(gpu_index, &cpu_index), 0); + EXPECT_NE(cpu_index, nullptr); + + // Search with CPU index + std::vector I_cpu(k * nq); + std::vector D_cpu(k * nq); + + EXPECT_EQ(faiss_Index_search(cpu_index, nq, xq.data(), k, D_cpu.data(), I_cpu.data()), 0); + + // Clean up + faiss_SearchParameters_free(search_params); + faiss_Index_free(gpu_index); + faiss_Index_free(cpu_index); +} +} + From 691d7f3700c3c564772f776b0ef349181c8c239b Mon Sep 17 00:00:00 2001 From: 
QingyangYinUber Date: Thu, 14 Aug 2025 17:02:22 +0000 Subject: [PATCH 10/10] Clean code to follow the clang format --- c_api/gpu/GpuIndex_c.cpp | 16 +++++------ tests/test_gpu_cagra_c_api.cpp | 51 ++++++++++++++++++++++++---------- 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/c_api/gpu/GpuIndex_c.cpp b/c_api/gpu/GpuIndex_c.cpp index d51b377633..9c6bc3a535 100644 --- a/c_api/gpu/GpuIndex_c.cpp +++ b/c_api/gpu/GpuIndex_c.cpp @@ -8,18 +8,20 @@ // -*- c++ -*- #include "GpuIndex_c.h" -#include "GpuAutoTune_c.h" -#include "macros_impl.h" #include #include #include #include +#include "GpuAutoTune_c.h" +#include "macros_impl.h" using faiss::gpu::GpuIndexConfig; DEFINE_GETTER(GpuIndexConfig, int, device) -int faiss_index_gpu_to_cpu_new(const FaissIndex* gpu_index, FaissIndex** p_out) { +int faiss_index_gpu_to_cpu_new( + const FaissIndex* gpu_index, + FaissIndex** p_out) { int result = faiss_index_gpu_to_cpu(gpu_index, p_out); return result; } @@ -42,14 +44,10 @@ int faiss_GpuIndexCagra_new( faiss::gpu::GpuIndexCagraConfig config; config.graph_degree = graph_degree; - auto gpu_res = - reinterpret_cast(res); + auto gpu_res = reinterpret_cast(res); auto cagra_index = new faiss::gpu::GpuIndexCagra( - gpu_res, - d, - static_cast(metric), - config); + gpu_res, d, static_cast(metric), config); *p_index = reinterpret_cast(cagra_index); return 0; diff --git a/tests/test_gpu_cagra_c_api.cpp b/tests/test_gpu_cagra_c_api.cpp index 00144f7e29..abe6fbd695 100644 --- a/tests/test_gpu_cagra_c_api.cpp +++ b/tests/test_gpu_cagra_c_api.cpp @@ -12,18 +12,18 @@ #include "c_api/AutoTune_c.h" #include "c_api/Index_c.h" #include "c_api/error_c.h" -#include "c_api/index_factory_c.h" -#include "c_api/index_io_c.h" #include "c_api/gpu/DeviceUtils_c.h" #include "c_api/gpu/GpuAutoTune_c.h" -#include "c_api/gpu/StandardGpuResources_c.h" #include "c_api/gpu/GpuIndex_c.h" #include "c_api/gpu/GpuResources_c.h" +#include "c_api/gpu/StandardGpuResources_c.h" +#include 
"c_api/index_factory_c.h" +#include "c_api/index_io_c.h" namespace { class GpuCagraCAPITest : public ::testing::Test { -protected: + protected: void SetUp() override { // Check GPU availability int gpus = -1; @@ -76,7 +76,10 @@ TEST_F(GpuCagraCAPITest, TestGpuIndexCagraCreation) { FaissIndex* index = nullptr; // Test index creation - EXPECT_EQ(faiss_GpuIndexCagra_new(&index, gpu_res_, d, metric, graph_degree), 0); + EXPECT_EQ( + faiss_GpuIndexCagra_new( + &index, gpu_res_, d, metric, graph_degree), + 0); EXPECT_NE(index, nullptr); // Test basic properties @@ -114,7 +117,10 @@ TEST_F(GpuCagraCAPITest, TestGpuToCpuConversion) { size_t graph_degree = 64; FaissIndex* gpu_index = nullptr; - EXPECT_EQ(faiss_GpuIndexCagra_new(&gpu_index, gpu_res_, d, METRIC_L2, graph_degree), 0); + EXPECT_EQ( + faiss_GpuIndexCagra_new( + &gpu_index, gpu_res_, d, METRIC_L2, graph_degree), + 0); EXPECT_NE(gpu_index, nullptr); // Add some vectors to train the index @@ -129,7 +135,9 @@ TEST_F(GpuCagraCAPITest, TestGpuToCpuConversion) { // Test that both indices have the same basic properties EXPECT_EQ(faiss_Index_d(gpu_index), faiss_Index_d(cpu_index)); - EXPECT_EQ(faiss_Index_is_trained(gpu_index), faiss_Index_is_trained(cpu_index)); + EXPECT_EQ( + faiss_Index_is_trained(gpu_index), + faiss_Index_is_trained(cpu_index)); // Clean up faiss_Index_free(gpu_index); @@ -150,12 +158,20 @@ TEST_F(GpuCagraCAPITest, TestCpuToGpuConversion) { // Convert CPU index to GPU FaissGpuIndex* gpu_index = nullptr; - EXPECT_EQ(faiss_index_cpu_to_gpu(reinterpret_cast(gpu_res_), 0, cpu_index, &gpu_index), 0); + EXPECT_EQ( + faiss_index_cpu_to_gpu( + reinterpret_cast(gpu_res_), + 0, + cpu_index, + &gpu_index), + 0); EXPECT_NE(gpu_index, nullptr); // Test that both indices have the same basic properties EXPECT_EQ(faiss_Index_d(cpu_index), faiss_Index_d(gpu_index)); - EXPECT_EQ(faiss_Index_is_trained(cpu_index), faiss_Index_is_trained(gpu_index)); + EXPECT_EQ( + faiss_Index_is_trained(cpu_index), + 
faiss_Index_is_trained(gpu_index)); // Clean up faiss_Index_free(cpu_index); @@ -175,7 +191,10 @@ TEST_F(GpuCagraCAPITest, TestEndToEndWorkflow) { // Create GPU CAGRA index FaissIndex* gpu_index = nullptr; size_t graph_degree = 64; - EXPECT_EQ(faiss_GpuIndexCagra_new(&gpu_index, gpu_res_, d, METRIC_L2, graph_degree), 0); + EXPECT_EQ( + faiss_GpuIndexCagra_new( + &gpu_index, gpu_res_, d, METRIC_L2, graph_degree), + 0); EXPECT_NE(gpu_index, nullptr); // Add vectors to the index @@ -191,7 +210,9 @@ TEST_F(GpuCagraCAPITest, TestEndToEndWorkflow) { std::vector I(k * nq); std::vector D(k * nq); - EXPECT_EQ(faiss_Index_search(gpu_index, nq, xq.data(), k, D.data(), I.data()), 0); + EXPECT_EQ( + faiss_Index_search(gpu_index, nq, xq.data(), k, D.data(), I.data()), + 0); // Verify search results for (int i = 0; i < nq; ++i) { @@ -211,12 +232,14 @@ TEST_F(GpuCagraCAPITest, TestEndToEndWorkflow) { std::vector I_cpu(k * nq); std::vector D_cpu(k * nq); - EXPECT_EQ(faiss_Index_search(cpu_index, nq, xq.data(), k, D_cpu.data(), I_cpu.data()), 0); + EXPECT_EQ( + faiss_Index_search( + cpu_index, nq, xq.data(), k, D_cpu.data(), I_cpu.data()), + 0); // Clean up faiss_SearchParameters_free(search_params); faiss_Index_free(gpu_index); faiss_Index_free(cpu_index); } -} - +} // namespace