Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions faiss/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ enum NumericType {
Float32,
Float16,
UInt8,
Int8,
};

inline size_t get_numeric_type_size(NumericType numeric_type) {
Expand All @@ -70,6 +71,9 @@ inline size_t get_numeric_type_size(NumericType numeric_type) {
return 4;
case NumericType::Float16:
return 2;
case NumericType::UInt8:
case NumericType::Int8:
return 1;
default:
FAISS_THROW_MSG(
"Unknown Numeric Type. Only supports Float32, Float16");
Expand Down
2 changes: 1 addition & 1 deletion faiss/gpu/GpuCloner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Index* ToCPUCloner::clone_Index(const Index* index) {
#if defined USE_NVIDIA_CUVS
else if (auto icg = dynamic_cast<const GpuIndexCagra*>(index)) {
IndexHNSWCagra* res = new IndexHNSWCagra();
if (icg->get_numeric_type() == faiss::NumericType::Float16) {
if (icg->get_numeric_type() != faiss::NumericType::Float32) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my learning, what happens in the 16 and int8 case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for copyTo where we copy from GpuIndexCagra to IndexHNSWCagra.
If base_level_only=false, we end up calling hnsw_add_vertices, which only supports fp32 computations on CPU, so we need it to be true.
When base_level_only=true, we can use the storage(which is IndexScalarQuantizer)'s add() instead. : )

res->base_level_only = true;
}
icg->copyTo(res);
Expand Down
32 changes: 32 additions & 0 deletions faiss/gpu/GpuIndex.cu
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ void GpuIndex::addPaged_(
dispatch(float{});
} else if (numeric_type == NumericType::Float16) {
dispatch(half{});
} else if (numeric_type == NumericType::Int8) {
dispatch(int8_t{});
} else {
FAISS_THROW_MSG("GpuIndex::addPaged_: Unsupported numeric type");
}
Expand Down Expand Up @@ -251,6 +253,8 @@ void GpuIndex::addPage_(
dispatch(float{});
} else if (numeric_type == NumericType::Float16) {
dispatch(half{});
} else if (numeric_type == NumericType::Int8) {
dispatch(int8_t{});
} else {
FAISS_THROW_MSG("GpuIndex::addPage_: Unsupported numeric type");
}
Expand Down Expand Up @@ -411,6 +415,22 @@ void GpuIndex::searchNonPaged_(
stream,
{n, this->d});

searchImpl_(
n,
static_cast<const void*>(vecs.data()),
numeric_type,
k,
outDistancesData,
outIndicesData,
params);
} else if (numeric_type == NumericType::Int8) {
auto vecs = toDeviceTemporary<int8_t, 2>(
resources_.get(),
config_.device,
const_cast<int8_t*>(static_cast<const int8_t*>(x)),
stream,
{n, this->d});

searchImpl_(
n,
static_cast<const void*>(vecs.data()),
Expand Down Expand Up @@ -489,6 +509,16 @@ void GpuIndex::searchFromCpuPaged_(
outDistancesSlice.data(),
outIndicesSlice.data(),
params);
} else if (numeric_type == NumericType::Int8) {
searchNonPaged_(
num,
static_cast<const void*>(
static_cast<const int8_t*>(x) + cur * this->d),
numeric_type,
k,
outDistancesSlice.data(),
outIndicesSlice.data(),
params);
}
}

Expand Down Expand Up @@ -645,6 +675,8 @@ void GpuIndex::searchFromCpuPaged_(
dispatch(float{});
} else if (numeric_type == NumericType::Float16) {
dispatch(half{});
} else if (numeric_type == NumericType::Int8) {
dispatch(int8_t{});
} else {
FAISS_THROW_MSG(
"GpuIndex::searchFromCpuPaged_: Unsupported numeric type");
Expand Down
109 changes: 109 additions & 0 deletions faiss/gpu/GpuIndexCagra.cu
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,24 @@ void GpuIndexCagra::train(idx_t n, const void* x, NumericType numeric_type) {
cagraConfig_.guarantee_connectivity);
std::get<std::shared_ptr<CuvsCagra<half>>>(index_)->train(
n, static_cast<const half*>(x));
} else if (numeric_type == NumericType::Int8) {
index_ = std::make_shared<CuvsCagra<int8_t>>(
this->resources_.get(),
this->d,
cagraConfig_.intermediate_graph_degree,
cagraConfig_.graph_degree,
static_cast<faiss::cagra_build_algo>(cagraConfig_.build_algo),
cagraConfig_.nn_descent_niter,
cagraConfig_.store_dataset,
this->metric_type,
this->metric_arg,
INDICES_64_BIT,
ivf_pq_params,
ivf_pq_search_params,
cagraConfig_.refine_rate,
cagraConfig_.guarantee_connectivity);
std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)->train(
n, static_cast<const int8_t*>(x));
} else {
FAISS_THROW_MSG("GpuIndexCagra::train unsupported data type");
}
Expand Down Expand Up @@ -232,6 +250,29 @@ void GpuIndexCagra::searchImpl_(
params->hashmap_max_fill_rate,
params->num_random_samplings,
params->seed);
} else if (numeric_type == NumericType::Int8) {
Tensor<int8_t, 2, true> queries(
const_cast<int8_t*>(static_cast<const int8_t*>(x)),
{n, this->d});

std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)->search(
queries,
k,
outDistances,
outLabels,
params->max_queries,
params->itopk_size,
params->max_iterations,
static_cast<faiss::cagra_search_algo>(params->algo),
params->team_size,
params->search_width,
params->min_iterations,
params->thread_block_size,
static_cast<faiss::cagra_hash_mode>(params->hashmap_mode),
params->hashmap_min_bitlen,
params->hashmap_max_fill_rate,
params->num_random_samplings,
params->seed);
} else {
FAISS_THROW_MSG("GpuIndexCagra::searchImpl_ unsupported data type");
}
Expand Down Expand Up @@ -314,6 +355,29 @@ void GpuIndexCagra::copyFrom(
this->metric_type,
this->metric_arg,
INDICES_64_BIT);
} else if (numeric_type == NumericType::Int8) {
auto base_index = dynamic_cast<IndexScalarQuantizer*>(index->storage);
FAISS_ASSERT(base_index);
auto dataset = (uint8_t*)base_index->codes.data();

// decode what was encoded by Quantizer8bitDirectSigned in
// ScalarQuantizer
int8_t* decoded_train_dataset = new int8_t[index->ntotal * index->d];
for (int i = 0; i < index->ntotal * this->d; i++) {
decoded_train_dataset[i] = dataset[i] - 128;
}

index_ = std::make_shared<CuvsCagra<int8_t>>(
this->resources_.get(),
this->d,
index->ntotal,
hnsw.nb_neighbors(0),
decoded_train_dataset,
knn_graph.data(),
this->metric_type,
this->metric_arg,
INDICES_64_BIT);
delete[] decoded_train_dataset;
} else {
FAISS_THROW_MSG("GpuIndexCagra::copyFrom unsupported data type");
}
Expand Down Expand Up @@ -348,6 +412,9 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const {
} else if (numeric_type_ == NumericType::Float16) {
graph_degree = std::get<std::shared_ptr<CuvsCagra<half>>>(index_)
->get_knngraph_degree();
} else if (numeric_type_ == NumericType::Int8) {
graph_degree = std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)
->get_knngraph_degree();
} else {
FAISS_THROW_MSG("GpuIndexCagra::copyTo unsupported data type");
}
Expand All @@ -368,6 +435,10 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const {
auto qtype = ScalarQuantizer::QT_fp16;
index->storage =
new IndexScalarQuantizer(index->d, qtype, this->metric_type);
} else if (numeric_type_ == NumericType::Int8) {
auto qtype = ScalarQuantizer::QT_8bit_direct_signed;
index->storage =
new IndexScalarQuantizer(index->d, qtype, this->metric_type);
}

index->own_fields = true;
Expand Down Expand Up @@ -438,6 +509,44 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const {
index->ntotal = n_train;
}

if (allocation) {
delete[] train_dataset;
}
} else if (numeric_type_ == NumericType::Int8) {
int8_t* train_dataset;
const int8_t* dataset =
std::get<std::shared_ptr<CuvsCagra<int8_t>>>(index_)
->get_training_dataset();
if (getDeviceForAddress(dataset) >= 0) {
train_dataset = new int8_t[n_train * index->d];
allocation = true;
raft::copy(
train_dataset,
dataset,
n_train * index->d,
this->resources_->getRaftHandleCurrentDevice()
.get_stream());
} else {
train_dataset = const_cast<int8_t*>(dataset);
}

index->init_level0 = false;
if (!index->base_level_only) {
FAISS_THROW_MSG(
"Only base level copy is supported for Int8 types in GpuIndexCagra::copyTo");
} else {
index->hnsw.prepare_level_tab(n_train, false);
// applying encoding logic of Quantizer8bitDirectSigned
uint8_t* encoded_train_dataset = new uint8_t[n_train * index->d];
for (int i = 0; i < n_train * index->d; i++) {
encoded_train_dataset[i] = train_dataset[i] + 128;
}
index->storage->add_sa_codes(
n_train, encoded_train_dataset, nullptr);
delete[] encoded_train_dataset;
index->ntotal = n_train;
}

if (allocation) {
delete[] train_dataset;
}
Expand Down
3 changes: 2 additions & 1 deletion faiss/gpu/GpuIndexCagra.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,8 @@ struct GpuIndexCagra : public GpuIndex {
std::variant<
std::monostate,
std::shared_ptr<CuvsCagra<float>>,
std::shared_ptr<CuvsCagra<half>>>
std::shared_ptr<CuvsCagra<half>>,
std::shared_ptr<CuvsCagra<int8_t>>>
index_;
};

Expand Down
1 change: 1 addition & 0 deletions faiss/gpu/impl/CuvsCagra.cu
Original file line number Diff line number Diff line change
Expand Up @@ -337,5 +337,6 @@ const data_t* CuvsCagra<data_t>::get_training_dataset() const {

template class CuvsCagra<float>;
template class CuvsCagra<half>;
template class CuvsCagra<int8_t>;
} // namespace gpu
} // namespace faiss
Loading
Loading