Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ SVS offers a fully-featured and yet simple Python API, compatible with most stan
SVS is written in C++ to facilitate its integration into performance-critical applications.

**Please note** that this repository only contains the open-source portion of the SVS library, which supports all functionalities and features described in the [documentation](https://intel.github.io/ScalableVectorSearch/), except for our proprietary vector compression techniques, specifically LVQ [[ABHT23]](#1) and LeanVec [[TBAH24]](#2). These techniques are closed-source and supported exclusively on Intel hardware. We provide a [shared library](https://github.com/intel/ScalableVectorSearch/releases) and a [PyPI package](https://pypi.org/project/scalable-vs/) to enable these vector compression techniques in C++ and Python, respectively.
For guidance on using the shared library, please refer to [this example](https://github.com/intel/ScalableVectorSearch/tree/main/examples/cpp/shared).

## Performance

Expand Down
40 changes: 40 additions & 0 deletions examples/cpp/shared/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright 2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cmake_minimum_required(VERSION 3.21)

project(svs_shared_library_example
    LANGUAGES CXX
)

# Prebuilt SVS shared-library bundle that is downloaded at configure time.
# Other AVX versions can be found at https://github.com/intel/ScalableVectorSearch/releases.
# Stored as a cache entry so a different bundle can be selected with -DSVS_URL=<url>.
set(SVS_URL
    "https://github.com/intel/ScalableVectorSearch/releases/download/v0.0.8-dev/svs-shared-library-0.0.8-NIGHTLY-20250520-256-AVX512.tar.gz"
    CACHE STRING "URL of the SVS shared library bundle"
)

include(FetchContent)
FetchContent_Declare(
    svs
    URL "${SVS_URL}"
)
FetchContent_MakeAvailable(svs)

# Make the unpacked bundle visible to find_package() and find_library().
list(APPEND CMAKE_PREFIX_PATH "${svs_SOURCE_DIR}")
find_package(svs REQUIRED)
# REQUIRED makes a missing library a configure-time error instead of a link
# failure on "SVS_SHARED-NOTFOUND".
find_library(SVS_SHARED svs_shared_library REQUIRED)

set(SVS_CXX_STANDARD 20)

add_executable(shared shared.cpp)

# Attach the build flags to the target rather than overwriting CMAKE_CXX_FLAGS,
# so flags supplied by the user or a toolchain file are preserved.
# CXX_STANDARD 20 with extensions enabled corresponds to -std=gnu++20.
set_target_properties(shared PROPERTIES
    CXX_STANDARD 20
    CXX_STANDARD_REQUIRED ON
    CXX_EXTENSIONS ON
)
target_compile_options(shared PRIVATE
    -O3 -march=native -mtune=native -Werror -Wall -Wextra -Wpedantic
)
target_compile_definitions(shared PRIVATE NDEBUG)

# An executable has no consumers, so its dependencies are linked PRIVATE.
target_link_libraries(shared PRIVATE ${SVS_SHARED} svs::svs)

27 changes: 27 additions & 0 deletions examples/cpp/shared/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!--
~ Copyright 2025 Intel Corporation
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->

A simple example of how to use the SVS shared library.

Run the commands below to build the example against the SVS shared library and execute it:

```
mkdir build
cd build
CC=gcc-11 CXX=g++-11 cmake ../
make -j
./shared
```
140 changes: 140 additions & 0 deletions examples/cpp/shared/shared.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// SVS
#include "svs/core/recall.h"
#include "svs/extensions/flat/leanvec.h"
#include "svs/extensions/flat/lvq.h"
#include "svs/extensions/vamana/leanvec.h"
#include "svs/extensions/vamana/lvq.h"
#include "svs/orchestrators/dynamic_vamana.h"
#include "svs/orchestrators/exhaustive.h"
#include "svs/orchestrators/vamana.h"

#include "utils.h"

// Alternative main definition
#include "svsmain.h"

// SVS setup and parameters

// Thread count handed to the SVS thread pool, index build and index assembly.
size_t num_threads = 4;
// Search window size applied to the index before querying.
size_t search_window_size = 20;
// Neighbors requested per query; also used as both k and n in the recall computation.
size_t n_neighbors = 1;

// Files exchanged between the data generator, the fp16 conversion and the search.
std::string dfname = "data.vecs";
std::string dfname_f16 = "data_f16.vecs";
std::string qfname = "query.vecs";
std::string gtfname = "gt.vecs";

// Locations of the saved index components.
// These are plain values: a namespace-scope `const path&` bound to a temporary
// only works through lifetime extension and offers nothing over owning the path.
const std::filesystem::path config_path = "./config";
const std::filesystem::path graph_path = "./graph";
// Not referenced by this example; marked so warning-as-error builds stay clean.
[[maybe_unused]] const std::filesystem::path config_path_dynamic = "./config_dynamic";
[[maybe_unused]] const std::filesystem::path graph_path_dynamic = "./graph_dynamic";

// Re-encodes the float dataset stored in `dfname` as fp16 and writes the
// result to `dfname_f16`.
void svs_setup() {
    auto source = svs::io::vecs::VecsReader<float>{dfname};
    // The output file is opened with the same dimensionality as the input.
    auto sink = svs::io::vecs::VecsWriter<svs::Float16>{dfname_f16, source.ndims()};

    // Stream every vector from the reader into the fp16 writer.
    for (auto vec : source) {
        sink << vec;
    }

    sink.flush();
}

auto create_lvq_data() {
auto compressor = svs::lib::Lazy([=](svs::threads::ThreadPool auto& threadpool) {
auto data = svs::VectorDataLoader<svs::Float16>(dfname_f16).load();
return svs::quantization::lvq::LVQDataset<4, 4>::compress(data, threadpool, 32);
});

auto threadpool = svs::threads::as_threadpool(num_threads);
auto data = svs::detail::dispatch_load(compressor, threadpool);
return data;
}

template <typename Data, typename Distance>
void vamana_build(Data& data, Distance distance) {
auto parameters = svs::index::vamana::VamanaBuildParameters{
1.2, // alpha
64, // graph max degree
128, // search window size
750, // max candidate pool size
60, // prune to degree
true, // full search history
};

auto tic = svs::lib::now();
svs::Vamana index = svs::Vamana::build<float>(parameters, data, distance, num_threads);
auto build_time = svs::lib::time_difference(tic);
fmt::print(
"Vamana index build time: {} Distance: {}\n",
build_time,
svs::name(svs::distance_type_v<Distance>)
);
index.save("config", "graph", "data");
}

// Assembles the saved Vamana index around `data`, runs the query set against
// it, and prints the recall together with the best queries-per-second figure.
template <typename Data, typename Distance>
void vamana_search(Data& data, Distance distance) {
    auto index = svs::Vamana::assemble<float>(
        config_path, svs::GraphLoader(graph_path), data, distance, num_threads
    );
    index.set_search_window_size(search_window_size);

    const auto queries = svs::load_data<float>(qfname);
    const auto expected = svs::load_data<int>(gtfname);

    // First pass; it does not contribute to the reported QPS.
    auto result = index.search(queries, n_neighbors);

    // Five timed passes; keep the highest throughput seen.
    double best_qps = 0;
    for (int run = 0; run < 5; ++run) {
        const auto start = svs::lib::now();
        result = index.search(queries, n_neighbors);
        const auto elapsed = svs::lib::time_difference(start);

        const double run_qps = queries.size() / elapsed;
        if (run == 0 || run_qps > best_qps) {
            best_qps = run_qps;
        }
    }

    // Recall is evaluated on the result of the last timed pass.
    auto recall = svs::k_recall_at_n(expected, result, n_neighbors, n_neighbors);
    fmt::print(
        "Vamana Distance: {}, sws: {}, Recall: {}, Max QPS: {:7.3f} \n",
        svs::name(svs::distance_type_v<Distance>),
        search_window_size,
        recall,
        best_qps
    );
}

// Entry point invoked by the main() that SVS_DEFINE_MAIN() generates.
// Generates a random dataset, converts it to fp16, compresses it, then builds
// and searches a Vamana index using the L2 distance. The command-line
// arguments are not used.
int svs_main(std::vector<std::string> SVS_UNUSED(args)) {
    constexpr size_t dim = 512;
    constexpr size_t dataset_size = 100;
    constexpr size_t query_size = 10;

    // Write the input files, then produce the fp16 copy of the dataset.
    generate_random_data(dim, dataset_size, query_size);
    svs_setup();

    auto data = create_lvq_data();
    const auto metric = svs::distance::DistanceL2();
    vamana_build(data, metric);
    vamana_search(data, metric);

    return 0;
}

// Special main providing some helpful utilities: it forwards the command-line
// arguments to svs_main() as strings and reports exceptions that escape it.
SVS_DEFINE_MAIN();
54 changes: 54 additions & 0 deletions examples/cpp/shared/svsmain.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <exception>
#include <iostream>
#include <span>
#include <string>
#include <vector>

#include "svs/lib/exception.h"

namespace svsmain {
namespace detail {
/// @brief Copy the C-style argument vector into owned strings.
///
/// Declared `inline` because this definition lives in a header; without it,
/// including the header from more than one translation unit violates the ODR.
///
/// @param argc Number of entries in `argv` to copy.
/// @param argv Array of at least `argc` null-terminated strings.
/// @return One std::string per argument, in the original order.
inline std::vector<std::string> process_args(int argc, char* argv[]) {
    return std::vector<std::string>(argv, argv + argc);
}
} // namespace detail
} // namespace svsmain

// Expands to a `main` that converts argc/argv to a std::vector<std::string>,
// passes it to `svs_main` (which must be declared before the expansion) and
// returns its result. If an svs::ANNException or any other std::exception
// escapes `svs_main`, the message is written to std::cerr and the process
// returns EXIT_FAILURE.
#define SVS_DEFINE_MAIN()                                                                \
    int main(int argc, char* argv[]) {                                                   \
        try {                                                                            \
            return svs_main(svsmain::detail::process_args(argc, argv));                  \
        } catch (const svs::ANNException& err) {                                         \
            std::cerr << "Application terminated with ANNException: " << err.what()      \
                      << std::endl;                                                      \
        } catch (const std::exception& err) {                                            \
            std::cerr << "Application terminated with unknown exception: " << err.what() \
                      << std::endl;                                                      \
        }                                                                                \
        return EXIT_FAILURE;                                                             \
    }
117 changes: 117 additions & 0 deletions examples/cpp/shared/utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*****************************************************
* I/O functions for fvecs, ivecs and xVecs
*****************************************************/

#include <random>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/// Writes one vector in fvecs layout: the dimension as an `int`, followed by
/// `d` floats.
///
/// @param fo Open, writable stream.
/// @param v  Pointer to at least `d` floats.
/// @param d  Number of components; must not be negative.
/// @return 0 on success, -1 on a negative dimension or a write error.
///
/// `inline` because the definition lives in a header.
inline int fvec_fwrite(FILE* fo, const float* v, int d) {
    // A negative count would convert to an enormous size_t inside fwrite.
    if (d < 0) {
        fprintf(stderr, "fvec_fwrite: negative dimension %d\n", d);
        return -1;
    }
    if (fwrite(&d, sizeof(int), 1, fo) != 1) {
        perror("fvec_fwrite: write error 1");
        return -1;
    }
    // fwrite returns size_t; compare in that type instead of narrowing to int.
    const size_t count = static_cast<size_t>(d);
    if (fwrite(v, sizeof(float), count, fo) != count) {
        perror("fvec_fwrite: write error 2");
        return -1;
    }
    return 0;
}

/// Writes `n` float vectors of dimension `d` to `fname` in fvecs layout.
///
/// @param fname Output path; the file is created or truncated.
/// @param d     Dimension of every vector; must not be negative.
/// @param n     Number of vectors to write.
/// @param vf    Row-major buffer holding `n * d` floats.
/// @return `n` on success, -1 if the file cannot be opened, written or closed.
///
/// `inline` because the definition lives in a header.
inline int fvecs_write(const char* fname, int d, int n, const float* vf) {
    if (d < 0) {
        fprintf(stderr, "fvecs_write: negative dimension %d\n", d);
        return -1;
    }

    // Binary mode: the payload is raw ints/floats, not text.
    FILE* fo = fopen(fname, "wb");
    if (!fo) {
        perror("fvecs_write: cannot open file");
        return -1;
    }

    /* write down the vectors as fvecs */
    for (int i = 0; i < n; i++) {
        // Compute the row offset in size_t so large datasets cannot overflow int.
        const float* row = vf + static_cast<size_t>(i) * static_cast<size_t>(d);
        if (fvec_fwrite(fo, row, d) < 0) {
            fclose(fo); // do not leak the stream on the error path
            return -1;
        }
    }

    // Buffered data is flushed here, so a failing close is a failed write.
    if (fclose(fo) != 0) {
        perror("fvecs_write: close error");
        return -1;
    }
    return n;
}

/// Writes one vector in ivecs layout: the dimension as an `int`, followed by
/// `d` ints.
///
/// @param fo Open, writable stream.
/// @param v  Pointer to at least `d` ints.
/// @param d  Number of components; must not be negative.
/// @return 0 on success, -1 on a negative dimension or a write error.
///
/// `inline` because the definition lives in a header.
inline int ivec_iwrite(FILE* fo, const int* v, int d) {
    // A negative count would convert to an enormous size_t inside fwrite.
    if (d < 0) {
        fprintf(stderr, "ivec_iwrite: negative dimension %d\n", d);
        return -1;
    }
    if (fwrite(&d, sizeof(int), 1, fo) != 1) {
        perror("ivec_iwrite: write error 1");
        return -1;
    }
    // The payload is ints, so the element size is sizeof(int), not sizeof(float).
    const size_t count = static_cast<size_t>(d);
    if (fwrite(v, sizeof(int), count, fo) != count) {
        perror("ivec_iwrite: write error 2");
        return -1;
    }
    return 0;
}

/// Writes `n` int vectors of dimension `d` to `fname` in ivecs layout.
///
/// @param fname Output path; the file is created or truncated.
/// @param d     Dimension of every vector; must not be negative.
/// @param n     Number of vectors to write.
/// @param vf    Row-major buffer holding `n * d` ints.
/// @return `n` on success, -1 if the file cannot be opened, written or closed.
///
/// `inline` because the definition lives in a header.
inline int ivecs_write(const char* fname, int d, int n, const int* vf) {
    if (d < 0) {
        fprintf(stderr, "ivecs_write: negative dimension %d\n", d);
        return -1;
    }

    // Binary mode: the payload is raw ints, not text.
    FILE* fo = fopen(fname, "wb");
    if (!fo) {
        perror("ivecs_write: cannot open file");
        return -1;
    }

    /* write down the vectors as ivecs */
    for (int i = 0; i < n; i++) {
        // Compute the row offset in size_t so large datasets cannot overflow int.
        const int* row = vf + static_cast<size_t>(i) * static_cast<size_t>(d);
        if (ivec_iwrite(fo, row, d) < 0) {
            fclose(fo); // do not leak the stream on the error path
            return -1;
        }
    }

    // Buffered data is flushed here, so a failing close is a failed write.
    if (fclose(fo) != 0) {
        perror("ivecs_write: close error");
        return -1;
    }
    return n;
}

// Generates a synthetic dataset, query set and groundtruth and writes them to
// "data.vecs", "query.vecs" and "gt.vecs".
//
// Dataset components are drawn from a normal distribution (mean 0, stddev 1).
// Each query is a uniformly chosen dataset vector plus normal noise (mean 0,
// stddev 0.1); the index of the chosen vector is recorded as that query's
// single groundtruth entry. The engine is seeded with a fixed value, so the
// same engine draws are requested on every run.
void generate_random_data(size_t data_dim, size_t dataset_size, size_t query_size) {
    const float base_stddev = 1.0f;
    const float noise_stddev = 0.1f;

    std::default_random_engine rng(100);
    std::normal_distribution<float> base_noise(0.0f, base_stddev);
    std::normal_distribution<float> query_noise(0.0f, noise_stddev);
    std::uniform_int_distribution<> pick_row(0, static_cast<int>(dataset_size - 1));

    // Base vectors, stored row-major.
    std::vector<float> base(dataset_size * data_dim);
    for (float& component : base) {
        component = base_noise(rng);
    }

    std::vector<float> queries(query_size * data_dim);
    std::vector<int> gt(query_size);
    for (size_t q = 0; q < query_size; ++q) {
        // Choose the source row first, then perturb each of its components.
        const int chosen = pick_row(rng);
        const size_t src = chosen * data_dim;
        const size_t dst = q * data_dim;
        for (size_t j = 0; j < data_dim; ++j) {
            queries[dst + j] = base[src + j] + query_noise(rng);
        }
        gt[q] = chosen;
    }

    fvecs_write("data.vecs", data_dim, dataset_size, base.data());
    fvecs_write("query.vecs", data_dim, query_size, queries.data());
    ivecs_write("gt.vecs", 1, query_size, gt.data());
}
Loading