-
Notifications
You must be signed in to change notification settings - Fork 35
Add the usage example and test for shared library #110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
adccf77
8cd1db8
b9edbe8
b0b3f72
d79de82
d5faa28
c5a75a9
f14594a
776c4ae
a68294a
f30724d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| # Copyright 2025 Intel Corporation | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
cmake_minimum_required(VERSION 3.21)

project(svs_shared_library_example
    LANGUAGES CXX
)

# Archive containing the prebuilt SVS shared library that this example links against.
# Other AVX versions can be found at https://github.com/intel/ScalableVectorSearch/releases.
# Stored as a cache variable so a different build can be selected at configure time,
# e.g. `cmake -DSVS_URL=<url> ..`, without editing this file.
set(SVS_URL
    "https://github.com/intel/ScalableVectorSearch/releases/download/v0.0.7/svs-shared-library-0.0.7-avx2-static.tar.gz"
    CACHE STRING "URL of the prebuilt SVS shared library archive"
)
dian-lun-lin marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
# Download and unpack the prebuilt SVS archive at configure time.
include(FetchContent)
FetchContent_Declare(
    svs
    URL "${SVS_URL}"
)
FetchContent_MakeAvailable(svs)

# Make the unpacked archive visible to find_package()/find_library().
list(APPEND CMAKE_PREFIX_PATH "${svs_SOURCE_DIR}")
find_package(svs REQUIRED)
# REQUIRED makes a missing library a configure-time error instead of a confusing
# "SVS_SHARED-NOTFOUND" failure at link time.
find_library(SVS_SHARED svs_shared_library REQUIRED)
|
|
||
set(SVS_CXX_STANDARD 20)
# Append to (rather than overwrite) CMAKE_CXX_FLAGS so flags supplied by the user or a
# toolchain file are preserved.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -std=gnu++20 -march=native -mtune=native -Werror -Wall -Wextra -Wpedantic")

# The example executable; nothing links against it, so its dependencies are PRIVATE.
add_executable(shared shared.cpp)
target_link_libraries(shared PRIVATE ${SVS_SHARED} svs::svs)
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| <!-- | ||
| ~ Copyright 2025 Intel Corporation | ||
| ~ | ||
| ~ Licensed under the Apache License, Version 2.0 (the "License"); | ||
| ~ you may not use this file except in compliance with the License. | ||
| ~ You may obtain a copy of the License at | ||
| ~ | ||
| ~ http://www.apache.org/licenses/LICENSE-2.0 | ||
| ~ | ||
| ~ Unless required by applicable law or agreed to in writing, software | ||
| ~ distributed under the License is distributed on an "AS IS" BASIS, | ||
| ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| ~ See the License for the specific language governing permissions and | ||
| ~ limitations under the License. | ||
| --> | ||
|
|
||
| A simple example of how to use the SVS shared library. | ||
|
|
||
| Run the commands below to build and run the example against the SVS shared library: | ||
|
|
||
| ``` | ||
| mkdir build | ||
| cd build | ||
| CC=gcc-11 CXX=g++-11 cmake ../ | ||
| make -j | ||
| ./shared | ||
| ``` |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,142 @@ | ||
| /* | ||
| * Copyright 2025 Intel Corporation | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| // SVS | ||
| #include "svs/core/recall.h" | ||
| #include "svs/extensions/flat/leanvec.h" | ||
| #include "svs/extensions/flat/lvq.h" | ||
| #include "svs/extensions/vamana/leanvec.h" | ||
| #include "svs/extensions/vamana/lvq.h" | ||
| #include "svs/orchestrators/dynamic_vamana.h" | ||
| #include "svs/orchestrators/exhaustive.h" | ||
| #include "svs/orchestrators/vamana.h" | ||
|
|
||
| #include "utils.h" | ||
|
|
||
| // Alternative main definition | ||
| #include "svsmain.h" | ||
|
|
||
// SVS setup and parameters

// Thread count passed to the thread pool, index build and index assembly.
size_t num_threads = 4;
// Value applied to the index via set_search_window_size() before searching.
size_t search_window_size = 20;
// Number of neighbors requested per query and used for the recall computation.
size_t n_neighbors = 1;

// File names shared between data generation, conversion and search.
std::string dfname = "data.vecs";         // float dataset
std::string dfname_f16 = "data_f16.vecs"; // dataset re-encoded as svs::Float16
std::string qfname = "query.vecs";        // queries
std::string gtfname = "gt.vecs";          // ground truth

// Locations of the saved index components. These are plain objects rather than
// const references bound to temporaries, so ownership and lifetime are explicit.
const std::filesystem::path config_path = "./config";
const std::filesystem::path graph_path = "./graph";
// The data component has no path constant here; vamana_build() passes "data" directly.

const std::filesystem::path config_path_dynamic = "./config_dynamic";
const std::filesystem::path graph_path_dynamic = "./graph_dynamic";
|
|
||
| void svs_setup() { | ||
| // convert to fp16 | ||
| auto reader = svs::io::vecs::VecsReader<float>{dfname}; | ||
| auto writer = svs::io::vecs::VecsWriter<svs::Float16>{dfname_f16, reader.ndims()}; | ||
| { | ||
| for (auto i : reader) { | ||
| writer << i; | ||
| } | ||
| } | ||
| writer.flush(); | ||
| } | ||
|
|
||
| auto create_lvq_data() { | ||
| auto compressor = svs::lib::Lazy([=](svs::threads::ThreadPool auto& threadpool) { | ||
| auto data = svs::VectorDataLoader<svs::Float16>(dfname_f16).load(); | ||
| return svs::quantization::lvq::LVQDataset<4, 4>::compress(data, threadpool, 32); | ||
| }); | ||
|
|
||
| auto threadpool = svs::threads::as_threadpool(num_threads); | ||
| auto data = svs::detail::dispatch_load(compressor, threadpool); | ||
| return data; | ||
| } | ||
|
|
||
| template <typename Data, typename Distance> | ||
| void vamana_build(Data& data, Distance distance) { | ||
| auto parameters = svs::index::vamana::VamanaBuildParameters{ | ||
| 1.2, // alpha | ||
| 64, // graph max degree | ||
| 128, // search window size | ||
| 750, // max candidate pool size | ||
| 60, // prune to degree | ||
| true, // full search history | ||
| }; | ||
|
|
||
| auto tic = svs::lib::now(); | ||
| svs::Vamana index = svs::Vamana::build<float>(parameters, data, distance, num_threads); | ||
| auto build_time = svs::lib::time_difference(tic); | ||
| fmt::print( | ||
| "Vamana index build time: {} Distance: {}\n", | ||
| build_time, | ||
| svs::name(svs::distance_type_v<Distance>) | ||
| ); | ||
| index.save("config", "graph", "data"); | ||
| } | ||
|
|
||
| template <typename Data, typename Distance> | ||
| void vamana_search(Data& data, Distance distance) { | ||
| auto index = svs::Vamana::assemble<float>( | ||
| config_path, svs::GraphLoader(graph_path), data, distance, num_threads | ||
| ); | ||
|
|
||
| index.set_search_window_size(search_window_size); | ||
| const auto query_data = svs::load_data<float>(qfname); | ||
| const auto groundtruth = svs::load_data<int>(gtfname); | ||
|
|
||
| auto tic = svs::lib::now(); | ||
| auto query_result = index.search(query_data, n_neighbors); | ||
| auto search_time = svs::lib::time_difference(tic); | ||
|
|
||
| std::vector<double> qps; | ||
| for (int i = 0; i < 5; i++) { | ||
| tic = svs::lib::now(); | ||
| query_result = index.search(query_data, n_neighbors); | ||
| search_time = svs::lib::time_difference(tic); | ||
| qps.push_back(query_data.size() / search_time); | ||
| } | ||
|
|
||
| auto recall = svs::k_recall_at_n(groundtruth, query_result, n_neighbors, n_neighbors); | ||
| // fmt::print("Raw QPS: {:7.3f} \n", fmt::join(qps, ", ")); | ||
| fmt::print( | ||
| "Vamana Distance: {}, sws: {}, Recall: {}, Max QPS: {:7.3f} \n", | ||
| svs::name(svs::distance_type_v<Distance>), | ||
| search_window_size, | ||
| recall, | ||
| *std::max_element(qps.begin(), qps.end()) | ||
| ); | ||
| } | ||
|
|
||
| // Alternative main definition | ||
| int svs_main(std::vector<std::string> SVS_UNUSED(args)) { | ||
| const size_t dim = 512; | ||
| size_t dataset_size = 100; | ||
| size_t query_size = 10; | ||
|
|
||
| generate_random_data(dim, dataset_size, query_size); | ||
| svs_setup(); | ||
|
|
||
| auto data = create_lvq_data(); | ||
| vamana_build(data, svs::distance::DistanceL2()); | ||
| vamana_search(data, svs::distance::DistanceL2()); | ||
|
|
||
| return 0; | ||
| } | ||
|
|
||
| // Special main providing some helpful utilities. | ||
| SVS_DEFINE_MAIN(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not necessarily within the scope of this PR, but this utils and svsmain functionality are defined in as many as 5 different places in codebase, might be worth defining this in a single utils file to reduce duplication at some point. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| /* | ||
| * Copyright 2025 Intel Corporation | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <exception> | ||
| #include <iostream> | ||
| #include <span> | ||
| #include <string> | ||
| #include <vector> | ||
|
|
||
| #include "svs/lib/exception.h" | ||
|
|
||
namespace svsmain {
namespace detail {
/// @brief Copy the C-style argument vector into owned strings.
///
/// @param argc Number of entries in `argv`.
/// @param argv Array of at least `argc` null-terminated strings.
/// @return One std::string per argument, in order; empty when `argc <= 0` or `argv`
///     is null.
///
/// Declared `inline` so this header-level definition can be included from more than
/// one translation unit without violating the one-definition rule.
inline std::vector<std::string> process_args(int argc, char* argv[]) {
    if (argc <= 0 || argv == nullptr) {
        return {};
    }
    return std::vector<std::string>(argv, argv + argc);
}
} // namespace detail
} // namespace svsmain
|
|
||
// Defines `main` as a thin wrapper around a user-provided
// `int svs_main(std::vector<std::string>)`.
//
// The arguments are converted with svsmain::detail::process_args and the value
// returned by svs_main becomes the process exit code. If svs_main throws an
// svs::ANNException or any other std::exception, the message is printed to std::cerr
// and EXIT_FAILURE is returned.
#define SVS_DEFINE_MAIN()                                                      \
    int main(int argc, char* argv[]) {                                         \
        try {                                                                  \
            return svs_main(svsmain::detail::process_args(argc, argv));        \
        } catch (const svs::ANNException& err) {                               \
            std::cerr << "Application terminated with ANNException: "          \
                      << err.what() << std::endl;                              \
        } catch (const std::exception& err) {                                  \
            std::cerr << "Application terminated with unknown exception: "     \
                      << err.what() << std::endl;                              \
        }                                                                      \
        return EXIT_FAILURE;                                                   \
    }
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,117 @@ | ||
| /* | ||
| * Copyright 2025 Intel Corporation | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| /***************************************************** | ||
| * I/O functions for fvecs, ivecs and xVecs | ||
| *****************************************************/ | ||
|
|
||
| #include <random> | ||
| #include <sys/stat.h> | ||
| #include <sys/types.h> | ||
| #include <unistd.h> | ||
|
|
||
/// @brief Write one float vector to `fo` as a single record.
///
/// The record is the dimension `d` as a raw `int` followed by `d` raw `float` values.
///
/// @param fo Open, writable stream.
/// @param v  Pointer to at least `d` floats.
/// @param d  Number of components; must be non-negative.
/// @return 0 on success, -1 on a negative dimension or a short write.
///
/// Declared `inline` so the definition may be included from several translation units.
inline int fvec_fwrite(FILE* fo, const float* v, int d) {
    // A negative count would wrap to a huge size_t when handed to fwrite.
    if (d < 0) {
        fprintf(stderr, "fvec_fwrite: negative dimension\n");
        return -1;
    }
    const size_t count = static_cast<size_t>(d);

    // fwrite returns size_t; compare in that type instead of truncating to int.
    if (fwrite(&d, sizeof(int), 1, fo) != 1) {
        perror("fvec_fwrite: write error 1");
        return -1;
    }
    if (fwrite(v, sizeof(float), count, fo) != count) {
        perror("fvec_fwrite: write error 2");
        return -1;
    }
    return 0;
}
|
|
||
| int fvecs_write(const char* fname, int d, int n, const float* vf) { | ||
| FILE* fo = fopen(fname, "w"); | ||
| if (!fo) { | ||
| perror("fvecs_write: cannot open file"); | ||
| return -1; | ||
| } | ||
|
|
||
| int i; | ||
| /* write down the vectors as fvecs */ | ||
| for (i = 0; i < n; i++) { | ||
| if (fvec_fwrite(fo, vf + i * d, d) < 0) | ||
| return -1; | ||
| } | ||
| fclose(fo); | ||
| return n; | ||
| } | ||
|
|
||
/// @brief Write one int vector to `fo` as a single record.
///
/// The record is the dimension `d` as a raw `int` followed by `d` raw `int` values.
///
/// @param fo Open, writable stream.
/// @param v  Pointer to at least `d` ints.
/// @param d  Number of components; must be non-negative.
/// @return 0 on success, -1 on a negative dimension or a short write.
///
/// Declared `inline` so the definition may be included from several translation units.
inline int ivec_iwrite(FILE* fo, const int* v, int d) {
    // A negative count would wrap to a huge size_t when handed to fwrite.
    if (d < 0) {
        fprintf(stderr, "ivec_iwrite: negative dimension\n");
        return -1;
    }
    const size_t count = static_cast<size_t>(d);

    // fwrite returns size_t; compare in that type instead of truncating to int.
    if (fwrite(&d, sizeof(int), 1, fo) != 1) {
        perror("ivec_iwrite: write error 1");
        return -1;
    }
    // The payload elements are ints, so size them as int rather than float.
    if (fwrite(v, sizeof(int), count, fo) != count) {
        perror("ivec_iwrite: write error 2");
        return -1;
    }
    return 0;
}
|
|
||
| int ivecs_write(const char* fname, int d, int n, const int* vf) { | ||
| FILE* fo = fopen(fname, "w"); | ||
| if (!fo) { | ||
| perror("fvecs_write: cannot open file"); | ||
| return -1; | ||
| } | ||
|
|
||
| int i; | ||
| /* write down the vectors as fvecs */ | ||
| for (i = 0; i < n; i++) { | ||
| if (ivec_iwrite(fo, vf + i * d, d) < 0) | ||
| return -1; | ||
| } | ||
| fclose(fo); | ||
| return n; | ||
| } | ||
|
|
||
| void generate_random_data(size_t data_dim, size_t dataset_size, size_t query_size) { | ||
| float dataset_std = 1.0f, query_std = 0.1f; | ||
|
|
||
| std::default_random_engine generator; | ||
| std::normal_distribution<float> dataset_dist(0.0f, dataset_std); | ||
| std::normal_distribution<float> query_dist(0.0f, query_std); | ||
| std::uniform_int_distribution<> uni_dist(0, dataset_size - 1); | ||
|
|
||
| generator.seed(100); | ||
| std::vector<float> dataset(dataset_size * data_dim); | ||
| for (size_t i = 0; i < dataset.size(); ++i) { | ||
| dataset[i] = dataset_dist(generator); | ||
| } | ||
|
|
||
| std::vector<float> queries(query_size * data_dim); | ||
| std::vector<int> gt(query_size); | ||
| for (size_t i = 0; i < query_size; ++i) { | ||
| int e = uni_dist(generator); | ||
| for (size_t j = 0; j < data_dim; ++j) { | ||
| queries[i * data_dim + j] = dataset[e * data_dim + j] + query_dist(generator); | ||
| } | ||
| gt[i] = e; | ||
| } | ||
|
|
||
| fvecs_write("data.vecs", data_dim, dataset_size, dataset.data()); | ||
| fvecs_write("query.vecs", data_dim, query_size, queries.data()); | ||
| ivecs_write("gt.vecs", 1, query_size, gt.data()); | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.