Commit 6ebc640 (1 parent: 4e7ff32)
Co-authored-by: Patrick <[email protected]>
Showing 4 changed files with 222 additions and 0 deletions.
@@ -0,0 +1,11 @@
CXX=g++
CXXFLAGS=-fopenmp -O3 -std=c++14 -fPIC -DNDEBUG -Wall -g -lblas
# Include path assumes this example directory sits inside the PECOS source tree.
EXTRA_INCLUDE_FLAGS=-I../../pecos/core/
ARCHFLAG=-march=native

all: go

go: example.cpp
	${CXX} -o go ${CXXFLAGS} example.cpp -I. ${EXTRA_INCLUDE_FLAGS} ${ARCHFLAG}

clean:
	rm -rf *.so *.o go
@@ -0,0 +1,75 @@
## Notice
- Currently we only support L2 distance with 4-bit Product Quantization (each sub-vector is encoded with 4 bits, i.e. one of 16 centroids).
- We are working on extending to angular and inner-product (ip) distance measures.
## Install Prerequisite

To run this project, the prerequisites are the same as for building PECOS.

* For Ubuntu (18.04, 20.04):
```bash
sudo apt-get update && sudo apt-get install -y build-essential git python3 python3-distutils python3-venv
```
* For Amazon Linux 2 Image:
```bash
sudo yum -y install python3 python3-devel python3-distutils python3-venv && sudo yum -y groupinstall 'Development Tools'
```

One needs to install at least one BLAS library to compile PECOS, e.g. `OpenBLAS`:
* For Ubuntu (18.04, 20.04):
```bash
sudo apt-get install -y libopenblas-dev
```
* For Amazon Linux 2 Image and AMI:
```bash
sudo amazon-linux-extras install epel -y
sudo yum install openblas-devel -y
```
## Prepare Data

Get the exemplar sift-128-euclidean dataset:

```bash
wget https://archive.org/download/pecos-dataset/ann-benchmarks/sift-euclidean-128.tar.gz
```

Extract the dataset:

```bash
tar -xf sift-euclidean-128.tar.gz
```

The prepared dataset consists of three .npy files: X.trn.npy (training data), X.tst.npy (test data), and Y.tst.npy (the indices of the 10 nearest training neighbors of each test point).
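As a quick sanity check of the data layout, the arrays can be inspected with NumPy. This is a minimal sketch; the shapes in the comments are assumptions based on the description above, not verified output:

```python
import numpy as np

# Paths assume the archive was extracted into ./sift-euclidean-128
X_trn = np.load("sift-euclidean-128/X.trn.npy")  # training vectors, e.g. (num_train, 128) float32
X_tst = np.load("sift-euclidean-128/X.tst.npy")  # test vectors, e.g. (num_test, 128) float32
Y_tst = np.load("sift-euclidean-128/Y.tst.npy")  # indices of the 10 nearest training vectors per test vector

print(X_trn.shape, X_tst.shape, Y_tst.shape)
print(Y_tst[0])  # ground-truth neighbor ids for the first test query
```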
## Compile the source code

```bash
make clean go
```

An executable named `go` will be generated.
## Running the compiled executable

The executable takes arguments in the following form:
```bash
./go data_folder model_folder space M efC #threads efs num_rerank sub_dimension
```

- `data_folder`: the directory where the 3 .npy files are stored.
- `model_folder`: the directory used to store the trained model. If a saved model is found there, it is loaded instead of training a new one.
- `space`: the distance measure to use. Currently, only `l2` is supported.
- `M`: the maximum number of edge connections per node in the HNSW graph.
- `efC`: the size of the candidate queue (efConstruction) used during HNSW graph construction.
- `#threads`: the number of threads used to build the graph.
- `efs`: the search queue size used at inference time.
- `num_rerank`: the number of candidates in the search queue that are reranked with the original features instead of the quantized distances.
- `sub_dimension`: the dimension of each subspace in Product Quantization. If set to 0, the default scheme is used: sub_dimension 1 when the original data dimension is <= 400, and 2 otherwise.

The construction-related hyper-parameters (space, M, efC, #threads, and sub_dimension) are used to name the trained model file inside `model_folder`.
Here is an example command for executing the compiled binary:

```bash
./go sift-euclidean-128 sift-euclidean-128 l2 8 500 24 10 10 0
```

This uses the extracted `sift-euclidean-128` directory for both the data and the model, with M=8, efC=500, 24 threads, efs=10, reranking of the top 10 candidates, and the default sub_dimension.
## Experiment

The compiled example (example.cpp) already repeats the inference 10 times and keeps the best latency. To evaluate under the ann-benchmarks protocol, we can simply use Python to iterate over the hyper-parameters and record the results:

```bash
python run.py
```
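Each invocation of `go` prints a single line of the form `<recall> : <queries per second>,` (see the final `std::cout` in example.cpp below). As a minimal sketch of how the sweep results could be collected, assuming that output format and a hypothetical `results.txt` capture of the runs:

```python
# Hypothetical post-processing of the sweep output, e.g. produced via:
#   python run.py > results.txt
# Parses lines like "0.95123 : 1234.5," into (recall, qps) pairs.
results = []
with open("results.txt") as f:
    for line in f:
        if " : " not in line:
            continue  # skip progress messages such as "After train" / "After save"
        recall_str, qps_str = line.strip().rstrip(",").split(" : ")
        results.append((float(recall_str), float(qps_str)))

for recall, qps in sorted(results):
    print(f"recall@10 = {recall:.4f}, throughput = {qps:.1f} queries/s")
```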
@@ -0,0 +1,127 @@
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <string>
#include <unordered_set>
#include "utils/matrix.hpp"
#include "utils/scipy_loader.hpp"
#include "ann/hnsw.hpp"


// Simple stopwatch used to measure per-query latency in microseconds.
class StopW {
    std::chrono::steady_clock::time_point time_begin;
 public:
    StopW() {
        time_begin = std::chrono::steady_clock::now();
    }

    float getElapsedTimeMicro() {
        std::chrono::steady_clock::time_point time_end = std::chrono::steady_clock::now();
        return (std::chrono::duration_cast<std::chrono::microseconds>(time_end - time_begin).count());
    }

    void reset() {
        time_begin = std::chrono::steady_clock::now();
    }
};

int num_rerank;     // number of candidates reranked with the original (unquantized) features
int sub_dimension;  // dimension of each PQ subspace (0 = default scheme)
using pecos::ann::index_type;

typedef float32_t value_type;
typedef uint64_t mem_index_type;
typedef pecos::NpyArray<value_type> scipy_npy_t;


// Wrap a loaded .npy array as a PECOS dense row-major matrix (no copy).
auto npy_to_drm = [](scipy_npy_t& X_npy) -> pecos::drm_t {
    pecos::drm_t X;
    X.rows = X_npy.shape[0];
    X.cols = X_npy.shape[1];
    X.val = X_npy.array.data();
    return X;
};

template<typename MAT, typename feat_vec_t>
void run_dense(std::string data_dir, char* model_path, index_type M, index_type efC, index_type max_level, int threads, int efs) {
    // data prepare
    scipy_npy_t X_trn_npy(data_dir + "/X.trn.npy");
    scipy_npy_t X_tst_npy(data_dir + "/X.tst.npy");
    scipy_npy_t Y_tst_npy(data_dir + "/Y.tst.npy");
    auto X_trn = npy_to_drm(X_trn_npy);
    auto X_tst = npy_to_drm(X_tst_npy);
    auto Y_tst = npy_to_drm(Y_tst_npy);
    // model prepare
    index_type topk = 10;
    pecos::ann::HNSWProductQuantizer4Bits<float, feat_vec_t> indexer;
    FILE* fp = fopen(model_path, "rb");
    if (!fp) {
        // if subspace_dimension is set to 0, it will use default scheme. That is,
        // if dimension <= 400, we use subspace_dimension 1, otherwise we use 2.
        indexer.train(X_trn, M, efC, 0, 200, threads, max_level);
        std::cout << "After train" << std::endl;
        indexer.save(model_path);
        std::cout << "After save" << std::endl;
        indexer.load(model_path);
    } else {
        indexer.load(model_path);
        fclose(fp);
    }

    // prepare searcher for inference
    index_type num_data = X_tst.rows;
    auto searcher = indexer.create_searcher();
    searcher.prepare_inference();

    double latency = std::numeric_limits<double>::max();
    // REPEAT 10 times and report the best result
    for (int repeat = 0; repeat < 10; repeat++) {
        double inner_latency = 0.0;
        for (index_type idx = 0; idx < num_data; idx++) {
            StopW stopw = StopW();
            auto ret_pairs = indexer.predict_single(X_tst.get_row(idx), efs, topk, searcher, num_rerank);
            double ss = stopw.getElapsedTimeMicro();
            inner_latency += ss;
        }
        latency = std::min(latency, inner_latency);
    }
    // inference and calculate recalls
    double recall = 0.0;
    for (index_type idx = 0; idx < num_data; idx++) {
        auto ret_pairs = indexer.predict_single(X_tst.get_row(idx), efs, topk, searcher, num_rerank);
        std::unordered_set<pecos::csr_t::index_type> true_indices;

        for (auto k = 0u; k < topk; k++) {
            true_indices.insert(Y_tst.get_row(idx).val[k]);  // assume Y_tst is ascendingly sorted by distance
        }
        for (auto dist_idx_pair : ret_pairs) {
            if (true_indices.find(dist_idx_pair.node_id) != true_indices.end()) {
                recall += 1.0;
            }
        }
    }
    recall = recall / num_data / topk;
    latency = latency / num_data / 1000.;  // best total latency in usec -> average per-query latency in msec
    std::cout << recall << " : " << 1.0 / latency * 1e3 << "," << std::endl;  // prints "recall : queries-per-second,"
}

int main(int argc, char** argv) {
    std::string data_dir = argv[1];
    std::string model_dir = argv[2];
    std::string space_name = argv[3];
    index_type M = (index_type) atoi(argv[4]);
    index_type efC = (index_type) atoi(argv[5]);
    int threads = atoi(argv[6]);
    int efs = atoi(argv[7]);
    num_rerank = atoi(argv[8]);
    sub_dimension = atoi(argv[9]);
    index_type max_level = 8;
    char model_path[2048];
    sprintf(model_path, "%s/pecos.%s.M-%d_efC-%d_t-%d_d-%d.bin", model_dir.c_str(), space_name.c_str(), M, efC, threads, sub_dimension);
    // currently only support l2
    if (space_name.compare("l2") == 0) {
        run_dense<pecos::drm_t, pecos::ann::FeatVecDenseL2Simd<float>>(data_dir, model_path, M, efC, max_level, threads, efs);
    }
}
@@ -0,0 +1,9 @@
import os

# Sweep over M (graph connectivity) and efs (search queue size); efC=500, 24 threads,
# and sub_dimension=0 (default scheme) are fixed. The last %d is num_rerank.
cmd = "./go sift-euclidean-128 sift-euclidean-128 l2 %d 500 24 %d %d 0"
for M in [8, 16, 24, 36, 48, 64, 96]:
    for efs in [10, 20, 40, 80, 120, 200, 400]:
        os.system(cmd % (M, efs, efs))      # rerank the entire search queue
        if efs - 20 > 0:
            os.system(cmd % (M, efs, 20))   # rerank only the top 20 candidates
        if efs - 50 > 0:
            os.system(cmd % (M, efs, 50))   # rerank only the top 50 candidates