From e92370a06eaa3bbc5bacdba65cc9c3f125590071 Mon Sep 17 00:00:00 2001 From: Jimmy Lin Date: Mon, 24 Jun 2024 17:12:55 -0400 Subject: [PATCH] Simplify options for HNSW indexes (#2533) HNSW regressions for MS MARCO regressions: revert to "default settings" Continuation of #2531 (which was for BEIR) Will need to adjust score tolerance, but will circle back in a separate PR for that. --- ...ssage.bge-base-en-v1.5.flat-int8.cached.md | 22 +- ...passage.bge-base-en-v1.5.flat-int8.onnx.md | 22 +- ...19-passage.bge-base-en-v1.5.flat.cached.md | 22 +- ...dl19-passage.bge-base-en-v1.5.flat.onnx.md | 22 +- ...ssage.bge-base-en-v1.5.hnsw-int8.cached.md | 12 +- ...passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 12 +- ...19-passage.bge-base-en-v1.5.hnsw.cached.md | 2 +- ...dl19-passage.bge-base-en-v1.5.hnsw.onnx.md | 2 +- ...ere-embed-english-v3.0.flat-int8.cached.md | 22 +- ...e.cohere-embed-english-v3.0.flat.cached.md | 22 +- ...ere-embed-english-v3.0.hnsw-int8.cached.md | 12 +- ...e.cohere-embed-english-v3.0.hnsw.cached.md | 12 +- ...passage.cos-dpr-distil.flat-int8.cached.md | 22 +- ...9-passage.cos-dpr-distil.flat-int8.onnx.md | 22 +- ...dl19-passage.cos-dpr-distil.flat.cached.md | 22 +- ...s-dl19-passage.cos-dpr-distil.flat.onnx.md | 22 +- ...passage.cos-dpr-distil.hnsw-int8.cached.md | 12 +- ...9-passage.cos-dpr-distil.hnsw-int8.onnx.md | 12 +- ...dl19-passage.cos-dpr-distil.hnsw.cached.md | 2 +- ...s-dl19-passage.cos-dpr-distil.hnsw.onnx.md | 2 +- ...19-passage.openai-ada2.flat-int8.cached.md | 22 +- ...ns-dl19-passage.openai-ada2.flat.cached.md | 22 +- ...19-passage.openai-ada2.hnsw-int8.cached.md | 12 +- ...ns-dl19-passage.openai-ada2.hnsw.cached.md | 12 +- ...ssage.bge-base-en-v1.5.flat-int8.cached.md | 22 +- ...passage.bge-base-en-v1.5.flat-int8.onnx.md | 22 +- ...20-passage.bge-base-en-v1.5.flat.cached.md | 22 +- ...dl20-passage.bge-base-en-v1.5.flat.onnx.md | 22 +- ...ssage.bge-base-en-v1.5.hnsw-int8.cached.md | 12 +- ...passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 12 +- ...20-passage.bge-base-en-v1.5.hnsw.cached.md | 2 +- ...dl20-passage.bge-base-en-v1.5.hnsw.onnx.md | 2 +- ...ere-embed-english-v3.0.flat-int8.cached.md | 22 +- ...e.cohere-embed-english-v3.0.flat.cached.md | 22 +- ...ere-embed-english-v3.0.hnsw-int8.cached.md | 12 +- ...e.cohere-embed-english-v3.0.hnsw.cached.md | 12 +- ...passage.cos-dpr-distil.flat-int8.cached.md | 22 +- ...0-passage.cos-dpr-distil.flat-int8.onnx.md | 22 +- ...dl20-passage.cos-dpr-distil.flat.cached.md | 22 +- ...s-dl20-passage.cos-dpr-distil.flat.onnx.md | 22 +- ...passage.cos-dpr-distil.hnsw-int8.cached.md | 12 +- ...0-passage.cos-dpr-distil.hnsw-int8.onnx.md | 12 +- ...dl20-passage.cos-dpr-distil.hnsw.cached.md | 2 +- ...s-dl20-passage.cos-dpr-distil.hnsw.onnx.md | 2 +- ...20-passage.openai-ada2.flat-int8.cached.md | 22 +- ...ns-dl20-passage.openai-ada2.flat.cached.md | 22 +- ...20-passage.openai-ada2.hnsw-int8.cached.md | 12 +- ...ns-dl20-passage.openai-ada2.hnsw.cached.md | 12 +- ...ssage.bge-base-en-v1.5.flat-int8.cached.md | 22 +- ...passage.bge-base-en-v1.5.flat-int8.onnx.md | 22 +- ...v1-passage.bge-base-en-v1.5.flat.cached.md | 22 +- ...o-v1-passage.bge-base-en-v1.5.flat.onnx.md | 22 +- ...ssage.bge-base-en-v1.5.hnsw-int8.cached.md | 12 +- ...passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 12 +- ...v1-passage.bge-base-en-v1.5.hnsw.cached.md | 2 +- ...o-v1-passage.bge-base-en-v1.5.hnsw.onnx.md | 2 +- ...ere-embed-english-v3.0.flat-int8.cached.md | 22 +- ...e.cohere-embed-english-v3.0.flat.cached.md | 22 +- ...ere-embed-english-v3.0.hnsw-int8.cached.md | 12 +- ...e.cohere-embed-english-v3.0.hnsw.cached.md | 12 +- ...passage.cos-dpr-distil.flat-int8.cached.md | 22 +- ...1-passage.cos-dpr-distil.flat-int8.onnx.md | 22 +- ...o-v1-passage.cos-dpr-distil.flat.cached.md | 22 +- ...rco-v1-passage.cos-dpr-distil.flat.onnx.md | 22 +- ...passage.cos-dpr-distil.hnsw-int8.cached.md | 12 +- ...1-passage.cos-dpr-distil.hnsw-int8.onnx.md | 12 +- ...o-v1-passage.cos-dpr-distil.hnsw.cached.md | 2 +- ...rco-v1-passage.cos-dpr-distil.hnsw.onnx.md | 2 +- ...v1-passage.openai-ada2.flat-int8.cached.md | 22 +- ...arco-v1-passage.openai-ada2.flat.cached.md | 22 +- ...v1-passage.openai-ada2.hnsw-int8.cached.md | 12 +- ...arco-v1-passage.openai-ada2.hnsw.cached.md | 12 +- src/main/python/regressions-batch03.txt | 348 +++++++++--------- src/main/python/regressions-batch04.txt | 296 +++++++-------- ...age.bge-base-en-v1.5.flat-int8.cached.yaml | 2 +- ...ssage.bge-base-en-v1.5.flat-int8.onnx.yaml | 2 +- ...-passage.bge-base-en-v1.5.flat.cached.yaml | 2 +- ...19-passage.bge-base-en-v1.5.flat.onnx.yaml | 2 +- ...age.bge-base-en-v1.5.hnsw-int8.cached.yaml | 4 +- ...ssage.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 4 +- ...-passage.bge-base-en-v1.5.hnsw.cached.yaml | 2 +- ...19-passage.bge-base-en-v1.5.hnsw.onnx.yaml | 2 +- ...e-embed-english-v3.0.flat-int8.cached.yaml | 2 +- ...cohere-embed-english-v3.0.flat.cached.yaml | 2 +- ...e-embed-english-v3.0.hnsw-int8.cached.yaml | 4 +- ...cohere-embed-english-v3.0.hnsw.cached.yaml | 4 +- ...ssage.cos-dpr-distil.flat-int8.cached.yaml | 2 +- ...passage.cos-dpr-distil.flat-int8.onnx.yaml | 2 +- ...19-passage.cos-dpr-distil.flat.cached.yaml | 2 +- ...dl19-passage.cos-dpr-distil.flat.onnx.yaml | 2 +- ...ssage.cos-dpr-distil.hnsw-int8.cached.yaml | 4 +- ...passage.cos-dpr-distil.hnsw-int8.onnx.yaml | 4 +- ...19-passage.cos-dpr-distil.hnsw.cached.yaml | 2 +- ...dl19-passage.cos-dpr-distil.hnsw.onnx.yaml | 2 +- ...-passage.openai-ada2.flat-int8.cached.yaml | 2 +- .../dl19-passage.openai-ada2.flat.cached.yaml | 2 +- ...-passage.openai-ada2.hnsw-int8.cached.yaml | 4 +- .../dl19-passage.openai-ada2.hnsw.cached.yaml | 4 +- ...age.bge-base-en-v1.5.flat-int8.cached.yaml | 2 +- ...ssage.bge-base-en-v1.5.flat-int8.onnx.yaml | 2 +- ...-passage.bge-base-en-v1.5.flat.cached.yaml | 2 +- ...20-passage.bge-base-en-v1.5.flat.onnx.yaml | 2 +- ...age.bge-base-en-v1.5.hnsw-int8.cached.yaml | 4 +- ...ssage.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 4 +- ...-passage.bge-base-en-v1.5.hnsw.cached.yaml | 2 +- ...20-passage.bge-base-en-v1.5.hnsw.onnx.yaml | 2 +- ...e-embed-english-v3.0.flat-int8.cached.yaml | 2 +- ...cohere-embed-english-v3.0.flat.cached.yaml | 2 +- ...e-embed-english-v3.0.hnsw-int8.cached.yaml | 4 +- ...cohere-embed-english-v3.0.hnsw.cached.yaml | 4 +- ...ssage.cos-dpr-distil.flat-int8.cached.yaml | 2 +- ...passage.cos-dpr-distil.flat-int8.onnx.yaml | 2 +- ...20-passage.cos-dpr-distil.flat.cached.yaml | 2 +- ...dl20-passage.cos-dpr-distil.flat.onnx.yaml | 2 +- ...ssage.cos-dpr-distil.hnsw-int8.cached.yaml | 4 +- ...passage.cos-dpr-distil.hnsw-int8.onnx.yaml | 4 +- ...20-passage.cos-dpr-distil.hnsw.cached.yaml | 2 +- ...dl20-passage.cos-dpr-distil.hnsw.onnx.yaml | 2 +- ...-passage.openai-ada2.flat-int8.cached.yaml | 2 +- .../dl20-passage.openai-ada2.flat.cached.yaml | 2 +- ...-passage.openai-ada2.hnsw-int8.cached.yaml | 4 +- .../dl20-passage.openai-ada2.hnsw.cached.yaml | 4 +- ...age.bge-base-en-v1.5.flat-int8.cached.yaml | 2 +- ...ssage.bge-base-en-v1.5.flat-int8.onnx.yaml | 2 +- ...-passage.bge-base-en-v1.5.flat.cached.yaml | 2 +- ...v1-passage.bge-base-en-v1.5.flat.onnx.yaml | 2 +- ...age.bge-base-en-v1.5.hnsw-int8.cached.yaml | 4 +- ...ssage.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 4 +- ...-passage.bge-base-en-v1.5.hnsw.cached.yaml | 2 +- ...v1-passage.bge-base-en-v1.5.hnsw.onnx.yaml | 2 +- ...e-embed-english-v3.0.flat-int8.cached.yaml | 2 +- ...cohere-embed-english-v3.0.flat.cached.yaml | 2 +- ...e-embed-english-v3.0.hnsw-int8.cached.yaml | 4 +- ...cohere-embed-english-v3.0.hnsw.cached.yaml | 4 +- ...ssage.cos-dpr-distil.flat-int8.cached.yaml | 2 +- ...passage.cos-dpr-distil.flat-int8.onnx.yaml | 2 +- ...v1-passage.cos-dpr-distil.flat.cached.yaml | 2 +- ...o-v1-passage.cos-dpr-distil.flat.onnx.yaml | 2 +- ...ssage.cos-dpr-distil.hnsw-int8.cached.yaml | 4 +- ...passage.cos-dpr-distil.hnsw-int8.onnx.yaml | 4 +- ...v1-passage.cos-dpr-distil.hnsw.cached.yaml | 2 +- ...o-v1-passage.cos-dpr-distil.hnsw.onnx.yaml | 2 +- ...-passage.openai-ada2.flat-int8.cached.yaml | 2 +- ...co-v1-passage.openai-ada2.flat.cached.yaml | 2 +- ...-passage.openai-ada2.hnsw-int8.cached.yaml | 4 +- ...co-v1-passage.openai-ada2.hnsw.cached.yaml | 4 +- 146 files changed, 970 insertions(+), 970 deletions(-) diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.cached.md index 88707d0d88..e4ebbebe05 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.flat-int8.cached \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl19-passage.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.onnx.md index 46bdf8b268..ec809a3ec9 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat-int8.onnx.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.flat-int8.onnx \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl19-passage.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl19-passage.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl19-passage.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl19-passage.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl19-passage.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.cached.md index 83c6269171..6c42e9a6ab 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.flat.cached \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl19-passage.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.onnx.md index 9cbacd3ae4..3a8d7b2453 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.flat.onnx.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.flat.onnx \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl19-passage.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.dl19-passage.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.dl19-passage.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.dl19-passage.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.dl19-passage.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.dl19-passage.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.dl19-passage.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md index 0f9ac0e344..dfb916cdd2 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl19-passage.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md index 61e35bcda9..b6b0689ea5 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl19-passage.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.dl19-passage.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.dl19-passage.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md index 68c21b41a1..742744f7e5 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md index 62cc18dd28..77537a951d 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.flat-int8.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.flat-int8.cached.md index 7fc9bff0e5..e8d1e33a56 100644 --- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.flat-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.flat-int8.cached.md @@ -31,15 +31,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar -P collections/ -tar xvf collections/msmarco-passage.cohere-embed-english-v3.0.tar -C collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.tar -C collections/ ``` -To confirm, `msmarco-passage.cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. +To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.flat-int8.cached \ - --corpus-path collections/msmarco-passage.cohere-embed-english-v3.0 + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` ## Indexing @@ -49,14 +49,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cohere-embed-english-v3.0 \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.cohere-embed-english-v3.0 & + >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` -The path `/path/to/msmarco-passage.cohere-embed-english-v3.0/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -72,17 +72,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.flat.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.flat.cached.md index e01f360d41..f110f1c93e 100644 --- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.flat.cached.md +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.flat.cached.md @@ -31,15 +31,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar -P collections/ -tar xvf collections/msmarco-passage.cohere-embed-english-v3.0.tar -C collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.tar -C collections/ ``` -To confirm, `msmarco-passage.cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. +To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.flat.cached \ - --corpus-path collections/msmarco-passage.cohere-embed-english-v3.0 + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` ## Indexing @@ -49,14 +49,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cohere-embed-english-v3.0 \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -threads 16 \ - >& logs/log.msmarco-passage.cohere-embed-english-v3.0 & + >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` -The path `/path/to/msmarco-passage.cohere-embed-english-v3.0/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -72,17 +72,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md index 0b315fcb0f..2c3985fd07 100644 --- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md @@ -52,7 +52,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ - -threads 16 -M 16 -efC 100 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` @@ -75,17 +75,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md index 5790a6f827..b8c6825dd5 100644 --- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md @@ -52,7 +52,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` @@ -75,17 +75,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat-int8.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat-int8.cached.md index eb75366f05..e490cbfc5e 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat-int8.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.flat-int8.cached \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat-int8.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat-int8.onnx.md index cb2d6aec3a..d593ffefdf 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat-int8.onnx.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat-int8.onnx.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.flat-int8.onnx \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,7 +77,7 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder CosDprDistil & ``` @@ -86,10 +86,10 @@ Note that we are performing query inference "on-the-fly" with ONNX in these expe Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl19-passage.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat.cached.md index 020676970e..0f9c561a59 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat.cached.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.flat.cached \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat.onnx.md index 96e3c48ea0..5c2d912f80 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat.onnx.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.flat.onnx.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.flat.onnx \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,7 +77,7 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder CosDprDistil & ``` @@ -86,10 +86,10 @@ Note that we are performing query inference "on-the-fly" with ONNX in these expe Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl19-passage.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md index c2eb13d4c0..70a66a7981 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md index 6b3b8fb2d1..9aa8383ef9 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` @@ -82,7 +82,7 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.dl19-passage.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil & ``` @@ -91,10 +91,10 @@ Note that we are performing query inference "on-the-fly" with ONNX in these expe Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.dl19-passage.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md index 3f49b4990a..c4eb2dcb3b 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md index 26639f38df..44b66615f0 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.flat-int8.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.flat-int8.cached.md index c8341373ee..f7af596259 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.flat-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.flat-int8.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -P collections/ -tar xvf collections/msmarco-passage.openai-ada2.tar -C collections/ +tar xvf collections/msmarco-passage-openai-ada2.tar -C collections/ ``` -To confirm, `msmarco-passage.openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. +To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.flat-int8.cached \ - --corpus-path collections/msmarco-passage.openai-ada2 + --corpus-path collections/msmarco-passage-openai-ada2 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.openai-ada2 \ + -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \ -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ - >& logs/log.msmarco-passage.openai-ada2 & + >& logs/log.msmarco-passage-openai-ada2 & ``` -The path `/path/to/msmarco-passage.openai-ada2/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl19-passage.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.flat.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.flat.cached.md index bb32664798..1e9d458d7e 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.flat.cached.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.flat.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -P collections/ -tar xvf collections/msmarco-passage.openai-ada2.tar -C collections/ +tar xvf collections/msmarco-passage-openai-ada2.tar -C collections/ ``` -To confirm, `msmarco-passage.openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. +To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.flat.cached \ - --corpus-path collections/msmarco-passage.openai-ada2 + --corpus-path collections/msmarco-passage-openai-ada2 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.openai-ada2 \ + -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \ -threads 16 \ - >& logs/log.msmarco-passage.openai-ada2 & + >& logs/log.msmarco-passage-openai-ada2 & ``` -The path `/path/to/msmarco-passage.openai-ada2/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl19-passage.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.dl19-passage.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md index f9dc73b958..c241c421b3 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-openai-ada2 & ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl19-passage.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.dl19-passage.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md index e962fb3cae..6101c5d241 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -maxThreadMemoryBeforeFlush 1945 \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-openai-ada2 & ``` @@ -80,17 +80,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl19-passage.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.dl19-passage.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.dl19-passage.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.cached.md index 35177a8809..3632387c88 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.flat-int8.cached \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl20.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.onnx.md index bed23610a3..fc54a93138 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat-int8.onnx.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.flat-int8.onnx \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl20.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl20.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl20.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl20.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl20.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl20.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.dl20.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.cached.md index 0395d55a0d..6c50c6f441 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.flat.cached \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl20.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.onnx.md index d1666fb151..61f2381ee0 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.flat.onnx.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.flat.onnx \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl20.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.dl20.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.dl20.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.dl20.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.dl20.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.dl20.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.dl20.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.dl20.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md index 1389133f8c..4f964bbf15 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl20.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md index 8fe0ca8dfd..a4a0e59285 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl20.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl20.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.dl20.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl20.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl20.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl20.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl20.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.dl20.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md index 0f1eb7dd85..82ef436897 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md index d9e1ef0c0d..df2686e60a 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.flat-int8.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.flat-int8.cached.md index 3ba8bc1240..725547f160 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.flat-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.flat-int8.cached.md @@ -31,15 +31,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar -P collections/ -tar xvf collections/msmarco-passage.cohere-embed-english-v3.0.tar -C collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.tar -C collections/ ``` -To confirm, `msmarco-passage.cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. +To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.flat-int8.cached \ - --corpus-path collections/msmarco-passage.cohere-embed-english-v3.0 + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` ## Indexing @@ -49,14 +49,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cohere-embed-english-v3.0 \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.cohere-embed-english-v3.0 & + >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` -The path `/path/to/msmarco-passage.cohere-embed-english-v3.0/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -72,17 +72,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl20.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.flat.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.flat.cached.md index b079796f62..57de997741 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.flat.cached.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.flat.cached.md @@ -31,15 +31,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar -P collections/ -tar xvf collections/msmarco-passage.cohere-embed-english-v3.0.tar -C collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.tar -C collections/ ``` -To confirm, `msmarco-passage.cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. +To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.flat.cached \ - --corpus-path collections/msmarco-passage.cohere-embed-english-v3.0 + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` ## Indexing @@ -49,14 +49,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cohere-embed-english-v3.0 \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -threads 16 \ - >& logs/log.msmarco-passage.cohere-embed-english-v3.0 & + >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` -The path `/path/to/msmarco-passage.cohere-embed-english-v3.0/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -72,17 +72,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl20.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md index fc24f69cf3..a81cc2311a 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md @@ -52,7 +52,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ - -threads 16 -M 16 -efC 100 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` @@ -75,17 +75,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl20.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md index 718b72144d..aa28c99e1c 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md @@ -52,7 +52,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` @@ -75,17 +75,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl20.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat-int8.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat-int8.cached.md index ff89f5625a..66d954ff0a 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat-int8.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.flat-int8.cached \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl20.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat-int8.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat-int8.onnx.md index ca25db7bc8..f155791784 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat-int8.onnx.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat-int8.onnx.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.flat-int8.onnx \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,7 +77,7 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl20.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder CosDprDistil & ``` @@ -86,10 +86,10 @@ Note that we are performing query inference "on-the-fly" with ONNX in these expe Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.dl20.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat.cached.md index 74388f563b..229270703f 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat.cached.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.flat.cached \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl20.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.dl20.cos-dpr-distil.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat.onnx.md index 4af395076f..b5bcaeb12f 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat.onnx.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.flat.onnx.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.flat.onnx \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,7 +77,7 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl20.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl20.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl20.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder CosDprDistil & ``` @@ -86,10 +86,10 @@ Note that we are performing query inference "on-the-fly" with ONNX in these expe Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl20.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl20.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl20.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl20.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.dl20.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md index a66f2fbc4e..da4b118033 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl20.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.dl20.cos-dpr-distil.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md index 0a5031af13..1aca72eb64 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` @@ -82,7 +82,7 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl20.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl20.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.dl20.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil & ``` @@ -91,10 +91,10 @@ Note that we are performing query inference "on-the-fly" with ONNX in these expe Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl20.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl20.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl20.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl20.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.dl20.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.dl20.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md index f90a87fc8c..2e60059bab 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md index 5ffe3a67d8..ce2bdd0834 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.flat-int8.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.flat-int8.cached.md index cb30f862b4..604ff127b8 100644 --- a/docs/regressions/regressions-dl20-passage.openai-ada2.flat-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.openai-ada2.flat-int8.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -P collections/ -tar xvf collections/msmarco-passage.openai-ada2.tar -C collections/ +tar xvf collections/msmarco-passage-openai-ada2.tar -C collections/ ``` -To confirm, `msmarco-passage.openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. +To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.flat-int8.cached \ - --corpus-path collections/msmarco-passage.openai-ada2 + --corpus-path collections/msmarco-passage-openai-ada2 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.openai-ada2 \ + -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.openai-ada2 & + >& logs/log.msmarco-passage-openai-ada2 & ``` -The path `/path/to/msmarco-passage.openai-ada2/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl20.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.dl20.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.flat.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.flat.cached.md index 75b0051912..eb6dc2d268 100644 --- a/docs/regressions/regressions-dl20-passage.openai-ada2.flat.cached.md +++ b/docs/regressions/regressions-dl20-passage.openai-ada2.flat.cached.md @@ -36,15 +36,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -P collections/ -tar xvf collections/msmarco-passage.openai-ada2.tar -C collections/ +tar xvf collections/msmarco-passage-openai-ada2.tar -C collections/ ``` -To confirm, `msmarco-passage.openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. +To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.flat.cached \ - --corpus-path collections/msmarco-passage.openai-ada2 + --corpus-path collections/msmarco-passage-openai-ada2 ``` ## Indexing @@ -54,14 +54,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.openai-ada2 \ + -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \ -threads 16 \ - >& logs/log.msmarco-passage.openai-ada2 & + >& logs/log.msmarco-passage-openai-ada2 & ``` -The path `/path/to/msmarco-passage.openai-ada2/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl20.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.dl20.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md index 08cb273595..5f7f9c2a1a 100644 --- a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md +++ b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-openai-ada2 & ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl20.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.dl20.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.dl20.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md index 72ad94978d..f9f9b70fb5 100644 --- a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md +++ b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md @@ -57,7 +57,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -maxThreadMemoryBeforeFlush 1945 \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-openai-ada2 & ``` @@ -80,17 +80,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl20.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.dl20.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.dl20.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.md index d0c57deaf4..b192407bee 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.md @@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -51,14 +51,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -73,17 +73,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.md index b5ad99b849..c952f0d51a 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.md @@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -51,14 +51,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -73,17 +73,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.cached.md index c48d80b26b..13219a1477 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.cached.md @@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat.cached \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -51,14 +51,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -73,17 +73,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.md index 0594655c67..940cb41ed1 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.md @@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar -P collections/ -tar xvf collections/msmarco-passage.bge-base-en-v1.5.tar -C collections/ +tar xvf collections/msmarco-passage-bge-base-en-v1.5.tar -C collections/ ``` -To confirm, `msmarco-passage.bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. +To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum `353d2c9e72e858897ad479cca4ea0db1`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat.onnx \ - --corpus-path collections/msmarco-passage.bge-base-en-v1.5 + --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` ## Indexing @@ -51,14 +51,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.bge-base-en-v1.5 \ + -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -threads 16 \ - >& logs/log.msmarco-passage.bge-base-en-v1.5 & + >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` -The path `/path/to/msmarco-passage.bge-base-en-v1.5/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-bge-base-en-v1.5/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -73,17 +73,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bge-base-en-v1.5.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-flat-onnx.topics.msmarco-passage.dev-subset.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md index 3241e99e40..78e2f4b56c 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md @@ -54,7 +54,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` @@ -78,17 +78,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md index 02083f3abe..ff8d251335 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -54,7 +54,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` @@ -78,17 +78,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.msmarco-passage.dev-subset.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md index 9ba8bf2c07..01f458764b 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md @@ -54,7 +54,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md index 549d61efe7..080fe67cbe 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md @@ -54,7 +54,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-bge-base-en-v1.5 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-bge-base-en-v1.5 & ``` diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.flat-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.flat-int8.cached.md index 596f4927f3..e4ff49d07b 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.flat-int8.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.flat-int8.cached.md @@ -31,15 +31,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar -P collections/ -tar xvf collections/msmarco-passage.cohere-embed-english-v3.0.tar -C collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.tar -C collections/ ``` -To confirm, `msmarco-passage.cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. +To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.flat-int8.cached \ - --corpus-path collections/msmarco-passage.cohere-embed-english-v3.0 + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` ## Indexing @@ -49,14 +49,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cohere-embed-english-v3.0 \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.cohere-embed-english-v3.0 & + >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` -The path `/path/to/msmarco-passage.cohere-embed-english-v3.0/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -71,17 +71,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.flat.cached.md index 148f64f69d..c71dd2e9e9 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.flat.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.flat.cached.md @@ -31,15 +31,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar -P collections/ -tar xvf collections/msmarco-passage.cohere-embed-english-v3.0.tar -C collections/ +tar xvf collections/msmarco-passage-cohere-embed-english-v3.0.tar -C collections/ ``` -To confirm, `msmarco-passage.cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. +To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 checksum `06a6e38a0522850c6aa504db7b2617f5`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.flat.cached \ - --corpus-path collections/msmarco-passage.cohere-embed-english-v3.0 + --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` ## Indexing @@ -49,14 +49,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cohere-embed-english-v3.0 \ + -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -threads 16 \ - >& logs/log.msmarco-passage.cohere-embed-english-v3.0 & + >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` -The path `/path/to/msmarco-passage.cohere-embed-english-v3.0/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cohere-embed-english-v3.0/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -71,17 +71,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-flat-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md index 80c68013f6..5cf1a11852 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md @@ -52,7 +52,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ - -threads 16 -M 16 -efC 100 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` @@ -76,17 +76,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md index 8b7e862312..98741acdd8 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md @@ -52,7 +52,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cohere-embed-english-v3.0 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-cohere-embed-english-v3.0 & ``` @@ -74,17 +74,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-hnsw-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat-int8.cached.md index 208f70b6d2..9f026eca2f 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat-int8.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat-int8.cached.md @@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.flat-int8.cached \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -51,14 +51,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -73,17 +73,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx.md index a471630f09..86b6cf7c7b 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx.md @@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -51,14 +51,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -73,7 +73,7 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder CosDprDistil & ``` @@ -82,10 +82,10 @@ Note that we are performing query inference "on-the-fly" with ONNX in these expe Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-int8-onnx.topics.msmarco-passage.dev-subset.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat.cached.md index cd8071d7ef..5af3fbd187 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat.cached.md @@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.flat.cached \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -51,14 +51,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -73,17 +73,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat.onnx.md index f7a119a9b2..923b9d3a11 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat.onnx.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.flat.onnx.md @@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar -P collections/ -tar xvf collections/msmarco-passage.cos-dpr-distil.tar -C collections/ +tar xvf collections/msmarco-passage-cos-dpr-distil.tar -C collections/ ``` -To confirm, `msmarco-passage.cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. +To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum `e20ffbc8b5e7f760af31298aefeaebbd`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.flat.onnx \ - --corpus-path collections/msmarco-passage.cos-dpr-distil + --corpus-path collections/msmarco-passage-cos-dpr-distil ``` ## Indexing @@ -51,14 +51,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.cos-dpr-distil \ + -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -threads 16 \ - >& logs/log.msmarco-passage.cos-dpr-distil & + >& logs/log.msmarco-passage-cos-dpr-distil & ``` -The path `/path/to/msmarco-passage.cos-dpr-distil/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-cos-dpr-distil/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -73,7 +73,7 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -encoder CosDprDistil & ``` @@ -82,10 +82,10 @@ Note that we are performing query inference "on-the-fly" with ONNX in these expe Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-flat-onnx.topics.msmarco-passage.dev-subset.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md index 1ff6066552..eca31bdbb9 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md @@ -54,7 +54,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` @@ -78,17 +78,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md index baa03a30a1..a72f7ec16e 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md @@ -54,7 +54,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` @@ -78,7 +78,7 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.msmarco-passage.dev-subset.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil & ``` @@ -87,10 +87,10 @@ Note that we are performing query inference "on-the-fly" with ONNX in these expe Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.msmarco-passage.dev-subset.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-int8-onnx.topics.msmarco-passage.dev-subset.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md index b04cf54cc7..4f908615ef 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md @@ -54,7 +54,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md index 0502e74da6..4aad9f8895 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md @@ -54,7 +54,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.flat-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.flat-int8.cached.md index 9873944d37..fc935316b9 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.flat-int8.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.flat-int8.cached.md @@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -P collections/ -tar xvf collections/msmarco-passage.openai-ada2.tar -C collections/ +tar xvf collections/msmarco-passage-openai-ada2.tar -C collections/ ``` -To confirm, `msmarco-passage.openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. +To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.flat-int8.cached \ - --corpus-path collections/msmarco-passage.openai-ada2 + --corpus-path collections/msmarco-passage-openai-ada2 ``` ## Indexing @@ -51,14 +51,14 @@ Sample indexing command, building quantized flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.openai-ada2 \ + -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \ -threads 16 -quantize.int8 \ - >& logs/log.msmarco-passage.openai-ada2 & + >& logs/log.msmarco-passage-openai-ada2 & ``` -The path `/path/to/msmarco-passage.openai-ada2/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -73,17 +73,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat-int8.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.flat.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.flat.cached.md index 74b845af2c..6bcba8f1db 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.flat.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.flat.cached.md @@ -33,15 +33,15 @@ Download the corpus and unpack into `collections/`: ```bash wget https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar -P collections/ -tar xvf collections/msmarco-passage.openai-ada2.tar -C collections/ +tar xvf collections/msmarco-passage-openai-ada2.tar -C collections/ ``` -To confirm, `msmarco-passage.openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. +To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4d843d522ff3a3af7edbee789a63402`. With the corpus downloaded, the following command will perform the remaining steps below: ```bash python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.flat.cached \ - --corpus-path collections/msmarco-passage.openai-ada2 + --corpus-path collections/msmarco-passage-openai-ada2 ``` ## Indexing @@ -51,14 +51,14 @@ Sample indexing command, building flat indexes: ```bash bin/run.sh io.anserini.index.IndexCollection \ -collection JsonDenseVectorCollection \ - -input /path/to/msmarco-passage.openai-ada2 \ + -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \ -threads 16 \ - >& logs/log.msmarco-passage.openai-ada2 & + >& logs/log.msmarco-passage-openai-ada2 & ``` -The path `/path/to/msmarco-passage.openai-ada2/` should point to the corpus downloaded above. +The path `/path/to/msmarco-passage-openai-ada2/` should point to the corpus downloaded above. Upon completion, we should have an index with 8,841,823 documents. ## Retrieval @@ -73,17 +73,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-flat.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-flat-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md index a8086ddfc1..a1b562ed84 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md @@ -54,7 +54,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 \ + -threads 16 -M 16 -efC 100 -quantize.int8 \ >& logs/log.msmarco-passage-openai-ada2 & ``` @@ -78,17 +78,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-int8-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md index d36b9cfdee..383c26e5a5 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md @@ -54,7 +54,7 @@ bin/run.sh io.anserini.index.IndexHnswDenseVectors \ -input /path/to/msmarco-passage-openai-ada2 \ -generator DenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ - -threads 16 -M 16 -efC 100 -memoryBuffer 65536 -noMerge -maxThreadMemoryBeforeFlush 1945 \ + -threads 16 -M 16 -efC 100 \ >& logs/log.msmarco-passage-openai-ada2 & ``` @@ -76,17 +76,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-hnsw-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt ``` ## Effectiveness diff --git a/src/main/python/regressions-batch03.txt b/src/main/python/regressions-batch03.txt index 753a6e990a..69ce690413 100644 --- a/src/main/python/regressions-batch03.txt +++ b/src/main/python/regressions-batch03.txt @@ -51,43 +51,43 @@ python src/main/python/run_regression.py --verify --search --regression dl23-doc python src/main/python/run_regression.py --verify --search --regression rag24-doc-segmented-raggy-dev > logs/log.rag24-doc-segmented-raggy-dev.txt 2>&1 # Flat indexes for MS MARCO v1 -python src/main/python/run_regression.py --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat.onnx > logs/log.msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression msmarco-v1-passage.cos-dpr-distil.flat.onnx > logs/log.msmarco-v1-passage.cos-dpr-distil.flat.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat.onnx > logs/log.msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression msmarco-v1-passage.cos-dpr-distil.flat.onnx > logs/log.msmarco-v1-passage.cos-dpr-distil.flat.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx > logs/log.msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx > logs/log.msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx > logs/log.msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx > logs/log.msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.bge-base-en-v1.5.flat.cached > logs/log.dl19-passage.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cohere-embed-english-v3.0.flat.cached > logs/log.dl19-passage.cohere-embed-english-v3.0.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.flat.cached > logs/log.dl19-passage.cos-dpr-distil.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.openai-ada2.flat.cached > logs/log.dl19-passage.openai-ada2.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.bge-base-en-v1.5.flat.cached > logs/log.dl19-passage.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cohere-embed-english-v3.0.flat.cached > logs/log.dl19-passage.cohere-embed-english-v3.0.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cos-dpr-distil.flat.cached > logs/log.dl19-passage.cos-dpr-distil.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.openai-ada2.flat.cached > logs/log.dl19-passage.openai-ada2.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.bge-base-en-v1.5.flat-int8.cached > logs/log.dl19-passage.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cohere-embed-english-v3.0.flat-int8.cached > logs/log.dl19-passage.cohere-embed-english-v3.0.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.flat-int8.cached > logs/log.dl19-passage.cos-dpr-distil.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.openai-ada2.flat-int8.cached > logs/log.dl19-passage.openai-ada2.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.bge-base-en-v1.5.flat-int8.cached > logs/log.dl19-passage.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cohere-embed-english-v3.0.flat-int8.cached > logs/log.dl19-passage.cohere-embed-english-v3.0.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cos-dpr-distil.flat-int8.cached > logs/log.dl19-passage.cos-dpr-distil.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.openai-ada2.flat-int8.cached > logs/log.dl19-passage.openai-ada2.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.bge-base-en-v1.5.flat.onnx > logs/log.dl19-passage.bge-base-en-v1.5.flat.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.flat.onnx > logs/log.dl19-passage.cos-dpr-distil.flat.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.bge-base-en-v1.5.flat.onnx > logs/log.dl19-passage.bge-base-en-v1.5.flat.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cos-dpr-distil.flat.onnx > logs/log.dl19-passage.cos-dpr-distil.flat.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.bge-base-en-v1.5.flat-int8.onnx > logs/log.dl19-passage.bge-base-en-v1.5.flat-int8.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.flat-int8.onnx > logs/log.dl19-passage.cos-dpr-distil.flat-int8.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.bge-base-en-v1.5.flat-int8.onnx > logs/log.dl19-passage.bge-base-en-v1.5.flat-int8.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cos-dpr-distil.flat-int8.onnx > logs/log.dl19-passage.cos-dpr-distil.flat-int8.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.flat.cached > logs/log.dl20-passage.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cohere-embed-english-v3.0.flat.cached > logs/log.dl20-passage.cohere-embed-english-v3.0.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.flat.cached > logs/log.dl20-passage.cos-dpr-distil.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.openai-ada2.flat.cached > logs/log.dl20-passage.openai-ada2.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.bge-base-en-v1.5.flat.cached > logs/log.dl20-passage.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cohere-embed-english-v3.0.flat.cached > logs/log.dl20-passage.cohere-embed-english-v3.0.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cos-dpr-distil.flat.cached > logs/log.dl20-passage.cos-dpr-distil.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.openai-ada2.flat.cached > logs/log.dl20-passage.openai-ada2.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.flat-int8.cached > logs/log.dl20-passage.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cohere-embed-english-v3.0.flat-int8.cached > logs/log.dl20-passage.cohere-embed-english-v3.0.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.flat-int8.cached > logs/log.dl20-passage.cos-dpr-distil.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.openai-ada2.flat-int8.cached > logs/log.dl20-passage.openai-ada2.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.bge-base-en-v1.5.flat-int8.cached > logs/log.dl20-passage.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cohere-embed-english-v3.0.flat-int8.cached > logs/log.dl20-passage.cohere-embed-english-v3.0.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cos-dpr-distil.flat-int8.cached > logs/log.dl20-passage.cos-dpr-distil.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.openai-ada2.flat-int8.cached > logs/log.dl20-passage.openai-ada2.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.flat.onnx > logs/log.dl20-passage.bge-base-en-v1.5.flat.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.flat.onnx > logs/log.dl20-passage.cos-dpr-distil.flat.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.bge-base-en-v1.5.flat.onnx > logs/log.dl20-passage.bge-base-en-v1.5.flat.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cos-dpr-distil.flat.onnx > logs/log.dl20-passage.cos-dpr-distil.flat.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.flat-int8.onnx > logs/log.dl20-passage.bge-base-en-v1.5.flat-int8.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.flat-int8.onnx > logs/log.dl20-passage.cos-dpr-distil.flat-int8.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.bge-base-en-v1.5.flat-int8.onnx > logs/log.dl20-passage.bge-base-en-v1.5.flat-int8.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cos-dpr-distil.flat-int8.onnx > logs/log.dl20-passage.cos-dpr-distil.flat-int8.onnx.txt 2>&1 # MS MARCO V1 doc python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc > logs/log.msmarco-v1-doc.txt 2>&1 @@ -128,171 +128,171 @@ python src/main/python/run_regression.py --index --verify --search --regression python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot-v2.cached > logs/log.msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.txt 2>&1 # MS MARCO V1 passage search-only -python src/main/python/run_regression.py --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw.onnx > logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx > logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx > logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx > logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression msmarco-v1-passage.splade-pp-ed.onnx > logs/log.msmarco-v1-passage.splade-pp-ed.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression msmarco-v1-passage.splade-pp-sd.onnx > logs/log.msmarco-v1-passage.splade-pp-sd.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw.onnx > logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx > logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx > logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx > logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression msmarco-v1-passage.splade-pp-ed.onnx > logs/log.msmarco-v1-passage.splade-pp-ed.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression msmarco-v1-passage.splade-pp-sd.onnx > logs/log.msmarco-v1-passage.splade-pp-sd.onnx.txt 2>&1 # MS MARCO V2 passage search-only python src/main/python/run_regression.py --verify --search --regression msmarco-v2-passage.splade-pp-ed.onnx > logs/log.msmarco-v2-passage.splade-pp-ed.onnx.txt 2>&1 python src/main/python/run_regression.py --verify --search --regression msmarco-v2-passage.splade-pp-sd.onnx > logs/log.msmarco-v2-passage.splade-pp-sd.onnx.txt 2>&1 # DL19 -python src/main/python/run_regression.py --search --regression dl19-passage > logs/log.dl19-passage.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.bm25-b8 > logs/log.dl19-passage.bm25-b8.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.wp-ca > logs/log.dl19-passage.wp-ca.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.wp-tok > logs/log.dl19-passage.wp-tok.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.wp-hgf > logs/log.dl19-passage.wp-hgf.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.docTTTTTquery > logs/log.dl19-passage.docTTTTTquery.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.splade-pp-ed.onnx > logs/log.dl19-passage.splade-pp-ed.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.splade-pp-sd.onnx > logs/log.dl19-passage.splade-pp-sd.onnx.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl19-passage.bge-base-en-v1.5.hnsw.cached > logs/log.dl19-passage.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.cached > logs/log.dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.hnsw.cached > logs/log.dl19-passage.cos-dpr-distil.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.hnsw-int8.cached > logs/log.dl19-passage.cos-dpr-distil.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.fw > logs/log.dl19-passage.cos-dpr-distil.fw.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.lexlsh > logs/log.dl19-passage.cos-dpr-distil.lexlsh.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.openai-ada2.hnsw.cached > logs/log.dl19-passage.openai-ada2.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.openai-ada2.hnsw-int8.cached > logs/log.dl19-passage.openai-ada2.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw.cached > logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached > logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl19-passage.unicoil.cached > logs/log.dl19-passage.unicoil.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.unicoil-noexp.cached > logs/log.dl19-passage.unicoil-noexp.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.splade-pp-ed.cached > logs/log.dl19-passage.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.splade-pp-sd.cached > logs/log.dl19-passage.splade-pp-sd.cached.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.hnsw.onnx > logs/log.dl19-passage.cos-dpr-distil.hnsw.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.hnsw-int8.onnx > logs/log.dl19-passage.cos-dpr-distil.hnsw-int8.onnx.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl19-passage.bge-base-en-v1.5.hnsw.onnx > logs/log.dl19-passage.bge-base-en-v1.5.hnsw.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx > logs/log.dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl19-doc > logs/log.dl19-doc.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-doc.wp-ca > logs/log.dl19-doc.wp-ca.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-doc.wp-tok > logs/log.dl19-doc.wp-tok.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-doc.wp-hgf > logs/log.dl19-doc.wp-hgf.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-doc.docTTTTTquery > logs/log.dl19-doc.docTTTTTquery.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl19-doc-segmented > logs/log.dl19-doc-segmented.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-doc-segmented.wp-ca > logs/log.dl19-doc-segmented.wp-ca.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-doc-segmented.wp-tok > logs/log.dl19-doc-segmented.wp-tok.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-doc-segmented.docTTTTTquery > logs/log.dl19-doc-segmented.docTTTTTquery.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-doc-segmented.unicoil.cached > logs/log.dl19-doc-segmented.unicoil.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-doc-segmented.unicoil-noexp.cached > logs/log.dl19-doc-segmented.unicoil-noexp.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage > logs/log.dl19-passage.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.bm25-b8 > logs/log.dl19-passage.bm25-b8.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.wp-ca > logs/log.dl19-passage.wp-ca.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.wp-tok > logs/log.dl19-passage.wp-tok.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.wp-hgf > logs/log.dl19-passage.wp-hgf.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.docTTTTTquery > logs/log.dl19-passage.docTTTTTquery.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.splade-pp-ed.onnx > logs/log.dl19-passage.splade-pp-ed.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.splade-pp-sd.onnx > logs/log.dl19-passage.splade-pp-sd.onnx.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw.cached > logs/log.dl19-passage.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.cached > logs/log.dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cos-dpr-distil.hnsw.cached > logs/log.dl19-passage.cos-dpr-distil.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cos-dpr-distil.hnsw-int8.cached > logs/log.dl19-passage.cos-dpr-distil.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cos-dpr-distil.fw > logs/log.dl19-passage.cos-dpr-distil.fw.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cos-dpr-distil.lexlsh > logs/log.dl19-passage.cos-dpr-distil.lexlsh.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.openai-ada2.hnsw.cached > logs/log.dl19-passage.openai-ada2.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.openai-ada2.hnsw-int8.cached > logs/log.dl19-passage.openai-ada2.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw.cached > logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached > logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl19-passage.unicoil.cached > logs/log.dl19-passage.unicoil.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.unicoil-noexp.cached > logs/log.dl19-passage.unicoil-noexp.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.splade-pp-ed.cached > logs/log.dl19-passage.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.splade-pp-sd.cached > logs/log.dl19-passage.splade-pp-sd.cached.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cos-dpr-distil.hnsw.onnx > logs/log.dl19-passage.cos-dpr-distil.hnsw.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.cos-dpr-distil.hnsw-int8.onnx > logs/log.dl19-passage.cos-dpr-distil.hnsw-int8.onnx.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw.onnx > logs/log.dl19-passage.bge-base-en-v1.5.hnsw.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx > logs/log.dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl19-doc > logs/log.dl19-doc.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-doc.wp-ca > logs/log.dl19-doc.wp-ca.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-doc.wp-tok > logs/log.dl19-doc.wp-tok.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-doc.wp-hgf > logs/log.dl19-doc.wp-hgf.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-doc.docTTTTTquery > logs/log.dl19-doc.docTTTTTquery.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl19-doc-segmented > logs/log.dl19-doc-segmented.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-doc-segmented.wp-ca > logs/log.dl19-doc-segmented.wp-ca.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-doc-segmented.wp-tok > logs/log.dl19-doc-segmented.wp-tok.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-doc-segmented.docTTTTTquery > logs/log.dl19-doc-segmented.docTTTTTquery.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-doc-segmented.unicoil.cached > logs/log.dl19-doc-segmented.unicoil.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl19-doc-segmented.unicoil-noexp.cached > logs/log.dl19-doc-segmented.unicoil-noexp.cached.txt 2>&1 # DL20 -python src/main/python/run_regression.py --search --regression dl20-passage > logs/log.dl20-passage.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.bm25-b8 > logs/log.dl20-passage.bm25-b8.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.wp-ca > logs/log.dl20-passage.wp-ca.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.wp-tok > logs/log.dl20-passage.wp-tok.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.wp-hgf > logs/log.dl20-passage.wp-hgf.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.docTTTTTquery > logs/log.dl20-passage.docTTTTTquery.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.splade-pp-ed.onnx > logs/log.dl20-passage.splade-pp-ed.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.splade-pp-sd.onnx > logs/log.dl20-passage.splade-pp-sd.onnx.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.hnsw.cached > logs/log.dl20-passage.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.cached > logs/log.dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.hnsw.cached > logs/log.dl20-passage.cos-dpr-distil.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.hnsw-int8.cached > logs/log.dl20-passage.cos-dpr-distil.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.fw > logs/log.dl20-passage.cos-dpr-distil.fw.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.lexlsh > logs/log.dl20-passage.cos-dpr-distil.lexlsh.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.openai-ada2.hnsw.cached > logs/log.dl20-passage.openai-ada2.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.openai-ada2.hnsw-int8.cached > logs/log.dl20-passage.openai-ada2.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw.cached > logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached > logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl20-passage.unicoil.cached > logs/log.dl20-passage.unicoil.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.unicoil-noexp.cached > logs/log.dl20-passage.unicoil-noexp.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.splade-pp-ed.cached > logs/log.dl20-passage.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.splade-pp-sd.cached > logs/log.dl20-passage.splade-pp-sd.cached.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.hnsw.onnx > logs/log.dl20-passage.cos-dpr-distil.hnsw.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.hnsw-int8.onnx > logs/log.dl20-passage.cos-dpr-distil.hnsw-int8.onnx.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.hnsw.onnx > logs/log.dl20-passage.bge-base-en-v1.5.hnsw.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx > logs/log.dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl20-doc > logs/log.dl20-doc.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-doc.wp-ca > logs/log.dl20-doc.wp-ca.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-doc.wp-tok > logs/log.dl20-doc.wp-tok.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-doc.wp-hgf > logs/log.dl20-doc.wp-hgf.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-doc.docTTTTTquery > logs/log.dl20-doc.docTTTTTquery.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl20-doc-segmented > logs/log.dl20-doc-segmented.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-doc-segmented.wp-ca > logs/log.dl20-doc-segmented.wp-ca.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-doc-segmented.wp-tok > logs/log.dl20-doc-segmented.wp-tok.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-doc-segmented.docTTTTTquery > logs/log.dl20-doc-segmented.docTTTTTquery.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-doc-segmented.unicoil.cached > logs/log.dl20-doc-segmented.unicoil.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-doc-segmented.unicoil-noexp.cached > logs/log.dl20-doc-segmented.unicoil-noexp.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage > logs/log.dl20-passage.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.bm25-b8 > logs/log.dl20-passage.bm25-b8.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.wp-ca > logs/log.dl20-passage.wp-ca.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.wp-tok > logs/log.dl20-passage.wp-tok.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.wp-hgf > logs/log.dl20-passage.wp-hgf.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.docTTTTTquery > logs/log.dl20-passage.docTTTTTquery.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.splade-pp-ed.onnx > logs/log.dl20-passage.splade-pp-ed.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.splade-pp-sd.onnx > logs/log.dl20-passage.splade-pp-sd.onnx.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw.cached > logs/log.dl20-passage.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.cached > logs/log.dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cos-dpr-distil.hnsw.cached > logs/log.dl20-passage.cos-dpr-distil.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cos-dpr-distil.hnsw-int8.cached > logs/log.dl20-passage.cos-dpr-distil.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cos-dpr-distil.fw > logs/log.dl20-passage.cos-dpr-distil.fw.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cos-dpr-distil.lexlsh > logs/log.dl20-passage.cos-dpr-distil.lexlsh.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.openai-ada2.hnsw.cached > logs/log.dl20-passage.openai-ada2.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.openai-ada2.hnsw-int8.cached > logs/log.dl20-passage.openai-ada2.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw.cached > logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached > logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl20-passage.unicoil.cached > logs/log.dl20-passage.unicoil.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.unicoil-noexp.cached > logs/log.dl20-passage.unicoil-noexp.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.splade-pp-ed.cached > logs/log.dl20-passage.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.splade-pp-sd.cached > logs/log.dl20-passage.splade-pp-sd.cached.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cos-dpr-distil.hnsw.onnx > logs/log.dl20-passage.cos-dpr-distil.hnsw.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.cos-dpr-distil.hnsw-int8.onnx > logs/log.dl20-passage.cos-dpr-distil.hnsw-int8.onnx.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw.onnx > logs/log.dl20-passage.bge-base-en-v1.5.hnsw.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx > logs/log.dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl20-doc > logs/log.dl20-doc.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-doc.wp-ca > logs/log.dl20-doc.wp-ca.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-doc.wp-tok > logs/log.dl20-doc.wp-tok.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-doc.wp-hgf > logs/log.dl20-doc.wp-hgf.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-doc.docTTTTTquery > logs/log.dl20-doc.docTTTTTquery.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl20-doc-segmented > logs/log.dl20-doc-segmented.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-doc-segmented.wp-ca > logs/log.dl20-doc-segmented.wp-ca.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-doc-segmented.wp-tok > logs/log.dl20-doc-segmented.wp-tok.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-doc-segmented.docTTTTTquery > logs/log.dl20-doc-segmented.docTTTTTquery.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-doc-segmented.unicoil.cached > logs/log.dl20-doc-segmented.unicoil.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl20-doc-segmented.unicoil-noexp.cached > logs/log.dl20-doc-segmented.unicoil-noexp.cached.txt 2>&1 # DL21 -python src/main/python/run_regression.py --search --regression dl21-passage > logs/log.dl21-passage.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage.d2q-t5 > logs/log.dl21-passage.d2q-t5.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-passage > logs/log.dl21-passage.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-passage.d2q-t5 > logs/log.dl21-passage.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage-augmented > logs/log.dl21-passage-augmented.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage-augmented.d2q-t5 > logs/log.dl21-passage-augmented-d2q-t5.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-passage-augmented > logs/log.dl21-passage-augmented.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-passage-augmented.d2q-t5 > logs/log.dl21-passage-augmented-d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage.unicoil-noexp-0shot.cached > logs/log.dl21-passage.unicoil-noexp-0shot.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage.unicoil-0shot.cached > logs/log.dl21-passage.unicoil-0shot.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage.splade-pp-ed.cached > logs/log.dl21-passage.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage.splade-pp-ed.onnx > logs/log.dl21-passage.splade-pp-ed.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage.splade-pp-sd.cached > logs/log.dl21-passage.splade-pp-sd.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage.splade-pp-sd.onnx > logs/log.dl21-passage.splade-pp-sd.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-passage.unicoil-noexp-0shot.cached > logs/log.dl21-passage.unicoil-noexp-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-passage.unicoil-0shot.cached > logs/log.dl21-passage.unicoil-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-passage.splade-pp-ed.cached > logs/log.dl21-passage.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-passage.splade-pp-ed.onnx > logs/log.dl21-passage.splade-pp-ed.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-passage.splade-pp-sd.cached > logs/log.dl21-passage.splade-pp-sd.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-passage.splade-pp-sd.onnx > logs/log.dl21-passage.splade-pp-sd.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-doc > logs/log.dl21-doc.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-doc.d2q-t5 > logs/log.dl21-doc.d2q-t5.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-doc > logs/log.dl21-doc.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-doc.d2q-t5 > logs/log.dl21-doc.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-doc-segmented > logs/log.dl21-doc-segmented.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-doc-segmented.d2q-t5 > logs/log.dl21-doc-segmented.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-doc-segmented.unicoil-noexp-0shot.cached > logs/log.dl21-doc-segmented.unicoil-noexp-0shot.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2.cached > logs/log.dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-doc-segmented.unicoil-0shot.cached > logs/log.dl21-doc-segmented.unicoil-0shot.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-doc-segmented.unicoil-0shot-v2.cached > logs/log.dl21-doc-segmented.unicoil-0shot-v2.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-doc-segmented > logs/log.dl21-doc-segmented.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-doc-segmented.d2q-t5 > logs/log.dl21-doc-segmented.d2q-t5.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot.cached > logs/log.dl21-doc-segmented.unicoil-noexp-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2.cached > logs/log.dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-doc-segmented.unicoil-0shot.cached > logs/log.dl21-doc-segmented.unicoil-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl21-doc-segmented.unicoil-0shot-v2.cached > logs/log.dl21-doc-segmented.unicoil-0shot-v2.cached.txt 2>&1 # DL22 -python src/main/python/run_regression.py --search --regression dl22-passage > logs/log.dl22-passage.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage.d2q-t5 > logs/log.dl22-passage.d2q-t5.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-passage > logs/log.dl22-passage.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-passage.d2q-t5 > logs/log.dl22-passage.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage-augmented > logs/log.dl22-passage-augmented.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage-augmented.d2q-t5 > logs/log.dl22-passage-augmented.d2q-t5.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-passage-augmented > logs/log.dl22-passage-augmented.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-passage-augmented.d2q-t5 > logs/log.dl22-passage-augmented.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage.unicoil-noexp-0shot.cached > logs/log.dl22-passage.unicoil-noexp-0shot.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage.unicoil-0shot.cached > logs/log.dl22-passage.unicoil-0shot.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage.splade-pp-ed.cached > logs/log.dl22-passage.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage.splade-pp-ed.onnx > logs/log.dl22-passage.splade-pp-ed.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage.splade-pp-sd.cached > logs/log.dl22-passage.splade-pp-sd.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage.splade-pp-sd.onnx > logs/log.dl22-passage.splade-pp-sd.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-passage.unicoil-noexp-0shot.cached > logs/log.dl22-passage.unicoil-noexp-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-passage.unicoil-0shot.cached > logs/log.dl22-passage.unicoil-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-passage.splade-pp-ed.cached > logs/log.dl22-passage.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-passage.splade-pp-ed.onnx > logs/log.dl22-passage.splade-pp-ed.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-passage.splade-pp-sd.cached > logs/log.dl22-passage.splade-pp-sd.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-passage.splade-pp-sd.onnx > logs/log.dl22-passage.splade-pp-sd.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-doc > logs/log.dl22-doc.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-doc.d2q-t5 > logs/log.dl22-doc.d2q-t5.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-doc > logs/log.dl22-doc.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-doc.d2q-t5 > logs/log.dl22-doc.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-doc-segmented > logs/log.dl22-doc-segmented.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-doc-segmented.d2q-t5 > logs/log.dl22-doc-segmented.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2.cached > logs/log.dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-doc-segmented.unicoil-0shot-v2.cached > logs/log.dl22-doc-segmented.unicoil-0shot-v2.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-doc-segmented > logs/log.dl22-doc-segmented.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-doc-segmented.d2q-t5 > logs/log.dl22-doc-segmented.d2q-t5.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2.cached > logs/log.dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl22-doc-segmented.unicoil-0shot-v2.cached > logs/log.dl22-doc-segmented.unicoil-0shot-v2.cached.txt 2>&1 # DL23 -python src/main/python/run_regression.py --search --regression dl23-passage > logs/log.dl23-passage.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-passage.d2q-t5 > logs/log.dl23-passage.d2q-t5.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl23-passage-augmented > logs/log.dl23-passage-augmented.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-passage-augmented.d2q-t5 > logs/log.dl23-passage-augmented.d2q-t5.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl23-passage.unicoil-noexp-0shot.cached > logs/log.dl23-passage.unicoil-noexp-0shot.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-passage.unicoil-0shot.cached > logs/log.dl23-passage.unicoil-0shot.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-passage.splade-pp-ed.cached > logs/log.dl23-passage.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-passage.splade-pp-ed.onnx > logs/log.dl23-passage.splade-pp-ed.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-passage.splade-pp-sd.cached > logs/log.dl23-passage.splade-pp-sd.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-passage.splade-pp-sd.onnx > logs/log.dl23-passage.splade-pp-sd.onnx.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl23-doc > logs/log.dl23-doc.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-doc.d2q-t5 > logs/log.dl23-doc.d2q-t5.txt 2>&1 - -python src/main/python/run_regression.py --search --regression dl23-doc-segmented > logs/log.dl23-doc-segmented.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-doc-segmented.d2q-t5 > logs/log.dl23-doc-segmented.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2.cached > logs/log.dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-doc-segmented.unicoil-0shot-v2.cached > logs/log.dl23-doc-segmented.unicoil-0shot-v2.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-passage > logs/log.dl23-passage.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-passage.d2q-t5 > logs/log.dl23-passage.d2q-t5.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl23-passage-augmented > logs/log.dl23-passage-augmented.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-passage-augmented.d2q-t5 > logs/log.dl23-passage-augmented.d2q-t5.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl23-passage.unicoil-noexp-0shot.cached > logs/log.dl23-passage.unicoil-noexp-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-passage.unicoil-0shot.cached > logs/log.dl23-passage.unicoil-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-passage.splade-pp-ed.cached > logs/log.dl23-passage.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-passage.splade-pp-ed.onnx > logs/log.dl23-passage.splade-pp-ed.onnx.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-passage.splade-pp-sd.cached > logs/log.dl23-passage.splade-pp-sd.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-passage.splade-pp-sd.onnx > logs/log.dl23-passage.splade-pp-sd.onnx.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl23-doc > logs/log.dl23-doc.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-doc.d2q-t5 > logs/log.dl23-doc.d2q-t5.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression dl23-doc-segmented > logs/log.dl23-doc-segmented.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-doc-segmented.d2q-t5 > logs/log.dl23-doc-segmented.d2q-t5.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2.cached > logs/log.dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression dl23-doc-segmented.unicoil-0shot-v2.cached > logs/log.dl23-doc-segmented.unicoil-0shot-v2.cached.txt 2>&1 diff --git a/src/main/python/regressions-batch04.txt b/src/main/python/regressions-batch04.txt index 6e8f78c98e..921ff7858e 100644 --- a/src/main/python/regressions-batch04.txt +++ b/src/main/python/regressions-batch04.txt @@ -118,65 +118,65 @@ python src/main/python/run_regression.py --index --verify --search --regression python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx > logs/log.beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.onnx.txt 2>&1 python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx > logs/log.beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached.txt 2>&1 - -python src/main/python/run_regression.py --search --regression beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-arguana.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-bioasq.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-fever.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-fiqa.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-nq.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-quora.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-robust04.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-scidocs.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-scifact.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-signal1m.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-trec-news.bge-base-en-v1.5.flat.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached > logs/log.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat.cached.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-arguana.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-bioasq.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-fever.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-fiqa.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-nq.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-quora.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-robust04.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-scidocs.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-scifact.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-signal1m.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-trec-news.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached > logs/log.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat-int8.cached.txt 2>&1 python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-trec-covid.splade-pp-ed.onnx > logs/log.beir-v1.0.0-trec-covid.splade-pp-ed.onnx.txt 2>&1 python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-bioasq.splade-pp-ed.onnx > logs/log.beir-v1.0.0-bioasq.splade-pp-ed.onnx.txt 2>&1 @@ -328,95 +328,95 @@ python src/main/python/run_regression.py --index --verify --search --regression python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-climate-fever.multifield > logs/log.beir-v1.0.0-climate-fever.multifield.txt 2>&1 python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-scifact.multifield > logs/log.beir-v1.0.0-scifact.multifield.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-trec-covid.splade-pp-ed.cached > logs/log.beir-v1.0.0-trec-covid.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-bioasq.splade-pp-ed.cached > logs/log.beir-v1.0.0-bioasq.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-nfcorpus.splade-pp-ed.cached > logs/log.beir-v1.0.0-nfcorpus.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-nq.splade-pp-ed.cached > logs/log.beir-v1.0.0-nq.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-hotpotqa.splade-pp-ed.cached > logs/log.beir-v1.0.0-hotpotqa.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-fiqa.splade-pp-ed.cached > logs/log.beir-v1.0.0-fiqa.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-signal1m.splade-pp-ed.cached > logs/log.beir-v1.0.0-signal1m.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-trec-news.splade-pp-ed.cached > logs/log.beir-v1.0.0-trec-news.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-robust04.splade-pp-ed.cached > logs/log.beir-v1.0.0-robust04.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-arguana.splade-pp-ed.cached > logs/log.beir-v1.0.0-arguana.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-webis-touche2020.splade-pp-ed.cached > logs/log.beir-v1.0.0-webis-touche2020.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-android.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-android.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-english.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-english.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-gaming.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-gaming.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-gis.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-gis.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-mathematica.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-mathematica.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-physics.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-physics.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-programmers.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-programmers.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-stats.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-stats.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-tex.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-tex.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-unix.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-unix.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-webmasters.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-webmasters.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-wordpress.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-wordpress.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-quora.splade-pp-ed.cached > logs/log.beir-v1.0.0-quora.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-dbpedia-entity.splade-pp-ed.cached > logs/log.beir-v1.0.0-dbpedia-entity.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-scidocs.splade-pp-ed.cached > logs/log.beir-v1.0.0-scidocs.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-fever.splade-pp-ed.cached > logs/log.beir-v1.0.0-fever.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-climate-fever.splade-pp-ed.cached > logs/log.beir-v1.0.0-climate-fever.splade-pp-ed.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-scifact.splade-pp-ed.cached > logs/log.beir-v1.0.0-scifact.splade-pp-ed.cached.txt 2>&1 - -python src/main/python/run_regression.py --search --regression beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.txt 2>&1 - -python src/main/python/run_regression.py --search --regression beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-trec-covid.splade-pp-ed.cached > logs/log.beir-v1.0.0-trec-covid.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-bioasq.splade-pp-ed.cached > logs/log.beir-v1.0.0-bioasq.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-nfcorpus.splade-pp-ed.cached > logs/log.beir-v1.0.0-nfcorpus.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-nq.splade-pp-ed.cached > logs/log.beir-v1.0.0-nq.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-hotpotqa.splade-pp-ed.cached > logs/log.beir-v1.0.0-hotpotqa.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-fiqa.splade-pp-ed.cached > logs/log.beir-v1.0.0-fiqa.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-signal1m.splade-pp-ed.cached > logs/log.beir-v1.0.0-signal1m.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-trec-news.splade-pp-ed.cached > logs/log.beir-v1.0.0-trec-news.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-robust04.splade-pp-ed.cached > logs/log.beir-v1.0.0-robust04.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-arguana.splade-pp-ed.cached > logs/log.beir-v1.0.0-arguana.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-webis-touche2020.splade-pp-ed.cached > logs/log.beir-v1.0.0-webis-touche2020.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-android.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-android.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-english.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-english.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-gaming.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-gaming.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-gis.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-gis.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-mathematica.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-mathematica.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-physics.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-physics.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-programmers.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-programmers.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-stats.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-stats.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-tex.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-tex.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-unix.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-unix.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-webmasters.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-webmasters.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-wordpress.splade-pp-ed.cached > logs/log.beir-v1.0.0-cqadupstack-wordpress.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-quora.splade-pp-ed.cached > logs/log.beir-v1.0.0-quora.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-dbpedia-entity.splade-pp-ed.cached > logs/log.beir-v1.0.0-dbpedia-entity.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-scidocs.splade-pp-ed.cached > logs/log.beir-v1.0.0-scidocs.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-fever.splade-pp-ed.cached > logs/log.beir-v1.0.0-fever.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-climate-fever.splade-pp-ed.cached > logs/log.beir-v1.0.0-climate-fever.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-scifact.splade-pp-ed.cached > logs/log.beir-v1.0.0-scifact.splade-pp-ed.cached.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-nq.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-quora.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-fever.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached > logs/log.beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw.cached.txt 2>&1 + +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-nq.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-quora.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-fever.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --verify --search --regression beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached > logs/log.beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 # MIRACL python src/main/python/run_regression.py --index --verify --search --regression miracl-v1.0-ar > logs/log.miracl-v1.0-ar.txt 2>&1 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat-int8.cached.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat-int8.cached.yaml index 8df98db6e2..ce003cc01a 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat-int8.cached.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat-int8.onnx.yaml index 3823f7e936..94fb7650fd 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat-int8.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat-int8.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat.cached.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat.cached.yaml index 70813ba369..68cd1dab14 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat.cached.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat.onnx.yaml index e532610788..305736ebdd 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.flat.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml index cff74a2891..af877e2c0b 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml index c78c86465e..14d4e1438e 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml index eb9fc27de3..4eeb1942e6 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml index 9b87263dbe..73796aa95a 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml index 88dfda2642..9a2586feae 100644 --- a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml +++ b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cohere-embed-english-v3.0 +corpus: msmarco-passage-cohere-embed-english-v3.0 corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.flat.cached.yaml b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.flat.cached.yaml index f2a354af3c..0b19e93b80 100644 --- a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.flat.cached.yaml +++ b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cohere-embed-english-v3.0 +corpus: msmarco-passage-cohere-embed-english-v3.0 corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml index 76a1e43360..aa91abdb7f 100644 --- a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: cohere-embed-english-v3.0-cached + - name: cohere-embed-english-v3.0-hnsw-int8-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml index 2c11af4800..99a9b9652c 100644 --- a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml +++ b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: cohere-embed-english-v3.0-cached + - name: cohere-embed-english-v3.0-hnsw-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat-int8.cached.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat-int8.cached.yaml index f1c03fba41..4587d48888 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat-int8.cached.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat-int8.onnx.yaml index 01ddca2d06..27a496798d 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat-int8.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat-int8.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat.cached.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat.cached.yaml index f3ac4c7e28..d8e5b7d4d8 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat.cached.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat.onnx.yaml index 2bfbdcaef1..d73561e115 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.flat.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml index 4ea10ff86e..bfb7bf2930 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: cos-dpr-distil-hnsw-cached + - name: cos-dpr-distil-hnsw-int8-cached display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml index 535baa79be..a70061e7ae 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: cos-dpr-distil-hnsw-onnx + - name: cos-dpr-distil-hnsw-int8-onnx display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml index 91e5704ba4..73576b9798 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml index 400a13dd9a..8455574323 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.flat-int8.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.flat-int8.cached.yaml index 33e81fbcf2..8b3ff54914 100644 --- a/src/main/resources/regression/dl19-passage.openai-ada2.flat-int8.cached.yaml +++ b/src/main/resources/regression/dl19-passage.openai-ada2.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.openai-ada2 +corpus: msmarco-passage-openai-ada2 corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.flat.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.flat.cached.yaml index 2603390cb2..ce3c65b49f 100644 --- a/src/main/resources/regression/dl19-passage.openai-ada2.flat.cached.yaml +++ b/src/main/resources/regression/dl19-passage.openai-ada2.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.openai-ada2 +corpus: msmarco-passage-openai-ada2 corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml index b7bb1a9be5..a0b05f6cc5 100644 --- a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: openai-ada2-cached + - name: openai-ada2-hnsw-int8-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml index 6b16e279aa..21ac8f5c9d 100644 --- a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml +++ b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -maxThreadMemoryBeforeFlush 1945 +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: openai-ada2-cached + - name: openai-ada2-hnsw-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat-int8.cached.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat-int8.cached.yaml index 6aa3a2b2e2..b452906eba 100644 --- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat-int8.cached.yaml +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat-int8.onnx.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat-int8.onnx.yaml index 4dff82f950..cd8da49de3 100644 --- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat-int8.onnx.yaml +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat-int8.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat.cached.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat.cached.yaml index c054caa2b2..3eb2e446ba 100644 --- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat.cached.yaml +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat.onnx.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat.onnx.yaml index 81e45483dc..70a2c8a3c8 100644 --- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat.onnx.yaml +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.flat.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml index e7556640cb..e092a742bb 100644 --- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml index 83a9c01b02..78a5495329 100644 --- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml index a4f206ea3e..f08b12e116 100644 --- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.onnx.yaml index 2ee8d399a4..526e313788 100644 --- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml index 8466ccedd7..d51573f3c9 100644 --- a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml +++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cohere-embed-english-v3.0 +corpus: msmarco-passage-cohere-embed-english-v3.0 corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.flat.cached.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.flat.cached.yaml index 62fc68c67b..27a76a8b3e 100644 --- a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.flat.cached.yaml +++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cohere-embed-english-v3.0 +corpus: msmarco-passage-cohere-embed-english-v3.0 corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml index d1e2122a67..5b3f8b4056 100644 --- a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: cohere-embed-english-v3.0-cached + - name: cohere-embed-english-v3.0-hnsw-int8-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml index 98f8a1b6f7..9fda129765 100644 --- a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml +++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: cohere-embed-english-v3.0-cached + - name: cohere-embed-english-v3.0-hnsw-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat-int8.cached.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat-int8.cached.yaml index dccadbd2f1..db9fe615bb 100644 --- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat-int8.cached.yaml +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat-int8.onnx.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat-int8.onnx.yaml index 4af77af70c..8e9b220e33 100644 --- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat-int8.onnx.yaml +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat-int8.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat.cached.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat.cached.yaml index 38cbe45064..db387e81b1 100644 --- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat.cached.yaml +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat.onnx.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat.onnx.yaml index 3b30d0b1c8..e1e76d8fc8 100644 --- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat.onnx.yaml +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.flat.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml index 3752d632c3..5dce72b873 100644 --- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: cos-dpr-distil-hnsw-cached + - name: cos-dpr-distil-hnsw-int8-cached display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.yaml index 90ea797f16..b20ad4affd 100644 --- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: cos-dpr-distil-hnsw-onnx + - name: cos-dpr-distil-hnsw-int8-onnx display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml index 93707fb267..ff5c51375f 100644 --- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.onnx.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.onnx.yaml index 4824ed1533..a7f57293c2 100644 --- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.onnx.yaml +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.flat-int8.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.flat-int8.cached.yaml index f40ce62786..df872c5a51 100644 --- a/src/main/resources/regression/dl20-passage.openai-ada2.flat-int8.cached.yaml +++ b/src/main/resources/regression/dl20-passage.openai-ada2.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.openai-ada2 +corpus: msmarco-passage-openai-ada2 corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.flat.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.flat.cached.yaml index f29f0f3039..0192004e66 100644 --- a/src/main/resources/regression/dl20-passage.openai-ada2.flat.cached.yaml +++ b/src/main/resources/regression/dl20-passage.openai-ada2.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.openai-ada2 +corpus: msmarco-passage-openai-ada2 corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml index 9525e0d0e5..787a2fb507 100644 --- a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: openai-ada2-cached + - name: openai-ada2-hnsw-int8-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml index 8aa54378a9..13b1ac8a14 100644 --- a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml +++ b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -maxThreadMemoryBeforeFlush 1945 +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: openai-ada2-cached + - name: openai-ada2-hnsw-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.yaml index 580711e953..d0441af08f 100644 --- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.yaml index 3fb36958bd..f1362b4fab 100644 --- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat-int8.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat.cached.yaml index 17ad188c22..c7fa671bc9 100644 --- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.yaml index 1d49a6b3e6..57b6ec2132 100644 --- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.flat.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.bge-base-en-v1.5 +corpus: msmarco-passage-bge-base-en-v1.5 corpus_path: collections/msmarco/msmarco-passage-bge-base-en-v1.5/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-bge-base-en-v1.5.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml index 75b2c5af3f..0008962a82 100644 --- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: bge-hnsw-cached + - name: bge-hnsw-int8-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml index fb3ee508dd..61a07240de 100644 --- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: bge-hnsw-onnx + - name: bge-hnsw-int8-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml index 8f988721aa..61e0188f74 100644 --- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.yaml index b71f9a2e80..996c2daafb 100644 --- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml index 6a3a6b8eb5..9044dbe815 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cohere-embed-english-v3.0 +corpus: msmarco-passage-cohere-embed-english-v3.0 corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.flat.cached.yaml index 026df9cc38..21d708228c 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.flat.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cohere-embed-english-v3.0 +corpus: msmarco-passage-cohere-embed-english-v3.0 corpus_path: collections/msmarco/msmarco-passage-cohere-embed-english-v3.0/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cohere-embed-english-v3.0.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml index 6c7d9641e7..53fd269549 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: nDCG@10 @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: cohere-embed-english-v3.0-cached + - name: cohere-embed-english-v3.0-hnsw-int8-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml index 80a5b04ca3..bd45211b5d 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: nDCG@10 @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: cohere-embed-english-v3.0-cached + - name: cohere-embed-english-v3.0-hnsw-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat-int8.cached.yaml index 74a766cc75..5c25a84ae4 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat-int8.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx.yaml index b7e008d106..d3d549f807 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat-int8.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat.cached.yaml index 07f849a2b2..8ad1489535 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat.onnx.yaml index 8a69ee88e8..7c49f7202c 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat.onnx.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.flat.onnx.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.cos-dpr-distil +corpus: msmarco-passage-cos-dpr-distil corpus_path: collections/msmarco/msmarco-passage-cos-dpr-distil/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-cos-dpr-distil.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml index 8aae7ac52e..7b1dd87fb1 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: cos-dpr-distil-hnsw-cached + - name: cos-dpr-distil-hnsw-int8-cached display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.yaml index 5fae532ada..b5347ae6e0 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: cos-dpr-distil-hnsw-onnx + - name: cos-dpr-distil-hnsw-int8-onnx display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml index 0e11ea03f7..5fa7e7c702 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.yaml index bc989aad88..0c79b27540 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.flat-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.flat-int8.cached.yaml index 73ab591471..b0a7e46134 100644 --- a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.flat-int8.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.flat-int8.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.openai-ada2 +corpus: msmarco-passage-openai-ada2 corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.flat.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.flat.cached.yaml index 7638595883..a8e556fee7 100644 --- a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.flat.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.flat.cached.yaml @@ -1,5 +1,5 @@ --- -corpus: msmarco-passage.openai-ada2 +corpus: msmarco-passage-openai-ada2 corpus_path: collections/msmarco/msmarco-passage-openai-ada2/ download_url: https://rgw.cs.uwaterloo.ca/pyserini/data/msmarco-passage-openai-ada2.tar diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml index 8b4f084f1d..66b1bb8cde 100644 --- a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -quantize.int8 +index_options: -M 16 -efC 100 -quantize.int8 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: openai-ada2-cached + - name: openai-ada2-hnsw-int8-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml index f3a193fd57..10cb1a48f3 100644 --- a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml @@ -10,7 +10,7 @@ index_type: hnsw collection_class: JsonDenseVectorCollection generator_class: DenseVectorDocumentGenerator index_threads: 16 -index_options: -M 16 -efC 100 -memoryBuffer 65536 -noMerge -maxThreadMemoryBeforeFlush 1945 +index_options: -M 16 -efC 100 metrics: - metric: AP@1000 @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: openai-ada2-cached + - name: openai-ada2-hnsw-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000