Skip to content

Commit

Permalink
Add initial implementation of repro matrix for MS MARCO v1 passage (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
lintool authored Apr 27, 2022
1 parent 888682d commit 244828f
Show file tree
Hide file tree
Showing 2 changed files with 335 additions and 0 deletions.
201 changes: 201 additions & 0 deletions pyserini/resources/msmarco-v1-passage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
conditions:
- name: bm25
display: BM25 (k1=0.82, b=0.68)
command: python -m pyserini.search.lucene --topics _T_ --index msmarco-v1-passage-slim --output _R_ --bm25
topics:
- topic_key: msmarco-passage-dev-subset
eval_key: msmarco-passage-dev-subset
scores:
- MRR@10: 0.1875
R@1K: 0.8573
- topic_key: dl19-passage
eval_key: dl19-passage
scores:
- MAP: 0.2903
nDCG@10: 0.4973
R@1K: 0.7450
- topic_key: dl20
eval_key: dl20-passage
scores:
- MAP: 0.2876
nDCG@10: 0.4876
R@1K: 0.8031
- name: bm25-rm3
display: BM25+RM3 (k1=0.82, b=0.68)
command: python -m pyserini.search.lucene --topics _T_ --index msmarco-v1-passage-full --output _R_ --bm25 --rm3
topics:
- topic_key: msmarco-passage-dev-subset
eval_key: msmarco-passage-dev-subset
scores:
- MRR@10: 0.1668
R@1K: 0.8687
- topic_key: dl19-passage
eval_key: dl19-passage
scores:
- MAP: 0.3377
nDCG@10: 0.5231
R@1K: 0.7792
- topic_key: dl20
eval_key: dl20-passage
scores:
- MAP: 0.3056
nDCG@10: 0.4808
R@1K: 0.8286
- name: bm25-default
display: BM25 (k1=0.9, b=0.4)
command: python -m pyserini.search.lucene --topics _T_ --index msmarco-v1-passage-slim --output _R_ --bm25 --k1 0.9 --b 0.4
topics:
- topic_key: msmarco-passage-dev-subset
eval_key: msmarco-passage-dev-subset
scores:
- MRR@10: 0.1840
R@1K: 0.8526
- topic_key: dl19-passage
eval_key: dl19-passage
scores:
- MAP: 0.3013
nDCG@10: 0.5058
R@1K: 0.7501
- topic_key: dl20
eval_key: dl20-passage
scores:
- MAP: 0.2856
nDCG@10: 0.4796
R@1K: 0.7863
- name: bm25-rm3-default
display: BM25+RM3 (k1=0.9, b=0.4)
command: python -m pyserini.search.lucene --topics _T_ --index msmarco-v1-passage-full --output _R_ --bm25 --k1 0.9 --b 0.4 --rm3
topics:
- topic_key: msmarco-passage-dev-subset
eval_key: msmarco-passage-dev-subset
scores:
- MRR@10: 0.1564
R@1K: 0.8606
- topic_key: dl19-passage
eval_key: dl19-passage
scores:
- MAP: 0.3390
nDCG@10: 0.5180
R@1K: 0.7998
- topic_key: dl20
eval_key: dl20-passage
scores:
- MAP: 0.3019
nDCG@10: 0.4821
R@1K: 0.8217
- name: bm25-d2q-t5
display: BM25 w/ doc2query-T5 (k1=2.18, b=0.86)
command: python -m pyserini.search.lucene --topics _T_ --index msmarco-v1-passage-d2q-t5 --output _R_ --bm25
topics:
- topic_key: msmarco-passage-dev-subset
eval_key: msmarco-passage-dev-subset
scores:
- MRR@10: 0.2816
R@1K: 0.9506
- topic_key: dl19-passage
eval_key: dl19-passage
scores:
- MAP: 0.4046
nDCG@10: 0.6336
R@1K: 0.8134
- topic_key: dl20
eval_key: dl20-passage
scores:
- MAP: 0.4171
nDCG@10: 0.6265
R@1K: 0.8393
- name: bm25-d2q-t5-default
display: BM25 w/ doc2query-T5 (k1=0.9, b=0.4)
command: python -m pyserini.search.lucene --topics _T_ --index msmarco-v1-passage-d2q-t5 --output _R_ --bm25 --k1 0.9 --b 0.4
topics:
- topic_key: msmarco-passage-dev-subset
eval_key: msmarco-passage-dev-subset
scores:
- MRR@10: 0.2723
R@1K: 0.9470
- topic_key: dl19-passage
eval_key: dl19-passage
scores:
- MAP: 0.4034
nDCG@10: 0.6417
R@1K: 0.8310
- topic_key: dl20
eval_key: dl20-passage
scores:
- MAP: 0.4074
nDCG@10: 0.6187
R@1K: 0.8452
# TODO: We can't do the RM3 variant of bm25-d2q-t5 because we don't have a pre-built index with docvectors stored.
- name: unicoil-otf
display: uniCOIL (w/ doc2query-T5) - otf
command: python -m pyserini.search.lucene --index msmarco-v1-passage-unicoil --topics _T_ --encoder castorini/unicoil-msmarco-passage --output _R_ --batch 36 --threads 12 --hits 1000 --impact
topics:
- topic_key: msmarco-passage-dev-subset
eval_key: msmarco-passage-dev-subset
scores:
- MRR@10: 0.3509
R@1K: 0.9581
- topic_key: dl19-passage
eval_key: dl19-passage
scores:
- MAP: 0.4617
nDCG@10: 0.7027
R@1K: 0.8291
- topic_key: dl20
eval_key: dl20-passage
scores:
- MAP: 0.4429
nDCG@10: 0.6745
R@1K: 0.8433
- name: unicoil
display: uniCOIL (w/ doc2query-T5) - pre-encoded
command: python -m pyserini.search.lucene --index msmarco-v1-passage-unicoil --topics _T_ --output _R_ --batch 36 --threads 12 --hits 1000 --impact
topics:
- topic_key: msmarco-passage-dev-subset-unicoil
eval_key: msmarco-passage-dev-subset
scores:
- MRR@10: 0.3516
R@1K: 0.9582
- topic_key: dl19-passage-unicoil
eval_key: dl19-passage
scores:
- MAP: 0.4612
nDCG@10: 0.7024
R@1K: 0.8292
- topic_key: dl20-unicoil
eval_key: dl20-passage
scores:
- MAP: 0.4430
nDCG@10: 0.6745
R@1K: 0.8430
- name: tct_colbert-v2-hnp-otf
display: TCT_ColBERT-V2-HN+ - otf
command: python -m pyserini.search.faiss --index msmarco-passage-tct_colbert-v2-hnp-bf --topics _T_ --encoder castorini/tct_colbert-v2-hnp-msmarco --output _R_ --batch-size 36 --threads 12
topics:
- topic_key: msmarco-passage-dev-subset
eval_key: msmarco-passage-dev-subset
scores:
- MRR@10: 0.3584
R@1K: 0.9695
- topic_key: dl19-passage
eval_key: dl19-passage
scores:
- MAP: 0.4469
nDCG@10: 0.7204
R@1K: 0.8261
- topic_key: dl20
eval_key: dl20-passage
scores:
- MAP: 0.4754
nDCG@10: 0.6882
R@1K: 0.8429
- name: tct_colbert-v2-hnp
display: TCT_ColBERT-V2-HN+ - pre-encoded
command: python -m pyserini.search.faiss --index msmarco-passage-tct_colbert-v2-hnp-bf --topics _T_ --encoded-queries tct_colbert-v2-hnp-msmarco-passage-dev-subset --output _R_ --batch-size 36 --threads 12
topics:
- topic_key: msmarco-passage-dev-subset
eval_key: msmarco-passage-dev-subset
scores:
- MRR@10: 0.3584
R@1K: 0.9695
# TODO: We don't have DL19 and DL20 queries pre-encoded
134 changes: 134 additions & 0 deletions scripts/repro_matrix/run_all_msmarco_v1_passage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#
# Pyserini: Reproducible IR research with sparse and dense representations
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import math
import os
import subprocess
import yaml
from collections import defaultdict


# Collection slug used to name run files: run.<collection>.<topic_key>.<condition>.txt
collection = 'msmarco-v1-passage'

# Status markers printed next to each verified score ([FAIL] rendered red via ANSI codes).
fail_str = '\033[91m[FAIL]\033[0m'
ok_str = '[OK] '

# trec_eval flag strings, keyed by eval set and then by reported metric name.
# NOTE(review): '-l 2' on the DL19/DL20 sets presumably raises the minimum
# relevance grade counted for MAP/R@1K, per TREC DL passage-track convention
# — confirm against trec_eval documentation.
trec_eval_metric_definitions = {
    'msmarco-passage-dev-subset': {
        'MRR@10': '-c -M 10 -m recip_rank',
        'R@1K': '-c -m recall.1000'
    },
    'dl19-passage': {
        'MAP': '-c -l 2 -m map',
        'nDCG@10': '-c -m ndcg_cut.10',
        'R@1K': '-c -l 2 -m recall.1000'
    },
    'dl20-passage': {
        'MAP': '-c -l 2 -m map',
        'nDCG@10': '-c -m ndcg_cut.10',
        'R@1K': '-c -l 2 -m recall.1000'
    }
}

# table[condition name][topic-set key][metric] -> score; unset entries read as 0.0,
# which is what the final summary prints for missing cells.
table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
# condition name -> human-readable display string, filled in while conditions run.
table_keys = {}


def run_command(cmd):
    """Execute *cmd* (split on whitespace) as a subprocess.

    Returns a (stdout, stderr) tuple of UTF-8 decoded strings; the exit
    status is not checked.
    """
    completed = subprocess.run(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return completed.stdout.decode('utf-8'), completed.stderr.decode('utf-8')


def run_eval_and_return_metric(metric, eval_key, runfile):
    """Run trec_eval for one *metric* on runs/*runfile* and return the score.

    The score is returned as the raw string printed by trec_eval.
    """
    flags = trec_eval_metric_definitions[eval_key][metric]
    eval_cmd = f'python -m pyserini.eval.trec_eval {flags} {eval_key} runs/{runfile}'
    eval_stdout, eval_stderr = run_command(eval_cmd)

    # TODO: This is very brittle... fix me later.
    # The score sits in the third tab-separated column of the
    # third-from-last output line.
    lines = eval_stdout.split('\n')
    return lines[-3].split('\t')[2]


def find_table_topic_set_key(topic_key):
    """Collapse topic-key variants into a canonical table column key.

    E.g., both 'dl19-passage-unicoil' and 'dl19-passage' map to 'dl19';
    unrecognized keys map to the empty string.
    """
    for prefix in ('dl19', 'dl20', 'msmarco'):
        if topic_key.startswith(prefix):
            return prefix

    return ''


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Generate regression matrix for MS MARCO V1 passage corpus.')
    parser.add_argument('--skip-eval', action='store_true', default=False, help='Skip running trec_eval.')
    args = parser.parse_args()

    # Run files are both written to and read from runs/; create it up front so
    # a fresh checkout doesn't fail when the retrieval command or trec_eval
    # tries to touch runs/<runfile>.
    os.makedirs('runs', exist_ok=True)

    with open('pyserini/resources/msmarco-v1-passage.yaml') as f:
        yaml_data = yaml.safe_load(f)
        for condition in yaml_data['conditions']:
            name = condition['name']
            display = condition['display']
            cmd_template = condition['command']

            print(f'# Running condition "{name}": {display}\n')
            for topic_set in condition['topics']:
                topic_key = topic_set['topic_key']
                eval_key = topic_set['eval_key']

                print(f' - topic_key: {topic_key}')

                runfile = f'run.{collection}.{topic_key}.{name}.txt'
                # _T_ and _R_ are placeholders in the YAML command template for
                # the topic set and the output run file, respectively.
                cmd = cmd_template.replace('_R_', f'runs/{runfile}').replace('_T_', topic_key)

                # Retrieval runs are cached: skip retrieval if the run file exists.
                if not os.path.exists(f'runs/{runfile}'):
                    print(f' Running: {cmd}')
                    os.system(cmd)

                print('')
                for expected in topic_set['scores']:
                    for metric in expected:
                        table_keys[name] = display
                        if not args.skip_eval:
                            # Verify the freshly computed score against the
                            # expected value recorded in the YAML.
                            score = float(run_eval_and_return_metric(metric, eval_key, runfile))
                            result = ok_str if math.isclose(score, float(expected[metric])) else fail_str + f' expected {expected[metric]:.4f}'
                            print(f' {metric:7}: {score:.4f} {result}')
                            table[name][find_table_topic_set_key(topic_key)][metric] = score
                        else:
                            # With --skip-eval, fill the table from the YAML.
                            table[name][find_table_topic_set_key(topic_key)][metric] = expected[metric]

                print('')

    # Final summary table: one row per condition, column groups per eval set.
    print(' ' * 49 + 'TREC 2019' + ' ' * 16 + 'TREC 2020' + ' ' * 12 + 'MS MARCO dev')
    print(' ' * 45 + 'MAP nDCG@10 R@1K MAP nDCG@10 R@1K MRR@10 R@1K')
    print(' ' * 42 + '-' * 22 + ' ' + '-' * 22 + ' ' + '-' * 14)
    # Empty-string entries act as sentinels for blank separator rows.
    for name in ['bm25', 'bm25-rm3', 'bm25-d2q-t5', '',
                 'bm25-default', 'bm25-rm3-default', 'bm25-d2q-t5-default', '',
                 'unicoil', 'unicoil-otf', '',
                 'tct_colbert-v2-hnp', 'tct_colbert-v2-hnp-otf']:
        if not name:
            print('')
            continue
        print(f'{table_keys[name]:40}' +
              f'{table[name]["dl19"]["MAP"]:8.4f}{table[name]["dl19"]["nDCG@10"]:8.4f}{table[name]["dl19"]["R@1K"]:8.4f} ' +
              f'{table[name]["dl20"]["MAP"]:8.4f}{table[name]["dl20"]["nDCG@10"]:8.4f}{table[name]["dl20"]["R@1K"]:8.4f} ' +
              f'{table[name]["msmarco"]["MRR@10"]:8.4f}{table[name]["msmarco"]["R@1K"]:8.4f}')

0 comments on commit 244828f

Please sign in to comment.