Skip to content

Commit

Permalink
TIRA Evaluator: Extract top-docs for DiffIR and render them by default.
Browse files Browse the repository at this point in the history
  • Loading branch information
mam10eks committed Jul 19, 2023
1 parent 99d0426 commit a997cc2
Show file tree
Hide file tree
Showing 9 changed files with 1,641 additions and 2 deletions.
2 changes: 1 addition & 1 deletion ir-measures/Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM webis/tira-ir-datasets-starter:0.0.54
FROM webis/tira-ir-datasets-starter:0.0.55

RUN apk add jq libffi-dev && pip3 install ir-datasets ir-measures approvaltests exceptiongroup pytest jupyter

92 changes: 92 additions & 0 deletions ir-measures/ir_measures_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

import ir_measures
from ir_measures import Qrel, ScoredDoc, Measure, Metric
import sys
import pandas as pd


def add_error(
Expand Down Expand Up @@ -803,6 +805,96 @@ def main():

write_prototext(aggregated, per_query, output_path)

irds_id = irds_id_from_metadata_or_none(args.qrels)
if irds_id:
try:
render_results(args.run, irds_id, output_path)
except Exception as e:
pass


def normalized_run(run, depth=1000):
    """Load a TREC-style run file and keep its top ``depth`` rows per query.

    The file is parsed as whitespace-separated columns named qid, q0, docno,
    rank, score and system. Within each query the rows are ordered by
    descending score (ties broken by descending docno), truncated to
    ``depth`` rows, and re-ranked so that the rank starts at 1.

    Args:
        run: Anything ``pandas.read_csv`` accepts as its input (e.g. a path).
        depth: Maximum number of rows to keep per query.

    Returns:
        A DataFrame with the columns qid, Q0, docno, rank, score, system.
        The Q0 column is the constant 0 and the rank read from the file is
        replaced by the recomputed one.
    """
    columns = ["qid", "q0", "docno", "rank", "score", "system"]
    # Raw string: "\s" in a normal string literal is an invalid escape.
    run = pd.read_csv(run, sep=r"\s+", names=columns)

    try:
        run["qid"] = run["qid"].astype(int)
    except (ValueError, TypeError, OverflowError):
        # Non-numeric query ids are kept as they were parsed.
        pass

    run = run.sort_values(
        ["qid", "score", "docno"], ascending=[True, False, False]
    ).reset_index(drop=True)

    # The parsed column is named "q0", so "Q0" never exists at this point;
    # it is always emitted as the constant 0.
    run["Q0"] = 0

    top = run.groupby("qid").head(depth).copy()

    # Make sure that the rank position starts at 1 within every query.
    top["rank"] = top.groupby("qid").cumcount() + 1

    return top[["qid", "Q0", "docno", "rank", "score", "system"]]

def queries_dict(irds_dataset):
    """Map each query of ``irds_dataset`` by its stringified ``query_id``.

    If several queries share the same id, the last one yielded by
    ``queries_iter()`` wins.
    """
    by_id = {}
    for query in irds_dataset.queries_iter():
        by_id[str(query.query_id)] = query
    return by_id

def qrels_dict(irds_dataset):
    """Collect the dataset's relevance judgments as a nested dictionary.

    Returns:
        ``{query_id: {doc_id: relevance}}`` with both ids converted to
        strings. A later judgment for the same (query, document) pair
        overwrites an earlier one.
    """
    judgments = {}

    for judgment in irds_dataset.qrels_iter():
        per_query = judgments.setdefault(str(judgment.query_id), {})
        per_query[str(judgment.doc_id)] = judgment.relevance

    return judgments

def render_results(run_file, irds_id, output_path, top_k=10):
    """Render the top documents of a run with DiffIR into ``output_path``.

    For the ``top_k`` best rows per query of ``run_file`` the matching
    queries, documents and relevance judgments are looked up in the
    ir_datasets dataset ``irds_id``. The excerpt is written to
    ``.data-top-10-for-rendering.jsonl`` and the HTML produced by DiffIR to
    ``serp.html``, both inside ``output_path``.

    Args:
        run_file: Run file; ``.resolve()`` is called on it.
        irds_id: Identifier passed to ``ir_datasets.load``.
        output_path: Directory path that supports the ``/`` operator.
        top_k: Number of rows per query taken from the run.
    """
    # The TIRA loader module is imported from this fixed location.
    sys.path.append('/tira/application/src/tira/')
    import ir_datasets
    from ir_datasets_loader import IrDatasetsLoader

    loader = IrDatasetsLoader()
    dataset = ir_datasets.load(irds_id)
    all_queries = queries_dict(dataset)
    all_qrels = qrels_dict(dataset)
    docs_store = dataset.docs_store()

    raw_queries = {}
    raw_documents = {}
    judged = {}

    for _, row in normalized_run(run_file, top_k).iterrows():
        qid, docno = str(row.qid), str(row.docno)
        raw_queries[qid] = all_queries[qid]
        raw_documents[docno] = docs_store.get(docno)

        # Only judged (query, document) pairs end up in the excerpt.
        judgments_for_query = all_qrels.get(qid, {})
        if docno in judgments_for_query:
            judged.setdefault(qid, {})[docno] = judgments_for_query[docno]

    mapped_queries = {
        key: json.loads(loader.map_query_as_jsonl(query))
        for key, query in raw_queries.items()
    }
    mapped_documents = {
        key: json.loads(loader.map_doc(document))
        for key, document in raw_documents.items()
    }
    excerpt_for_rendering = {
        'queries': mapped_queries,
        'documents': mapped_documents,
        'qrels': judged,
    }

    data_file = output_path / '.data-top-10-for-rendering.jsonl'
    serialized = json.dumps(excerpt_for_rendering)
    with open(data_file, 'w') as output_file:
        output_file.write(serialized)

    # Imported only after the excerpt has been written to disk.
    from diffir.run import diff_from_local_data
    _, rendered_serp = diff_from_local_data(
        [str(run_file.resolve())],
        [str(data_file.resolve())],
        cli=False,
        web=True,
        print_html=False,
    )

    with open(output_path / 'serp.html', 'w') as output_file:
        output_file.write(rendered_serp)


def irds_id_from_metadata_or_none(f):
    """Return the ``ir_datasets_id`` stored in a nearby ``metadata.json``.

    ``metadata.json`` is looked up first inside ``f`` (treating ``f`` as a
    directory) and then in the parent directory of ``f``. Only the first
    existing candidate is read.

    Args:
        f: Path (string or ``Path``) of the qrels file or directory.

    Returns:
        The value of the ``ir_datasets_id`` key, or ``None`` when no
        metadata file exists or it cannot be read or parsed, or lacks
        the key.
    """
    try:
        f = Path(f)
        for candidate in (f / 'metadata.json', f.parent / 'metadata.json'):
            if candidate.is_file():
                # Context manager so the handle is closed deterministically.
                with open(candidate) as metadata_file:
                    return json.load(metadata_file)['ir_datasets_id']
    except (OSError, ValueError, KeyError, TypeError):
        # Best effort: unreadable or malformed metadata means "no id".
        # ValueError also covers json.JSONDecodeError and decoding errors.
        pass

    return None


if __name__ == '__main__':
main()

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"ir_datasets_id": "cranfield"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
1 0 184 2
1 0 29 2
1 0 31 2
1 0 12 3
1 0 51 3
1 0 102 3
1 0 13 4
1 0 14 4
1 0 15 4
1 0 57 2
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"qid": "1", "query": "what similarity laws must be obeyed when constructing aeroelastic models\nof heated high speed aircraft .", "original_query": {"query_id": "1", "text": "what similarity laws must be obeyed when constructing aeroelastic models\nof heated high speed aircraft ."}}
12 changes: 12 additions & 0 deletions ir-measures/tests/test-io/test-input/test-input-cranfield/run.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
1 Q0 486 2 62.79701451212168 test
1 Q0 13 3 56.803264456568286 test
1 Q0 184 4 56.51739566097967 test
1 Q0 663 5 55.39478197135031 test
1 Q0 12 6 43.839787644246826 test
1 Q0 746 7 37.43292477354407 test
1 Q0 876 8 32.51120171695948 test
1 Q0 359 9 26.834240611060522 test
1 Q0 573 10 25.4155055815354 test
1 Q0 102 11 24.4155055815354 test
1 Q0 57 12 23.4155055815354 test
1 Q0 51 1 75.0153524139896 test
29 changes: 28 additions & 1 deletion ir-measures/tests/test_with_approvals.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def run_capture_stdout_files(

return buffer.getvalue()\
.replace(str(_TEST_IO_DIR) + '/', '')\
.replace(str(tmp_path/output_dir), output_dir) + captured_files
.replace(str(tmp_path/output_dir), output_dir)\
.replace(str(tmp_path), '<output-dir>/') + '\n'.join([i for i in captured_files.split('\n') if not i.strip().startswith('var data =')])


def _run_capture_stdout_files_fail(
Expand Down Expand Up @@ -492,3 +493,29 @@ def test_all_valid():
])
shutil.rmtree(_TEST_IO_DIR / 'test-output', ignore_errors=True)
verify(actual)

def test_all_valid_with_rendering():
    """Approval test: evaluate the cranfield sample run with its own topics."""
    scratch_dir = _TEST_IO_DIR / 'test-output'
    shutil.rmtree(scratch_dir, ignore_errors=True)
    scratch_dir.mkdir(exist_ok=True, parents=True)

    cli_args = [
        '--run', f'{_TEST_IO_DIR}/test-input/test-input-cranfield/run.txt',
        '--qrels', f'{_TEST_IO_DIR}/test-input/test-input-cranfield/qrels.txt',
        '--topics', f'{_TEST_IO_DIR}/test-input/test-input-cranfield/queries.jsonl',
        '--measures', 'P@2', 'nDCG@2',
        '--output', f'{_TEST_OUTPUT_DIR}/test-output',
    ]
    actual = _run_capture_stdout_files_pass(cli_args)

    # Clean up before verifying so a failed approval leaves no output behind.
    shutil.rmtree(scratch_dir, ignore_errors=True)
    verify(actual)

def test_all_valid_with_rendering_wrong_qrels_and_queries():
    """Approval test: cranfield run and qrels combined with the generic sample topics."""
    scratch_dir = _TEST_IO_DIR / 'test-output'
    shutil.rmtree(scratch_dir, ignore_errors=True)
    scratch_dir.mkdir(exist_ok=True, parents=True)

    cli_args = [
        '--run', f'{_TEST_IO_DIR}/test-input/test-input-cranfield/run.txt',
        '--qrels', f'{_TEST_IO_DIR}/test-input/test-input-cranfield/qrels.txt',
        '--topics', f'{_TEST_IO_DIR}/test-input/topics_sample_valid.jsonl',
        '--measures', 'P@2', 'nDCG@2',
        '--output', f'{_TEST_OUTPUT_DIR}/test-output',
    ]
    actual = _run_capture_stdout_files_pass(cli_args)

    # Clean up before verifying so a failed approval leaves no output behind.
    shutil.rmtree(scratch_dir, ignore_errors=True)
    verify(actual)

0 comments on commit a997cc2

Please sign in to comment.