TIRA Evaluator: Extract top-docs for DiffIR and render them by default.

#5
tira-io · Jul 19, 2023 · a997cc2 · a997cc2
1 parent 99d0426
commit a997cc2
Show file tree

Hide file tree

Showing 9 changed files with 1,641 additions and 2 deletions.
diff --git a/ir-measures/Dockerfile.dev b/ir-measures/Dockerfile.dev
@@ -1,4 +1,4 @@
-FROM webis/tira-ir-datasets-starter:0.0.54
+FROM webis/tira-ir-datasets-starter:0.0.55
 
 RUN apk add jq libffi-dev && pip3 install ir-datasets ir-measures approvaltests exceptiongroup pytest jupyter
 
diff --git a/ir-measures/ir_measures_evaluator.py b/ir-measures/ir_measures_evaluator.py
@@ -9,6 +9,8 @@
 
 import ir_measures
 from ir_measures import Qrel, ScoredDoc, Measure, Metric
+import sys
+import pandas as pd
 
 
 def add_error(
@@ -803,6 +805,96 @@ def main():
 
     write_prototext(aggregated, per_query, output_path)
 
+    irds_id = irds_id_from_metadata_or_none(args.qrels)
+    if irds_id:
+        try:
+            render_results(args.run, irds_id, output_path)
+        except Exception as e:
+            pass
+
+
+def normalized_run(run, depth=1000):
+    """Load a TREC-style run and keep its top ``depth`` rows per query.
+
+    Args:
+        run: Path or file-like object accepted by ``pandas.read_csv``. Rows are
+            whitespace-separated ``qid q0 docno rank score system``.
+        depth: Maximum number of rows kept for each query.
+
+    Returns:
+        A DataFrame with the columns ``qid, Q0, docno, rank, score, system``,
+        ordered by qid (ascending) and, within a query, by score and then
+        docno (both descending). ``rank`` is recomputed from that order and
+        starts at 1 for every query; ``Q0`` is the constant 0.
+    """
+    # Raw string: "\s" inside a plain literal is an invalid escape sequence.
+    run = pd.read_csv(run, sep=r"\s+", names=["qid", "q0", "docno", "rank", "score", "system"])
+
+    try:
+        run["qid"] = run["qid"].astype(int)
+    except (ValueError, TypeError):
+        # Non-numeric query ids are kept exactly as parsed.
+        pass
+
+    run = run.sort_values(["qid", "score", "docno"], ascending=[True, False, False]).reset_index(drop=True)
+
+    # The parsed column is called "q0", so the "Q0" output column is always this placeholder.
+    if "Q0" not in run.columns:
+        run["Q0"] = 0
+
+    # Copy so the rank assignment below never writes into a slice of the sorted frame.
+    run = run.groupby("qid").head(depth).copy()
+
+    # Ranks from the input are ignored: positions restart at 1 for every query.
+    run["rank"] = run.groupby("qid").cumcount() + 1
+
+    return run[["qid", "Q0", "docno", "rank", "score", "system"]]
+
+def queries_dict(irds_dataset):
+    """Index the queries yielded by ``irds_dataset.queries_iter()`` by query id.
+
+    Keys are ``str(query.query_id)``; values are the query objects themselves.
+    When the same id occurs more than once, the last query wins.
+    """
+    by_id = {}
+    for query in irds_dataset.queries_iter():
+        by_id[str(query.query_id)] = query
+    return by_id
+
+def qrels_dict(irds_dataset):
+    """Collect the relevance judgments of ``irds_dataset`` as nested dicts.
+
+    The result maps ``str(query_id)`` to a dict of ``str(doc_id)`` to relevance,
+    built from everything ``irds_dataset.qrels_iter()`` yields. A later judgment
+    for the same query/document pair replaces an earlier one.
+    """
+    judgments = {}
+
+    for entry in irds_dataset.qrels_iter():
+        per_query = judgments.setdefault(str(entry.query_id), {})
+        per_query[str(entry.doc_id)] = entry.relevance
+
+    return judgments
+
+def render_results(run_file, irds_id, output_path, top_k=10):
+    """Render the top documents of a run as a DiffIR SERP inside ``output_path``.
+
+    Two files are written: ``.data-top-10-for-rendering.jsonl``, holding one JSON
+    object with the queries, documents and qrels referenced by the top ``top_k``
+    rows of every query, and ``serp.html``, holding the HTML returned by
+    ``diffir.run.diff_from_local_data``.
+
+    Args:
+        run_file: Run file; must offer ``resolve()`` (e.g. a ``pathlib.Path``).
+        irds_id: Dataset identifier handed to ``ir_datasets.load``.
+        output_path: Directory object supporting the ``/`` operator.
+        top_k: Rows kept per query. The excerpt file keeps its fixed "top-10"
+            name regardless of this value.
+
+    Raises:
+        KeyError: If a query id of the run is missing from the dataset's queries.
+        Errors raised by ir_datasets, the TIRA loader or DiffIR are not handled here.
+    """
+    tira_src = '/tira/application/src/tira/'
+    if tira_src not in sys.path:
+        # Repeated calls must not keep appending the same entry.
+        sys.path.append(tira_src)
+    import ir_datasets
+    from ir_datasets_loader import IrDatasetsLoader
+    irds_loader = IrDatasetsLoader()
+    dataset = ir_datasets.load(irds_id)
+    all_queries = queries_dict(dataset)
+    all_qrels = qrels_dict(dataset)
+
+    docs_store = dataset.docs_store()
+    excerpt_for_rendering = {'queries': {}, 'documents': {}, 'qrels': {}}
+
+    run = normalized_run(run_file, top_k)
+
+    for _, row in run.iterrows():
+        qid = str(row.qid)
+        docno = str(row.docno)
+        excerpt_for_rendering['queries'][qid] = all_queries[qid]
+        excerpt_for_rendering['documents'][docno] = docs_store.get(docno)
+
+        # Only judged query/document pairs end up in the qrels excerpt.
+        if qid in all_qrels and docno in all_qrels[qid]:
+            excerpt_for_rendering['qrels'].setdefault(qid, {})[docno] = all_qrels[qid][docno]
+
+    # The loader returns JSON strings; parse them so the excerpt nests plain objects.
+    excerpt_for_rendering['queries'] = {k: json.loads(irds_loader.map_query_as_jsonl(v)) for k, v in excerpt_for_rendering['queries'].items()}
+    excerpt_for_rendering['documents'] = {k: json.loads(irds_loader.map_doc(v)) for k, v in excerpt_for_rendering['documents'].items()}
+
+    excerpt_file = output_path / '.data-top-10-for-rendering.jsonl'
+    # Explicit UTF-8 so the output does not depend on the container's locale.
+    with open(excerpt_file, 'w', encoding='utf-8') as output_file:
+        output_file.write(json.dumps(excerpt_for_rendering))
+
+    from diffir.run import diff_from_local_data
+    _, rendered_serp = diff_from_local_data([str(run_file.resolve())], [str(excerpt_file.resolve())], cli=False, web=True, print_html=False)
+
+    with open(output_path / 'serp.html', 'w', encoding='utf-8') as output_file:
+        output_file.write(rendered_serp)
+
+
+def irds_id_from_metadata_or_none(f):
+    """Return the ``ir_datasets_id`` stored in a ``metadata.json`` near ``f``.
+
+    ``metadata.json`` is looked up first inside ``f`` and then next to ``f``;
+    the first one that exists decides the result.
+
+    Args:
+        f: Path (or string) of a directory or of a file inside that directory.
+
+    Returns:
+        The value of the ``ir_datasets_id`` key, or ``None`` when no metadata
+        file exists or it cannot be read, parsed, or lacks the key.
+    """
+    try:
+        f = Path(f)
+        for p in [f / 'metadata.json', f.parent / 'metadata.json']:
+            if p.is_file():
+                # Context manager so the handle is closed even when parsing fails.
+                with open(p) as metadata_file:
+                    return json.load(metadata_file)['ir_datasets_id']
+    except (OSError, ValueError, KeyError, TypeError):
+        # Best-effort lookup: unusable metadata only means "no dataset id".
+        pass
+    return None
+
 
 if __name__ == '__main__':
     main()
diff --git a/...sures/tests/approved_files/test_with_approvals.test_all_valid_with_rendering.approved.txt b/...sures/tests/approved_files/test_with_approvals.test_all_valid_with_rendering.approved.txt
diff --git a/...es/test_with_approvals.test_all_valid_with_rendering_wrong_qrels_and_queries.approved.txt b/...es/test_with_approvals.test_all_valid_with_rendering_wrong_qrels_and_queries.approved.txt
diff --git a/ir-measures/tests/test-io/test-input/test-input-cranfield/metadata.json b/ir-measures/tests/test-io/test-input/test-input-cranfield/metadata.json
@@ -0,0 +1 @@
+{"ir_datasets_id": "cranfield"}
diff --git a/ir-measures/tests/test-io/test-input/test-input-cranfield/qrels.txt b/ir-measures/tests/test-io/test-input/test-input-cranfield/qrels.txt
@@ -0,0 +1,10 @@
+1 0 184 2
+1 0 29 2
+1 0 31 2
+1 0 12 3
+1 0 51 3
+1 0 102 3
+1 0 13 4
+1 0 14 4
+1 0 15 4
+1 0 57 2
diff --git a/ir-measures/tests/test-io/test-input/test-input-cranfield/queries.jsonl b/ir-measures/tests/test-io/test-input/test-input-cranfield/queries.jsonl
@@ -0,0 +1 @@
+{"qid": "1", "query": "what similarity laws must be obeyed when constructing aeroelastic models\nof heated high speed aircraft .", "original_query": {"query_id": "1", "text": "what similarity laws must be obeyed when constructing aeroelastic models\nof heated high speed aircraft ."}}
diff --git a/ir-measures/tests/test-io/test-input/test-input-cranfield/run.txt b/ir-measures/tests/test-io/test-input/test-input-cranfield/run.txt
@@ -0,0 +1,12 @@
+1 Q0 486 2 62.79701451212168 test
+1 Q0 13 3 56.803264456568286 test
+1 Q0 184 4 56.51739566097967 test
+1 Q0 663 5 55.39478197135031 test
+1 Q0 12 6 43.839787644246826 test
+1 Q0 746 7 37.43292477354407 test
+1 Q0 876 8 32.51120171695948 test
+1 Q0 359 9 26.834240611060522 test
+1 Q0 573 10 25.4155055815354 test
+1 Q0 102 11 24.4155055815354 test
+1 Q0 57 12 23.4155055815354 test
+1 Q0 51 1 75.0153524139896 test
diff --git a/ir-measures/tests/test_with_approvals.py b/ir-measures/tests/test_with_approvals.py
@@ -58,7 +58,8 @@ def run_capture_stdout_files(
 
     return buffer.getvalue()\
         .replace(str(_TEST_IO_DIR) + '/', '')\
-        .replace(str(tmp_path/output_dir), output_dir) + captured_files
+        .replace(str(tmp_path/output_dir), output_dir)\
+        .replace(str(tmp_path), '<output-dir>/') + '\n'.join([i for i in captured_files.split('\n') if not i.strip().startswith('var data =')])
 
 
 def _run_capture_stdout_files_fail(
@@ -492,3 +493,29 @@ def test_all_valid():
     ])
     shutil.rmtree(_TEST_IO_DIR / 'test-output', ignore_errors=True)
     verify(actual)
+
+def test_all_valid_with_rendering():
+    """Approval test: evaluate the cranfield fixture with its own qrels and topics."""
+    scratch_dir = _TEST_IO_DIR / 'test-output'
+
+    # Start from an empty scratch directory.
+    shutil.rmtree(scratch_dir, ignore_errors=True)
+    scratch_dir.mkdir(parents=True, exist_ok=True)
+
+    cli_args = [
+        '--run', f'{_TEST_IO_DIR}/test-input/test-input-cranfield/run.txt',
+        '--qrels', f'{_TEST_IO_DIR}/test-input/test-input-cranfield/qrels.txt',
+        '--topics', f'{_TEST_IO_DIR}/test-input/test-input-cranfield/queries.jsonl',
+        '--measures', 'P@2', 'nDCG@2',
+        '--output', f'{_TEST_OUTPUT_DIR}/test-output',
+    ]
+    captured = _run_capture_stdout_files_pass(cli_args)
+
+    # Clean up before verifying so a mismatch does not leave output behind.
+    shutil.rmtree(scratch_dir, ignore_errors=True)
+    verify(captured)
+
+def test_all_valid_with_rendering_wrong_qrels_and_queries():
+    """Approval test: cranfield run and qrels combined with the generic sample topics."""
+    scratch_dir = _TEST_IO_DIR / 'test-output'
+
+    # Start from an empty scratch directory.
+    shutil.rmtree(scratch_dir, ignore_errors=True)
+    scratch_dir.mkdir(parents=True, exist_ok=True)
+
+    cli_args = [
+        '--run', f'{_TEST_IO_DIR}/test-input/test-input-cranfield/run.txt',
+        '--qrels', f'{_TEST_IO_DIR}/test-input/test-input-cranfield/qrels.txt',
+        '--topics', f'{_TEST_IO_DIR}/test-input/topics_sample_valid.jsonl',
+        '--measures', 'P@2', 'nDCG@2',
+        '--output', f'{_TEST_OUTPUT_DIR}/test-output',
+    ]
+    captured = _run_capture_stdout_files_pass(cli_args)
+
+    # Clean up before verifying so a mismatch does not leave output behind.
+    shutil.rmtree(scratch_dir, ignore_errors=True)
+    verify(captured)
-Original file line number
+Diff line change
@@ -0,0 +1,10 @@
+0 184 2
+0 29 2
+0 31 2
+0 12 3
+0 51 3
+0 102 3
+0 13 4
+0 14 4
+0 15 4
+0 57 2
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"qid": "1", "query": "what similarity laws must be obeyed when constructing aeroelastic models\nof heated high speed aircraft .", "original_query": {"query_id": "1", "text": "what similarity laws must be obeyed when constructing aeroelastic models\nof heated high speed aircraft ."}}