From 888682d65ebc1e39d78c8f5a11da3a96bbd1d59c Mon Sep 17 00:00:00 2001 From: jx3yang <65411020+jx3yang@users.noreply.github.com> Date: Mon, 25 Apr 2022 14:38:43 -0400 Subject: [PATCH] Reproduce MS MARCO results and fix unit tests failure (#1126) --- docs/experiments-msmarco-doc.md | 3 ++- docs/experiments-msmarco-passage.md | 3 ++- pyserini/encode/_base.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/experiments-msmarco-doc.md b/docs/experiments-msmarco-doc.md index aa5d8b773..7f821133d 100644 --- a/docs/experiments-msmarco-doc.md +++ b/docs/experiments-msmarco-doc.md @@ -169,4 +169,5 @@ We can see that Anserini's (tuned) BM25 baseline is already much better than the + Results reproduced by [@AceZhan](https://github.com/AceZhan) on 2022-01-14 (commit [`68be809`](https://github.com/castorini/pyserini/commit/68be8090b8553fc6eaf352ac690a6de9d3dc82dd)) + Results reproduced by [@jh8liang](https://github.com/jh8liang) on 2022-02-06 (commit [`e03e068`](https://github.com/castorini/pyserini/commit/e03e06880ad4f6d67a1666c1dd45ce4250adc95d)) + Results reproduced by [@HAKSOAT](https://github.com/HAKSOAT) on 2022-03-11 (commit [`7796685`](https://github.com/castorini/pyserini/commit/77966851755163e36489544fb08f73171e98103f)) -+ Results reproduced by [@jasper-xian](https://github.com/jasper-xian) on 2022-03-27 (commit [`5668edd`](https://github.com/castorini/pyserini/commit/5668edd6f1e61e9c57d600d41d3d1f58b775d371)) \ No newline at end of file ++ Results reproduced by [@jasper-xian](https://github.com/jasper-xian) on 2022-03-27 (commit [`5668edd`](https://github.com/castorini/pyserini/commit/5668edd6f1e61e9c57d600d41d3d1f58b775d371)) ++ Results reproduced by [@jx3yang](https://github.com/jx3yang) on 2022-04-25 (commit [`53333e0`](https://github.com/castorini/pyserini/commit/53333e0fb77371e049e24b10da3a20646c7b5af7)) diff --git a/docs/experiments-msmarco-passage.md b/docs/experiments-msmarco-passage.md index 75db36dae..a83038cca 100644 --- a/docs/experiments-msmarco-passage.md +++ b/docs/experiments-msmarco-passage.md @@ -169,4 +169,5 @@ On the other hand, recall@1000 provides the upper bound effectiveness of downstr + Results reproduced by [@AceZhan](https://github.com/AceZhan) on 2022-01-14 (commit [`68be809`](https://github.com/castorini/pyserini/commit/68be8090b8553fc6eaf352ac690a6de9d3dc82dd)) + Results reproduced by [@jh8liang](https://github.com/jh8liang) on 2022-02-06 (commit [`e03e068`](https://github.com/castorini/pyserini/commit/e03e06880ad4f6d67a1666c1dd45ce4250adc95d)) + Results reproduced by [@HAKSOAT](https://github.com/HAKSOAT) on 2022-03-10 (commit [`7796685`](https://github.com/castorini/pyserini/commit/77966851755163e36489544fb08f73171e98103f)) -+ Results reproduced by [@jasper-xian](https://github.com/jasper-xian) on 2022-03-27 (commit [`5668edd`](https://github.com/castorini/pyserini/commit/5668edd6f1e61e9c57d600d41d3d1f58b775d371)) \ No newline at end of file ++ Results reproduced by [@jasper-xian](https://github.com/jasper-xian) on 2022-03-27 (commit [`5668edd`](https://github.com/castorini/pyserini/commit/5668edd6f1e61e9c57d600d41d3d1f58b775d371)) ++ Results reproduced by [@jx3yang](https://github.com/jx3yang) on 2022-04-25 (commit [`53333e0`](https://github.com/castorini/pyserini/commit/53333e0fb77371e049e24b10da3a20646c7b5af7)) diff --git a/pyserini/encode/_base.py b/pyserini/encode/_base.py index f57fdb259..4723b4da8 100644 --- a/pyserini/encode/_base.py +++ b/pyserini/encode/_base.py @@ -127,7 +127,7 @@ def __init__(self, dir_path): def __enter__(self): if not os.path.exists(self.dir_path): - os.mkdir(self.dir_path) + os.makedirs(self.dir_path) self.file = open(os.path.join(self.dir_path, self.filename), 'w') def __exit__(self, exc_type, exc_val, exc_tb): @@ -154,7 +154,7 @@ def __init__(self, dir_path, dimension=768): def __enter__(self): if not os.path.exists(self.dir_path): - os.mkdir(self.dir_path) + os.makedirs(self.dir_path) self.id_file = open(os.path.join(self.dir_path, self.id_file_name), 'w') def __exit__(self, exc_type, exc_val, exc_tb):