Skip to content

Commit

Permalink
Merge pull request #2145 from recommenders-team/miguel/mind
Browse files Browse the repository at this point in the history
Add new URL of MIND small and MIND large
  • Loading branch information
miguelgfierro committed Aug 22, 2024
2 parents 61568e6 + 608f684 commit fd5f861
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 21 deletions.
8 changes: 4 additions & 4 deletions recommenders/datasets/mind.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@


URL_MIND_LARGE_TRAIN = (
- "https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip"
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip"
  )
  URL_MIND_LARGE_VALID = (
- "https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip"
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip"
  )
  URL_MIND_SMALL_TRAIN = (
- "https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip"
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip"
  )
  URL_MIND_SMALL_VALID = (
- "https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip"
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip"
)
URL_MIND_DEMO_TRAIN = (
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip"
Expand Down
4 changes: 2 additions & 2 deletions recommenders/models/newsrec/newsrec_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,15 +310,15 @@ def get_mind_data_set(type):

if type == "large":
return (
- "https://mind201910small.blob.core.windows.net/release/",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/",
  "MINDlarge_train.zip",
  "MINDlarge_dev.zip",
  "MINDlarge_utils.zip",
  )

  elif type == "small":
  return (
- "https://mind201910small.blob.core.windows.net/release/",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/",
"MINDsmall_train.zip",
"MINDsmall_dev.zip",
"MINDsmall_utils.zip",
Expand Down
30 changes: 15 additions & 15 deletions tests/data_validation/recommenders/datasets/test_mind.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,34 +27,34 @@
'"0x8D8B8AD5B126C3B"',
),
(
- "https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip",
- "52952752",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip",
+ "52953372",
  "0x8D834F2EB31BDEC",
  ),
  (
- "https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip",
- "30945572",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip",
+ "30946172",
  "0x8D834F2EBA8D865",
  ),
  (
- "https://mind201910small.blob.core.windows.net/release/MINDsmall_utils.zip",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip",
  "155178106",
  "0x8D87F67F4AEB960",
  ),
  (
- "https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip",
- "530196631",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip",
+ "531361237",
  "0x8D8244E90C15C07",
  ),
  (
- "https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip",
- "103456245",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip",
+ "103593383",
  "0x8D8244E92005849",
  ),
  (
- "https://mind201910small.blob.core.windows.net/release/MINDlarge_utils.zip",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip",
  "150359301",
- "0x8D87F67E6CA4364",
+ "0x8D8B8AD5B2ED4C9",
),
],
)
Expand All @@ -75,9 +75,9 @@ def test_download_mind_demo(tmp):
def test_download_mind_small(tmp):
train_path, valid_path = download_mind(size="small", dest_path=tmp)
statinfo = os.stat(train_path)
- assert statinfo.st_size == 52952752
+ assert statinfo.st_size == 52953372
  statinfo = os.stat(valid_path)
- assert statinfo.st_size == 30945572
+ assert statinfo.st_size == 30946172


def test_extract_mind_demo(tmp):
Expand Down Expand Up @@ -127,9 +127,9 @@ def test_extract_mind_small(tmp):
def test_download_mind_large(tmp_path):
train_path, valid_path = download_mind(size="large", dest_path=tmp_path)
statinfo = os.stat(train_path)
- assert statinfo.st_size == 530196631
+ assert statinfo.st_size == 531361237
  statinfo = os.stat(valid_path)
- assert statinfo.st_size == 103456245
+ assert statinfo.st_size == 103593383


def test_extract_mind_large(tmp):
Expand Down

0 comments on commit fd5f861

Please sign in to comment.