Dataset repository for the preprocessed ComMU dataset.
This dataset was generated by preprocess.py
in the POZAlabs/ComMU-Code repository.
To download it, simply clone this repository with Git:
$ git clone https://github.com/YAIxPOZAlabs/ComMU-processed.git
... or write your own downloader in Python, for example:
#!/usr/bin/env python3
# Files that make up the preprocessed dataset.
# Each key is a file name; each value is a
# (raw download URL, expected MD5 hex digest) pair.
dataset_info = {
    name: (
        'https://github.com/YAIxPOZAlabs/ComMU-processed/blob/master/'
        '{}?raw=true'.format(name),
        md5,
    )
    for name, md5 in [
        ('target_train.npy', '3721233979443a35b66de1f55baf0c89'),
        ('target_val.npy', 'cc1c15d9198bf7ba3316bf3676b4a997'),
        ('input_val.npy', 'dd3b8d6f9aba6ffc40fd04c1195ea899'),
        ('input_train.npy', 'bf4e8f53c5bd0b5b45fa430db95d8b54'),
    ]
}
def download(download_path, mode=0o755, files=None):
    """Download dataset files into ``download_path`` and verify their MD5.

    Args:
        download_path: Target directory. It is created if missing, together
            with any missing ancestor directories.
        mode: Permission bits passed to ``mkdir`` for the target directory
            and its direct parent, and to ``chmod`` for every downloaded
            file.
        files: Optional mapping ``{filename: (url, md5_hexdigest)}`` of the
            files to fetch. Defaults to the module-level ``dataset_info``.

    Raises:
        AssertionError: If the MD5 digest of a downloaded file differs from
            the expected one.
    """
    # Imports are kept local so the function can be copy-pasted on its own.
    import hashlib
    import pathlib
    from urllib.request import urlretrieve

    if files is None:
        files = dataset_info

    download_path = pathlib.Path(download_path).absolute()
    # parents=True lets arbitrarily deep, not-yet-existing targets work;
    # without it a missing grandparent raises FileNotFoundError.
    download_path.parent.mkdir(mode=mode, parents=True, exist_ok=True)
    download_path.mkdir(mode=mode, exist_ok=True)

    total = len(files)
    for idx, (filename, (url, checksum)) in enumerate(files.items(), start=1):
        filepath = download_path.joinpath(filename)
        print(filename, "({idx}/{total})".format(idx=idx, total=total))
        urlretrieve(url, str(filepath))
        filepath.chmod(mode=mode)

        # Hash in fixed-size chunks so large files are never held in memory
        # in one piece.
        digest = hashlib.md5()
        with filepath.open('rb') as f:
            for chunk in iter(lambda: f.read(1 << 20), b''):
                digest.update(chunk)
        actual = digest.hexdigest()

        # Explicit raise instead of an ``assert`` statement: asserts are
        # removed under ``python -O``, which would silently skip the check.
        # AssertionError is kept so existing callers see the same type.
        if actual != checksum:
            raise AssertionError(
                "MD5 checksum mismatch for {name}: "
                "expected {expected}, got {actual}".format(
                    name=filename, expected=checksum, actual=actual))
# Script entry point: fetch every dataset file into the current directory.
if __name__ == "__main__":
    download(download_path=".")