
Commit f5b3396 ("init"), 1 parent: 6e8a95b

25 files changed: +87,965 −0 lines

.gitignore

+131 lines

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

.idea/

README.md

+14 lines

# Conformal Prediction Intervals with Temporal Dependence

This is the code associated with the paper "Conformal Prediction Intervals with Temporal Dependence".

To replicate the `Load` experiments:
1. Train the base models: `python -m utils.main_experiments`
2. Run `main.ipynb` to produce the results

For other datasets, please download the corresponding data and change the `__main__` section accordingly.

## Requirements
`numpy`, `torch`, `pandas`, `scipy`, `matplotlib`, and `tqdm`
(plus `jupyter` and `notebook` if you want to use the notebook)
`env.yml` contains the full environment (presumably for use with `conda env create -f env.yml`).

Temp/data/Load/L1-train.csv

+85,441 lines (large diff not rendered by default)

_settings.py

+28 lines

import os
import getpass
import sys


_ON_SERVER = True

__CUR_FILE_PATH = os.path.dirname(os.path.abspath(__file__))

# ============================== Data Related
COVID_NAME = 'COVID'
MIMIC_NAME = 'MIMIC'
EEG_NAME = 'EEG'
GEFCom_NAME = 'Load'


WORKSPACE = os.path.join(__CUR_FILE_PATH, "Temp")
DATA_PATH = os.path.join(WORKSPACE, 'data')
_PERSIST_PATH = os.path.join(WORKSPACE, 'cache')
LOG_OUTPUT_DIR = os.path.join(WORKSPACE, 'logs')
BASE_MODEL_PATH = os.path.join(WORKSPACE, 'base_models')
os.makedirs(BASE_MODEL_PATH, exist_ok=True)
RANDOM_SEED = 7

NCOLS = 80


METHOD_PLACEHOLDER = 'CPTD'
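
For orientation, a minimal sketch of how these shared constants are meant to be consumed elsewhere in the repo; `model_checkpoint_path` is a hypothetical helper for illustration, not part of this commit:

import os
from _settings import BASE_MODEL_PATH, DATA_PATH, GEFCom_NAME

# Hypothetical helper: derive a per-dataset checkpoint path from the shared roots.
def model_checkpoint_path(dataset_name, model_name):
    return os.path.join(BASE_MODEL_PATH, f"{dataset_name}_{model_name}.pt")

print(model_checkpoint_path(GEFCom_NAME, "rnn"))  # <repo>/Temp/base_models/Load_rnn.pt
print(os.path.join(DATA_PATH, GEFCom_NAME))       # <repo>/Temp/data/Load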

data/__init__.py

Whitespace-only changes.

data/online_dataset.py

+110 lines

import torch
from torch.utils.data import Dataset
import numpy as np

from _settings import COVID_NAME, MIMIC_NAME, GEFCom_NAME, EEG_NAME

HORIZON_LENGTHS = {MIMIC_NAME: 30, GEFCom_NAME: 24, COVID_NAME: 30, EEG_NAME: 63}
DEFAULT_NTESTS = {MIMIC_NAME: 100, COVID_NAME: 80, GEFCom_NAME: 700, EEG_NAME: 200}
DEFAULT_NVALIDS = {MIMIC_NAME: 100, COVID_NAME: 100, GEFCom_NAME: 200, EEG_NAME: 100}


def pre_pad(x, max_length):
    lx = len(x)
    return torch.cat([torch.zeros([max_length - lx, x.shape[1]], dtype=torch.float), x], 0)


def _to_device(data_or_model, device):
    if isinstance(device, (tuple, list)):
        device = device[0]

    def _move(d):
        try:
            return d.to(device)
        except Exception:
            # If `device` is still a list/tuple, leave the object as-is;
            # this should be the DataParallel case (hacky, I know).
            return d

    if isinstance(data_or_model, (tuple, list)):
        return tuple(_move(x) for x in data_or_model)
    return _move(data_or_model)


class DatasetWrapperFull(Dataset):
    def __init__(self, X, Y, max_full_length):
        # Sequences are aligned at the end (aka pre-padding).
        super(DatasetWrapperFull, self).__init__()
        self.max_full_length = max_full_length
        self.X, self.Y = X, Y

    def __len__(self):
        return len(self.Y)

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.Y[idx]
        if not (isinstance(x, torch.Tensor) and isinstance(y, torch.Tensor)):
            x = torch.tensor(x, dtype=torch.float)
            y = torch.tensor(y, dtype=torch.float)
        lx, ly = len(x), len(y)
        x, y = pre_pad(x, self.max_full_length), pre_pad(y, self.max_full_length)
        return x, y, lx, ly

    @classmethod
    def _sep_data(cls, res):
        return res


def get_split_idx(n_train, n_calibration, n_test, seed, idx=None):
    total = n_train + n_calibration + n_test
    if idx is None:
        idx = np.arange(total)
    else:
        assert len(idx) == total
    perm = np.random.RandomState(seed=seed).permutation(total)
    train_idx = idx[perm[:n_train]]
    calibration_idx = idx[perm[n_train: n_train + n_calibration]]
    train_calibration_idx = idx[perm[: n_train + n_calibration]]
    test_idx = idx[perm[n_train + n_calibration:]]
    return train_idx, calibration_idx, train_calibration_idx, test_idx


def get_horizon(dataset):
    return HORIZON_LENGTHS[dataset.split("-")[0]]


def get_default_ntest(dataset):
    return DEFAULT_NTESTS[dataset.split("-")[0]]


def get_default_ncal(dataset):
    return DEFAULT_NVALIDS[dataset.split("-")[0]]


def get_default_data(dataset, conformal=True, seed=0, **kwargs):
    assert conformal
    if dataset.startswith(EEG_NAME):
        import data.preprocessing.eeg as eeg
        return eeg.get_splits(conformal=conformal, seed=seed, **kwargs)
    if dataset == MIMIC_NAME:
        import data.preprocessing.mimic as mimic
        return mimic.get_splits(conformal=conformal, seed=seed, **kwargs)
    if dataset.startswith(GEFCom_NAME):
        import data.preprocessing.gefc as gefc
        if dataset == f"{GEFCom_NAME}-R":
            return gefc.get_gefc_data(conformal=conformal, seed=seed, random_split=True, **kwargs)
        return gefc.get_gefc_data(conformal=conformal, seed=None, **kwargs)
    if dataset.startswith(COVID_NAME):
        import data.preprocessing.covid as covid
        return covid.get_splits(conformal=conformal, seed=seed, **kwargs)


if __name__ == "__main__":
    # from data.preprocessing.data_processing_covid import get_raw_covid_data
    # data = get_raw_covid_data(cached=True)
    # o = DatasetWrapper(data[:, :-1], data, max_length=5, horizon=2)
    # res = get_Xsection_data(MIMIC_NAME)
    # res = get_multihorizon_data(EEG_NAME, horizon=16)
    # res[0][0]
    # tr, va, te = get_TS_data(SPX_NAME)
    # te[0]
    # res = get_default_data(GEFCom_NAME)
    # print(res[0][0][0].shape)
    pass
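
A minimal sketch, using synthetic arrays (not data from this commit), of how `get_split_idx` and `DatasetWrapperFull` fit together:

import numpy as np
from data.online_dataset import get_split_idx, DatasetWrapperFull

# 20 synthetic series of length 9 with 1 feature; X is steps 0..7, Y is steps 1..8.
data = np.random.RandomState(0).randn(20, 9, 1)
Xs, Ys = data[:, :-1], data[:, 1:]

tr, cal, trcal, te = get_split_idx(n_train=10, n_calibration=5, n_test=5, seed=7)
assert not set(tr) & set(te)        # train and test indices are disjoint

train_ds = DatasetWrapperFull(Xs[tr], Ys[tr], max_full_length=8)
x, y, lx, ly = train_ds[0]          # tensors pre-padded to max_full_length
print(x.shape, lx)                  # torch.Size([8, 1]) 8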

data/preprocessing/__init__.py

Whitespace-only changes.

data/preprocessing/covid.py

+33 lines

import os.path
import numpy as np
import pandas as pd

from _settings import WORKSPACE, DATA_PATH
from data.online_dataset import get_split_idx, DatasetWrapperFull

COVID_ROOT = os.path.join(DATA_PATH, 'COVID')


def get_normalized_daily_counts(startdate='2022-03-01', enddate='2022-03-31'):
    # Download the data from https://coronavirus.data.gov.uk/
    df = pd.read_csv(os.path.join(COVID_ROOT, "ltla_2022-04-14.csv"))
    df = df.pivot_table(values=['newCasesBySpecimenDate'], index='date', columns='areaCode').fillna(0.)
    tdf = df.rolling(window=365, min_periods=365).mean().dropna().shift(1)
    rate = df.reindex(tdf.index) / tdf
    return rate.loc[startdate:enddate]


def get_splits(conformal=True, seed=7, horizon=30, n_train=200, n_calibration=100, n_test=80):
    data = np.expand_dims(get_normalized_daily_counts().T.values, 2)
    Xs, Ys = data[:, :-1], data[:, 1:]
    train_idx, calibration_idx, train_calibration_idx, test_idx = get_split_idx(
        n_train, n_calibration, n_test, seed=seed)

    kwargs = {"max_full_length": horizon}
    if conformal:
        train_dataset = DatasetWrapperFull(Xs[train_idx], Ys[train_idx], **kwargs)
        calibration_dataset = DatasetWrapperFull(Xs[calibration_idx], Ys[calibration_idx], **kwargs)
    else:
        train_dataset = DatasetWrapperFull(Xs[train_calibration_idx], Ys[train_calibration_idx], **kwargs)
        calibration_dataset = None
    test_dataset = DatasetWrapperFull(Xs[test_idx], Ys[test_idx], **kwargs)
    return train_dataset, calibration_dataset, test_dataset
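
Assuming the CSV has been downloaded to Temp/data/COVID/ltla_2022-04-14.csv, the resulting datasets plug directly into a standard PyTorch DataLoader; a minimal sketch:

from torch.utils.data import DataLoader
from data.preprocessing.covid import get_splits

train_ds, cal_ds, test_ds = get_splits(conformal=True, seed=7)
loader = DataLoader(train_ds, batch_size=32, shuffle=True)
x, y, lx, ly = next(iter(loader))
print(x.shape)  # torch.Size([32, 30, 1]): batch x horizon x feature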
