
Commit f5b3396 ("init"), 1 parent: 6e8a95b

25 files changed: +87,965 −0 lines

.gitignore

+131 lines

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

.idea/

README.md

+14 lines

# Conformal Prediction Intervals with Temporal Dependence

This is the code associated with the paper "Conformal Prediction Intervals with Temporal Dependence".

To replicate the `Load` experiments:
1. Train the base models: `python -m utils.main_experiments`
2. Run `main.ipynb` to produce the results

For other datasets, please download the corresponding data and change the `__main__` section accordingly.

## Requirements
`numpy`, `torch`, `pandas`, `scipy`, `matplotlib`, and `tqdm`
(plus `jupyter` and `notebook` if you want to use the notebook)
`env.yml` contains the full environment (presumably for use with `conda env create -f env.yml`).

Temp/data/Load/L1-train.csv

+85,441 lines (large diff not rendered by default)

_settings.py

+28 lines

import os
import getpass
import sys


_ON_SERVER = True

__CUR_FILE_PATH = os.path.dirname(os.path.abspath(__file__))

# ============================== Data Related
COVID_NAME = 'COVID'
MIMIC_NAME = 'MIMIC'
EEG_NAME = 'EEG'
GEFCom_NAME = 'Load'


WORKSPACE = os.path.join(__CUR_FILE_PATH, "Temp")
DATA_PATH = os.path.join(WORKSPACE, 'data')
_PERSIST_PATH = os.path.join(WORKSPACE, 'cache')
LOG_OUTPUT_DIR = os.path.join(WORKSPACE, 'logs')
BASE_MODEL_PATH = os.path.join(WORKSPACE, 'base_models')
os.makedirs(BASE_MODEL_PATH, exist_ok=True)
RANDOM_SEED = 7

NCOLS = 80


METHOD_PLACEHOLDER = 'CPTD'
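
For orientation, a minimal sketch of how these shared constants are meant to be consumed elsewhere in the repo; `model_checkpoint_path` is a hypothetical helper for illustration, not part of this commit:

import os
from _settings import BASE_MODEL_PATH, DATA_PATH, GEFCom_NAME

# Hypothetical helper: derive a per-dataset checkpoint path from the shared roots.
def model_checkpoint_path(dataset_name, model_name):
    return os.path.join(BASE_MODEL_PATH, f"{dataset_name}_{model_name}.pt")

print(model_checkpoint_path(GEFCom_NAME, "rnn"))  # <repo>/Temp/base_models/Load_rnn.pt
print(os.path.join(DATA_PATH, GEFCom_NAME))       # <repo>/Temp/data/Load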

data/__init__.py

Whitespace-only changes.

data/online_dataset.py

+110 lines

import torch
from torch.utils.data import Dataset
import numpy as np

from _settings import COVID_NAME, MIMIC_NAME, GEFCom_NAME, EEG_NAME

HORIZON_LENGTHS = {MIMIC_NAME: 30, GEFCom_NAME: 24, COVID_NAME: 30, EEG_NAME: 63}
DEFAULT_NTESTS = {MIMIC_NAME: 100, COVID_NAME: 80, GEFCom_NAME: 700, EEG_NAME: 200}
DEFAULT_NVALIDS = {MIMIC_NAME: 100, COVID_NAME: 100, GEFCom_NAME: 200, EEG_NAME: 100}


def pre_pad(x, max_length):
    lx = len(x)
    return torch.cat([torch.zeros([max_length - lx, x.shape[1]], dtype=torch.float), x], 0)


def _to_device(data_or_model, device):
    if isinstance(device, (tuple, list)):
        device = device[0]

    def _move(d):
        try:
            return d.to(device)
        except Exception:
            # If `device` is still a list/tuple, leave the object as-is;
            # this should be the DataParallel case (hacky, I know).
            return d

    if isinstance(data_or_model, (tuple, list)):
        return tuple(_move(x) for x in data_or_model)
    return _move(data_or_model)


class DatasetWrapperFull(Dataset):
    def __init__(self, X, Y, max_full_length):
        # Sequences are aligned at the end (aka pre-padding).
        super(DatasetWrapperFull, self).__init__()
        self.max_full_length = max_full_length
        self.X, self.Y = X, Y

    def __len__(self):
        return len(self.Y)

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.Y[idx]
        if not (isinstance(x, torch.Tensor) and isinstance(y, torch.Tensor)):
            x = torch.tensor(x, dtype=torch.float)
            y = torch.tensor(y, dtype=torch.float)
        lx, ly = len(x), len(y)
        x, y = pre_pad(x, self.max_full_length), pre_pad(y, self.max_full_length)
        return x, y, lx, ly

    @classmethod
    def _sep_data(cls, res):
        return res


def get_split_idx(n_train, n_calibration, n_test, seed, idx=None):
    total = n_train + n_calibration + n_test
    if idx is None:
        idx = np.arange(total)
    else:
        assert len(idx) == total
    perm = np.random.RandomState(seed=seed).permutation(total)
    train_idx = idx[perm[:n_train]]
    calibration_idx = idx[perm[n_train: n_train + n_calibration]]
    train_calibration_idx = idx[perm[: n_train + n_calibration]]
    test_idx = idx[perm[n_train + n_calibration:]]
    return train_idx, calibration_idx, train_calibration_idx, test_idx


def get_horizon(dataset):
    return HORIZON_LENGTHS[dataset.split("-")[0]]


def get_default_ntest(dataset):
    return DEFAULT_NTESTS[dataset.split("-")[0]]


def get_default_ncal(dataset):
    return DEFAULT_NVALIDS[dataset.split("-")[0]]


def get_default_data(dataset, conformal=True, seed=0, **kwargs):
    assert conformal
    if dataset.startswith(EEG_NAME):
        import data.preprocessing.eeg as eeg
        return eeg.get_splits(conformal=conformal, seed=seed, **kwargs)
    if dataset == MIMIC_NAME:
        import data.preprocessing.mimic as mimic
        return mimic.get_splits(conformal=conformal, seed=seed, **kwargs)
    if dataset.startswith(GEFCom_NAME):
        import data.preprocessing.gefc as gefc
        if dataset == f"{GEFCom_NAME}-R":
            return gefc.get_gefc_data(conformal=conformal, seed=seed, random_split=True, **kwargs)
        return gefc.get_gefc_data(conformal=conformal, seed=None, **kwargs)
    if dataset.startswith(COVID_NAME):
        import data.preprocessing.covid as covid
        return covid.get_splits(conformal=conformal, seed=seed, **kwargs)


if __name__ == "__main__":
    # from data.preprocessing.data_processing_covid import get_raw_covid_data
    # data = get_raw_covid_data(cached=True)
    # o = DatasetWrapper(data[:, :-1], data, max_length=5, horizon=2)
    # res = get_Xsection_data(MIMIC_NAME)
    # res = get_multihorizon_data(EEG_NAME, horizon=16)
    # res[0][0]
    # tr, va, te = get_TS_data(SPX_NAME)
    # te[0]
    # res = get_default_data(GEFCom_NAME)
    # print(res[0][0][0].shape)
    pass
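
A minimal sketch, using synthetic arrays (not data from this commit), of how `get_split_idx` and `DatasetWrapperFull` fit together:

import numpy as np
from data.online_dataset import get_split_idx, DatasetWrapperFull

# 20 synthetic series of length 9 with 1 feature; X is steps 0..7, Y is steps 1..8.
data = np.random.RandomState(0).randn(20, 9, 1)
Xs, Ys = data[:, :-1], data[:, 1:]

tr, cal, trcal, te = get_split_idx(n_train=10, n_calibration=5, n_test=5, seed=7)
assert not set(tr) & set(te)        # train and test indices are disjoint

train_ds = DatasetWrapperFull(Xs[tr], Ys[tr], max_full_length=8)
x, y, lx, ly = train_ds[0]          # tensors pre-padded to max_full_length
print(x.shape, lx)                  # torch.Size([8, 1]) 8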

data/preprocessing/__init__.py

Whitespace-only changes.

data/preprocessing/covid.py

+33 lines

import os.path
import numpy as np
import pandas as pd

from _settings import WORKSPACE, DATA_PATH
from data.online_dataset import get_split_idx, DatasetWrapperFull

COVID_ROOT = os.path.join(DATA_PATH, 'COVID')


def get_normalized_daily_counts(startdate='2022-03-01', enddate='2022-03-31'):
    # Download the data from https://coronavirus.data.gov.uk/
    df = pd.read_csv(os.path.join(COVID_ROOT, "ltla_2022-04-14.csv"))
    df = df.pivot_table(values=['newCasesBySpecimenDate'], index='date', columns='areaCode').fillna(0.)
    tdf = df.rolling(window=365, min_periods=365).mean().dropna().shift(1)
    rate = df.reindex(tdf.index) / tdf
    return rate.loc[startdate:enddate]


def get_splits(conformal=True, seed=7, horizon=30, n_train=200, n_calibration=100, n_test=80):
    data = np.expand_dims(get_normalized_daily_counts().T.values, 2)
    Xs, Ys = data[:, :-1], data[:, 1:]
    train_idx, calibration_idx, train_calibration_idx, test_idx = get_split_idx(
        n_train, n_calibration, n_test, seed=seed)

    kwargs = {"max_full_length": horizon}
    if conformal:
        train_dataset = DatasetWrapperFull(Xs[train_idx], Ys[train_idx], **kwargs)
        calibration_dataset = DatasetWrapperFull(Xs[calibration_idx], Ys[calibration_idx], **kwargs)
    else:
        train_dataset = DatasetWrapperFull(Xs[train_calibration_idx], Ys[train_calibration_idx], **kwargs)
        calibration_dataset = None
    test_dataset = DatasetWrapperFull(Xs[test_idx], Ys[test_idx], **kwargs)
    return train_dataset, calibration_dataset, test_dataset
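
Assuming the CSV has been downloaded to Temp/data/COVID/ltla_2022-04-14.csv, the resulting datasets plug directly into a standard PyTorch DataLoader; a minimal sketch:

from torch.utils.data import DataLoader
from data.preprocessing.covid import get_splits

train_ds, cal_ds, test_ds = get_splits(conformal=True, seed=7)
loader = DataLoader(train_ds, batch_size=32, shuffle=True)
x, y, lx, ly = next(iter(loader))
print(x.shape)  # torch.Size([32, 30, 1]): batch x horizon x feature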
