Skip to content

Commit c40f454

Browse files
some cleaning.
1 parent 648d14b commit c40f454

File tree

10 files changed

+37
-16
lines changed

10 files changed

+37
-16
lines changed

2_harmonize_amsterdam.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,9 @@
77
"""
88
from amsterdam_preprocessing.timeseries import amsterdamTSP
99
from amsterdam_preprocessing.flat_and_labels import Ams_FLProcessor
10-
import polars as pl
1110

1211
tsp = amsterdamTSP(
13-
ts_chunks='numericitems.parquet',
12+
ts_pth='numericitems.parquet',
1413
listitems_pth='listitems.parquet',
1514
gcs_scores_pth='glasgow_coma_scores.parquet')
1615

2_harmonize_eicu.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"""
88
from eicu_preprocessing.flat_and_labels import eicu_FLProcessor
99
from eicu_preprocessing.timeseries import eicuTSP
10-
import polars as pl
10+
1111
tsp = eicuTSP(
1212
lab_pth='lab.parquet',
1313
resp_pth='tsresp.parquet',

2_harmonize_hirid.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"""
88
from hirid_preprocessing.flat_and_labels import Hir_FLProcessing
99
from hirid_preprocessing.timeseries import hiridTSP
10-
import polars as pl
10+
1111
tsp = hiridTSP(ts='timeseries.parquet',
1212
pharma='medication.parquet')
1313

amsterdam_preprocessing/timeseries.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ class amsterdamTSP(TimeseriesProcessor):
88
* 1 wide table: the gcs_score table that was computed in 1_amsterdam.py
99
* 1 medication table that was computed in 1_amsterdam.py
1010
"""
11-
def __init__(self, ts_chunks, listitems_pth, gcs_scores_pth):
11+
def __init__(self, ts_pth, listitems_pth, gcs_scores_pth):
1212
super().__init__(dataset='amsterdam')
13-
self.lf_ts = self.scan(self.savepath+ts_chunks)
13+
self.lf_ts = self.scan(self.savepath+ts_pth)
1414
self.lf_listitems = self.scan(self.savepath+listitems_pth)
1515
self.lf_medication = self.scan(self.med_savepath)
1616

@@ -56,6 +56,11 @@ def run_harmonization(self):
5656
self.medication_to_long(lf_med)
5757

5858
def run_for_preprocessed(self, reset_dir=None):
59+
raise UserWarning("This function is not maintained. It should be replaced"
60+
"by a cleaner/faster alternative in the future.\n"
61+
"Contributions welcome.")
62+
63+
5964
self.reset_dir(reset_dir)
6065

6166
lf_ts = self.harmonize_columns(self.lf_ts, **self.colnames_ts)

database_processing/timeseriesprocessor.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -224,13 +224,19 @@ def timeseries_to_long(self,
224224
lf_long=None,
225225
lf_wide=None,
226226
sink=True):
227-
cols_index = {self.idx_col: pl.Int64, self.time_col: pl.Int64}
227+
cols_index = {self.idx_col: pl.Int64, self.time_col: pl.Duration}
228228
if lf_wide is None:
229229
lf_wide = pl.LazyFrame(schema=cols_index|{'dummy': pl.Float32})
230230
if lf_long is None:
231-
lf_long = pl.LazyFrame(schema=cols_index | {'variable':pl.String, 'value': pl.Float32})
232-
233-
lf_wide_melted = lf_wide.melt(['patient', 'time']).with_columns(pl.col('value').cast(pl.Float32, strict=False))
231+
lf_long = pl.LazyFrame(schema=cols_index | {'variable':pl.String,
232+
'value': pl.Float32})
233+
234+
lf_wide_melted = (lf_wide
235+
.melt(['patient', 'time'])
236+
.with_columns(
237+
pl.col('value').cast(pl.Float32, strict=False)
238+
)
239+
)
234240

235241
lf = (pl.concat([df.select(sorted(df.columns)) for df in [lf_wide_melted, lf_long]], how='vertical_relaxed')
236242
.with_columns(

eicu_preprocessing/timeseries.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,13 @@ def run_harmonization(self):
102102
lf_tsinout,
103103
])
104104

105+
print('Collecting tsperiodic and tsaperiodic', end='')
105106
# Not collecting here causes errors in the following methods (possibly a Polars bug?).
106107
lf_ts_hor = (pl.concat([lf_tsperiodic, lf_tsaperiodic],
107108
how='diagonal',
108109
rechunk=True)
109110
.collect(streaming=True))
111+
print(' -> Done')
110112

111113
lf_ts_ver = self.filter_tables(lf_ts_ver,
112114
kept_variables=self.kept_ts)
@@ -116,7 +118,9 @@ def run_harmonization(self):
116118

117119

118120
def run_preprocessing(self, reset_dir=None):
119-
121+
raise UserWarning("This function is not maintained. It should be replaced"
122+
"by a cleaner/faster alternative in the future.\n"
123+
"Contributions welcome.")
120124
self.reset_dir(reset_dir)
121125

122126

hirid_preprocessing/timeseries.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ def run_harmonization(self):
4545

4646

4747
def run_preprocessing(self, reset_dir=None):
48-
48+
raise UserWarning("This function is not maintained. It should be replaced"
49+
"by a cleaner/faster alternative in the future.\n"
50+
"Contributions welcome.")
4951
self.reset_dir(reset_dir)
5052

5153
kept_variables = (self.kept_ts+['Body weight', 'Body height measure'])

mimic3_preprocessing/mimic3preparator.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ def gen_timeseries(self):
384384
.drop_nulls()
385385
.with_columns(
386386
pl.col('CHARTTIME').str.to_datetime("%Y-%m-%d %H:%M:%S"),
387-
pl.col('ICUSTAY_ID').cast(pl.Int64)
387+
pl.col('ICUSTAY_ID').cast(pl.Int32)
388388
)
389389
.pipe(self.pl_prepare_tstable,
390390
col_measuretime='CHARTTIME',
@@ -394,6 +394,7 @@ def gen_timeseries(self):
394394
unit_los='day')
395395
.join(ditems.select('ITEMID', 'LABEL'), on='ITEMID')
396396
.drop('ITEMID')
397-
.collect(streaming=True))
397+
#.collect(streaming=True)
398+
)
398399

399400
self.save(ts, self.ts_savepath)

mimic3_preprocessing/timeseries.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,14 @@ def run_harmonization(self):
8282
lf_ts = self.filter_tables(lf_timeser,
8383
kept_variables=self.kept_ts)
8484

85-
8685
self.timeseries_to_long(lf_ts)
8786
self.medication_to_long(lf_med)
8887

8988

9089
def run_preprocessing(self):
91-
90+
raise UserWarning("This function is not maintained. It should be replaced"
91+
"by a cleaner/faster alternative in the future.\n"
92+
"Contributions welcome.")
9293

9394
lf_medication = self.harmonize_columns(self.lf_medication,
9495
**self.colnames_med)

mimic4_preprocessing/timeseries.py

+3
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ def run_harmonization(self):
8888
self.medication_to_long(lf_med)
8989

9090
def run_preprocessing(self, reset_dir=None):
91+
raise UserWarning("This function is not maintained. It should be replaced"
92+
"by a cleaner/faster alternative in the future.\n"
93+
"Contributions welcome.")
9194
self.reset_dir(reset_dir)
9295

9396
lf_outputevents = self.harmonize_columns(self.lf_outputevents,

0 commit comments

Comments
 (0)