-
Notifications
You must be signed in to change notification settings - Fork 6
/
utils.py
30 lines (22 loc) · 951 Bytes
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import os
import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
def read_pickle(f):
    """Load and return the object pickled in the file at path ``f``.

    The path is opened exactly as given, in binary mode; no extension is
    appended here. Unpickling can execute arbitrary code, so only use this
    on files that come from a trusted source.
    """
    with open(f, 'rb') as pickle_file:
        loaded = pickle.load(pickle_file)
    return loaded
def save_pickle(f, file_path):
    """Pickle the object ``f`` to the file ``<file_path>.pickle``.

    Args:
        f: The object to serialize (despite the name, this is the payload,
            not a file handle).
        file_path: Destination path *without* the extension, given as a
            ``str`` or any ``os.PathLike`` (e.g. ``pathlib.Path``). The
            suffix ``'.pickle'`` is always appended, so ``'model'`` is
            written to ``'model.pickle'``.

    The file is written with ``pickle.HIGHEST_PROTOCOL``; an existing file at
    the target path is overwritten.
    """
    # os.fspath converts path-like objects to a plain string so the suffix
    # can be concatenated; str inputs pass through unchanged.
    target = os.fspath(file_path) + '.pickle'
    with open(target, 'wb') as handle:
        pickle.dump(f, handle, protocol=pickle.HIGHEST_PROTOCOL)
def scale_select_data(train, test, df_scale, cols, sc=None, scale_factor=0.1,
                      scale_cols=None):
    """Select feature columns from ``train``/``test`` and transform them.

    The scaler is fitted on ``df_scale`` and then applied to copies of the
    selected columns of ``train`` and ``test``; the input frames are not
    modified.

    Args:
        train: DataFrame containing at least the columns in ``cols``.
        test: DataFrame containing at least the columns in ``cols``.
        df_scale: DataFrame the scaler is fitted on (same columns required).
        cols: Iterable of column labels to keep. Duplicates are dropped and
            the labels are sorted, which fixes the output column order.
        sc: Object exposing ``fit(X)`` and ``transform(X)``. When omitted, a
            new ``StandardScaler`` is created for this call.
        scale_factor: Multiplier applied to ``scale_cols`` before
            ``transform`` is called.
        scale_cols: Optional column label(s), a subset of ``cols``, to
            multiply by ``scale_factor``. ``None`` or an empty collection
            disables this step.

    Returns:
        Tuple ``(train_scaled, test_scaled)`` of new DataFrames whose columns
        are the sorted, de-duplicated ``cols``. The frames are rebuilt from
        the output of ``sc.transform`` without passing an index, so when that
        output is a plain array (presumably the usual case for
        StandardScaler) the original row index is replaced by a default one.
    """
    # Create the default scaler per call: an instance built in the signature
    # would be one object shared (and re-fitted) by every call.
    if sc is None:
        sc = StandardScaler()

    score_cols = sorted(set(cols))
    sc.fit(df_scale[score_cols])

    # Explicit emptiness test: ``if scale_cols:`` raises for multi-element
    # NumPy arrays and pandas Index objects. Scalars without a length keep
    # the previous truthiness behaviour.
    try:
        rescale = scale_cols is not None and len(scale_cols) > 0
    except TypeError:
        rescale = bool(scale_cols)

    scaled_frames = []
    for frame in (train, test):
        subset = frame[score_cols].copy()
        if rescale:
            # NOTE(review): the factor is applied to the raw values while the
            # scaler was fitted on unscaled ``df_scale`` -- confirm intended.
            # Whole-column assignment lets pandas change the column dtype
            # (e.g. int -> float) instead of writing in place through .loc.
            subset[scale_cols] = subset[scale_cols] * scale_factor
        scaled_frames.append(
            pd.DataFrame(sc.transform(subset), columns=score_cols)
        )

    train_scaled, test_scaled = scaled_frames
    return train_scaled, test_scaled