-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_transform.py
61 lines (51 loc) · 1.86 KB
/
data_transform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
import numpy as np
from .const import nc
class DataTransform:
    """Chainable wrapper around a pandas DataFrame.

    The wrapped frame lives in ``self.df``. Attributes and items the wrapper
    does not define itself are looked up on that frame, so an instance can be
    used much like the DataFrame it holds. Note that ``rename`` and ``align``
    are defined here and therefore take precedence over the DataFrame methods
    of the same name.
    """

    def __init__(self, df: pd.DataFrame):
        super().__init__()
        self.df = df

    def __getattr__(self, item):
        """Forward unknown attribute lookups to the wrapped DataFrame.

        Python only calls this hook after normal lookup has failed, so
        anything stored on the instance or class never reaches it.

        Raises:
            AttributeError: for ``df`` itself and for dunder names. Without
                this guard, touching an instance whose ``df`` is not set yet
                (as ``copy`` and ``pickle`` do while rebuilding an object)
                would re-enter this method forever.
        """
        if item == "df" or (item.startswith("__") and item.endswith("__")):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {item!r}"
            )
        return getattr(self.df, item)

    def __getitem__(self, item):
        """Return the instance attribute named ``item``, else ``self.df[item]``."""
        try:
            return self.__dict__[item]
        except (KeyError, TypeError):
            # KeyError: not an instance attribute.
            # TypeError: unhashable key such as a list of columns or a mask.
            return self.df[item]

    def __repr__(self):
        return repr(self.df)

    def get_dummy(self, columns=None):
        """One-hot encode the ``nc.style_name`` column of the wrapped frame.

        Args:
            columns: optional mapping (anything ``dict()`` accepts) from the
                values found in the style column to the names of the
                resulting indicator columns. When omitted or empty, every
                distinct value in the column maps to itself.

        Returns:
            ``self`` for chaining, with the style column replaced by its
            indicator columns. If the wrapped frame is empty, an empty
            DataFrame whose columns are ``nc.code_name`` followed by the
            mapped names is returned instead (the wrapper is left unchanged).

        Raises:
            KeyError: if a value in the style column is missing from
                ``columns``.
        """
        if not columns:
            styles = self.df[nc.style_name].drop_duplicates().tolist()
            columns = dict(zip(styles, styles))
        mapping = dict(columns)  # built once instead of once per row

        if self.df.empty:
            return pd.DataFrame(columns=[nc.code_name] + list(mapping.values()))

        # Work on a separate Series so the frame handed in by the caller is
        # not modified behind their back.
        mapped = self.df[nc.style_name].apply(lambda style: mapping[style])
        dummies = pd.get_dummies(mapped)
        self.df = pd.concat([self.df.drop(nc.style_name, axis=1), dummies], axis=1)
        return self

    def rename(self, columns):
        """Rename columns of the wrapped frame; returns ``self`` for chaining."""
        self.df = self.df.rename(columns=columns)
        return self

    def clear_data(self, *conds):
        """Filter the wrapped frame by each ``DataFrame.query`` expression in turn.

        Returns ``self`` for chaining.
        """
        for cond in conds:
            self.df = self.df.query(cond)
        return self

    def get_df(self):
        """Return the wrapped DataFrame."""
        return self.df

    @staticmethod
    def _common_labels(indexes):
        """Return the sorted labels shared by every index in ``indexes``."""
        indexes = iter(indexes)
        shared = next(indexes)
        for index in indexes:
            shared = shared.intersection(index)
        return sorted(shared)

    def align(self, *dfs):
        """Restrict the wrapped frame and ``dfs`` to the labels they share.

        If any input is one-dimensional (a Series) or has a dimension of
        length 1, only the row index is aligned; otherwise both rows and
        columns are. Shared labels are returned in sorted order.

        Args:
            *dfs: pandas objects to align with the wrapped frame.

        Returns:
            A list with the aligned wrapped frame first, followed by the
            aligned ``dfs`` in the order given. The wrapper itself is not
            modified.
        """
        frames = (self.df,) + dfs
        rows_only = any(
            len(frame.shape) == 1 or 1 in frame.shape for frame in frames
        )
        rows = self._common_labels(frame.index for frame in frames)
        if rows_only:
            # Series have no ``columns`` and accept a single indexer only.
            return [frame.loc[rows] for frame in frames]
        cols = self._common_labels(frame.columns for frame in frames)
        return [frame.loc[rows, cols] for frame in frames]