-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeature_extractor.py
32 lines (27 loc) · 1.15 KB
/
feature_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import pandas as pd
import os
class FeatureExtractor(object):
    """Add arrival-airport weather and one-hot airport columns to flight rows.

    ``transform`` left-joins the ``Max TemperatureC`` column of
    ``external_data.csv`` (read from the directory containing this module)
    onto the input rows, replaces ``Departure`` and ``Arrival`` with one-hot
    indicator columns, drops ``DateOfDeparture`` and returns the result as an
    array.

    ``fit`` records the airport levels present in the training frame so that
    every later ``transform`` call emits the same indicator columns in the
    same order, whatever airports happen to appear in the frame being
    transformed.
    """

    def __init__(self):
        # Levels learned by ``fit``; ``None`` means "not fitted yet".
        self._departure_levels = None
        self._arrival_levels = None

    def fit(self, X_df, y_array):
        """Record the ``Departure`` and ``Arrival`` levels found in *X_df*.

        Parameters
        ----------
        X_df : pandas.DataFrame
            Training frame; must contain ``Departure`` and ``Arrival``.
        y_array : object
            Unused; accepted for interface compatibility.

        Returns
        -------
        FeatureExtractor
            ``self``, so that calls can be chained.

        Raises
        ------
        KeyError
            If *X_df* lacks a ``Departure`` or ``Arrival`` column.
        """
        # ``pd.Categorical`` orders its categories the same way
        # ``pd.get_dummies`` orders the columns it generates, so a frame
        # holding exactly these levels is encoded as it was before levels
        # were recorded.
        self._departure_levels = pd.Categorical(X_df['Departure']).categories
        self._arrival_levels = pd.Categorical(X_df['Arrival']).categories
        return self

    @staticmethod
    def _one_hot(frame, column, prefix, levels):
        """Return indicator columns for ``frame[column]`` named ``prefix_<level>``."""
        values = frame[column]
        if levels is not None:
            # Pin the column set to the fitted levels: levels absent from
            # this frame still get an (all-zero) column, and values never
            # seen during ``fit`` become missing, i.e. an all-zero row.
            values = pd.Series(
                pd.Categorical(values, categories=levels),
                index=values.index)
        return pd.get_dummies(values, prefix=prefix)

    def transform(self, X_df):
        """Build the feature array for *X_df*.

        Parameters
        ----------
        X_df : pandas.DataFrame
            Must contain ``DateOfDeparture``, ``Departure`` and ``Arrival``.
            Any other columns are passed through unchanged.

        Returns
        -------
        numpy.ndarray
            The pass-through columns, then ``Max TemperatureC``, then the
            ``d_*`` (departure) and ``a_*`` (arrival) indicator columns.
            Rows without a matching weather record get NaN for the
            temperature.
        """
        path = os.path.dirname(__file__)
        data_weather = pd.read_csv(os.path.join(path, "external_data.csv"))

        # Rename the weather keys so the temperature is matched on the
        # departure date and the *arrival* airport.
        X_weather = data_weather[['Date', 'AirPort', 'Max TemperatureC']]
        X_weather = X_weather.rename(
            columns={'Date': 'DateOfDeparture', 'AirPort': 'Arrival'})

        # The left join keeps every input row, in its original order.
        X_encoded = pd.merge(
            X_df, X_weather, how='left',
            left_on=['DateOfDeparture', 'Arrival'],
            right_on=['DateOfDeparture', 'Arrival'],
            sort=False)

        # ``getattr`` keeps instances created without ``__init__`` (for
        # example ones unpickled from an older version) working; they fall
        # back to per-call levels.
        departure_levels = getattr(self, '_departure_levels', None)
        arrival_levels = getattr(self, '_arrival_levels', None)

        X_encoded = X_encoded.join(
            self._one_hot(X_encoded, 'Departure', 'd', departure_levels))
        X_encoded = X_encoded.join(
            self._one_hot(X_encoded, 'Arrival', 'a', arrival_levels))

        X_encoded = X_encoded.drop(
            ['Departure', 'Arrival', 'DateOfDeparture'], axis=1)
        return X_encoded.values