-
Notifications
You must be signed in to change notification settings - Fork 0
/
kotsu_test.py
114 lines (90 loc) · 3.39 KB
/
kotsu_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from hashlib import sha1
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sktime.forecasting.compose import make_reduction
import numpy as np
from sktime.forecasting.compose import NetworkPipelineForecaster
from sktime.forecasting.base import ForecastingHorizon
from sktime.datasets import load_shampoo_sales
from sktime.forecasting.compose._reduce import _sliding_window_transform
from sklearn.ensemble import RandomForestClassifier
from hmmlearn import hmm
import kotsu
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.performance_metrics.forecasting import MeanSquaredPercentageError
from sktime.forecasting.model_evaluation import evaluate
import mlflow
# LOAD DATASET
# Row offset shared by all three CSV reads below; each read keeps exactly one data row.
dts_number = 0
daily_train = pd.read_csv(
    "Dataset/Train/Daily-train.csv", skiprows=dts_number, nrows=1
)
daily_test = pd.read_csv(
    "Dataset/Test/Daily-test.csv", skiprows=dts_number, nrows=1
)
info = pd.read_csv("Dataset/M4-info.csv", skiprows=dts_number, nrows=1)

# First cell of the training row (presumably the series identifier -- TODO confirm).
series_name = daily_train.iloc[0, 0]

# Training target: drop the empty columns, then keep the row's cells except the
# first and the last one.
# NOTE(review): the `1:-1` slice also discards the final observation -- confirm intended.
y_train = pd.DataFrame(data=daily_train.dropna(axis=1).to_numpy()[0, 1:-1])
# Bottom-right cell of the info row is used as the first timestamp.
# NOTE(review): assumes that cell is the start date of this same series -- verify.
start_index = info.iloc[-1, -1]
index = pd.date_range(start=start_index, periods=len(y_train), freq="D")
y_train.index = index
y_train = y_train.astype("float")

# Test target: same extraction, dated from the day after the training data ends.
y_test = pd.DataFrame(data=daily_test.dropna(axis=1).to_numpy()[0, 1:-1])
start_index = y_train.index[-1] + pd.DateOffset(n=1)
index = pd.date_range(start=start_index, periods=len(y_test), freq="D")
y_test.index = index
y_test = y_test.astype("float")

# DEFINE MODEL
regressor = RandomForestRegressor()
forecaster = make_reduction(regressor, window_length=15, strategy="recursive")
# Relative horizon 1..len(y_test): one step per test observation.
fh = ForecastingHorizon(np.arange(len(y_test)) + 1)
# Directional change reduced to forecasting
def converter(y1, y2):
    """
    Convert a regression output into a directional-change output.

    Each output entry is 1 when a value is strictly greater than the value
    preceding it (up) and 0 otherwise (down or unchanged).

    Parameters
    ----------
    y1 : pd.Series
        Series preceding y2. Only its last value is used, as the reference
        for the first item of y2. A pd.DataFrame (as passed in this file)
        works as well.
    y2 : pd.Series
        Values to convert.

    Returns
    -------
    pd.Series
        Integer 0/1 flags, one per item of y2. Entry i compares y2[i] with
        the value before it and carries the index label of that preceding
        value (the first label therefore comes from y1).
    """
    # Positional `iloc[-1:]` takes the final observation of y1, as the
    # contract above requires; a `[-2:-1]` slice would take the one before it.
    chained = pd.concat([y1.iloc[-1:], y2])
    rose = chained.shift(-1) > chained
    # The last row is compared against the NaN shifted in at the end, so it
    # carries no information and is dropped.
    return rose.iloc[:-1].astype('int')
# Pipeline wiring: each step is (name, object, argument map). String values in
# an argument map name another input or step; other values are passed as given
# (presumably resolved by NetworkPipelineForecaster -- TODO confirm).
steps = [
    (
        "forecaster",
        forecaster,
        {
            "fit": {"y": "original_y", "fh": "original_fh"},
            "predict": {"fh": "original_fh"},
        },
    ),
    (
        "converter",
        converter,
        {
            # The converter has nothing to fit.
            "fit": None,
            "predict": {"y1": y_train, "y2": "forecaster"},
        },
    ),
]
# BENCHMARK WITH MLFLOW
# NOTE(review): only kotsu registries are created here; no mlflow call is
# visible in this section.
model_registry_m4benchmarking = kotsu.registration.ModelRegistry()
validation_registry_m4benchmarking = kotsu.registration.ValidationRegistry()

# Register the pipeline class; `steps` is handed over as a keyword argument.
model_registry_m4benchmarking.register(
    id="BenchmarkingPipe-v1",
    entry_point=NetworkPipelineForecaster,
    kwargs=dict(steps=steps),
)
def factory():
    """Build the validation callable that is registered with kotsu.

    Returns
    -------
    callable
        Function that takes a model and returns a dict of results.
    """

    def airline_cross_validation(model):
        """Fit ``model`` on ``y_train`` and score its forecast against ``y_test``.

        Despite the name, the data is the module-level ``y_train`` / ``y_test``
        pair loaded at the top of this file.

        Parameters
        ----------
        model : forecaster
            Unfitted model. It is called as ``fit(y=..., fh=...)`` and
            ``predict(fh=...)``, matching the ``original_y`` / ``original_fh``
            inputs the pipeline steps in this file are wired to.

        Returns
        -------
        dict
            ``{"mean_squared_percentage_error": score}``.
        """
        metric = MeanSquaredPercentageError()
        # Use the model under test: previously y_test was scored against
        # itself and the score was discarded.
        model.fit(y=y_train, fh=fh)
        y_pred = model.predict(fh=fh)
        # NOTE(review): the pipeline registered in this file ends with
        # `converter`, so y_pred is presumably a 0/1 direction series while
        # y_test holds raw values -- confirm the intended ground truth and
        # whether a percentage error is the right metric for it.
        score = metric(y_true=y_test, y_pred=y_pred)
        # Return the score in a dict rather than the metric object
        # (kotsu appears to expect a results dict -- confirm against its docs).
        return {"mean_squared_percentage_error": score}

    return airline_cross_validation
# Register the validation factory under its id.
validation_registry_m4benchmarking.register(id="first_dataset-v1", entry_point=factory)

# Run the benchmark with both registries; the third argument is the CSV path
# handed to kotsu for the results.
kotsu.run.run(
    model_registry_m4benchmarking,
    validation_registry_m4benchmarking,
    "./kotsu_results_interval.csv",
)