Skip to content

Commit 1ecad40

Browse files
authored
Merge pull request #36 from IBM/time_series
Add timeseries wrapper
2 parents a1279c6 + e61acb9 commit 1ecad40

File tree

4 files changed

+276
-15
lines changed

4 files changed

+276
-15
lines changed

docs/source/predictive.rst

+23-15
Original file line numberDiff line numberDiff line change
@@ -57,42 +57,50 @@ Association Rules
5757
:undoc-members:
5858
:show-inheritance:
5959

60-
Classification base module
61-
--------------------------------------------------
60+
Bisecting KMeans
61+
-----------------------------------------------------
6262

63-
.. automodule:: nzpyida.analytics.predictive.classification
63+
.. automodule:: nzpyida.analytics.predictive.bisecting_kmeans
6464
:members:
6565
:undoc-members:
6666
:show-inheritance:
6767

68-
Regression base module
69-
----------------------------------------------
68+
Two Step Clustering
69+
-----------------------------------------------------
7070

71-
.. automodule:: nzpyida.analytics.predictive.regression
71+
.. automodule:: nzpyida.analytics.predictive.two_step_clustering
7272
:members:
7373
:undoc-members:
7474
:show-inheritance:
7575

76-
Predictive Modeling base module
77-
--------------------------------------------------------
76+
Time Series Forecasting
77+
-----------------------------------------------------
7878

79-
.. automodule:: nzpyida.analytics.predictive.predictive_modeling
79+
.. automodule:: nzpyida.analytics.predictive.timeseries
8080
:members:
8181
:undoc-members:
8282
:show-inheritance:
8383

84-
Bisecting KMeans
85-
-----------------------------------------------------
84+
Classification base module
85+
--------------------------------------------------
8686

87-
.. automodule:: nzpyida.analytics.predictive.bisecting_kmeans
87+
.. automodule:: nzpyida.analytics.predictive.classification
8888
:members:
8989
:undoc-members:
9090
:show-inheritance:
9191

92-
Two Step Clustering
93-
-----------------------------------------------------
92+
Regression base module
93+
----------------------------------------------
9494

95-
.. automodule:: nzpyida.analytics.predictive.two_step_clustering
95+
.. automodule:: nzpyida.analytics.predictive.regression
96+
:members:
97+
:undoc-members:
98+
:show-inheritance:
99+
100+
Predictive Modeling base module
101+
--------------------------------------------------------
102+
103+
.. automodule:: nzpyida.analytics.predictive.predictive_modeling
96104
:members:
97105
:undoc-members:
98106
:show-inheritance:

nzpyida/analytics/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from .predictive.bisecting_kmeans import BisectingKMeans
1818
from .predictive.regression_trees import DecisionTreeRegressor
1919
from .predictive.two_step_clustering import TwoStepClustering
20+
from .predictive.timeseries import TimeSeries
2021
from .exploration.distribution import bitable, moments, histogram, outliers
2122
from .exploration.distribution import quantile, unitable
2223
from .transform.discretization import EFDisc, EMDisc, EWDisc
+185
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
#-----------------------------------------------------------------------------
4+
# Copyright (c) 2023, IBM Corp.
5+
# All rights reserved.
6+
#
7+
# Distributed under the terms of the BSD Simplified License.
8+
#
9+
# The full license is in the LICENSE file, distributed with this software.
10+
#-----------------------------------------------------------------------------
11+
"""
12+
A time series model is built by analyzing series of timed numeric values, and is
13+
applied immediately for predicting future values. The model itself is stored but
14+
not really needed any more (except for understanding the predicted values).
15+
16+
If specified, a table <outtable> is additionally created with the following columns:
17+
<by>, <time>, forecast, standarderror. The table contains the forecast values for
18+
future time points of the time series identified by <by>. For each prediction,
19+
the standarderror value indicates a confidence interval around the forecast value.
20+
21+
If specified, a table <seasadjtable> is additionally created with the following columns:
22+
<by>, <time>, adjusted. The values in column <target> of the input table are seasonally
23+
adjusted and then copied into this table, together with the values of columns <by> and <time>.
24+
"""
25+
from typing import List
26+
from nzpyida.frame import IdaDataFrame
27+
from nzpyida.base import IdaDataBase
28+
from nzpyida.analytics.predictive.predictive_modeling import PredictiveModeling
29+
from nzpyida.analytics.utils import call_proc_df_in_out
30+
from nzpyida.analytics.model_manager import ModelManager
31+
32+
class TimeSeries(PredictiveModeling):
    """
    Time Series Model

    Wrapper around the TIMESERIES in-database procedure. The model is built and
    applied in a single step by fit_predict().
    """
    def __init__(self, idadb: IdaDataBase, model_name: str):
        """
        Creates Time Series

        Parameters
        ----------
        idadb : IdaDataBase
            the database connection object

        model_name : str
            the name of the model to build
        """
        super().__init__(idadb, model_name)
        # Name of the stored procedure invoked by fit_predict().
        self.fit_proc = "TIMESERIES"
        self.has_print_proc = True

    def fit_predict(self, in_df: IdaDataFrame, time_column: str, target_column: str, by_column: str=None,
        out_table: str=None, description_table: str=None, algorithm: str='ExponentialSmoothing',
        interpolation_method: str='linear', from_time=None, to_time=None, forecast_horizon: str=None,
        forecast_times: str=None, trend: str=None, seasonality: str=None, period: float=None,
        unit: str=None, p: int=None, d: int=None, q: int=None, sp: int=None, sd: int=None, sq: int=None,
        saesonally_adjusted_table: str=None, seasonally_adjusted_table: str=None) -> IdaDataFrame:
        """
        Predicts future values of series of timed numeric values

        Parameters
        ----------
        in_df : IdaDataFrame
            the input data frame

        time_column : str
            the input data frame column which defines an order on the numeric values

        target_column : str
            the input data frame column which contains the numeric values

        by_column : str
            the input data frame column which uniquely identifies a series of values.
            If not specified, all numeric values belong to only one time series.

        out_table : str
            the output table containing predicted future values. This parameter
            is not allowed for algorithm = SpectralAnalysis. If not specified,
            no output table is written out

        description_table : str
            the optional input table containing the name and descriptions of the
            time series. The table must contain following columns: <by_column>, 'NAME'=str,
            'DESCRIPTION'=str. If not specified, the series do not have a name or a description

        algorithm : str
            the time series algorithm to use. Allowed values are: ExponentialSmoothing,
            ARIMA, SeasonalTrendDecomposition, SpectralAnalysis

        interpolation_method : str
            the interpolation method. Allowed values are: linear, cubicspline, exponentialspline

        from_time : same as type of <time column>
            the value of column time to start the analysis from. If not specified, the analysis
            starts from the first value of the time series in the input table

        to_time : same as type of <time column>
            the value of column time to stop the analysis at. If not specified, the analysis
            stops at the last value of the time series in the input table

        forecast_horizon : str
            the value of column time until which to predict. This parameter is not allowed for
            algorithm=SpectralAnalysis. If not specified, the algorithm determines itself
            until which time it predicts values

        forecast_times : str
            list of semicolon-separated values of column time to predict at. This parameter
            is not allowed for algorithm=SpectralAnalysis. If not specified, the times to predict
            values at are determined by the algorithm

        trend : str
            the trend type for algorithm=ExponentialSmoothing. Allowed values are: N (none),
            A (additive), DA (damped additive), M (multiplicative), DM (damped multiplicative).
            If not specified, the trend type is determined by the algorithm

        seasonality : str
            the seasonality type for algorithm=ExponentialSmoothing. Allowed values are: N (none),
            A (additive), M (multiplicative). If not specified, the seasonality type is
            determined by the algorithm

        period : float
            the seasonality period. This parameter is not allowed for algorithm=SpectralAnalysis.
            If not specified, the seasonality period is determined by the algorithm. If set to 0,
            no seasonality period will be considered by the algorithm

        unit : str
            the seasonality period unit. This parameter is not allowed for algorithm=SpectralAnalysis.
            This parameter must be specified if the parameter period is specified and the <time_column>
            is of type date, time or timestamp. Otherwise, it must not be specified. Allowed values are:
            ms, s, min, h, d, wk, qtr, q, a, y

        p : int
            the parameter p for algorithm=ARIMA, either equal to or below specified value.
            If not specified, the algorithm will determine its best value automatically

        d : int
            the parameter d for algorithm=ARIMA, either equal to or below specified value.
            If not specified, the algorithm will determine its best value automatically

        q : int
            the parameter q for algorithm=ARIMA, either equal to or below specified value.
            If not specified, the algorithm will determine its best value automatically

        sp : int
            the seasonal parameter SP for algorithm=ARIMA, either equal to or below specified value.
            If not specified, the algorithm will determine its best value automatically

        sd : int
            the seasonal parameter SD for algorithm=ARIMA, either equal to or below specified value.
            If not specified, the algorithm will determine its best value automatically

        sq : int
            the seasonal parameter SQ for algorithm=ARIMA, either equal to or below specified value.
            If not specified, the algorithm will determine its best value automatically

        saesonally_adjusted_table : str
            misspelled name of <seasonally_adjusted_table>, kept so that existing callers
            keep working. Prefer <seasonally_adjusted_table> in new code

        seasonally_adjusted_table : str
            the output table containing seasonally adjusted values. This parameter is not allowed
            for algorithm=SpectralAnalysis or algorithm=ARIMA. If not specified, no output table
            is written out

        Returns
        -------
        IdaDataFrame
            the first element of the result returned by call_proc_df_in_out
            for the TIMESERIES procedure call

        Raises
        ------
        TypeError
            if in_df is not an IdaDataFrame

        ValueError
            if saesonally_adjusted_table and seasonally_adjusted_table are both
            given with different values
        """
        # Validate the input before touching the database.
        if not isinstance(in_df, IdaDataFrame):
            raise TypeError("Argument in_df should be an IdaDataFrame")

        # Reconcile the correctly spelled keyword with its legacy misspelling.
        if seasonally_adjusted_table is None:
            seasonally_adjusted_table = saesonally_adjusted_table
        elif (saesonally_adjusted_table is not None
                and saesonally_adjusted_table != seasonally_adjusted_table):
            raise ValueError("Arguments seasonally_adjusted_table and "
                "saesonally_adjusted_table are both set to different values")

        params = {
            'model': self.model_name,
            'time': time_column,
            'target': target_column,
            'by': by_column,
            'desctable': description_table,
            'algorithm': algorithm,
            'interpolationmethod': interpolation_method,
            'from': from_time,
            'to': to_time,
            'forecasthorizon': forecast_horizon,
            'forecasttimes': forecast_times,
            'trend': trend,
            'seasonality': seasonality,
            'period': period,
            'unit': unit,
            'p': p,
            'd': d,
            'q': q,
            'SP': sp,
            'SD': sd,
            'SQ': sq,
            'seasadjtable': seasonally_adjusted_table,
        }

        # Drop the model of the same name before the procedure builds it again.
        ModelManager(self.idadb).drop_model(self.model_name)

        return call_proc_df_in_out(proc=self.fit_proc, in_df=in_df, params=params,
            out_table=out_table)[0]
+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
#-----------------------------------------------------------------------------
4+
# Copyright (c) 2023, IBM Corp.
5+
# All rights reserved.
6+
#
7+
# Distributed under the terms of the BSD Simplified License.
8+
#
9+
# The full license is in the LICENSE file, distributed with this software.
10+
#-----------------------------------------------------------------------------
11+
12+
from nzpyida.analytics.predictive.timeseries import TimeSeries
13+
from nzpyida.base import IdaDataBase
14+
from nzpyida.frame import IdaDataFrame
15+
from nzpyida.analytics.model_manager import ModelManager
16+
import pytest
17+
from nzpyida.analytics.tests.conftest import MOD_NAME, TAB_NAME_TRAIN, OUT_TABLE_PRED
18+
import pandas as pd
19+
from math import sin
20+
21+
22+
@pytest.fixture(scope='module')
def mm(idadb: IdaDataBase):
    """Provide one ModelManager per test module, built on the idadb fixture."""
    manager = ModelManager(idadb)
    return manager
25+
26+
@pytest.fixture
def clean_up(idadb, mm):
    """Remove the test model and the prediction table before and after a test."""
    def _purge():
        # Only drop objects that are actually present.
        if mm.model_exists(MOD_NAME):
            mm.drop_model(MOD_NAME)
        if idadb.exists_table(OUT_TABLE_PRED):
            idadb.drop_table(OUT_TABLE_PRED)

    _purge()
    yield
    _purge()
37+
38+
39+
@pytest.fixture
def idf_train(idadb: IdaDataBase):
    """
    Upload a 200-row training table (TIME = 0..199, VALUE = sin(TIME) + TIME)
    and yield it as an IdaDataFrame; the table is dropped before and after use.
    """
    def _drop_train_table():
        if idadb.exists_table(TAB_NAME_TRAIN):
            idadb.drop_table(TAB_NAME_TRAIN)

    _drop_train_table()

    time_points = range(200)
    frame = pd.DataFrame.from_dict({
        "TIME": time_points,
        "VALUE": [sin(t) + t for t in time_points],
    })
    yield idadb.as_idadataframe(frame, TAB_NAME_TRAIN)

    _drop_train_table()
53+
54+
55+
def test_timeseries(idadb: IdaDataBase, mm: ModelManager, idf_train: IdaDataFrame, clean_up):
    """Build a time series model on sin(x)+x and check the forecast up to time 399."""
    ts_model = TimeSeries(idadb, MOD_NAME)
    assert ts_model
    # Creating the wrapper object must not create the model in the database.
    assert not mm.model_exists(MOD_NAME)

    forecast = ts_model.fit_predict(
        idf_train,
        time_column="TIME",
        target_column="VALUE",
        out_table=OUT_TABLE_PRED,
        forecast_horizon='399',
    )

    assert mm.model_exists(MOD_NAME)
    assert forecast
    assert len(forecast) == 200

    # NOTE(review): presumably the forecast rows cover TIME 200..399, which would make
    # the 10th row TIME 209 rather than 210; both expected values round to 210 - confirm.
    tenth_row = forecast.head(10).iloc[-1]
    assert round(tenth_row["VALUE"]) == round(sin(210)+210)

    final_row = forecast.tail().iloc[-1]
    assert round(final_row["VALUE"]) == round(sin(399)+399)

0 commit comments

Comments
 (0)