Merge pull request #184 from winedarksea/dev
0.5.7
winedarksea authored May 23, 2023
2 parents de2d2a1 + 6037679 commit 27999a2
Showing 44 changed files with 796 additions and 197 deletions.
README.md (2 changes: 1 addition & 1 deletion)
@@ -4,7 +4,7 @@

AutoTS is a time series package for Python designed for rapidly deploying high-accuracy forecasts at scale.

In 2023, AutoTS has won the M6 forecasting competition, delivering the highest performance investment decisions across 12 months of stock market forecasting.
In 2023, AutoTS has won in the M6 forecasting competition, delivering the highest performance investment decisions across 12 months of stock market forecasting.

There are dozens of forecasting models usable in the `sklearn` style of `.fit()` and `.predict()`.
These include naive, statistical, machine learning, and deep learning models.
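The README lines above describe models usable in the sklearn style of .fit() and .predict(); a minimal sketch of that workflow follows (the bundled load_daily dataset, its column names, and the settings chosen are assumptions drawn from the package's examples, not part of this commit):

    from autots import AutoTS, load_daily

    # long-format sample data; columns assumed to be 'datetime', 'series_id', 'value'
    df = load_daily(long=True)

    model = AutoTS(
        forecast_length=21,      # periods ahead to forecast
        frequency='infer',       # or a pandas offset alias such as 'D'
        model_list='superfast',  # small alias list keeps the sketch quick
        max_generations=5,
    )
    model = model.fit(df, date_col='datetime', value_col='value', id_col='series_id')
    prediction = model.predict()
    point_forecasts = prediction.forecast  # wide DataFrame of point forecasts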
TODO.md (10 changes: 6 additions & 4 deletions)
@@ -12,10 +12,12 @@
* The most recent data will generally be the most important
* Forecasts are desired for the future immediately following the most recent data.

# 0.5.6 🌌 🌌 🌌
* fixes for annoying things broken by pandas 2.0, without them giving deprecation warnings
* also for gluonts who also love breaking their API
* ensembling tuning
# 0.5.7 🛂🛂🛂
* slight changes to holiday_flag to allow list in some cases
* DatepartRegressionTransformer now accepts holiday country input as regressor
* added RegressionFilter
* changed bounded behavior of AlignLastValue
* small bug fixes

### New Model Checklist:
* Add to ModelMonster in auto_model.py
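For the first changelog item above, a hedged sketch of calling the holiday flag helper with the new list input (the exact holiday_flag signature is assumed from earlier releases, and the list form is only "allowed in some cases" per the note):

    import pandas as pd
    from autots.tools.holiday import holiday_flag

    idx = pd.date_range("2023-01-01", periods=365, freq="D")
    flags_us = holiday_flag(idx, country="US")              # long-standing single-country usage
    flags_multi = holiday_flag(idx, country=["US", "CA"])   # list input, new in 0.5.7 per the notes above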
autots/evaluator/auto_model.py (5 changes: 4 additions & 1 deletion)
@@ -699,7 +699,9 @@ def ModelPrediction(
pass
print(error_msg)

transformer_object = GeneralTransformer(**transformation_dict, n_jobs=n_jobs)
transformer_object = GeneralTransformer(
**transformation_dict, n_jobs=n_jobs, holiday_country=holiday_country
)
df_train_transformed = transformer_object._fit(df_train)

# make sure regressor has same length. This could be a problem if wrong size regressor is passed.
@@ -736,6 +738,7 @@ def ModelPrediction(

transformationStartTime = datetime.datetime.now()
# Inverse the transformations, NULL FILLED IN UPPER/LOWER ONLY
# forecast inverse MUST come before upper and lower bounds inverse
df_forecast.forecast = pd.DataFrame(
transformer_object.inverse_transform(df_forecast.forecast)
)
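The auto_model.py hunks above forward holiday_country into GeneralTransformer and pin the inverse-transform order; a rough sketch of that flow follows (the toy data and transformation dict are placeholders, and 'DatepartRegression' is assumed to be the transformer alias that consumes holiday_country):

    import numpy as np
    import pandas as pd
    from autots.tools.transform import GeneralTransformer

    # toy wide-format training frame: a DatetimeIndex with one column per series
    df_train = pd.DataFrame(
        np.random.rand(120, 2),
        index=pd.date_range("2023-01-01", periods=120, freq="D"),
        columns=["series_a", "series_b"],
    )
    transformation_dict = {
        "fillna": "ffill",
        "transformations": {"0": "DatepartRegression"},
        "transformation_params": {"0": {}},
    }
    transformer = GeneralTransformer(
        **transformation_dict, n_jobs=1, holiday_country="US"  # holiday_country now forwarded, per the hunk above
    )
    df_train_transformed = transformer.fit_transform(df_train)
    # a model would be fit and predicted on the transformed scale here;
    # per the new comment in the diff, the point forecast must then be
    # inverse-transformed before the upper and lower bounds are.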
autots/evaluator/auto_ts.py (43 changes: 29 additions & 14 deletions)
@@ -6,6 +6,7 @@
import json
import sys
import time
import traceback as tb

from autots.tools.shaping import (
long_to_wide,
@@ -50,6 +51,7 @@ class AutoTS(object):
Args:
forecast_length (int): number of periods over which to evaluate forecast. Can be overridden later in .predict().
When you don't have much historical data, using a small forecast length for .fit and the full desired forecast length for .predict is usually the best possible approach given limitations.
frequency (str): 'infer' or a specific pandas datetime offset. Can be used to force rollup of data (i.e. daily input, but frequency 'M' will roll up to monthly).
prediction_interval (float): 0-1, uncertainty range for upper and lower forecasts. Adjust range, but rarely matches actual containment.
max_generations (int): number of genetic algorithms generations to run.
@@ -87,8 +89,9 @@ class AutoTS(object):
transformer_list (list): list of transformers to use, or dict of transformer:probability. Note this does not apply to initial templates.
can accept string aliases: "all", "fast", "superfast"
transformer_max_depth (int): maximum number of sequential transformers to generate for new Random Transformers. Fewer will be faster.
models_mode (str): option to adjust parameter options for newly generated models. Currently includes:
'default', 'deep' (searches more params, likely slower), and 'regressor' (forces 'User' regressor mode in regressor capable models)
models_mode (str): option to adjust parameter options for newly generated models. Only sporadically utilized. Currently includes:
'default'/'random', 'deep' (searches more params, likely slower), and 'regressor' (forces 'User' regressor mode in regressor capable models),
'gradient_boosting', 'neuralnets' (~Regression class models only)
num_validations (int): number of cross validations to perform. 0 for just train/test on best split.
Possible confusion: num_validations is the number of validations to perform *after* the first eval segment, so the total number of evals/validations will be this + 1.
Also "auto" and "max" aliases available. Max maxes out at 50.
@@ -427,7 +430,7 @@ def __init__(
)
self.initial_results = TemplateEvalObject()
self.best_model_name = ""
self.best_model_params = ""
self.best_model_params = {}
self.best_model_transformation_params = ""
self.traceback = True if verbose > 1 else False
self.future_regressor_train = None
@@ -928,7 +931,11 @@ def fit(

# preclean data
if self.preclean is not None:
self.preclean_transformer = GeneralTransformer(**self.preclean)
self.preclean_transformer = GeneralTransformer(
**self.preclean,
n_jobs=self.n_jobs,
holiday_country=self.holiday_country,
)
df_wide_numeric = self.preclean_transformer.fit_transform(df_wide_numeric)

self.df_wide_numeric = df_wide_numeric
@@ -1153,7 +1160,9 @@ def fit(
result_file=result_file,
)
except Exception as e:
print(f"Ensembling Error: {repr(e)}")
print(
f"Ensembling Error: {repr(e)}: {''.join(tb.format_exception(None, e, e.__traceback__))}"
)

# drop any duplicates in results
self.initial_results.model_results = (
@@ -1283,7 +1292,9 @@ def fit(
first_validation=False,
)
except Exception as e:
print(f"Ensembling Error: {repr(e)}")
print(
f"Post-Validation Ensembling Error: {repr(e)}: {''.join(tb.format_exception(None, e, e.__traceback__))}"
)
time.sleep(5)

error_msg_template = """No models available from validation.
@@ -1554,7 +1565,9 @@ def fit(
].copy()
except Exception as e:
if self.verbose >= 0:
print(f"Ensembling Error: {repr(e)}")
print(
f"Horizontal/Mosaic Ensembling Error: {repr(e)}: {''.join(tb.format_exception(None, e, e.__traceback__))}"
)
hens_model_results = TemplateEvalObject().model_results.copy()

# rerun validation_results aggregation with new models added
@@ -1624,7 +1637,9 @@ def fit(
self.ensemble_check = int(self.best_model_ensemble > 0)

# set flags to check if regressors or ensemble used in final model.
self.used_regressor_check = self._regr_param_check(self.best_model_params)
self.used_regressor_check = self._regr_param_check(
self.best_model_params.copy()
)
self.regressor_used = self.used_regressor_check
# clean up any remaining print statements
sys.stdout.flush()
@@ -1866,7 +1881,7 @@ def predict(
for interval in prediction_interval:
df_forecast = model_forecast(
model_name=self.best_model_name,
model_param_dict=self.best_model_params,
model_param_dict=self.best_model_params.copy(),
model_transform_dict=self.best_model_transformation_params,
df_train=self.df_wide_numeric,
forecast_length=forecast_length,
@@ -1916,7 +1931,7 @@ def predict(
else:
df_forecast = model_forecast(
model_name=self.best_model_name,
model_param_dict=self.best_model_params,
model_param_dict=self.best_model_params.copy(),
model_transform_dict=self.best_model_transformation_params,
df_train=self.df_wide_numeric,
forecast_length=forecast_length,
@@ -2333,7 +2348,7 @@ def back_forecast(
result = back_forecast(
df=input_df,
model_name=self.best_model_name,
model_param_dict=self.best_model_params,
model_param_dict=self.best_model_params.copy(),
model_transform_dict=self.best_model_transformation_params,
future_regressor_train=self.future_regressor_train,
n_splits=n_splits,
@@ -2356,7 +2371,7 @@ def horizontal_to_df(self):
raise ValueError("No best_model. AutoTS .fit() needs to be run.")
if self.best_model['Ensemble'].iloc[0] != 2:
raise ValueError("Only works on horizontal ensemble type models.")
ModelParameters = self.best_model_params
ModelParameters = self.best_model_params.copy()
series = ModelParameters['series']
series = pd.DataFrame.from_dict(series, orient="index").reset_index(drop=False)
if series.shape[1] > 2:
@@ -2398,7 +2413,7 @@ def mosaic_to_df(self):
raise ValueError("No best_model. AutoTS .fit() needs to be run.")
if self.best_model_ensemble != 2:
raise ValueError("Only works on horizontal ensemble type models.")
ModelParameters = self.best_model_params
ModelParameters = self.best_model_params.copy()
if str(ModelParameters['model_name']).lower() != 'mosaic':
raise ValueError("Only works on mosaic ensembles.")
series = pd.DataFrame.from_dict(ModelParameters['series'])
@@ -2709,7 +2724,7 @@ def plot_horizontal_model_count(
elif self.best_model_ensemble != 2:
raise ValueError("this plot only works on horizontal-style ensembles.")

if str(self.best_model_params['model_name']).lower() == "mosaic":
if str(self.best_model_params.get('model_name', None)).lower() == "mosaic":
series = self.mosaic_to_df()
transformers = series.stack().value_counts()
else:
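Two recurring patterns in the auto_ts.py hunks above are worth isolating: error handlers now print the full traceback via the traceback module rather than just repr(e), and best_model_params is .copy()-ed before being handed to downstream calls so the stored template dict cannot be mutated. A standalone sketch of both, with an invented failing step:

    import traceback as tb

    best_model_params = {"model_name": "Ensemble", "series": {"series_a": "model_1"}}

    def run_step(model_param_dict):
        # downstream code may pop or rewrite keys; a copy keeps the original intact
        model_param_dict.pop("series", None)
        raise RuntimeError("demo failure")

    try:
        run_step(best_model_params.copy())
    except Exception as e:
        # format_exception returns a list of lines; joined, it gives the full stack in one printable string
        print(
            f"Ensembling Error: {repr(e)}: {''.join(tb.format_exception(None, e, e.__traceback__))}"
        )

    assert "series" in best_model_params  # original dict untouched thanks to .copy()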
autots/evaluator/metrics.py (6 changes: 3 additions & 3 deletions)
@@ -504,7 +504,7 @@ def full_metric_evaluation(
u_weights[-1, :] = first_weight * 0.5

# over/under estimate mask
ovm = filled_full_mae_errors > 0
ovm = full_errors > 0

# note a number of these are created from my own imagination (winedarksea)
# those are also subject to change as they are tested and refined
@@ -517,9 +517,9 @@
'made': mean_absolute_differential_error(lA, lF, 1, scaler=scaler),
# aggregate error
'mage': mage, # Gandalf approved
'underestimate': np.sum(filled_full_mae_errors[~ovm], axis=0),
'underestimate': np.nansum(full_errors[~ovm], axis=0),
'mle': msle(full_errors, full_mae_errors, log_errors, nan_flag=nan_flag),
'overestimate': np.sum(filled_full_mae_errors[ovm], axis=0),
'overestimate': np.nansum(full_errors[ovm], axis=0),
'imle': msle(
-full_errors,
full_mae_errors,
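The metrics.py change above derives the over/under-estimate split from the signed errors and swaps np.sum for np.nansum so missing actuals no longer drag the totals to NaN. A small numpy sketch of the same idea with made-up arrays (kept two-dimensional with np.where so the per-series axis=0 sums survive, which is a slight simplification of the library's code):

    import numpy as np

    actuals = np.array([[10.0, 5.0], [np.nan, 6.0], [12.0, 4.0]])   # one missing observation
    forecasts = np.array([[11.0, 4.0], [9.0, 7.0], [10.0, 4.5]])

    full_errors = forecasts - actuals   # signed errors; NaN where the actual is missing
    ovm = full_errors > 0               # over-estimate mask (NaN compares as False)

    overestimate = np.nansum(np.where(ovm, full_errors, 0.0), axis=0)
    underestimate = np.nansum(np.where(~ovm, full_errors, 0.0), axis=0)
    print(overestimate, underestimate)   # [1.  1.5] and [-2. -1.] for these toy arrays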
autots/models/base.py (11 changes: 10 additions & 1 deletion)
@@ -437,7 +437,16 @@ def plot(
start_date=start_date,
)
if title is None:
title = f"{series} with model {str(self.model_name)[0:80]}"
title_prelim = str(self.model_name)[0:80]
if title_prelim == "Ensemble":
ensemble_type = self.model_parameters.get('model_name', "unknown")
if ensemble_type == "Horizontal":
title_prelim = self.model_parameters['series'].get(
series, "Horizontal"
)
else:
title_prelim = ensemble_type
title = f"{series} with model {title_prelim}"
if vline is None:
return plot_df.plot(title=title, **kwargs)
else:
autots/models/cassandra.py (40 changes: 28 additions & 12 deletions)
@@ -13,7 +13,12 @@
import pandas as pd

# using transformer version of Anomaly/Holiday to use a lower level import than evaluator
from autots.tools.seasonal import create_seasonality_feature, seasonal_int
from autots.tools.seasonal import (
create_seasonality_feature,
seasonal_int,
datepart_components,
date_part_methods,
)
from autots.tools.transform import (
GeneralTransformer,
RandomTransform,
@@ -139,6 +144,7 @@ def __init__(
random_seed: int = 2022,
verbose: int = 0,
n_jobs: int = "auto",
**kwargs,
):
if preprocessing_transformation is None:
preprocessing_transformation = {}
@@ -1750,8 +1756,9 @@ def get_new_params(self, method='fast'):
'UnivariateMotif',
'UnobservedComponents',
"KalmanStateSpace",
'RRVAR',
],
[0.05, 0.05, 0.1, 0.05, 0.05, 0.15, 0.05, 0.05, 0.05, 0.05],
[0.05, 0.05, 0.1, 0.05, 0.05, 0.15, 0.05, 0.05, 0.05, 0.05, 0.05],
k=1,
)[0]
trend_model = {'Model': model_str}
@@ -1853,7 +1860,7 @@ def get_new_params(self, method='fast'):
[0.6, 0.2, 0.1, 0.05, 0.02, 0.03],
)[0]
recency_weighting = random.choices(
[None, 0.05, 0.1, 0.25], [0.7, 0.1, 0.1, 0.1]
[None, 0.05, 0.1, 0.25, 0.5], [0.7, 0.1, 0.1, 0.1, 0.05]
)[0]
if linear_model in ['lstsq']:
linear_model = {
@@ -1888,21 +1895,30 @@
ar_interaction_seasonality = random.choices(
[None, 7, 'dayofweek', 'common_fourier'], [0.4, 0.2, 0.2, 0.2]
)[0]
seasonalities = random.choices(
[
[7, 365.25],
["dayofweek", 365.25],
["month", "dayofweek", "weekdayofmonth"],
['weekdayofmonth', 'common_fourier'],
"other",
],
[0.1, 0.1, 0.1, 0.05, 0.1],
)[0]
if seasonalities == "other":
predefined = random.choices([True, False], [0.5, 0.5])[0]
if predefined:
seasonalities = random.choice(date_part_methods)
else:
comp_opts = datepart_components + [7, 365.25, 12]
seasonalities = random.choices(comp_opts, k=2)
return {
"preprocessing_transformation": RandomTransform(
transformer_list=filters, transformer_max_depth=2, allow_none=True
),
"scaling": scaling,
# "past_impacts_intervention": self.past_impacts_intervention,
"seasonalities": random.choices(
[
[7, 365.25],
["dayofweek", 365.25],
["month", "dayofweek", "weekdayofmonth"],
['weekdayofmonth', 'common_fourier'],
],
[0.1, 0.1, 0.1, 0.05],
)[0],
"seasonalities": seasonalities,
"ar_lags": ar_lags,
"ar_interaction_seasonality": ar_interaction_seasonality,
"anomaly_detector_params": anomaly_detector_params,
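get_new_params above draws hyperparameters with weighted random.choices, and the new "other" branch either takes one predefined date-part method or assembles two components at random. A toy version of that sampling pattern (the two option lists here are stand-ins; the real datepart_components and date_part_methods come from autots.tools.seasonal):

    import random

    date_part_methods = ["simple", "expanded", "recurring"]          # stand-in names
    datepart_components = ["dayofweek", "month", "weekdayofmonth"]   # stand-in names

    seasonalities = random.choices(
        [[7, 365.25], ["dayofweek", 365.25], "other"],
        [0.45, 0.45, 0.10],
        k=1,
    )[0]
    if seasonalities == "other":
        if random.random() < 0.5:
            seasonalities = random.choice(date_part_methods)   # one predefined method string
        else:
            # two components drawn with replacement, mirroring random.choices(..., k=2) in the diff
            seasonalities = random.choices(datepart_components + [7, 365.25, 12], k=2)
    print(seasonalities)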
autots/models/ensemble.py (12 changes: 12 additions & 0 deletions)
@@ -1136,6 +1136,10 @@ def HorizontalTemplateGenerator(
)
nomen = 'Horizontal'
metric = 'Score-max'
if len(mods_per_series) < per_series.shape[1]:
raise ValueError(
"ERROR in Horizontal Generation insufficient series created, horizontal-max"
)
best5_params = {
'Model': 'Ensemble',
'ModelParameters': json.dumps(
@@ -1229,6 +1233,10 @@ def HorizontalTemplateGenerator(
)
nomen = 'Horizontal'
metric = 'Score'
if len(mods_per_series) < per_series.shape[1]:
raise ValueError(
"ERROR in Horizontal Generation insufficient series created, horizontal"
)
best5_params = {
'Model': 'Ensemble',
'ModelParameters': json.dumps(
@@ -1284,6 +1292,10 @@ def HorizontalTemplateGenerator(
)
nomen = 'Horizontal'
metric = 'Score-min'
if len(mods_per_series) < per_series.shape[1]:
raise ValueError(
"ERROR in Horizontal Generation insufficient series created, horizontal-min"
)
best5_params = {
'Model': 'Ensemble',
'ModelParameters': json.dumps(
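All three new guards in ensemble.py enforce the same invariant: horizontal ensembling must have assigned a model to every series before the template is written. A minimal standalone version of that check (the per_series frame and the idxmin selection are illustrative, not the generator's actual selection logic):

    import pandas as pd

    # rows: candidate models, columns: series; values: an error score per pair
    per_series = pd.DataFrame(
        [[0.2, 0.5, 0.1], [0.3, 0.4, 0.6]],
        index=["model_a", "model_b"],
        columns=["series_1", "series_2", "series_3"],
    )
    mods_per_series = per_series.idxmin(axis=0)   # best-scoring model for each series

    if len(mods_per_series) < per_series.shape[1]:
        raise ValueError(
            "ERROR in Horizontal Generation insufficient series created, horizontal"
        )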
autots/models/model_list.py (4 changes: 3 additions & 1 deletion)
@@ -12,7 +12,7 @@
'GluonTS',
'SeasonalNaive',
'UnobservedComponents',
'VARMAX',
# 'VARMAX',
'VECM',
'DynamicFactor',
'MotifSimulation',
@@ -147,6 +147,7 @@
}
# models that should be fast given many CPU cores
fast_parallel = {**parallel, **fast}
fast_parallel_no_arima = {i: fast_parallel[i] for i in fast_parallel if i != 'ARIMA'}
# models that are explicitly not production ready
experimental = [
'MotifSimulation',
@@ -332,6 +333,7 @@
"superfast": superfast,
"parallel": parallel,
"fast_parallel": fast_parallel,
"fast_parallel_no_arima": fast_parallel_no_arima,
"probabilistic": probabilistic,
"multivariate": multivariate,
"univariate": univariate,
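The model_list.py change filters one entry out of an existing alias dict with a comprehension and registers the result under a new alias. A short sketch of the pattern and of how such an alias would then be requested (the toy dict and the AutoTS call are illustrative):

    fast_parallel = {"ARIMA": 0.4, "Theta": 0.3, "ETS": 0.3}   # stand-in for the real weighted dict
    fast_parallel_no_arima = {i: fast_parallel[i] for i in fast_parallel if i != "ARIMA"}
    print(fast_parallel_no_arima)   # {'Theta': 0.3, 'ETS': 0.3}

    # registered aliases can then be passed by name, e.g.:
    # model = AutoTS(forecast_length=14, model_list="fast_parallel_no_arima")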
(Diffs for the remaining changed files are not shown.)
