Skip to content

Commit

Permalink
Add ProcessMLE and bump the statsmodels version (#180)
Browse files Browse the repository at this point in the history
  • Loading branch information
StrikerRUS authored Mar 18, 2020
1 parent 25ef055 commit 46c6ab2
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 3 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pip install m2cgen

| | Classification | Regression |
| --- | --- | --- |
| **Linear** | <ul><li>scikit-learn<ul><li>LogisticRegression</li><li>LogisticRegressionCV</li><li>PassiveAggressiveClassifier</li><li>Perceptron</li><li>RidgeClassifier</li><li>RidgeClassifierCV</li><li>SGDClassifier</li></ul></li><li>lightning<ul><li>AdaGradClassifier</li><li>CDClassifier</li><li>FistaClassifier</li><li>SAGAClassifier</li><li>SAGClassifier</li><li>SDCAClassifier</li><li>SGDClassifier</li></ul></li></ul> | <ul><li>scikit-learn<ul><li>ARDRegression</li><li>BayesianRidge</li><li>ElasticNet</li><li>ElasticNetCV</li><li>HuberRegressor</li><li>Lars</li><li>LarsCV</li><li>Lasso</li><li>LassoCV</li><li>LassoLars</li><li>LassoLarsCV</li><li>LassoLarsIC</li><li>LinearRegression</li><li>OrthogonalMatchingPursuit</li><li>OrthogonalMatchingPursuitCV</li><li>PassiveAggressiveRegressor</li><li>RANSACRegressor(only supported regression estimators can be used as a base estimator)</li><li>Ridge</li><li>RidgeCV</li><li>SGDRegressor</li><li>TheilSenRegressor</li></ul><li>StatsModels<ul><li>Generalized Least Squares (GLS)</li><li>Generalized Least Squares with AR Errors (GLSAR)</li><li>Ordinary Least Squares (OLS)</li><li>Quantile Regression (QuantReg)</li><li>Weighted Least Squares (WLS)</li></ul><li>lightning<ul><li>AdaGradRegressor</li><li>CDRegressor</li><li>FistaRegressor</li><li>SAGARegressor</li><li>SAGRegressor</li><li>SDCARegressor</li></ul></li></ul> |
| **Linear** | <ul><li>scikit-learn<ul><li>LogisticRegression</li><li>LogisticRegressionCV</li><li>PassiveAggressiveClassifier</li><li>Perceptron</li><li>RidgeClassifier</li><li>RidgeClassifierCV</li><li>SGDClassifier</li></ul></li><li>lightning<ul><li>AdaGradClassifier</li><li>CDClassifier</li><li>FistaClassifier</li><li>SAGAClassifier</li><li>SAGClassifier</li><li>SDCAClassifier</li><li>SGDClassifier</li></ul></li></ul> | <ul><li>scikit-learn<ul><li>ARDRegression</li><li>BayesianRidge</li><li>ElasticNet</li><li>ElasticNetCV</li><li>HuberRegressor</li><li>Lars</li><li>LarsCV</li><li>Lasso</li><li>LassoCV</li><li>LassoLars</li><li>LassoLarsCV</li><li>LassoLarsIC</li><li>LinearRegression</li><li>OrthogonalMatchingPursuit</li><li>OrthogonalMatchingPursuitCV</li><li>PassiveAggressiveRegressor</li><li>RANSACRegressor(only supported regression estimators can be used as a base estimator)</li><li>Ridge</li><li>RidgeCV</li><li>SGDRegressor</li><li>TheilSenRegressor</li></ul></li><li>StatsModels<ul><li>Generalized Least Squares (GLS)</li><li>Generalized Least Squares with AR Errors (GLSAR)</li><li>Ordinary Least Squares (OLS)</li><li>[Gaussian] Process Regression Using Maximum Likelihood-based Estimation (ProcessMLE)</li><li>Quantile Regression (QuantReg)</li><li>Weighted Least Squares (WLS)</li></ul></li><li>lightning<ul><li>AdaGradRegressor</li><li>CDRegressor</li><li>FistaRegressor</li><li>SAGARegressor</li><li>SAGRegressor</li><li>SDCARegressor</li></ul></li></ul> |
| **SVM** | <ul><li>scikit-learn<ul><li>LinearSVC</li><li>NuSVC</li><li>SVC</li></ul></li><li>lightning<ul><li>KernelSVC (binary only, multiclass is not supported yet)</li><li>LinearSVC</li></ul></li></ul> | <ul><li>scikit-learn<ul><li>LinearSVR</li><li>NuSVR</li><li>SVR</li></ul></li><li>lightning<ul><li>LinearSVR</li></ul></li></ul> |
| **Tree** | <ul><li>DecisionTreeClassifier</li><li>ExtraTreeClassifier</li></ul> | <ul><li>DecisionTreeRegressor</li><li>ExtraTreeRegressor</li></ul> |
| **Random Forest** | <ul><li>ExtraTreesClassifier</li><li>LGBMClassifier(rf booster only)</li><li>RandomForestClassifier</li><li>XGBRFClassifier(binary only, multiclass is not supported yet)</li></ul> | <ul><li>ExtraTreesRegressor</li><li>LGBMRegressor(rf booster only)</li><li>RandomForestRegressor</li><li>XGBRFRegressor</li></ul> |
Expand Down
5 changes: 4 additions & 1 deletion m2cgen/assemblers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .linear import (SklearnLinearModelAssembler,
StatsmodelsLinearModelAssembler)
StatsmodelsLinearModelAssembler,
ProcessMLEModelAssembler)
from .tree import TreeModelAssembler
from .ensemble import RandomForestModelAssembler
from .boosting import (XGBoostModelAssemblerSelector,
Expand All @@ -12,6 +13,7 @@
__all__ = [
SklearnLinearModelAssembler,
StatsmodelsLinearModelAssembler,
ProcessMLEModelAssembler,
RANSACModelAssembler,
TreeModelAssembler,
RandomForestModelAssembler,
Expand Down Expand Up @@ -72,6 +74,7 @@
"sklearn_TheilSenRegressor": SklearnLinearModelAssembler,

# Statsmodels Linear Regressors
"statsmodels_ProcessMLEResults": ProcessMLEModelAssembler,
"statsmodels_RegressionResultsWrapper": StatsmodelsLinearModelAssembler,
"statsmodels_RegularizedResultsWrapper": StatsmodelsLinearModelAssembler,

Expand Down
9 changes: 9 additions & 0 deletions m2cgen/assemblers/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ def _get_coef(self):
else self.model.params)


class ProcessMLEModelAssembler(BaseLinearModelAssembler):
    """Assembler for statsmodels ProcessMLE fitted results.

    The model is mapped to a plain weighted sum of features: the
    intercept is treated as a constant zero, and only the leading
    ``k_exog`` entries of ``params`` are used as feature coefficients
    (the remaining entries of ``params`` are not part of the mean
    structure and are ignored here).
    """

    def _get_intercept(self):
        # No intercept term is emitted for ProcessMLE models.
        return 0.0

    def _get_coef(self):
        # Only the first k_exog fitted parameters act as feature weights.
        fitted = self.model
        return fitted.params[:fitted.k_exog]


def _linear_to_ast(coef, intercept):
feature_weight_mul_ops = []

Expand Down
2 changes: 1 addition & 1 deletion requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ pytest-mock==1.13.0
coveralls==1.9.2
pytest-cov==2.8.1
py-mini-racer==0.1.18
statsmodels==0.10.2
statsmodels==0.11.1
git+git://github.com/scikit-learn-contrib/lightning.git@b96f9c674968496e854078163c8814049a7b9f43
85 changes: 85 additions & 0 deletions tests/assemblers/test_linear.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
import numpy as np
import statsmodels.api as sm
from statsmodels.regression.process_regression import ProcessMLE
from lightning.regression import AdaGradRegressor
from lightning.classification import AdaGradClassifier
from sklearn import linear_model
Expand Down Expand Up @@ -318,6 +319,90 @@ def test_statsmodels_unknown_constant_position():
assembler.assemble()


def test_statsmodels_processmle():
    """ProcessMLE results assemble into a zero-intercept weighted sum."""
    # All exog_* arrays and the time/groups vectors are sized from the
    # training target, so compute that length once up front.
    n_samples = len(utils.get_regression_model_trainer().y_train)

    estimator = utils.StatsmodelsSklearnLikeWrapper(
        ProcessMLE,
        dict(init=dict(exog_scale=np.ones((n_samples, 2)),
                       exog_smooth=np.ones((n_samples, 2)),
                       exog_noise=np.ones((n_samples, 2)),
                       time=np.kron(np.ones(n_samples // 3),
                                    np.arange(3)),
                       groups=np.kron(np.arange(n_samples // 3),
                                      np.ones(3))),
             fit=dict(maxiter=1)))
    _, __, estimator = utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.ProcessMLEModelAssembler(estimator)
    actual = assembler.assemble()

    # Expected per-feature coefficients, indexed by feature position.
    coefficients = [
        -0.0932673973,
        0.0480819091,
        -0.0063734439,
        2.7510656855,
        -3.0836268637,
        5.9605290000,
        -0.0077880716,
        -0.9685365627,
        0.1688777882,
        -0.0092446419,
        -0.3924930042,
        0.01506511708295605,
        -0.4177000096,
    ]
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(index),
                       ast.NumVal(weight),
                       ast.BinNumOpType.MUL)
        for index, weight in enumerate(coefficients)
    ]

    expected = assemblers.utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(0.0),
        *feature_weight_mul)

    assert utils.cmp_exprs(actual, expected)


def test_lightning_regression():
estimator = AdaGradRegressor(random_state=1)
utils.get_regression_model_trainer()(estimator)
Expand Down
18 changes: 18 additions & 0 deletions tests/e2e/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import xgboost
import statsmodels.api as sm
from statsmodels.regression.process_regression import ProcessMLE
import lightning.classification as light_clf
import lightning.regression as light_reg
from sklearn import linear_model, svm
Expand Down Expand Up @@ -279,6 +280,23 @@ def classification_binary_random(model):
regression(utils.StatsmodelsSklearnLikeWrapper(
sm.OLS,
dict(fit_regularized=STATSMODELS_LINEAR_REGULARIZED_PARAMS))),
regression(utils.StatsmodelsSklearnLikeWrapper(
ProcessMLE,
dict(init=dict(exog_scale=np.ones(
(len(utils.get_regression_model_trainer().y_train), 2)),
exog_smooth=np.ones(
(len(utils.get_regression_model_trainer().y_train), 2)),
exog_noise=np.ones(
(len(utils.get_regression_model_trainer().y_train), 2)),
time=np.kron(
np.ones(
len(utils.get_regression_model_trainer().y_train) // 3),
np.arange(3)),
groups=np.kron(
np.arange(
len(utils.get_regression_model_trainer().y_train) // 3),
np.ones(3))),
fit=dict(maxiter=2)))),
regression(utils.StatsmodelsSklearnLikeWrapper(
sm.QuantReg,
dict(init=dict(fit_intercept=True)))),
Expand Down

0 comments on commit 46c6ab2

Please sign in to comment.