Multivariate Time Series with Generic Gradient Boosting
thierrymoudiki authored Nov 10, 2024
2 parents dfd97ba + 2a18319 commit e59281c
Showing 41 changed files with 2,489 additions and 611 deletions.
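The diff below mainly reworks the boosting and AdaOpt example scripts. As a reading aid, here is a minimal, self-contained sketch of the GenericBoostingClassifier pattern those examples exercise, assembled from the example code in this diff (the random seed is arbitrary); any scikit-learn-style regressor can serve as the base learner:

import numpy as np
import mlsauce as ms
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from time import time

# data and split, as in the examples below
X, y = load_breast_cancer(return_X_y=True)
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# boost a decision-tree base learner; tolerance is the early-stopping
# threshold used throughout the examples
obj = ms.GenericBoostingClassifier(DecisionTreeRegressor(), tolerance=1e-2)
start = time()
obj.fit(X_train, y_train)
print("Elapsed: ", time()-start)
print(obj.score(X_test, y_test))
print(obj.obj['loss'])  # training loss trace, as inspected in the examples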
2 changes: 1 addition & 1 deletion Makefile
@@ -98,7 +98,7 @@ run-examples: ## run all examples with one command
find examples -maxdepth 2 -name "*.py" -exec python3 {} \;

run-booster: ## run all boosting estimators examples with one command
find examples -maxdepth 2 -name "*boost_*.py" -exec python3 {} \;
find examples -maxdepth 2 -name "*boost*.py" -exec python3 {} \;

run-lazy: ## run all lazy estimators examples with one command
find examples -maxdepth 2 -name "*lazy*.py" -exec python3 {} \;
47 changes: 17 additions & 30 deletions examples/adaopt_classifier.py
@@ -15,7 +15,7 @@

print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")


print("\n breast cancer ---------- \n")

# data 1
breast_cancer = load_breast_cancer()
@@ -26,25 +26,26 @@
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2)

- obj = ms.AdaOpt(n_jobs=4, type_dist="euclidean", verbose=1)
- #obj = ms.AdaOpt()
+ #obj = ms.AdaOpt(n_jobs=4, type_dist="euclidean", verbose=1)
+ obj = ms.AdaOpt()
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(obj.score(X_test, y_test))
print(time()-start)

- obj = ms.AdaOpt(n_jobs=4, type_dist="euclidean", verbose=1,
-                 n_clusters_input=2)
- #obj = ms.AdaOpt()
+ #obj = ms.AdaOpt(n_jobs=4, type_dist="euclidean", verbose=1,
+ #                n_clusters_input=2)
+ obj = ms.AdaOpt()
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(obj.score(X_test, y_test))
print(time()-start)

print("\n wine ---------- \n")

# data 2
wine = load_wine()
@@ -70,6 +71,8 @@
print(obj.score(X_test, y_test))
print(time()-start)

print("\n iris ---------- \n")

# data 3
iris = load_iris()
Z = iris.data
@@ -96,6 +99,8 @@
print(time()-start)


print("\n digits ---------- \n")

# data 4
digits = load_digits()
Z = digits.data
@@ -116,19 +121,12 @@
n_jobs=3, type_dist="euclidean", verbose=1)
start = time()
obj.fit(X_train, y_train)
- print(time()-start)
+ print("Elapsed: ", time()-start)
start = time()
print(obj.score(X_test, y_test))
- print(time()-start)
+ print("Elapsed: ", time()-start)

- # with clustering
- obj = ms.AdaOpt(n_clusters=25, k=1)
- start = time()
- obj.fit(X_train, y_train)
- print(time()-start)
- start = time()
- print(obj.score(X_test, y_test))
- print(time()-start)
+ # ------

obj = ms.AdaOpt(n_iterations=50,
learning_rate=0.3,
@@ -138,22 +136,11 @@
gamma=0.01,
tolerance=1e-4,
row_sample=1,
-                 k=1, n_clusters_input=5,
+                 k=1, backend="gpu",
n_jobs=3, type_dist="euclidean", verbose=1)
start = time()
obj.fit(X_train, y_train)
- print(time()-start)
+ print("Elapsed: ", time()-start)
start = time()
print(obj.score(X_test, y_test))
- print(time()-start)

- # with clustering
- obj = ms.AdaOpt(n_clusters=25, k=1,
-                 n_clusters_input=3)
- start = time()
- obj.fit(X_train, y_train)
- print(time()-start)
- start = time()
- print(obj.score(X_test, y_test))
- print(time()-start)

+ print("Elapsed: ", time()-start)
6 changes: 0 additions & 6 deletions examples/download_dataset.py
@@ -22,9 +22,3 @@
print(f"===== df2: \n {df2} \n")
print(f"===== df2.dtypes: \n {df2.dtypes}")

print("\n====================================================== \n")

# Affairs dataset
df3 = ms.download(pkgname="AER", dataset="Affairs", source="https://zeileis.r-universe.dev/")
print(f"===== df3: \n {df3} \n")
print(f"===== df3.dtypes: \n {df3.dtypes}")
101 changes: 0 additions & 101 deletions examples/genboost_classifier.py
@@ -39,69 +39,6 @@
clf = DecisionTreeRegressor()
clf2 = KernelRidge()

- obj = ms.GenericBoostingClassifier(clf, tolerance=1e-2)
- print(obj.get_params())
- start = time()
- obj.fit(X_train, y_train)
- print(time()-start)
- start = time()
- print(obj.score(X_test, y_test))
- print(time()-start)
-
- print(obj.obj['loss'])
-
- print(obj.obj['fit_obj_i'])
-
- obj = ms.GenericBoostingClassifier(clf, tolerance=1e-2, n_clusters=2)
- print(obj.get_params())
- start = time()
- obj.fit(X_train, y_train)
- print(time()-start)
- start = time()
- print(obj.score(X_test, y_test))
- print(time()-start)
-
- print(obj.obj['loss'])
-
- print(obj.obj['fit_obj_i'])
-
- # data 2
- print("\n")
- print("wine data -----")
-
- wine = load_wine()
- Z = wine.data
- t = wine.target
- np.random.seed(879423)
- X_train, X_test, y_train, y_test = train_test_split(Z, t,
-                                                     test_size=0.2)
-
- obj = ms.GenericBoostingClassifier(clf)
- print(obj.get_params())
- start = time()
- obj.fit(X_train, y_train)
- print(time()-start)
- start = time()
- print(obj.score(X_test, y_test))
- print(time()-start)
-
- print(obj.obj['loss'])
-
- print(obj.obj['fit_obj_i'])
-
- obj = ms.GenericBoostingClassifier(clf, n_clusters=3)
- print(obj.get_params())
- start = time()
- obj.fit(X_train, y_train)
- print(time()-start)
- start = time()
- print(obj.score(X_test, y_test))
- print(time()-start)
-
- print(obj.obj['loss'])
-
- print(obj.obj['fit_obj_i'])

# data 3
print("\n")
print("iris data -----")
@@ -132,19 +69,6 @@
print("GenericBoosting KRR -----")
print("\n")

- obj = ms.GenericBoostingClassifier(clf2, tolerance=1e-2)
- print(obj.get_params())
- start = time()
- obj.fit(X_train, y_train)
- print(time()-start)
- start = time()
- print(obj.score(X_test, y_test))
- print(time()-start)
-
- print(obj.obj['loss'])
-
- print(obj.obj['fit_obj_i'])

obj = ms.GenericBoostingClassifier(clf2, tolerance=1e-2, n_clusters=2)
print(obj.get_params())
start = time()
@@ -194,28 +118,3 @@

print(obj.obj['loss'])

- # data 3
- print("\n")
- print("iris data -----")
-
- iris = load_iris()
- Z = iris.data
- t = iris.target
- np.random.seed(734563)
- X_train, X_test, y_train, y_test = train_test_split(Z, t,
-                                                     test_size=0.2)
-
-
- obj = ms.GenericBoostingClassifier(clf2)
- print(obj.get_params())
- start = time()
- obj.fit(X_train, y_train)
- print(time()-start)
- start = time()
- print(obj.score(X_test, y_test))
- print(time()-start)
-
- print(obj.obj['loss'])
-
- print(obj.obj['fit_obj_i'])

37 changes: 37 additions & 0 deletions examples/genboost_classifier_krr.py
@@ -0,0 +1,37 @@
import numpy as np
from sklearn.datasets import load_digits, load_breast_cancer, load_wine, load_iris, load_diabetes
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from time import time
from os import chdir
from sklearn import metrics
import os

print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

import mlsauce as ms

#ridge

print("\n")
print("GenericBoosting KernelRidge -----")
print("\n")

# data 1
dataset = load_wine()
X = dataset.data
y = dataset.target
# split data into training test and test set
np.random.seed(15029)
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2)

clf = ms.KRLSRegressor()
obj = ms.GenericBoostingClassifier(clf)

print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
print("Elapsed", time()-start)

pred = obj.predict(X_test)
print("Accuracy", metrics.accuracy_score(y_test, pred))
@@ -1,28 +1,21 @@
import numpy as np
from sklearn.datasets import load_digits, load_breast_cancer, load_wine, load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
- from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
+ from sklearn.tree import DecisionTreeRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LinearRegression
from time import time
from os import chdir
from sklearn import metrics
import os

print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

- print(os.path.relpath(os.path.dirname(__file__)))

#wd="/workspace/mlsauce/mlsauce/examples"
#
#chdir(wd)

import mlsauce as ms

#ridge

print("\n")
print("GenericBoosting Decision tree -----")
print("GenericBoosting Ridge -----")
print("\n")

print("\n")
@@ -37,10 +30,24 @@
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2)

- clf = ExtraTreeRegressor()
- clf2 = LinearRegression()
+ clf = ms.RidgeRegressor(reg_lambda=0.05)
+ clf2 = ms.RidgeRegressor(reg_lambda=0.05, backend="gpu")

+ obj = ms.GenericBoostingClassifier(clf)
+ print(obj.get_params())
+ start = time()
+ obj.fit(X_train, y_train)
+ print(time()-start)
+ start = time()
+ print(obj.score(X_test, y_test))
+ print(time()-start)
+
+ print(obj.obj['loss'])
+
+ print(obj.obj['fit_obj_i'])

- obj = ms.HistGenericBoostingClassifier(clf)
+ # needs more data
+ obj = ms.GenericBoostingClassifier(clf2, backend="gpu")
print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
