From cd50598a9f0082de30b350a7995b4164a2770613 Mon Sep 17 00:00:00 2001
From: Thierry Moudiki
Date: Sun, 27 Oct 2024 16:59:48 +0100
Subject: [PATCH 01/26] init version with hist-gbooster

---
 examples/hist_genboost_classifier.py         |  55 +++++
 examples/hist_genboost_regressor.py          |  45 ++++
 examples/lazy_histbooster_classification.py  |  29 +++
 examples/lazy_histbooster_regression.py      |  59 +++++
 mlsauce/__init__.py                          |   5 +
 mlsauce/booster/__init__.py                  |   4 +
 mlsauce/booster/_booster_classifier.py       | 197 ++++++++++++++++-
 mlsauce/booster/_booster_regressor.py        | 208 +++++++++++++++++-
 mlsauce/lazybooster/lazyboosterclassif.py    | 116 +++++++---
 mlsauce/lazybooster/lazyboosterregression.py |  46 +++-
 mlsauce/utils/__init__.py                    |   2 +
 mlsauce/utils/histofeatures/__init__.py      |   3 +
 .../utils/histofeatures/gethistofeatures.py  |  99 +++++++++
 13 files changed, 815 insertions(+), 53 deletions(-)
 create mode 100644 examples/hist_genboost_classifier.py
 create mode 100644 examples/hist_genboost_regressor.py
 create mode 100644 examples/lazy_histbooster_classification.py
 create mode 100644 examples/lazy_histbooster_regression.py
 create mode 100644 mlsauce/utils/histofeatures/__init__.py
 create mode 100644 mlsauce/utils/histofeatures/gethistofeatures.py

diff --git a/examples/hist_genboost_classifier.py b/examples/hist_genboost_classifier.py
new file mode 100644
index 0000000..f0ef839
--- /dev/null
+++ b/examples/hist_genboost_classifier.py
@@ -0,0 +1,55 @@
+import numpy as np
+from sklearn.datasets import load_digits, load_breast_cancer, load_wine, load_iris
+from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
+from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
+from sklearn.kernel_ridge import KernelRidge
+from sklearn.linear_model import LinearRegression
+from time import time
+from os import chdir
+from sklearn import metrics
+import os
+
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
+print(os.path.relpath(os.path.dirname(__file__)))
+
+#wd="/workspace/mlsauce/mlsauce/examples"
+#
+#chdir(wd)
+
+import mlsauce as ms
+
+# solver: ridge (default)
+
+print("\n")
+print("GenericBoosting Decision tree -----")
+print("\n")
+
+print("\n")
+print("breast_cancer data -----")
+
+# data 1
+breast_cancer = load_breast_cancer()
+X = breast_cancer.data
+y = breast_cancer.target
+# split data into training set and test set
+np.random.seed(15029)
+X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                    test_size=0.2)
+
+clf = ExtraTreeRegressor()
+clf2 = LinearRegression()
+
+obj = ms.HistGenericBoostingClassifier(clf)
+print(obj.get_params())
+start = time()
+obj.fit(X_train, y_train)
+print(time()-start)
+start = time()
+print(obj.score(X_test, y_test))
+print(time()-start)
+
+print(obj.obj['loss'])
+
+print(obj.obj['fit_obj_i'])
+
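[Editor's note: since `HistGenericBoostingClassifier` differs from `GenericBoostingClassifier` only by a histogram discretization of the inputs, the two can be benchmarked side by side on the same split. A minimal sketch along the lines of the example above; it is not part of the patch and assumes this branch of mlsauce is installed:]

```python
import mlsauce as ms
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import ExtraTreeRegressor

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=15029)

# same base learner, with and without histogram features
for Booster in (ms.GenericBoostingClassifier, ms.HistGenericBoostingClassifier):
    clf = Booster(base_model=ExtraTreeRegressor())
    clf.fit(X_train, y_train)
    print(Booster.__name__, clf.score(X_test, y_test))
```

[Nothing here is specific to trees: any scikit-learn-style regressor can be passed as `base_model`.]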
diff --git a/examples/hist_genboost_regressor.py b/examples/hist_genboost_regressor.py
new file mode 100644
index 0000000..857d96a
--- /dev/null
+++ b/examples/hist_genboost_regressor.py
@@ -0,0 +1,45 @@
+import subprocess
+import sys
+import os
+
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
+
+subprocess.check_call([sys.executable, "-m", "pip", "install", "matplotlib"])
+
+import mlsauce as ms
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.datasets import load_diabetes, fetch_california_housing
+from sklearn.linear_model import Ridge, LinearRegression
+from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
+from sklearn.tree import ExtraTreeRegressor
+from time import time
+from os import chdir
+from sklearn import metrics
+
+
+print("\n")
+print("diabetes data -----")
+
+regr = ExtraTreeRegressor()
+
+diabetes = load_diabetes()
+X = diabetes.data
+y = diabetes.target
+# split data into training set and test set
+np.random.seed(15029)
+X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                    test_size=0.2)
+
+
+obj = ms.HistGenericBoostingRegressor(regr)
+print(obj.get_params())
+start = time()
+obj.fit(X_train, y_train)
+print(time()-start)
+start = time()
+print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))
+print(time()-start)
+print(obj.obj['loss'])
+
diff --git a/examples/lazy_histbooster_classification.py b/examples/lazy_histbooster_classification.py
new file mode 100644
index 0000000..2b625e6
--- /dev/null
+++ b/examples/lazy_histbooster_classification.py
@@ -0,0 +1,29 @@
+import os
+import mlsauce as ms
+from sklearn.datasets import load_breast_cancer, load_iris, load_wine, load_digits
+from sklearn.model_selection import train_test_split
+from time import time
+
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
+#load_models = [load_breast_cancer, load_iris, load_wine, load_digits]
+load_models = [load_breast_cancer, load_iris, load_wine]
+#load_models = [load_digits]
+
+for model in load_models:
+
+    data = model()
+    X = data.data
+    y = data.target
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2, random_state = 13)
+
+    clf = ms.LazyBoostingClassifier(verbose=0, ignore_warnings=True, #n_jobs=2,
+                                    custom_metric=None, preprocess=False)
+
+    start = time()
+    models, predictions = clf.fit(X_train, X_test, y_train, y_test, hist=True)
+    print(f"\nElapsed: {time() - start} seconds\n")
+
+    print(models)
+
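[Editor's note: what `hist=True` does under the hood is discretize each numeric column with bins learned on the training set (the patch introduces `get_histo_features` for this, further below), and reuse the same bins at prediction time. The idea in isolation, as a rough NumPy sketch; the helper names here are illustrative, not the package's API:]

```python
import numpy as np

def fit_bins(x):
    # learn bin edges on a training column; bins="auto" picks the bin count
    _, edges = np.histogram(x, bins="auto")
    return edges

def apply_bins(x, edges):
    # map each value to its bin index; out-of-range values are clipped
    # into the first/last bin
    return np.clip(np.digitize(x, edges), 1, len(edges) - 1).astype(float)

rng = np.random.default_rng(0)
x_train, x_test = rng.normal(size=200), rng.normal(size=50)
edges = fit_bins(x_train)
print(apply_bins(x_test, edges)[:10])  # discretized test column
```

[The first version of gethistofeatures.py below maps values to per-bin medians rather than indices; later commits in this series switch to plain bin indices.]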
diff --git a/examples/lazy_histbooster_regression.py b/examples/lazy_histbooster_regression.py
new file mode 100644
index 0000000..aa68da6
--- /dev/null
+++ b/examples/lazy_histbooster_regression.py
@@ -0,0 +1,59 @@
+import os
+import mlsauce as ms
+import numpy as np
+from sklearn.datasets import load_diabetes
+from sklearn.datasets import fetch_california_housing
+from sklearn.model_selection import train_test_split
+
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
+data = load_diabetes()
+X = data.data
+y = data.target
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2, random_state = 123)
+
+regr = ms.LazyBoostingRegressor(verbose=0, ignore_warnings=True, #n_jobs=2,
+                                custom_metric=None, preprocess=True)
+models, predictions = regr.fit(X_train, X_test, y_train, y_test)
+model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test)
+print(models)
+
+data = fetch_california_housing()
+X = data.data[0:1000,:]
+y = data.target[0:1000]
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2, random_state = 123)
+
+regr = ms.LazyBoostingRegressor(verbose=0, ignore_warnings=True,
+                                custom_metric=None, preprocess=True)
+models, predictions = regr.fit(X_train, X_test, y_train, y_test, hist=True)
+model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test)
+print(models)
+
+
+from sklearn.datasets import fetch_openml
+
+# Load the dataset from OpenML
+boston = fetch_openml(name='boston', version=1, as_frame=True)
+
+# Get the features and target
+X = boston.data
+y = boston.target
+
+# Display the first few rows
+print(X.head())
+print(y.head())
+
+np.random.seed(1509)
+X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                    test_size=0.2)
+
+X_train = X_train.astype(np.float64)
+X_test = X_test.astype(np.float64)
+y_train = y_train.astype(np.float64)
+y_test = y_test.astype(np.float64)
+
+regr = ms.LazyBoostingRegressor(verbose=0, ignore_warnings=True, #n_jobs=2,
+                                custom_metric=None, preprocess=True)
+models, predictions = regr.fit(X_train, X_test, y_train, y_test, hist=True)
+model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test)
+print(models)
diff --git a/mlsauce/__init__.py b/mlsauce/__init__.py
index 2dddb7d..9767523 100644
--- a/mlsauce/__init__.py
+++ b/mlsauce/__init__.py
@@ -61,6 +61,9 @@
     LSBoostRegressor,
     GenericBoostingClassifier,
     GenericBoostingRegressor,
+    HistGenericBoostingRegressor,
+    HistGenericBoostingClassifier,
+
 )
 from .lazybooster import LazyBoostingClassifier, LazyBoostingRegressor
 from .multitaskregressor import MultiTaskRegressor
@@ -77,6 +80,8 @@
     "LSBoostClassifier",
     "GenericBoostingClassifier",
     "GenericBoostingRegressor",
+    "HistGenericBoostingClassifier",
+    "HistGenericBoostingRegressor",
     "StumpClassifier",
     "ElasticNetRegressor",
     "LassoRegressor",
diff --git a/mlsauce/booster/__init__.py b/mlsauce/booster/__init__.py
index 786fe8f..b8941dc 100644
--- a/mlsauce/booster/__init__.py
+++ b/mlsauce/booster/__init__.py
@@ -1,11 +1,15 @@
 from ._booster_regressor import LSBoostRegressor
 from ._booster_regressor import GenericBoostingRegressor
+from ._booster_regressor import HistGenericBoostingRegressor
 from ._booster_classifier import LSBoostClassifier
 from ._booster_classifier import GenericBoostingClassifier
+from ._booster_classifier import HistGenericBoostingClassifier

 __all__ = [
     "LSBoostClassifier",
     "LSBoostRegressor",
     "GenericBoostingClassifier",
     "GenericBoostingRegressor",
+    "HistGenericBoostingRegressor",
+    "HistGenericBoostingClassifier"
 ]
diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py
index 0d395a8..ef126d0 100644
--- a/mlsauce/booster/_booster_classifier.py
+++ b/mlsauce/booster/_booster_classifier.py
@@ -11,7 +11,7 @@
     from . 
import _boosterc as boosterc except ImportError: import _boosterc as boosterc -from ..utils import cluster, check_and_install +from ..utils import cluster, check_and_install, get_histo_features class LSBoostClassifier(BaseEstimator, ClassifierMixin): @@ -672,3 +672,198 @@ def __init__( weights_distr=weights_distr, base_model=self.base_model, ) + +class HistGenericBoostingClassifier(GenericBoostingClassifier): + """Histogram-based Generic Boosting classifier (using any classifier as base learner). + + Attributes: + + base_model: object + base learner (default is ExtraTreeRegressor) to be boosted. + + n_estimators: int + number of boosting iterations. + + learning_rate: float + controls the learning speed at training time. + + n_hidden_features: int + number of nodes in successive hidden layers. + + reg_lambda: float + L2 regularization parameter for successive errors in the optimizer + (at training time). + + alpha: float + compromise between L1 and L2 regularization (must be in [0, 1]), + for `solver` == 'enet'. + + row_sample: float + percentage of rows chosen from the training set. + + col_sample: float + percentage of columns chosen from the training set. + + dropout: float + percentage of nodes dropped from the training set. + + tolerance: float + controls early stopping in gradient descent (at training time). + + direct_link: bool + indicates whether the original features are included (True) in model's + fitting or not (False). + + verbose: int + progress bar (yes = 1) or not (no = 0) (currently). + + seed: int + reproducibility seed for nodes_sim=='uniform', clustering and dropout. + + backend: str + type of backend; must be in ('cpu', 'gpu', 'tpu') + + solver: str + type of 'weak' learner; currently in ('ridge', 'lasso', 'enet'). + 'enet' is a combination of 'ridge' and 'lasso' called Elastic Net. 
+ + activation: str + activation function: currently 'relu', 'relu6', 'sigmoid', 'tanh' + + n_clusters: int + number of clusters for clustering the features + + clustering_method: str + clustering method: currently 'kmeans', 'gmm' + + cluster_scaling: str + scaling method for clustering: currently 'standard', 'robust', 'minmax' + + degree: int + degree of features interactions to include in the model + + weights_distr: str + distribution of weights for constructing the model's hidden layer; + currently 'uniform', 'gaussian' + + """ + + def __init__( + self, + base_model=ExtraTreeRegressor(), + n_estimators=100, + learning_rate=0, + n_hidden_features=5, + reg_lambda=0.1, + alpha=0.5, + row_sample=1, + col_sample=1, + dropout=0, + tolerance=1e-4, + direct_link=1, + verbose=1, + seed=123, + backend="cpu", + solver="ridge", + activation="relu", + n_clusters=0, + clustering_method="kmeans", + cluster_scaling="standard", + degree=None, + weights_distr="uniform", + ): + super().__init__( + base_model=base_model, + n_estimators=n_estimators, + learning_rate=learning_rate, + n_hidden_features=n_hidden_features, + reg_lambda=reg_lambda, + alpha=alpha, + row_sample=row_sample, + col_sample=col_sample, + dropout=dropout, + tolerance=tolerance, + direct_link=direct_link, + verbose=verbose, + seed=seed, + backend=backend, + solver=solver, + activation=activation, + n_clusters=n_clusters, + clustering_method=clustering_method, + cluster_scaling=cluster_scaling, + degree=degree, + weights_distr=weights_distr, + ) + self.base_model = base_model + self.hist_bins = None + super().__init__( + base_model=base_model, + n_estimators=n_estimators, + learning_rate=learning_rate, + n_hidden_features=n_hidden_features, + reg_lambda=reg_lambda, + alpha=alpha, + row_sample=row_sample, + col_sample=col_sample, + dropout=dropout, + tolerance=tolerance, + direct_link=direct_link, + verbose=verbose, + seed=seed, + backend=backend, + solver=solver, + activation=activation, + n_clusters=n_clusters, + clustering_method=clustering_method, + cluster_scaling=cluster_scaling, + degree=degree, + weights_distr=weights_distr, + ) + + def fit(self, X, y, **kwargs): + """Fit Booster (classifier) to training data (X, y) + + Args: + + X: {array-like}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number + of samples and n_features is the number of features. + + y: array-like, shape = [n_samples] + Target values. + + **kwargs: additional parameters to be passed to self.cook_training_set. + + Returns: + + self: object. + """ + X, self.hist_bins = get_histo_features(X) + return super().fit(X, y, **kwargs) + + def predict_proba(self, X, **kwargs): + """Predict probabilites for test data X. + + Args: + + X: {array-like}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number + of samples and n_features is the number of features. 
+ + **kwargs: additional parameters to be passed to + self.cook_test_set + + Returns: + + predicted values estimates for test data: {array-like} + """ + assert self.hist_bins is not None, "You must fit the model first" + X = get_histo_features(X, self.hist_bins) + try: + return super().predict_proba(np.asarray(X, order="C"), + **kwargs) + except Exception: + return super().predict_proba(X, + **kwargs) + diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 09a2ec5..3ff146d 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -12,7 +12,7 @@ except ImportError: import _boosterc as boosterc from ..predictioninterval import PredictionInterval -from ..utils import cluster, check_and_install +from ..utils import cluster, check_and_install, get_histo_features class LSBoostRegressor(BaseEstimator, RegressorMixin): @@ -240,7 +240,7 @@ def __init__( check_and_install("jax") check_and_install("jaxlib") - def fit(self, X, y, **kwargs): + def fit(self, X, y, histo=False, **kwargs): """Fit Booster (regressor) to training data (X, y) Args: @@ -311,8 +311,8 @@ def fit(self, X, y, **kwargs): return self - def predict(self, X, level=95, method=None, **kwargs): - """Predict probabilities for test data X. + def predict(self, X, level=95, method=None, histo=False, **kwargs): + """Predict values for test data X. Args: @@ -326,13 +326,16 @@ def predict(self, X, level=95, method=None, **kwargs): method: str `None`, or 'splitconformal', 'localconformal' prediction (if you specify `return_pi = True`) + + histo: bool + whether to use histogram features or not **kwargs: additional parameters to be passed to self.cook_test_set Returns: - probability estimates for test data: {array-like} + predicted values estimates for test data: {array-like} """ if isinstance(X, pd.DataFrame): @@ -432,7 +435,7 @@ def update(self, X, y, eta=0.9): class GenericBoostingRegressor(LSBoostRegressor): - """LSBoost regressor. + """Generic Boosting regressor. Attributes: @@ -569,3 +572,196 @@ def __init__( weights_distr=weights_distr, base_model=self.base_model, ) + +class HistGenericBoostingRegressor(GenericBoostingRegressor): + """Generic Boosting regressor with histogram-based features. + + Attributes: + + base_model: object + base learner (default is ExtraTreeRegressor) to be boosted. + + n_estimators: int + number of boosting iterations. + + learning_rate: float + controls the learning speed at training time. + + n_hidden_features: int + number of nodes in successive hidden layers. + + reg_lambda: float + L2 regularization parameter for successive errors in the optimizer + (at training time). + + alpha: float + compromise between L1 and L2 regularization (must be in [0, 1]), + for `solver` == 'enet' + + row_sample: float + percentage of rows chosen from the training set. + + col_sample: float + percentage of columns chosen from the training set. + + dropout: float + percentage of nodes dropped from the training set. + + tolerance: float + controls early stopping in gradient descent (at training time). + + direct_link: bool + indicates whether the original features are included (True) in model's + fitting or not (False). + + verbose: int + progress bar (yes = 1) or not (no = 0) (currently). + + seed: int + reproducibility seed for nodes_sim=='uniform', clustering and dropout. 
+ + backend: str + type of backend; must be in ('cpu', 'gpu', 'tpu') + + solver: str + type of 'weak' learner; currently in ('ridge', 'lasso') + + activation: str + activation function: currently 'relu', 'relu6', 'sigmoid', 'tanh' + + type_pi: str. + type of prediction interval; currently "kde" (default) or "bootstrap". + Used only in `self.predict`, for `self.replications` > 0 and `self.kernel` + in ('gaussian', 'tophat'). Default is `None`. + + replications: int. + number of replications (if needed) for predictive simulation. + Used only in `self.predict`, for `self.kernel` in ('gaussian', + 'tophat') and `self.type_pi = 'kde'`. Default is `None`. + + n_clusters: int + number of clusters for clustering the features + + clustering_method: str + clustering method: currently 'kmeans', 'gmm' + + cluster: bool + whether to cluster the features or not + + cluster_scaling: str + scaling method for clustering: currently 'standard', 'robust', 'minmax' + + degree: int + degree of features interactions to include in the model + + weights_distr: str + distribution of weights for constructing the model's hidden layer; + either 'uniform' or 'gaussian' + """ + def __init__( + self, + base_model=ExtraTreeRegressor(), + n_estimators=100, + learning_rate=0.1, + n_hidden_features=5, + reg_lambda=0.1, + alpha=0.5, + row_sample=1, + col_sample=1, + dropout=0, + tolerance=1e-4, + direct_link=1, + verbose=1, + seed=123, + backend="cpu", + solver="ridge", + activation="relu", + type_pi=None, + replications=None, + kernel=None, + n_clusters=0, + clustering_method="kmeans", + cluster_scaling="standard", + degree=None, + weights_distr="uniform", + ): + self.base_model = base_model + self.hist_bins = None + super().__init__( + n_estimators=n_estimators, + learning_rate=learning_rate, + n_hidden_features=n_hidden_features, + reg_lambda=reg_lambda, + alpha=alpha, + row_sample=row_sample, + col_sample=col_sample, + dropout=dropout, + tolerance=tolerance, + direct_link=direct_link, + verbose=verbose, + seed=seed, + backend=backend, + solver=solver, + activation=activation, + type_pi=type_pi, + replications=replications, + kernel=kernel, + n_clusters=n_clusters, + clustering_method=clustering_method, + cluster_scaling=cluster_scaling, + degree=degree, + weights_distr=weights_distr, + base_model=self.base_model, + ) + + def fit(self, X, y, **kwargs): + """Fit Booster (regressor) to training data (X, y) + + Args: + + X: {array-like}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number + of samples and n_features is the number of features. + + y: array-like, shape = [n_samples] + Target values. + + **kwargs: additional parameters to be passed to self.cook_training_set. + + Returns: + + self: object. + """ + X, self.hist_bins = get_histo_features(X) + return super().fit(X, y, **kwargs) + + def predict(self, X, level=95, method=None, **kwargs): + """Predict values for test data X. + + Args: + + X: {array-like}, shape = [n_samples, n_features] + Training vectors, where n_samples is the number + of samples and n_features is the number of features. 
+ + level: int + Level of confidence (default = 95) + + method: str + `None`, or 'splitconformal', 'localconformal' + prediction (if you specify `return_pi = True`) + + histo: bool + whether to use histogram features or not + + **kwargs: additional parameters to be passed to + self.cook_test_set + + Returns: + + predicted values estimates for test data: {array-like} + """ + assert self.hist_bins is not None, "You must fit the model first" + X = get_histo_features(X, self.hist_bins) + return super().predict(X, level=level, method=method, **kwargs) + diff --git a/mlsauce/lazybooster/lazyboosterclassif.py b/mlsauce/lazybooster/lazyboosterclassif.py index b76ceab..2ef1398 100644 --- a/mlsauce/lazybooster/lazyboosterclassif.py +++ b/mlsauce/lazybooster/lazyboosterclassif.py @@ -26,7 +26,7 @@ f1_score, ) from .config import REGRESSORS, MTASKREGRESSORS -from ..booster import GenericBoostingClassifier +from ..booster import GenericBoostingClassifier, HistGenericBoostingClassifier from ..multitaskregressor import MultiTaskRegressor import warnings @@ -194,7 +194,7 @@ def __init__( self.preprocess = preprocess self.n_jobs = n_jobs - def fit(self, X_train, X_test, y_train, y_test, **kwargs): + def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): """Fit classifiers to X_train and y_train, predict and score on X_test, y_test. @@ -215,6 +215,9 @@ def fit(self, X_train, X_test, y_train, y_test, **kwargs): y_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features. + + hist: bool, optional (default=False) + When set to True, the model is a HistGenericBoostingClassifier. **kwargs: dict, Additional arguments to be passed to the fit GenericBoostingClassifier. @@ -376,19 +379,34 @@ def fit(self, X_train, X_test, y_train, y_test, **kwargs): try: if "random_state" in model().get_params().keys(): - fitted_clf = GenericBoostingClassifier( - {**other_args, **kwargs}, - verbose=self.verbose, - base_model=model( - random_state=self.random_state - ), - ) + if hist: + fitted_clf = GenericBoostingClassifier( + {**other_args, **kwargs}, + verbose=self.verbose, + base_model=model( + random_state=self.random_state + ), + ) + else: + fitted_clf = HistGenericBoostingClassifier( + {**other_args, **kwargs}, + verbose=self.verbose, + base_model=model( + random_state=self.random_state + ), + ) else: - fitted_clf = GenericBoostingClassifier( - base_model=model(**kwargs), - verbose=self.verbose, - ) + if hist: + fitted_clf = GenericBoostingClassifier( + base_model=model(**kwargs), + verbose=self.verbose, + ) + else: + fitted_clf = HistGenericBoostingClassifier( + base_model=model(**kwargs), + verbose=self.verbose, + ) if self.verbose > 0: print("\n Fitting boosted " + name + " model...") @@ -500,20 +518,36 @@ def fit(self, X_train, X_test, y_train, y_test, **kwargs): start = time.time() try: if "random_state" in model().get_params().keys(): - fitted_clf = GenericBoostingClassifier( - base_model=model( - random_state=self.random_state - ), - verbose=self.verbose, - **kwargs - ) + if hist: + fitted_clf = GenericBoostingClassifier( + base_model=model( + random_state=self.random_state + ), + verbose=self.verbose, + **kwargs + ) + else: + fitted_clf = HistGenericBoostingClassifier( + base_model=model( + random_state=self.random_state + ), + verbose=self.verbose, + **kwargs + ) else: - fitted_clf = GenericBoostingClassifier( - base_model=model(), - verbose=self.verbose, - **kwargs - ) + if hist: + fitted_clf = GenericBoostingClassifier( + base_model=model(), + 
verbose=self.verbose, + **kwargs + ) + else: + fitted_clf = HistGenericBoostingClassifier( + base_model=model(), + verbose=self.verbose, + **kwargs + ) fitted_clf.fit(X_train, y_train) @@ -689,6 +723,7 @@ def train_model( y_test, use_preprocessing=False, preprocessor=None, + hist=False, **kwargs ): """ @@ -711,16 +746,29 @@ def train_model( try: # Handle random_state parameter if "random_state" in model().get_params().keys(): - fitted_clf = GenericBoostingClassifier( - {**other_args, **kwargs}, - verbose=self.verbose, - base_model=model(random_state=self.random_state), - ) + if hist: + fitted_clf = GenericBoostingClassifier( + {**other_args, **kwargs}, + verbose=self.verbose, + base_model=model(random_state=self.random_state), + ) + else: + fitted_clf = HistGenericBoostingClassifier( + {**other_args, **kwargs}, + verbose=self.verbose, + base_model=model(random_state=self.random_state), + ) else: - fitted_clf = GenericBoostingClassifier( - base_model=model(**kwargs), - verbose=self.verbose, - ) + if hist: + fitted_clf = GenericBoostingClassifier( + base_model=model(**kwargs), + verbose=self.verbose, + ) + else: + fitted_clf = HistGenericBoostingClassifier( + base_model=model(**kwargs), + verbose=self.verbose, + ) if self.verbose > 0: print("\n Fitting boosted " + name + " model...") diff --git a/mlsauce/lazybooster/lazyboosterregression.py b/mlsauce/lazybooster/lazyboosterregression.py index 2957383..8230222 100644 --- a/mlsauce/lazybooster/lazyboosterregression.py +++ b/mlsauce/lazybooster/lazyboosterregression.py @@ -22,7 +22,7 @@ r2_score ) from .config import REGRESSORS -from ..booster import GenericBoostingRegressor +from ..booster import GenericBoostingRegressor, HistGenericBoostingRegressor import warnings @@ -185,7 +185,7 @@ def __init__( self.preprocess = preprocess self.n_jobs = n_jobs - def fit(self, X_train, X_test, y_train, y_test, **kwargs): + def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test. Parameters: @@ -205,6 +205,9 @@ def fit(self, X_train, X_test, y_train, y_test, **kwargs): y_test : array-like, Testing vectors, where rows is the number of samples and columns is the number of features. + + hist: bool, optional (default=False) + When set to True, the model is a HistGenericBoostingRegressor. **kwargs: dict, Additional parameters to be passed to the GenericBoostingRegressor. 
@@ -345,9 +348,17 @@ def fit(self, X_train, X_test, y_train, y_test, **kwargs): try: - model = GenericBoostingRegressor( - base_model=regr(), verbose=self.verbose, **kwargs - ) + if hist: + + model = GenericBoostingRegressor( + base_model=regr(), verbose=self.verbose, **kwargs + ) + + else: + + model = HistGenericBoostingRegressor( + base_model=regr(), verbose=self.verbose, **kwargs + ) model.fit(X_train, y_train) @@ -456,10 +467,15 @@ def fit(self, X_train, X_test, y_train, y_test, **kwargs): for name, regr in tqdm(self.regressors): # do parallel exec start = time.time() try: - - model = GenericBoostingRegressor( - base_model=regr(), verbose=self.verbose, **kwargs - ) + + if hist: + model = GenericBoostingRegressor( + base_model=regr(), verbose=self.verbose, **kwargs + ) + else: + model = HistGenericBoostingRegressor( + base_model=regr(), verbose=self.verbose, **kwargs + ) if self.verbose > 0: print("\n Fitting boosted " + name + " model...") @@ -630,6 +646,7 @@ def train_model( y_test, use_preprocessing=False, preprocessor=None, + hist=False, **kwargs ): """ @@ -638,9 +655,14 @@ def train_model( start = time.time() try: - model = GenericBoostingRegressor( - base_model=regr(), verbose=self.verbose, **kwargs - ) + if hist: + model = GenericBoostingRegressor( + base_model=regr(), verbose=self.verbose, **kwargs + ) + else: + model = HistGenericBoostingRegressor( + base_model=regr(), verbose=self.verbose, **kwargs + ) if use_preprocessing and preprocessor is not None: pipe = Pipeline( diff --git a/mlsauce/utils/__init__.py b/mlsauce/utils/__init__.py index 99dec6d..156e393 100644 --- a/mlsauce/utils/__init__.py +++ b/mlsauce/utils/__init__.py @@ -10,6 +10,7 @@ ) from .progress_bar import Progbar from .get_beta import get_beta +from .histofeatures.gethistofeatures import get_histo_features __all__ = [ "cluster", @@ -22,4 +23,5 @@ "get_beta", "check_and_install", "is_multitask_estimator", + "get_histo_features" ] diff --git a/mlsauce/utils/histofeatures/__init__.py b/mlsauce/utils/histofeatures/__init__.py new file mode 100644 index 0000000..a20796b --- /dev/null +++ b/mlsauce/utils/histofeatures/__init__.py @@ -0,0 +1,3 @@ +from .gethistofeatures import get_histo_features + +__all__ = ['get_histo_features'] \ No newline at end of file diff --git a/mlsauce/utils/histofeatures/gethistofeatures.py b/mlsauce/utils/histofeatures/gethistofeatures.py new file mode 100644 index 0000000..2b762c8 --- /dev/null +++ b/mlsauce/utils/histofeatures/gethistofeatures.py @@ -0,0 +1,99 @@ +import numpy as np +import pandas as pd + +def create_histogram_with_bin_values(x): + """ + Computes a histogram for the input data and assigns a value to each bin + reflecting the ordering of the input. + + Args: + x (list or np.array): Input data. + num_bins (int): Number of bins for the histogram. + + Returns: + bin_edges (np.array): The edges of the bins. + bin_value_dict (dict): A dictionary where keys are the bin ranges (tuples) and values reflect the ordering. + """ + # Compute the histogram + hist, bin_edges = np.histogram(x, bins="auto") + + bin_edges = np.concatenate([[1e-10], bin_edges, [1e10]]).ravel() + + # Create a dict to store bin ranges and assigned values + bin_value_dict = {} + + for i in range(len(bin_edges) - 1): + bin_range = (bin_edges[i], bin_edges[i + 1]) + bin_value_dict[i] = (bin_range, np.median(list(bin_range))) + + return bin_edges, bin_value_dict + +def assign_values_to_input(new_data, bin_value_dict): + """ + Assigns values to a new input based on the provided bin ranges and values. 
+ + Args: + new_data (list or np.array): New input data to assign values to. + bin_value_dict (dict): Dictionary where keys are bin ranges (tuples) and values are the assigned values. + + Returns: + assigned_values (list): List of assigned values for the new input data. + """ + assigned_values = [] + + for value in new_data: + assigned = None + # Find the appropriate bin for each value + for elt in bin_value_dict.items(): + if elt[1][0][0] <= value < elt[1][0][1]: + assigned = elt[1][1] + break + + assigned_values.append(assigned) + + return assigned_values + +def get_histo_features(X, bin_value_dict=None): + """ + Computes histogram features for the input data. + + Args: + X {np.array or pd.DataFrame}: Input data. + + Returns: + X_hist {np.array or pd.DataFrame}: Input data with histogram features. + """ + + if bin_value_dict is None: # training set case + + if isinstance(X, pd.DataFrame): + colnames = X.columns + X = X.values + X_hist = pd.DataFrame(np.zeros(X.shape), + columns=colnames) + for i, col in enumerate(colnames): + _, bin_value_dict = create_histogram_with_bin_values(X[:, i]) + X_hist[col] = assign_values_to_input(X[:, i], bin_value_dict) + else: + X_hist = np.zeros(X.shape) + for i in range(X.shape[1]): + _, bin_value_dict = create_histogram_with_bin_values(X[:, i]) + X_hist[:, i] = assign_values_to_input(X[:, i], bin_value_dict) + + return X_hist, bin_value_dict + + else: # test set case + + if isinstance(X, pd.DataFrame): + colnames = X.columns + X = X.values + X_hist = pd.DataFrame(np.zeros(X.shape), + columns=colnames) + for i, col in enumerate(colnames): + X_hist[col] = assign_values_to_input(X[:, i], bin_value_dict) + else: + X_hist = np.zeros(X.shape) + for i in range(X.shape[1]): + X_hist[:, i] = assign_values_to_input(X[:, i], bin_value_dict) + + return X_hist \ No newline at end of file From 24c830eba55829afc46149edf6f994e03564718b Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 17:23:46 +0100 Subject: [PATCH 02/26] ravel responses --- mlsauce/booster/_booster_classifier.py | 4 ++-- mlsauce/booster/_booster_regressor.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index ef126d0..98bd5cb 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -414,7 +414,7 @@ def fit(self, X, y, **kwargs): self.obj = boosterc.fit_booster_classifier( np.asarray(X, order="C"), - np.asarray(y, order="C"), + np.asarray(y, order="C").ravel(), n_estimators=self.n_estimators, learning_rate=self.learning_rate, n_hidden_features=self.n_hidden_features, @@ -840,7 +840,7 @@ def fit(self, X, y, **kwargs): self: object. """ X, self.hist_bins = get_histo_features(X) - return super().fit(X, y, **kwargs) + return super().fit(X, y.ravel(), **kwargs) def predict_proba(self, X, **kwargs): """Predict probabilites for test data X. diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 3ff146d..b9ab67e 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -284,7 +284,7 @@ def fit(self, X, y, histo=False, **kwargs): self.obj = boosterc.fit_booster_regressor( X=np.asarray(X, order="C"), - y=np.asarray(y, order="C"), + y=np.asarray(y, order="C").ravel(), n_estimators=self.n_estimators, learning_rate=self.learning_rate, n_hidden_features=self.n_hidden_features, @@ -733,7 +733,7 @@ def fit(self, X, y, **kwargs): self: object. 
""" X, self.hist_bins = get_histo_features(X) - return super().fit(X, y, **kwargs) + return super().fit(X, y.ravel(), **kwargs) def predict(self, X, level=95, method=None, **kwargs): """Predict values for test data X. From 8b5b0f8769f41780fae6e8ecce1144904ffabeb5 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 17:56:25 +0100 Subject: [PATCH 03/26] fix gbooster Pt.1 --- mlsauce/booster/_booster_classifier.py | 5 +++-- mlsauce/booster/_booster_regressor.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index 98bd5cb..f845104 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -414,7 +414,7 @@ def fit(self, X, y, **kwargs): self.obj = boosterc.fit_booster_classifier( np.asarray(X, order="C"), - np.asarray(y, order="C").ravel(), + np.asarray(y, order="C"), n_estimators=self.n_estimators, learning_rate=self.learning_rate, n_hidden_features=self.n_hidden_features, @@ -543,7 +543,8 @@ def update(self, X, y, eta=0.9): ) self.obj = boosterc.update_booster( - self.obj, np.asarray(X, order="C"), np.asarray(y, order="C"), eta + self.obj, np.asarray(X, order="C"), + np.asarray(y, order="C").ravel(), eta ) return self diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index b9ab67e..af226ce 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -240,7 +240,7 @@ def __init__( check_and_install("jax") check_and_install("jaxlib") - def fit(self, X, y, histo=False, **kwargs): + def fit(self, X, y, **kwargs): """Fit Booster (regressor) to training data (X, y) Args: @@ -284,7 +284,7 @@ def fit(self, X, y, histo=False, **kwargs): self.obj = boosterc.fit_booster_regressor( X=np.asarray(X, order="C"), - y=np.asarray(y, order="C").ravel(), + y=np.asarray(y, order="C"), n_estimators=self.n_estimators, learning_rate=self.learning_rate, n_hidden_features=self.n_hidden_features, From 7731f1ab6f5656f3fbd4ee7a81cc8f0b49a1944c Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 18:17:47 +0100 Subject: [PATCH 04/26] fix gbooster Pt.2 --- mlsauce/booster/_booster_classifier.py | 6 +++++- mlsauce/booster/_booster_regressor.py | 3 +++ mlsauce/lazybooster/lazyboosterregression.py | 14 +++++++------- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index f845104..94fd2b5 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -391,6 +391,9 @@ def fit(self, X, y, **kwargs): if isinstance(X, pd.DataFrame): X = X.values + + if isinstance(y, pd.Series): + y = y.values.ravel() if self.degree is not None: assert isinstance(self.degree, int), "`degree` must be an integer" @@ -433,7 +436,8 @@ def fit(self, X, y, **kwargs): obj=self.base_model, ) - self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn + self.classes_ = np.unique(y) # for compatibility with sklearn + self.n_classes_ = len(self.classes_) # for compatibility with sklearn self.n_estimators = self.obj["n_estimators"] return self diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index af226ce..9bd1a3a 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -261,6 +261,9 @@ def fit(self, X, y, **kwargs): if isinstance(X, pd.DataFrame): X = X.values + + if isinstance(y, 
pd.Series): + y = y.values.ravel() if self.degree is not None: assert isinstance(self.degree, int), "`degree` must be an integer" diff --git a/mlsauce/lazybooster/lazyboosterregression.py b/mlsauce/lazybooster/lazyboosterregression.py index 8230222..d954965 100644 --- a/mlsauce/lazybooster/lazyboosterregression.py +++ b/mlsauce/lazybooster/lazyboosterregression.py @@ -287,7 +287,7 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): for name, model in tqdm(zip(baseline_names, baseline_models)): start = time.time() try: - model.fit(X_train, y_train) + model.fit(X_train, y_train.ravel()) self.models_[name] = model y_pred = model.predict(X_test) r_squared = r2_score(y_test, y_pred) @@ -360,7 +360,7 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): base_model=regr(), verbose=self.verbose, **kwargs ) - model.fit(X_train, y_train) + model.fit(X_train, y_train.ravel()) pipe = Pipeline( steps=[ @@ -370,7 +370,7 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): ) if self.verbose > 0: print("\n Fitting boosted " + name + " model...") - pipe.fit(X_train, y_train) + pipe.fit(X_train, y_train.ravel()) self.models_[name] = pipe y_pred = pipe.predict(X_test) @@ -479,7 +479,7 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): if self.verbose > 0: print("\n Fitting boosted " + name + " model...") - model.fit(X_train, y_train) + model.fit(X_train, y_train.ravel()) self.models_[name] = model y_pred = model.predict(X_test) @@ -632,7 +632,7 @@ def provide_models(self, X_train, X_test, y_train, y_test): """ if len(self.models_.keys()) == 0: - self.fit(X_train, X_test, y_train, y_test) + self.fit(X_train, X_test, y_train.ravel(), y_test.values) return self.models_ @@ -677,7 +677,7 @@ def train_model( + name + " model with preprocessing..." ) - pipe.fit(X_train, y_train) + pipe.fit(X_train, y_train.ravel()) y_pred = pipe.predict(X_test) fitted_model = pipe else: @@ -688,7 +688,7 @@ def train_model( + name + " model without preprocessing..." ) - model.fit(X_train, y_train) + model.fit(X_train, y_train.ravel()) y_pred = model.predict(X_test) fitted_model = model From 347d58e194a8a3f1558daff3e9741f4e108ae2d8 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 18:25:35 +0100 Subject: [PATCH 05/26] fix gbooster Pt.3 --- mlsauce/booster/_booster_classifier.py | 4 +++- mlsauce/booster/_booster_regressor.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index 94fd2b5..0f707d2 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -394,6 +394,8 @@ def fit(self, X, y, **kwargs): if isinstance(y, pd.Series): y = y.values.ravel() + else: + y = y.ravel() if self.degree is not None: assert isinstance(self.degree, int), "`degree` must be an integer" @@ -845,7 +847,7 @@ def fit(self, X, y, **kwargs): self: object. """ X, self.hist_bins = get_histo_features(X) - return super().fit(X, y.ravel(), **kwargs) + return super().fit(X, y, **kwargs) def predict_proba(self, X, **kwargs): """Predict probabilites for test data X. 
diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 9bd1a3a..1135ce2 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -264,6 +264,8 @@ def fit(self, X, y, **kwargs): if isinstance(y, pd.Series): y = y.values.ravel() + else: + y = y.ravel() if self.degree is not None: assert isinstance(self.degree, int), "`degree` must be an integer" @@ -736,7 +738,7 @@ def fit(self, X, y, **kwargs): self: object. """ X, self.hist_bins = get_histo_features(X) - return super().fit(X, y.ravel(), **kwargs) + return super().fit(X, y, **kwargs) def predict(self, X, level=95, method=None, **kwargs): """Predict values for test data X. From 2fcd945797eee248a3cf625c940fbfa91fbcea24 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 18:53:23 +0100 Subject: [PATCH 06/26] fix gbooster Pt.4 --- mlsauce/lazybooster/lazyboosterclassif.py | 12 ++++++------ mlsauce/lazybooster/lazyboosterregression.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mlsauce/lazybooster/lazyboosterclassif.py b/mlsauce/lazybooster/lazyboosterclassif.py index 2ef1398..c58c812 100644 --- a/mlsauce/lazybooster/lazyboosterclassif.py +++ b/mlsauce/lazybooster/lazyboosterclassif.py @@ -379,7 +379,7 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): try: if "random_state" in model().get_params().keys(): - if hist: + if hist is False: fitted_clf = GenericBoostingClassifier( {**other_args, **kwargs}, verbose=self.verbose, @@ -397,7 +397,7 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): ) else: - if hist: + if hist is False: fitted_clf = GenericBoostingClassifier( base_model=model(**kwargs), verbose=self.verbose, @@ -518,7 +518,7 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): start = time.time() try: if "random_state" in model().get_params().keys(): - if hist: + if hist is False: fitted_clf = GenericBoostingClassifier( base_model=model( random_state=self.random_state @@ -536,7 +536,7 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): ) else: - if hist: + if hist is False: fitted_clf = GenericBoostingClassifier( base_model=model(), verbose=self.verbose, @@ -746,7 +746,7 @@ def train_model( try: # Handle random_state parameter if "random_state" in model().get_params().keys(): - if hist: + if hist is False: fitted_clf = GenericBoostingClassifier( {**other_args, **kwargs}, verbose=self.verbose, @@ -759,7 +759,7 @@ def train_model( base_model=model(random_state=self.random_state), ) else: - if hist: + if hist is False: fitted_clf = GenericBoostingClassifier( base_model=model(**kwargs), verbose=self.verbose, diff --git a/mlsauce/lazybooster/lazyboosterregression.py b/mlsauce/lazybooster/lazyboosterregression.py index d954965..5d94b75 100644 --- a/mlsauce/lazybooster/lazyboosterregression.py +++ b/mlsauce/lazybooster/lazyboosterregression.py @@ -348,7 +348,7 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): try: - if hist: + if hist is False: model = GenericBoostingRegressor( base_model=regr(), verbose=self.verbose, **kwargs @@ -468,7 +468,7 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): start = time.time() try: - if hist: + if hist is False: model = GenericBoostingRegressor( base_model=regr(), verbose=self.verbose, **kwargs ) @@ -655,7 +655,7 @@ def train_model( start = time.time() try: - if hist: + if hist is False: model = GenericBoostingRegressor( base_model=regr(), 
verbose=self.verbose, **kwargs ) From 559a9e59014f34839d2368abb6d7257e75bc7935 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 19:06:32 +0100 Subject: [PATCH 07/26] return np array assigned values --- mlsauce/utils/histofeatures/gethistofeatures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlsauce/utils/histofeatures/gethistofeatures.py b/mlsauce/utils/histofeatures/gethistofeatures.py index 2b762c8..0cc80d5 100644 --- a/mlsauce/utils/histofeatures/gethistofeatures.py +++ b/mlsauce/utils/histofeatures/gethistofeatures.py @@ -51,7 +51,7 @@ def assign_values_to_input(new_data, bin_value_dict): assigned_values.append(assigned) - return assigned_values + return np.asarray(assigned_values) def get_histo_features(X, bin_value_dict=None): """ From a695c72b86cb1db8f4d8e72ac831f4d7a43e549b Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 19:07:05 +0100 Subject: [PATCH 08/26] return np array assigned values Pt.2 --- mlsauce/utils/histofeatures/gethistofeatures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlsauce/utils/histofeatures/gethistofeatures.py b/mlsauce/utils/histofeatures/gethistofeatures.py index 0cc80d5..badc1a5 100644 --- a/mlsauce/utils/histofeatures/gethistofeatures.py +++ b/mlsauce/utils/histofeatures/gethistofeatures.py @@ -51,7 +51,7 @@ def assign_values_to_input(new_data, bin_value_dict): assigned_values.append(assigned) - return np.asarray(assigned_values) + return np.asarray(assigned_values).ravel() def get_histo_features(X, bin_value_dict=None): """ From e38dceee72a0b5f701839a40deac5708a7ce9e14 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 19:21:36 +0100 Subject: [PATCH 09/26] return np array assigned values Pt.3 --- mlsauce/booster/_booster_classifier.py | 4 +++- mlsauce/booster/_booster_regressor.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index 0f707d2..2c5ed1f 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -846,7 +846,9 @@ def fit(self, X, y, **kwargs): self: object. """ - X, self.hist_bins = get_histo_features(X) + print(f"\n before: {X} \n") + X, self.hist_bins = get_histo_features(X) + print(f"\n after: {X} \n") return super().fit(X, y, **kwargs) def predict_proba(self, X, **kwargs): diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 1135ce2..f8779f6 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -737,7 +737,9 @@ def fit(self, X, y, **kwargs): self: object. 
""" - X, self.hist_bins = get_histo_features(X) + print(f"\n before: {X} \n") + X, self.hist_bins = get_histo_features(X) + print(f"\n after: {X} \n") return super().fit(X, y, **kwargs) def predict(self, X, level=95, method=None, **kwargs): From 656cfb8fcb918df7efda179ff060bbb62ec08e04 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 19:39:17 +0100 Subject: [PATCH 10/26] fix bounds --- mlsauce/utils/histofeatures/gethistofeatures.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mlsauce/utils/histofeatures/gethistofeatures.py b/mlsauce/utils/histofeatures/gethistofeatures.py index badc1a5..efe0cbd 100644 --- a/mlsauce/utils/histofeatures/gethistofeatures.py +++ b/mlsauce/utils/histofeatures/gethistofeatures.py @@ -17,14 +17,19 @@ def create_histogram_with_bin_values(x): # Compute the histogram hist, bin_edges = np.histogram(x, bins="auto") - bin_edges = np.concatenate([[1e-10], bin_edges, [1e10]]).ravel() + bin_edges = np.concatenate([[-1e10], bin_edges, [1e10]]).ravel() # Create a dict to store bin ranges and assigned values bin_value_dict = {} for i in range(len(bin_edges) - 1): bin_range = (bin_edges[i], bin_edges[i + 1]) - bin_value_dict[i] = (bin_range, np.median(list(bin_range))) + if bin_edges[i] == -1e10: + bin_value_dict[i] = (bin_range, bin_edges[i + 1]) + elif bin_edges[i + 1] == 1e10: + bin_value_dict[i] = (bin_range, bin_edges[i]) + else: + bin_value_dict[i] = (bin_range, np.median(list(bin_range))) return bin_edges, bin_value_dict From 6c1d3cd716ee0cbe6847c5039035fc4b42ef9078 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 19:49:53 +0100 Subject: [PATCH 11/26] change no. of bins --- mlsauce/utils/histofeatures/gethistofeatures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlsauce/utils/histofeatures/gethistofeatures.py b/mlsauce/utils/histofeatures/gethistofeatures.py index efe0cbd..adb9309 100644 --- a/mlsauce/utils/histofeatures/gethistofeatures.py +++ b/mlsauce/utils/histofeatures/gethistofeatures.py @@ -15,7 +15,7 @@ def create_histogram_with_bin_values(x): bin_value_dict (dict): A dictionary where keys are the bin ranges (tuples) and values reflect the ordering. """ # Compute the histogram - hist, bin_edges = np.histogram(x, bins="auto") + hist, bin_edges = np.histogram(x) bin_edges = np.concatenate([[-1e10], bin_edges, [1e10]]).ravel() From 0db67a8a4c50ce63a3c541e4787ea37f175f68d8 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 19:53:29 +0100 Subject: [PATCH 12/26] change no. of bins Pt.2 --- mlsauce/booster/_booster_regressor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index f8779f6..3fd153a 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -737,9 +737,9 @@ def fit(self, X, y, **kwargs): self: object. """ - print(f"\n before: {X} \n") + #print(f"\n before: {X} \n") X, self.hist_bins = get_histo_features(X) - print(f"\n after: {X} \n") + #print(f"\n after: {X} \n") return super().fit(X, y, **kwargs) def predict(self, X, level=95, method=None, **kwargs): From 3b051064516a40ff22f12d6e5d58ba95a8c2a23f Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 27 Oct 2024 19:59:38 +0100 Subject: [PATCH 13/26] change no. 
of bins Pt.3 --- mlsauce/booster/_booster_classifier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index 2c5ed1f..52a4b1d 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -846,9 +846,9 @@ def fit(self, X, y, **kwargs): self: object. """ - print(f"\n before: {X} \n") + #print(f"\n before: {X} \n") X, self.hist_bins = get_histo_features(X) - print(f"\n after: {X} \n") + #print(f"\n after: {X} \n") return super().fit(X, y, **kwargs) def predict_proba(self, X, **kwargs): From e567d5b89ac1255f18ffd267e66e715b4843602c Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 07:00:01 +0100 Subject: [PATCH 14/26] bins = 'auto' --- mlsauce/utils/histofeatures/gethistofeatures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlsauce/utils/histofeatures/gethistofeatures.py b/mlsauce/utils/histofeatures/gethistofeatures.py index adb9309..efe0cbd 100644 --- a/mlsauce/utils/histofeatures/gethistofeatures.py +++ b/mlsauce/utils/histofeatures/gethistofeatures.py @@ -15,7 +15,7 @@ def create_histogram_with_bin_values(x): bin_value_dict (dict): A dictionary where keys are the bin ranges (tuples) and values reflect the ordering. """ # Compute the histogram - hist, bin_edges = np.histogram(x) + hist, bin_edges = np.histogram(x, bins="auto") bin_edges = np.concatenate([[-1e10], bin_edges, [1e10]]).ravel() From becf078b8f041b2e79fa18630992662e105af6dd Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 07:14:44 +0100 Subject: [PATCH 15/26] change no. of bins Pt.4 --- mlsauce/booster/_booster_regressor.py | 8 ++++---- mlsauce/utils/histofeatures/gethistofeatures.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 3fd153a..4833323 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -738,9 +738,9 @@ def fit(self, X, y, **kwargs): self: object. """ #print(f"\n before: {X} \n") - X, self.hist_bins = get_histo_features(X) + X_, self.hist_bins = get_histo_features(X) #print(f"\n after: {X} \n") - return super().fit(X, y, **kwargs) + return super().fit(X_, y, **kwargs) def predict(self, X, level=95, method=None, **kwargs): """Predict values for test data X. 
@@ -769,6 +769,6 @@ def predict(self, X, level=95, method=None, **kwargs): predicted values estimates for test data: {array-like} """ assert self.hist_bins is not None, "You must fit the model first" - X = get_histo_features(X, self.hist_bins) - return super().predict(X, level=level, method=method, **kwargs) + X_ = get_histo_features(X, self.hist_bins) + return super().predict(X_, level=level, method=method, **kwargs) diff --git a/mlsauce/utils/histofeatures/gethistofeatures.py b/mlsauce/utils/histofeatures/gethistofeatures.py index efe0cbd..a4a2d1c 100644 --- a/mlsauce/utils/histofeatures/gethistofeatures.py +++ b/mlsauce/utils/histofeatures/gethistofeatures.py @@ -50,7 +50,7 @@ def assign_values_to_input(new_data, bin_value_dict): assigned = None # Find the appropriate bin for each value for elt in bin_value_dict.items(): - if elt[1][0][0] <= value < elt[1][0][1]: + if elt[1][0][0] < value <= elt[1][0][1]: assigned = elt[1][1] break From e278f38a17d7e6a1522aa556d75912370dcd6b62 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 08:40:46 +0100 Subject: [PATCH 16/26] modif gethistofeatures --- .../utils/histofeatures/gethistofeatures.py | 42 +++++++++---------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/mlsauce/utils/histofeatures/gethistofeatures.py b/mlsauce/utils/histofeatures/gethistofeatures.py index a4a2d1c..896679b 100644 --- a/mlsauce/utils/histofeatures/gethistofeatures.py +++ b/mlsauce/utils/histofeatures/gethistofeatures.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd + def create_histogram_with_bin_values(x): """ Computes a histogram for the input data and assigns a value to each bin @@ -15,49 +16,44 @@ def create_histogram_with_bin_values(x): bin_value_dict (dict): A dictionary where keys are the bin ranges (tuples) and values reflect the ordering. """ # Compute the histogram - hist, bin_edges = np.histogram(x, bins="auto") + _, bin_edges = np.histogram(x, bins="auto") - bin_edges = np.concatenate([[-1e10], bin_edges, [1e10]]).ravel() - - # Create a dict to store bin ranges and assigned values - bin_value_dict = {} - - for i in range(len(bin_edges) - 1): - bin_range = (bin_edges[i], bin_edges[i + 1]) - if bin_edges[i] == -1e10: - bin_value_dict[i] = (bin_range, bin_edges[i + 1]) - elif bin_edges[i + 1] == 1e10: - bin_value_dict[i] = (bin_range, bin_edges[i]) - else: - bin_value_dict[i] = (bin_range, np.median(list(bin_range))) + bin_edges = np.concatenate([[-1e10], bin_edges, [1e10]]).ravel() - return bin_edges, bin_value_dict + return {i: (bin_edges[i], bin_edges[i + 1]) for i in range(len(bin_edges) - 1)} + -def assign_values_to_input(new_data, bin_value_dict): +def assign_values_to_input(x, bin_value_dict): """ Assigns values to a new input based on the provided bin ranges and values. Args: - new_data (list or np.array): New input data to assign values to. + x (list or np.array): New input data to assign values to. bin_value_dict (dict): Dictionary where keys are bin ranges (tuples) and values are the assigned values. Returns: assigned_values (list): List of assigned values for the new input data. 
""" + + if np.issubdtype(x.dtype, np.integer) or np.issubdtype(x.dtype, np.object_): + + return x + assigned_values = [] - for value in new_data: + for value in x: assigned = None # Find the appropriate bin for each value - for elt in bin_value_dict.items(): - if elt[1][0][0] < value <= elt[1][0][1]: - assigned = elt[1][1] + for i, elt in enumerate(bin_value_dict.items()): + if elt[1][0] < value <= elt[1][1]: + assigned = i break assigned_values.append(assigned) return np.asarray(assigned_values).ravel() + def get_histo_features(X, bin_value_dict=None): """ Computes histogram features for the input data. @@ -77,12 +73,12 @@ def get_histo_features(X, bin_value_dict=None): X_hist = pd.DataFrame(np.zeros(X.shape), columns=colnames) for i, col in enumerate(colnames): - _, bin_value_dict = create_histogram_with_bin_values(X[:, i]) + bin_value_dict = create_histogram_with_bin_values(X[:, i]) X_hist[col] = assign_values_to_input(X[:, i], bin_value_dict) else: X_hist = np.zeros(X.shape) for i in range(X.shape[1]): - _, bin_value_dict = create_histogram_with_bin_values(X[:, i]) + bin_value_dict = create_histogram_with_bin_values(X[:, i]) X_hist[:, i] = assign_values_to_input(X[:, i], bin_value_dict) return X_hist, bin_value_dict From 83847554d8ffea4f8dbb0db2acba07883ab7ded7 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 08:57:46 +0100 Subject: [PATCH 17/26] fix histo --- mlsauce/utils/histofeatures/gethistofeatures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlsauce/utils/histofeatures/gethistofeatures.py b/mlsauce/utils/histofeatures/gethistofeatures.py index 896679b..70024b5 100644 --- a/mlsauce/utils/histofeatures/gethistofeatures.py +++ b/mlsauce/utils/histofeatures/gethistofeatures.py @@ -46,7 +46,7 @@ def assign_values_to_input(x, bin_value_dict): # Find the appropriate bin for each value for i, elt in enumerate(bin_value_dict.items()): if elt[1][0] < value <= elt[1][1]: - assigned = i + assigned = float(i) break assigned_values.append(assigned) From 62859d34bba629bbe5702d3c6b10c5ca1d04f1b3 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 09:06:36 +0100 Subject: [PATCH 18/26] raise warnings in histgenbooster --- mlsauce/booster/_booster_classifier.py | 5 ++++- mlsauce/booster/_booster_regressor.py | 8 ++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index 52a4b1d..1aeecd5 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -681,7 +681,7 @@ def __init__( ) class HistGenericBoostingClassifier(GenericBoostingClassifier): - """Histogram-based Generic Boosting classifier (using any classifier as base learner). + """EXPERIMENTAL Histogram-based Generic Boosting classifier (using any classifier as base learner). Attributes: @@ -779,6 +779,9 @@ def __init__( degree=None, weights_distr="uniform", ): + + warnings.warn("This class is highly experimental", UserWarning) + super().__init__( base_model=base_model, n_estimators=n_estimators, diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 4833323..ae1a19e 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -579,7 +579,7 @@ def __init__( ) class HistGenericBoostingRegressor(GenericBoostingRegressor): - """Generic Boosting regressor with histogram-based features. + """EXPERIMENTAL Generic Boosting regressor with histogram-based features. 
Attributes: @@ -690,6 +690,9 @@ def __init__( degree=None, weights_distr="uniform", ): + + warnings.warn("This class is highly experimental", UserWarning) + self.base_model = base_model self.hist_bins = None super().__init__( @@ -717,7 +720,8 @@ def __init__( degree=degree, weights_distr=weights_distr, base_model=self.base_model, - ) + ) + def fit(self, X, y, **kwargs): """Fit Booster (regressor) to training data (X, y) From 5af9da82bf7adc8bf8196c3920566521b3a238a9 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 09:44:49 +0100 Subject: [PATCH 19/26] fix histo Pt.2 --- mlsauce/booster/_booster_classifier.py | 20 ++++++++------------ mlsauce/booster/_booster_regressor.py | 16 +++++++++------- setup.py | 2 +- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index 1aeecd5..c15f51a 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -780,7 +780,7 @@ def __init__( weights_distr="uniform", ): - warnings.warn("This class is highly experimental", UserWarning) + #warnings.warn("This class is highly experimental", UserWarning) super().__init__( base_model=base_model, @@ -806,7 +806,8 @@ def __init__( weights_distr=weights_distr, ) self.base_model = base_model - self.hist_bins = None + self.hist_bins_ = None + super().__init__( base_model=base_model, n_estimators=n_estimators, @@ -850,9 +851,10 @@ def fit(self, X, y, **kwargs): self: object. """ #print(f"\n before: {X} \n") - X, self.hist_bins = get_histo_features(X) + res = get_histo_features(X) + self.hist_bins_ = res[1] #print(f"\n after: {X} \n") - return super().fit(X, y, **kwargs) + return self.fit(res[0], y, **kwargs) def predict_proba(self, X, **kwargs): """Predict probabilites for test data X. @@ -870,12 +872,6 @@ def predict_proba(self, X, **kwargs): predicted values estimates for test data: {array-like} """ - assert self.hist_bins is not None, "You must fit the model first" - X = get_histo_features(X, self.hist_bins) - try: - return super().predict_proba(np.asarray(X, order="C"), - **kwargs) - except Exception: - return super().predict_proba(X, - **kwargs) + assert self.hist_bins_ is not None, "You must fit the model first" + return self.predict_proba(get_histo_features(X, self.hist_bins_)) diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index ae1a19e..399c39b 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -691,10 +691,11 @@ def __init__( weights_distr="uniform", ): - warnings.warn("This class is highly experimental", UserWarning) + #warnings.warn("This class is highly experimental", UserWarning) self.base_model = base_model - self.hist_bins = None + self.hist_bins_ = None + super().__init__( n_estimators=n_estimators, learning_rate=learning_rate, @@ -742,9 +743,10 @@ def fit(self, X, y, **kwargs): self: object. """ #print(f"\n before: {X} \n") - X_, self.hist_bins = get_histo_features(X) + res = get_histo_features(X) + self.hist_bins_ = res[1] #print(f"\n after: {X} \n") - return super().fit(X_, y, **kwargs) + return self.fit(res[0], y, **kwargs) def predict(self, X, level=95, method=None, **kwargs): """Predict values for test data X. 
@@ -772,7 +774,7 @@ def predict(self, X, level=95, method=None, **kwargs): predicted values estimates for test data: {array-like} """ - assert self.hist_bins is not None, "You must fit the model first" - X_ = get_histo_features(X, self.hist_bins) - return super().predict(X_, level=level, method=method, **kwargs) + assert self.hist_bins_ is not None, "You must fit the model first" + X_ = get_histo_features(X, self.hist_bins_) + return self.predict(X_, level=level, method=method, **kwargs) diff --git a/setup.py b/setup.py index 188ab86..9b8871a 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ MAINTAINER_EMAIL = 'thierry.moudiki@gmail.com' LICENSE = 'BSD3 Clause Clear' -__version__ = '0.22.4' +__version__ = '0.23.0' VERSION = __version__ From f9e7b7e7ab14d65cfd258dbc0629dc1ac2462694 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 09:52:25 +0100 Subject: [PATCH 20/26] fix histo Pt.3 --- mlsauce/booster/_booster_classifier.py | 4 ++-- mlsauce/booster/_booster_regressor.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index c15f51a..bd59dd9 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -807,7 +807,7 @@ def __init__( ) self.base_model = base_model self.hist_bins_ = None - + super().__init__( base_model=base_model, n_estimators=n_estimators, @@ -854,7 +854,7 @@ def fit(self, X, y, **kwargs): res = get_histo_features(X) self.hist_bins_ = res[1] #print(f"\n after: {X} \n") - return self.fit(res[0], y, **kwargs) + return super().fit(res[0], y, **kwargs) def predict_proba(self, X, **kwargs): """Predict probabilites for test data X. diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 399c39b..e3d9a21 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -746,7 +746,7 @@ def fit(self, X, y, **kwargs): res = get_histo_features(X) self.hist_bins_ = res[1] #print(f"\n after: {X} \n") - return self.fit(res[0], y, **kwargs) + return super().fit(res[0], y, **kwargs) def predict(self, X, level=95, method=None, **kwargs): """Predict values for test data X. 
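The two patches above (19 and 20) circle a classic subclassing pitfall: inside an overridden fit(), calling self.fit(...) dispatches straight back to the override itself and recurses until Python raises RecursionError, whereas super().fit(...) hands off to the parent implementation, which is what the Pt.3 fix restores (Pt.4, next, applies the identical fix to the predict paths). A minimal, self-contained sketch of the difference; the class names are illustrative stand-ins, not the mlsauce API:

import numpy as np

class Base:
    def fit(self, X, y):
        # stands in for the real boosting loop in LSBoostRegressor.fit
        print("Base.fit runs the actual training iterations")
        return self

class BadHistBooster(Base):
    def fit(self, X, y):
        X = np.asarray(X)          # histogram preprocessing would happen here
        return self.fit(X, y)      # BUG: re-enters BadHistBooster.fit forever

class GoodHistBooster(Base):
    def fit(self, X, y):
        X = np.asarray(X)          # same preprocessing
        return super().fit(X, y)   # delegates to Base.fit, as Pt.3 does

GoodHistBooster().fit([[1.0]], [0.0])    # trains normally
# BadHistBooster().fit([[1.0]], [0.0])   # would raise RecursionError

The same reasoning applies to predict and predict_proba, which is why the next patch replaces their self.* calls with super().* as well.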
From 4fc7dd7b2a685bdcfef12e5a92b8b835d5bc68bc Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 10:04:12 +0100 Subject: [PATCH 21/26] fix histo Pt.4 --- mlsauce/booster/_booster_classifier.py | 2 +- mlsauce/booster/_booster_regressor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index bd59dd9..666fe25 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -873,5 +873,5 @@ def predict_proba(self, X, **kwargs): predicted values estimates for test data: {array-like} """ assert self.hist_bins_ is not None, "You must fit the model first" - return self.predict_proba(get_histo_features(X, self.hist_bins_)) + return super().predict_proba(get_histo_features(X, self.hist_bins_)) diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index e3d9a21..5abdada 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -776,5 +776,5 @@ def predict(self, X, level=95, method=None, **kwargs): """ assert self.hist_bins_ is not None, "You must fit the model first" X_ = get_histo_features(X, self.hist_bins_) - return self.predict(X_, level=level, method=method, **kwargs) + return super().predict(X_, level=level, method=method, **kwargs) From 7c8a45ed9d6ec6950be9dac67a64e3efa3b594ed Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 10:23:49 +0100 Subject: [PATCH 22/26] fix histo Pt.5 --- mlsauce/__init__.py | 5 - mlsauce/booster/__init__.py | 4 - mlsauce/booster/_booster_classifier.py | 222 +++--------------------- mlsauce/booster/_booster_regressor.py | 225 +++---------------------- 4 files changed, 51 insertions(+), 405 deletions(-) diff --git a/mlsauce/__init__.py b/mlsauce/__init__.py index 9767523..2dddb7d 100644 --- a/mlsauce/__init__.py +++ b/mlsauce/__init__.py @@ -61,9 +61,6 @@ LSBoostRegressor, GenericBoostingClassifier, GenericBoostingRegressor, - HistGenericBoostingRegressor, - HistGenericBoostingClassifier, - ) from .lazybooster import LazyBoostingClassifier, LazyBoostingRegressor from .multitaskregressor import MultiTaskRegressor @@ -80,8 +77,6 @@ "LSBoostClassifier", "GenericBoostingClassifier", "GenericBoostingRegressor", - "HistGenericBoostingClassifier", - "HistGenericBoostingRegressor", "StumpClassifier", "ElasticNetRegressor", "LassoRegressor", diff --git a/mlsauce/booster/__init__.py b/mlsauce/booster/__init__.py index b8941dc..786fe8f 100644 --- a/mlsauce/booster/__init__.py +++ b/mlsauce/booster/__init__.py @@ -1,15 +1,11 @@ from ._booster_regressor import LSBoostRegressor from ._booster_regressor import GenericBoostingRegressor -from ._booster_regressor import HistGenericBoostingRegressor from ._booster_classifier import LSBoostClassifier from ._booster_classifier import GenericBoostingClassifier -from ._booster_classifier import HistGenericBoostingClassifier __all__ = [ "LSBoostClassifier", "LSBoostRegressor", "GenericBoostingClassifier", "GenericBoostingRegressor", - "HistGenericBoostingRegressor", - "HistGenericBoostingClassifier" ] diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index 666fe25..ef5d6ec 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -83,6 +83,12 @@ class LSBoostClassifier(BaseEstimator, ClassifierMixin): weights_distr: str distribution of weights for constructing the model's hidden layer; currently 'uniform', 
'gaussian' + + hist: bool + indicates whether histogram features are used or not (default is False) + + bins: int or str + number of bins for histogram features (same as numpy.histogram, default is 'auto') Examples: @@ -307,9 +313,14 @@ def __init__( degree=None, weights_distr="uniform", base_model=None, + hist=False, + bins="auto", ): self.base_model = base_model + self.hist = hist + self.bins = bins + self.hist_bins_ = None if n_clusters > 0: assert clustering_method in ( @@ -392,6 +403,9 @@ def fit(self, X, y, **kwargs): if isinstance(X, pd.DataFrame): X = X.values + if self.hist: + X, self.hist_bins_ = get_histo_features(X) + if isinstance(y, pd.Series): y = y.values.ravel() else: @@ -628,6 +642,12 @@ class GenericBoostingClassifier(LSBoostClassifier): weights_distr: str distribution of weights for constructing the model's hidden layer; currently 'uniform', 'gaussian' + + hist: bool + indicates whether histogram features are used or not (default is False) + + bins: int or str + number of bins for histogram features (same as numpy.histogram, default is 'auto') """ @@ -654,162 +674,15 @@ def __init__( cluster_scaling="standard", degree=None, weights_distr="uniform", + hist=False, + bins="auto", ): self.base_model = base_model - super().__init__( - n_estimators=n_estimators, - learning_rate=learning_rate, - n_hidden_features=n_hidden_features, - reg_lambda=reg_lambda, - alpha=alpha, - row_sample=row_sample, - col_sample=col_sample, - dropout=dropout, - tolerance=tolerance, - direct_link=direct_link, - verbose=verbose, - seed=seed, - backend=backend, - solver=solver, - activation=activation, - n_clusters=n_clusters, - clustering_method=clustering_method, - cluster_scaling=cluster_scaling, - degree=degree, - weights_distr=weights_distr, - base_model=self.base_model, - ) - -class HistGenericBoostingClassifier(GenericBoostingClassifier): - """EXPERIMENTAL Histogram-based Generic Boosting classifier (using any classifier as base learner). - - Attributes: - - base_model: object - base learner (default is ExtraTreeRegressor) to be boosted. - - n_estimators: int - number of boosting iterations. - - learning_rate: float - controls the learning speed at training time. - - n_hidden_features: int - number of nodes in successive hidden layers. - - reg_lambda: float - L2 regularization parameter for successive errors in the optimizer - (at training time). - - alpha: float - compromise between L1 and L2 regularization (must be in [0, 1]), - for `solver` == 'enet'. - - row_sample: float - percentage of rows chosen from the training set. - - col_sample: float - percentage of columns chosen from the training set. - - dropout: float - percentage of nodes dropped from the training set. - - tolerance: float - controls early stopping in gradient descent (at training time). - - direct_link: bool - indicates whether the original features are included (True) in model's - fitting or not (False). - - verbose: int - progress bar (yes = 1) or not (no = 0) (currently). - - seed: int - reproducibility seed for nodes_sim=='uniform', clustering and dropout. - - backend: str - type of backend; must be in ('cpu', 'gpu', 'tpu') - - solver: str - type of 'weak' learner; currently in ('ridge', 'lasso', 'enet'). - 'enet' is a combination of 'ridge' and 'lasso' called Elastic Net. 
- - activation: str - activation function: currently 'relu', 'relu6', 'sigmoid', 'tanh' - - n_clusters: int - number of clusters for clustering the features - - clustering_method: str - clustering method: currently 'kmeans', 'gmm' - - cluster_scaling: str - scaling method for clustering: currently 'standard', 'robust', 'minmax' - - degree: int - degree of features interactions to include in the model - - weights_distr: str - distribution of weights for constructing the model's hidden layer; - currently 'uniform', 'gaussian' - - """ - - def __init__( - self, - base_model=ExtraTreeRegressor(), - n_estimators=100, - learning_rate=0, - n_hidden_features=5, - reg_lambda=0.1, - alpha=0.5, - row_sample=1, - col_sample=1, - dropout=0, - tolerance=1e-4, - direct_link=1, - verbose=1, - seed=123, - backend="cpu", - solver="ridge", - activation="relu", - n_clusters=0, - clustering_method="kmeans", - cluster_scaling="standard", - degree=None, - weights_distr="uniform", - ): - - #warnings.warn("This class is highly experimental", UserWarning) - - super().__init__( - base_model=base_model, - n_estimators=n_estimators, - learning_rate=learning_rate, - n_hidden_features=n_hidden_features, - reg_lambda=reg_lambda, - alpha=alpha, - row_sample=row_sample, - col_sample=col_sample, - dropout=dropout, - tolerance=tolerance, - direct_link=direct_link, - verbose=verbose, - seed=seed, - backend=backend, - solver=solver, - activation=activation, - n_clusters=n_clusters, - clustering_method=clustering_method, - cluster_scaling=cluster_scaling, - degree=degree, - weights_distr=weights_distr, - ) - self.base_model = base_model + self.hist = hist + self.bins = bins self.hist_bins_ = None super().__init__( - base_model=base_model, n_estimators=n_estimators, learning_rate=learning_rate, n_hidden_features=n_hidden_features, @@ -830,48 +703,5 @@ def __init__( cluster_scaling=cluster_scaling, degree=degree, weights_distr=weights_distr, - ) - - def fit(self, X, y, **kwargs): - """Fit Booster (classifier) to training data (X, y) - - Args: - - X: {array-like}, shape = [n_samples, n_features] - Training vectors, where n_samples is the number - of samples and n_features is the number of features. - - y: array-like, shape = [n_samples] - Target values. - - **kwargs: additional parameters to be passed to self.cook_training_set. - - Returns: - - self: object. - """ - #print(f"\n before: {X} \n") - res = get_histo_features(X) - self.hist_bins_ = res[1] - #print(f"\n after: {X} \n") - return super().fit(res[0], y, **kwargs) - - def predict_proba(self, X, **kwargs): - """Predict probabilites for test data X. - - Args: - - X: {array-like}, shape = [n_samples, n_features] - Training vectors, where n_samples is the number - of samples and n_features is the number of features. 
- - **kwargs: additional parameters to be passed to - self.cook_test_set - - Returns: - - predicted values estimates for test data: {array-like} - """ - assert self.hist_bins_ is not None, "You must fit the model first" - return super().predict_proba(get_histo_features(X, self.hist_bins_)) - + base_model=self.base_model, + ) \ No newline at end of file diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 5abdada..2ceeedb 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -93,6 +93,12 @@ class LSBoostRegressor(BaseEstimator, RegressorMixin): weights_distr: str distribution of weights for constructing the model's hidden layer; either 'uniform' or 'gaussian' + + hist: bool + whether to use histogram features or not + + bins: int or str + number of bins for histogram features (same as numpy.histogram, default is 'auto') Examples: @@ -174,9 +180,14 @@ def __init__( degree=None, weights_distr="uniform", base_model=None, + hist=False, + bins="auto", ): self.base_model = base_model + self.hist = hist + self.bins = bins + self.hist_bins_ = None if n_clusters > 0: assert clustering_method in ( @@ -262,6 +273,9 @@ def fit(self, X, y, **kwargs): if isinstance(X, pd.DataFrame): X = X.values + if self.hist: + X, self.hist_bins_ = get_histo_features(X) + if isinstance(y, pd.Series): y = y.values.ravel() else: @@ -520,6 +534,12 @@ class GenericBoostingRegressor(LSBoostRegressor): weights_distr: str distribution of weights for constructing the model's hidden layer; either 'uniform' or 'gaussian' + + hist: bool + whether to use histogram features or not + + bins: int or str + number of bins for histogram features (same as numpy.histogram, default is 'auto') """ @@ -549,151 +569,12 @@ def __init__( cluster_scaling="standard", degree=None, weights_distr="uniform", + hist=False, + bins="auto", ): self.base_model = base_model - super().__init__( - n_estimators=n_estimators, - learning_rate=learning_rate, - n_hidden_features=n_hidden_features, - reg_lambda=reg_lambda, - alpha=alpha, - row_sample=row_sample, - col_sample=col_sample, - dropout=dropout, - tolerance=tolerance, - direct_link=direct_link, - verbose=verbose, - seed=seed, - backend=backend, - solver=solver, - activation=activation, - type_pi=type_pi, - replications=replications, - kernel=kernel, - n_clusters=n_clusters, - clustering_method=clustering_method, - cluster_scaling=cluster_scaling, - degree=degree, - weights_distr=weights_distr, - base_model=self.base_model, - ) - -class HistGenericBoostingRegressor(GenericBoostingRegressor): - """EXPERIMENTAL Generic Boosting regressor with histogram-based features. - - Attributes: - - base_model: object - base learner (default is ExtraTreeRegressor) to be boosted. - - n_estimators: int - number of boosting iterations. - - learning_rate: float - controls the learning speed at training time. - - n_hidden_features: int - number of nodes in successive hidden layers. - - reg_lambda: float - L2 regularization parameter for successive errors in the optimizer - (at training time). - - alpha: float - compromise between L1 and L2 regularization (must be in [0, 1]), - for `solver` == 'enet' - - row_sample: float - percentage of rows chosen from the training set. - - col_sample: float - percentage of columns chosen from the training set. - - dropout: float - percentage of nodes dropped from the training set. - - tolerance: float - controls early stopping in gradient descent (at training time). 
- - direct_link: bool - indicates whether the original features are included (True) in model's - fitting or not (False). - - verbose: int - progress bar (yes = 1) or not (no = 0) (currently). - - seed: int - reproducibility seed for nodes_sim=='uniform', clustering and dropout. - - backend: str - type of backend; must be in ('cpu', 'gpu', 'tpu') - - solver: str - type of 'weak' learner; currently in ('ridge', 'lasso') - - activation: str - activation function: currently 'relu', 'relu6', 'sigmoid', 'tanh' - - type_pi: str. - type of prediction interval; currently "kde" (default) or "bootstrap". - Used only in `self.predict`, for `self.replications` > 0 and `self.kernel` - in ('gaussian', 'tophat'). Default is `None`. - - replications: int. - number of replications (if needed) for predictive simulation. - Used only in `self.predict`, for `self.kernel` in ('gaussian', - 'tophat') and `self.type_pi = 'kde'`. Default is `None`. - - n_clusters: int - number of clusters for clustering the features - - clustering_method: str - clustering method: currently 'kmeans', 'gmm' - - cluster: bool - whether to cluster the features or not - - cluster_scaling: str - scaling method for clustering: currently 'standard', 'robust', 'minmax' - - degree: int - degree of features interactions to include in the model - - weights_distr: str - distribution of weights for constructing the model's hidden layer; - either 'uniform' or 'gaussian' - """ - def __init__( - self, - base_model=ExtraTreeRegressor(), - n_estimators=100, - learning_rate=0.1, - n_hidden_features=5, - reg_lambda=0.1, - alpha=0.5, - row_sample=1, - col_sample=1, - dropout=0, - tolerance=1e-4, - direct_link=1, - verbose=1, - seed=123, - backend="cpu", - solver="ridge", - activation="relu", - type_pi=None, - replications=None, - kernel=None, - n_clusters=0, - clustering_method="kmeans", - cluster_scaling="standard", - degree=None, - weights_distr="uniform", - ): - - #warnings.warn("This class is highly experimental", UserWarning) - - self.base_model = base_model + self.hist = hist + self.bins = bins self.hist_bins_ = None super().__init__( @@ -721,60 +602,4 @@ def __init__( degree=degree, weights_distr=weights_distr, base_model=self.base_model, - ) - - - def fit(self, X, y, **kwargs): - """Fit Booster (regressor) to training data (X, y) - - Args: - - X: {array-like}, shape = [n_samples, n_features] - Training vectors, where n_samples is the number - of samples and n_features is the number of features. - - y: array-like, shape = [n_samples] - Target values. - - **kwargs: additional parameters to be passed to self.cook_training_set. - - Returns: - - self: object. - """ - #print(f"\n before: {X} \n") - res = get_histo_features(X) - self.hist_bins_ = res[1] - #print(f"\n after: {X} \n") - return super().fit(res[0], y, **kwargs) - - def predict(self, X, level=95, method=None, **kwargs): - """Predict values for test data X. - - Args: - - X: {array-like}, shape = [n_samples, n_features] - Training vectors, where n_samples is the number - of samples and n_features is the number of features. 
- - level: int - Level of confidence (default = 95) - - method: str - `None`, or 'splitconformal', 'localconformal' - prediction (if you specify `return_pi = True`) - - histo: bool - whether to use histogram features or not - - **kwargs: additional parameters to be passed to - self.cook_test_set - - Returns: - - predicted values estimates for test data: {array-like} - """ - assert self.hist_bins_ is not None, "You must fit the model first" - X_ = get_histo_features(X, self.hist_bins_) - return super().predict(X_, level=level, method=method, **kwargs) - + ) From b53f9e595e9116aa8efaaa387e677fe449639213 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 10:27:42 +0100 Subject: [PATCH 23/26] fix histo Pt.6 --- mlsauce/lazybooster/lazyboosterclassif.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/mlsauce/lazybooster/lazyboosterclassif.py b/mlsauce/lazybooster/lazyboosterclassif.py index c58c812..963cb46 100644 --- a/mlsauce/lazybooster/lazyboosterclassif.py +++ b/mlsauce/lazybooster/lazyboosterclassif.py @@ -26,7 +26,7 @@ f1_score, ) from .config import REGRESSORS, MTASKREGRESSORS -from ..booster import GenericBoostingClassifier, HistGenericBoostingClassifier +from ..booster import GenericBoostingClassifier, GenericBoostingClassifier from ..multitaskregressor import MultiTaskRegressor import warnings @@ -217,7 +217,7 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): and columns is the number of features. hist: bool, optional (default=False) - When set to True, the model is a HistGenericBoostingClassifier. + When set to True, the model is a GenericBoostingClassifier. **kwargs: dict, Additional arguments to be passed to the fit GenericBoostingClassifier. @@ -388,12 +388,13 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): ), ) else: - fitted_clf = HistGenericBoostingClassifier( + fitted_clf = GenericBoostingClassifier( {**other_args, **kwargs}, verbose=self.verbose, base_model=model( random_state=self.random_state ), + hist=True, ) else: @@ -403,9 +404,10 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): verbose=self.verbose, ) else: - fitted_clf = HistGenericBoostingClassifier( + fitted_clf = GenericBoostingClassifier( base_model=model(**kwargs), verbose=self.verbose, + hist=True, ) if self.verbose > 0: @@ -527,11 +529,12 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): **kwargs ) else: - fitted_clf = HistGenericBoostingClassifier( + fitted_clf = GenericBoostingClassifier( base_model=model( random_state=self.random_state ), verbose=self.verbose, + hist=True, **kwargs ) @@ -543,9 +546,10 @@ def fit(self, X_train, X_test, y_train, y_test, hist=False, **kwargs): **kwargs ) else: - fitted_clf = HistGenericBoostingClassifier( + fitted_clf = GenericBoostingClassifier( base_model=model(), verbose=self.verbose, + hist=True, **kwargs ) @@ -753,10 +757,11 @@ def train_model( base_model=model(random_state=self.random_state), ) else: - fitted_clf = HistGenericBoostingClassifier( + fitted_clf = GenericBoostingClassifier( {**other_args, **kwargs}, verbose=self.verbose, base_model=model(random_state=self.random_state), + hist=True, ) else: if hist is False: @@ -765,9 +770,10 @@ def train_model( verbose=self.verbose, ) else: - fitted_clf = HistGenericBoostingClassifier( + fitted_clf = GenericBoostingClassifier( base_model=model(**kwargs), verbose=self.verbose, + hist=True, ) if self.verbose > 0: From 4b81c72c443bf0ffd2b813690a31a1bae87b8960 Mon Sep 17 
00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 10:29:29 +0100 Subject: [PATCH 24/26] fix histo Pt.7 --- mlsauce/lazybooster/lazyboosterclassif.py | 2 +- mlsauce/lazybooster/lazyboosterregression.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlsauce/lazybooster/lazyboosterclassif.py b/mlsauce/lazybooster/lazyboosterclassif.py index 963cb46..6b40a94 100644 --- a/mlsauce/lazybooster/lazyboosterclassif.py +++ b/mlsauce/lazybooster/lazyboosterclassif.py @@ -26,7 +26,7 @@ f1_score, ) from .config import REGRESSORS, MTASKREGRESSORS -from ..booster import GenericBoostingClassifier, GenericBoostingClassifier +from ..booster import GenericBoostingClassifier from ..multitaskregressor import MultiTaskRegressor import warnings diff --git a/mlsauce/lazybooster/lazyboosterregression.py b/mlsauce/lazybooster/lazyboosterregression.py index 5d94b75..0fc6a03 100644 --- a/mlsauce/lazybooster/lazyboosterregression.py +++ b/mlsauce/lazybooster/lazyboosterregression.py @@ -22,7 +22,7 @@ r2_score ) from .config import REGRESSORS -from ..booster import GenericBoostingRegressor, HistGenericBoostingRegressor +from ..booster import GenericBoostingRegressor import warnings From 3c32436a07344286f94d7d21d64ee2c7b03d54c9 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 11:10:02 +0100 Subject: [PATCH 25/26] fix histo Pt.8 --- mlsauce/booster/_booster_classifier.py | 3 +++ mlsauce/booster/_booster_regressor.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index ef5d6ec..f6d87d3 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -496,6 +496,9 @@ def predict_proba(self, X, **kwargs): if isinstance(X, pd.DataFrame): X = X.values + if self.hist: + X = get_histo_features(X, bins=self.hist_bins_) + if self.degree is not None: X = self.poly_.transform(X) diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 2ceeedb..3d170e5 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -359,6 +359,9 @@ def predict(self, X, level=95, method=None, histo=False, **kwargs): if isinstance(X, pd.DataFrame): X = X.values + + if self.hist: + X = get_histo_features(X, bins=self.hist_bins_) if self.degree is not None: X = self.poly_.transform(X) From 4a530bba99970855aed3001b58870f0253b05f06 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 28 Oct 2024 11:27:54 +0100 Subject: [PATCH 26/26] fix histo Pt.9 --- mlsauce/booster/_booster_classifier.py | 4 ++-- mlsauce/booster/_booster_regressor.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index f6d87d3..eb4938f 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -403,7 +403,7 @@ def fit(self, X, y, **kwargs): if isinstance(X, pd.DataFrame): X = X.values - if self.hist: + if self.hist == True: X, self.hist_bins_ = get_histo_features(X) if isinstance(y, pd.Series): @@ -496,7 +496,7 @@ def predict_proba(self, X, **kwargs): if isinstance(X, pd.DataFrame): X = X.values - if self.hist: + if self.hist == True: X = get_histo_features(X, bins=self.hist_bins_) if self.degree is not None: diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 3d170e5..8954113 100644 --- a/mlsauce/booster/_booster_regressor.py +++ 
b/mlsauce/booster/_booster_regressor.py @@ -273,7 +273,7 @@ def fit(self, X, y, **kwargs): if isinstance(X, pd.DataFrame): X = X.values - if self.hist: + if self.hist == True: X, self.hist_bins_ = get_histo_features(X) if isinstance(y, pd.Series): @@ -360,7 +360,7 @@ def predict(self, X, level=95, method=None, histo=False, **kwargs): if isinstance(X, pd.DataFrame): X = X.values - if self.hist: + if self.hist == True: X = get_histo_features(X, bins=self.hist_bins_) if self.degree is not None:
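End state of the series, for orientation: the standalone Hist* classes are gone, and histogram features live behind the hist=True / bins="auto" keywords of GenericBoostingClassifier and GenericBoostingRegressor (patch 22); fit() bins each column with numpy's "auto" histogram, pads the edges with +/-1e10 sentinels so every value lands in some bin, replaces each value by the float index of the half-open bin (lo, hi] containing it, caches the edges in hist_bins_, and the predict paths re-bin incoming data with the cached edges (patches 16, 17, 25 and 26). The sketch below re-implements just that binning rule in plain numpy so it can be run without mlsauce; the helper names hist_bin_edges and bin_column are invented for the illustration:

import numpy as np

def hist_bin_edges(x):
    # numpy's "auto" rule picks the bin count; the +/-1e10 sentinels mean
    # out-of-sample values still fall into the first or last bin
    _, edges = np.histogram(x, bins="auto")
    return np.concatenate([[-1e10], edges, [1e10]])

def bin_column(x, edges):
    # float index i such that edges[i] < x <= edges[i + 1], mirroring the
    # (lo, hi] rule of assign_values_to_input after patch 17
    return (np.searchsorted(edges, x, side="left") - 1).astype(float)

rng = np.random.default_rng(123)
X_train = rng.normal(size=(200, 3))
X_test = rng.normal(size=(50, 3))

# "fit": learn one set of edges per column; "predict": re-bin with them
edges_per_col = [hist_bin_edges(X_train[:, j]) for j in range(X_train.shape[1])]
X_train_hist = np.column_stack(
    [bin_column(X_train[:, j], e) for j, e in enumerate(edges_per_col)]
)
X_test_hist = np.column_stack(
    [bin_column(X_test[:, j], e) for j, e in enumerate(edges_per_col)]
)
print(X_train_hist[:3])
print(X_test_hist[:3])

Compared with the per-value walk over the bin dictionary in assign_values_to_input, np.searchsorted performs the same (lo, hi] lookup vectorized, in O(n log n_bins) rather than O(n * n_bins); it is a natural follow-up optimization, not what the patched code currently does.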