add GradientBoosting 'estimators' Pt.4

thierrymoudiki · Aug 3, 2024 · c3e1918 · c3e1918
1 parent 9909ce8
commit c3e1918
Show file tree

Hide file tree

Showing 6 changed files with 200 additions and 257 deletions.
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -30,8 +30,8 @@ jobs:
       - name: Run examples 
         run: pip install .&&find examples -maxdepth 2 -name "*.py" -exec  python3 {} \;
 
-      #- name: Publish to PyPI                
-      #  uses: pypa/gh-action-pypi-publish@release/v1
-      #  with:
-      #    password: ${{ secrets.PYPI_GLOBAL_UB }}
-      #    repository-url: https://upload.pypi.org/legacy/
+      - name: Publish to PyPI                
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_GLOBAL_UB }}
+          repository-url: https://upload.pypi.org/legacy/
diff --git a/unifiedbooster/demo/thierrymoudiki_20240803_unifiedbooster.ipynb b/unifiedbooster/demo/thierrymoudiki_20240803_unifiedbooster.ipynb
@@ -0,0 +1,190 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install git+https://github.com/thierrymoudiki/unifiedbooster.git --upgrade"
+      ],
+      "metadata": {
+        "id": "R9R4-y5Obsqm"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import unifiedbooster as ub\n",
+        "from sklearn.datasets import load_iris, load_breast_cancer, load_wine\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.metrics import accuracy_score\n",
+        "\n",
+        "datasets = [load_iris(), load_breast_cancer(), load_wine()]\n",
+        "\n",
+        "for dataset in datasets:\n",
+        "\n",
+        "  X, y = dataset.data, dataset.target\n",
+        "  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
+        "\n",
+        "  # Initialize the unified regressor (example with XGBoost)\n",
+        "  regressor1 = ub.GBDTClassifier(model_type='xgboost')\n",
+        "  regressor2 = ub.GBDTClassifier(model_type='catboost')\n",
+        "  regressor3 = ub.GBDTClassifier(model_type='lightgbm')\n",
+        "  regressor4 = ub.GBDTClassifier(model_type='gradientboosting')\n",
+        "\n",
+        "  # Fit the model\n",
+        "  regressor1.fit(X_train, y_train)\n",
+        "  regressor2.fit(X_train, y_train)\n",
+        "  regressor3.fit(X_train, y_train)\n",
+        "  regressor4.fit(X_train, y_train)\n",
+        "\n",
+        "  # Predict on the test set\n",
+        "  y_pred1 = regressor1.predict(X_test)\n",
+        "  y_pred2 = regressor2.predict(X_test)\n",
+        "  y_pred3 = regressor3.predict(X_test)\n",
+        "  y_pred4 = regressor4.predict(X_test)\n",
+        "\n",
+        "  # Evaluate the model\n",
+        "  accuracy1 = accuracy_score(y_test, y_pred1)\n",
+        "  accuracy2 = accuracy_score(y_test, y_pred2)\n",
+        "  accuracy3 = accuracy_score(y_test, y_pred3)\n",
+        "  accuracy4 = accuracy_score(y_test, y_pred4)\n",
+        "  print(\"-------------------------\")\n",
+        "  print(f\"Classification Accuracy xgboost: {accuracy1:.2f}\")\n",
+        "  print(f\"Classification Accuracy catboost: {accuracy2:.2f}\")\n",
+        "  print(f\"Classification Accuracy lightgbm: {accuracy3:.2f}\")\n",
+        "  print(f\"Classification Accuracy gradientboosting: {accuracy4:.2f}\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "GlNUpp9nb_cT",
+        "outputId": "f845bb72-3976-47e3-e6a0-61673822fc0c"
+      },
+      "execution_count": 11,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "-------------------------\n",
+            "Classification Accuracy xgboost: 1.00\n",
+            "Classification Accuracy catboost: 1.00\n",
+            "Classification Accuracy lightgbm: 1.00\n",
+            "Classification Accuracy gradientboosting: 1.00\n",
+            "-------------------------\n",
+            "Classification Accuracy xgboost: 0.96\n",
+            "Classification Accuracy catboost: 0.97\n",
+            "Classification Accuracy lightgbm: 0.96\n",
+            "Classification Accuracy gradientboosting: 0.96\n",
+            "-------------------------\n",
+            "Classification Accuracy xgboost: 0.97\n",
+            "Classification Accuracy catboost: 1.00\n",
+            "Classification Accuracy lightgbm: 1.00\n",
+            "Classification Accuracy gradientboosting: 0.94\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "Yjr1cXR2xVoj"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import numpy as np\n",
+        "import unifiedbooster as ub\n",
+        "from sklearn.datasets import fetch_california_housing, load_diabetes\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.metrics import mean_squared_error\n",
+        "\n",
+        "\n",
+        "datasets = [fetch_california_housing(), load_diabetes()]\n",
+        "\n",
+        "for dataset in datasets:\n",
+        "\n",
+        "  # Load dataset\n",
+        "  X, y = dataset.data, dataset.target\n",
+        "\n",
+        "  # Split dataset into training and testing sets\n",
+        "  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
+        "\n",
+        "  # Initialize the unified regressor (example with XGBoost)\n",
+        "  regressor1 = ub.GBDTRegressor(model_type='xgboost')\n",
+        "  regressor2 = ub.GBDTRegressor(model_type='catboost')\n",
+        "  regressor3 = ub.GBDTRegressor(model_type='lightgbm')\n",
+        "  regressor4 = ub.GBDTRegressor(model_type='gradientboosting')\n",
+        "\n",
+        "  # Fit the model\n",
+        "  regressor1.fit(X_train, y_train)\n",
+        "  regressor2.fit(X_train, y_train)\n",
+        "  regressor3.fit(X_train, y_train)\n",
+        "  regressor4.fit(X_train, y_train)\n",
+        "\n",
+        "  # Predict on the test set\n",
+        "  y_pred1 = regressor1.predict(X_test)\n",
+        "  y_pred2 = regressor2.predict(X_test)\n",
+        "  y_pred3 = regressor3.predict(X_test)\n",
+        "  y_pred4 = regressor4.predict(X_test)\n",
+        "\n",
+        "  # Evaluate the model\n",
+        "  mse1 = np.sqrt(mean_squared_error(y_test, y_pred1))\n",
+        "  mse2 = np.sqrt(mean_squared_error(y_test, y_pred2))\n",
+        "  mse3 = np.sqrt(mean_squared_error(y_test, y_pred3))\n",
+        "  mse4 = np.sqrt(mean_squared_error(y_test, y_pred4))\n",
+        "  print(\"-------------------------\")\n",
+        "  print(f\"Regression Root Mean Squared Error xgboost: {mse1:.2f}\")\n",
+        "  print(f\"Regression Root Mean Squared Error catboost: {mse2:.2f}\")\n",
+        "  print(f\"Regression Root Mean Squared Error lightgbm: {mse3:.2f}\")\n",
+        "  print(f\"Regression Root Mean Squared Error gradientboosting: {mse4:.2f}\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "u2vdE0mulk8q",
+        "outputId": "0a29b466-769e-416f-bdbe-3cccd48bf5eb"
+      },
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "-------------------------\n",
+            "Regression Root Mean Squared Error xgboost: 0.54\n",
+            "Regression Root Mean Squared Error catboost: 0.57\n",
+            "Regression Root Mean Squared Error lightgbm: 0.54\n",
+            "Regression Root Mean Squared Error gradientboosting: 0.54\n",
+            "-------------------------\n",
+            "Regression Root Mean Squared Error xgboost: 54.40\n",
+            "Regression Root Mean Squared Error catboost: 52.12\n",
+            "Regression Root Mean Squared Error lightgbm: 53.19\n",
+            "Regression Root Mean Squared Error gradientboosting: 53.92\n"
+          ]
+        }
+      ]
+    }
+  ]
+}