sklearnのLogisticRegressionの仕様変更への対応

taroyabuki · Dec 7, 2024 · f241474 · f241474
1 parent 225edbe
commit f241474
Show file tree

Hide file tree

Showing 3 changed files with 515 additions and 506 deletions.
diff --git a/code/Python-notebook/python-10.ipynb b/code/Python-notebook/python-10.ipynb
@@ -1,13 +1,13 @@
 {
   "cells": [
     {
-      "id": "9d8e53fb",
+      "id": "ff260c33",
       "cell_type": "markdown",
       "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n",
       "metadata": {}
     },
     {
-      "id": "164e2b63",
+      "id": "a846b6c3",
       "cell_type": "markdown",
       "source": "## 10.1 2\u5024\u5206\u985e\u306e\u6027\u80fd\u6307\u6a19",
       "metadata": {}
@@ -24,7 +24,7 @@
         "y       = np.array([  0,   1,   1,   0,   1,   0,    1,   0,   0,   1])\n",
         "y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5])"
       ],
-      "id": "57e878db-1775-4a97-9f32-4900699f93ae"
+      "id": "11a1e284-ac58-440a-b2b4-a5dd93574560"
     },
     {
       "cell_type": "code",
@@ -35,7 +35,7 @@
         "y_ = np.array([1 if 0.5 <= p else 0 for p in y_score])\n",
         "y_"
       ],
-      "id": "37394cc7-6be5-4d4a-adca-79164b2c695d"
+      "id": "d942efe9-f449-4159-a7aa-c28552eabb3a"
     },
     {
       "cell_type": "code",
@@ -47,10 +47,10 @@
         "\n",
         "print(classification_report(y_true=y, y_pred=y_))"
       ],
-      "id": "5306637f-68c2-4f04-b1f0-05641acdfd17"
+      "id": "0bdc88d0-23a0-4ad2-a11c-2ac696fdf9e4"
     },
     {
-      "id": "18b3dc31",
+      "id": "d604c426",
       "cell_type": "markdown",
       "source": "## 10.2 \u30c8\u30ec\u30fc\u30c9\u30aa\u30d5",
       "metadata": {}
@@ -72,7 +72,7 @@
         "[sum((y == 0) & (y_ == 1)) / sum(y == 0), # FPR\n",
         " sum((y == 1) & (y_ == 1)) / sum(y == 1)] # TPR"
       ],
-      "id": "958f218a-454f-4a5c-84a8-298bf09e4242"
+      "id": "6d569fc5-e550-4493-a96b-941895171bcc"
     },
     {
       "cell_type": "code",
@@ -85,7 +85,7 @@
         "                              pos_label=1) # 1\u304c\u967d\u6027\u3067\u3042\u308b\uff0e\n",
         "RocCurveDisplay(fpr=my_fpr, tpr=my_tpr).plot()"
       ],
-      "id": "29717604-1938-44bb-97d9-2d4f3745e9ca"
+      "id": "b544b303-6454-4a56-81e9-411f44bd16d4"
     },
     {
       "cell_type": "code",
@@ -95,7 +95,7 @@
       "source": [
         "auc(x=my_fpr, y=my_tpr)"
       ],
-      "id": "282b7bc6-7063-4060-b40f-5ab5a4ffd982"
+      "id": "81ee69d1-b673-42b9-bbed-c5ce0f1eee5b"
     },
     {
       "cell_type": "code",
@@ -106,7 +106,7 @@
         "[sum((y == 1) & (y_ == 1)) / sum(y  == 1), # Recall == TPR\n",
         " sum((y == 1) & (y_ == 1)) / sum(y_ == 1)] # Precision"
       ],
-      "id": "f19e1b8d-ddeb-4fc3-94a0-6610a800f522"
+      "id": "3d6e046b-0dea-4348-ad54-e3ca256861b5"
     },
     {
       "cell_type": "code",
@@ -119,7 +119,7 @@
         "                                                    pos_label=1)\n",
         "PrecisionRecallDisplay(precision=my_precision, recall=my_recall).plot()"
       ],
-      "id": "be5989ed-9acd-4ec8-8cdf-652eabd9fe9c"
+      "id": "05130de7-6990-42b5-b7b7-fc6405167a10"
     },
     {
       "cell_type": "code",
@@ -129,10 +129,10 @@
       "source": [
         "auc(x=my_recall, y=my_precision)"
       ],
-      "id": "469fa5e4-4c09-494c-b7f0-70996a31ecb2"
+      "id": "d139ddb9-033f-4f67-89a4-7f507593a148"
     },
     {
-      "id": "4b21162d",
+      "id": "0e178057",
       "cell_type": "markdown",
       "source": "## 10.3 \u30bf\u30a4\u30bf\u30cb\u30c3\u30af",
       "metadata": {}
@@ -155,7 +155,7 @@
         "          '/fromzero/master/data/titanic.csv')\n",
         "my_data = pd.read_csv(my_url)"
       ],
-      "id": "754fc193-173e-46d8-9aa4-ba11a01b861a"
+      "id": "3819263e-3a4a-499f-8904-9b39c0280a0b"
     },
     {
       "cell_type": "code",
@@ -165,7 +165,7 @@
       "source": [
         "my_data.head()"
       ],
-      "id": "9f8f6ea7-cc51-43b2-ac22-0fdaba0ac24c"
+      "id": "ad8f4801-4abc-4089-bb30-391cb7940dfc"
     },
     {
       "cell_type": "code",
@@ -181,7 +181,7 @@
         "                                         min_impurity_decrease=0.01))])\n",
         "my_pipeline.fit(X, y)"
       ],
-      "id": "b618f543-4682-4c88-9c75-b399289a4a87"
+      "id": "bf0be6be-8d21-439e-886a-b664af0ecf32"
     },
     {
       "cell_type": "code",
@@ -201,7 +201,7 @@
         "    filled=True)\n",
         "graphviz.Source(my_dot)"
       ],
-      "id": "5c93c18b-9f14-4e12-aeaa-5875d391b15b"
+      "id": "dae6ea24-c8fb-4222-93a5-02944c1ce574"
     },
     {
       "cell_type": "code",
@@ -215,7 +215,7 @@
         "    n_jobs=-1)\n",
         "my_scores.mean()"
       ],
-      "id": "1000fb3e-4704-48f2-b85d-7b0c8839eb36"
+      "id": "21567e83-fcb2-4321-826a-23814c035b9d"
     },
     {
       "cell_type": "code",
@@ -236,10 +236,10 @@
         "\n",
         "RocCurveDisplay(fpr=my_fpr, tpr=my_tpr, roc_auc=my_auc).plot()"
       ],
-      "id": "932c6d8f-ae20-477d-852e-f823710e57cf"
+      "id": "047e6e11-3636-48ce-b894-c796dbe2e1bb"
     },
     {
-      "id": "fcb2ef07",
+      "id": "2846ebd1",
       "cell_type": "markdown",
       "source": "## 10.4 \u30ed\u30b8\u30b9\u30c6\u30a3\u30c3\u30af\u56de\u5e30",
       "metadata": {}
@@ -257,15 +257,17 @@
         "y = 1 / (1 + np.exp(-x))\n",
         "plt.plot(x, y)"
       ],
-      "id": "3fc1f320-50bd-44d9-8b17-3388986801f5"
+      "id": "00debbab-4d36-49ad-b077-a01a52ffe078"
     },
     {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
+        "import sklearn\n",
         "import pandas as pd\n",
+        "from packaging.version import parse\n",
         "from sklearn.linear_model import LogisticRegression\n",
         "from sklearn.model_selection import cross_val_score, LeaveOneOut\n",
         "from sklearn.pipeline import Pipeline\n",
@@ -277,11 +279,12 @@
         "\n",
         "X, y = my_data.iloc[:, 0:3], my_data.Survived\n",
         "\n",
+        "none = 'none' if parse(sklearn.__version__) < parse(\"1.2\") else None  # None is accepted from 1.2; 'none' is deprecated there and removed in 1.4\n",
         "my_pipeline = Pipeline([('ohe', OneHotEncoder(drop='first')),\n",
-        "                        ('lr', LogisticRegression(penalty='none'))])\n",
+        "                        ('lr', LogisticRegression(penalty=none))])\n",
         "my_pipeline.fit(X, y)"
       ],
-      "id": "6ebaef90-0abb-48f4-9d26-c2d96935aedd"
+      "id": "345720cd-774d-42fe-a160-bf15c86e7d88"
     },
     {
       "cell_type": "code",
@@ -300,7 +303,7 @@
         "pd.Series(my_lr.coef_[0],\n",
         "          index=tmp)"
       ],
-      "id": "99cb7d91-e032-48a9-8fb5-15d2e80571f6"
+      "id": "42014646-0fc0-4c6e-b61b-411e494abdd3"
     },
     {
       "cell_type": "code",
@@ -314,7 +317,7 @@
         "    n_jobs=-1)\n",
         "my_scores.mean()"
       ],
-      "id": "45386452-0d66-4642-847d-fa8732ca7f92"
+      "id": "82b09321-2168-44d0-982d-9d6ec25e10ee"
     }
   ],
   "nbformat": 4,

diff --git a/code/python.Rmd b/code/python.Rmd
@@ -2653,7 +2653,9 @@ plt.plot(x, y)
 ```
 
 ```{python}
+import sklearn
 import pandas as pd
+from packaging.version import parse
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import cross_val_score, LeaveOneOut
 from sklearn.pipeline import Pipeline
@@ -2665,8 +2667,9 @@ my_data = pd.read_csv(my_url)
 
 X, y = my_data.iloc[:, 0:3], my_data.Survived
 
+none = 'none' if parse(sklearn.__version__) < parse("1.2") else None  # None is accepted from 1.2; 'none' is deprecated there and removed in 1.4
 my_pipeline = Pipeline([('ohe', OneHotEncoder(drop='first')),
-                        ('lr', LogisticRegression(penalty='none'))])
+                        ('lr', LogisticRegression(penalty=none))])
 my_pipeline.fit(X, y)
 ```