Skip to content

Commit

Permalink
sklearnのLogisticRegressionの仕様変更への対応
Browse files Browse the repository at this point in the history
  • Loading branch information
taroyabuki committed Dec 7, 2024
1 parent 225edbe commit f241474
Show file tree
Hide file tree
Showing 3 changed files with 515 additions and 506 deletions.
53 changes: 28 additions & 25 deletions code/Python-notebook/python-10.ipynb
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{
"cells": [
{
"id": "9d8e53fb",
"id": "ff260c33",
"cell_type": "markdown",
"source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n",
"metadata": {}
},
{
"id": "164e2b63",
"id": "a846b6c3",
"cell_type": "markdown",
"source": "## 10.1 2\u5024\u5206\u985e\u306e\u6027\u80fd\u6307\u6a19",
"metadata": {}
Expand All @@ -24,7 +24,7 @@
"y = np.array([ 0, 1, 1, 0, 1, 0, 1, 0, 0, 1])\n",
"y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5])"
],
"id": "57e878db-1775-4a97-9f32-4900699f93ae"
"id": "11a1e284-ac58-440a-b2b4-a5dd93574560"
},
{
"cell_type": "code",
Expand All @@ -35,7 +35,7 @@
"y_ = np.array([1 if 0.5 <= p else 0 for p in y_score])\n",
"y_"
],
"id": "37394cc7-6be5-4d4a-adca-79164b2c695d"
"id": "d942efe9-f449-4159-a7aa-c28552eabb3a"
},
{
"cell_type": "code",
Expand All @@ -47,10 +47,10 @@
"\n",
"print(classification_report(y_true=y, y_pred=y_))"
],
"id": "5306637f-68c2-4f04-b1f0-05641acdfd17"
"id": "0bdc88d0-23a0-4ad2-a11c-2ac696fdf9e4"
},
{
"id": "18b3dc31",
"id": "d604c426",
"cell_type": "markdown",
"source": "## 10.2 \u30c8\u30ec\u30fc\u30c9\u30aa\u30d5",
"metadata": {}
Expand All @@ -72,7 +72,7 @@
"[sum((y == 0) & (y_ == 1)) / sum(y == 0), # FPR\n",
" sum((y == 1) & (y_ == 1)) / sum(y == 1)] # TPR"
],
"id": "958f218a-454f-4a5c-84a8-298bf09e4242"
"id": "6d569fc5-e550-4493-a96b-941895171bcc"
},
{
"cell_type": "code",
Expand All @@ -85,7 +85,7 @@
" pos_label=1) # 1\u304c\u967d\u6027\u3067\u3042\u308b\uff0e\n",
"RocCurveDisplay(fpr=my_fpr, tpr=my_tpr).plot()"
],
"id": "29717604-1938-44bb-97d9-2d4f3745e9ca"
"id": "b544b303-6454-4a56-81e9-411f44bd16d4"
},
{
"cell_type": "code",
Expand All @@ -95,7 +95,7 @@
"source": [
"auc(x=my_fpr, y=my_tpr)"
],
"id": "282b7bc6-7063-4060-b40f-5ab5a4ffd982"
"id": "81ee69d1-b673-42b9-bbed-c5ce0f1eee5b"
},
{
"cell_type": "code",
Expand All @@ -106,7 +106,7 @@
"[sum((y == 1) & (y_ == 1)) / sum(y == 1), # Recall == TPR\n",
" sum((y == 1) & (y_ == 1)) / sum(y_ == 1)] # Precision"
],
"id": "f19e1b8d-ddeb-4fc3-94a0-6610a800f522"
"id": "3d6e046b-0dea-4348-ad54-e3ca256861b5"
},
{
"cell_type": "code",
Expand All @@ -119,7 +119,7 @@
" pos_label=1)\n",
"PrecisionRecallDisplay(precision=my_precision, recall=my_recall).plot()"
],
"id": "be5989ed-9acd-4ec8-8cdf-652eabd9fe9c"
"id": "05130de7-6990-42b5-b7b7-fc6405167a10"
},
{
"cell_type": "code",
Expand All @@ -129,10 +129,10 @@
"source": [
"auc(x=my_recall, y=my_precision)"
],
"id": "469fa5e4-4c09-494c-b7f0-70996a31ecb2"
"id": "d139ddb9-033f-4f67-89a4-7f507593a148"
},
{
"id": "4b21162d",
"id": "0e178057",
"cell_type": "markdown",
"source": "## 10.3 \u30bf\u30a4\u30bf\u30cb\u30c3\u30af",
"metadata": {}
Expand All @@ -155,7 +155,7 @@
" '/fromzero/master/data/titanic.csv')\n",
"my_data = pd.read_csv(my_url)"
],
"id": "754fc193-173e-46d8-9aa4-ba11a01b861a"
"id": "3819263e-3a4a-499f-8904-9b39c0280a0b"
},
{
"cell_type": "code",
Expand All @@ -165,7 +165,7 @@
"source": [
"my_data.head()"
],
"id": "9f8f6ea7-cc51-43b2-ac22-0fdaba0ac24c"
"id": "ad8f4801-4abc-4089-bb30-391cb7940dfc"
},
{
"cell_type": "code",
Expand All @@ -181,7 +181,7 @@
" min_impurity_decrease=0.01))])\n",
"my_pipeline.fit(X, y)"
],
"id": "b618f543-4682-4c88-9c75-b399289a4a87"
"id": "bf0be6be-8d21-439e-886a-b664af0ecf32"
},
{
"cell_type": "code",
Expand All @@ -201,7 +201,7 @@
" filled=True)\n",
"graphviz.Source(my_dot)"
],
"id": "5c93c18b-9f14-4e12-aeaa-5875d391b15b"
"id": "dae6ea24-c8fb-4222-93a5-02944c1ce574"
},
{
"cell_type": "code",
Expand All @@ -215,7 +215,7 @@
" n_jobs=-1)\n",
"my_scores.mean()"
],
"id": "1000fb3e-4704-48f2-b85d-7b0c8839eb36"
"id": "21567e83-fcb2-4321-826a-23814c035b9d"
},
{
"cell_type": "code",
Expand All @@ -236,10 +236,10 @@
"\n",
"RocCurveDisplay(fpr=my_fpr, tpr=my_tpr, roc_auc=my_auc).plot()"
],
"id": "932c6d8f-ae20-477d-852e-f823710e57cf"
"id": "047e6e11-3636-48ce-b894-c796dbe2e1bb"
},
{
"id": "fcb2ef07",
"id": "2846ebd1",
"cell_type": "markdown",
"source": "## 10.4 \u30ed\u30b8\u30b9\u30c6\u30a3\u30c3\u30af\u56de\u5e30",
"metadata": {}
Expand All @@ -257,15 +257,17 @@
"y = 1 / (1 + np.exp(-x))\n",
"plt.plot(x, y)"
],
"id": "3fc1f320-50bd-44d9-8b17-3388986801f5"
"id": "00debbab-4d36-49ad-b077-a01a52ffe078"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sklearn\n",
"import pandas as pd\n",
"from packaging.version import parse\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import cross_val_score, LeaveOneOut\n",
"from sklearn.pipeline import Pipeline\n",
Expand All @@ -277,11 +279,12 @@
"\n",
"X, y = my_data.iloc[:, 0:3], my_data.Survived\n",
"\n",
"none = 'none' if parse(sklearn.__version__) < parse(\"1.4\") else None\n",
"my_pipeline = Pipeline([('ohe', OneHotEncoder(drop='first')),\n",
" ('lr', LogisticRegression(penalty='none'))])\n",
" ('lr', LogisticRegression(penalty=none))])\n",
"my_pipeline.fit(X, y)"
],
"id": "6ebaef90-0abb-48f4-9d26-c2d96935aedd"
"id": "345720cd-774d-42fe-a160-bf15c86e7d88"
},
{
"cell_type": "code",
Expand All @@ -300,7 +303,7 @@
"pd.Series(my_lr.coef_[0],\n",
" index=tmp)"
],
"id": "99cb7d91-e032-48a9-8fb5-15d2e80571f6"
"id": "42014646-0fc0-4c6e-b61b-411e494abdd3"
},
{
"cell_type": "code",
Expand All @@ -314,7 +317,7 @@
" n_jobs=-1)\n",
"my_scores.mean()"
],
"id": "45386452-0d66-4642-847d-fa8732ca7f92"
"id": "82b09321-2168-44d0-982d-9d6ec25e10ee"
}
],
"nbformat": 4,
Expand Down
5 changes: 4 additions & 1 deletion code/python.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -2653,7 +2653,9 @@ plt.plot(x, y)
```

```{python}
import sklearn
import pandas as pd
from packaging.version import parse
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, LeaveOneOut
from sklearn.pipeline import Pipeline
Expand All @@ -2665,8 +2667,9 @@ my_data = pd.read_csv(my_url)
X, y = my_data.iloc[:, 0:3], my_data.Survived
# scikit-learn 1.2 introduced penalty=None and deprecated the string 'none'
# (the string was removed in 1.4). Switching at 1.2 instead of 1.4 keeps
# old releases working and avoids the FutureWarning on versions 1.2-1.3.
none = 'none' if parse(sklearn.__version__) < parse("1.2") else None
my_pipeline = Pipeline([('ohe', OneHotEncoder(drop='first')),
('lr', LogisticRegression(penalty='none'))])
('lr', LogisticRegression(penalty=none))])
my_pipeline.fit(X, y)
```

Expand Down
Loading

0 comments on commit f241474

Please sign in to comment.