diff --git a/code/Python-notebook/python-10.ipynb b/code/Python-notebook/python-10.ipynb index 50bad30..93b253c 100644 --- a/code/Python-notebook/python-10.ipynb +++ b/code/Python-notebook/python-10.ipynb @@ -1,13 +1,13 @@ { "cells": [ { - "id": "9d8e53fb", + "id": "ff260c33", "cell_type": "markdown", "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)\n\n\n", "metadata": {} }, { - "id": "164e2b63", + "id": "a846b6c3", "cell_type": "markdown", "source": "## 10.1 2\u5024\u5206\u985e\u306e\u6027\u80fd\u6307\u6a19", "metadata": {} @@ -24,7 +24,7 @@ "y = np.array([ 0, 1, 1, 0, 1, 0, 1, 0, 0, 1])\n", "y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5])" ], - "id": "57e878db-1775-4a97-9f32-4900699f93ae" + "id": "11a1e284-ac58-440a-b2b4-a5dd93574560" }, { "cell_type": "code", @@ -35,7 +35,7 @@ "y_ = np.array([1 if 0.5 <= p else 0 for p in y_score])\n", "y_" ], - "id": "37394cc7-6be5-4d4a-adca-79164b2c695d" + "id": "d942efe9-f449-4159-a7aa-c28552eabb3a" }, { "cell_type": "code", @@ -47,10 +47,10 @@ "\n", "print(classification_report(y_true=y, y_pred=y_))" ], - "id": "5306637f-68c2-4f04-b1f0-05641acdfd17" + "id": "0bdc88d0-23a0-4ad2-a11c-2ac696fdf9e4" }, { - "id": "18b3dc31", + "id": "d604c426", "cell_type": "markdown", "source": "## 10.2 \u30c8\u30ec\u30fc\u30c9\u30aa\u30d5", "metadata": {} @@ -72,7 +72,7 @@ "[sum((y == 0) & (y_ == 1)) / sum(y == 0), # FPR\n", " sum((y == 1) & (y_ == 1)) / sum(y == 1)] # TPR" ], - "id": "958f218a-454f-4a5c-84a8-298bf09e4242" + "id": "6d569fc5-e550-4493-a96b-941895171bcc" }, { "cell_type": "code", @@ -85,7 +85,7 @@ " pos_label=1) # 1\u304c\u967d\u6027\u3067\u3042\u308b\uff0e\n", "RocCurveDisplay(fpr=my_fpr, tpr=my_tpr).plot()" ], - "id": "29717604-1938-44bb-97d9-2d4f3745e9ca" + "id": "b544b303-6454-4a56-81e9-411f44bd16d4" }, { "cell_type": "code", @@ -95,7 +95,7 @@ "source": [ "auc(x=my_fpr, y=my_tpr)" ], - "id": "282b7bc6-7063-4060-b40f-5ab5a4ffd982" + "id": "81ee69d1-b673-42b9-bbed-c5ce0f1eee5b" }, { "cell_type": "code", @@ -106,7 +106,7 @@ "[sum((y == 1) & (y_ == 1)) / sum(y == 1), # Recall == TPR\n", " sum((y == 1) & (y_ == 1)) / sum(y_ == 1)] # Precision" ], - "id": "f19e1b8d-ddeb-4fc3-94a0-6610a800f522" + "id": "3d6e046b-0dea-4348-ad54-e3ca256861b5" }, { "cell_type": "code", @@ -119,7 +119,7 @@ " pos_label=1)\n", "PrecisionRecallDisplay(precision=my_precision, recall=my_recall).plot()" ], - "id": "be5989ed-9acd-4ec8-8cdf-652eabd9fe9c" + "id": "05130de7-6990-42b5-b7b7-fc6405167a10" }, { "cell_type": "code", @@ -129,10 +129,10 @@ "source": [ "auc(x=my_recall, y=my_precision)" ], - "id": "469fa5e4-4c09-494c-b7f0-70996a31ecb2" + "id": "d139ddb9-033f-4f67-89a4-7f507593a148" }, { - "id": "4b21162d", + "id": "0e178057", "cell_type": "markdown", "source": "## 10.3 \u30bf\u30a4\u30bf\u30cb\u30c3\u30af", "metadata": {} @@ -155,7 +155,7 @@ " '/fromzero/master/data/titanic.csv')\n", "my_data = pd.read_csv(my_url)" ], - "id": "754fc193-173e-46d8-9aa4-ba11a01b861a" + "id": "3819263e-3a4a-499f-8904-9b39c0280a0b" }, { "cell_type": "code", @@ -165,7 +165,7 @@ "source": [ "my_data.head()" ], - "id": "9f8f6ea7-cc51-43b2-ac22-0fdaba0ac24c" + "id": "ad8f4801-4abc-4089-bb30-391cb7940dfc" }, { "cell_type": "code", @@ -181,7 +181,7 @@ " min_impurity_decrease=0.01))])\n", "my_pipeline.fit(X, y)" ], - "id": "b618f543-4682-4c88-9c75-b399289a4a87" + "id": "bf0be6be-8d21-439e-886a-b664af0ecf32" }, { "cell_type": "code", @@ -201,7 +201,7 @@ " filled=True)\n", "graphviz.Source(my_dot)" ], - "id": "5c93c18b-9f14-4e12-aeaa-5875d391b15b" + "id": "dae6ea24-c8fb-4222-93a5-02944c1ce574" }, { "cell_type": "code", @@ -215,7 +215,7 @@ " n_jobs=-1)\n", "my_scores.mean()" ], - "id": "1000fb3e-4704-48f2-b85d-7b0c8839eb36" + "id": "21567e83-fcb2-4321-826a-23814c035b9d" }, { "cell_type": "code", @@ -236,10 +236,10 @@ "\n", "RocCurveDisplay(fpr=my_fpr, tpr=my_tpr, roc_auc=my_auc).plot()" ], - "id": "932c6d8f-ae20-477d-852e-f823710e57cf" + "id": "047e6e11-3636-48ce-b894-c796dbe2e1bb" }, { - "id": "fcb2ef07", + "id": "2846ebd1", "cell_type": "markdown", "source": "## 10.4 \u30ed\u30b8\u30b9\u30c6\u30a3\u30c3\u30af\u56de\u5e30", "metadata": {} @@ -257,7 +257,7 @@ "y = 1 / (1 + np.exp(-x))\n", "plt.plot(x, y)" ], - "id": "3fc1f320-50bd-44d9-8b17-3388986801f5" + "id": "00debbab-4d36-49ad-b077-a01a52ffe078" }, { "cell_type": "code", @@ -265,7 +265,9 @@ "metadata": {}, "outputs": [], "source": [ + "import sklearn\n", "import pandas as pd\n", + "from packaging.version import parse\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.model_selection import cross_val_score, LeaveOneOut\n", "from sklearn.pipeline import Pipeline\n", @@ -277,11 +279,12 @@ "\n", "X, y = my_data.iloc[:, 0:3], my_data.Survived\n", "\n", + "none = 'none' if parse(sklearn.__version__) < parse(\"1.4\") else None\n", "my_pipeline = Pipeline([('ohe', OneHotEncoder(drop='first')),\n", - " ('lr', LogisticRegression(penalty='none'))])\n", + " ('lr', LogisticRegression(penalty=none))])\n", "my_pipeline.fit(X, y)" ], - "id": "6ebaef90-0abb-48f4-9d26-c2d96935aedd" + "id": "345720cd-774d-42fe-a160-bf15c86e7d88" }, { "cell_type": "code", @@ -300,7 +303,7 @@ "pd.Series(my_lr.coef_[0],\n", " index=tmp)" ], - "id": "99cb7d91-e032-48a9-8fb5-15d2e80571f6" + "id": "42014646-0fc0-4c6e-b61b-411e494abdd3" }, { "cell_type": "code", @@ -314,7 +317,7 @@ " n_jobs=-1)\n", "my_scores.mean()" ], - "id": "45386452-0d66-4642-847d-fa8732ca7f92" + "id": "82b09321-2168-44d0-982d-9d6ec25e10ee" } ], "nbformat": 4, diff --git a/code/python.Rmd b/code/python.Rmd index e37dc1e..68c23d7 100644 --- a/code/python.Rmd +++ b/code/python.Rmd @@ -2653,7 +2653,9 @@ plt.plot(x, y) ``` ```{python} +import sklearn import pandas as pd +from packaging.version import parse from sklearn.linear_model import LogisticRegression from sklearn.model_selection import cross_val_score, LeaveOneOut from sklearn.pipeline import Pipeline @@ -2665,8 +2667,9 @@ my_data = pd.read_csv(my_url) X, y = my_data.iloc[:, 0:3], my_data.Survived +none = 'none' if parse(sklearn.__version__) < parse("1.4") else None my_pipeline = Pipeline([('ohe', OneHotEncoder(drop='first')), - ('lr', LogisticRegression(penalty='none'))]) + ('lr', LogisticRegression(penalty=none))]) my_pipeline.fit(X, y) ``` diff --git a/code/python.ipynb b/code/python.ipynb index 62c8614..9bd9b04 100644 --- a/code/python.ipynb +++ b/code/python.ipynb @@ -1,7 +1,7 @@ { "cells": [ { - "id": "857a3c1e", + "id": "95d97a25", "cell_type": "markdown", "source": "[\u8fbb\u771f\u543e\u30fb\u77e2\u5439\u592a\u6717\u300e\u30bc\u30ed\u304b\u3089\u306f\u3058\u3081\u308b\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u5165\u9580\u300f\uff08\u8b1b\u8ac7\u793e,\u00a02021\uff09](https://github.com/taroyabuki/fromzero)", "metadata": {} @@ -17,7 +17,7 @@ "if 'COLAB_GPU' in os.environ:\n", " !python -m pip install h2o pandarallel pca pmdarima | tail -n 1" ], - "id": "634dde09-0ae1-45c9-80d8-489af551bbea" + "id": "ab910195-9b54-4261-bcd5-795d73028a5a" }, { "cell_type": "code", @@ -28,64 +28,64 @@ "# \u3053\u308c\u306fPython\u306e\u30b3\u30fc\u30c9\u306e\u4f8b\u3067\u3059\uff0e\n", "1 + 1" ], - "id": "16b83fd9-e383-4b81-897f-97c7304918ed" + "id": "e2508dc8-e1a4-44f9-85bc-adf7d3f518f3" }, { - "id": "f2217df8", + "id": "698e3692", "cell_type": "markdown", "source": "# 1 \u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u3068\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\n\n\n", "metadata": {} }, { - "id": "58567dee", + "id": "da860ba3", "cell_type": "markdown", "source": "## 1.1 \u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u306e\u57fa\u672c\u64cd\u4f5c\n\n\n", "metadata": {} }, { - "id": "bf45d001", + "id": "1da30820", "cell_type": "markdown", "source": "## 1.2 \u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u306e\u4ed5\u7d44\u307f\n\n\n", "metadata": {} }, { - "id": "3137d029", + "id": "e2315fac", "cell_type": "markdown", "source": "# 2 \u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u306e\u305f\u3081\u306e\u74b0\u5883\n\n\n", "metadata": {} }, { - "id": "dce0876f", + "id": "25e78c75", "cell_type": "markdown", "source": "## 2.1 \u5b9f\u884c\u74b0\u5883\u306e\u9078\u629e\n\n\n", "metadata": {} }, { - "id": "b2ef2512", + "id": "6d3029cb", "cell_type": "markdown", "source": "## 2.2 \u30af\u30e9\u30a6\u30c9\n\n\n", "metadata": {} }, { - "id": "99806f42", + "id": "c6df8cac", "cell_type": "markdown", "source": "## 2.3 Docker\n\n\n", "metadata": {} }, { - "id": "892ee4f0", + "id": "1af68f56", "cell_type": "markdown", "source": "## 2.4 \u30bf\u30fc\u30df\u30ca\u30eb\u306e\u4f7f\u3044\u65b9\n\n\n", "metadata": {} }, { - "id": "ab1d8170", + "id": "33ea66ef", "cell_type": "markdown", "source": "## 2.5 R\u3068Python\n\n\n", "metadata": {} }, { - "id": "6ea32329", + "id": "f50d8cb6", "cell_type": "markdown", "source": "## 2.6 \u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u306e\u5229\u7528", "metadata": {} @@ -105,16 +105,16 @@ "1 + 3\n", "# 4 # \u8868\u793a\u3055\u308c\u308b\uff0e" ], - "id": "d4a7b953-2f97-4d88-82b6-73b2bd8a9098" + "id": "837b6528-df9e-4cce-80f7-ea8dc13c2523" }, { - "id": "a153d0b7", + "id": "6d167ef9", "cell_type": "markdown", "source": "# 3 R\u3068Python\n\n\n", "metadata": {} }, { - "id": "39af96ff", + "id": "9da784d3", "cell_type": "markdown", "source": "## 3.1 \u5165\u9580", "metadata": {} @@ -127,7 +127,7 @@ "source": [ "0x10" ], - "id": "8b73552b-3e05-48b3-ac63-800fd85cb62a" + "id": "459d00bb-062a-498e-8de4-3b41a7917efb" }, { "cell_type": "code", @@ -137,7 +137,7 @@ "source": [ "1.23e5" ], - "id": "48ce2590-a99b-4dc4-8a65-4fe96ed5fa49" + "id": "ace410c5-97f1-487a-850e-2d0e7befd302" }, { "cell_type": "code", @@ -147,7 +147,7 @@ "source": [ "2 * 3" ], - "id": "a3528b96-e175-4313-8b64-264af14c691e" + "id": "99bf438b-35fa-43f7-bdd7-760ea35a2243" }, { "cell_type": "code", @@ -157,7 +157,7 @@ "source": [ "10 / 3" ], - "id": "076983b5-4ca8-4cfe-b6a6-72e922face5d" + "id": "56a3b3a4-c6b4-4f25-bdba-84aaf13edf5a" }, { "cell_type": "code", @@ -169,7 +169,7 @@ "\n", "10 % 3 # \u4f59\u308a" ], - "id": "b085d1de-cddc-42e8-bd97-b95716a456d5" + "id": "e9a5d950-bd38-42cf-8267-198322e8b88a" }, { "cell_type": "code", @@ -184,7 +184,7 @@ "x, y = 20, 30 # \u307e\u3068\u3081\u3066\u540d\u4ed8\u3051\n", "x * y" ], - "id": "ba028c48-5dcd-42a7-935e-8f9c727b2d19" + "id": "88b7b531-2c1b-47c8-a3f9-033bd1b0e3ef" }, { "cell_type": "code", @@ -197,7 +197,7 @@ "\n", "x # \u5909\u6570\u540d\u3092\u8a55\u4fa1\u3059\u308b\uff0e" ], - "id": "3ad148d2-3c34-43d2-a48d-02502407471e" + "id": "f22b5f95-63de-40b8-8d28-4209b7e8e459" }, { "cell_type": "code", @@ -207,7 +207,7 @@ "source": [ "my_s = 'abcde'" ], - "id": "bbe63045-4eb6-43c3-a20b-a6e38ea7c454" + "id": "4a47cfa5-17f7-4532-8523-87342f858696" }, { "cell_type": "code", @@ -217,7 +217,7 @@ "source": [ "len(my_s)" ], - "id": "101aa9c9-5823-40a1-86ab-f2c86e9035af" + "id": "7acb6e74-6123-4f44-b894-99d58853f1cb" }, { "cell_type": "code", @@ -229,7 +229,7 @@ "# \u3042\u308b\u3044\u306f\n", "'This is ' + 'a' + ' pen.'" ], - "id": "df27b623-7bdf-42c9-9127-564bb8aa896e" + "id": "5ed62ca1-bfa4-4557-940a-3ea353d81893" }, { "cell_type": "code", @@ -239,7 +239,7 @@ "source": [ "my_s[1:4]" ], - "id": "a3dbf61c-bfaa-4cc9-84a4-89689759a6d3" + "id": "2637ccf9-4d3e-4f0b-b310-8bf536a9f3ab" }, { "cell_type": "code", @@ -250,7 +250,7 @@ "tmp = \"{} is {}.\"\n", "tmp.format('This', 'a pen')" ], - "id": "0d67712d-a6e4-4f2f-bd50-ab658855fa58" + "id": "45e215c4-4400-492c-8138-95b561e531cf" }, { "cell_type": "code", @@ -262,7 +262,7 @@ "\n", "1 < 0" ], - "id": "095d48f3-dc63-4f38-a8c1-e394b1f25e85" + "id": "d9d6d388-257d-40b2-9bac-5719f5951a26" }, { "cell_type": "code", @@ -275,7 +275,7 @@ "import math\n", "math.isclose(0.1 + 0.1 + 0.1, 0.3)" ], - "id": "ba6b3f9e-3273-4003-b865-31f21d72e940" + "id": "c2f12c43-3416-4320-ba58-f296c419300d" }, { "cell_type": "code", @@ -289,7 +289,7 @@ "\n", "not True # \u5426\u5b9a\uff08\u3067\u306a\u3044\uff09" ], - "id": "6f4c0c4d-0529-4009-bf17-53f8197c196b" + "id": "0af5a514-9202-4b9c-a2ad-a553c29baf05" }, { "cell_type": "code", @@ -299,7 +299,7 @@ "source": [ "0 if 3 < 5 else 10" ], - "id": "9e0289db-a212-4dd6-a444-6d3b8a2ab11f" + "id": "0b734597-68a2-4628-8058-b240c7861d50" }, { "cell_type": "code", @@ -310,7 +310,7 @@ "import os\n", "os.getcwd()" ], - "id": "a1a6660c-d71b-4c7a-9998-e0e9386a6b2f" + "id": "d12f094f-8ed2-4962-a563-5c1921a78a34" }, { "cell_type": "code", @@ -321,10 +321,10 @@ "os.chdir('..')\n", "os.getcwd()" ], - "id": "efcd97c0-3044-4626-bca4-205faa6ff8ac" + "id": "fe0a55b6-1c17-4520-9080-eadb9b98fdb7" }, { - "id": "405f5333", + "id": "7fb4e50e", "cell_type": "markdown", "source": "## 3.2 \u95a2\u6570", "metadata": {} @@ -338,7 +338,7 @@ "import math\n", "math.sqrt(4)" ], - "id": "89e56472-c805-45b5-914e-54aa23e1f9d2" + "id": "a9778204-bd89-44ed-b9fd-c5877ce172db" }, { "cell_type": "code", @@ -348,7 +348,7 @@ "source": [ "math.log(100, 10)" ], - "id": "6de06332-0559-4e1b-afac-9cc3c47b9ebf" + "id": "bc433472-6150-4b09-8fcd-f09ed8e785ad" }, { "cell_type": "code", @@ -360,7 +360,7 @@ "# \u3042\u308b\u3044\u306f\n", "math.log(100, math.e) # \u7701\u7565\u3057\u306a\u3044\u5834\u5408\n" ], - "id": "7b4cce30-7a35-43ea-9bf1-3a416d76edc1" + "id": "ed0dc25f-4603-470e-8547-7751a3a5304d" }, { "cell_type": "code", @@ -372,7 +372,7 @@ "\n", "math.log2(1024) # \u5e95\u304c2\u306e\u5bfe\u6570" ], - "id": "3b7d0f5d-10ed-4315-9bc9-0182223065f0" + "id": "6a0b0ac7-188d-48ba-9469-f4fab09cc177" }, { "cell_type": "code", @@ -383,7 +383,7 @@ "def f(a, b):\n", " return a - b" ], - "id": "79657bd8-d9b7-4855-a32d-e652eebe5951" + "id": "a7755b43-0257-402d-b4fd-65b6497023ac" }, { "cell_type": "code", @@ -393,7 +393,7 @@ "source": [ "f(3, 5)" ], - "id": "f943569c-f56c-4047-acf1-69c279936599" + "id": "a03fde7a-2fdd-4f40-b6fb-cee295fd368e" }, { "cell_type": "code", @@ -406,7 +406,7 @@ "\n", "f(3) # f(3, 5)\u3068\u540c\u3058\u3053\u3068" ], - "id": "0dbed3b5-c9fa-4c0a-8185-9cc3397a5fb0" + "id": "c0f80530-6f4e-4d54-8643-03eaafc11d67" }, { "cell_type": "code", @@ -416,10 +416,10 @@ "source": [ "(lambda a, b: a - b)(3, 5)" ], - "id": "f1ad457f-f840-438a-9aa6-6859e747e6b9" + "id": "72b8e1e0-639e-4b17-8b35-4f0273379ea6" }, { - "id": "00f44e15", + "id": "1c82bac5", "cell_type": "markdown", "source": "## 3.3 \u30b3\u30ec\u30af\u30b7\u30e7\u30f3", "metadata": {} @@ -432,7 +432,7 @@ "source": [ "x = ['foo', 'bar', 'baz']" ], - "id": "4233a4b5-defb-41f7-b976-ed8134829cab" + "id": "339eb9a4-73f2-45e5-b4b1-ba42abd098c5" }, { "cell_type": "code", @@ -442,7 +442,7 @@ "source": [ "len(x)" ], - "id": "d2e9dca3-ffee-4fc1-adcd-bbc1691941dc" + "id": "a6e4c3d6-fb19-4899-8ffa-6d513447b923" }, { "cell_type": "code", @@ -452,7 +452,7 @@ "source": [ "x[1]" ], - "id": "9c5e17aa-eb8f-48ce-ae0f-288dba3540ec" + "id": "4a200bc9-706c-4ee2-926f-e71951b31c4c" }, { "cell_type": "code", @@ -465,7 +465,7 @@ "\n", "x[1] = 'bar' # \u5143\u306b\u623b\u3059\uff0e" ], - "id": "5f260196-aac6-4d05-899a-93b95a4260c4" + "id": "b61145a0-7fd6-4354-85b7-eaa19aeb6f8c" }, { "cell_type": "code", @@ -475,7 +475,7 @@ "source": [ "x[-2]" ], - "id": "2dd28e5a-fecd-44bc-b4c9-8f2667b5eb0b" + "id": "c79076d6-2efb-4304-8c8c-13c74b723e69" }, { "cell_type": "code", @@ -485,7 +485,7 @@ "source": [ "x + ['qux']" ], - "id": "9c16d8a7-3bfd-478a-bba4-abea669e288c" + "id": "eed88296-14ec-4119-89ff-08dc96632935" }, { "cell_type": "code", @@ -499,7 +499,7 @@ "\n", "x # \u7d50\u679c\u306e\u78ba\u8a8d" ], - "id": "81b43aa5-7212-4b6a-86b6-696bdb695454" + "id": "19865fbd-04ce-4778-b2cf-378b23814878" }, { "cell_type": "code", @@ -509,7 +509,7 @@ "source": [ "list(range(5))" ], - "id": "4f9f40ce-dc29-4182-8474-8bd7f0a58c81" + "id": "a113ae2f-1c6d-4b33-8687-741ff6e45963" }, { "cell_type": "code", @@ -519,7 +519,7 @@ "source": [ "list(range(0, 11, 2))" ], - "id": "10baa004-e1ab-4505-b28a-f67f856ba50e" + "id": "1594e548-2c1b-4303-994e-99ca30ee678b" }, { "cell_type": "code", @@ -530,7 +530,7 @@ "import numpy as np\n", "np.arange(0, 1.1, 0.5)" ], - "id": "7fa85b39-ac29-45f4-b825-a9b85d11017c" + "id": "32ab6a37-a3a4-4627-88b6-17bc40ce9806" }, { "cell_type": "code", @@ -540,7 +540,7 @@ "source": [ "np.linspace(0, 100, 5)" ], - "id": "8098a730-830d-4eaf-aedc-a91886fd09c3" + "id": "4b25a857-830a-4967-b444-922d3577e599" }, { "cell_type": "code", @@ -550,7 +550,7 @@ "source": [ "[10] * 5" ], - "id": "f1fd8051-0948-4abb-8115-b0eb5bc553e9" + "id": "55ace0fb-e9fb-43e1-986a-e49d00e60cbf" }, { "cell_type": "code", @@ -565,7 +565,7 @@ "\n", "x * 10 # \u4e57\u7b97" ], - "id": "3e01e38a-82bc-42a8-8e47-949ad3f219f4" + "id": "85c21d1c-f102-4356-a78b-b32e35e50043" }, { "cell_type": "code", @@ -576,7 +576,7 @@ "x = [2, 3]\n", "np.sin(x)" ], - "id": "f046ec45-1431-4e8d-b7f4-be18fe3c7246" + "id": "e78baff5-55c0-49e7-b216-a81f373510c3" }, { "cell_type": "code", @@ -590,7 +590,7 @@ "\n", "x * y" ], - "id": "dbf73ca0-120c-4dd2-b0cd-d6d7d9fe26cf" + "id": "e934f9f6-267c-4cb2-be72-6c32240891d1" }, { "cell_type": "code", @@ -602,7 +602,7 @@ "# \u3042\u308b\u3044\u306f\n", "x @ y\n" ], - "id": "1c6a2f91-326a-4f5c-8345-30872fe00a36" + "id": "449475ef-5a24-4282-9241-60f0aefdbaaf" }, { "cell_type": "code", @@ -614,7 +614,7 @@ "y = np.array([True, True])\n", "x & y" ], - "id": "712af92c-545d-437d-b614-8766c8d1d639" + "id": "534d6348-efb5-47b9-ae0f-d5b45d710880" }, { "cell_type": "code", @@ -634,7 +634,7 @@ "\n", "u == w # \u8981\u7d20\u3054\u3068\u306e\u6bd4\u8f03" ], - "id": "d0f25000-6c49-4c94-93b7-5db6a23c275b" + "id": "b47b232a-53c4-4005-ab4f-6be7d3a0408c" }, { "cell_type": "code", @@ -646,7 +646,7 @@ "\n", "(u == w).mean() # \u540c\u3058\u8981\u7d20\u306e\u5272\u5408" ], - "id": "80128bc7-d946-4685-aee7-24dc92743c9d" + "id": "5b7025bd-f8c7-4adb-b642-506622d4da24" }, { "cell_type": "code", @@ -656,7 +656,7 @@ "source": [ "x = [1, \"two\"]" ], - "id": "93cc3a37-51ba-40c0-9195-9c08f11771ba" + "id": "85de6cf2-287a-4fb0-bef9-5cd9124aa247" }, { "cell_type": "code", @@ -666,7 +666,7 @@ "source": [ "x[1]" ], - "id": "641f1137-8c3d-4061-b993-5460c85520eb" + "id": "76d470b4-4950-428c-b4a8-3f4af50c89bb" }, { "cell_type": "code", @@ -677,7 +677,7 @@ "x = {'apple' : '\u308a\u3093\u3054',\n", " 'orange': '\u307f\u304b\u3093'}" ], - "id": "e8c41dd4-03f9-44dd-9e7f-990b468b9567" + "id": "00449751-e0c9-4b2f-ab4a-8447a9cfe192" }, { "cell_type": "code", @@ -687,7 +687,7 @@ "source": [ "x['grape'] = '\u3076\u3069\u3046'" ], - "id": "ad4845ce-a3a3-4d5d-8ed1-bcfdea9f4e90" + "id": "900a16c5-c6a2-4485-a493-af0271c3161f" }, { "cell_type": "code", @@ -700,7 +700,7 @@ "tmp = 'apple'\n", "x[tmp]\n" ], - "id": "35de6cd8-a182-4937-ad7c-1f09e1e5d518" + "id": "b0776436-c8b1-4267-80fa-6a404fdeb94d" }, { "cell_type": "code", @@ -715,7 +715,7 @@ "\n", "x # x\u3082\u5909\u308f\u308b\uff0e" ], - "id": "663937f0-3d7a-43a9-860e-09ace4cde9d5" + "id": "9bf67087-16e6-4bdc-a83a-85fe5e929b99" }, { "cell_type": "code", @@ -730,10 +730,10 @@ "y[1] = 'BAR' # y\u3092\u66f4\u65b0\u3057\u3066\u3082\uff0c\n", "x" ], - "id": "b4cbd9b2-62c3-4498-9678-7de8477f0f93" + "id": "133b5fc7-1ccb-4712-a62c-5292e3eb1a0e" }, { - "id": "e1b097b2", + "id": "25839473", "cell_type": "markdown", "source": "## 3.4 \u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0", "metadata": {} @@ -746,7 +746,7 @@ "source": [ "import pandas as pd" ], - "id": "2a6aedbe-722b-460f-a550-c450c869981f" + "id": "a6c2b849-3a24-47db-aeae-65dfba1bd9f2" }, { "cell_type": "code", @@ -760,7 +760,7 @@ " 'math': [ 70, 80, 90, 100],\n", " 'gender': ['f', 'm', 'm', 'f']})" ], - "id": "c05d56d2-36a2-48a3-89af-f034f3b82d88" + "id": "20dfaaf2-76f1-47f5-a96f-1fc0d922701f" }, { "cell_type": "code", @@ -776,7 +776,7 @@ " columns=['name', 'english',\n", " 'math', 'gender'])" ], - "id": "a18a955e-cdd7-4640-9c36-0c8b251abd75" + "id": "410b289a-0548-413b-b6c5-b133b8b2d2bb" }, { "cell_type": "code", @@ -787,7 +787,7 @@ "my_df.head()\n", "# \u7d50\u679c\u306f\u5272\u611b" ], - "id": "fad9de5a-5800-4598-a909-e04a9ffb5fdb" + "id": "946e67ed-16d5-4def-b003-ebf949c8e5ac" }, { "cell_type": "code", @@ -802,7 +802,7 @@ "\n", "c # \u5217\u6570" ], - "id": "a90201d3-9d46-452d-bb93-e32c3e872c72" + "id": "61d19456-a422-47d4-8fe4-2d82ca0ba8cb" }, { "cell_type": "code", @@ -817,7 +817,7 @@ " columns=['X', 'Y'])\n", "my_df2" ], - "id": "ab839a24-083f-4bb2-a401-47f2593bf14d" + "id": "bcf4992f-030b-46f9-9345-2a2c09371f28" }, { "cell_type": "code", @@ -827,7 +827,7 @@ "source": [ "my_df2.columns" ], - "id": "c448e177-59ac-4c4e-ab16-5be805162825" + "id": "c5eacad5-42a3-4d92-a56e-8ae019894c08" }, { "cell_type": "code", @@ -839,7 +839,7 @@ "my_df2\n", "# \u4ee5\u4e0b\u7701\u7565" ], - "id": "83d405e1-ca94-42ca-bc87-aed42ab96470" + "id": "3c7766f0-d4b1-4d7b-a847-e80b40918e88" }, { "cell_type": "code", @@ -849,7 +849,7 @@ "source": [ "list(my_df.index)" ], - "id": "a5fd3e6d-1f4a-45a6-97b4-2d00d9c2c0db" + "id": "c690848e-5301-4158-ad7c-7aead15e1e07" }, { "cell_type": "code", @@ -862,7 +862,7 @@ "my_df2\n", "# \u4ee5\u4e0b\u7701\u7565" ], - "id": "049ad1a2-f3c6-46f3-81c9-7d2a8c0c0c9b" + "id": "984876e0-f3c6-4236-8f31-9950198ca9fb" }, { "cell_type": "code", @@ -877,7 +877,7 @@ " index= ['A', 'B', 'C', 'D'])\n", "my_df3" ], - "id": "4cd3a43b-265a-45fc-8bb3-453252255654" + "id": "0d46c887-08c2-4eee-b076-f9048ded5e8c" }, { "cell_type": "code", @@ -892,7 +892,7 @@ " 'gender' : ['m']})\n", "my_df2 = pd.concat([my_df, tmp])" ], - "id": "91384d28-01bf-43a5-b292-965f67793816" + "id": "326bb171-cea1-4aee-8e93-dadc317d6223" }, { "cell_type": "code", @@ -902,7 +902,7 @@ "source": [ "my_df2 = my_df.assign(id=[1, 2, 3, 4])" ], - "id": "44950da6-9e4a-4d76-a741-5133b3327713" + "id": "d33563e5-a7a6-41d6-acec-8eb20a5a65eb" }, { "cell_type": "code", @@ -914,7 +914,7 @@ "my_df3['id'] = [1, 2, 3, 4] # \u66f4\u65b0\n", "my_df3 # \u7d50\u679c\u306e\u78ba\u8a8d\uff08\u5272\u611b\uff09" ], - "id": "f8fbcb2d-25be-4880-84dc-13868a05932f" + "id": "7dfec752-6c22-4c2d-8605-2fdc3a8edf61" }, { "cell_type": "code", @@ -924,7 +924,7 @@ "source": [ "my_df.iloc[0, 1]" ], - "id": "612d668c-41a6-408e-8820-116f093f33db" + "id": "bbfafea6-2aec-44a2-bdf7-35f2feeb56e2" }, { "cell_type": "code", @@ -943,7 +943,7 @@ "\n", "x # \u7d50\u679c\u306e\u78ba\u8a8d\uff08\u5272\u611b\uff09" ], - "id": "81196559-3c64-453d-9bc3-e01f544a5ea2" + "id": "36b962a8-3ac7-46b3-a372-104095b60a55" }, { "cell_type": "code", @@ -955,7 +955,7 @@ "# \u3042\u308b\u3044\u306f\n", "x = my_df.loc[:, ['name', 'math']]" ], - "id": "8d88f3e4-76eb-4b94-9d35-93fb8aa76c27" + "id": "1808e935-6be2-4924-a9b9-2d2469d5b767" }, { "cell_type": "code", @@ -967,7 +967,7 @@ "# \u3042\u308b\u3044\u306f\n", "x = my_df.iloc[:, [0, 2]]" ], - "id": "1d5564d3-7bd4-470f-bf91-a63c8d6b1445" + "id": "532956a6-b784-4a6e-824c-67f29e3003db" }, { "cell_type": "code", @@ -981,7 +981,7 @@ "x = my_df.drop(\n", " columns=my_df.columns[[1, 3]])" ], - "id": "b4cecbe1-aa70-4477-b638-f7838c983422" + "id": "b365ca79-a204-4d47-a368-e7d669605074" }, { "cell_type": "code", @@ -993,7 +993,7 @@ "# \u3042\u308b\u3044\u306f\n", "x = my_df.iloc[[0, 2], :]" ], - "id": "48980309-84a2-4519-801c-cff6d1dcf2d9" + "id": "b8fb0be9-c3e1-490a-9091-ad3fe7214901" }, { "cell_type": "code", @@ -1003,7 +1003,7 @@ "source": [ "x = my_df.drop([1, 3])" ], - "id": "21648cfe-a5cd-4a2d-8c54-077a828887fd" + "id": "b4cf6722-4835-43e6-b41d-40d516b67372" }, { "cell_type": "code", @@ -1015,7 +1015,7 @@ "# \u3042\u308b\u3044\u306f\n", "x = my_df.query('gender == \"m\"')" ], - "id": "c40ece96-7d6d-406c-8f6f-1aa298f1b8e0" + "id": "be3960a2-7385-447e-85fb-544779e19c3c" }, { "cell_type": "code", @@ -1027,7 +1027,7 @@ "# \u3042\u308b\u3044\u306f\n", "x = my_df.query('english > 80 and gender == \"m\"')" ], - "id": "8f9d0760-5112-4e40-826f-f9caa2202798" + "id": "471ccacc-fe73-498b-8902-f3324ca69128" }, { "cell_type": "code", @@ -1040,7 +1040,7 @@ "tmp = my_df['english'].max()\n", "x = my_df.query('english == @tmp')" ], - "id": "1a580c9b-9e6b-4433-94ee-b6126c6fe4b0" + "id": "ac82c3a3-62ea-417c-9b16-6547e9ccaffc" }, { "cell_type": "code", @@ -1051,7 +1051,7 @@ "my_df2 = my_df.copy() # \u30b3\u30d4\u30fc\n", "my_df2.loc[my_df['gender'] == 'm', 'gender'] = 'M'" ], - "id": "9b8f260e-5e8c-4642-a738-efb4f1e76ab0" + "id": "47aeff63-4b8b-484d-8e8d-bec029245edb" }, { "cell_type": "code", @@ -1061,7 +1061,7 @@ "source": [ "my_df2" ], - "id": "4ba76b30-24b3-4a99-a42e-0f77afe05340" + "id": "d4fdc279-a95c-4be0-8c1a-786498e58f1f" }, { "cell_type": "code", @@ -1071,7 +1071,7 @@ "source": [ "x = my_df.sort_values('english')" ], - "id": "768cca9b-b682-4d5b-980b-2418fcb37507" + "id": "9c137b71-5cba-4567-8bef-c22cef638772" }, { "cell_type": "code", @@ -1082,7 +1082,7 @@ "x = my_df.sort_values('english',\n", " ascending=False)" ], - "id": "69da8f4a-010a-47dd-a9fb-69a8d4c4008f" + "id": "58dbbf7e-af46-46bb-83fc-6805904f7632" }, { "cell_type": "code", @@ -1096,7 +1096,7 @@ "A = np.array(x).reshape(3, 4)\n", "A" ], - "id": "f5f31b87-b9d1-45e9-8c4b-db09f2e902f9" + "id": "5d4b0331-2b6a-409f-b258-4cfd6e1a6e4d" }, { "cell_type": "code", @@ -1107,7 +1107,7 @@ "A = my_df.iloc[:, [1, 2]].values\n", "A" ], - "id": "8eef5049-5219-4745-b811-6d69e123a8b0" + "id": "010b931a-3610-46d0-8723-06a43408db01" }, { "cell_type": "code", @@ -1117,7 +1117,7 @@ "source": [ "pd.DataFrame(A)" ], - "id": "9d8d4813-4acd-43ef-bdf4-f65b774acd52" + "id": "b688877f-9d36-4141-954e-30c7c43f44f6" }, { "cell_type": "code", @@ -1127,7 +1127,7 @@ "source": [ "A.T" ], - "id": "768adc25-80d6-4843-9388-f3a11c40bddb" + "id": "76960ba4-6660-404c-b61d-7d3314c30a5d" }, { "cell_type": "code", @@ -1137,7 +1137,7 @@ "source": [ "A.T @ A" ], - "id": "cca44d4f-87fd-4408-8d17-762312f18f49" + "id": "4528612d-1e3e-4cf0-be9c-8639d4141af6" }, { "cell_type": "code", @@ -1150,7 +1150,7 @@ " 'min': [20, 21, 15],\n", " 'max': [24, 27, 21]})" ], - "id": "966371b8-5ff8-4cd3-abf2-e9873e2539ba" + "id": "4e95639f-4caf-416d-a6c9-090818cb7c51" }, { "cell_type": "code", @@ -1161,7 +1161,7 @@ "my_longer = my_df.melt(id_vars='day')\n", "my_longer" ], - "id": "0d3b88b0-42b9-4bc2-9985-b568607fa03e" + "id": "b2d12bb9-7a9c-41aa-a50c-e7af4da79a2c" }, { "cell_type": "code", @@ -1175,7 +1175,7 @@ " values='value')\n", "my_wider" ], - "id": "f4215ad2-8797-4ff4-b437-06d502139c0e" + "id": "9fdfc23d-b54f-433c-b807-c6b3c034d203" }, { "cell_type": "code", @@ -1188,10 +1188,10 @@ " xticks=my_wider.index, # x\u8ef8\u76ee\u76db\u308a\n", " ylabel='temperature') # y\u8ef8\u30e9\u30d9\u30eb" ], - "id": "6b73ecd9-93fb-4fd6-b491-c5a032bf03a5" + "id": "84fa5e63-f0bc-46ff-9d29-91c6ca8d3ffa" }, { - "id": "f5f8bc0e", + "id": "b11757f9", "cell_type": "markdown", "source": "## 3.5 1\u6b21\u5143\u30c7\u30fc\u30bf\u306e\uff08\u975e\uff09\u985e\u4f3c\u5ea6", "metadata": {} @@ -1214,7 +1214,7 @@ "\n", "distance.euclidean(A, C)" ], - "id": "87584b62-6cef-4e06-93d1-fa0925445468" + "id": "0ff0cd61-aaba-4ba5-894f-63aa79e8fb22" }, { "cell_type": "code", @@ -1226,7 +1226,7 @@ "\n", "distance.cityblock(A, C)" ], - "id": "36ede031-1cb9-4299-9945-fe3ab9a2ec52" + "id": "b3c72e7b-a93e-4396-8393-426582ee8fb0" }, { "cell_type": "code", @@ -1238,7 +1238,7 @@ "\n", "1 - distance.cosine(A, C)" ], - "id": "0e44524f-6c5b-4358-b4b0-29bd97f66cab" + "id": "13f212d2-87ed-40f8-bfae-5d52153edbf4" }, { "cell_type": "code", @@ -1254,7 +1254,7 @@ "# \u3042\u308b\u3044\u306f\n", "pearsonr(A, C)[0]" ], - "id": "afba9582-03a0-454c-8ca3-22a7613edcf7" + "id": "7c49aec2-6d87-450e-ae94-1ed8892053a9" }, { "cell_type": "code", @@ -1288,10 +1288,10 @@ "1 - distance.cdist(my_df, my_df,\n", " metric='correlation')" ], - "id": "d4e17339-84e6-40ef-b7dd-fb50ea5c18ec" + "id": "3735db82-a849-43f6-aba6-19467275c010" }, { - "id": "61b67227", + "id": "45e467b7", "cell_type": "markdown", "source": "## 3.6 R\u306e\u30d1\u30c3\u30b1\u30fc\u30b8\uff0cPython\u306e\u30e2\u30b8\u30e5\u30fc\u30eb", "metadata": {} @@ -1306,7 +1306,7 @@ "import numpy as np\n", "import pandas as pd" ], - "id": "cf770aaf-e44f-4567-975a-cc539952a57d" + "id": "8b895840-57ca-41ca-bccb-2bea335f0050" }, { "cell_type": "code", @@ -1317,7 +1317,7 @@ "import numpy\n", "numpy.array([1, 2, 3, 4])" ], - "id": "4eff91c1-456b-4004-aba3-e98311e61d14" + "id": "87d291ab-1749-478c-8527-4669f4909e55" }, { "cell_type": "code", @@ -1328,7 +1328,7 @@ "import numpy as np\n", "np.array([1, 2, 3, 4])" ], - "id": "b21950a2-bded-47e9-bcc9-1584bae1d147" + "id": "0eb8db23-6b7b-4c14-b920-7bbc3ed1879c" }, { "cell_type": "code", @@ -1339,7 +1339,7 @@ "from numpy import array\n", "array([1, 2, 3, 4])" ], - "id": "4722db60-73ca-46d6-89c2-ea5d0a20cbaa" + "id": "4c4b9d06-4e07-4555-b807-c714f62c596f" }, { "cell_type": "code", @@ -1350,10 +1350,10 @@ "from numpy import *\n", "array([1, 2, 3, 4])" ], - "id": "c0107359-fb10-4cb9-a2dd-c6d8c54233fe" + "id": "7aeb154b-e442-4ced-a300-4a89e1ad3c2a" }, { - "id": "6e65b9ad", + "id": "8a2463e4", "cell_type": "markdown", "source": "## 3.7 \u53cd\u5fa9\u51e6\u7406", "metadata": {} @@ -1367,7 +1367,7 @@ "import numpy as np\n", "import pandas as pd" ], - "id": "2a654287-2a13-4348-903c-c3028bd5373d" + "id": "00e8ad00-293d-4271-955b-794b5bc41088" }, { "cell_type": "code", @@ -1381,7 +1381,7 @@ "\n", "f1(10) # \u52d5\u4f5c\u78ba\u8a8d" ], - "id": "c28b29a9-d490-4104-8493-cc46c13890b4" + "id": "c10295b2-94e7-495d-8478-115d5ea0ee99" }, { "cell_type": "code", @@ -1391,7 +1391,7 @@ "source": [ "[f1(10) for i in range(3)]" ], - "id": "a12c1b77-2a56-415b-9869-0bedfbd117bb" + "id": "87062eb7-8ceb-4254-8d26-9cdc20a02f28" }, { "cell_type": "code", @@ -1401,7 +1401,7 @@ "source": [ "[f1(10)] * 3" ], - "id": "81a62200-e7f7-4b51-8919-c4963fd9c02c" + "id": "701b8e7e-6b05-4a16-8f39-14bb1c9466d6" }, { "cell_type": "code", @@ -1417,7 +1417,7 @@ "v = pd.Series([5, 10, 100])\n", "v.apply(f1) # \u65b9\u6cd52" ], - "id": "1769f73a-2087-4b0b-ae63-3d16fb9321fe" + "id": "0ecbd043-d610-41a0-8731-29d75d727288" }, { "cell_type": "code", @@ -1428,7 +1428,7 @@ "pd.Series([10] * 3).apply(f1)\n", "# \u7d50\u679c\u306f\u5272\u611b" ], - "id": "26bcf99b-0e4f-4ae5-b90e-b68857ad1329" + "id": "7c9cae0e-120a-4e50-bdf1-4b974294df93" }, { "cell_type": "code", @@ -1446,7 +1446,7 @@ "\n", "f2(10) # \u52d5\u4f5c\u78ba\u8a8d" ], - "id": "4b2d3102-06a6-469d-9d94-808af89dcac9" + "id": "61fa3856-b5dd-471e-acba-1d0f7f3dfcec" }, { "cell_type": "code", @@ -1457,7 +1457,7 @@ "v = pd.Series([5, 10, 100])\n", "v.apply(f2)" ], - "id": "33e28c11-481d-479a-ab9e-c88f8ecf13dc" + "id": "19913563-73bc-43e3-8bff-821a7be07659" }, { "cell_type": "code", @@ -1476,7 +1476,7 @@ "\n", "f3(10, 6) # \u52d5\u4f5c\u78ba\u8a8d" ], - "id": "035d06b9-970b-494c-b8b4-c481d763a83b" + "id": "4bcfeede-0c00-4f7a-96e9-0874a9565f23" }, { "cell_type": "code", @@ -1495,7 +1495,7 @@ "my_df.apply(lambda row:\n", " f3(*row), axis=1)\n" ], - "id": "689d6d22-0a82-4d5c-ae4e-e0683a368b78" + "id": "561fa4bc-31fb-4684-a706-0590a258ef86" }, { "cell_type": "code", @@ -1510,10 +1510,10 @@ "v.parallel_apply(f1)\n", "# \u7d50\u679c\u306f\u5272\u611b" ], - "id": "f7e3aec4-68e2-459c-a2ed-0adea131c835" + "id": "a893d00f-f190-4f53-9bc0-2f8f51674780" }, { - "id": "6c2e9287", + "id": "d25675b8", "cell_type": "markdown", "source": "## 3.8 \u305d\u306e\u4ed6", "metadata": {} @@ -1527,7 +1527,7 @@ "x = 123\n", "type(x)" ], - "id": "b1b59ee8-65d7-49b2-bdd1-6de907cea31d" + "id": "825a1fa4-5f8a-422b-838c-badac682f45e" }, { "cell_type": "code", @@ -1537,7 +1537,7 @@ "source": [ "%whos" ], - "id": "ca3f9fab-a767-44b6-afe6-be7eeb74e8e7" + "id": "a20cfab7-0081-47fa-831c-712a91dcafb0" }, { "cell_type": "code", @@ -1550,7 +1550,7 @@ "# \u3042\u308b\u3044\u306f\n", "help(math.log)" ], - "id": "493bf702-659e-4c0e-9c23-98be3491862b" + "id": "ecc49271-6afa-42a9-b961-e5895a4df068" }, { "cell_type": "code", @@ -1562,7 +1562,7 @@ "v = [1, np.nan, 3]\n", "v" ], - "id": "081c96ae-3de6-49c8-b70e-35e6c306f1a0" + "id": "fc44929f-f248-49cd-a2d1-f427bf74daa8" }, { "cell_type": "code", @@ -1574,16 +1574,16 @@ "\n", "v[1] == np.nan # \u8aa4\u308a" ], - "id": "2a6583de-043d-441a-ae01-b1ac3243db7e" + "id": "dfe344c3-dd8a-40c5-8aa6-bb6f9f67c04c" }, { - "id": "3c8597b2", + "id": "c92a0a1b", "cell_type": "markdown", "source": "# 4 \u7d71\u8a08\u5165\u9580\n\n\n", "metadata": {} }, { - "id": "6f6a2982", + "id": "d679271f", "cell_type": "markdown", "source": "## 4.1 \u8a18\u8ff0\u7d71\u8a08", "metadata": {} @@ -1608,7 +1608,7 @@ " [165, 170, 175, 180, 185])\n", "x.mean() # np.mean(x)\u3082\u53ef" ], - "id": "5288839d-cc1d-44b2-a77c-154cd7a61817" + "id": "e3af4844-a497-4f4d-b11f-16bf8a26c752" }, { "cell_type": "code", @@ -1619,7 +1619,7 @@ "n = len(x) # \u30b5\u30f3\u30d7\u30eb\u30b5\u30a4\u30ba\n", "sum(x) / n" ], - "id": "92178f98-e6b6-4a26-b618-e77a602441f0" + "id": "af724461-76c7-4ad8-a493-e75eeed40ebd" }, { "cell_type": "code", @@ -1630,7 +1630,7 @@ "y = [173, 174, 175, 176, 177]\n", "np.mean(y)" ], - "id": "08792819-876a-4fb2-be47-e9ee53641970" + "id": "08946159-97df-4cfc-ac19-247a178ccb04" }, { "cell_type": "code", @@ -1642,7 +1642,7 @@ "\n", "np.var(y, ddof=1) # y\u306e\u5206\u6563" ], - "id": "91638bdf-c39d-4b67-9d50-2864fab1c306" + "id": "aaa99e6e-c3f2-4cee-83a0-51f5b80cfaba" }, { "cell_type": "code", @@ -1652,7 +1652,7 @@ "source": [ "sum((x - np.mean(x))**2) / (n - 1)" ], - "id": "f9a2be01-6947-4f21-9d5f-f61da87dd2cb" + "id": "4a7cad2c-760c-4cd7-9d27-cc43ef29fb3d" }, { "cell_type": "code", @@ -1664,7 +1664,7 @@ "\n", "np.std(y, ddof=1) # y\u306e\u6a19\u6e96\u504f\u5dee" ], - "id": "16885015-4d0f-41f4-80aa-8abc32d23c90" + "id": "07995627-8e07-4396-90cb-710251a5576e" }, { "cell_type": "code", @@ -1674,7 +1674,7 @@ "source": [ "np.var(x, ddof=1)**0.5 # x\u306e\u6a19\u6e96\u504f\u5dee" ], - "id": "fd24cf59-c297-4a2c-8149-1e3a8214c74b" + "id": "ab286cde-908e-4b98-bb8a-b23500f25877" }, { "cell_type": "code", @@ -1685,7 +1685,7 @@ "s = pd.Series(x)\n", "s.describe()" ], - "id": "3d4d65f1-8b53-420f-a196-0c30f423ccfe" + "id": "202904ef-8cc6-4352-8b28-482925f58a38" }, { "cell_type": "code", @@ -1695,7 +1695,7 @@ "source": [ "# s.describe()\u3067\u8a08\u7b97\u6e08\u307f" ], - "id": "949aa2ca-0290-413e-b63c-cf4b597fde14" + "id": "a438fd24-da44-44ea-9f79-277dfd577626" }, { "cell_type": "code", @@ -1709,7 +1709,7 @@ "\n", "np.var(x, ddof=0) # \u6a19\u672c\u5206\u6563" ], - "id": "d565d8d2-7b59-4b42-9e50-10a6ee45d677" + "id": "3abadad9-7e3d-4ff1-879d-8e1f4e945d62" }, { "cell_type": "code", @@ -1721,7 +1721,7 @@ "\n", "np.std(x, ddof=0) # \u221a\u6a19\u672c\u5206\u6563" ], - "id": "1cbf89ab-2c11-4327-ae15-7008b20c0173" + "id": "5eb5c08e-7bed-4cf0-b1ac-b17dcbe2fa9d" }, { "cell_type": "code", @@ -1731,7 +1731,7 @@ "source": [ "np.std(x, ddof=1) / len(x)**0.5" ], - "id": "9cf0b957-7491-4efd-bf08-e59478592a5c" + "id": "2fb6628d-7336-46fc-a35b-a47084045e16" }, { "cell_type": "code", @@ -1748,7 +1748,7 @@ " 'math': [ 70, 80, 90, 100],\n", " 'gender': ['f', 'm', 'm', 'f']})" ], - "id": "fba63136-57b5-4a87-be00-abb3bdd08647" + "id": "ff7496b1-b568-4efb-bcb1-3d249208d408" }, { "cell_type": "code", @@ -1760,7 +1760,7 @@ "# \u3042\u308b\u3044\u306f\n", "np.var(my_df['english'], ddof=1)\n" ], - "id": "a58b536f-1005-46aa-8b87-589a3aea42f1" + "id": "675dd18e-a763-4bc8-805d-6487b57626ab" }, { "cell_type": "code", @@ -1775,7 +1775,7 @@ "my_df.iloc[:, [1, 2]].apply(\n", " lambda x: np.var(x, ddof=1))\n" ], - "id": "fed3c9c3-f23d-4a1d-b8ba-1948f1ace212" + "id": "eb226353-bb98-40e2-9ff9-146f4be1ed04" }, { "cell_type": "code", @@ -1785,7 +1785,7 @@ "source": [ "my_df.describe()" ], - "id": "fe330ef3-4c0b-43a4-b543-9be764b92137" + "id": "1c424e90-2577-4c42-9419-fca202b7153e" }, { "cell_type": "code", @@ -1800,7 +1800,7 @@ "\n", "my_df.groupby('gender').apply(len)" ], - "id": "888c9851-ae18-4be4-9f27-03bb842b8670" + "id": "1a707517-0dcf-40d8-b61e-bc0c60a2f8b9" }, { "cell_type": "code", @@ -1813,7 +1813,7 @@ "pd.crosstab(my_df2.gender,\n", " my_df2.excel)" ], - "id": "ef7bfa82-22b8-4f44-9c96-64e5d58804b5" + "id": "7cb9b018-4c20-4a30-bfa4-4d07d87d41d2" }, { "cell_type": "code", @@ -1827,10 +1827,10 @@ "# \u3042\u308b\u3044\u306f\n", "my_df.drop(['name'], axis=1).groupby('gender').agg(np.mean)\n" ], - "id": "b303a270-2ed1-4f15-9940-d70ec421560c" + "id": "b1bc601d-7028-4e6b-a344-406651952be0" }, { - "id": "ad995ba0", + "id": "b6b8a5c2", "cell_type": "markdown", "source": "## 4.2 \u30c7\u30fc\u30bf\u306e\u53ef\u8996\u5316", "metadata": {} @@ -1847,7 +1847,7 @@ "iris = sm.datasets.get_rdataset('iris', 'datasets').data\n", "iris.head()" ], - "id": "019b0591-1406-456b-b131-5e791942e2cc" + "id": "d78ee790-885c-40c0-b8f4-0dac2890b047" }, { "cell_type": "code", @@ -1857,7 +1857,7 @@ "source": [ "iris.hist('Sepal.Length')" ], - "id": "d369726c-1c86-4087-840b-da5293505580" + "id": "0cfa6a3e-23fd-49b5-9569-f710525de058" }, { "cell_type": "code", @@ -1869,7 +1869,7 @@ " {'x': [10, 20, 30]})\n", "my_df.hist('x', bins=2) # \u968e\u7d1a\u6570\u306f2" ], - "id": "166f4bbd-7181-4421-bf9a-7c87e9049065" + "id": "76f0872a-e8e0-4bd4-8ca6-c8da5b72d9cd" }, { "cell_type": "code", @@ -1882,7 +1882,7 @@ "iris.hist('Sepal.Length',\n", " bins=tmp.round(2))" ], - "id": "d742c0c7-fc62-4726-a5e5-93a211d320bc" + "id": "d05c1928-0a4b-4e37-af59-cdc536dedf10" }, { "cell_type": "code", @@ -1894,7 +1894,7 @@ " 'Sepal.Width',\n", " kind='scatter')" ], - "id": "3c4ced63-bba4-4ba1-9339-b61fe0255b3b" + "id": "57d5919f-f6fb-44d6-9eea-4d19f00f1e5f" }, { "cell_type": "code", @@ -1904,7 +1904,7 @@ "source": [ "iris.boxplot()" ], - "id": "a7be24a5-e2a1-4863-aabf-6ad7642b1949" + "id": "21fd5eab-df1b-46f4-a908-82b355441b65" }, { "cell_type": "code", @@ -1920,7 +1920,7 @@ " len(iris)**0.5)\n", "my_df" ], - "id": "531dc4a7-f214-4505-83db-773156d35c46" + "id": "ae454402-12ee-435e-982c-eabbad3dc3dc" }, { "cell_type": "code", @@ -1930,7 +1930,7 @@ "source": [ "my_df.plot(y='mean', kind='bar', yerr='se', capsize=10)" ], - "id": "c13f553f-1644-4dab-b6c0-6886b7a56b79" + "id": "3c2a9452-8676-421e-97dc-5fbc3295975f" }, { "cell_type": "code", @@ -1943,7 +1943,7 @@ "my_se = my_group.agg(lambda x: x.std() / len(x)**0.5) # \u6a19\u6e96\u8aa4\u5dee\u3092\u6c42\u3081\u308b\uff0e\n", "my_se" ], - "id": "91cc8f7d-55a6-4029-8491-2170f93ebc85" + "id": "b5d891b4-bd31-4404-b40e-df1ccf2fec95" }, { "cell_type": "code", @@ -1953,7 +1953,7 @@ "source": [ "my_group.agg('mean').plot(kind='bar', yerr=my_se, capsize=5)" ], - "id": "aeb6af32-d88b-4e0b-886e-6bbf73a992d5" + "id": "97cc0d6e-7f8d-426a-8dae-0a0fa8cd4cfb" }, { "cell_type": "code", @@ -1976,7 +1976,7 @@ "mosaic(my_df,\n", " index=['Species', 'w_Sepal'])" ], - "id": "c2e035be-7734-4b3e-bf5d-51a1f4cc996b" + "id": "b7acdc51-fcd0-413b-a407-9e53ce8aa6f4" }, { "cell_type": "code", @@ -1988,7 +1988,7 @@ "my_table.index = [str(x) for x in my_table.index]\n", "mosaic(my_df, index=['Species', 'w_Sepal'], labelizer=lambda k: my_table.loc[k])" ], - "id": "501cc5c4-20b9-48b4-9440-f96799276866" + "id": "9778be9f-3000-4acf-811b-b0e26a388fe7" }, { "cell_type": "code", @@ -2003,10 +2003,10 @@ "y = x**3 - x\n", "plt.plot(x, y)" ], - "id": "f6fca945-fa3f-48d7-b315-f7a3cffee690" + "id": "f221cbcd-5f89-4b4a-8019-690f5e53c0a8" }, { - "id": "ec7ae2bc", + "id": "7f21a35c", "cell_type": "markdown", "source": "## 4.3 \u4e71\u6570", "metadata": {} @@ -2021,7 +2021,7 @@ "import numpy as np\n", "rng = np.random.default_rng()" ], - "id": "f3b47efd-ae43-4320-9e50-e4089a24e52b" + "id": "6a0e9929-00f4-42c1-809f-7dc6f37b07a5" }, { "cell_type": "code", @@ -2043,7 +2043,7 @@ "\n", "plt.hist(x, bins=6) # \u30d2\u30b9\u30c8\u30b0\u30e9\u30e0" ], - "id": "bb51656a-66a2-4708-bb04-85aff6e5f1ed" + "id": "a482fe14-42fc-4ec0-9a49-03168223dab2" }, { "cell_type": "code", @@ -2061,7 +2061,7 @@ " size=1000) # \u4e71\u6570\u306e\u6570\n", "plt.hist(x)" ], - "id": "f7d5a83d-556d-4f7c-b083-fc3f897bf877" + "id": "6eb6c7fd-c66f-4dfa-b797-89148e4b637d" }, { "cell_type": "code", @@ -2076,7 +2076,7 @@ "x = [int(k) for k in tmp]\n", "plt.hist(x, bins=6) # \u7d50\u679c\u306f\u5272\u611b" ], - "id": "0f7f8be8-5d3e-45ba-bff5-c387606d1625" + "id": "e166dac2-b269-49fa-a94d-0a051cc1af9a" }, { "cell_type": "code", @@ -2095,7 +2095,7 @@ " size=r) # \u4e71\u6570\u306e\u6570\n", "plt.hist(x, bins=max(x) - min(x))" ], - "id": "03fd84ac-f9cc-4b40-abd4-b18d212787ce" + "id": "5d010d4e-d709-48fd-a790-1c89b0dcba6c" }, { "cell_type": "code", @@ -2112,7 +2112,7 @@ " size=r) # \u4e71\u6570\u306e\u6570\n", "plt.hist(x, bins=40)" ], - "id": "5e8db47b-f4de-47ac-a08b-61591bcd42ca" + "id": "5e49df08-b729-4c53-aac6-ac1a4cc663fc" }, { "cell_type": "code", @@ -2131,7 +2131,7 @@ " np.std(tmp, ddof=1) / n**0.5], # \u6a19\u6e96\u8aa4\u5dee\n", " index=['k', 'mean', 'se'])" ], - "id": "8e854754-bf3c-4511-9dfe-1ce714a75abe" + "id": "f55b6f44-ec58-4b79-a3ca-693738d1636a" }, { "cell_type": "code", @@ -2143,7 +2143,7 @@ " return np.var(x, ddof=1)\n", "pd.Series([10, 20, 30]).apply(f)" ], - "id": "f0dd2a8d-6bae-4b1a-9785-d0d803694d2a" + "id": "bd5b59f0-9381-4145-b4ee-293cf800a50e" }, { "cell_type": "code", @@ -2155,7 +2155,7 @@ " return np.std(x, ddof=1)\n", "pd.Series([10, 20, 30]).apply(f)" ], - "id": "9889fab3-8a20-498b-8fa0-4011bfd235ba" + "id": "0aaf5d42-a0c3-4391-9294-b93a67088f33" }, { "cell_type": "code", @@ -2173,10 +2173,10 @@ " gamma(n / 2)))\n", "pd.Series([10, 20, 30]).apply(f)" ], - "id": "c1f6f866-a66d-4b3c-87ce-f2a50342cdcb" + "id": "60c2c97e-b55e-4b2b-a66b-d246eac1e577" }, { - "id": "556becf1", + "id": "31f1665f", "cell_type": "markdown", "source": "## 4.4 \u7d71\u8a08\u7684\u63a8\u6e2c", "metadata": {} @@ -2196,7 +2196,7 @@ " # \u5de6\u7247\u5074\u691c\u5b9a\u306a\u3089'smaller'\n", " # \u53f3\u7247\u5074\u691c\u5b9a\u306a\u3089'larger'" ], - "id": "f785294d-5d17-459b-9a9a-3c9b0c1180d7" + "id": "b63e5a44-83ad-41b0-9152-bd22afa12e43" }, { "cell_type": "code", @@ -2222,7 +2222,7 @@ "ax.hlines(y=my_pr2, xmin=0, xmax=15) # \u6c34\u5e73\u7dda\n", "ax.vlines(x=x, ymin=0, ymax=my_pr) # \u5782\u76f4\u7dda" ], - "id": "bb48f3d3-39fc-4ca9-9aa6-fa68e2511d25" + "id": "a09316df-f8ff-421f-b561-c71f1449b61c" }, { "cell_type": "code", @@ -2237,7 +2237,7 @@ " alpha=a, # \u6709\u610f\u6c34\u6e96\uff08\u7701\u7565\u53ef\uff09\n", " method='binom_test')" ], - "id": "767e9eaf-155c-495e-8104-bccef641c17d" + "id": "b78ac342-b2f4-48f2-85f5-112bf01c3b8a" }, { "cell_type": "code", @@ -2255,7 +2255,7 @@ "\n", "my_df.plot(x='t', legend=None, xlabel=r'$\\theta$', ylabel=r'p-value')" ], - "id": "50bada95-4a23-4c54-aaff-d3397e472d67" + "id": "752bd0ea-1fe2-49db-9ae2-0899f7b32672" }, { "cell_type": "code", @@ -2280,7 +2280,7 @@ "\n", "d.tconfint_mean(alpha=a, alternative=alt) # \u4fe1\u983c\u533a\u9593" ], - "id": "616eb971-8351-44d8-96e2-333274391106" + "id": "ff496c02-28f4-424e-9d75-c28e98608fdc" }, { "cell_type": "code", @@ -2295,7 +2295,7 @@ "\n", "c.tconfint_diff(alpha=a, alternative=alt, usevar=ve) # \u4fe1\u983c\u533a\u9593" ], - "id": "df8785bd-a78c-40c6-9c99-5a8b41133e55" + "id": "e0d14ef6-b561-4f1e-8749-8769c6619fff" }, { "cell_type": "code", @@ -2308,7 +2308,7 @@ " '/fromzero/master/data/smoker.csv')\n", "my_data = pd.read_csv(my_url)" ], - "id": "9748e00d-a7ac-4f97-879f-e67704da698a" + "id": "db7f32fc-41d4-4cf5-8918-d2a1daf2ec82" }, { "cell_type": "code", @@ -2318,7 +2318,7 @@ "source": [ "my_data.head()" ], - "id": "f066943e-3f09-43fa-8631-f6585a6b7fdf" + "id": "5e85add2-ddfe-496f-8798-faf52c72a70a" }, { "cell_type": "code", @@ -2331,7 +2331,7 @@ " my_data['smoker'])\n", "my_table" ], - "id": "53585c81-66ef-4d80-9bf0-fb81451f913e" + "id": "53685413-2431-4817-986e-9e1fb4a4a74b" }, { "cell_type": "code", @@ -2342,7 +2342,7 @@ "from scipy.stats import chi2_contingency\n", "chi2_contingency(my_table, correction=False)[1]" ], - "id": "a588701f-f681-4dd7-9e9a-b971d2a58123" + "id": "9a01aad1-415f-4bd2-be75-f7cd200dbbaa" }, { "cell_type": "code", @@ -2361,7 +2361,7 @@ "n = 10**5\n", "result = [sum(np.random.choice(X, len(X), replace=True)) for _ in range(n)] # \u624b\u98064" ], - "id": "aaa6840c-0f41-4b38-be90-fbcd8769996b" + "id": "036c91a8-e0aa-4520-883a-46356bda479a" }, { "cell_type": "code", @@ -2372,7 +2372,7 @@ "import matplotlib.pyplot as plt\n", "plt.hist(result, bins=range(0, 16))" ], - "id": "36eb311c-6543-4097-8480-dc46ada1ee13" + "id": "e121cf3c-1503-4cc8-83d1-40b39f2cd254" }, { "cell_type": "code", @@ -2382,16 +2382,16 @@ "source": [ "np.quantile(result, [0.025, 0.975])" ], - "id": "af43addd-2e5f-4b8f-8cab-d4a7fc661052" + "id": "d7aea5da-8cc9-429b-b5ab-bbd73ce695dc" }, { - "id": "f86c14c4", + "id": "60322c71", "cell_type": "markdown", "source": "# 5 \u524d\u51e6\u7406\n\n\n", "metadata": {} }, { - "id": "266a4a00", + "id": "1e0e656b", "cell_type": "markdown", "source": "## 5.1 \u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f", "metadata": {} @@ -2404,7 +2404,7 @@ "source": [ "!wget https://raw.githubusercontent.com/taroyabuki/fromzero/master/data/exam.csv" ], - "id": "dc1308bf-f924-4177-b8aa-10b716a85c23" + "id": "e350f26b-f159-4383-80c0-b4916c1d6b08" }, { "cell_type": "code", @@ -2416,7 +2416,7 @@ "my_df = pd.read_csv('exam.csv')\n", "my_df" ], - "id": "e9dac051-c56f-44ef-84d3-01bfefde3465" + "id": "4c58febd-07e8-4f0c-bb41-5bef5488625d" }, { "cell_type": "code", @@ -2428,7 +2428,7 @@ " '/fromzero/master/data/exam.csv')\n", "my_df = pd.read_csv(my_url)" ], - "id": "280384ec-a471-404b-bb80-bd8ed3204ca7" + "id": "6d6af307-f7f8-4913-8ba1-f5c3c32de19b" }, { "cell_type": "code", @@ -2440,7 +2440,7 @@ " index_col='name')\n", "my_df2" ], - "id": "040d0c56-4a3e-4b25-acf7-64ba83ed5471" + "id": "b2f79c02-4a8d-447d-869b-188caec6fa23" }, { "cell_type": "code", @@ -2450,7 +2450,7 @@ "source": [ "my_df.to_csv('exam2.csv', index=False)" ], - "id": "f78f9e88-23fa-4e9d-b35b-15f499eae0df" + "id": "919496f3-91b6-4435-8122-c26cc831066a" }, { "cell_type": "code", @@ -2460,7 +2460,7 @@ "source": [ "my_df2.to_csv('exam3.csv')" ], - "id": "118f0f43-499a-45d4-87fe-3c129938de93" + "id": "1212a9fd-1a01-4b8a-9efa-4f2e121394c4" }, { "cell_type": "code", @@ -2471,7 +2471,7 @@ "my_df = pd.read_csv('exam.csv',\n", " encoding='UTF-8')" ], - "id": "b5f46a6c-00dc-43dd-8a5f-35b04bc36ae9" + "id": "fbec5be9-d0e7-4e23-9b0f-78f5186715c8" }, { "cell_type": "code", @@ -2481,7 +2481,7 @@ "source": [ "my_df.to_csv('exam2.csv', index=False, encoding='UTF-8')" ], - "id": "b5e9f853-3fb5-4096-9f1e-f001a05839b4" + "id": "f521db3d-e377-4ac7-8f4b-c86de6713168" }, { "cell_type": "code", @@ -2492,7 +2492,7 @@ "my_url = 'https://taroyabuki.github.io/fromzero/exam.html'\n", "my_tables = pd.read_html(my_url)" ], - "id": "09347bcf-a932-4d06-908e-3faf2f0a6e65" + "id": "b4171056-93ce-4039-b415-01cb6b66d8f8" }, { "cell_type": "code", @@ -2502,7 +2502,7 @@ "source": [ "my_tables" ], - "id": "d74e7953-eeaf-4f78-a4be-392b8708806e" + "id": "1c640d23-fae3-4435-9777-ac5276f8e16c" }, { "cell_type": "code", @@ -2512,7 +2512,7 @@ "source": [ "my_tables[0]" ], - "id": "749abf91-6aff-4041-9213-e5465b5f9ab3" + "id": "f0314834-a432-4df3-8389-5eb041e854ad" }, { "cell_type": "code", @@ -2524,7 +2524,7 @@ "my_data = my_tables[0].iloc[:, 1:]\n", "my_data" ], - "id": "1236b41e-fb03-4ce1-9b68-c27e9a54f8a6" + "id": "40eeac6e-38d9-4167-a9ed-3ad387ccca31" }, { "cell_type": "code", @@ -2538,7 +2538,7 @@ "#my_data = pd.read_json('exam.json') # \uff08\u30d5\u30a1\u30a4\u30eb\u3092\u4f7f\u3046\u5834\u5408\uff09\n", "my_data" ], - "id": "ab4f7a0e-b3f9-46d5-9719-7bb33c80516b" + "id": "86eb3b56-7c49-4ec5-970b-fcedc0707d14" }, { "cell_type": "code", @@ -2557,7 +2557,7 @@ "#my_tree = ET.parse('exam.xml') # \uff08\u30d5\u30a1\u30a4\u30eb\u3092\u4f7f\u3046\u5834\u5408\uff09\n", "my_ns = '{https://www.example.net/ns/1.0}' # \u540d\u524d\u7a7a\u9593" ], - "id": "4ef09bbe-4595-4cc5-ab4e-ba826d0fafb7" + "id": "fe6cbf78-1d27-4769-bdb0-f04f38d01cba" }, { "cell_type": "code", @@ -2567,7 +2567,7 @@ "source": [ "my_records = my_tree.findall(f'.//{my_ns}record')" ], - "id": "df781e90-e2af-4e0f-bc97-0cb2ef513cfe" + "id": "84c0fcd0-3ee6-4c4b-a4b7-aa60e6fde031" }, { "cell_type": "code", @@ -2581,7 +2581,7 @@ " my_dic2 = {child.tag.replace(my_ns, ''): child.text for child in list(record)}\n", " return {**my_dic1, **my_dic2} # \u8f9e\u66f8\u3092\u7d50\u5408\u3059\u308b\uff0e" ], - "id": "83a409ab-58a8-4252-84b9-135678824c0c" + "id": "aab966cf-9358-4019-bfaf-a4e49f77600b" }, { "cell_type": "code", @@ -2594,10 +2594,10 @@ "my_data['math'] = pd.to_numeric(my_data['math'])\n", "my_data" ], - "id": "3c64e1d8-f19b-4819-be5e-04cf7c3a45af" + "id": "1e07d23d-c861-42df-a4cb-45f52061a837" }, { - "id": "d5eb074c", + "id": "5e295f28", "cell_type": "markdown", "source": "## 5.2 \u30c7\u30fc\u30bf\u306e\u5909\u63db", "metadata": {} @@ -2620,7 +2620,7 @@ "\n", "z1" ], - "id": "e633c319-4df5-4d02-a81a-710e4478de1b" + "id": "91c61925-b898-4b9a-bc65-759582656b11" }, { "cell_type": "code", @@ -2630,7 +2630,7 @@ "source": [ "z1.mean(), np.std(z1, ddof=1)" ], - "id": "168d1e81-3ada-42ca-acae-72d0505811b4" + "id": "1359c00b-0ea6-47f7-9833-ab1776164d35" }, { "cell_type": "code", @@ -2640,7 +2640,7 @@ "source": [ "z1 * np.std(x1, ddof=1) + np.mean(x1)" ], - "id": "b7665db2-2fa3-4cf4-b468-3de04177356f" + "id": "8369b328-6efb-4f15-b455-2c4116aa29d2" }, { "cell_type": "code", @@ -2653,7 +2653,7 @@ " np.std(x1, ddof=1))\n", "z2.mean(), np.std(z2, ddof=1)" ], - "id": "ab5a7138-6824-4553-bbdd-1bf9ef2b68c1" + "id": "eb66a4aa-df22-4aa1-8275-78edcf66bcf5" }, { "cell_type": "code", @@ -2677,7 +2677,7 @@ "else my_enc.get_feature_names_out()\n", "pd.DataFrame(tmp, columns=my_names)" ], - "id": "130afe19-a78e-4484-bcfc-7277e47dd58b" + "id": "170a83b1-9511-48d3-bf4a-002b58b0fb48" }, { "cell_type": "code", @@ -2692,7 +2692,7 @@ " my_df2[['class']]).toarray()\n", "pd.DataFrame(tmp, columns=my_names)" ], - "id": "64ac0d86-29c8-4f64-9bca-c1abbd45a425" + "id": "69bceb4f-4235-4021-a4b9-48d3ec662dca" }, { "cell_type": "code", @@ -2713,22 +2713,22 @@ " my_df2[['class']]).toarray()\n", "pd.DataFrame(tmp, columns=my_names)" ], - "id": "b8327589-2196-4b33-a2b3-72a73e14ebd4" + "id": "2d436a69-d104-4e72-9d54-f4be60561ba5" }, { - "id": "7b9b8a05", + "id": "12b1cad0", "cell_type": "markdown", "source": "# 6 \u6a5f\u68b0\u5b66\u7fd2\u306e\u76ee\u7684\u30fb\u30c7\u30fc\u30bf\u30fb\u624b\u6cd5\n\n\n", "metadata": {} }, { - "id": "aefed9b1", + "id": "4c8bb155", "cell_type": "markdown", "source": "## 6.1 \u6a5f\u68b0\u5b66\u7fd2\u306e\u76ee\u7684\uff08\u672c\u66f8\u306e\u5834\u5408\uff09\n\n\n", "metadata": {} }, { - "id": "77fbcb18", + "id": "c1e68e92", "cell_type": "markdown", "source": "## 6.2 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u30c7\u30fc\u30bf", "metadata": {} @@ -2744,7 +2744,7 @@ "iris.head()\n", "# \u4ee5\u4e0b\u7701\u7565" ], - "id": "bf3b7533-846c-46d1-8e18-cf76c2a7cfe0" + "id": "9cbb8974-8be5-4628-af1d-fff6a4ae4a8e" }, { "cell_type": "code", @@ -2757,7 +2757,7 @@ "iris.head()\n", "# \u4ee5\u4e0b\u7701\u7565" ], - "id": "69072476-81c1-4f60-98e0-aaf7795c2687" + "id": "2f8cc959-c08b-464e-9289-97f911ed471c" }, { "cell_type": "code", @@ -2773,28 +2773,28 @@ "iris.head()\n", "# \u4ee5\u4e0b\u7701\u7565" ], - "id": "bb3ce3e5-cdf4-4a80-b4a0-95ea5fed4347" + "id": "51238f34-7990-4f86-9483-ff45f89c9e4b" }, { - "id": "efbb8eaa", + "id": "9a89a338", "cell_type": "markdown", "source": "## 6.3 \u6a5f\u68b0\u5b66\u7fd2\u306e\u305f\u3081\u306e\u624b\u6cd5\n\n\n", "metadata": {} }, { - "id": "9b27ff77", + "id": "a55e20ea", "cell_type": "markdown", "source": "# 7 \u56de\u5e301\uff08\u5358\u56de\u5e30\uff09\n\n\n", "metadata": {} }, { - "id": "7873a9bf", + "id": "21c87cff", "cell_type": "markdown", "source": "## 7.1 \u81ea\u52d5\u8eca\u306e\u505c\u6b62\u8ddd\u96e2\n\n\n", "metadata": {} }, { - "id": "1951f7c0", + "id": "ce0f2424", "cell_type": "markdown", "source": "## 7.2 \u30c7\u30fc\u30bf\u306e\u78ba\u8a8d", "metadata": {} @@ -2808,7 +2808,7 @@ "import statsmodels.api as sm\n", "my_data = sm.datasets.get_rdataset('cars', 'datasets').data" ], - "id": "d3e22973-9b9b-4f10-a244-b24ea758cf49" + "id": "efe25a4f-a3d1-4a6e-9d04-6a17581ea38e" }, { "cell_type": "code", @@ -2818,7 +2818,7 @@ "source": [ "my_data.shape" ], - "id": "c83f0355-b9fb-40e7-850b-cc862363bd59" + "id": "aad5f546-73d7-459c-bd09-0ff3a919c8f2" }, { "cell_type": "code", @@ -2828,7 +2828,7 @@ "source": [ "my_data.head()" ], - "id": "58135523-0a67-4c13-9000-ce110cc1eadf" + "id": "50922d0d-3bc9-4be9-a6f6-e3175daf759c" }, { "cell_type": "code", @@ -2838,7 +2838,7 @@ "source": [ "my_data.describe()" ], - "id": "6b3d27f7-d438-4e67-b123-c985c95850cd" + "id": "fc3a80f4-3254-4332-a232-821ae08fd014" }, { "cell_type": "code", @@ -2848,10 +2848,10 @@ "source": [ "my_data.plot(x='speed', style='o')" ], - "id": "a135e3eb-dc58-4874-9a32-13503648cf2d" + "id": "0be73640-59dc-433b-9290-0022e4d2c56d" }, { - "id": "c9867783", + "id": "9a3ec3b1", "cell_type": "markdown", "source": "## 7.3 \u56de\u5e30\u5206\u6790", "metadata": {} @@ -2872,7 +2872,7 @@ "ax.set_xlim(4, 25)\n", "ax.set_ylim(-5, 125)" ], - "id": "0505447a-1d61-4902-8239-5929c7f160e6" + "id": "3099a786-a0da-4be5-80f6-c2c9eb2298aa" }, { "cell_type": "code", @@ -2884,7 +2884,7 @@ "my_data = sm.datasets.get_rdataset('cars', 'datasets').data\n", "X, y = my_data[['speed']], my_data['dist']" ], - "id": "ac2d8896-4617-4524-a9b7-5634f715909f" + "id": "45e916e6-d73f-4010-8e46-a8426e8be4c3" }, { "cell_type": "code", @@ -2902,7 +2902,7 @@ "# \u307e\u3068\u3081\u3066\u5b9f\u884c\u3057\u3066\u3082\u3088\u3044\uff0e\n", "# my_model = LinearRegression().fit(X, y)" ], - "id": "d8c7a474-3d07-4fb0-ad32-475e59def31f" + "id": "d3ca2fa1-b10d-4e19-b683-f0f20d688157" }, { "cell_type": "code", @@ -2912,7 +2912,7 @@ "source": [ "my_model.intercept_, my_model.coef_" ], - "id": "f52713d6-7bb4-49a1-a8de-f3a8f5f846d7" + "id": "c1b6a307-ed8c-486c-adf1-a3435ea74aa8" }, { "cell_type": "code", @@ -2923,7 +2923,7 @@ "tmp = [[21.5]]\n", "my_model.predict(tmp)" ], - "id": "04fd8b09-a5ce-4c74-bb97-286520a12eb6" + "id": "630e6ec1-8602-4c31-9113-e0209f661bec" }, { "cell_type": "code", @@ -2939,7 +2939,7 @@ " 100)})\n", "tmp['model'] = my_model.predict(tmp)" ], - "id": "733aa4c0-8a8b-4225-a3f2-c38465c5d849" + "id": "5e326fe2-42c8-4139-a1cf-7a271b3780b7" }, { "cell_type": "code", @@ -2950,10 +2950,10 @@ "pd.concat([my_data, tmp]).plot(\n", " x='speed', style=['o', '-'])" ], - "id": "e89d6918-4fe4-44cc-b1b9-9c4123bbfe41" + "id": "31e37b9e-a1e7-4199-8213-b60443c68dab" }, { - "id": "abcd9f00", + "id": "a840b331", "cell_type": "markdown", "source": "## 7.4 \u5f53\u3066\u306f\u307e\u308a\u306e\u826f\u3055\u306e\u6307\u6a19", "metadata": {} @@ -2977,7 +2977,7 @@ "y_ = my_model.predict(X)\n", "my_data['y_'] = y_" ], - "id": "26f5ef52-b12b-43c9-8c99-c71bdb8aff66" + "id": "5d4c027e-3c78-410f-ba98-f1633d9a448e" }, { "cell_type": "code", @@ -2990,7 +2990,7 @@ "my_data['residual'] = y - y_\n", "my_data.head()" ], - "id": "b7050afa-6197-44b0-a185-2c4309e19918" + "id": "1bafe1e7-7536-4665-8762-b7f23d599be6" }, { "cell_type": "code", @@ -3002,7 +3002,7 @@ "my_data.plot(x='speed', y='y_', style='-', legend=False, ax=ax)\n", "ax.vlines(x=X, ymin=y, ymax=y_, linestyles='dotted')" ], - "id": "9d722112-d79c-449f-8f40-53b603dc63af" + "id": "fec133f9-c6df-4995-b4fe-aed01b5b6c82" }, { "cell_type": "code", @@ -3014,7 +3014,7 @@ "# \u3042\u308b\u3044\u306f\n", "(my_data['residual']**2).mean()**0.5\n" ], - "id": "eaaba540-b6f4-4331-aa8c-55199f2d6224" + "id": "748941dd-96b2-41cc-8db4-1b1c27a28c87" }, { "cell_type": "code", @@ -3026,7 +3026,7 @@ "# \u3042\u308b\u3044\u306f\n", "r2_score(y_true=y, y_pred=y_)" ], - "id": "a5f05be9-dff9-4d2c-bb5c-d1a520490c2f" + "id": "8753d35a-1dec-4670-8172-627f770177df" }, { "cell_type": "code", @@ -3037,7 +3037,7 @@ "import numpy as np\n", "np.corrcoef(y, y_)[0, 1]**2" ], - "id": "0a462799-7acb-448d-93f5-04f71b09905f" + "id": "7077803c-0d1a-450d-b98f-b7a3db6b9502" }, { "cell_type": "code", @@ -3056,7 +3056,7 @@ "\n", "np.corrcoef(y, y_)[0, 1]**2" ], - "id": "2bf3669d-d0af-4c3f-98d6-0c8c945adb3f" + "id": "47818245-74d1-4f14-8ae0-8d48f7293727" }, { "cell_type": "code", @@ -3076,7 +3076,7 @@ "my_sample = my_data.iloc[my_idx, ]\n", "X, y = my_sample[['speed']], my_sample['dist']" ], - "id": "128c843b-ef1d-4aad-8712-84c5316ebb05" + "id": "aa198472-b5b5-4f60-96a0-b3ee2cd3ed00" }, { "cell_type": "code", @@ -3091,7 +3091,7 @@ "my_model.fit(X5, y)\n", "y_ = my_model.predict(X5)" ], - "id": "9fd75742-05ee-4b63-aa1f-b8b745a06669" + "id": "5bd09527-746a-40c0-ab66-e5f27c0c2aee" }, { "cell_type": "code", @@ -3105,7 +3105,7 @@ "\n", "np.corrcoef(y, y_)[0, 1]**2" ], - "id": "94571017-0700-4fb6-98f9-30bd1ae427be" + "id": "ec3e7eff-f3d1-41a6-a801-ab13eb752e98" }, { "cell_type": "code", @@ -3123,10 +3123,10 @@ "my_df = pd.concat([my_data, my_sample, tmp])\n", "my_df.plot(x='speed', style=['o', 'o', '-'], ylim=(0, 130))" ], - "id": "7df483d7-e0d2-4dc8-bfbc-5f8dd667bdc4" + "id": "8e769bf8-d340-4771-9401-ae24aaed4c47" }, { - "id": "63c576fb", + "id": "020b5703", "cell_type": "markdown", "source": "## 7.5 K\u6700\u8fd1\u508d\u6cd5", "metadata": {} @@ -3156,7 +3156,7 @@ " 100)})\n", "tmp['model'] = my_model.predict(tmp)" ], - "id": "d41fb801-082f-44d1-8a1c-080a23946885" + "id": "f8975993-12ed-4f2e-b888-2e4e0a880d18" }, { "cell_type": "code", @@ -3167,7 +3167,7 @@ "pd.concat([my_data, tmp]).plot(\n", " x='speed', style=['o', '-'])" ], - "id": "7e596c45-279a-4359-b4ab-015249ac63df" + "id": "d06d8eb6-81db-4819-8ab3-7c28da561c0d" }, { "cell_type": "code", @@ -3183,10 +3183,10 @@ "\n", "np.corrcoef(y, y_)[0, 1]**2" ], - "id": "7d1dc486-019d-4396-85b1-f0060b817e06" + "id": "1ce0fb43-37b6-4e0c-a428-8fbd9c84065f" }, { - "id": "660e5a0c", + "id": "43b6f55d", "cell_type": "markdown", "source": "## 7.6 \u691c\u8a3c", "metadata": {} @@ -3217,7 +3217,7 @@ "# \u5e73\u5747\u3092\u6c7a\u5b9a\u4fc2\u65701\uff08\u691c\u8a3c\uff09\u3068\u3059\u308b\uff0e\n", "my_scores.mean()" ], - "id": "f373627a-9fc2-4aff-845a-9b3de0fb8b57" + "id": "c825edfd-64ea-48da-a2ee-3729c532ace9" }, { "cell_type": "code", @@ -3229,7 +3229,7 @@ " scoring='neg_root_mean_squared_error')\n", "-my_scores.mean()" ], - "id": "7eadb4ff-d98c-4300-a953-5fb620248625" + "id": "483bffb9-521d-4d9c-b4d7-bba44f86a3db" }, { "cell_type": "code", @@ -3248,7 +3248,7 @@ "my_model = LinearRegression().fit(X, y)\n", "y_ = my_model.predict(X)" ], - "id": "526e847f-be93-4ffa-afba-0326eb7ba6f3" + "id": "378520f9-9d6b-4da1-9a5b-87a33130e497" }, { "cell_type": "code", @@ -3267,7 +3267,7 @@ "# \u6c7a\u5b9a\u4fc2\u65706\uff08\u8a13\u7df4\uff09\n", "np.corrcoef(y, y_)[0, 1]**2" ], - "id": "e42e8886-ccb3-4e66-be0b-55947e17e1b5" + "id": "9a3e0500-1203-4fd4-b662-e2a928b5f6d1" }, { "cell_type": "code", @@ -3282,7 +3282,7 @@ "my_scores = cross_val_score(my_model, X, y, scoring='r2') # scoring='r2'\u306f\u7701\u7565\u53ef\n", "my_scores.mean()" ], - "id": "2f2fc8d2-368f-4459-9051-7a9340f0eb72" + "id": "8438bdad-8b66-42fe-a177-2f6ae25eaa4c" }, { "cell_type": "code", @@ -3300,7 +3300,7 @@ " scoring='neg_root_mean_squared_error')\n", "(my_scores2**2).mean()**0.5" ], - "id": "033203ef-ca0c-415c-b599-e889fc9f0f0c" + "id": "08f8cddc-e9cd-436b-9e76-e79733489275" }, { "cell_type": "code", @@ -3310,7 +3310,7 @@ "source": [ "-my_scores2.mean()" ], - "id": "4af312ac-2247-4c69-b266-1b1cb0663782" + "id": "d21e59e2-f36e-4020-ab1b-c0f4f7639e3f" }, { "cell_type": "code", @@ -3336,7 +3336,7 @@ " KNeighborsRegressor(n_neighbors=5),\n", " X, y, cv=LeaveOneOut(), scoring='neg_mean_squared_error')" ], - "id": "fcd68a6a-83cf-4334-89ff-9d56264d7012" + "id": "b0c4bebf-81b9-4767-899e-5d59f14ac600" }, { "cell_type": "code", @@ -3348,7 +3348,7 @@ "\n", "(-my_knn_socres.mean())**0.5" ], - "id": "6cb66982-4071-4aeb-ac5b-925041fcc67a" + "id": "f1140d8b-b64b-48a9-8b68-4b5245bce108" }, { "cell_type": "code", @@ -3361,7 +3361,7 @@ " 'knn': -my_knn_socres})\n", "my_df.head()" ], - "id": "15e843a8-3cee-4269-8b84-734e08bbc1d0" + "id": "96517b6c-22b9-4054-9da3-c7b387431ade" }, { "cell_type": "code", @@ -3371,7 +3371,7 @@ "source": [ "my_df.boxplot().set_ylabel(\"$r^2$\")" ], - "id": "fd4fcc74-d65c-4b6a-9b73-97c357c1262b" + "id": "bbeff9cd-e5c2-4c2b-8a24-2419cf083a1b" }, { "cell_type": "code", @@ -3385,10 +3385,10 @@ "\n", "d.tconfint_mean(alpha=0.05, alternative='two-sided') # \u4fe1\u983c\u533a\u9593" ], - "id": "da791c26-aa1f-4476-958f-8c69382da2df" + "id": "661305c2-a68d-4114-b32f-df79ed4341e9" }, { - "id": "4ffb6b46", + "id": "f0f9d7aa", "cell_type": "markdown", "source": "## 7.7 \u30d1\u30e9\u30e1\u30fc\u30bf\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0", "metadata": {} @@ -3416,7 +3416,7 @@ " scoring='neg_mean_squared_error')\n", "my_search.fit(X, y)" ], - "id": "b9501796-9a0b-4bd9-86d4-006e36ab6c50" + "id": "de2de03d-f2f4-421f-b46d-6d02fa00a25a" }, { "cell_type": "code", @@ -3428,7 +3428,7 @@ "my_scores = (-tmp['mean_test_score'])**0.5 # RMSE\n", "my_results = pd.DataFrame(tmp['params']).assign(validation=my_scores)" ], - "id": "1684c1a6-c5d8-4ee1-9e91-9e616f1cfd07" + "id": "d0c70b3d-67dc-4940-b292-dcd52ab51f29" }, { "cell_type": "code", @@ -3438,7 +3438,7 @@ "source": [ "my_results.head()" ], - "id": "eb0a4538-1864-4fa1-aa0a-7bb5f3975c6c" + "id": "f5ec2e56-28fa-4f8b-ae86-c8e58595094a" }, { "cell_type": "code", @@ -3450,7 +3450,7 @@ " style='o-',\n", " ylabel='RMSE')" ], - "id": "9d737195-f5bf-427d-9df6-a6dd5da04225" + "id": "f8655e3a-9591-4234-a0ca-ecedd7811076" }, { "cell_type": "code", @@ -3460,7 +3460,7 @@ "source": [ "my_search.best_params_" ], - "id": "928be82f-4a20-4069-80b8-abe942f9cab7" + "id": "50254c08-3871-4838-a8b8-40131c60f606" }, { "cell_type": "code", @@ -3470,7 +3470,7 @@ "source": [ "(-my_search.best_score_)**0.5" ], - "id": "a5482fdd-1943-42e2-ba8b-f0fbe944eba9" + "id": "c3a78b5c-62e7-4f5e-9b7b-5457214b762c" }, { "cell_type": "code", @@ -3482,7 +3482,7 @@ "y_ = my_model.predict(X)\n", "mean_squared_error(y_, y)**0.5" ], - "id": "622b031f-fe4e-41ee-bc1d-aa0439c28562" + "id": "bd161bfb-be21-4f9c-a7a2-676ddc1613eb" }, { "cell_type": "code", @@ -3512,7 +3512,7 @@ "\n", "my_results = pd.Series(range(1, 16)).apply(my_loocv)" ], - "id": "6d0916ea-9627-4356-aa47-a122cfa9dd23" + "id": "2f3aa4ef-2d0a-4693-a074-a8cbf558b2ee" }, { "cell_type": "code", @@ -3524,16 +3524,16 @@ " style='o-',\n", " ylabel='RMSE')" ], - "id": "4fcba86c-a8f5-482d-bbef-809023f04408" + "id": "a48413f7-1761-4421-840c-48d02be45017" }, { - "id": "7fd8cd01", + "id": "fca716f4", "cell_type": "markdown", "source": "# 8 \u56de\u5e302\uff08\u91cd\u56de\u5e30\uff09\n\n\n", "metadata": {} }, { - "id": "7832a5fe", + "id": "06fd8dce", "cell_type": "markdown", "source": "## 8.1 \u30d6\u30c9\u30a6\u306e\u751f\u80b2\u6761\u4ef6\u3068\u30ef\u30a4\u30f3\u306e\u4fa1\u683c", "metadata": {} @@ -3550,7 +3550,7 @@ "tmp.describe()\n", "# \u4ee5\u4e0b\u7701\u7565" ], - "id": "004a2eae-c753-4ff4-891c-75d250ae576f" + "id": "1ae5be78-f715-4b14-a544-424cb27dd997" }, { "cell_type": "code", @@ -3561,7 +3561,7 @@ "my_data = tmp.iloc[:, 2:].dropna()\n", "my_data.head()" ], - "id": "eb94f93a-07e6-4f4f-8cdd-ed5a2c21bcd9" + "id": "f07272e7-0d79-431e-b4cc-73655f2e1ba5" }, { "cell_type": "code", @@ -3571,7 +3571,7 @@ "source": [ "my_data.shape" ], - "id": "e4838165-69e8-4ee4-b411-39cec0cf31e9" + "id": "8f31166c-5fbb-4028-9fae-54e68a38283c" }, { "cell_type": "code", @@ -3582,7 +3582,7 @@ "my_data.to_csv('wine.csv',\n", " index=False)" ], - "id": "20958adb-c774-44c7-9aa4-02e15191dbd4" + "id": "7ce570f4-5c4c-4ba2-b1bd-e35338c94a82" }, { "cell_type": "code", @@ -3595,10 +3595,10 @@ " '/fromzero/master/data/wine.csv')\n", "my_data = pd.read_csv(my_url)" ], - "id": "7b563ddb-0131-4100-a513-036722a3e1fd" + "id": "ddd076c5-a3ca-45c5-b5da-906b8c7f41cd" }, { - "id": "563dc27b", + "id": "1bd9951e", "cell_type": "markdown", "source": "## 8.2 \u91cd\u56de\u5e30\u5206\u6790", "metadata": {} @@ -3622,7 +3622,7 @@ "\n", "my_model = LinearRegression().fit(X, y)" ], - "id": "601631a9-26f4-426c-937c-b4e735fc79a8" + "id": "5d8798a8-99ea-474b-acf2-3be520e34260" }, { "cell_type": "code", @@ -3635,7 +3635,7 @@ "pd.Series(my_model.coef_,\n", " index=X.columns)" ], - "id": "41fc5caa-229d-4411-ab37-26c2d4c03b21" + "id": "8669cde7-775c-4ca6-992d-6ad114002e41" }, { "cell_type": "code", @@ -3646,7 +3646,7 @@ "my_test = [[500, 17, 120, 2]]\n", "my_model.predict(my_test)" ], - "id": "1094a8a4-600b-4904-b2ce-d433bc13908d" + "id": "daca1213-c9f4-47ac-86e0-5635cda35a0a" }, { "cell_type": "code", @@ -3662,7 +3662,7 @@ "\n", "np.corrcoef(y, y_)[0, 1]**2" ], - "id": "bb4bf8aa-aac3-4b49-ac31-07cb5a7cabb5" + "id": "e8702872-f21a-4643-964e-e0fc56b5c41b" }, { "cell_type": "code", @@ -3675,7 +3675,7 @@ " scoring='neg_mean_squared_error')\n", "(-my_scores.mean())**0.5" ], - "id": "e1e08087-b7f8-43ed-9816-7fce3a516143" + "id": "302d36b4-5d4b-4956-bee6-949c7ff20850" }, { "cell_type": "code", @@ -3689,10 +3689,10 @@ "pd.Series(b,\n", " index=list(X.columns) + ['b0'])" ], - "id": "bccd3bbc-d162-46d7-b355-f8a43646e037" + "id": "c1a78921-42ce-4da7-b55c-60e784af9b84" }, { - "id": "ae8ad214", + "id": "d78bb238", "cell_type": "markdown", "source": "## 8.3 \u6a19\u6e96\u5316", "metadata": {} @@ -3716,7 +3716,7 @@ "pd.DataFrame(StandardScaler().fit_transform(X), columns=X.columns\n", " ).boxplot(showmeans=True)" ], - "id": "a11bb703-7429-4de0-b834-ad2773e844e9" + "id": "9bca074c-b058-440c-9321-dcc06c5068cd" }, { "cell_type": "code", @@ -3729,7 +3729,7 @@ " ('lr', LinearRegression())])\n", "my_pipeline.fit(X, y)" ], - "id": "50d398f1-8448-45d0-b155-87f1b7758bcc" + "id": "ab5870f2-e4e0-430b-9f59-dbd30d07fddd" }, { "cell_type": "code", @@ -3744,7 +3744,7 @@ "pd.Series(my_lr.coef_,\n", " index=X.columns)" ], - "id": "1350a781-3a56-4503-9214-67bc7e51cd4b" + "id": "cc09302a-d901-4ce7-8778-83ec0610a155" }, { "cell_type": "code", @@ -3755,10 +3755,10 @@ "my_test = [[500, 17, 120, 2]]\n", "my_pipeline.predict(my_test)" ], - "id": "2c608d77-a2c7-4849-8af8-c7356c701c9e" + "id": "12e51861-2b90-4a67-a04f-b6324ec67cc7" }, { - "id": "d1ddd89f", + "id": "32c883ad", "cell_type": "markdown", "source": "## 8.4 \u5165\u529b\u5909\u6570\u306e\u6570\u3068\u30e2\u30c7\u30eb\u306e\u826f\u3055", "metadata": {} @@ -3783,7 +3783,7 @@ " v2=[i % 3 for i in range(n)])\n", "my_data2.head()" ], - "id": "8b55b6e6-eb4d-49df-82b8-0bf879ce5b85" + "id": "3d1284b6-95a5-4edd-83e3-8b83267574d0" }, { "cell_type": "code", @@ -3802,10 +3802,10 @@ " scoring='neg_mean_squared_error')\n", "(-my_scores.mean())**0.5" ], - "id": "f2338f06-e76d-43de-8025-e115ae3ba908" + "id": "4e66262f-8af2-4718-b743-6327196e4a4b" }, { - "id": "86ac0586", + "id": "3bdb2eb0", "cell_type": "markdown", "source": "## 8.5 \u5909\u6570\u9078\u629e", "metadata": {} @@ -3831,7 +3831,7 @@ " v2=[i % 3 for i in range(n)])\n", "X, y = my_data2.drop(columns=['LPRICE2']), my_data2['LPRICE2']" ], - "id": "627caeaa-5e2e-460d-9f03-d9c7252ca274" + "id": "a3556e94-b547-4e4e-8668-627edd67d32f" }, { "cell_type": "code", @@ -3858,10 +3858,10 @@ "my_model = my_search.best_estimator_ # \u6700\u826f\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u3067\u518d\u8a13\u7df4\u3057\u305f\u30e2\u30c7\u30eb\n", "my_search.best_estimator_.named_steps.sfs.get_support()" ], - "id": "a32df2e8-4b39-42eb-92eb-846953e5d392" + "id": "543e0396-5b49-483b-992e-928a918cab32" }, { - "id": "74a8434d", + "id": "0697fdd8", "cell_type": "markdown", "source": "## 8.6 \u88dc\u8db3\uff1a\u6b63\u5247\u5316", "metadata": {} @@ -3888,7 +3888,7 @@ "my_data = pd.read_csv(my_url)\n", "X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2']" ], - "id": "225a5616-cd56-4d8f-a5a9-32a82cfda985" + "id": "a4ad5699-0d66-4fbc-b604-96a85b54767a" }, { "cell_type": "code", @@ -3906,7 +3906,7 @@ " l1_ratio=B))])\n", "my_pipeline.fit(X, y)" ], - "id": "4cf4aa92-3bfe-4f22-b5ce-af82fdb6e9d2" + "id": "b722f2e7-8020-4c11-9bd5-03bebb3d714b" }, { "cell_type": "code", @@ -3920,7 +3920,7 @@ "pd.Series(my_enet.coef_,\n", " index=X.columns)" ], - "id": "a02e1071-1d38-46ec-9cbb-bdd696ead7eb" + "id": "bfc5629d-83c0-4606-b3e0-c25536f83bee" }, { "cell_type": "code", @@ -3932,7 +3932,7 @@ " [[500, 17, 120, 2]])\n", "my_pipeline.predict(my_test)" ], - "id": "10f72c2c-c68a-441e-bd28-214249c6595c" + "id": "d5d2e961-3df8-4315-9527-f3cefe73d0e4" }, { "cell_type": "code", @@ -3956,7 +3956,7 @@ " xlabel='log A ( = log alpha)',\n", " ylabel='Coefficients')" ], - "id": "54632868-0495-4668-a6a6-5901c8eadcdd" + "id": "9e735552-5b55-4b79-9398-aa8c97fa0fc6" }, { "cell_type": "code", @@ -3979,7 +3979,7 @@ "\n", "my_search.best_params_ # \u6700\u826f\u30d1\u30e9\u30e1\u30fc\u30bf" ], - "id": "f131d957-a2be-4a86-a834-c814fbc97c57" + "id": "f4ac72b7-502d-4eee-8154-c96788d0456e" }, { "cell_type": "code", @@ -3998,7 +3998,7 @@ "my_results.plot(style='o-', xlabel='A ( = alpha)', ylabel='RMSE').legend(\n", " title='B ( = l1_ratio)')" ], - "id": "e122ac2d-fa2f-403c-9e45-64ea0fb0561c" + "id": "08cfe2b1-dc7f-4850-86ab-10adeda35d42" }, { "cell_type": "code", @@ -4008,10 +4008,10 @@ "source": [ "(-my_search.best_score_)**0.5" ], - "id": "1f431390-2566-41ef-98aa-dd7cb5a5177c" + "id": "569c87f8-6059-4763-9296-c7e780cdf7d6" }, { - "id": "585e06d4", + "id": "b128c876", "cell_type": "markdown", "source": "## 8.7 \u30cb\u30e5\u30fc\u30e9\u30eb\u30cd\u30c3\u30c8\u30ef\u30fc\u30af", "metadata": {} @@ -4028,7 +4028,7 @@ "y = 1 / (1 + np.exp(-x))\n", "plt.plot(x, y)" ], - "id": "84e20c3f-2179-4345-8c5b-2544450159c7" + "id": "3ee33c66-f1ea-4958-805d-1dd8e6f4acf1" }, { "cell_type": "code", @@ -4049,7 +4049,7 @@ "my_data = pd.read_csv(my_url)\n", "X, y = my_data.drop(columns=['LPRICE2']), my_data['LPRICE2']" ], - "id": "2d00449a-c666-497e-9f87-d3d0ae095fb6" + "id": "05d597a2-50d3-40fe-8c52-c17a2d9c7c72" }, { "cell_type": "code", @@ -4066,7 +4066,7 @@ " scoring='neg_mean_squared_error')\n", "warnings.simplefilter(\"default\", ConvergenceWarning) # \u3053\u308c\u4ee5\u964d\uff0c\u8b66\u544a\u3092\u8868\u793a\u3059\u308b\uff0e" ], - "id": "d2befcbb-e915-4d83-97bd-0cb50b538ea9" + "id": "2c5f846d-d01f-4a99-b44f-f34ad5256195" }, { "cell_type": "code", @@ -4076,7 +4076,7 @@ "source": [ "(-my_scores.mean())**0.5" ], - "id": "82773927-b1ff-417b-acb2-b80a1fe953be" + "id": "24c960b8-0e17-4fd0-8c52-5c82349d0d6d" }, { "cell_type": "code", @@ -4100,7 +4100,7 @@ "\n", "my_search.best_params_ # \u6700\u826f\u30d1\u30e9\u30e1\u30fc\u30bf" ], - "id": "71409a43-d66c-4c6a-8d07-715c05e81fec" + "id": "642f29a2-0243-46f5-8100-b0e7a9b90bc3" }, { "cell_type": "code", @@ -4110,16 +4110,16 @@ "source": [ "(-my_search.best_score_)**0.5" ], - "id": "0aef82dc-d17b-499e-b6be-1030238d967c" + "id": "15e613f5-c989-4788-bed3-5632c36d1ac3" }, { - "id": "3f7d81bc", + "id": "12689f6d", "cell_type": "markdown", "source": "# 9 \u5206\u985e1\uff08\u591a\u5024\u5206\u985e\uff09\n\n\n", "metadata": {} }, { - "id": "d292ba12", + "id": "8ee98fac", "cell_type": "markdown", "source": "## 9.1 \u30a2\u30e4\u30e1\u306e\u30c7\u30fc\u30bf", "metadata": {} @@ -4134,7 +4134,7 @@ "my_data = sm.datasets.get_rdataset('iris', 'datasets').data\n", "my_data.head()" ], - "id": "b0272a1b-20e9-4990-83c0-170f86d6d9f3" + "id": "545ebfcc-8296-479e-8cc4-33a5bc6bf11f" }, { "cell_type": "code", @@ -4145,10 +4145,10 @@ "my_data.describe()\n", "# \u4ee5\u4e0b\u7701\u7565" ], - "id": "0eaba8d5-eb12-4b14-bc87-a493837cbbdc" + "id": "a10dacd5-6317-47b2-8c0f-d6ffe5d5b6d3" }, { - "id": "bc08e97b", + "id": "3e0c22a1", "cell_type": "markdown", "source": "## 9.2 \u6728\u306b\u3088\u308b\u5206\u985e", "metadata": {} @@ -4170,7 +4170,7 @@ "my_model = tree.DecisionTreeClassifier(max_depth=2, random_state=0)\n", "my_model.fit(X, y)" ], - "id": "53b5ba4b-7e91-41da-b08f-c29a98bccbc2" + "id": "99e1ffbf-b74f-44ce-8e85-a10a54c5c855" }, { "cell_type": "code", @@ -4186,7 +4186,7 @@ " filled=True) # \u8272\u3092\u5857\u308b\uff0e\n", "graphviz.Source(my_dot)" ], - "id": "91efdca1-33fb-4a89-8a4f-39a766cc352e" + "id": "0c417d5a-30d3-43fb-8a5f-9130688ff1d5" }, { "cell_type": "code", @@ -4198,7 +4198,7 @@ " [6.5, 3.0, 5.0, 2.0]])\n", "my_model.predict(my_test)" ], - "id": "5f844de7-55ce-4a06-bc21-58312187c924" + "id": "1bd70ce7-b1bb-4821-9448-6aa646f4d985" }, { "cell_type": "code", @@ -4210,10 +4210,10 @@ " my_model.predict_proba(my_test),\n", " columns=my_model.classes_)" ], - "id": "92dac181-2976-4bc4-a2d7-543e96f62ad4" + "id": "65fb81bb-98c3-45e6-99cf-6adff57a7f08" }, { - "id": "f0c92acc", + "id": "9ecaa1d7", "cell_type": "markdown", "source": "## 9.3 \u6b63\u89e3\u7387", "metadata": {} @@ -4238,7 +4238,7 @@ "y_ = my_model.predict(X)\n", "confusion_matrix(y_true=y, y_pred=y_)" ], - "id": "ebe83ce3-1060-4ca4-98d1-03421b284bd0" + "id": "7d4ae674-55b4-41bb-a122-eb4746df5fcb" }, { "cell_type": "code", @@ -4251,7 +4251,7 @@ "y_ = my_model.predict(X)\n", "(y_ == y).mean()\n" ], - "id": "e4cb9d3e-b77b-4f6a-9531-1c2a5083ba36" + "id": "7b678df2-9da3-4839-aabc-4bdfa093754e" }, { "cell_type": "code", @@ -4261,7 +4261,7 @@ "source": [ "cross_val_score(my_model, X, y, cv=LeaveOneOut()).mean()" ], - "id": "58421182-aa3a-41e1-9fad-76265e18d23f" + "id": "2a8975ad-a20a-4cd9-b625-d396a3a8f02d" }, { "cell_type": "code", @@ -4275,7 +4275,7 @@ " n_jobs=-1).fit(X, y)\n", "my_search.best_params_, my_search.best_score_" ], - "id": "4e3ac6d6-13b1-434f-9547-b6df2e59ddeb" + "id": "1415fe1c-beea-44e5-a925-13187f68033f" }, { "cell_type": "code", @@ -4302,7 +4302,7 @@ "# \u6b63\u89e3\u7387\uff08\u691c\u8a3c\uff09\u306e\u6700\u5927\u5024\n", "my_results[my_results.Accuracy == my_results.Accuracy.max()]" ], - "id": "fd6d23f7-bee7-4db0-b748-c50b7088128f" + "id": "a88865f2-18c5-4558-8de5-5bde5daf06de" }, { "cell_type": "code", @@ -4319,10 +4319,10 @@ " filled=True)\n", "graphviz.Source(my_dot)" ], - "id": "8b13ef54-03cd-4fe5-806d-d6e4dda18f30" + "id": "e67c161c-f9fc-4b8b-b652-f8a4e46fbaf4" }, { - "id": "3dd711c8", + "id": "9f5d35bc", "cell_type": "markdown", "source": "## 9.4 \u8907\u6570\u306e\u6728\u3092\u4f7f\u3046\u65b9\u6cd5", "metadata": {} @@ -4353,7 +4353,7 @@ "\n", "my_search.cv_results_['mean_test_score']" ], - "id": "da7d6aa6-12d9-416c-9e63-e8cf9491d770" + "id": "7d0d9f79-643d-4c57-a590-603361c96ed4" }, { "cell_type": "code", @@ -4379,7 +4379,7 @@ "\n", "my_search.best_score_" ], - "id": "aa8cc21e-249a-4fe7-ab0d-f30b4e8364bb" + "id": "fdf8d994-5cb5-48d4-b21a-8121d33b3ecc" }, { "cell_type": "code", @@ -4391,10 +4391,10 @@ "tmp = pd.Series(my_model.feature_importances_, index=X.columns)\n", "tmp.sort_values().plot(kind='barh')" ], - "id": "64853307-d132-45c5-84cd-7db4f510eebf" + "id": "0e58c6c7-0abb-46dd-b227-c3cac19665a1" }, { - "id": "9b5b9ea7", + "id": "b2f06f54", "cell_type": "markdown", "source": "## 9.5 \u6b20\u640d\u306e\u3042\u308b\u30c7\u30fc\u30bf\u3067\u306e\u5b66\u7fd2", "metadata": {} @@ -4427,7 +4427,7 @@ "\n", "X, y = my_data.iloc[:, 0:4], my_data.Species" ], - "id": "92ae0f68-ccaa-4e70-85be-71003012957b" + "id": "b8fe6e41-89f6-4319-8ab7-9566c19dee9d" }, { "cell_type": "code", @@ -4441,7 +4441,7 @@ "my_scores = cross_val_score(my_pipeline, X, y, cv=LeaveOneOut(), n_jobs=-1)\n", "my_scores.mean()" ], - "id": "6fbe7009-f940-469e-819d-9c21b2282a5e" + "id": "5e4ca4ef-490b-4c5b-874c-b3d1b375e8d9" }, { "cell_type": "code", @@ -4460,10 +4460,10 @@ "\n", "my_scores.mean()" ], - "id": "0e55095d-439f-4d20-aadb-3fa92be86221" + "id": "e83cd19d-6754-425c-a992-b6745369c4d7" }, { - "id": "64b491ff", + "id": "fea6d1d9", "cell_type": "markdown", "source": "## 9.6 \u4ed6\u306e\u5206\u985e\u624b\u6cd5", "metadata": {} @@ -4484,7 +4484,7 @@ "my_scores = cross_val_score(KNeighborsClassifier(), X, y, cv=LeaveOneOut())\n", "my_scores.mean()" ], - "id": "06b38a93-cd0b-4a9b-8321-9aa3af2c25a9" + "id": "7c9468b7-a61f-4907-8242-a2b2dd3cc15d" }, { "cell_type": "code", @@ -4506,16 +4506,16 @@ "my_scores = cross_val_score(my_pipeline, X, y, cv=LeaveOneOut(), n_jobs=-1)\n", "my_scores.mean()" ], - "id": "c45ca2c3-b0e5-4822-9e8f-7267c3fb962b" + "id": "e44b553c-b1ea-477e-aa1a-929beaa3a6b1" }, { - "id": "29f4a9ea", + "id": "73909c42", "cell_type": "markdown", "source": "# 10 \u5206\u985e2\uff082\u5024\u5206\u985e\uff09\n\n\n", "metadata": {} }, { - "id": "47ba0e97", + "id": "08d073ff", "cell_type": "markdown", "source": "## 10.1 2\u5024\u5206\u985e\u306e\u6027\u80fd\u6307\u6a19", "metadata": {} @@ -4532,7 +4532,7 @@ "y = np.array([ 0, 1, 1, 0, 1, 0, 1, 0, 0, 1])\n", "y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5])" ], - "id": "eea81b26-7375-4e38-8042-c2bf186b062c" + "id": "9598c38c-faa8-418e-b869-f0d936384843" }, { "cell_type": "code", @@ -4543,7 +4543,7 @@ "y_ = np.array([1 if 0.5 <= p else 0 for p in y_score])\n", "y_" ], - "id": "4cdbad6b-3f57-41cd-9ff1-07e5b07516f3" + "id": "a5d4bf27-d818-4f30-b2b8-4a9abd399c71" }, { "cell_type": "code", @@ -4555,10 +4555,10 @@ "\n", "print(classification_report(y_true=y, y_pred=y_))" ], - "id": "b28e9a0d-9d95-4800-8ac4-19df12a3d0ff" + "id": "10ee4179-cdef-44d9-b5df-56e08e8affff" }, { - "id": "314a9aa1", + "id": "4d0a625d", "cell_type": "markdown", "source": "## 10.2 \u30c8\u30ec\u30fc\u30c9\u30aa\u30d5", "metadata": {} @@ -4580,7 +4580,7 @@ "[sum((y == 0) & (y_ == 1)) / sum(y == 0), # FPR\n", " sum((y == 1) & (y_ == 1)) / sum(y == 1)] # TPR" ], - "id": "f6092284-924e-4133-b006-7551d0a0498b" + "id": "c50ff1e0-c3bc-4114-bed6-fbb713d12030" }, { "cell_type": "code", @@ -4593,7 +4593,7 @@ " pos_label=1) # 1\u304c\u967d\u6027\u3067\u3042\u308b\uff0e\n", "RocCurveDisplay(fpr=my_fpr, tpr=my_tpr).plot()" ], - "id": "8ad01525-9e1b-4f72-bfbf-d6b6f210672b" + "id": "30b5f885-2184-442b-b86f-df5cef82c34a" }, { "cell_type": "code", @@ -4603,7 +4603,7 @@ "source": [ "auc(x=my_fpr, y=my_tpr)" ], - "id": "261c0df5-450b-4dc3-98c4-f48d23947720" + "id": "cdd7905a-857a-4461-ac6b-138a43680471" }, { "cell_type": "code", @@ -4614,7 +4614,7 @@ "[sum((y == 1) & (y_ == 1)) / sum(y == 1), # Recall == TPR\n", " sum((y == 1) & (y_ == 1)) / sum(y_ == 1)] # Precision" ], - "id": "a85eb631-01f3-4f6b-bc96-de9e80c01173" + "id": "b47ac1ae-c493-4ed0-845b-0d00baeb5899" }, { "cell_type": "code", @@ -4627,7 +4627,7 @@ " pos_label=1)\n", "PrecisionRecallDisplay(precision=my_precision, recall=my_recall).plot()" ], - "id": "71de68cf-b425-4520-b1f9-8c4b8f43f6ea" + "id": "d46f4a99-1977-462d-892e-d65d4b53dc97" }, { "cell_type": "code", @@ -4637,10 +4637,10 @@ "source": [ "auc(x=my_recall, y=my_precision)" ], - "id": "6337d8da-e327-42e4-8061-a4a486f1ef27" + "id": "34103287-609c-42a2-a4e6-ccbb744b201c" }, { - "id": "0d506771", + "id": "ff778929", "cell_type": "markdown", "source": "## 10.3 \u30bf\u30a4\u30bf\u30cb\u30c3\u30af", "metadata": {} @@ -4663,7 +4663,7 @@ " '/fromzero/master/data/titanic.csv')\n", "my_data = pd.read_csv(my_url)" ], - "id": "e1ec1cec-c11f-4570-9a20-6eda471b00a4" + "id": "6d86e238-1d5e-4be7-ae28-d45a1d21ea2b" }, { "cell_type": "code", @@ -4673,7 +4673,7 @@ "source": [ "my_data.head()" ], - "id": "e8ac7278-b1a9-4f9f-85f9-98935e0547e0" + "id": "24bcc4db-61ef-4ddc-a618-d60bc0e0b93d" }, { "cell_type": "code", @@ -4689,7 +4689,7 @@ " min_impurity_decrease=0.01))])\n", "my_pipeline.fit(X, y)" ], - "id": "46f27b12-f6a7-4a92-a017-c2edaa98c212" + "id": "0cfe5243-29ef-4f9e-a306-03d22b88a45d" }, { "cell_type": "code", @@ -4709,7 +4709,7 @@ " filled=True)\n", "graphviz.Source(my_dot)" ], - "id": "cc01935c-dda2-48e3-8189-faec55115b2d" + "id": "63046e78-249a-4ec9-bf4a-e728b7b68c49" }, { "cell_type": "code", @@ -4723,7 +4723,7 @@ " n_jobs=-1)\n", "my_scores.mean()" ], - "id": "127ae57d-f946-4695-94f4-470f122958a2" + "id": "f5af8941-6112-41d8-8289-76aaab6c093e" }, { "cell_type": "code", @@ -4744,10 +4744,10 @@ "\n", "RocCurveDisplay(fpr=my_fpr, tpr=my_tpr, roc_auc=my_auc).plot()" ], - "id": "a846f6c3-7c59-445b-8150-c0cd1b862bad" + "id": "f164d146-a934-4b91-8a9d-c2b2740c97cf" }, { - "id": "87bed5e6", + "id": "9b57e82c", "cell_type": "markdown", "source": "## 10.4 \u30ed\u30b8\u30b9\u30c6\u30a3\u30c3\u30af\u56de\u5e30", "metadata": {} @@ -4765,7 +4765,7 @@ "y = 1 / (1 + np.exp(-x))\n", "plt.plot(x, y)" ], - "id": "c55abff2-c89d-47bb-9fb4-60c867c1f908" + "id": "99842d76-ee95-4c96-beb6-fb54c8baa380" }, { "cell_type": "code", @@ -4773,7 +4773,9 @@ "metadata": {}, "outputs": [], "source": [ + "import sklearn\n", "import pandas as pd\n", + "from packaging.version import parse\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.model_selection import cross_val_score, LeaveOneOut\n", "from sklearn.pipeline import Pipeline\n", @@ -4785,11 +4787,12 @@ "\n", "X, y = my_data.iloc[:, 0:3], my_data.Survived\n", "\n", + "none = 'none' if parse(sklearn.__version__) < parse(\"1.4\") else None\n", "my_pipeline = Pipeline([('ohe', OneHotEncoder(drop='first')),\n", - " ('lr', LogisticRegression(penalty='none'))])\n", + " ('lr', LogisticRegression(penalty=none))])\n", "my_pipeline.fit(X, y)" ], - "id": "fda98632-a61f-44ba-bba3-399633a9f308" + "id": "d0535b4a-7cc8-4e37-ae82-a7e68aac7ca8" }, { "cell_type": "code", @@ -4808,7 +4811,7 @@ "pd.Series(my_lr.coef_[0],\n", " index=tmp)" ], - "id": "fe50c97b-8e60-4187-aa57-62f31f50ae8d" + "id": "845dda07-6424-4df6-b05d-c71434a4cc07" }, { "cell_type": "code", @@ -4822,16 +4825,16 @@ " n_jobs=-1)\n", "my_scores.mean()" ], - "id": "f23fb5a3-bb33-482d-b1e3-9db6b0bb458c" + "id": "51793c5e-aa8e-4580-9dcc-d8ba3accd6c9" }, { - "id": "140e9b4e", + "id": "dd027b08", "cell_type": "markdown", "source": "# 11 \u6df1\u5c64\u5b66\u7fd2\u3068AutoML\n\n\n", "metadata": {} }, { - "id": "d8ee57d0", + "id": "d69321dc", "cell_type": "markdown", "source": "## 11.1 Keras\u306b\u3088\u308b\u56de\u5e30", "metadata": {} @@ -4853,7 +4856,7 @@ " '/fromzero/master/data/wine.csv')\n", "tmp = pd.read_csv(my_url)" ], - "id": "b94e5454-c85c-4705-b41e-7b345e04853d" + "id": "a7b14d5f-2e99-431b-8cc4-c584150ef353" }, { "cell_type": "code", @@ -4863,7 +4866,7 @@ "source": [ "my_data = shuffle(tmp)" ], - "id": "fe7d5445-c4a8-4076-a342-da04ebade99f" + "id": "0d0324ba-d2f5-41ff-8333-2605b5924217" }, { "cell_type": "code", @@ -4876,7 +4879,7 @@ " my_data.drop(columns=['LPRICE2']))\n", "y = my_data['LPRICE2']" ], - "id": "95d94496-a532-40e6-ad2e-4127df7f9aa5" + "id": "3665fd12-dc83-4e2d-8c61-379530822aed" }, { "cell_type": "code", @@ -4889,7 +4892,7 @@ "plt.xlabel('x')\n", "plt.ylabel('ReLU(x)')" ], - "id": "2ce0c1a9-c073-4e2e-9f52-57f577227087" + "id": "9cd7b4f6-deff-4133-b084-b0ad54dc2e86" }, { "cell_type": "code", @@ -4903,7 +4906,7 @@ "\n", "my_model.summary() # \u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u306e\u6982\u8981" ], - "id": "3ec0b0ed-f516-4b94-8f39-4c0d40686a5d" + "id": "35f7ac95-eda8-431f-99a6-d8fe3b043ea5" }, { "cell_type": "code", @@ -4915,7 +4918,7 @@ " loss='mse',\n", " optimizer='rmsprop')" ], - "id": "2b6404e5-2d45-434e-8c90-03cd88595ca3" + "id": "80375dbb-8088-4645-81de-1ab96c699cb4" }, { "cell_type": "code", @@ -4927,7 +4930,7 @@ " patience=20,\n", " restore_best_weights=True)" ], - "id": "4e704f17-23a9-44e1-83d9-6ee0d7311e62" + "id": "f7574227-04ad-431e-8241-6a70bda831c3" }, { "cell_type": "code", @@ -4944,7 +4947,7 @@ " callbacks=[my_cb],\n", " verbose=0)" ], - "id": "fdc1cf05-5d2f-4463-bf49-75de0f6f114f" + "id": "c3f1876e-8cc7-4a1c-bdb5-77adef9f97cc" }, { "cell_type": "code", @@ -4955,7 +4958,7 @@ "tmp = pd.DataFrame(my_history.history)\n", "tmp.plot(xlabel='epoch')" ], - "id": "453acf0f-d4ea-4ecf-be77-f21ea61a723f" + "id": "a6eb54e3-62a4-4150-8c00-a8460f6bcea3" }, { "cell_type": "code", @@ -4965,7 +4968,7 @@ "source": [ "tmp.iloc[-1, ]" ], - "id": "7406ab95-a4eb-4290-8c5e-cae694ca1094" + "id": "45fd43f2-7cb5-4e49-a73f-b5e5d249a577" }, { "cell_type": "code", @@ -4976,10 +4979,10 @@ "y_ = my_model.predict(X)\n", "((y_.ravel() - y)**2).mean()" ], - "id": "8cd18414-03fd-4f7d-bfa5-9e2ddec69a38" + "id": "a372205e-a20f-442c-a51d-bca22e620c1f" }, { - "id": "4fffaa34", + "id": "27c6d333", "cell_type": "markdown", "source": "## 11.2 Keras\u306b\u3088\u308b\u5206\u985e", "metadata": {} @@ -5000,7 +5003,7 @@ "tmp = sm.datasets.get_rdataset('iris', 'datasets').data\n", "my_data = shuffle(tmp)" ], - "id": "294c1b96-2aa8-42de-9b6d-9841edc0857a" + "id": "ca2bcacb-d7c2-407b-9cee-79e62a28a1a0" }, { "cell_type": "code", @@ -5015,7 +5018,7 @@ "y = my_enc.fit_transform(\n", " my_data['Species'])" ], - "id": "9c94f595-87f7-45c5-96c6-19597df5986f" + "id": "0f71aee9-af6a-4deb-bf46-12ab831094bb" }, { "cell_type": "code", @@ -5027,7 +5030,7 @@ "my_model.add(layers.Dense(units=3, activation='relu', input_shape=[4]))\n", "my_model.add(layers.Dense(units=3, activation='softmax'))" ], - "id": "1cd141b2-8663-455d-abef-37b6cb6f9753" + "id": "aefc114d-e1c0-444a-8aed-8dd755af4ce5" }, { "cell_type": "code", @@ -5039,7 +5042,7 @@ " optimizer='rmsprop',\n", " metrics=['accuracy'])" ], - "id": "ebb3afca-0e25-49d5-b1c1-ddd33880570a" + "id": "aa02871d-75b8-4d01-b935-fd6e78d0f077" }, { "cell_type": "code", @@ -5063,7 +5066,7 @@ "tmp = pd.DataFrame(my_history.history)\n", "tmp.plot(xlabel='epoch')" ], - "id": "c0505af7-9c4f-470e-aa47-95c352e24caf" + "id": "92ba7ff2-3bb9-40e1-8ad1-dfeb08943551" }, { "cell_type": "code", @@ -5073,7 +5076,7 @@ "source": [ "tmp.iloc[-1, ]" ], - "id": "3f5c8947-71da-49ca-8aa1-115f55d46b0c" + "id": "47d4bd34-226c-40a8-b00e-074d79ccfee0" }, { "cell_type": "code", @@ -5085,7 +5088,7 @@ "y_ = np.argmax(tmp, axis=-1)\n", "(y_ == y).mean()" ], - "id": "da91eb2f-4ae2-4e00-a0ab-6007b4e81585" + "id": "a1f0a83b-134b-4035-b45d-0ff151cf5a65" }, { "cell_type": "code", @@ -5097,7 +5100,7 @@ "\n", "-np.log([0.7, 0.6, 0.2, 0.7]).mean()" ], - "id": "234c6be9-d169-4dac-914a-402e7ecbcf90" + "id": "4200bad3-78b3-48e1-8e50-035f6ebe578e" }, { "cell_type": "code", @@ -5115,7 +5118,7 @@ " [0.2, 0.5, 0.3],\n", " [0.2, 0.7, 0.1]]" ], - "id": "1944c147-5904-43a6-8a75-91fc8f6fc538" + "id": "185f91d0-5ab4-4fed-91d9-9b35dc1a445b" }, { "cell_type": "code", @@ -5126,10 +5129,10 @@ "[losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_1).numpy().mean(),\n", " losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_2).numpy().mean()]" ], - "id": "b8724fcd-7544-4943-bb4f-2dffd0cb0ad1" + "id": "ea0c5f33-18e5-4ebd-90f5-330406fff3b8" }, { - "id": "8d1fdb68", + "id": "653f9c9d", "cell_type": "markdown", "source": "## 11.3 MNIST\uff1a\u624b\u66f8\u304d\u6570\u5b57\u306e\u5206\u985e", "metadata": {} @@ -5150,7 +5153,7 @@ "\n", "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()" ], - "id": "f0290b62-1b9e-4f89-8828-16c8383ea9c2" + "id": "0bf6e498-7b1e-4316-a3e5-9cffdb42dd00" }, { "cell_type": "code", @@ -5160,7 +5163,7 @@ "source": [ "x_train.shape" ], - "id": "e72edc77-4e86-467b-b8c5-9ebd384b5c71" + "id": "ee8cf96d-1748-41c8-af4d-740b0ce5903c" }, { "cell_type": "code", @@ -5171,7 +5174,7 @@ "np.set_printoptions(linewidth=170)\n", "x_train[4, :, :]" ], - "id": "1fce3bb6-f2c9-44f3-a404-48d2a12518c7" + "id": "c79e2aec-c07e-4eef-9673-6fadf12f3b53" }, { "cell_type": "code", @@ -5181,7 +5184,7 @@ "source": [ "plt.matshow(x_train[4, :, :])" ], - "id": "917b8973-a5cd-462d-a735-65e6286a2e27" + "id": "16de489a-21da-428a-89ef-8cfaa1af5cba" }, { "cell_type": "code", @@ -5191,7 +5194,7 @@ "source": [ "y_train" ], - "id": "d31ec476-d4b4-4809-9eb0-4cd4dd465a29" + "id": "53d9f926-546c-4c8e-b3a3-d564cbf59c5d" }, { "cell_type": "code", @@ -5201,7 +5204,7 @@ "source": [ "x_train.min(), x_train.max()" ], - "id": "c3b2eac6-e553-4485-ba91-7056d18ad263" + "id": "ac9cb106-ebca-4b4e-808f-a75072f37875" }, { "cell_type": "code", @@ -5212,7 +5215,7 @@ "x_train = x_train / 255\n", "x_test = x_test / 255" ], - "id": "a9f28f7d-e3ea-4868-beaa-f62a5faad523" + "id": "a6752396-a541-4af1-b616-9d7b2ac8eee7" }, { "cell_type": "code", @@ -5224,7 +5227,7 @@ "x_train = x_train[my_index, :, :]\n", "y_train = y_train[my_index]" ], - "id": "610e7141-5998-43b2-b0ab-a47ee29f8a99" + "id": "a0e68c26-2509-4318-8533-0f5ca94c47bc" }, { "cell_type": "code", @@ -5245,7 +5248,7 @@ "\n", "my_cb = callbacks.EarlyStopping(patience=5, restore_best_weights=True)" ], - "id": "d0dcff17-3608-471a-950d-0cc35eb4f08d" + "id": "9e2b2baf-3a81-457f-b7e0-96efcaf33888" }, { "cell_type": "code", @@ -5265,7 +5268,7 @@ "tmp = pd.DataFrame(my_history.history)\n", "tmp.plot(xlabel='epoch', style='o-')" ], - "id": "22f9ac10-795e-4e64-b573-95121114f1ee" + "id": "11ebcbe8-6e05-4813-b333-4a42de111929" }, { "cell_type": "code", @@ -5278,7 +5281,7 @@ "confusion_matrix(y_true=y_test,\n", " y_pred=y_)" ], - "id": "7a5dc5ae-5441-431e-b5f1-60fe06381810" + "id": "e739c32f-1a10-4f8a-8034-a9f06ba403f6" }, { "cell_type": "code", @@ -5286,7 +5289,7 @@ "metadata": {}, "outputs": [], "source": [], - "id": "d8a588e7-a5f9-4338-932c-288cf4c969b1" + "id": "77f3654f-9d58-4e98-bc10-504d04e89fb4" }, { "cell_type": "code", @@ -5296,7 +5299,7 @@ "source": [ "(y_ == y_test).mean()" ], - "id": "e6d44965-f396-4578-a25d-0ef2b978493d" + "id": "bce023df-a841-4bda-a4b5-c41cd2200edb" }, { "cell_type": "code", @@ -5306,7 +5309,7 @@ "source": [ "my_model.evaluate(x=x_test, y=y_test)" ], - "id": "4930bf67-5474-4ffe-871a-bfe87f52135d" + "id": "eca5f0f2-0d94-419c-ae0f-7c03f348def4" }, { "cell_type": "code", @@ -5317,7 +5320,7 @@ "x_train2d = x_train.reshape(-1, 28, 28, 1)\n", "x_test2d = x_test.reshape(-1, 28, 28, 1)" ], - "id": "65dbf140-d548-47d7-a33d-bd8e6f467889" + "id": "18c7c8f2-c05d-4411-835d-7a09fecb7897" }, { "cell_type": "code", @@ -5344,7 +5347,7 @@ "my_cb = EarlyStopping(patience=5,\n", " restore_best_weights=True)" ], - "id": "6e166484-19b7-47bb-a1be-2a28df2c3d0d" + "id": "1335852a-e790-44c2-bfb7-f71977a62477" }, { "cell_type": "code", @@ -5364,7 +5367,7 @@ "tmp = pd.DataFrame(my_history.history)\n", "tmp.plot(xlabel='epoch', style='o-')" ], - "id": "eb49f9d7-0d16-440f-bee2-9bd6b94a8366" + "id": "03a54a44-8274-4d13-aa9b-d9c3b17a0ed9" }, { "cell_type": "code", @@ -5374,7 +5377,7 @@ "source": [ "my_model.evaluate(x=x_test2d, y=y_test)" ], - "id": "6c138b2a-3874-49b0-b5f2-bcaf322300fb" + "id": "d5b495e8-fcb8-4d8f-a908-50f74b7a9321" }, { "cell_type": "code", @@ -5401,7 +5404,7 @@ "my_cb = callbacks.EarlyStopping(patience=5,\n", " restore_best_weights=True)" ], - "id": "c12156ed-13d4-4502-be1b-b5ee4479208b" + "id": "c38b81ff-f511-43c1-962a-0e806fb2344a" }, { "cell_type": "code", @@ -5421,7 +5424,7 @@ "tmp = pd.DataFrame(my_history.history)\n", "tmp.plot(xlabel='epoch', style='o-')" ], - "id": "39cdf13f-ff1a-4fd7-b66f-d8c02485a8da" + "id": "efd0fef9-c4e6-43c4-8423-95d130f264d0" }, { "cell_type": "code", @@ -5431,7 +5434,7 @@ "source": [ "my_model.evaluate(x=x_test2d, y=y_test)" ], - "id": "c2f40b60-16c5-4bd7-8c0a-9179dcc699c4" + "id": "4cbfe43f-b845-4244-a3d2-08ad870c2cba" }, { "cell_type": "code", @@ -5450,7 +5453,7 @@ "tmp = tmp[tmp.y_ != tmp.y] # \u4e88\u6e2c\u304c\u306f\u305a\u308c\u305f\u3082\u306e\u3092\u6b8b\u3059\n", "my_result = tmp.sort_values('y_prob', ascending=False) # \u78ba\u7387\u306e\u5927\u304d\u3044\u9806\u306b\u4e26\u3073\u66ff\u3048\u308b" ], - "id": "d1bfe040-660f-440b-b9c4-cbf468a57e01" + "id": "53b8e029-1763-4741-a1ea-0a12c527d58a" }, { "cell_type": "code", @@ -5460,7 +5463,7 @@ "source": [ "my_result.head()" ], - "id": "2a900a44-bc43-48f9-a7f4-4b37a3a55672" + "id": "013d3ffd-a54b-41d4-a5aa-cb3aa19e5f6a" }, { "cell_type": "code", @@ -5476,10 +5479,10 @@ " plt.imshow(x_test[id])\n", " plt.axis('off')" ], - "id": "e76bf54a-d669-4779-a57c-331cf6ebef48" + "id": "c5a28d0f-71cb-4c58-a172-c68d132a0112" }, { - "id": "3db4aa85", + "id": "a90fbfd3", "cell_type": "markdown", "source": "## 11.4 AutoML", "metadata": {} @@ -5500,7 +5503,7 @@ "h2o.no_progress()\n", "# h2o.cluster().shutdown() # \u505c\u6b62" ], - "id": "04a43e26-72b1-44ba-b1ee-eb8624784a85" + "id": "e9bd2f69-7ee8-433b-8db3-de8aba39d496" }, { "cell_type": "code", @@ -5515,7 +5518,7 @@ "# \u3042\u308b\u3044\u306f\n", "my_frame = h2o.import_file(my_url, header=1) # \u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3080\uff0e" ], - "id": "1e263683-b2b7-4836-a2e4-3e72e39331bc" + "id": "14c57982-9e9c-4f24-8d12-f7a8406fb723" }, { "cell_type": "code", @@ -5529,7 +5532,7 @@ "h2o.as_list(my_frame).head()\n", "# \u7d50\u679c\u306f\u5272\u611b\uff08\u898b\u305f\u76ee\u306f\u540c\u3058\uff09" ], - "id": "0140953f-65e2-4b53-8933-6f7d2c54211d" + "id": "3f051200-ab01-45b5-9caa-4058a49f105c" }, { "cell_type": "code", @@ -5543,7 +5546,7 @@ " y='LPRICE2',\n", " training_frame=my_frame)" ], - "id": "0e8b57d7-5fa9-4070-b5f6-4279c15e120b" + "id": "c48e348e-ed0d-42f9-bda7-82ebf4abb7e0" }, { "cell_type": "code", @@ -5553,7 +5556,7 @@ "source": [ "my_model.leaderboard['rmse'].min()" ], - "id": "2872a2de-dcba-46eb-a92a-77f501093504" + "id": "5d6c7b7e-c029-4ecb-aca0-fcd5b910888c" }, { "cell_type": "code", @@ -5569,7 +5572,7 @@ " 'y_': tmp['predict']}\n", ").plot('y', 'y_', kind='scatter')" ], - "id": "df42e8ea-5289-4687-9da1-c344df0568da" + "id": "5ae83b74-e206-401f-bac7-9daedf8a0301" }, { "cell_type": "code", @@ -5582,7 +5585,7 @@ "x_train = x_train[my_index, :, :]\n", "y_train = y_train[my_index]" ], - "id": "893e0bb3-d57f-45ba-8ca8-54f7e8389ded" + "id": "f91b585e-4f95-4370-9c4c-0e1a198ad9ba" }, { "cell_type": "code", @@ -5601,7 +5604,7 @@ " x_test.reshape(-1, 28 * 28))\n", "my_test = h2o.H2OFrame(tmp)" ], - "id": "772c7085-1a61-400c-b43f-4263d897e376" + "id": "01c4fe56-1e6d-43c3-9c97-2f70d69b2c9b" }, { "cell_type": "code", @@ -5615,7 +5618,7 @@ " y=y,\n", " training_frame=my_train)" ], - "id": "464d77da-c169-4e76-a9fb-ba1ce69e38ab" + "id": "30b42bd3-4360-4bda-8bf4-643f4f46750b" }, { "cell_type": "code", @@ -5626,7 +5629,7 @@ "my_model.leaderboard[\n", " 'mean_per_class_error'].min()" ], - "id": "a49d0ccb-07e0-40ab-9a98-fef62b7a6b81" + "id": "5fb10db2-e54f-4e8c-90b7-a7e96195f096" }, { "cell_type": "code", @@ -5640,16 +5643,16 @@ "\n", "(y_ == y_test).mean()" ], - "id": "ffdf83ad-ce45-4953-b7d2-484041329326" + "id": "2f32726d-d253-4a46-9d1e-60dad0e4c2cb" }, { - "id": "0a9d5ac1", + "id": "268ce179", "cell_type": "markdown", "source": "# 12 \u6642\u7cfb\u5217\u4e88\u6e2c\n\n\n", "metadata": {} }, { - "id": "3075d1fe", + "id": "3ed73be9", "cell_type": "markdown", "source": "## 12.1 \u65e5\u6642\u3068\u65e5\u6642\u306e\u5217", "metadata": {} @@ -5663,7 +5666,7 @@ "import pandas as pd\n", "pd.to_datetime('2020-01-01')" ], - "id": "a740837d-cd0a-4796-9691-17dc81f90ec4" + "id": "d1eb26a2-2d53-48f9-bcdd-0f6037a7b273" }, { "cell_type": "code", @@ -5683,10 +5686,10 @@ "\n", "pd.date_range(start='2021-01-01 00:00:00', end='2021-01-01 03:00:00', freq='2H')" ], - "id": "b37629bd-f335-4167-a312-2bcfb8376037" + "id": "75fb9c55-372e-43f1-b8a4-87fc3542992c" }, { - "id": "6d6f9f38", + "id": "8050608b", "cell_type": "markdown", "source": "## 12.2 \u6642\u7cfb\u5217\u30c7\u30fc\u30bf\u306e\u4e88\u6e2c", "metadata": {} @@ -5704,7 +5707,7 @@ "\n", "my_data = airpassengers.load_airpassengers()" ], - "id": "3a030df2-907f-4d65-82e0-f2463327dfc9" + "id": "4e873343-4af2-4979-bbec-61b5a3da1e84" }, { "cell_type": "code", @@ -5715,7 +5718,7 @@ "n = len(my_data) # \u30c7\u30fc\u30bf\u6570\uff08144\uff09\n", "k = 108 # \u8a13\u7df4\u30c7\u30fc\u30bf\u6570" ], - "id": "2e78f3f0-6280-4204-8aa8-c41895a8d313" + "id": "844a52d7-df16-4911-949b-caca29396263" }, { "cell_type": "code", @@ -5734,7 +5737,7 @@ " index=my_ds)\n", "my_df.head()" ], - "id": "43290e5f-24dd-478e-a9fe-2aafbbaa791c" + "id": "476299b6-824a-4a98-8ce7-570d956be429" }, { "cell_type": "code", @@ -5746,7 +5749,7 @@ "my_test = my_df[-(n - k): ]\n", "y = my_test.y" ], - "id": "b616ca2c-6437-49df-8af3-163e861dc7cf" + "id": "e1825ae9-1ed3-4118-a949-399456dfab71" }, { "cell_type": "code", @@ -5758,7 +5761,7 @@ "plt.plot(my_test.y, label='test')\n", "plt.legend()" ], - "id": "9f2942ad-54e2-4bd0-a2f0-76201b1ea21b" + "id": "bdad8633-95c6-4e59-adc0-7456ebe17eb3" }, { "cell_type": "code", @@ -5775,7 +5778,7 @@ "y_ = my_lm_model.predict(X)\n", "mean_squared_error(y, y_)**0.5 # RMSE\uff08\u30c6\u30b9\u30c8\uff09" ], - "id": "7befbdc6-421d-46a6-891a-f613363ab206" + "id": "9139e26b-1c4d-4495-be18-1584aae04ba0" }, { "cell_type": "code", @@ -5791,7 +5794,7 @@ "plt.plot(tmp, label='model')\n", "plt.legend()" ], - "id": "a8ecf1ba-f113-4059-a851-ff2bf68f4a43" + "id": "0fa09bd6-9047-42fe-b173-fcfb35a06b96" }, { "cell_type": "code", @@ -5802,7 +5805,7 @@ "import pmdarima as pm\n", "my_arima_model = pm.auto_arima(my_train.y, m=12, trace=True)" ], - "id": "d3c8b76e-dae0-473c-8c92-03348ae3895d" + "id": "ca0f8ad5-2d7c-4fe1-9ccb-2882d7e33ea8" }, { "cell_type": "code", @@ -5819,7 +5822,7 @@ " index=my_test.index)\n", "tmp.head()" ], - "id": "ce589c0d-5b1d-4109-8b0f-bc9c66bb36ed" + "id": "efb148d1-6459-4908-bb9e-7c904f9f10d2" }, { "cell_type": "code", @@ -5829,7 +5832,7 @@ "source": [ "mean_squared_error(y, y_)**0.5" ], - "id": "1f57e7b1-cbc3-4e5e-b708-f25df664db75" + "id": "13b8341d-9d72-424a-b6c0-962755c222af" }, { "cell_type": "code", @@ -5846,7 +5849,7 @@ " alpha=0.25) # \u4e0d\u900f\u660e\u5ea6\n", "plt.legend(loc='upper left')" ], - "id": "8f283397-2cbf-4c50-85d3-d9a0a5155b65" + "id": "450bcf84-9b4b-466e-8e22-737fc7aab356" }, { "cell_type": "code", @@ -5859,7 +5862,7 @@ "my_prophet_model = Prophet(seasonality_mode='multiplicative')\n", "my_prophet_model.fit(my_train)" ], - "id": "db0862f5-124a-41dd-8a78-f953f8713d36" + "id": "1ea615ff-2eb9-463d-ae10-355439257533" }, { "cell_type": "code", @@ -5870,7 +5873,7 @@ "tmp = my_prophet_model.predict(my_test)\n", "tmp[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head()" ], - "id": "402913c9-8a59-42d3-a8ab-1c2080aef243" + "id": "86cf148c-0870-433d-939a-a5b299e50b1d" }, { "cell_type": "code", @@ -5881,7 +5884,7 @@ "y_ = tmp.yhat\n", "mean_squared_error(y, y_)**0.5" ], - "id": "2e146eed-18d9-4f27-a162-9fdc4604b9d4" + "id": "c58c6ad1-0244-4425-903c-99e1bbe51a52" }, { "cell_type": "code", @@ -5895,16 +5898,16 @@ "fig.axes[0].plot(my_train.ds, my_train.y)\n", "fig.axes[0].plot(my_test.ds, my_test.y, color='red')" ], - "id": "651c406e-43bf-4c0f-8530-5c5b1f8aa464" + "id": "34320f96-e8ed-40e2-8efe-861537feb239" }, { - "id": "d797b5d2", + "id": "9caf7c0e", "cell_type": "markdown", "source": "# 13 \u6559\u5e2b\u306a\u3057\u5b66\u7fd2\n\n\n", "metadata": {} }, { - "id": "08ed5c7c", + "id": "8eaa1d26", "cell_type": "markdown", "source": "## 13.1 \u4e3b\u6210\u5206\u5206\u6790", "metadata": {} @@ -5930,7 +5933,7 @@ "my_model = pca(n_components=5)\n", "my_result = my_model.fit_transform(my_data) # \u4e3b\u6210\u5206\u5206\u6790\u306e\u5b9f\u884c" ], - "id": "04143743-2525-4233-9bbb-652955d60a25" + "id": "88bab087-fe46-4723-bc7b-efb6677c0645" }, { "cell_type": "code", @@ -5940,7 +5943,7 @@ "source": [ "my_result['PC'] # \u4e3b\u6210\u5206\u30b9\u30b3\u30a2" ], - "id": "8bf1c06f-dbd9-4ba4-ad4b-76f799476563" + "id": "cb951006-668f-49af-b10f-2e1f816a33ae" }, { "cell_type": "code", @@ -5950,7 +5953,7 @@ "source": [ "my_model.biplot(legend=False)" ], - "id": "8e12f34d-6a0f-4a99-be62-2f6b317d77c4" + "id": "c56f7c75-a048-4112-8bfd-454e5d941c5e" }, { "cell_type": "code", @@ -5960,7 +5963,7 @@ "source": [ "my_result['loadings']" ], - "id": "b26de1fc-90e5-4048-92eb-624fa67fecf4" + "id": "f356ee34-39df-4bce-86ff-8236b74d0169" }, { "cell_type": "code", @@ -5970,7 +5973,7 @@ "source": [ "my_result['explained_var']" ], - "id": "6219804c-85c5-4fbc-aa4f-4ababb83ef38" + "id": "2d07f715-3708-4525-aadc-5037df278364" }, { "cell_type": "code", @@ -5983,7 +5986,7 @@ " tmp)\n", "my_result['PC'] # \u4e3b\u6210\u5206\u30b9\u30b3\u30a2" ], - "id": "3cc938ca-cc14-4e9f-8a94-d027654d147b" + "id": "3bf9849d-396b-4ce3-9e4a-05dc43d66e6b" }, { "cell_type": "code", @@ -6005,7 +6008,7 @@ "Z @ vecs # \u4e3b\u6210\u5206\u30b9\u30b3\u30a2\uff08\u7d50\u679c\u306f\u5272\u611b\uff09\n", "vals.cumsum() / vals.sum() # \u7d2f\u7a4d\u5bc4\u4e0e\u7387" ], - "id": "e162475d-40e9-40b7-b852-28f91af2768c" + "id": "de8d1eda-708f-4c06-9495-07579cd513df" }, { "cell_type": "code", @@ -6025,10 +6028,10 @@ "e = d ** 2 / n # \u5206\u6563\u5171\u5206\u6563\u884c\u5217\u306e\u56fa\u6709\u5024\n", "e.cumsum() / e.sum() # \u7d2f\u7a4d\u5bc4\u4e0e\u7387" ], - "id": "753b23bc-05ac-401e-8bbc-1c300779fe39" + "id": "af33671c-4356-489d-b670-ec76adb868a6" }, { - "id": "b5741d36", + "id": "f0475f40", "cell_type": "markdown", "source": "## 13.2 \u30af\u30e9\u30b9\u30bf\u5206\u6790", "metadata": {} @@ -6052,7 +6055,7 @@ " metric='euclidean', # \u7701\u7565\u53ef\n", " method='complete')" ], - "id": "0376b45b-4e15-40ce-9bc7-726ad2642d89" + "id": "3135cb82-916a-4803-a746-056af7e75602" }, { "cell_type": "code", @@ -6063,7 +6066,7 @@ "hierarchy.dendrogram(my_result,\n", " labels=my_data.index)" ], - "id": "3213ba39-c1af-449c-a91f-1dd9228a606c" + "id": "279dada5-1864-4b64-a24f-d56c5792cc87" }, { "cell_type": "code", @@ -6077,7 +6080,7 @@ "my_data.assign(cluster=\n", " hierarchy.cut_tree(my_result, 3))" ], - "id": "ce2d0438-1d5e-46aa-927d-881d97cb0195" + "id": "5ad2c43c-c977-405f-a767-0359a04f8abb" }, { "cell_type": "code", @@ -6098,7 +6101,7 @@ "\n", "sns.clustermap(my_data, z_score=1) # \u5217\u3054\u3068\u306e\u6a19\u6e96\u5316" ], - "id": "ffe491f2-d7a4-4b59-b4fd-400a6751c799" + "id": "107a21e4-1f40-4edb-af2c-bdefc1aecb97" }, { "cell_type": "code", @@ -6117,7 +6120,7 @@ "my_result = KMeans(\n", " n_clusters=3).fit(my_data)" ], - "id": "19297862-c5a4-484a-8342-55f4e8083993" + "id": "02e96448-352d-459e-a5a3-9f3b08098441" }, { "cell_type": "code", @@ -6131,7 +6134,7 @@ "my_data.assign(\n", " cluster=my_result.labels_)" ], - "id": "a7d44c0a-6dde-4a10-9529-4f68e774ce24" + "id": "f1e02cd1-393f-42c5-a767-69a57b31a139" }, { "cell_type": "code", @@ -6152,7 +6155,7 @@ " 'inertia': [KMeans(k).fit(my_data).inertia_ for k in range(1, 11)]})\n", "my_df.plot(x='k', style='o-', legend=False)" ], - "id": "a4f07f06-bcf9-44e7-b7d6-2105bb508c0b" + "id": "b28157dd-acac-403d-81ef-b2f6574418a8" }, { "cell_type": "code", @@ -6186,7 +6189,7 @@ " style='Species', # \u5f62\u3067\u54c1\u7a2e\u3092\u8868\u73fe\u3059\u308b\uff0e\n", " palette='bright')" ], - "id": "af8e6e64-b5ab-4981-a6e0-fe976a791f15" + "id": "540dc699-dc23-4a25-a8a0-0523586cba6c" } ], "nbformat": 4,