Skip to content

Commit

Permalink
Merge pull request #1956 from microsoft/staging
Browse files Browse the repository at this point in the history
Staging to main: Fix latest errors and rerun the notebooks with Python 3.9
  • Loading branch information
miguelgfierro authored Jul 5, 2023
2 parents 787ae30 + af53046 commit 29c64c9
Show file tree
Hide file tree
Showing 14 changed files with 1,310 additions and 1,081 deletions.
466 changes: 275 additions & 191 deletions examples/00_quick_start/lightgbm_tinycriteo.ipynb

Large diffs are not rendered by default.

435 changes: 158 additions & 277 deletions examples/00_quick_start/sar_movielens.ipynb

Large diffs are not rendered by default.

51 changes: 22 additions & 29 deletions examples/01_prepare_data/data_split.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,16 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"System version: 3.7.15 (default, Nov 24 2022, 21:12:53) \n",
"System version: 3.9.16 (main, May 15 2023, 23:46:34) \n",
"[GCC 11.2.0]\n",
"Pyspark version: 3.3.1\n"
"Pyspark version: 3.2.4\n"
]
}
],
Expand All @@ -69,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -106,14 +106,14 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████| 1.93k/1.93k [00:00<00:00, 1.94kKB/s]\n"
"100%|██████████████████████████████████████████████████████████████████████████| 1.93k/1.93k [00:01<00:00, 1.82kKB/s]\n"
]
}
],
Expand Down Expand Up @@ -749,17 +749,17 @@
" <td>1997-11-03 07:33:03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16314</th>\n",
" <th>202</th>\n",
" <td>1</td>\n",
" <td>230</td>\n",
" <td>61</td>\n",
" <td>4</td>\n",
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51295</th>\n",
" <td>1</td>\n",
" <td>35</td>\n",
" <th>16314</th>\n",
" <td>1</td>\n",
" <td>230</td>\n",
" <td>4</td>\n",
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
Expand All @@ -770,10 +770,10 @@
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>202</th>\n",
" <th>51295</th>\n",
" <td>1</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>61</td>\n",
" <td>4</td>\n",
" <td>1997-11-03 07:33:40</td>\n",
" </tr>\n",
" <tr>\n",
Expand Down Expand Up @@ -806,10 +806,10 @@
"1989 1 90 4 1997-11-03 07:31:40\n",
"11807 1 219 1 1997-11-03 07:32:07\n",
"50026 1 167 2 1997-11-03 07:33:03\n",
"202 1 61 4 1997-11-03 07:33:40\n",
"16314 1 230 4 1997-11-03 07:33:40\n",
"51295 1 35 1 1997-11-03 07:33:40\n",
"43280 1 162 4 1997-11-03 07:33:40\n",
"202 1 61 4 1997-11-03 07:33:40\n",
"51295 1 35 1 1997-11-03 07:33:40\n",
"820 1 265 4 1997-11-03 07:34:01\n",
"11154 1 112 1 1997-11-03 07:34:01\n",
"45732 1 57 5 1997-11-03 07:34:19"
Expand Down Expand Up @@ -893,16 +893,16 @@
" <td>1997-11-03 07:35:52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96699</th>\n",
" <th>4280</th>\n",
" <td>1</td>\n",
" <td>152</td>\n",
" <td>82</td>\n",
" <td>5</td>\n",
" <td>1997-11-03 07:36:29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4280</th>\n",
" <th>96699</th>\n",
" <td>1</td>\n",
" <td>82</td>\n",
" <td>152</td>\n",
" <td>5</td>\n",
" <td>1997-11-03 07:36:29</td>\n",
" </tr>\n",
Expand Down Expand Up @@ -944,8 +944,8 @@
"24493 1 30 3 1997-11-03 07:35:15\n",
"6234 1 233 2 1997-11-03 07:35:52\n",
"39865 1 131 1 1997-11-03 07:35:52\n",
"96699 1 152 5 1997-11-03 07:36:29\n",
"4280 1 82 5 1997-11-03 07:36:29\n",
"96699 1 152 5 1997-11-03 07:36:29\n",
"25721 1 141 3 1997-11-03 07:36:48\n",
"5842 1 72 4 1997-11-03 07:37:58\n",
"333 1 33 4 1997-11-03 07:38:19\n",
Expand Down Expand Up @@ -1133,13 +1133,6 @@
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"data": {
"text/plain": [
Expand Down Expand Up @@ -1197,7 +1190,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.15"
"version": "3.9.16"
}
},
"nbformat": 4,
Expand Down
8 changes: 4 additions & 4 deletions examples/01_prepare_data/data_transform.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"System version: 3.7.15 (default, Nov 24 2022, 21:12:53) \n",
"System version: 3.9.16 (main, May 15 2023, 23:46:34) \n",
"[GCC 11.2.0]\n",
"NumPy version: 1.21.6\n",
"Pandas version: 1.3.5\n"
"NumPy version: 1.24.3\n",
"Pandas version: 1.5.3\n"
]
}
],
Expand Down Expand Up @@ -1719,7 +1719,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.15"
"version": "3.9.16"
}
},
"nbformat": 4,
Expand Down
90 changes: 67 additions & 23 deletions examples/01_prepare_data/mind_utils.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,20 @@
"name": "stdout",
"output_type": "stream",
"text": [
"System version: 3.6.11 | packaged by conda-forge | (default, Nov 27 2020, 18:51:43) \n",
"[GCC Clang 11.0.0]\n"
"System version: 3.9.16 (main, May 15 2023, 23:46:34) \n",
"[GCC 11.2.0]\n"
]
}
],
"source": [
"import sys\n",
"import os\n",
"import sys\n",
"import numpy as np\n",
"import pandas as pd\n",
"from collections import Counter\n",
"from tqdm import tqdm\n",
"import pickle\n",
"import numpy as np\n",
"import scrapbook as sb\n",
"\n",
"from collections import Counter\n",
"from tempfile import TemporaryDirectory\n",
"from recommenders.datasets.mind import (download_mind,\n",
" extract_mind,\n",
Expand Down Expand Up @@ -84,8 +83,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 17.0k/17.0k [00:42<00:00, 403KB/s]\n",
"100%|██████████| 9.84k/9.84k [00:14<00:00, 694KB/s] \n"
"100%|██████████████████████████████████████████████████████████████████████████| 17.0k/17.0k [00:05<00:00, 2.92kKB/s]\n",
"100%|██████████████████████████████████████████████████████████████████████████| 9.84k/9.84k [00:01<00:00, 6.80kKB/s]\n"
]
}
],
Expand Down Expand Up @@ -260,7 +259,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 26740/26740 [00:11<00:00, 2257.38it/s]\n"
"100%|████████████████████████████████████████████████████████████████████████| 26740/26740 [00:02<00:00, 9093.49it/s]\n"
]
}
],
Expand Down Expand Up @@ -321,7 +320,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 842k/842k [06:30<00:00, 2.15kKB/s] \n"
"100%|████████████████████████████████████████████████████████████████████████████| 842k/842k [02:45<00:00, 5.08kKB/s]\n"
]
}
],
Expand All @@ -338,8 +337,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"400000it [00:08, 47154.93it/s]\n",
"400000it [00:09, 43258.03it/s]\n"
"400000it [00:06, 60728.10it/s]\n",
"400000it [00:07, 50299.10it/s]\n"
]
}
],
Expand Down Expand Up @@ -374,7 +373,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"22034it [00:00, 370992.88it/s]\n"
"22034it [00:00, 89146.42it/s]\n"
]
}
],
Expand All @@ -400,9 +399,26 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 19,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"{'vert_num': 17,\n",
" 'subvert_num': 17,\n",
" 'word_num': 23404,\n",
" 'word_num_all': 41074,\n",
" 'embedding_exist_num': 22408,\n",
" 'embedding_exist_num_all': 37634,\n",
" 'uid2index': 5000}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"utils_state = {\n",
" 'vert_num': len(vert_dict),\n",
Expand All @@ -412,21 +428,49 @@
" 'embedding_exist_num': len(exist_word),\n",
" 'embedding_exist_num_all': len(exist_all_word),\n",
" 'uid2index': len(uid2index)\n",
"}"
"}\n",
"utils_state"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"application/scrapbook.scrap.json+json": {
"data": {
"embedding_exist_num": 22408,
"embedding_exist_num_all": 37634,
"subvert_num": 17,
"uid2index": 5000,
"vert_num": 17,
"word_num": 23404,
"word_num_all": 41074
},
"encoder": "json",
"name": "utils_state",
"version": 1
}
},
"metadata": {
"scrapbook": {
"data": true,
"display": false,
"name": "utils_state"
}
},
"output_type": "display_data"
}
],
"source": [
"sb.glue(\"utils_state\", utils_state)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -437,9 +481,9 @@
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "Python (reco_base)",
"display_name": "Python (recommenders)",
"language": "python",
"name": "reco_base"
"name": "recommenders"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -451,9 +495,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.11"
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
}
93 changes: 53 additions & 40 deletions examples/01_prepare_data/wikidata_knowledge_graph.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 29c64c9

Please sign in to comment.