lots of compilation fixes

aeturrell · Dec 22, 2024 · 56e3eab · 56e3eab
1 parent ed193f6
commit 56e3eab
Show file tree

Hide file tree

Showing 28 changed files with 143 additions and 183 deletions.
diff --git a/_config.yml b/_config.yml
@@ -36,8 +36,9 @@ html:
   use_repository_button: true
   extra_footer              : This book is available under an MIT license.
   baseurl                   : "https://aeturrell.github.io/coding-for-economists"
-  google_analytics_id       : "G-9FZQCPFXZJ"  # A GA id that can be used to track book views.
   use_multitoc_numbering    : false
+  analytics:
+    google_analytics_id       : "G-9FZQCPFXZJ"  # A GA id that can be used to track book views.
 # NB this only works if repo info filled in, and if
 # repo is public. See https://jupyterbook.org/interactive/launchbuttons.html
 launch_buttons:

diff --git a/auto-research-outputs.md b/auto-research-outputs.md
@@ -116,9 +116,39 @@ reg_results = Stargazer([est, est2])
 reg_results
 ```
 
-which can similarly be cast into $\LaTeX$ using `reg_results.render_latex()`.
+```{code-cell} ipython3
+import numpy as np
+import pandas as pd
+#import pylatex as pl  # for the latex table; note: not a dependency of pyfixest - needs manual installation
+from great_tables import loc, style
+from IPython.display import FileLink, display
+
+import pyfixest as pf
+
+data = pf.get_data()
+
+fit1 = pf.feols("Y ~ X1 + X2 | f1", data=data)
+fit2 = pf.feols("Y ~ X1 + X2 | f1 + f2", data=data)
+fit3 = pf.feols("Y2 ~ X1 + X2 | f1", data=data)
+fit4 = pf.feols("Y2 ~ X1 + X2 | f1 + f2", data=data)
+
+pf.etable([fit1, fit2, fit3, fit4,])
+```
+
+which can be cast into $\LaTeX$ using `type="tex"`.
+
+```{code-cell} ipython3
+tab = pf.etable(
+    [fit1, fit2, fit3, fit4],
+    digits=2,
+    type="tex",
+    print_tex=True,
+)
+
+tab
+```
 
-We'd like to export tables like this into files that can be picked up by our $\LaTeX$ document. We must first save it to the right place from Python. This would be
+We'd like to export tables like this into files that can be picked up by our $\LaTeX$ document. We must first save it to the right place from Python. Assuming you have the folders "outputs/tables" relative to your working directory, this would be
 
 ```python
 from pathlib import Path
@@ -131,7 +161,7 @@ in the first example, and
 ```python
 from pathlib import Path
 with open(Path('outputs/tables/reg_table.tex'), 'w') as f:
-    f.write(reg_results.render_latex())
+    f.write(tab)
 ```
 
 in the second. Remember that `Path` is a clever class (from Python's built-in `pathlib` module) that will build the correct file path regardless of which operating system you happen to be using at the time. This is especially useful when you have co-authors on different systems!

diff --git a/code-preliminaries.md b/code-preliminaries.md
@@ -119,7 +119,7 @@ powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | ie
 
 for Windows. Hit return to execute the commands.
 
-Once you have installed uv, you can check it's in properly by running `uv --version`. You should see a message pop up that says "uv" and then the latest version number.
+Once you have installed uv, you can check it's installed properly by running `uv --version`. You should see a message pop up that says "uv" followed by the version number that you have installed.
 
 ### Installing your integrated development environment, Visual Studio Code
 

diff --git a/craft-writing-papers.md b/craft-writing-papers.md
@@ -321,9 +321,6 @@ Once you've written a draft you're happy with, there are a bunch of checks you c
 
 - Look back at your figures and tables, and be brutal. Do you need them all? Do they all convey important messages that a reader cannot get from the text? In general, you can have up to approximately 10 floats before an editor or referee may wonder if they have picked up a picture book rather than a journal article. Naturally, in some special cases—for example, if the paper is about data visualisation—you may feel warranted in having more. But a good check is whether you can tell the story in just four floats, and the most important result in just one.
 
-![First drafts](https://quotesnhumor.com/wp-content/uploads/2018/04/Writing-memes7.jpg)
-*First drafts*
-
 ## Further Resources
 
 Two extremely good general resources on writing are {cite:t}`zinsser2006writing` and {cite:t}`white1972elements`. For a more in-depth take on writing papers (specific to applied economics papers), see {cite:t}`bellemare2020write`.
diff --git a/data-analysis-quickstart.ipynb b/data-analysis-quickstart.ipynb
@@ -486,7 +486,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "table = df[[\"mass\", \"height\"]].agg([np.mean, np.std])\n",
+    "table = df[[\"mass\", \"height\"]].agg([\"mean\", \"std\"])\n",
     "table"
    ]
   },

diff --git a/data-categorical.ipynb b/data-categorical.ipynb
@@ -350,7 +350,7 @@
    "outputs": [],
    "source": [
     "time_df = pd.DataFrame(\n",
-    "    pd.Series(pd.date_range(\"2015/05/01\", periods=5, freq=\"M\"), dtype=\"category\"),\n",
+    "    pd.Series(pd.date_range(\"2015/05/01\", periods=5, freq=\"ME\"), dtype=\"category\"),\n",
     "    columns=[\"datetime\"],\n",
     ")\n",
     "time_df"
@@ -386,17 +386,14 @@
   }
  ],
  "metadata": {
-  "interpreter": {
-   "hash": "9d7534ecd9fbc7d385378f8400cf4d6cb9c6175408a574f1c99c5269f08771cc"
-  },
   "jupytext": {
    "cell_metadata_filter": "-all",
    "encoding": "# -*- coding: utf-8 -*-",
    "formats": "md:myst",
    "main_language": "python"
   },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "codeforecon",
    "language": "python",
    "name": "python3"
   },
@@ -410,7 +407,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.10.16"
   },
   "toc-showtags": true
  },

diff --git a/data-databases.ipynb b/data-databases.ipynb
@@ -788,7 +788,7 @@
    "main_language": "python"
   },
   "kernelspec": {
-   "display_name": "Python 3.10.12 ('codeforecon')",
+   "display_name": "codeforecon",
    "language": "python",
    "name": "python3"
   },
@@ -802,14 +802,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.10.16"
   },
-  "toc-showtags": true,
-  "vscode": {
-   "interpreter": {
-    "hash": "c4570b151692b3082981c89d172815ada9960dee4eb0bedb37dc10c95601d3bd"
-   }
-  }
+  "toc-showtags": true
  },
  "nbformat": 4,
  "nbformat_minor": 5

diff --git a/data-exploratory-analysis.ipynb b/data-exploratory-analysis.ipynb
@@ -646,10 +646,6 @@
    "source": [
     "## The **ydata-profiling** package\n",
     "\n",
-    "```{warning}\n",
-    "The live example in this section is not currently working due to **ydata-profiling**'s dependency on an older version of **numpy**. If you need to use something from this section, you probably still can: *coding for economists* has a huge number of dependencies, much more than a normal project, and this particular problem may not affect you. If you want to help, you can show your support for a resolution on [this GitHub issue](https://github.com/ydataai/ydata-profiling/issues/1456)—though please do bear in mind that most open source libraries are run by volunteers, and you should always be constructive in your interactions. The second is to contribute to the library yourself by creating a pull request that fixes the problem.\n",
-    "```\n",
-    "\n",
     "The EDA we did using the built-in **pandas** functions was a bit limited and user-input heavy. The [**ydata-profiling**](https://docs.profiling.ydata.ai/) library aims to automate the legwork of EDA for you. It generates 'profile' reports from a pandas DataFrame. For each column, many statistics are computed and then relayed in an interactive HTML report.\n",
     "\n",
     "Let's generate a report on our dataset using the `minimal=True` setting (the default settings produce a lot of computationally expensive extras):\n"
@@ -661,13 +657,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# from ydata_profiling import ProfileReport\n",
-    "\n",
+    "from ydata_profiling import ProfileReport\n",
     "\n",
-    "# profile = ProfileReport(\n",
-    "#     df, minimal=True, title=\"Profiling Report: Grinnell House Sales\"\n",
-    "# )\n",
-    "# profile.to_notebook_iframe()"
+    "profile = ProfileReport(\n",
+    "    df, minimal=True, title=\"Profiling Report: Grinnell House Sales\"\n",
+    ")\n",
+    "profile.to_notebook_iframe()"
    ]
   },
   {
@@ -708,7 +703,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.10.12 ('codeforecon')",
+   "display_name": "codeforecon",
    "language": "python",
    "name": "python3"
   },
@@ -722,12 +717,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
-  },
-  "vscode": {
-   "interpreter": {
-    "hash": "c4570b151692b3082981c89d172815ada9960dee4eb0bedb37dc10c95601d3bd"
-   }
+   "version": "3.10.16"
   }
  },
  "nbformat": 4,

diff --git a/data-intro.ipynb b/data-intro.ipynb
@@ -1375,7 +1375,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "index = pd.date_range(\"1/1/2000\", periods=12, freq=\"Q\")\n",
+    "index = pd.date_range(\"1/1/2000\", periods=12, freq=\"QE\")\n",
     "df = pd.DataFrame(np.random.randint(0, 10, (12, 5)), index=index, columns=list(\"ABCDE\"))\n",
     "df"
    ]

diff --git a/data-joining-data.ipynb b/data-joining-data.ipynb
@@ -73,7 +73,7 @@
    "source": [
     "import pandas as pd\n",
     "\n",
-    "base_url = \"http://www.stata-press.com/data/r14/\"\n",
+    "base_url = \"https://github.com/aeturrell/coding-for-economists/raw/refs/heads/general-hygiene/data/\"  # TODO change to main post merge\n",
     "state_codes = [\"ca\", \"il\"]\n",
     "end_url = \"pop.dta\"\n",
     "\n",
@@ -231,17 +231,14 @@
   }
  ],
  "metadata": {
-  "interpreter": {
-   "hash": "9d7534ecd9fbc7d385378f8400cf4d6cb9c6175408a574f1c99c5269f08771cc"
-  },
   "jupytext": {
    "cell_metadata_filter": "-all",
    "encoding": "# -*- coding: utf-8 -*-",
    "formats": "md:myst",
    "main_language": "python"
   },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "codeforecon",
    "language": "python",
    "name": "python3"
   },
@@ -255,7 +252,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.10.16"
   },
   "toc-showtags": true
  },

diff --git a/data-missing-values.ipynb b/data-missing-values.ipynb
@@ -222,7 +222,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "nan_df.fillna(method=\"ffill\")"
+    "nan_df.ffill()"
    ]
   },
   {
@@ -232,7 +232,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "nan_df.fillna(method=\"bfill\")"
+    "nan_df.bfill()"
    ]
   },
   {
@@ -561,7 +561,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "health_cut.groupby(\"smoker\")[\"age\"].mean()"
+    "health_cut.groupby(\"smoker\", observed=False)[\"age\"].mean()"
    ]
   },
   {
@@ -581,7 +581,7 @@
    "main_language": "python"
   },
   "kernelspec": {
-   "display_name": "Python 3.8.13 ('codeforecon')",
+   "display_name": "codeforecon",
    "language": "python",
    "name": "python3"
   },
@@ -595,14 +595,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.10.16"
   },
-  "toc-showtags": true,
-  "vscode": {
-   "interpreter": {
-    "hash": "caf5ac9f613b176c5984ad2a1a4525760eb7d898a3291351da4c152dc719ffa1"
-   }
-  }
+  "toc-showtags": true
  },
  "nbformat": 4,
  "nbformat_minor": 5

diff --git a/data-numbers.ipynb b/data-numbers.ipynb
@@ -108,7 +108,7 @@
    "outputs": [],
    "source": [
     "(\n",
-    "    flights.groupby([\"dest\"])\n",
+    "    flights.groupby([\"dest\"], observed=False)\n",
     "    .agg(\n",
     "        mean_delay=(\"dep_delay\", \"mean\"),\n",
     "        count_flights=(\"dest\", \"count\"),\n",
@@ -132,7 +132,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "(flights.groupby(\"tailnum\").agg(miles=(\"distance\", \"sum\")))"
+    "(flights.groupby(\"tailnum\", observed=False).agg(miles=(\"distance\", \"sum\")))"
    ]
   },
   {
@@ -150,7 +150,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "(flights.groupby(\"dest\").agg(n_cancelled=(\"dep_time\", lambda x: x.isnull().sum())))"
+    "(\n",
+    "    flights.groupby(\"dest\", observed=False).agg(\n",
+    "        n_cancelled=(\"dep_time\", lambda x: x.isnull().sum())\n",
+    "    )\n",
+    ")"
    ]
   },
   {
@@ -772,17 +776,14 @@
   }
  ],
  "metadata": {
-  "interpreter": {
-   "hash": "9d7534ecd9fbc7d385378f8400cf4d6cb9c6175408a574f1c99c5269f08771cc"
-  },
   "jupytext": {
    "cell_metadata_filter": "-all",
    "encoding": "# -*- coding: utf-8 -*-",
    "formats": "md:myst",
    "main_language": "python"
   },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "codeforecon",
    "language": "python",
    "name": "python3"
   },
@@ -796,7 +797,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.10.16"
   },
   "toc-showtags": true
  },

diff --git a/data-spreadsheets.ipynb b/data-spreadsheets.ipynb
@@ -125,7 +125,7 @@
     "    \"data/students.xlsx\",\n",
     "    names=[\"student_id\", \"full_name\", \"favourite_food\", \"meal_plan\", \"age\"],\n",
     ")\n",
-    "students[\"age\"] = students[\"age\"].replace(\"five\", 5)\n",
+    "students[\"age\"] = students[\"age\"].replace(\"five\", \"5\").astype(float)\n",
     "students"
    ]
   },
@@ -422,7 +422,7 @@
    "main_language": "python"
   },
   "kernelspec": {
-   "display_name": "Python 3.8.13 ('codeforecon')",
+   "display_name": "codeforecon",
    "language": "python",
    "name": "python3"
   },
@@ -436,14 +436,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.10.16"
   },
-  "toc-showtags": true,
-  "vscode": {
-   "interpreter": {
-    "hash": "caf5ac9f613b176c5984ad2a1a4525760eb7d898a3291351da4c152dc719ffa1"
-   }
-  }
+  "toc-showtags": true
  },
  "nbformat": 4,
  "nbformat_minor": 5