From b6fe58af8b921391ba2ddaec030d08c514004dcb Mon Sep 17 00:00:00 2001 From: Shang Wang Date: Mon, 4 Aug 2025 21:27:02 -0400 Subject: [PATCH 1/2] Update README.md Signed-off-by: Shang Wang --- README.md | 56 ++++++++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index dab00ba6a8..c9673bd399 100644 --- a/README.md +++ b/README.md @@ -105,41 +105,37 @@ sudo apt-get update sudo apt-get install cudnn-cuda-12 ``` -Install `uv`. -```sh -# For faster setup and environment isolation, we use `uv` -pip install uv +For faster setup and environment isolation, we use [uv](https://docs.astral.sh/uv/). +Follow [these instructions](https://docs.astral.sh/uv/getting-started/installation/) to install uv. -# Initialize NeMo RL project virtual environment -# NOTE: Please do not use -p/--python and instead allow uv venv to read it from .python-version -# This ensures that the version of python used is always what we prescribe. +Then, initialize NeMo RL project virtual environment via: +```sh uv venv +``` +> [!NOTE] +> Please do not use `-p/--python` and instead allow `uv venv` to read it from `.python-version`. +> This ensures that the version of python used is always what we prescribe. -# If working outside a container, it can help to build flash-attn and warm the -# uv cache before your first run. The NeMo RL Dockerfile will warm the uv cache -# with flash-attn. See https://docs.nvidia.com/nemo/rl/latest/docker.html for -# instructions if you are looking for the NeMo RL container. +If working outside a container, it can help to build [flash-attn](https://github.com/Dao-AILab/flash-attention) and warm the uv cache before your first run. 
+
```sh
bash tools/build-flash-attn-in-uv-cache.sh
-# If sucessful, you should see "✅ flash-attn successfully added to uv cache"
-
-# If you cannot install at the system level, you can install for your user with
-# pip install --user uv
-
-# Use `uv run` to launch all commands. It handles pip installing implicitly and
-# ensures your environment is up to date with our lock file.
-
-# Note that it is not recommended to activate the venv and instead use `uv run` since
-# it ensures consistent environment usage across different shells and sessions.
-# Example: uv run python examples/run_grpo_math.py
```
-
-**Important Notes:**
-
-- Use the `uv run <command>` to execute scripts within the managed environment. This helps maintain consistency across different shells and sessions.
-- Ensure you have the necessary CUDA drivers and PyTorch installed compatible with your hardware.
-- On the first install, `flash-attn` can take a while to install (~45min with 48 CPU hyperthreads). After it is built once, it is cached in your `uv`'s cache dir making subsequent installs much quicker.
-- If you update your environment in `pyproject.toml`, it is necessary to force a rebuild of the virtual environments by setting `NRL_FORCE_REBUILD_VENVS=true` next time you launch a run.
-- **Reminder**: Don't forget to set your `HF_HOME`, `WANDB_API_KEY`, and `HF_DATASETS_CACHE` (if needed). You'll need to do a `huggingface-cli login` as well for Llama models.
+> [!NOTE]
+> On the first install, `flash-attn` can take a while to install (~45min with 48 CPU hyperthreads). After it is built once, it is cached in your uv's cache dir making subsequent installs much quicker.
+
+> [!TIP]
+> The NeMo RL Dockerfile will warm the uv cache with flash-attn.
+> See https://docs.nvidia.com/nemo/rl/latest/docker.html for instructions if you are looking for the NeMo RL container.
+
+If successful, you should see `✅ flash-attn successfully added to uv cache`.
+
+Use `uv run` to launch all commands. 
It handles pip installing implicitly and ensures your environment is up to date with our lock file. +> [!NOTE] +> - It is not recommended to activate the `venv`, and you should use `uv run ` instead to execute scripts within the managed environment. +> This ensures consistent environment usage across different shells and sessions. Example: `uv run python examples/run_grpo_math.py` +> - Ensure you have the necessary CUDA drivers and PyTorch installed compatible with your hardware. +> - If you update your environment in `pyproject.toml`, it is necessary to force a rebuild of the virtual environments by setting `NRL_FORCE_REBUILD_VENVS=true` next time you launch a run. +> - **Reminder**: Don't forget to set your `HF_HOME`, `WANDB_API_KEY`, and `HF_DATASETS_CACHE` (if needed). You'll need to do a `huggingface-cli login` as well for Llama models. ## Training Backends From 1cdffcb0c00389a87d601ce3d61838a6fd6bbc10 Mon Sep 17 00:00:00 2001 From: Shang Wang Date: Fri, 8 Aug 2025 11:26:56 -0400 Subject: [PATCH 2/2] Add a helper function in conf.py to properly render GH alerts in MyST-Parser. Signed-off-by: Shang Wang --- .pre-commit-config.yaml | 19 +++++++++------ docs/conf.py | 51 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d22fdd475a..7d1a05182d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,11 +3,9 @@ repos: rev: v4.4.0 hooks: - id: end-of-file-fixer - # only include python files - files: \.py$ + types_or: [python, pyi] # Only include Python files. - id: trailing-whitespace - # only include python files - files: \.py$ + types_or: [python, pyi] # Only include Python files. 
- repo: https://github.com/astral-sh/ruff-pre-commit rev: "v0.9.9" # Use the appropriate version @@ -36,8 +34,15 @@ repos: exclude: '^\.github/' types: [file] - - repo: https://github.com/facebook/pyrefly - rev: 0.24.2 + - repo: local hooks: - id: pyrefly-typecheck - files: \.py$ \ No newline at end of file + name: pyrefly check + entry: uv run --group dev pyrefly check + types_or: [python, pyi] + language: system + pass_filenames: false # Pyrefly reads config & project roots itself. + args: [] + require_serial: true + additional_dependencies: [] + minimum_pre_commit_version: "2.9.2" diff --git a/docs/conf.py b/docs/conf.py index 60bcecf32f..a7a932160e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -98,3 +98,54 @@ }, } html_extra_path = ["project.json", "versions1.json"] + +# -- Supporting rendering GitHub alerts correctly ---------------------------- +# https://github.com/executablebooks/MyST-Parser/issues/845 + +_GITHUB_ADMONITIONS = { + "> [!NOTE]": "note", + "> [!TIP]": "tip", + "> [!IMPORTANT]": "important", + "> [!WARNING]": "warning", + "> [!CAUTION]": "caution", +} + + +def convert_gh_admonitions(app, relative_path, parent_docname, contents): + # loop through content lines, replace github admonitions + for i, orig_content in enumerate(contents): + orig_line_splits = orig_content.split("\n") + replacing = False + for j, line in enumerate(orig_line_splits): + # look for admonition key + line_roi = line.lstrip() + for admonition_key in _GITHUB_ADMONITIONS: + if line_roi.startswith(admonition_key): + line = line.replace( + admonition_key, + "```{" + _GITHUB_ADMONITIONS[admonition_key] + "}", + ) + # start replacing quotes in subsequent lines + replacing = True + break + else: # no break + if not replacing: + continue + # remove GH directive to match MyST directive + # since we are replacing on the original line, this will preserve the right indent, if any + if line_roi.startswith("> "): + line = line.replace("> ", "", 1) + elif line_roi.rstrip() == ">": + line 
= line.replace(">", "", 1) + else: + # missing "> ", so stop replacing and terminate directive + line = f"```\n{line}" + replacing = False + # swap line back in splits + orig_line_splits[j] = line + # swap line back in original + contents[i] = "\n".join(orig_line_splits) + + +def setup(app): + app.connect("include-read", convert_gh_admonitions)