From ee98aa0a904b22f296c42955bcb0a8ddfd2c6a9c Mon Sep 17 00:00:00 2001 From: Shang Wang Date: Thu, 4 Sep 2025 14:51:01 -0400 Subject: [PATCH 01/13] Convert relative path to a file in Mardown to its URL on GitHub. Signed-off-by: Shang Wang --- docs/conf.py | 70 +++++++++++++++++----- pyproject.toml | 1 + uv.lock | 160 +++++++++++++++++++++++++------------------------ 3 files changed, 137 insertions(+), 94 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index a7a932160e..fc58de3015 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,6 +22,14 @@ import os import sys +from pathlib import Path +from typing import Any + +import git +from docutils import nodes +from docutils.transforms import Transform +from sphinx import addnodes +from sphinx.application import Sphinx project = "NeMo-RL" copyright = "2025, NVIDIA Corporation" @@ -99,19 +107,21 @@ } html_extra_path = ["project.json", "versions1.json"] -# -- Supporting rendering GitHub alerts correctly ---------------------------- -# https://github.com/executablebooks/MyST-Parser/issues/845 - -_GITHUB_ADMONITIONS = { - "> [!NOTE]": "note", - "> [!TIP]": "tip", - "> [!IMPORTANT]": "important", - "> [!WARNING]": "warning", - "> [!CAUTION]": "caution", -} +def _convert_gh_admonitions( + app: Sphinx, relative_path: Path, parent_docname: str, contents: list[str] +) -> None: + """Supporting rendering GitHub alerts correctly. 
-def convert_gh_admonitions(app, relative_path, parent_docname, contents): + # https://github.com/executablebooks/MyST-Parser/issues/845 + """ + _github_admonitions = { + "> [!NOTE]": "note", + "> [!TIP]": "tip", + "> [!IMPORTANT]": "important", + "> [!WARNING]": "warning", + "> [!CAUTION]": "caution", + } # loop through content lines, replace github admonitions for i, orig_content in enumerate(contents): orig_line_splits = orig_content.split("\n") @@ -119,11 +129,11 @@ def convert_gh_admonitions(app, relative_path, parent_docname, contents): for j, line in enumerate(orig_line_splits): # look for admonition key line_roi = line.lstrip() - for admonition_key in _GITHUB_ADMONITIONS: + for admonition_key in _github_admonitions: if line_roi.startswith(admonition_key): line = line.replace( admonition_key, - "```{" + _GITHUB_ADMONITIONS[admonition_key] + "}", + "```{" + _github_admonitions[admonition_key] + "}", ) # start replacing quotes in subsequent lines replacing = True @@ -147,5 +157,35 @@ def convert_gh_admonitions(app, relative_path, parent_docname, contents): contents[i] = "\n".join(orig_line_splits) -def setup(app): - app.connect("include-read", convert_gh_admonitions) +class _GitHubLinkTransform(Transform): + """Converting the relative path to a file in a Markdown to the URL of that file on GitHub.""" + + default_priority = 500 # type: ignore[bad-override] + + def apply(self, **kwargs: Any) -> None: # type: ignore[bad-override] + repo = git.Repo(search_parent_directories=True) + origin_url = repo.remotes.origin.url + if origin_url.startswith("git@github.com:"): + origin_url = origin_url.replace("git@github.com:", "https://github.com/", 1) + if origin_url.endswith(".git"): + origin_url = origin_url[: -len(".git")] + blob = f"blob/{repo.head.object.hexsha}" + for node in self.document.traverse(addnodes.download_reference): + # `node["refdoc"]` would be, e.g., "guides/grpo". Therefore, `md_dir` would + # be, e.g., `"docs/guides"`. 
+ # Avoid using `os.path` or `pathlib` for path manipulation because, well, + # what if we try to build the docs on Windows? + md_dir = "/".join(["docs"] + node["refdoc"].split("/")[:-1]) + # `file_path` would be `"docs/grpo/../../examples/run_grpo_math.py"`. + file_path = "/".join((md_dir, node["reftarget"])) + # `refuri` would be `"https://github.com/NVIDIA-NeMo/RL/blob//docs/guides/../../examples/run_grpo_math.py"`. + refuri = "/".join((origin_url, blob, file_path)) + new_node = nodes.reference(rawsource=node.rawsource, refuri=refuri) + if node.children: + new_node += node.children + node.replace_self(new_node) + + +def setup(app: Sphinx) -> None: + app.add_transform(_GitHubLinkTransform) + app.connect("include-read", _convert_gh_admonitions) diff --git a/pyproject.toml b/pyproject.toml index b5ed8e4453..552c50c0e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -121,6 +121,7 @@ docs = [ "sphinx-copybutton", # Adds a copy button for code blocks "myst_parser", # For our markdown docs "nvidia-sphinx-theme", # Our NVIDIA theme + "gitpython", # To git-related information ] dev = [ "pre-commit>=4.2.0", diff --git a/uv.lock b/uv.lock index 91580d7d37..d14f918a6d 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", @@ -302,8 +302,8 @@ name = "bitsandbytes" version = "0.45.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "torch", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, + { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, + { name = "torch", marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, ] wheels = [ { url = 
"https://files.pythonhosted.org/packages/07/b7/cb5ce4d1a382cf53c19ef06c5fc29e85f5e129b4da6527dd207d90a5b8ad/bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl", hash = "sha256:a5453f30cc6aab6ccaac364e6bf51a7808d3da5f71763dffeb6d9694c59136e4", size = 76059261, upload-time = "2025-04-07T13:32:52.573Z" }, @@ -567,7 +567,7 @@ name = "coloredlogs" version = "15.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "humanfriendly" }, + { name = "humanfriendly", marker = "sys_platform != 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520, upload-time = "2021-06-11T10:22:45.202Z" } wheels = [ @@ -735,9 +735,9 @@ name = "cppimport" version = "22.8.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock" }, - { name = "mako" }, - { name = "pybind11" }, + { name = "filelock", marker = "sys_platform != 'darwin'" }, + { name = "mako", marker = "sys_platform != 'darwin'" }, + { name = "pybind11", marker = "sys_platform != 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/54/27/01d9078a77b9e31b79b9716e66ca4db74f4744c5232bcb3e8769395c4280/cppimport-22.8.2.tar.gz", hash = "sha256:bbb4957102db41bc99ad72c233bce92f9d1fd91be352fc07878c4361033a401f", size = 26635, upload-time = "2022-08-02T16:50:36.872Z" } @@ -824,8 +824,8 @@ name = "cupy-cuda12x" version = "13.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "fastrlock" }, - { name = "numpy" }, + { name = "fastrlock", marker = "sys_platform != 'darwin'" }, + { name = "numpy", marker = "sys_platform != 'darwin'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/12/c5/7e7fc4816d0de0154e5d9053242c3a08a0ca8b43ee656a6f7b3b95055a7b/cupy_cuda12x-13.6.0-cp312-cp312-manylinux2014_aarch64.whl", hash = 
"sha256:a6970ceefe40f9acbede41d7fe17416bd277b1bd2093adcde457b23b578c5a59", size = 127334633, upload-time = "2025-08-18T08:24:43.065Z" }, @@ -1965,8 +1965,8 @@ name = "liger-kernel" version = "0.5.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "torch", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "triton", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, + { name = "torch", marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, + { name = "triton", marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a2/55/3a703f337110e2a121a04e503abfeec2c191529cbee18bb1fb630d65642a/liger_kernel-0.5.8.tar.gz", hash = "sha256:3246d7dced89e0f982a52de259d4f78fd10eb9171246b28ae52b63ad09fc0732", size = 3593097, upload-time = "2025-04-12T16:44:32.252Z" } wheels = [ @@ -2517,12 +2517,12 @@ name = "mlx-lm" version = "0.26.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "jinja2", marker = "sys_platform != 'linux'" }, - { name = "mlx", marker = "sys_platform != 'linux'" }, - { name = "numpy", marker = "sys_platform != 'linux'" }, - { name = "protobuf", marker = "sys_platform != 'linux'" }, - { name = "pyyaml", marker = "sys_platform != 'linux'" }, - { name = "transformers", marker = "sys_platform != 'linux'" }, + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "mlx", marker = "sys_platform == 'darwin'" }, + { name = "numpy", marker = "sys_platform == 'darwin'" }, + { name = "protobuf", marker = "sys_platform == 'darwin'" }, + { name = "pyyaml", marker = "sys_platform == 'darwin'" }, + { name = "transformers", marker = "sys_platform == 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/af/4b/ed8ec01f182203b0897415a9d20f0cd8a141def77ad43deea18ffaba4c9c/mlx_lm-0.26.3.tar.gz", 
hash = "sha256:06cd74ee3eea920335c528e68feb854eede45fe4e5f149b464ac100c1dbeaded", size = 172096, upload-time = "2025-08-06T21:48:22.762Z" } wheels = [ @@ -2889,6 +2889,7 @@ dev = [ { name = "types-requests" }, ] docs = [ + { name = "gitpython" }, { name = "myst-parser" }, { name = "nvidia-sphinx-theme" }, { name = "sphinx" }, @@ -2972,6 +2973,7 @@ dev = [ { name = "types-requests" }, ] docs = [ + { name = "gitpython" }, { name = "myst-parser" }, { name = "nvidia-sphinx-theme" }, { name = "sphinx" }, @@ -3238,22 +3240,22 @@ name = "nvidia-modelopt" version = "0.33.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ninja" }, - { name = "numpy" }, - { name = "nvidia-ml-py" }, - { name = "nvidia-modelopt-core" }, - { name = "packaging" }, - { name = "pulp" }, - { name = "pydantic" }, - { name = "regex" }, - { name = "rich" }, - { name = "safetensors" }, - { name = "scipy" }, - { name = "torch" }, - { name = "torchprofile" }, + { name = "ninja", marker = "sys_platform != 'darwin'" }, + { name = "numpy", marker = "sys_platform != 'darwin'" }, + { name = "nvidia-ml-py", marker = "sys_platform != 'darwin'" }, + { name = "nvidia-modelopt-core", marker = "sys_platform != 'darwin'" }, + { name = "packaging", marker = "sys_platform != 'darwin'" }, + { name = "pulp", marker = "sys_platform != 'darwin'" }, + { name = "pydantic", marker = "sys_platform != 'darwin'" }, + { name = "regex", marker = "sys_platform != 'darwin'" }, + { name = "rich", marker = "sys_platform != 'darwin'" }, + { name = "safetensors", marker = "sys_platform != 'darwin'" }, + { name = "scipy", marker = "sys_platform != 'darwin'" }, + { name = "torch", marker = "sys_platform != 'darwin'" }, + { name = "torchprofile", marker = "sys_platform != 'darwin'" }, { name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "torchvision", version = 
"0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, - { name = "tqdm" }, + { name = "torchvision", version = "0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "tqdm", marker = "sys_platform != 'darwin'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/ca/cb/4af39357792a96f334c7877ea0380c9337aec210ff4794a7dd95beb7c349/nvidia_modelopt-0.33.1-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:6c51091683a117cd40fdb96a0ec28579f2276f6b627db7ccddc370df544e1dd7", size = 751683, upload-time = "2025-08-12T18:37:48.832Z" }, @@ -3262,18 +3264,18 @@ wheels = [ [package.optional-dependencies] onnx = [ - { name = "cppimport" }, + { name = "cppimport", marker = "sys_platform != 'darwin'" }, { name = "cupy-cuda12x", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin'" }, - { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, - { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, - { name = "onnx" }, - { name = "onnx-graphsurgeon" }, - { name = "onnxconverter-common" }, - { name = "onnxruntime", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin'" }, + { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' and sys_platform != 'darwin'" }, + { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13' and sys_platform != 'darwin'" }, + { name = "onnx", marker = "sys_platform != 'darwin'" }, + { name = "onnx-graphsurgeon", marker = "sys_platform != 'darwin'" 
}, + { name = "onnxconverter-common", marker = "sys_platform != 'darwin'" }, + { name = "onnxruntime", marker = "platform_machine == 'aarch64' and sys_platform != 'darwin'" }, { name = "onnxruntime-gpu", version = "1.20.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'win32'" }, { name = "onnxruntime-gpu", version = "1.22.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, - { name = "onnxscript" }, - { name = "polygraphy" }, + { name = "onnxscript", marker = "sys_platform != 'darwin'" }, + { name = "polygraphy", marker = "sys_platform != 'darwin'" }, ] [[package]] @@ -3314,13 +3316,13 @@ name = "nvidia-resiliency-ext" version = "0.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "defusedxml" }, - { name = "nvidia-ml-py" }, - { name = "packaging" }, - { name = "psutil" }, - { name = "pynvml" }, - { name = "pyyaml" }, - { name = "torch" }, + { name = "defusedxml", marker = "sys_platform != 'darwin'" }, + { name = "nvidia-ml-py", marker = "sys_platform != 'darwin'" }, + { name = "packaging", marker = "sys_platform != 'darwin'" }, + { name = "psutil", marker = "sys_platform != 'darwin'" }, + { name = "pynvml", marker = "sys_platform != 'darwin'" }, + { name = "pyyaml", marker = "sys_platform != 'darwin'" }, + { name = "torch", marker = "sys_platform != 'darwin'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/70/05/38d491962273c7905708762279f440520eb79f3c00b67a023497215ad023/nvidia_resiliency_ext-0.4.1-cp312-cp312-manylinux_2_31_aarch64.whl", hash = "sha256:b3bd5f01535574b16d0f38bca6e39afe3806c4a2896eee1b321cd944e00025a7", size = 444570, upload-time = "2025-07-17T03:50:58.877Z" }, @@ -3371,9 +3373,9 @@ name = "onnx" version = "1.18.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, - { name = "protobuf" }, - { name = "typing-extensions" }, + { 
name = "numpy", marker = "sys_platform != 'darwin'" }, + { name = "protobuf", marker = "sys_platform != 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/3d/60/e56e8ec44ed34006e6d4a73c92a04d9eea6163cc12440e35045aec069175/onnx-1.18.0.tar.gz", hash = "sha256:3d8dbf9e996629131ba3aa1afd1d8239b660d1f830c6688dd7e03157cccd6b9c", size = 12563009, upload-time = "2025-05-12T22:03:09.626Z" } wheels = [ @@ -3395,8 +3397,8 @@ name = "onnx-graphsurgeon" version = "0.5.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, - { name = "onnx" }, + { name = "numpy", marker = "sys_platform != 'darwin'" }, + { name = "onnx", marker = "sys_platform != 'darwin'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/71/53/98334c4f64a9e289a8cb48f5e7966b8ff015414d0bf26587cf46d764f1d8/onnx_graphsurgeon-0.5.8-py2.py3-none-any.whl", hash = "sha256:6f611ea29a8e4740fbab1aae52bf4c40b8b9918f8459058d20b99acc79fce121", size = 57923, upload-time = "2025-04-10T18:49:24.483Z" }, @@ -3407,11 +3409,11 @@ name = "onnx-ir" version = "0.1.7" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, - { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, - { name = "numpy" }, - { name = "onnx" }, - { name = "typing-extensions" }, + { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' and sys_platform != 'darwin'" }, + { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13' and sys_platform != 'darwin'" }, + { name = "numpy", marker = "sys_platform != 'darwin'" }, + { name = "onnx", marker = 
"sys_platform != 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6a/14/4a003926218f8edee6da19546f69a1831b74cdd993eaf5ff50a2fb168e70/onnx_ir-0.1.7.tar.gz", hash = "sha256:4734b7587807ca657158b042c138879c3f454756fae74e949f6c99f0107d8df6", size = 107944, upload-time = "2025-08-22T15:01:16.383Z" } wheels = [ @@ -3423,10 +3425,10 @@ name = "onnxconverter-common" version = "1.15.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, - { name = "onnx" }, - { name = "packaging" }, - { name = "protobuf" }, + { name = "numpy", marker = "sys_platform != 'darwin'" }, + { name = "onnx", marker = "sys_platform != 'darwin'" }, + { name = "packaging", marker = "sys_platform != 'darwin'" }, + { name = "protobuf", marker = "sys_platform != 'darwin'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/76/ac/c3ff41cc2d36c8caab51bffa9185ea64019f161850b9641eb0409b243ae1/onnxconverter_common-1.15.0-py2.py3-none-any.whl", hash = "sha256:24579ed1bb3c10beca39a4517d196c17341911be5bd09bd0e6050a7379a2a7d9", size = 89640, upload-time = "2025-07-01T16:42:56.968Z" }, @@ -3437,12 +3439,12 @@ name = "onnxruntime" version = "1.22.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "coloredlogs", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" }, - { name = "flatbuffers", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" }, - { name = "numpy", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" }, - { name = "packaging", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" }, - { name = "protobuf", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" }, - { name = "sympy", marker = "platform_machine == 
'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" }, + { name = "coloredlogs", marker = "(platform_machine == 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, + { name = "flatbuffers", marker = "(platform_machine == 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, + { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, + { name = "packaging", marker = "(platform_machine == 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, + { name = "protobuf", marker = "(platform_machine == 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, + { name = "sympy", marker = "(platform_machine == 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/29/e5/00b099b4d4f6223b610421080d0eed9327ef9986785c9141819bbba0d396/onnxruntime-1.22.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:984cea2a02fcc5dfea44ade9aca9fe0f7a8a2cd6f77c258fc4388238618f3928", size = 14473861, upload-time = "2025-07-10T19:15:42.911Z" }, @@ -3500,13 +3502,13 @@ name = "onnxscript" version = "0.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, - { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, - { name = "numpy" }, - { name = "onnx" }, - { name = "onnx-ir" }, - { name = "packaging" }, - { name = "typing-extensions" }, + { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' and sys_platform != 'darwin'" }, + { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < 
'3.13' and sys_platform != 'darwin'" }, + { name = "numpy", marker = "sys_platform != 'darwin'" }, + { name = "onnx", marker = "sys_platform != 'darwin'" }, + { name = "onnx-ir", marker = "sys_platform != 'darwin'" }, + { name = "packaging", marker = "sys_platform != 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a8/9f/45aed9951d3fa50a97b910487186ef9c15ad08d3c9cb3605aabd99f65f92/onnxscript-0.4.0.tar.gz", hash = "sha256:de618eeb6e0c57f5a70f85909ab1f829cbb2053ad55f8f2fcc2701fa29b7adfc", size = 567393, upload-time = "2025-08-22T21:05:46.416Z" } wheels = [ @@ -4322,7 +4324,7 @@ name = "pynvml" version = "12.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-ml-py" }, + { name = "nvidia-ml-py", marker = "sys_platform != 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/26/6f/6b5880ed0239e85b9a39aed103b65b2ef81425beef9f45e5c035bf008330/pynvml-12.0.0.tar.gz", hash = "sha256:299ce2451a6a17e6822d6faee750103e25b415f06f59abb8db65d30f794166f5", size = 33636, upload-time = "2024-12-02T15:04:36.631Z" } wheels = [ @@ -5647,10 +5649,10 @@ name = "torchprofile" version = "0.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, - { name = "torch" }, + { name = "numpy", marker = "sys_platform != 'darwin'" }, + { name = "torch", marker = "sys_platform != 'darwin'" }, { name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, - { name = "torchvision", version = "0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, + { name = "torchvision", version = "0.22.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(platform_machine != 'aarch64' and 
sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6f/36/574c0c46e818533b78b3c09505211162918188325ab4165ef11a3f295755/torchprofile-0.0.4.tar.gz", hash = "sha256:96b6da17d752a06b02977e078aea95614893b31d4117dd5dcd081f30ce65611b", size = 4557, upload-time = "2021-06-22T04:58:03.592Z" } wheels = [ From 0da45f296d8558379fbd9ed1053e02989e93f4f4 Mon Sep 17 00:00:00 2001 From: Anna Shors Date: Tue, 2 Sep 2025 18:58:15 -0700 Subject: [PATCH 02/13] fix: make layernorm_epsilon configurable in with megatron backend (#1046) Signed-off-by: ashors1 Signed-off-by: Shang Wang --- nemo_rl/models/policy/megatron_policy_worker.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 10c8cf33be..870c7042d4 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -561,6 +561,9 @@ def __init__( "moe_router_bias_update_rate" ] + if "layernorm_epsilon" in self.cfg["megatron_cfg"]: + model_cfg.layernorm_epsilon = self.cfg["megatron_cfg"]["layernorm_epsilon"] + model_cfg.sequence_parallel = self.cfg["megatron_cfg"]["sequence_parallel"] model_cfg.bf16 = self.dtype == torch.bfloat16 model_cfg.fp16 = self.dtype == torch.float16 From 838ee44a89e122c62ebc1298e2453aedfefb7633 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Tue, 2 Sep 2025 21:12:51 -0500 Subject: [PATCH 03/13] ci: Only run build-test-publish-wheel workflow if env var set (#1047) Signed-off-by: Charlie Truong Signed-off-by: Shang Wang --- .github/workflows/build-test-publish-wheel.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-test-publish-wheel.yml b/.github/workflows/build-test-publish-wheel.yml index 37025e0e62..b39719417b 100644 --- a/.github/workflows/build-test-publish-wheel.yml +++ b/.github/workflows/build-test-publish-wheel.yml @@ 
-27,6 +27,7 @@ defaults: jobs: build-test-publish-wheel: uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.33.0 + if: ${{ vars.BUILD_TEST_PUBLISH_WHEEL == 'true' }} with: dry-run: true python-package: nemo_rl From 62d09f3af88ce23f1d572e0acedf015366045062 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Tue, 2 Sep 2025 23:56:39 -0700 Subject: [PATCH 04/13] fix: ray.sub will exit early if any srun fails to launch (#1022) Signed-off-by: Terry Kong Signed-off-by: Shang Wang --- ray.sub | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/ray.sub b/ray.sub index 23da1f175e..6c3fcdfe08 100644 --- a/ray.sub +++ b/ray.sub @@ -128,6 +128,23 @@ CPUS_PER_WORKER=${CPUS_PER_WORKER:-$((GPUS_PER_NODE * 16))} num_retries=3 +# Track backgrounded srun client PIDs for head and workers +declare -A SRUN_PIDS + +# Verify all backgrounded srun client processes are still alive; exit fast if any died +check_srun_processes() { + for name in "${!SRUN_PIDS[@]}"; do + pid="${SRUN_PIDS[$name]}" + # Check if the process is still running + if ! kill -0 "$pid" 2>/dev/null; then + echo "[ERROR] Background srun '$name' died (pid=$pid). Could be a failure in startup or an issue with the node preventing the srun to start. Attempting to exit." >&2 + # Signal sidecars inside containers to terminate ASAP + touch "$LOG_DIR/ENDED" + exit 1 + fi + done +} + # Getting the node names and IP addresses in the SLURM allocation nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST") nodes_array=($nodes) @@ -278,6 +295,7 @@ exit 1 EOF ) srun $COMMON_SRUN_ARGS --container-name=ray-head --nodes=1 --ntasks=1 --cpus-per-task=$CPUS_PER_WORKER -w "$head_node" -o $LOG_DIR/ray-head.log bash -x -c "$head_cmd" & +SRUN_PIDS["ray-head"]=$! 
NUM_ACTORS=$((GPUS_PER_NODE * SLURM_JOB_NUM_NODES)) @@ -375,11 +393,12 @@ exit 1 EOF ) srun $COMMON_SRUN_ARGS --container-name=ray-worker-$i --exact --nodes=1 --ntasks=1 --cpus-per-task=$CPUS_PER_WORKER -w "$node_i" -o $LOG_DIR/ray-worker-$i.log bash -x -c "$worker_cmd" & + SRUN_PIDS["ray-worker-$i"]=$! sleep 3 done # Then we wait here for the file to be created by the head node container -while ! srun --overlap --nodes=1 --ntasks=1 -w $head_node test -f $LOG_DIR/STARTED_RAY_HEAD; do +while check_srun_processes && ! srun --overlap --nodes=1 --ntasks=1 -w $head_node test -f $LOG_DIR/STARTED_RAY_HEAD; do echo "[INFO][$(date)] Waiting for head node container to start..." sleep 2 done @@ -404,9 +423,10 @@ extract_worker_units() { while true; do worker_units=$(extract_worker_units) echo "[INFO] Number of actors online: $worker_units/$NUM_ACTORS" - if [ "$worker_units" -eq "$NUM_ACTORS" ]; then + if [[ "$worker_units" -eq "$NUM_ACTORS" ]]; then break fi + check_srun_processes sleep 2 done From 0b8374488399688b2cf761558558785a0f88c3ab Mon Sep 17 00:00:00 2001 From: Zhiyu Li Date: Wed, 3 Sep 2025 09:33:30 -0700 Subject: [PATCH 05/13] fix: address double bos in eval task (#962) Signed-off-by: Zhiyu Li Signed-off-by: Zhiyu Li Co-authored-by: Yuki Huang <48991475+yuki-97@users.noreply.github.com> Signed-off-by: Shang Wang --- nemo_rl/data/processors.py | 12 +++- tests/unit/data/test_data_processor.py | 88 +++++++++++++++++++++++--- 2 files changed, 89 insertions(+), 11 deletions(-) diff --git a/nemo_rl/data/processors.py b/nemo_rl/data/processors.py index 0e1c811cf7..4fecd46125 100644 --- a/nemo_rl/data/processors.py +++ b/nemo_rl/data/processors.py @@ -51,7 +51,9 @@ def math_data_processor( add_generation_prompt=False, add_special_tokens=False, ) - sys_prompt["token_ids"] = tokenizer(sys, return_tensors="pt")["input_ids"][0] + sys_prompt["token_ids"] = tokenizer( + sys, return_tensors="pt", add_special_tokens=False + )["input_ids"][0] message_log.append(sys_prompt) # user 
prompt @@ -138,7 +140,9 @@ def multichoice_qa_processor( add_generation_prompt=False, add_special_tokens=False, ) - sys_prompt["token_ids"] = tokenizer(sys, return_tensors="pt")["input_ids"][0] + sys_prompt["token_ids"] = tokenizer( + sys, return_tensors="pt", add_special_tokens=False + )["input_ids"][0] message_log.append(sys_prompt) # user prompt @@ -153,7 +157,9 @@ def multichoice_qa_processor( add_generation_prompt=True, add_special_tokens=False, ) - user_message["token_ids"] = tokenizer(message, return_tensors="pt")["input_ids"][0] + user_message["token_ids"] = tokenizer( + message, return_tensors="pt", add_special_tokens=False + )["input_ids"][0] user_message["content"] = message message_log.append(user_message) diff --git a/tests/unit/data/test_data_processor.py b/tests/unit/data/test_data_processor.py index 8c85eb6ed0..f161d1f7a5 100644 --- a/tests/unit/data/test_data_processor.py +++ b/tests/unit/data/test_data_processor.py @@ -14,6 +14,7 @@ import os import sys +import tempfile from collections import defaultdict import pytest @@ -25,6 +26,13 @@ from examples.run_grpo_math import hf_data_processor from nemo_rl.algorithms.utils import get_tokenizer from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data.eval_datasets import ( + AIME2024Dataset, + AIME2025Dataset, + GPQADataset, + MathDataset, + MMLUDataset, +) from nemo_rl.data.hf_datasets.deepscaler import DeepScalerDataset from nemo_rl.data.hf_datasets.openmathinstruct2 import OpenMathInstruct2Dataset from nemo_rl.data.interfaces import TaskDataProcessFnCallable, TaskDataSpec @@ -78,18 +86,15 @@ def test_math_data_processor(): ], ) @pytest.mark.parametrize( - "dataset_name", + "dataset_cls", [ - "openmathinstruct2", - "deepscaler", + OpenMathInstruct2Dataset, + DeepScalerDataset, ], ) -def test_math_hf_data_processor(tokenizer_name, dataset_name): +def test_math_hf_data_processor(tokenizer_name, dataset_cls): # Initialize dataset - if dataset_name == "openmathinstruct2": - data = 
OpenMathInstruct2Dataset() - elif dataset_name == "deepscaler": - data = DeepScalerDataset() + data = dataset_cls() # Setup tokenizer tokenizer = get_tokenizer( @@ -124,3 +129,70 @@ def test_math_hf_data_processor(tokenizer_name, dataset_name): assert first_item is not None assert "message_log" in first_item assert len(first_item["message_log"]) > 0 + + +@pytest.fixture +def system_prompt_file(request): + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as file: + file.write("You are a helpful assistant.\n{}") + + return file.name + + +@pytest.mark.hf_gated +@pytest.mark.parametrize( + "tokenizer_name", + [ + "meta-llama/Llama-3.2-1B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", # no bos token + "google/gemma-3-1b-it", + "Qwen/Qwen3-0.6B", # no bos token + "deepseek-ai/DeepSeek-V3", + "moonshotai/Moonlight-16B-A3B-Instruct", + ], +) +@pytest.mark.parametrize( + "dataset_cls", + [ + MMLUDataset, + GPQADataset, + MathDataset, + AIME2024Dataset, + AIME2025Dataset, + ], +) +@pytest.mark.parametrize( + "system_prompt_file", [system_prompt_file, None], indirect=True +) +def test_eval_math_hf_data_processor(tokenizer_name, dataset_cls, system_prompt_file): + # Initialize dataset + data = dataset_cls() + + # Setup tokenizer + tokenizer = get_tokenizer( + TokenizerConfig( + name=tokenizer_name, + chat_template="default", + ) + ) + + # Configure task specification + math_task_spec = TaskDataSpec( + task_name="math", + prompt_file=f"{os.path.dirname(abspath)}/../../../examples/prompts/cot.txt", + system_prompt_file=system_prompt_file, + ) + + dataset = AllTaskProcessedDataset( + dataset=data.rekeyed_ds, + tokenizer=tokenizer, + default_task_data_spec=math_task_spec, + task_data_processors=data.processor, + max_seq_length=128, + ) + + # Test that the first item can be retrieved when the BOS token assertion passes + first_item = dataset[0] + assert first_item is not None + assert "message_log" in first_item + assert len(first_item["message_log"]) > 0 From 
037ecd71c45650c137660329913f3b06cb145598 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Wed, 3 Sep 2025 19:53:31 -0700 Subject: [PATCH 06/13] feat: add testmon support to detect when tests need to be rerun (#1056) Signed-off-by: Terry Kong Signed-off-by: Shang Wang --- .gitignore | 2 + docs/testing.md | 36 ++++ nemo_rl/distributed/virtual_cluster.py | 3 + pyproject.toml | 1 + tests/unit/__init__.py | 12 ++ tests/unit/_plugins/remote_select.py | 284 +++++++++++++++++++++++++ uv.lock | 15 ++ 7 files changed, 353 insertions(+) create mode 100644 tests/unit/_plugins/remote_select.py diff --git a/.gitignore b/.gitignore index acb3116f06..faf2763255 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,8 @@ coverage.json unit_results.json unit_results/ test_assets/ +.nrl_remote_map.json +.nrl_remote_state.json # Cache uv_cache/ diff --git a/docs/testing.md b/docs/testing.md index 5a24452813..c1d1bc570a 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -30,6 +30,42 @@ uv run --extra mcore --group test bash tests/run_unit.sh --mcore-only uv run --extra mcore --group test bash tests/run_unit.sh --mcore-only --hf-gated ``` +### Experimental: Faster local test iteration with pytest-testmon + +We support `pytest-testmon` to speed up local unit test runs by re-running only impacted tests. This works for both regular in-process code and out-of-process `@ray.remote` workers via a lightweight, test-only selection helper. + +Usage: +```sh +# Re-run only impacted unit tests +uv run --group test pytest --testmon tests/unit + +# You can also combine with markers/paths +uv run --group test pytest --hf-gated --testmon tests/unit/models/policy/test_dtensor_worker.py +``` + +What to expect: +- On the first run in a fresh workspace, testmon may run a broader set (or deselect everything if nothing was executed yet) to build its dependency cache. +- On subsequent runs, editing non-remote code narrows selection to only the tests that import/use those modules. 
+- Editing code inside `@ray.remote` actors also retriggers impacted tests. We maintain a static mapping from test modules to the `nemo_rl` modules they import directly and intersect that with changed files when `--testmon` is present. +- After a successful impacted run, a second `--testmon` invocation (with no further edits) will deselect all tests. +- Running `pytest` with `-k some_substring_in_test_name` will always run tests that match even if `--testmon` is passed. + +Limitations and tips: +- Selection is based on Python imports and file mtimes; non-Python assets (YAML/JSON/shell) are not tracked. When editing those, re-run target tests explicitly. +- The remote-aware selection uses a conservative static import map (no dynamic import resolution, and imports made by the imported modules are not followed). If a test loads code that isn’t visible via its own imports, run that test explicitly; running it does not add anything to the static map. +- The helper is test-only and does not alter library behavior. It activates automatically when you pass `--testmon`. + +Refreshing remote-selection artifacts +------------------------------------- +If you change test layout or significantly refactor imports, the remote-selection artifacts may become stale. +To rebuild them, delete the following files at the repo root and re-run with `--testmon` to seed again: + +```sh +# At the root of nemo-rl +rm .nrl_remote_map.json .nrl_remote_state.json +``` + + ### Run Unit Tests in a Hermetic Environment For environments lacking necessary dependencies (e.g., `gcc`, `nvcc`) diff --git a/nemo_rl/distributed/virtual_cluster.py b/nemo_rl/distributed/virtual_cluster.py index 5d5e0bd7d9..833376a017 100644 --- a/nemo_rl/distributed/virtual_cluster.py +++ b/nemo_rl/distributed/virtual_cluster.py @@ -78,6 +78,9 @@ def init_ray(log_dir: Optional[str] = None) -> None: Try to attach to an existing local cluster. If that cluster uses the same CUDA_VISIBLE_DEVICES or Slurm managed tag we will reuse it. Otherwise, we will detach and start a fresh local cluster.
+ + Args: + log_dir: Optional directory to store Ray logs and temp files. """ # Set up runtime environment env_vars = dict(os.environ) diff --git a/pyproject.toml b/pyproject.toml index 552c50c0e6..39fcdefc2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -135,6 +135,7 @@ test = [ "pytest-timeout", "pytest-cov", "pytest-asyncio", + "pytest-testmon", ] [tool.uv.sources] diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index 341a77c5bc..31c1220368 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -11,3 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + + +""" +Auto-loading remote_select plugin here: +- Ensures the plugin is discovered without extra CLI flags or global config. +- Loads early in pytest’s startup so ``pytest_load_initial_conftests`` can + rewrite args before other plugins (e.g., testmon) prune collection. +- Scopes behavior to unit tests only (does not affect functional tests). +- Avoids a top-level ``conftest.py`` that would apply repo-wide. +""" + +pytest_plugins = ["tests.unit._plugins.remote_select"] diff --git a/tests/unit/_plugins/remote_select.py b/tests/unit/_plugins/remote_select.py new file mode 100644 index 0000000000..a3f21c136a --- /dev/null +++ b/tests/unit/_plugins/remote_select.py @@ -0,0 +1,284 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +Remote-aware test selection helper for pytest-testmon (Python 3.12). + +Purpose +------- +When running unit tests with ``--testmon``, pytest-testmon tracks in-process +Python execution and reruns only affected tests. Code executed inside +``@ray.remote`` actors runs out-of-process, so testmon alone cannot see those +dependencies. This lightweight test-only plugin augments selection so that +edits inside remote actors can still retrigger the relevant tests. + +How it works +------------ +- Builds a static mapping from each unit test (nodeid) to the set of + ``nemo_rl`` Python files that the test module imports directly. +- Stores the mapping in ``.nrl_remote_map.json`` and tracks mtimes in + ``.nrl_remote_state.json`` at repo root. +- When ``--testmon`` is present: + - On first run, seeds the state file and does not change selection. + - On subsequent runs, compares mtimes; if tracked files changed, it replaces + the pytest positional args with the affected nodeids so those tests run. +- Honors ``-k``. If a ``-k`` filter is provided, the plugin does not alter + selection and lets user intent win. + +Limitations +----------- +- Static import analysis only; dynamic imports/loading are not discovered. +- Only Python files are considered (YAML/JSON/shell edits are not tracked). +- The mapping only covers the test module's own imports; if a test exercises + code not visible via those imports, run that test explicitly. + +Activation +---------- +This plugin auto-loads via ``tests/unit/__init__.py`` and only engages when +``--testmon`` is present. + +Artifacts +--------- +Two JSON files are written to the repository root: + +1) ``.nrl_remote_map.json`` + - Maps test nodeids to the set of project files (under ``nemo_rl/``) + imported directly by that test module.
+ - Example (paths abbreviated for readability): + { + "tests/unit/distributed/test_worker_groups.py::test_configure_worker_interaction": [ + "/workspaces/nemo-rl/nemo_rl/distributed/worker_groups.py", + "/workspaces/nemo-rl/nemo_rl/distributed/virtual_cluster.py" + ], + "tests/unit/models/policy/test_dtensor_worker.py::test_lm_policy_init[True]": [ + "/workspaces/nemo-rl/nemo_rl/models/policy/dtensor_policy_worker.py" + ] + } + +2) ``.nrl_remote_state.json`` + - Stores the last-seen modification time (mtime) per tracked file to detect changes. + - Example: + { + "/workspaces/nemo-rl/nemo_rl/distributed/worker_groups.py": 1725369123.456, + "/workspaces/nemo-rl/nemo_rl/models/policy/dtensor_policy_worker.py": 1725369187.012 + } +""" + +import ast +import json +import os +import sys +from pathlib import Path +from typing import Iterable + +REPO_ROOT: Path = Path(__file__).resolve().parents[3] +MAP_PATH: Path = REPO_ROOT / ".nrl_remote_map.json" +STATE_PATH: Path = REPO_ROOT / ".nrl_remote_state.json" +PROJECT_PREFIXES: tuple[str, ...] 
= ("nemo_rl",) + + +def _read_text(path: Path) -> str: + try: + return path.read_text() + except Exception: + return "" + + +def _parse_imported_modules(py_path: Path) -> set[str]: + src = _read_text(py_path) + try: + tree = ast.parse(src) + except Exception: + return set() + modules: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + modules.add(alias.name) + elif isinstance(node, ast.ImportFrom): + if node.module: + modules.add(node.module) + return {m for m in modules if m.startswith(PROJECT_PREFIXES)} + + +def _module_to_file(module_name: str) -> Path | None: + mod_path = Path(module_name.replace(".", "/") + ".py") + abs_path = (REPO_ROOT / mod_path).resolve() + return abs_path if abs_path.exists() else None + + +def _discover_test_nodeids_and_files() -> dict[str, set[str]]: + mapping: dict[str, set[str]] = {} + tests_root = REPO_ROOT / "tests" / "unit" + for test_path in tests_root.rglob("test_*.py"): + rel = test_path.relative_to(REPO_ROOT) + mod_node_prefix = str(rel) + modules = _parse_imported_modules(test_path) + files: set[str] = set() + for m in modules: + f = _module_to_file(m) + if f: + files.add(str(f)) + if not files: + continue + src = _read_text(test_path) + try: + tree = ast.parse(src) + except Exception: + continue + for node in tree.body: + if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"): + nodeid = f"{mod_node_prefix}::{node.name}" + mapping[nodeid] = set(files) + elif isinstance(node, ast.ClassDef) and node.name.startswith("Test"): + for sub in node.body: + if isinstance(sub, ast.FunctionDef) and sub.name.startswith( + "test_" + ): + nodeid = f"{mod_node_prefix}::{node.name}::{sub.name}" + mapping[nodeid] = set(files) + return mapping + + +def _load_mapping() -> dict[str, set[str]]: + if not MAP_PATH.exists(): + return {} + try: + data = json.loads(MAP_PATH.read_text()) + return {k: set(v) for k, v in data.items()} + except Exception: + return {} + + +def 
_save_mapping(mapping: dict[str, set[str]]) -> None: + MAP_PATH.write_text( + json.dumps({k: sorted(v) for k, v in mapping.items()}, indent=2) + ) + + +def _detect_changed(files: Iterable[str]) -> set[str]: + prev: dict[str, float] = {} + if STATE_PATH.exists(): + try: + prev = json.loads(STATE_PATH.read_text()) + except Exception: + prev = {} + changed: set[str] = set() + state: dict[str, float] = {} + for f in files: + try: + mtime = os.path.getmtime(f) + state[f] = mtime + if prev.get(f, 0) < mtime: + changed.add(f) + except FileNotFoundError: + changed.add(f) + if files: + STATE_PATH.write_text(json.dumps(state, indent=2)) + return changed + + +def _has_k_filter(args: list[str]) -> bool: + """Return True if -k/--keyword filter is present in CLI args.""" + if "-k" in args: + return True + for i, a in enumerate(args): + if a.startswith("-k") or a.startswith("--keyword"): + return True + if a in {"-k", "--keyword"} and i + 1 < len(args): + return True + return False + + +def pytest_load_initial_conftests(args, early_config, parser): + # Only augment when user asked for --testmon and no -k filter is provided + if "--testmon" not in args or _has_k_filter(args): + return + + affected = _select_affected(None) + # None = first run (seed only), empty set = no changes; leave args unchanged + if affected is None or affected == set(): + return + + # Remove --testmon and narrow args to affected nodeids (execute only those tests) + while "--testmon" in args: + args.remove("--testmon") + if not any(not a.startswith("-") for a in args): + args[:] = sorted(affected) + else: + args.extend(sorted(affected)) + + +def _effective_mapping() -> dict[str, set[str]]: + mapping = _load_mapping() + if not mapping: + mapping = _discover_test_nodeids_and_files() + if mapping: + _save_mapping(mapping) + return mapping + + +def _select_affected(config) -> set[str] | None: + mapping = _effective_mapping() + if not mapping: + return None + file_set: set[str] = set() + for files in 
mapping.values(): + file_set.update(files) + if not file_set: + return None + if not STATE_PATH.exists(): + _ = _detect_changed(file_set) + return None + changed = _detect_changed(file_set) + if not changed: + return set() + affected: set[str] = set() + for nodeid, files in mapping.items(): + if any(f in changed for f in files): + affected.add(nodeid) + return affected + + +def pytest_configure(config) -> None: + # Late-stage fallback in case initial hook didn't capture + tm_on = config.pluginmanager.hasplugin("testmon") or "--testmon" in sys.argv + if not tm_on: + return + # Honor -k/--keyword filters + if _has_k_filter(sys.argv): + return + affected = _select_affected(config) + if affected is None or affected == set(): + return + try: + config.args[:] = sorted(affected) + except Exception: + pass + + +def pytest_collection_modifyitems(config, items): + tm_on = config.pluginmanager.hasplugin("testmon") or "--testmon" in sys.argv + if not tm_on: + return + # Honor -k/--keyword filters + if _has_k_filter(sys.argv): + return + affected = _select_affected(config) + if affected is None: + return + if affected == set(): + # No changes → deselect all for speed + items[:] = [] + return + items[:] = [it for it in items if it.nodeid in affected] diff --git a/uv.lock b/uv.lock index d14f918a6d..269b158339 100644 --- a/uv.lock +++ b/uv.lock @@ -2901,6 +2901,7 @@ test = [ { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-cov" }, + { name = "pytest-testmon" }, { name = "pytest-timeout" }, ] @@ -2985,6 +2986,7 @@ test = [ { name = "pytest", specifier = ">=7.0.0" }, { name = "pytest-asyncio" }, { name = "pytest-cov" }, + { name = "pytest-testmon" }, { name = "pytest-timeout" }, ] @@ -4431,6 +4433,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/7f/92c8dbe185aa38270fec1e73e0ed70d8e5de31963aa057ba621055f8b008/pytest_random_order-1.2.0-py3-none-any.whl", hash = "sha256:78d1d6f346222cdf26a7302c502d2f1cab19454529af960b8b9e1427a99ab277", size = 
10889, upload-time = "2025-06-22T14:44:42.438Z" }, ] +[[package]] +name = "pytest-testmon" +version = "2.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/54/24/b17712bc8b9d9814a30346e5bd76a6c4539f5187455f4e0d99d95f033da6/pytest_testmon-2.1.3.tar.gz", hash = "sha256:dad41aa7d501d74571750da1abd3f6673b63fd9dbf3023bd1623814999018c97", size = 22608, upload-time = "2024-12-22T12:43:28.822Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/08/278800711d937e76ce59105fea1bb739ae5ff5c13583fd064fe3b4e64fa1/pytest_testmon-2.1.3-py3-none-any.whl", hash = "sha256:53ba06d8a90ce24c3a191b196aac72ca4b788beff5eb1c1bffee04dc50ec7105", size = 24994, upload-time = "2024-12-22T12:43:10.173Z" }, +] + [[package]] name = "pytest-timeout" version = "2.4.0" From 558020067b7434e04e7f9e81f260f26ec2696944 Mon Sep 17 00:00:00 2001 From: Shang Wang Date: Thu, 4 Sep 2025 15:46:25 -0400 Subject: [PATCH 07/13] Fix CI. Signed-off-by: Shang Wang --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 39fcdefc2e..644e4f1209 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -124,6 +124,7 @@ docs = [ "gitpython", # To git-related information ] dev = [ + "sphinx", "pre-commit>=4.2.0", "ruff==0.9.9", "types-PyYAML", From 6052183fe30eab28da22e7154760354b81c6180b Mon Sep 17 00:00:00 2001 From: Shang Wang Date: Thu, 4 Sep 2025 15:50:06 -0400 Subject: [PATCH 08/13] Fix CI. 
Signed-off-by: Shang Wang --- uv.lock | 2 ++ 1 file changed, 2 insertions(+) diff --git a/uv.lock b/uv.lock index 269b158339..3666f8cf4c 100644 --- a/uv.lock +++ b/uv.lock @@ -2885,6 +2885,7 @@ dev = [ { name = "pre-commit" }, { name = "pyrefly" }, { name = "ruff" }, + { name = "sphinx" }, { name = "types-pyyaml" }, { name = "types-requests" }, ] @@ -2970,6 +2971,7 @@ dev = [ { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pyrefly", specifier = "==0.24.2" }, { name = "ruff", specifier = "==0.9.9" }, + { name = "sphinx" }, { name = "types-pyyaml" }, { name = "types-requests" }, ] From 93322777674cb5ece0e1167ae7241ead777b7c11 Mon Sep 17 00:00:00 2001 From: Shang Wang Date: Fri, 5 Sep 2025 16:34:11 -0400 Subject: [PATCH 09/13] Ignore sphinx from type checking. Signed-off-by: Shang Wang --- pyproject.toml | 1 - pyrefly.toml | 1 + uv.lock | 2 -- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 644e4f1209..39fcdefc2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -124,7 +124,6 @@ docs = [ "gitpython", # To git-related information ] dev = [ - "sphinx", "pre-commit>=4.2.0", "ruff==0.9.9", "types-PyYAML", diff --git a/pyrefly.toml b/pyrefly.toml index 48f6c2c697..bf3e6e62f4 100644 --- a/pyrefly.toml +++ b/pyrefly.toml @@ -13,6 +13,7 @@ replace-imports-with-any = [ "megatron.*", "ray.*", "numpy.*", + "sphinx.*" ] project-includes = [ # TODO: enable these once we have 100 correctness diff --git a/uv.lock b/uv.lock index 3666f8cf4c..269b158339 100644 --- a/uv.lock +++ b/uv.lock @@ -2885,7 +2885,6 @@ dev = [ { name = "pre-commit" }, { name = "pyrefly" }, { name = "ruff" }, - { name = "sphinx" }, { name = "types-pyyaml" }, { name = "types-requests" }, ] @@ -2971,7 +2970,6 @@ dev = [ { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pyrefly", specifier = "==0.24.2" }, { name = "ruff", specifier = "==0.9.9" }, - { name = "sphinx" }, { name = "types-pyyaml" }, { name = "types-requests" }, ] From 
1dd4b02bf029d1438723b674bddf1e2a9b3e403f Mon Sep 17 00:00:00 2001 From: Shang Wang Date: Fri, 5 Sep 2025 16:40:18 -0400 Subject: [PATCH 10/13] Ignore sphinx from type checking. Signed-off-by: Shang Wang --- pyrefly.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyrefly.toml b/pyrefly.toml index bf3e6e62f4..6fc1a02250 100644 --- a/pyrefly.toml +++ b/pyrefly.toml @@ -13,7 +13,8 @@ replace-imports-with-any = [ "megatron.*", "ray.*", "numpy.*", - "sphinx.*" + "sphinx.*", + "docutils.*", ] project-includes = [ # TODO: enable these once we have 100 correctness From 6cbaa67b8655fcc8304866c1e3ff8552c38f37f1 Mon Sep 17 00:00:00 2001 From: Shang Wang Date: Wed, 10 Sep 2025 17:09:36 -0400 Subject: [PATCH 11/13] Fix CI and address CodeRabbit comments. Signed-off-by: Shang Wang --- docs/conf.py | 75 ++++++++++++++++++++++++++++++++++++++------------ pyproject.toml | 2 +- uv.lock | 2 +- 3 files changed, 59 insertions(+), 20 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 900b45ddd9..6191186b8b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ import os import sys -from pathlib import Path +from pathlib import Path, PosixPath from typing import Any import git @@ -119,6 +119,7 @@ def _convert_gh_admonitions( app: Sphinx, relative_path: Path, parent_docname: str, contents: list[str] ) -> None: """Supporting rendering GitHub alerts correctly. + # https://github.com/executablebooks/MyST-Parser/issues/845 """ _github_admonitions = { @@ -168,25 +169,63 @@ class _GitHubLinkTransform(Transform): default_priority = 500 # type: ignore[bad-override] + @staticmethod + def _get_github_source_url(repo: git.Repo) -> PosixPath: + # Find out which remote GitHub repo should be the source. + if "origin" in repo.remotes: + url = repo.remotes.origin.url + elif len(repo.remotes) == 1: + url = repo.remotes[0].url + else: + raise ValueError( + "Cannot determine which remote repo on GitHub this local repo is from." + ) + # Canonicalize the URL. 
+ if url.startswith("git@github.com:"): + url = url.replace("git@github.com:", "https://github.com/", 1) + if url.endswith(".git"): + url = url[: -len(".git")] + return PosixPath(url) + def apply(self, **kwargs: Any) -> None: # type: ignore[bad-override] - repo = git.Repo(search_parent_directories=True) - origin_url = repo.remotes.origin.url - if origin_url.startswith("git@github.com:"): - origin_url = origin_url.replace("git@github.com:", "https://github.com/", 1) - if origin_url.endswith(".git"): - origin_url = origin_url[: -len(".git")] - blob = f"blob/{repo.head.object.hexsha}" + try: + local_repo = git.Repo(search_parent_directories=True) + remote_repo_url = self._get_github_source_url(local_repo) + except Exception: + # Cannot figure out which source url it should be; leave links as-is. + return + if local_repo.working_tree_dir is None: + # If the local repo is a bare repo, the method below won't work. + return + wt_dir = local_repo.working_tree_dir + for node in self.document.traverse(addnodes.download_reference): - # `node["refdoc"]` would be, e.g., "guides/grpo". Therefore, `md_dir` would - # be, e.g., `"docs/guides"`. - # Avoid using `os.path` or `pathlib` for path manipulation because, well, - # what if we try to build the docs on Windows? - md_dir = "/".join(["docs"] + node["refdoc"].split("/")[:-1]) - # `file_path` would be `"docs/grpo/../../examples/run_grpo_math.py"`. - file_path = "/".join((md_dir, node["reftarget"])) - # `refuri` would be `"https://github.com/NVIDIA-NeMo/RL/blob//docs/guides/../../examples/run_grpo_math.py"`. - refuri = "/".join((origin_url, blob, file_path)) - new_node = nodes.reference(rawsource=node.rawsource, refuri=refuri) + md_dir = Path(node["refdoc"]).parent + dst_path = md_dir / Path(node["reftarget"]) + try: + dst_path = dst_path.resolve(strict=True) + except OSError: + # If the path doesn't exist or a symlink loop is encountered. 
+ continue + if dst_path.is_file(): + kind = "blob" + elif dst_path.is_dir(): + kind = "tree" + else: + # Cannot figure out what type of thing this path is pointing to. + continue + refuri = ( + remote_repo_url + / PosixPath(kind) + / PosixPath(local_repo.head.object.hexsha) + / dst_path.relative_to(wt_dir).as_posix() + ) + new_node = nodes.reference(rawsource=node.rawsource, refuri=str(refuri)) + # Preserve styling and title if present. + if "classes" in node: + new_node["classes"] = list(node["classes"]) + if "title" in node: + new_node["title"] = node["title"] if node.children: new_node += node.children node.replace_self(new_node) diff --git a/pyproject.toml b/pyproject.toml index d201042807..444f5bc64a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -121,7 +121,7 @@ docs = [ "sphinx-copybutton", # Adds a copy button for code blocks "myst_parser", # For our markdown docs "nvidia-sphinx-theme", # Our NVIDIA theme - "gitpython", # To git-related information + "gitpython>=3.1.45", # To git-related information ] dev = [ "pre-commit>=4.2.0", diff --git a/uv.lock b/uv.lock index 18784c490e..4532901773 100644 --- a/uv.lock +++ b/uv.lock @@ -2999,7 +2999,7 @@ dev = [ { name = "types-requests" }, ] docs = [ - { name = "gitpython" }, + { name = "gitpython", specifier = ">=3.1.45" }, { name = "myst-parser" }, { name = "nvidia-sphinx-theme" }, { name = "sphinx" }, From 76577e16d834ccef3d077a17ba06f8805dcf463a Mon Sep 17 00:00:00 2001 From: Shang Wang Date: Wed, 10 Sep 2025 17:35:13 -0400 Subject: [PATCH 12/13] Address CodeRabbit comments. 
Signed-off-by: Shang Wang --- docs/conf.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 6191186b8b..383f297c4e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,8 @@ import os import sys -from pathlib import Path, PosixPath +import urllib.parse +from pathlib import Path from typing import Any import git @@ -170,7 +171,7 @@ class _GitHubLinkTransform(Transform): default_priority = 500 # type: ignore[bad-override] @staticmethod - def _get_github_source_url(repo: git.Repo) -> PosixPath: + def _get_github_source_url(repo: git.Repo) -> str: # Find out which remote GitHub repo should be the source. if "origin" in repo.remotes: url = repo.remotes.origin.url @@ -185,7 +186,7 @@ def _get_github_source_url(repo: git.Repo) -> PosixPath: url = url.replace("git@github.com:", "https://github.com/", 1) if url.endswith(".git"): url = url[: -len(".git")] - return PosixPath(url) + return url def apply(self, **kwargs: Any) -> None: # type: ignore[bad-override] try: @@ -214,11 +215,13 @@ def apply(self, **kwargs: Any) -> None: # type: ignore[bad-override] else: # Cannot figure out what type of thing this path is pointing to. continue - refuri = ( - remote_repo_url - / PosixPath(kind) - / PosixPath(local_repo.head.object.hexsha) - / dst_path.relative_to(wt_dir).as_posix() + refuri = "/".join( + ( + remote_repo_url.rstrip("/"), + kind, + local_repo.head.object.hexsha, + urllib.parse.quote(dst_path.relative_to(wt_dir).as_posix()), + ) ) new_node = nodes.reference(rawsource=node.rawsource, refuri=str(refuri)) # Preserve styling and title if present. From d2b8d0cc8a91f597c855fd578555f4f8d426fd2e Mon Sep 17 00:00:00 2001 From: Shang Wang Date: Wed, 10 Sep 2025 18:19:51 -0400 Subject: [PATCH 13/13] Address CodeRabbit comments. 
Signed-off-by: Shang Wang --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 383f297c4e..1ea041a8c4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -223,7 +223,7 @@ def apply(self, **kwargs: Any) -> None: # type: ignore[bad-override] urllib.parse.quote(dst_path.relative_to(wt_dir).as_posix()), ) ) - new_node = nodes.reference(rawsource=node.rawsource, refuri=str(refuri)) + new_node = nodes.reference(rawsource=node.rawsource, refuri=refuri) # Preserve styling and title if present. if "classes" in node: new_node["classes"] = list(node["classes"])