9 changes: 7 additions & 2 deletions docker/Dockerfile
@@ -72,6 +72,7 @@ RUN GITHUB_ARTIFACTORY=github.com \
ARG UV_VERSION=0.9.7
ARG PYTHON_VERSION=3.12
ENV PATH="/root/.local/bin:$PATH"
+ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh && \
uv python install ${PYTHON_VERSION}

@@ -109,8 +110,8 @@ ARG SKIP_SGLANG_BUILD
ENV UV_PROJECT_ENVIRONMENT=/opt/nemo_rl_venv
ENV UV_LINK_MODE=copy

-# Ensure DeepEP is built for H100 and B200 (also mcore inference unified memory API now invokes a torch API that requires these to be set)
-ENV TORCH_CUDA_ARCH_LIST="9.0 10.0"
+# Ensure DeepEP is built for hopper and (grace)blackwell (also mcore inference unified memory API now invokes a torch API that requires these to be set)
+ENV TORCH_CUDA_ARCH_LIST="9.0 10.0 10.3"
Comment on lines +113 to +114
Contributor
@coderabbitai bot commented on Mar 21, 2026
⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🌐 Web query:

For the exact PyTorch version used by NVIDIA-NeMo/RL (from uv.lock) and CUDA 12.9 base images, is TORCH_CUDA_ARCH_LIST="10.3" (sm_103) officially supported for CUDA extension compilation? Please provide the official PyTorch/CUDA documentation or release-note link.

💡 Result:

No, TORCH_CUDA_ARCH_LIST="10.3" (sm_103) is not officially supported for CUDA extension compilation in the PyTorch version used by NVIDIA-NeMo/RL (likely PyTorch 2.8.0 from uv.lock patterns and CUDA 12.9 Docker images like pytorch/pytorch:2.8.0-cuda12.9-cudnn9-devel). PyTorch 2.8 CUDA 12.9 builds support up to sm_121 (Blackwell), but sm_103 (GB300/B300, compute capability 10.3) requires CUDA 12.9 compiler support which PyTorch 2.8 may not include in its default TORCH_CUDA_ARCH_LIST for extensions. Official PyTorch docs do not explicitly list sm_103; use torch.cuda.get_arch_list to verify built arches, but extension compilation defaults exclude newest arches until explicitly added in future releases. CUDA 12.9 release notes confirm sm_103 compiler target support, but PyTorch binaries lag (e.g., PyTorch 2.8 RFCs discuss cu12.9 as prototype).


🏁 Script executed:

# First, find the Dockerfile and understand the context
find . -name "Dockerfile" -type f | head -5

Repository: NVIDIA-NeMo/RL

Length of output: 77


🏁 Script executed:

# Look for uv.lock or other dependency files to determine PyTorch version
find . -name "uv.lock" -o -name "requirements*.txt" -o -name "pyproject.toml" | head -10

Repository: NVIDIA-NeMo/RL

Length of output: 227


🏁 Script executed:

# Read the Dockerfile to see the full context and PyTorch base image
cat -n docker/Dockerfile | head -150

Repository: NVIDIA-NeMo/RL

Length of output: 7509


🏁 Script executed:

# Find PyTorch version in uv.lock
grep -A 5 "name = \"torch\"" uv.lock | head -20

Repository: NVIDIA-NeMo/RL

Length of output: 2893


🌐 Web query:

PyTorch 2.9.0 CUDA 12.9 TORCH_CUDA_ARCH_LIST sm_103 (compute capability 10.3) support - is it officially supported for CUDA extension compilation?

💡 Result:

No, sm_103 (compute capability 10.3) is not officially supported for CUDA extension compilation in PyTorch 2.9.0 with CUDA 12.9 by default. PyTorch 2.9.0 provides official wheels for CUDA 12.6, 12.8, and 13.0. Compute capability 10.3 corresponds to NVIDIA Blackwell GPUs like GB300. PyTorch binaries are compiled for a standard set of architectures up to sm_90 (Hopper H100), with newer architectures like sm_103 requiring explicit inclusion via TORCH_CUDA_ARCH_LIST="10.3" when building custom CUDA extensions from source. Official binaries do not include sm_103 PTX or cubins, so extensions must set this environment variable during compilation to generate compatible code. CUDA 12.9 is mentioned in release discussions but deprecated in favor of 12.8/13.0; compatibility depends on the installed toolkit version.


Remove 10.3 from TORCH_CUDA_ARCH_LIST in the Dockerfile.

PyTorch 2.9.0 with CUDA 12.9 does not officially support sm_103 (compute capability 10.3). Official PyTorch binaries support only up to sm_90 (Hopper) by default, and sm_103 (Blackwell GB300) is not included in the standard toolchain. Setting TORCH_CUDA_ARCH_LIST="9.0 10.0 10.3" will cause CUDA extension builds to fail when the PyTorch 2.9.0 toolchain cannot generate compatible code for this unsupported architecture. Use ENV TORCH_CUDA_ARCH_LIST="9.0 10.0" instead for compatibility with the bundled PyTorch/CUDA version.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@docker/Dockerfile` around lines 113-114, update the Dockerfile's
TORCH_CUDA_ARCH_LIST environment variable by removing the unsupported "10.3"
entry: locate the ENV line setting TORCH_CUDA_ARCH_LIST and change its value
from "9.0 10.0 10.3" to "9.0 10.0" so CUDA extension builds use only supported
architectures for the bundled PyTorch/CUDA toolchain.
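The failure mode the bot describes hinges on how `TORCH_CUDA_ARCH_LIST` entries become `nvcc` code-generation flags. The sketch below is illustrative only, not PyTorch's actual `torch.utils.cpp_extension` implementation, but it follows the same convention: each `X.Y` entry maps to `sm_XY`, and a `+PTX` suffix additionally embeds PTX for that arch.

```python
# Hypothetical sketch (not PyTorch's real code) of expanding a
# TORCH_CUDA_ARCH_LIST value into nvcc -gencode flags.
def gencode_flags(arch_list: str) -> list[str]:
    flags = []
    for entry in arch_list.split():
        ptx = entry.endswith("+PTX")
        num = entry.removesuffix("+PTX").replace(".", "")
        # cubin for the named SM architecture
        flags.append(f"-gencode=arch=compute_{num},code=sm_{num}")
        if ptx:
            # also embed forward-compatible PTX
            flags.append(f"-gencode=arch=compute_{num},code=compute_{num}")
    return flags

print(gencode_flags("9.0 10.0 10.3"))
```

Under this convention, `10.3` asks `nvcc` for an `sm_103` cubin, which is why toolchain support for that target (CUDA 12.9+) is the deciding factor.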

Contributor Author
@sahgerlad replied:
Keeping 10.3 for Grace Blackwell / GB coverage alongside 10.0; the image uses CUDA 12.9 and we verified the build and runtime on B300 with this list. Happy to narrow if CI shows an unsupported-arch error from PyTorch/nvcc.

Contributor
@coderabbitai bot replied:
@sahgerlad, that context makes sense — thank you for clarifying. CUDA 12.9 first introduced sm_103 (B300/GB300) arch support, where sm_103 binaries are compatible with sm_100, and the PR already includes the necessary mitigations for the known Triton/ptxas limitation: the standard Triton PTXAS bundled with PyTorch is not compiled for sm_103, but this can be bypassed by pointing to an external ptxas binary that supports the target GPU architecture — which is exactly what the TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas env var and the ptxas-blackwell symlinks in this PR accomplish. Happy to withdraw the concern given the hardware-verified build.
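The override mechanism described here can be sketched as a simple environment lookup. This is a hedged illustration, not Triton's actual resolution logic (which differs in detail), and the bundled path shown is a placeholder:

```python
import os

# Sketch of the TRITON_PTXAS_PATH override: if the variable is set,
# that ptxas binary is used instead of the one bundled with Triton
# (which may not support sm_103). Bundled path is illustrative only.
def resolve_ptxas(bundled: str = "site-packages/triton/backends/nvidia/bin/ptxas") -> str:
    return os.environ.get("TRITON_PTXAS_PATH", bundled)

os.environ["TRITON_PTXAS_PATH"] = "/usr/local/cuda/bin/ptxas"
print(resolve_ptxas())  # /usr/local/cuda/bin/ptxas
```

With the Dockerfile's `ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas`, the CUDA 12.9 toolkit's ptxas (which supports sm_103) is used in place of the bundled one.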


✏️ Learnings added
Learnt from: sahgerlad
Repo: NVIDIA-NeMo/RL PR: 2134
File: docker/Dockerfile:113-114
Timestamp: 2026-03-21T03:52:47.988Z
Learning: In `docker/Dockerfile` for NVIDIA-NeMo/RL, `TORCH_CUDA_ARCH_LIST="9.0 10.0 10.3"` intentionally includes `10.3` for NVIDIA B300/GB300 (Grace Blackwell) hardware support with CUDA 12.9. The PR pairs this with `TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas` and `ptxas-blackwell` symlinks inside the ray/nemo venvs to work around the known issue where Triton's bundled PTXAS does not support sm_103. The maintainer (sahgerlad) has verified this configuration builds and runs correctly on B300 hardware. Do not flag `10.3` in this list as unsupported.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


🧠 Learnings used
Learnt from: CR
Repo: NVIDIA-NeMo/RL PR: 0
File: CODING_GUIDELINES.md:0-0
Timestamp: 2026-02-03T02:09:51.636Z
Learning: Applies to Dockerfile.ngc_pytorch : Exception to uv guidelines: `Dockerfile.ngc_pytorch` is exempt from the requirement to use `uv run`


# First copy only the dependency files
COPY --from=nemo-rl pyproject.toml uv.lock ./
@@ -197,6 +198,10 @@ else
UV_LINK_MODE=symlink uv run nemo_rl/utils/prefetch_venvs.py
fi
EOF
+RUN for d in /opt/ray_venvs /opt/nemo_rl_venv; do \
+    [ -d "$d" ] || continue; \
+    find "$d" -name "ptxas-blackwell" -exec ln -sf /usr/local/cuda/bin/ptxas {} +; \
+done

# Generate container fingerprint for frozen environment support
# Store outside /opt/nemo-rl to avoid being overwritten by user mounts
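The effect of the `find`/`ln -sf` loop added above can be dry-run against a throwaway tree. The Python rendering below mimics the loop's behavior (skip missing roots, replace every `ptxas-blackwell` with a symlink to the CUDA toolkit ptxas); the directory layout is invented for illustration, whereas the real loop walks `/opt/ray_venvs` and `/opt/nemo_rl_venv`:

```python
import os
import tempfile

# Build a fake venv tree containing a stub ptxas-blackwell binary.
tmp = tempfile.mkdtemp()
bin_dir = os.path.join(tmp, "venv", "triton", "bin")
os.makedirs(bin_dir)
stub = os.path.join(bin_dir, "ptxas-blackwell")
open(stub, "w").close()

# Equivalent of: for d in ...; do [ -d "$d" ] || continue;
#   find "$d" -name ptxas-blackwell -exec ln -sf /usr/local/cuda/bin/ptxas {} +; done
for d in (os.path.join(tmp, "venv"), os.path.join(tmp, "missing")):
    if not os.path.isdir(d):
        continue  # mirrors the [ -d "$d" ] || continue guard
    for root, _dirs, files in os.walk(d):
        for name in files:
            if name == "ptxas-blackwell":
                path = os.path.join(root, name)
                os.remove(path)  # ln -sf replaces the existing file
                os.symlink("/usr/local/cuda/bin/ptxas", path)

print(os.readlink(stub))  # /usr/local/cuda/bin/ptxas
```

After the loop, any Triton `ptxas-blackwell` shim inside the venvs resolves to the toolkit ptxas that supports sm_103.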
4 changes: 2 additions & 2 deletions nemo_rl/models/policy/lm_policy.py
@@ -100,7 +100,7 @@ def __init__(
if "TORCH_CUDA_ARCH_LIST" not in os.environ:
raise RuntimeError(
"TORCH_CUDA_ARCH_LIST is not set. This is required in Megatron backend. This variable is set in our container, but "
-                "if you are running a custom container or baremetal, you may need to set this variable manually. Example: export TORCH_CUDA_ARCH_LIST='9.0 10.0'"
+                "if you are running a custom container or baremetal, you may need to set this variable manually. Example: export TORCH_CUDA_ARCH_LIST='9.0 10.0 10.3'"
)

else:
@@ -118,7 +118,7 @@ def __init__(
if "TORCH_CUDA_ARCH_LIST" not in os.environ:
warnings.warn(
"TORCH_CUDA_ARCH_LIST is not set. This is needed if using DeepEP in DTensorPolicyWorker V2. This variable is set in our container, but "
-                "if you are running a custom container or baremetal, you may need to set this variable manually. Example: export TORCH_CUDA_ARCH_LIST='9.0 10.0'"
+                "if you are running a custom container or baremetal, you may need to set this variable manually. Example: export TORCH_CUDA_ARCH_LIST='9.0 10.0 10.3'"
)
else:
assert (
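The two hunks in `lm_policy.py` share one pattern: the Megatron path treats a missing `TORCH_CUDA_ARCH_LIST` as fatal, while the DTensor/DeepEP path only warns. A standalone sketch of that guard, with the message abbreviated from the diff:

```python
import os
import warnings

# Sketch of the guard pattern in lm_policy.py: fatal for the Megatron
# backend, warning-only for the DTensor/DeepEP path.
def check_arch_list(required: bool) -> None:
    if "TORCH_CUDA_ARCH_LIST" not in os.environ:
        msg = ("TORCH_CUDA_ARCH_LIST is not set. "
               "Example: export TORCH_CUDA_ARCH_LIST='9.0 10.0 10.3'")
        if required:
            raise RuntimeError(msg)
        warnings.warn(msg)

os.environ.pop("TORCH_CUDA_ARCH_LIST", None)
try:
    check_arch_list(required=True)   # Megatron path: hard failure
except RuntimeError as err:
    print("fatal:", err)

os.environ["TORCH_CUDA_ARCH_LIST"] = "9.0 10.0 10.3"
check_arch_list(required=True)       # passes once the variable is set
```

The diff changes only the example value in both messages, from `'9.0 10.0'` to `'9.0 10.0 10.3'`, keeping the guidance consistent with the Dockerfile's new arch list.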