NVIDIA-NeMo · terrykong · Apr 16, 2025 · Apr 16, 2025 · Apr 16, 2025 · Apr 16, 2025
@@ -70,6 +70,9 @@ jobs:
           run: |
             docker pull nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }}
 
+        - name: Checkout repository
+          uses: actions/checkout@v4
+
         - name: Start container
           run: |
             nvidia-smi
@@ -81,6 +84,7 @@ jobs:
               --env HF_DATASETS_CACHE=/home/TestData/reinforcer/hf_datasets_cache \
               --env REINFORCER_REPO_DIR=/opt/reinforcer \
               --env HF_TOKEN \
+              --volume $GITHUB_WORKSPACE:/opt/reinforcer \
               --volume $GITHUB_ACTION_DIR:$GITHUB_ACTION_DIR \
               --volume /mnt/datadrive/TestData/reinforcer/datasets:/opt/reinforcer/datasets:ro \
               --volume /mnt/datadrive/TestData/reinforcer/checkpoints:/home/TestData/reinforcer/checkpoints:ro \
@@ -91,6 +95,7 @@ jobs:
 
         - name: Run unit tests
           run: |
+            # /opt/reinforcer is a mount of the checked-out workspace, so register it as a git safe.directory inside the container (presumably needed because the mount's owner differs from the container user -- confirm)
+            docker exec nemo_container_${{ github.run_id }} git config --global --add safe.directory /opt/reinforcer
             docker exec nemo_container_${{ github.run_id }} bash -eux -o pipefail -c "${{ inputs.UNIT_TEST_SCRIPT }}"
 
         - name: Run doc tests
@@ -107,6 +112,7 @@ jobs:
         - name: after_script
           if: always() && inputs.AFTER_SCRIPT != ':'
           run: |
+            # Run the after script
             cmd=$(cat <<"RUN_TEST_EOF"
             ${{ inputs.AFTER_SCRIPT }}
             RUN_TEST_EOF
@@ -125,5 +131,7 @@ jobs:
         - name: Container shutdown
           if: always()
           run: |
+            # Best-effort: hand files created in the mounted directory back to the runner user so it can clean up; a failure here (e.g. container not running) must not skip the stop/rm below
+            docker exec nemo_container_${{ github.run_id }} bash -c "find /opt/reinforcer -path '/opt/reinforcer/datasets' -prune -o -exec chown $(id -u):$(id -g) {} +" || true
             docker container stop nemo_container_${{ github.run_id }} || true
             docker container rm nemo_container_${{ github.run_id }} || true
@@ -18,26 +18,27 @@ RUN chmod 755 /home/ray/.cache
 FROM base AS hermetic
 
 WORKDIR /opt/reinforcer
-# This is less efficient as this invalidates the cache more frequently, but
-# creates a smaller image. Adding reinforcer afterwards and doing
-# `uv pip install --no-deps --editable .` causes a "sync" of some of the environment,
-# which defeats the purpose of pre-installing.
-# In the future we may optimize this: https://github.com/NVIDIA/reinforcer/issues/129
-COPY --chown=ray --chmod=755 . /opt/reinforcer
+
+# First copy only the dependency files
+COPY --chown=ray --chmod=755 pyproject.toml uv.lock ./
+
+ENV UV_PROJECT_ENVIRONMENT=/opt/reinforcer_venv
+ENV VIRTUAL_ENV=/opt/reinforcer_venv
+
+# Create the virtual environment and pre-install dependencies only (the project itself is not installed)
 RUN <<"EOF"
-uv venv .venv
+uv venv /opt/reinforcer_venv
 # uv sync has a more reliable resolver than simple uv pip install which can fail
 
 # Sync each training + inference backend one at a time (since they may conflict)
 # to warm the uv cache, then at the end just sync the default dependencies.
 # Do everything in one layer to prevent large layers.
 
 uv sync --locked --extra vllm --no-install-project
-uv sync --locked --all-groups
+uv sync --locked --all-groups --no-install-project
 EOF
 
-ENV VIRTUAL_ENV=/opt/reinforcer/.venv
-ENV PATH="/opt/reinforcer/.venv/bin:$PATH"
+ENV PATH="/opt/reinforcer_venv/bin:$PATH"
 
 # The ray images automatically activate the anaconda venv. We will
 # comment this out of the .bashrc to give the same UX between docker
@@ -50,7 +51,4 @@ sed -i '/# >>> conda initialize >>>/,/# <<< conda initialize <<</ { /^[^#]/ s/^/
 
 # Comment out any line that explicitly exports the anaconda3 PATH
 sed -i '/export PATH=\$HOME\/anaconda3\/bin:\$PATH/ s/^/# /' ~/.bashrc
-
-# Enable the repo's git history within the container
-git config --global --add safe.directory /opt/reinforcer
 EOF