Skip to content
Merged
8 changes: 8 additions & 0 deletions .github/workflows/_run_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ jobs:
run: |
docker pull nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }}

# Check out the repository on the runner. The "Start container" step below bind-mounts $GITHUB_WORKSPACE into the container at /opt/reinforcer.
- name: Checkout repository
uses: actions/checkout@v4

- name: Start container
run: |
nvidia-smi
Expand All @@ -81,6 +84,7 @@ jobs:
--env HF_DATASETS_CACHE=/home/TestData/reinforcer/hf_datasets_cache \
--env REINFORCER_REPO_DIR=/opt/reinforcer \
--env HF_TOKEN \
--volume $GITHUB_WORKSPACE:/opt/reinforcer \
--volume $GITHUB_ACTION_DIR:$GITHUB_ACTION_DIR \
--volume /mnt/datadrive/TestData/reinforcer/datasets:/opt/reinforcer/datasets:ro \
--volume /mnt/datadrive/TestData/reinforcer/checkpoints:/home/TestData/reinforcer/checkpoints:ro \
Expand All @@ -91,6 +95,7 @@ jobs:

- name: Run unit tests
run: |
# /opt/reinforcer is bind-mounted from the runner's $GITHUB_WORKSPACE, so register it as a git safe.directory inside the container before the test script runs (presumably the checkout's owner differs from the container user; confirm).
docker exec nemo_container_${{ github.run_id }} git config --global --add safe.directory /opt/reinforcer
docker exec nemo_container_${{ github.run_id }} bash -eux -o pipefail -c "${{ inputs.UNIT_TEST_SCRIPT }}"

- name: Run doc tests
Expand All @@ -107,6 +112,7 @@ jobs:
- name: after_script
if: always() && inputs.AFTER_SCRIPT != ':'
run: |
# Run the after script
cmd=$(cat <<"RUN_TEST_EOF"
${{ inputs.AFTER_SCRIPT }}
RUN_TEST_EOF
Expand All @@ -125,5 +131,7 @@ jobs:
- name: Container shutdown
  if: always()
  run: |
    # Hand ownership of everything under the bind-mounted /opt/reinforcer back to the
    # runner user so the runner can clean up its workspace. $(id -u)/$(id -g) are
    # expanded by the runner's shell (double quotes), i.e. they are the runner's ids.
    # The read-only datasets mount is pruned from the walk.
    #
    # This step runs under always(), so the container may be missing or already
    # stopped. Keep the chown best-effort (with a visible warning) so that a failed
    # `docker exec` cannot abort the step before the stop/rm below.
    docker exec nemo_container_${{ github.run_id }} bash -c "find /opt/reinforcer -path '/opt/reinforcer/datasets' -prune -o -exec chown $(id -u):$(id -g) {} +" \
      || echo "::warning::Could not chown /opt/reinforcer back to the runner user; workspace cleanup may fail"
    docker container stop nemo_container_${{ github.run_id }} || true
    docker container rm nemo_container_${{ github.run_id }} || true
24 changes: 11 additions & 13 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,27 @@ RUN chmod 755 /home/ray/.cache
FROM base AS hermetic

WORKDIR /opt/reinforcer
# This is less efficient as this invalidates the cache more frequently, but
# creates a smaller image. Adding reinforcer afterwards and doing
# `uv pip install --no-deps --editable .` causes a "sync" of some of the environment,
# which defeats the purpose of pre-installing.
# In the future we may optimize this: https://github.com/NVIDIA/reinforcer/issues/129
COPY --chown=ray --chmod=755 . /opt/reinforcer

# First copy only the dependency files
COPY --chown=ray --chmod=755 pyproject.toml uv.lock ./

ENV UV_PROJECT_ENVIRONMENT=/opt/reinforcer_venv
ENV VIRTUAL_ENV=/opt/reinforcer_venv

# Create and activate virtual environment
RUN <<"EOF"
uv venv .venv
uv venv /opt/reinforcer_venv
# uv sync has a more reliable resolver than a plain `uv pip install`, which can fail

# Sync each training + inference backend one at a time (since they may conflict)
# to warm the uv cache, then at the end just sync the default dependencies.
# Do everything in one layer to prevent large layers.

uv sync --locked --extra vllm --no-install-project
uv sync --locked --all-groups
uv sync --locked --all-groups --no-install-project
EOF

ENV VIRTUAL_ENV=/opt/reinforcer/.venv
ENV PATH="/opt/reinforcer/.venv/bin:$PATH"
ENV PATH="/opt/reinforcer_venv/bin:$PATH"

# The ray images automatically activate the anaconda venv. We will
# comment this out of the .bashrc to give the same UX between docker
Expand All @@ -50,7 +51,4 @@ sed -i '/# >>> conda initialize >>>/,/# <<< conda initialize <<</ { /^[^#]/ s/^/

# Comment out any line that explicitly exports the anaconda3 PATH
sed -i '/export PATH=\$HOME\/anaconda3\/bin:\$PATH/ s/^/# /' ~/.bashrc

# Enable the repo's git history within the container
git config --global --add safe.directory /opt/reinforcer
EOF