diff --git a/.github/workflows/test-arealite.yml b/.github/workflows/test-arealite.yml
new file mode 100644
index 000000000..41ae60b8f
--- /dev/null
+++ b/.github/workflows/test-arealite.yml
@@ -0,0 +1,57 @@
+# Run the AReaLite test suite on a remote CI node reached over SSH.
+# Each SSH step executes one script from ci/ on that node for the pushed commit.
+name: Test AReaLite
+
+on:
+  push:
+    paths:
+      - .github/workflows/test-arealite.yml
+      - arealite/**
+      - ci/**
+  workflow_dispatch:
+
+jobs:
+  test-arealite:
+    runs-on: ubuntu-latest
+    concurrency:
+      group: test-arealite
+    steps:
+      - uses: actions/checkout@v4
+
+      # Check out the pushed commit on the CI node.
+      # NOTE(review): the URL uses github.bibk.top, presumably a GitHub mirror
+      # reachable from the node; confirm this host is trusted.
+      - uses: appleboy/ssh-action@v1
+        env:
+          GIT_REPO_URL: https://github.bibk.top/${{ github.repository }}
+          GIT_COMMIT_SHA: ${{ github.sha }}
+        with:
+          host: ${{ secrets.CI_NODE_ADDR }}
+          username: ${{ secrets.CI_NODE_USER }}
+          key: ${{ secrets.REMOTE_SSH_KEY }}
+          envs: GIT_REPO_URL,GIT_COMMIT_SHA
+          script_path: ci/clone_repo.sh
+
+      # Build the areal-env image (the script skips this if it already exists).
+      - uses: appleboy/ssh-action@v1
+        env:
+          GIT_COMMIT_SHA: ${{ github.sha }}
+        with:
+          host: ${{ secrets.CI_NODE_ADDR }}
+          username: ${{ secrets.CI_NODE_USER }}
+          key: ${{ secrets.REMOTE_SSH_KEY }}
+          command_timeout: 2h
+          envs: GIT_COMMIT_SHA
+          script_path: ci/build_env_image.sh
+
+      # Run pytest on arealite/ inside the areal-env image.
+      - uses: appleboy/ssh-action@v1
+        env:
+          GIT_COMMIT_SHA: ${{ github.sha }}
+        with:
+          host: ${{ secrets.CI_NODE_ADDR }}
+          username: ${{ secrets.CI_NODE_USER }}
+          key: ${{ secrets.REMOTE_SSH_KEY }}
+          command_timeout: 1h
+          envs: GIT_COMMIT_SHA
+          script_path: ci/test_arealite.sh
diff --git a/arealite/tests/test_engine.py b/arealite/tests/test_engine.py
index f7f53023b..03b1559b8 100644
--- a/arealite/tests/test_engine.py
+++ b/arealite/tests/test_engine.py
@@ -23,7 +23,7 @@
 from realhf.impl.model.utils.padding import unpad_input
 
 VOCAB_SIZE = 100
-MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-1.7B/"
+MODEL_PATH = "Qwen/Qwen2-0.5B"
 
 
 @pytest.fixture(scope="module")
diff --git a/arealite/tests/test_grpo.py b/arealite/tests/test_grpo.py
index 5bf5c6754..24881a2a2 100644
--- a/arealite/tests/test_grpo.py
+++ b/arealite/tests/test_grpo.py
@@ -22,7 +22,7 @@
 
 EXPR_NAME = "test_grpo"
 TRIAL_NAME = "test_grpo"
-MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-1.7B/"
+MODEL_PATH = "Qwen/Qwen2-0.5B"
 
 
 @pytest.fixture(scope="module")
diff --git a/arealite/tests/test_rollout.py b/arealite/tests/test_rollout.py
index 858f18f44..3f6bd43fb 100644
--- a/arealite/tests/test_rollout.py
+++ b/arealite/tests/test_rollout.py
@@ -27,7 +27,7 @@
 
 EXPR_NAME = "test_rollout"
 TRIAL_NAME = "test_rollout"
-MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-1.7B/"
+MODEL_PATH = "Qwen/Qwen2-0.5B"
 
 
 @pytest.fixture(scope="module")
diff --git a/arealite/tests/test_rollout_controller.py b/arealite/tests/test_rollout_controller.py
index aadabdaf7..636f34933 100644
--- a/arealite/tests/test_rollout_controller.py
+++ b/arealite/tests/test_rollout_controller.py
@@ -22,7 +22,7 @@
 
 EXPR_NAME = "test_rollout_controller"
 TRIAL_NAME = "test_rollout_controller"
-MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-1.7B/"
+MODEL_PATH = "Qwen/Qwen2-0.5B"
 
 
 @pytest.fixture(scope="module")
diff --git a/arealite/tests/test_sft.py b/arealite/tests/test_sft.py
index bf605e4bc..eaa81cd52 100644
--- a/arealite/tests/test_sft.py
+++ b/arealite/tests/test_sft.py
@@ -60,7 +60,7 @@ def test_sft():
     )
 
     engine_config = EngineConfig(
-        path="/storage/openpsi/models/Qwen__Qwen3-1.7B/",
+        path="Qwen/Qwen2-0.5B",
         gradient_checkpointing=False,
         optimizer=OptimizerConfig(),
         backend=EngineBackendConfig(type="hf"),
diff --git a/arealite/tests/test_sglang_client.py b/arealite/tests/test_sglang_client.py
index ab61caf29..db037c1d0 100644
--- a/arealite/tests/test_sglang_client.py
+++ b/arealite/tests/test_sglang_client.py
@@ -23,7 +23,7 @@
 
 EXPR_NAME = "test_sglang_client"
 TRIAL_NAME = "test_sglang_client"
-MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-1.7B/"
+MODEL_PATH = "Qwen/Qwen2-0.5B"
 
 
 @pytest.fixture(scope="module")
diff --git a/ci/build_env_image.sh b/ci/build_env_image.sh
new file mode 100644
index 000000000..383984592
--- /dev/null
+++ b/ci/build_env_image.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+#
+# Build the cached `areal-env:latest` Docker image from the checkout at
+# /tmp/areal-$GIT_COMMIT_SHA. Does nothing when the image already exists.
+
+set -e
+
+GIT_COMMIT_SHA=${GIT_COMMIT_SHA:?"GIT_COMMIT_SHA is not set"}
+
+echo "GIT_COMMIT_SHA: $GIT_COMMIT_SHA"
+
+# If there is already an image named areal-env, skip.
+# Match the whole line so that e.g. "foo/areal-env:latest" does not count.
+if docker images --format '{{.Repository}}:{{.Tag}}' | grep -qx 'areal-env:latest'; then
+    echo "Image areal-env already exists, skipping build."
+    exit 0
+fi
+
+RUN_ID="areal-$GIT_COMMIT_SHA"
+cd "/tmp/$RUN_ID"
+
+# Remove a leftover container with exactly this name from an earlier run.
+if docker ps -a --format '{{.Names}}' | grep -qxF "$RUN_ID"; then
+    docker rm -f "$RUN_ID"
+fi
+
+# `set -e` inside the container makes a failed install abort the build instead
+# of being committed below as a broken image.
+docker run \
+    --name "$RUN_ID" \
+    --gpus all \
+    --shm-size=8g \
+    -v "$(pwd)":/workspace \
+    -w /workspace \
+    nvcr.io/nvidia/pytorch:25.01-py3 \
+    bash -c "
+        set -e
+        python -m pip install --upgrade pip
+        pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+        pip config unset global.extra-index-url || true
+        bash examples/env/scripts/setup-pip-deps.sh
+        pip uninstall -y transformer-engine
+        mv ./sglang /sglang
+    " || { docker rm -f "$RUN_ID"; exit 1; }
+
+docker commit "$RUN_ID" areal-env:latest
+docker rm -f "$RUN_ID"
diff --git a/ci/clone_repo.sh b/ci/clone_repo.sh
new file mode 100644
index 000000000..c4448822e
--- /dev/null
+++ b/ci/clone_repo.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+#
+# Check out exactly $GIT_COMMIT_SHA from $GIT_REPO_URL into a fresh
+# /tmp/areal-$GIT_COMMIT_SHA directory, using a depth-1 fetch.
+
+set -e
+
+GIT_REPO_URL=${GIT_REPO_URL:?"GIT_REPO_URL is not set"}
+GIT_COMMIT_SHA=${GIT_COMMIT_SHA:?"GIT_COMMIT_SHA is not set"}
+
+echo "GIT_REPO_URL: $GIT_REPO_URL"
+echo "GIT_COMMIT_SHA: $GIT_COMMIT_SHA"
+
+RUN_ID="areal-$GIT_COMMIT_SHA"
+rm -rf "/tmp/$RUN_ID"
+mkdir -p "/tmp/$RUN_ID"
+cd "/tmp/$RUN_ID"
+
+git init
+git remote add origin "$GIT_REPO_URL"
+git fetch --depth 1 origin "$GIT_COMMIT_SHA"
+git checkout FETCH_HEAD
diff --git a/ci/test_arealite.sh b/ci/test_arealite.sh
new file mode 100644
index 000000000..79734eade
--- /dev/null
+++ b/ci/test_arealite.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+#
+# Run pytest on arealite/ inside the `areal-env:latest` image, with the
+# checkout at /tmp/areal-$GIT_COMMIT_SHA mounted as /workspace.
+
+set -e
+
+GIT_COMMIT_SHA=${GIT_COMMIT_SHA:?"GIT_COMMIT_SHA is not set"}
+
+echo "GIT_COMMIT_SHA: $GIT_COMMIT_SHA"
+
+RUN_ID="areal-$GIT_COMMIT_SHA"
+cd "/tmp/$RUN_ID"
+
+# Remove a leftover container with exactly this name from an earlier run.
+if docker ps -a --format '{{.Names}}' | grep -qxF "$RUN_ID"; then
+    docker rm -f "$RUN_ID"
+fi
+
+docker run \
+    --name "$RUN_ID" \
+    --gpus all \
+    --shm-size=8g \
+    -v "$(pwd)":/workspace \
+    -w /workspace \
+    areal-env:latest \
+    bash -c "
+        set -e
+        mv /sglang ./sglang
+        HF_ENDPOINT=https://hf-mirror.com python -m pytest -s arealite/
+    " || { docker rm -f "$RUN_ID"; exit 1; }
+
+docker rm -f "$RUN_ID"