diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index eeb4e330d7..579b333cdf 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -97,7 +97,7 @@ jobs:
       - Linux
       - ${{ matrix.arch }}
       - cpu
-    timeout-minutes: 300
+    timeout-minutes: 360
     strategy:
       fail-fast: false
       matrix:
@@ -108,6 +108,10 @@ jobs:
     steps:
       - name: Cleanup
         run: |
+          # Stop all Docker containers to free memory
+          docker stop $(docker ps -q) 2>/dev/null || true
+          docker rm $(docker ps -aq) 2>/dev/null || true
+          # Clean workspace and caches
           sudo rm -rf ${{ github.workspace }}/* || true
           sudo rm -rf ${{ github.workspace }}/.[!.]* || true
           rm -rf ~/.cache/flashinfer_jit || true
@@ -136,7 +140,7 @@ jobs:
     needs: setup
     if: needs.setup.outputs.skip_build != 'true' && github.event.inputs.skip_gpu != 'true'
     runs-on: [self-hosted, Linux, X64, gpu, sm86]
-    timeout-minutes: 300
+    timeout-minutes: 360
     strategy:
       fail-fast: false
       matrix:
@@ -154,7 +158,6 @@ jobs:
           sudo rm -rf ${{ github.workspace }}/.[!.]* || true
           rm -rf ~/.cache/flashinfer_jit || true
           docker system prune -f || true
-          # Verify GPU is free
           nvidia-smi || true
 
       - uses: actions/checkout@v4
@@ -180,7 +183,7 @@ jobs:
     needs: setup
     if: needs.setup.outputs.skip_build != 'true' && github.event.inputs.skip_gpu != 'true'
     runs-on: [self-hosted, Linux, X64, gpu, sm75]
-    timeout-minutes: 300
+    timeout-minutes: 360
     env:
       DOCKER_IMAGE: flashinfer/flashinfer-ci-cu129:${{ needs.setup.outputs.docker_tag }}
     steps:
@@ -194,7 +197,6 @@ jobs:
           sudo rm -rf ${{ github.workspace }}/.[!.]* || true
           rm -rf ~/.cache/flashinfer_jit || true
           docker system prune -f || true
-          # Verify GPU is free
           nvidia-smi || true
 
       - uses: actions/checkout@v4
diff --git a/scripts/task_show_node_info.sh b/scripts/task_show_node_info.sh
index 8569a1e0d9..6d79c3fe6e 100755
--- a/scripts/task_show_node_info.sh
+++ b/scripts/task_show_node_info.sh
@@ -40,5 +40,5 @@ ec2_metadata public-hostname
 echo "===== RUNNER INFO ====="
 df --human-readable
 lscpu
-free
+free -h
 nvidia-smi 2>/dev/null || echo "cuda not found"