diff --git a/.github/workflows/nv-accelerate-v100.yml b/.github/workflows/nv-accelerate-v100.yml new file mode 100644 index 000000000000..20743dc6d858 --- /dev/null +++ b/.github/workflows/nv-accelerate-v100.yml @@ -0,0 +1,58 @@ +name: nv-accelerate-v100 + +on: + push: + branches: + - 'master' + - 'staging**' + paths-ignore: + - 'docs/**' + pull_request: + paths-ignore: + - 'docs/**' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + unit-tests: + runs-on: [self-hosted, nvidia, cu111, v100] + + steps: + - uses: actions/checkout@v2 + + - name: environment + run: | + nvidia-smi + which python + python --version + which nvcc + nvcc --version + pip install --upgrade pip + pip uninstall --yes torch torchvision + pip install torch==1.8.2+cu111 torchvision==0.9.2+cu111 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html + python -c "import torch; print('torch:', torch.__version__, torch)" + python -c "import torch; print('CUDA available:', torch.cuda.is_available())" + + - name: Python environment + run: | + pip list + + - name: Install deepspeed + run: | + pip uninstall --yes deepspeed + pip install .[dev,autotuning] + ds_report + + - name: HF Accelerate tests + run: | + if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi + git clone https://github.com/huggingface/accelerate + cd accelerate + # installing dependencies + pip install .[testing] + # force protobuf version due to issues + pip install "protobuf<4.21.0" + pip list + TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --verbose tests/deepspeed