Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Commit

Permalink
CI: switch GPU pool (#1476)
Browse files Browse the repository at this point in the history
* switch GPU pool
* updates
* drop 1 gpu

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
Borda and pre-commit-ci[bot] authored Oct 28, 2022
1 parent d0eeedc commit 4b810d8
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 21 deletions.
5 changes: 1 addition & 4 deletions .azure-pipelines/gpu-example-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,10 @@ pr:
jobs:
- template: testing-template.yml
parameters:
configs:
domains:
- "image"
- "icevision"
- "vissl"
- "text"
- "tabular"
- "video"
gpu_inds:
- "0"
- "0,1"
12 changes: 9 additions & 3 deletions .azure-pipelines/gpu-special-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,24 @@ jobs:
# how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: 2

pool: azure-gpus-spot
pool: lit-rtx-3090
variables:
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )

container:
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.9-cuda11.1.1"
options: "--ipc=host --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12-cuda11.6.1"
options: "--ipc=host --gpus=all"

workspace:
clean: all

steps:

- bash: echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
displayName: 'set visible devices'

- bash: |
echo $CUDA_VISIBLE_DEVICES
lspci | egrep 'VGA|3D'
whereis nvidia
nvidia-smi
Expand Down
40 changes: 26 additions & 14 deletions .azure-pipelines/testing-template.yml
Original file line number Diff line number Diff line change
@@ -1,30 +1,35 @@
jobs:
- ${{ each gids in parameters.gpu_inds }}:
- ${{ each config in parameters.configs }}:
- ${{ each dom in parameters.domains }}:
- job:
displayName: "domain ${{config}} with GPUs ${{gids}}"
displayName: "domain ${{dom}} with 2 GPU"
# how long to run the job before automatically cancelling
timeoutInMinutes: 45
# how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: 2

pool: azure-gpus-spot
pool: lit-rtx-3090
variables:
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )

# this requires Docker to be installed in the base image...
container:
# base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.10"
# image: "pytorch/pytorch:1.8.1-cuda11.0-cudnn8-runtime"
options: "-it --rm --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
options: "-it --rm --gpus=all --shm-size=16g"

workspace:
clean: all
steps:

- bash: echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
displayName: 'set visible devices'

- bash: |
echo $CUDA_VISIBLE_DEVICES
lspci | egrep 'VGA|3D'
whereis nvidia
nvidia-smi
python --version
pip --version
pip list
df -kh /dev/shm
Expand All @@ -36,31 +41,38 @@ jobs:
- bash: |
# python -m pip install "pip==20.1"
if [ "${{config}}" == "icevision" ]; then pip install '.[image]' icevision effdet icedata; elif [ "${{config}}" == "vissl" ]; then pip install '.[image]'; else pip install '.[${{config}}]'; fi
if [ "${{dom}}" == "icevision" ]; then
pip install '.[image]' icevision effdet icedata;
elif [ "${{dom}}" == "vissl" ]; then
pip install '.[image]';
else
pip install '.[${{dom}}]';
fi
pip install '.[test]' --upgrade-strategy only-if-needed
pip list
displayName: 'Install dependencies'
- bash: |
pip uninstall -y opencv-python
pip uninstall -y opencv-python-headless
pip uninstall -y opencv-python opencv-python-headless
pip install opencv-python-headless==4.5.5.64
displayName: 'Install OpenCV dependencies'
condition: eq('${{ config }}', 'icevision')
condition: eq('${{ dom }}', 'icevision')
- bash: |
pip install fairscale
pip install git+https://github.com/facebookresearch/ClassyVision.git
pip install git+https://github.com/facebookresearch/vissl.git
displayName: 'Install VISSL dependencies'
condition: eq('${{ config }}', 'vissl')
condition: eq('${{ dom }}', 'vissl')
- bash: |
python -c "import torch; print(f'found GPUs: {torch.cuda.device_count()}')"
python -m coverage run --source flash -m pytest tests/examples/test_scripts.py tests/image/embedding/test_model.py -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=30
python -m coverage run --source flash -m pytest \
tests/examples/test_scripts.py \
tests/image/embedding/test_model.py \
-v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=30
env:
CUDA_VISIBLE_DEVICES: ${{gids}}
FLASH_TEST_TOPIC: ${{ config }}
FLASH_TEST_TOPIC: ${{ dom }}
displayName: 'Testing'
- bash: |
Expand Down

0 comments on commit 4b810d8

Please sign in to comment.