Skip to content

Canary-GPU

Canary-GPU #473

Workflow file for this run

name: Canary-GPU
on:
schedule:
- cron: '0 4 * * *'
workflow_dispatch:
inputs:
repo-id:
description: 'staging repository id to test'
required: false
default: ''
djl-version:
description: 'djl version to test'
required: false
pt-version:
description: 'pytorch version to test'
required: false
default: ''
jobs:
canary-test-cuda112:
if: github.repository == 'deepjavalibrary/djl-demo'
runs-on: [ self-hosted, gpu ]
container:
image: nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu18.04
options: --gpus all --runtime=nvidia
env:
AWS_REGION: us-east-1
DJL_STAGING: ${{github.event.inputs.repo-id}}
DJL_VERSION: ${{github.event.inputs.djl-version}}
PT_VERSION: ${{github.event.inputs.pt-version}}
timeout-minutes: 30
needs: create-gpu-runner
steps:
- name: Setup Environment
run: |
apt-get update
apt-get install -y software-properties-common wget libgomp1
- uses: actions/checkout@v3
- name: Set up JDK 11
uses: actions/setup-java@v3
with:
java-version: 11
distribution: corretto
- name: Test MXNet
working-directory: canary
run: |
set -x
DJL_ENGINE=mxnet-native-auto ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=mxnet-native-mkl ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=mxnet-native-cu112mkl ./gradlew clean run
rm -rf /root/.djl.ai/
- name: Test PyTorch
working-directory: canary
run: |
set -x
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=$PT_VERSION ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.11.0 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.12.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.13.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=2.0.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=$PT_VERSION ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.11.0 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.12.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.13.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=2.0.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cpu ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cpu-precxx11 ./gradlew clean run
rm -rf /root/.djl.ai/
# not support lower version of CUDA since 0.22.0, fallback to CPU
DJL_ENGINE=pytorch-native-cu117 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu113 PT_VERSION=1.11.0 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu116 PT_VERSION=1.12.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu117 PT_VERSION=1.13.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu118 PT_VERSION=2.0.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu117-precxx11 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu113-precxx11 PT_VERSION=1.11.0 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu116-precxx11 PT_VERSION=1.12.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu117-precxx11 PT_VERSION=1.13.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu118-precxx11 PT_VERSION=2.0.1 ./gradlew clean run
rm -rf /root/.djl.ai/
- name: Test Tensorflow
working-directory: canary
run: |
set -x
DJL_ENGINE=mxnet-native-auto ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=tensorflow-native-cpu ./gradlew clean run
rm -rf /root/.djl.ai/
# tensorflow-native-cu113 cannot run on CU112 since 0.22.0
- name: Test Paddle
working-directory: canary
run: |
set -x
mkdir -p $HOME/.djl.ai/paddle/2.3.2-cu112-linux-x86_64
DJL_CACHE_DIR=$HOME/.djl.ai/ DJL_ENGINE=paddlepaddle-native-auto \
LD_LIBRARY_PATH=$DJL_CACHE_DIR/paddle/2.3.2-cu112-linux-x86_64 \
./gradlew clean run
rm -rf /root/.djl.ai/
mkdir -p $HOME/.djl.ai/paddle/2.3.2-20221103-cu112-linux-x86_64
DJL_CACHE_DIR=$HOME/.djl.ai/ DJL_ENGINE=paddlepaddle-native-cu112 \
LD_LIBRARY_PATH=$DJL_CACHE_DIR/paddle/2.3.2-20221103-cu112-linux-x86_64 \
./gradlew clean run
rm -rf /root/.djl.ai/
- name: Test Xgboost GPU
working-directory: canary
run: |
DJL_ENGINE=xgboost-gpu ./gradlew clean run
rm -rf /root/.djl.ai/
canary-test-cuda113:
if: github.repository == 'deepjavalibrary/djl-demo'
runs-on: [ self-hosted, gpu ]
container:
image: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04
options: --gpus all --runtime=nvidia
env:
AWS_REGION: us-east-1
DJL_STAGING: ${{github.event.inputs.repo-id}}
DJL_VERSION: ${{github.event.inputs.djl-version}}
PT_VERSION: ${{github.event.inputs.pt-version}}
timeout-minutes: 30
needs: create-gpu-runner
steps:
- name: Setup Environment
run: |
apt-get update
apt-get install -y software-properties-common wget libgomp1
- uses: actions/checkout@v3
- name: Set up JDK 11
uses: actions/setup-java@v3
with:
java-version: 11
distribution: corretto
- name: Test MXNet
working-directory: canary
run: |
set -x
DJL_ENGINE=mxnet-native-auto ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=mxnet-native-mkl ./gradlew clean run
rm -rf /root/.djl.ai/
- name: Test PyTorch
working-directory: canary
run: |
set -x
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=$PT_VERSION ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=2.0.1 PYTORCH_FLAVOR=cu118 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=2.0.1 PYTORCH_FLAVOR=cu118-precxx11 ./gradlew clean run
rm -rf /root/.djl.ai/
- name: Test Tensorflow
working-directory: canary
run: |
set -x
DJL_ENGINE=tensorflow-native-auto ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=tensorflow-native-cpu ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=tensorflow-native-cu113 ./gradlew clean run
rm -rf /root/.djl.ai/
- name: Test Paddle
working-directory: canary
run: |
set -x
DJL_ENGINE=paddlepaddle-native-auto ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=paddlepaddle-native-cpu ./gradlew clean run
rm -rf /root/.djl.ai/
- name: Test Xgboost GPU
working-directory: canary
run: |
set -x
DJL_ENGINE=xgboost-gpu ./gradlew clean run
rm -rf /root/.djl.ai/
canary-test-cuda118:
if: github.repository == 'deepjavalibrary/djl-demo'
runs-on: [ self-hosted, gpu ]
container:
image: nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu18.04
options: --gpus all --runtime=nvidia
env:
AWS_REGION: us-east-1
DJL_STAGING: ${{github.event.inputs.repo-id}}
DJL_VERSION: ${{github.event.inputs.djl-version}}
PT_VERSION: ${{github.event.inputs.pt-version}}
timeout-minutes: 30
needs: create-gpu-runner
steps:
- name: Setup Environment
run: |
apt-get update
apt-get install -y software-properties-common wget libgomp1
- uses: actions/checkout@v3
- name: Set up JDK 11
uses: actions/setup-java@v3
with:
java-version: 11
distribution: corretto
- name: Test MXNet
working-directory: canary
run: |
set -x
DJL_ENGINE=mxnet-native-auto ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=mxnet-native-mkl ./gradlew clean run
rm -rf /root/.djl.ai/
- name: Test PyTorch
working-directory: canary
run: |
set -x
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=$PT_VERSION ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.11.0 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.12.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=1.13.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_VERSION=2.0.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=$PT_VERSION ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.11.0 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.12.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=1.13.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-auto PYTORCH_PRECXX11=true PYTORCH_VERSION=2.0.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cpu ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cpu-precxx11 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu117 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu113 PT_VERSION=1.11.0 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu116 PT_VERSION=1.12.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu117 PT_VERSION=1.13.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu118 PT_VERSION=2.0.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu117-precxx11 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu113-precxx11 PT_VERSION=1.11.0 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu116-precxx11 PT_VERSION=1.12.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu117-precxx11 PT_VERSION=1.13.1 ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=pytorch-native-cu118-precxx11 PT_VERSION=2.0.1 ./gradlew clean run
rm -rf /root/.djl.ai/
- name: Test Tensorflow
working-directory: canary
run: |
set -x
DJL_ENGINE=tensorflow-native-auto ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=tensorflow-native-cpu ./gradlew clean run
rm -rf /root/.djl.ai/
- name: Test Paddle
working-directory: canary
run: |
set -x
DJL_ENGINE=paddlepaddle-native-auto ./gradlew clean run
rm -rf /root/.djl.ai/
DJL_ENGINE=paddlepaddle-native-cpu ./gradlew clean run
rm -rf /root/.djl.ai/
- name: Test Xgboost GPU
working-directory: canary
run: |
set -x
DJL_ENGINE=xgboost-gpu ./gradlew clean run
rm -rf /root/.djl.ai/
create-gpu-runner:
if: github.repository == 'deepjavalibrary/djl-demo'
runs-on: [ self-hosted, scheduler ]
steps:
- name: Create new GPU instance
id: create_gpu
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-demo/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_gpu $token djl-demo
outputs:
gpu_instance_id: ${{ steps.create_gpu.outputs.action_gpu_instance_id }}
stop-runners:
if: always()
runs-on: [ self-hosted, scheduler ]
needs: [ create-gpu-runner, canary-test-cuda112, canary-test-cuda113, canary-test-cuda118 ]
steps:
- name: Stop all instances
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
instance_id=${{ needs.create-gpu-runner.outputs.gpu_instance_id }}
./stop_instance.sh $instance_id