Skip to content

Commit

Permalink
add CI test for yitian (#1016)
Browse files Browse the repository at this point in the history
  • Loading branch information
Guo-Peilin authored Feb 17, 2023
1 parent 27fd896 commit 27c4aeb
Show file tree
Hide file tree
Showing 9 changed files with 182 additions and 8 deletions.
75 changes: 75 additions & 0 deletions .github/workflows/pytorch113_yitian.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# CI for PyTorch 1.13 on Yitian (aarch64, armv8.6-a-sve2) self-hosted runners.
name: pytorch1_13-yitian
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  schedule:
    # 17:00 UTC == 1:00 am UTC+8:00
    - cron: "0 17 * * *"
  workflow_dispatch:

jobs:
  build:
    # Run only on the upstream repository, never on forks.
    if: github.repository == 'alibaba/BladeDISC'
    # The type of runner that the job will run on
    runs-on: [self-hosted, yitian-ci]
    steps:
      - name: Checkout
        # NOTE(review): this action reference looks garbled (likely an
        # email-obfuscation artifact from scraping); confirm the intended
        # actions/checkout version before merging.
        uses: actions/[email protected]
      - name: Build Dev Docker
        run: |
          set -e
          source $HOME/.cache/proxy_config
          git submodule sync
          git submodule update --depth=1 --init --recursive
          cp /etc/apt/sources.list .
          docker build -t disc-dev-cpu-yitian \
            --build-arg BASEIMAGE=ubuntu:20.04 \
            -f docker/dev/Dockerfile.aarch64 .
      - name: Build And Test DISC
        run: |
          set -e
          docker run --rm -t --user $(id -u):$(id -g) \
            -v $HOME/.cache:$HOME/.cache \
            -v /etc/passwd:/etc/passwd:ro \
            -v /etc/group:/etc/group:ro \
            -v /etc/hosts:/etc/hosts:ro \
            -v $PWD:/disc \
            -e GITHUB_WORKFLOW=$GITHUB_WORKFLOW \
            -e TORCH_BLADE_BUILD_WITH_CUDA_SUPPORT=OFF \
            -e TORCH_BLADE_CI_BUILD_TORCH_VERSION=1.13.1+aarch64 \
            -e TORCH_BLADE_BUILD_ON_YITIAN=ON \
            -w /disc \
            disc-dev-cpu-yitian \
            bash ./scripts/ci/test_pytorch_blade.sh
      - name: Deploy Dev Docker Image
        # Deploy only for pushes that land on main.
        if: github.event.ref == 'refs/heads/main'
        env:
          ALIYUN_DOCKER_USERNAME: ${{ secrets.ALIYUN_DOCKER_USERNAME }}
          ALIYUN_DOCKER_PASSWORD: ${{ secrets.ALIYUN_DOCKER_PASSWORD }}
          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
          GITHUB_PULL_REQUEST: ${{ github.event.number }}
          LOCAL_DEV_DOCKER: disc-dev-cpu-yitian
          REMOTE_DEV_DOCKER: bladedisc:latest-devel-cpu-yitian
        run: |
          set -e
          echo "Try to deploy runtime docker image..."
          source $HOME/.cache/proxy_config
          bash ./scripts/ci/deploy_wrapper.sh
      - name: Deploy Runtime Docker Image
        if: github.event.ref == 'refs/heads/main'
        env:
          ALIYUN_DOCKER_USERNAME: ${{ secrets.ALIYUN_DOCKER_USERNAME }}
          ALIYUN_DOCKER_PASSWORD: ${{ secrets.ALIYUN_DOCKER_PASSWORD }}
          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
          GITHUB_PULL_REQUEST: ${{ github.event.number }}
          RUNTIME_BASEIMAGE: bladedisc/bladedisc:latest-devel-cpu-yitian
          RUNTIME_DOCKER_FILE: docker/runtime/Dockerfile.pytorch.aarch64
          REMOTE_RUNTIME_DOCKER: bladedisc:latest-runtime-torch1.13.1-cpu-yitian
        run: |
          set -e
          echo "Try to deploy runtime docker image..."
          source $HOME/.cache/proxy_config
          bash ./scripts/ci/deploy_wrapper.sh
74 changes: 74 additions & 0 deletions .github/workflows/tf280_yitian.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# CI for TensorFlow 2.8 on Yitian (aarch64, armv8.6-a-sve2) self-hosted runners.
name: tf2_8-yitian
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  schedule:
    # 17:00 UTC == 1:00 am UTC+8:00
    - cron: "0 17 * * *"
  workflow_dispatch:

jobs:
  build:
    # Run only on the upstream repository, never on forks.
    if: github.repository == 'alibaba/BladeDISC'
    # The type of runner that the job will run on
    runs-on: [self-hosted, yitian-ci]
    steps:
      - name: Checkout
        # NOTE(review): this action reference looks garbled (likely an
        # email-obfuscation artifact from scraping); confirm the intended
        # actions/checkout version before merging.
        uses: actions/[email protected]
      - name: Build Dev Docker
        run: |
          set -e
          source $HOME/.cache/proxy_config
          git submodule sync
          git submodule update --depth=1 --init --recursive
          cp /etc/apt/sources.list .
          docker build -t disc-dev-cpu-yitian \
            --build-arg BASEIMAGE=ubuntu:20.04 \
            --build-arg DISC_HOST_TF_VERSION="tensorflow-aarch64==2.8" \
            -f docker/dev/Dockerfile.aarch64 .
      - name: Build And Test DISC
        run: |
          set -e
          docker run --rm -t --user $(id -u):$(id -g) \
            -v $HOME/.cache:$HOME/.cache \
            -v /etc/passwd:/etc/passwd:ro \
            -v /etc/group:/etc/group:ro \
            -v /etc/hosts:/etc/hosts:ro \
            -v $PWD:/disc \
            -e GITHUB_WORKFLOW=$GITHUB_WORKFLOW \
            -h=`hostname` \
            -w /disc \
            disc-dev-cpu-yitian \
            bash ./scripts/ci/build_and_test.sh --cpu-only --target_cpu_arch="armv8.6-a-sve2"
      - name: Deploy Dev Docker Image
        # Deploy only for pushes that land on main.
        if: github.event.ref == 'refs/heads/main'
        env:
          ALIYUN_DOCKER_USERNAME: ${{ secrets.ALIYUN_DOCKER_USERNAME }}
          ALIYUN_DOCKER_PASSWORD: ${{ secrets.ALIYUN_DOCKER_PASSWORD }}
          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
          GITHUB_PULL_REQUEST: ${{ github.event.number }}
          LOCAL_DEV_DOCKER: disc-dev-cpu-yitian
          REMOTE_DEV_DOCKER: bladedisc:latest-devel-cpu-yitian
        run: |
          set -e
          echo "Try to deploy runtime docker image..."
          source $HOME/.cache/proxy_config
          bash ./scripts/ci/deploy_wrapper.sh
      - name: Deploy Runtime Docker Image
        if: github.event.ref == 'refs/heads/main'
        env:
          ALIYUN_DOCKER_USERNAME: ${{ secrets.ALIYUN_DOCKER_USERNAME }}
          ALIYUN_DOCKER_PASSWORD: ${{ secrets.ALIYUN_DOCKER_PASSWORD }}
          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
          GITHUB_PULL_REQUEST: ${{ github.event.number }}
          RUNTIME_BASEIMAGE: bladedisc/bladedisc:latest-devel-cpu-yitian
          RUNTIME_DOCKER_FILE: docker/runtime/Dockerfile.tf.aarch64
          REMOTE_RUNTIME_DOCKER: bladedisc:latest-runtime-tensorflow2.8-cpu-yitian
        run: |
          set -e
          echo "Try to deploy runtime docker image..."
          source $HOME/.cache/proxy_config
          bash ./scripts/ci/deploy_wrapper.sh
4 changes: 4 additions & 0 deletions pytorch_blade/scripts/build_pytorch_blade.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ function ci_build() {
COMMON_SETUP_ARGS="--platform_alibaba"
fi

if [ "$TORCH_BLADE_BUILD_ON_YITIAN" = "ON" ]; then
COMMON_SETUP_ARGS+=" --target_cpu_arch='armv8.6-a-sve2'"
fi

if [ "$TORCH_BLADE_BUILD_WITH_CUDA_SUPPORT" = "ON" ]; then
export TORCH_BLADE_BUILD_TENSORRT=${TORCH_BLADE_BUILD_TENSORRT:-ON}
export TORCH_BLADE_BUILD_TENSORRT_STATIC=${TORCH_BLADE_BUILD_TENSORRT_STATIC:-OFF}
Expand Down
3 changes: 2 additions & 1 deletion pytorch_blade/tests/disc/ops/test_conv_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
import torch_blade
import unittest

from tests.disc.testing_base import DiscTestCase, skipTorchLE
from tests.disc.testing_base import DiscTestCase, skipTorchLE, skipIfOnYitian

@skipIfOnYitian()
@skipTorchLE("1.6.1")
class TestMlirConvolution(DiscTestCase):
def _test_conv(self, conv_func, inp_test_data=None):
Expand Down
2 changes: 2 additions & 0 deletions pytorch_blade/tests/disc/testing_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
from torch_blade.quantization import is_available as is_quantization_available
from torch_blade.testing.common_utils import TestCase

def skipIfOnYitian():
    """Return a unittest decorator that skips the test on Yitian hosts.

    Detection is by probing ``lscpu`` for the ``svebf16`` CPU flag, which is
    assumed to identify Yitian cores (TODO confirm against the runner fleet).
    On hosts where ``lscpu`` is unavailable the probe reads an empty string
    and nothing is skipped.
    """
    # Shells out once per call; cheap enough at test-collection time.
    on_yitian = "svebf16" in os.popen("lscpu").read()
    return unittest.skipIf(on_yitian, "Yitian bug was not fixed")

def skipIfNoDISC():
    """Return a unittest decorator that skips the test when DISC is absent."""
    disc_built = is_available()
    return unittest.skipIf(not disc_built, "DISC support was not built")
Expand Down
10 changes: 5 additions & 5 deletions scripts/ci/build_and_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ fi
# cleanup build cache
(rm -rf build \
&& rm -rf tao/build \
&& cd tao && bazel clean --expunge && cd .. \
&& cd tf_community && bazel clean --expunge)
&& cd tao && bazel clean --expunge && cd ..\
&& cd tao_compiler && bazel clean --expunge)

python ${ENTRY} ${VENV_PATH} -s configure --bridge-gcc default --compiler-gcc default ${CPU_ONLY} ${ROCM} ${DCU} ${ROCM_PATH}
python ${ENTRY} ${VENV_PATH} -s build_tao_bridge ${CPU_ONLY} ${ROCM} ${DCU} ${ROCM_PATH}
python ${ENTRY} ${VENV_PATH} -s build_tao_compiler ${CPU_ONLY} ${ROCM} ${DCU} ${ROCM_PATH}
python ${ENTRY} ${VENV_PATH} -s configure --bridge-gcc default --compiler-gcc default ${CPU_ONLY} ${ROCM} ${DCU} ${ROCM_PATH} ${TARGET_CPU_ARCH}
python ${ENTRY} ${VENV_PATH} -s build_tao_bridge ${CPU_ONLY} ${ROCM} ${DCU} ${ROCM_PATH} ${TARGET_CPU_ARCH}
python ${ENTRY} ${VENV_PATH} -s build_tao_compiler ${CPU_ONLY} ${ROCM} ${DCU} ${ROCM_PATH} ${TARGET_CPU_ARCH}
if [[ -z "$ROCM" ]] && [[ -z "$DCU" ]]; then
python ${ENTRY} ${VENV_PATH} -s test_tao_bridge_cpp ${CPU_ONLY} ${ROCM} ${DCU} ${ROCM_PATH}
python ${ENTRY} ${VENV_PATH} -s test_tao_bridge_py ${CPU_ONLY} ${ROCM} ${DCU} ${ROCM_PATH}
Expand Down
4 changes: 4 additions & 0 deletions scripts/ci/parse_args.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ while [[ $# -gt 0 ]]; do
VENV_PATH="$2"
shift 2
;;
--target_cpu_arch)
TARGET_CPU_ARCH="--target_cpu_arch $2"
shift 2
;;
--rocm)
ROCM="--rocm"
shift
Expand Down
2 changes: 1 addition & 1 deletion scripts/ci/test_pytorch_blade.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ if [[ -f ~/.cache/proxy_config ]]; then
fi

# cleanup build cache
(cd tf_community && bazel clean --expunge)
(cd tao_compiler && bazel clean --expunge)

# note(yancey.yx): using virtualenv to avoid permission issue on workflow actions CI,
if [ $TORCH_BLADE_CI_BUILD_TORCH_VERSION = "ngc" ]; then
Expand Down
16 changes: 15 additions & 1 deletion scripts/python/common_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,11 +261,17 @@ def mkl_install_dir(root):
def acl_root_dir(root):
return os.path.join(mkldnn_build_dir(root), 'acl', 'ComputeLibrary')

def extra_acl_patch_dir(root):
    """Return the directory holding extra ACL patches: <root>/third_party/bazel/acl.

    Falls back to the repository source root when ``root`` is None.
    """
    base = get_source_root_dir() if root is None else root
    return os.path.join(base, "third_party", "bazel", "acl")

def config_mkldnn(root, args):
build_dir = mkldnn_build_dir(root)
ensure_empty_dir(build_dir, clear_hidden=False)
mkl_dir = mkl_install_dir(root)
acl_dir = acl_root_dir(root)
acl_patch_dir = extra_acl_patch_dir(root)
ensure_empty_dir(mkl_dir, clear_hidden=False)
ensure_empty_dir(acl_dir, clear_hidden=False)
if args.x86:
Expand Down Expand Up @@ -293,11 +299,19 @@ def config_mkldnn(root, args):
ACL_DIR={}
git clone --branch v22.02 --depth 1 $ACL_REPO $ACL_DIR
cd $ACL_DIR
EXTRA_ACL_PATCH_DIR={}
for file in $EXTRA_ACL_PATCH_DIR/acl_*.patch
do
if [[ $file == *makefile* ]]; then
continue
fi
patch -p1 < $file
done
scons --silent $MAKE_NP Werror=0 debug=0 neon=1 opencl=0 openmp=1 embed_kernels=0 os=linux arch={} build=native extra_cxx_flags="-fPIC"
exit $?
'''.format(acl_dir, arch)
'''.format(acl_dir, acl_patch_dir, arch)
execute(cmd)
# a workaround for static linking
execute('rm -f build/*.so')
Expand Down

0 comments on commit 27c4aeb

Please sign in to comment.