Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,25 @@ jobs:
docker_npu_image: ${{ needs.build-docker.outputs.docker_npu_image }}

# Calls the reusable _Distribute-stable.yml workflow. The distribute-test and
# distribute-formers jobs both list this job in their `needs`.
distribute:
  # Exactly one `name` key: a mapping with duplicate keys is invalid YAML and
  # most parsers silently keep only the last value.
  name: Distribute-stable-build
  uses: ./.github/workflows/_Distribute-stable.yml
  needs: [clone, build-docker]
  with:
    docker_distribute_image: ${{ needs.build-docker.outputs.docker_distribute_image }}
    clone-can-skip: ${{ needs.clone.outputs.can-skip }}


# Invokes the reusable _Distribute-stable-Test.yml workflow after the clone,
# build-docker and distribute jobs have completed.
distribute-test:
  name: Distribute-stable-test
  needs:
    - clone
    - build-docker
    - distribute
  uses: ./.github/workflows/_Distribute-stable-Test.yml
  with:
    clone-can-skip: ${{ needs.clone.outputs.can-skip }}
    docker_distribute_image: ${{ needs.build-docker.outputs.docker_distribute_image }}


# Invokes the reusable _Distribute-stable-Formers.yml workflow after the clone,
# build-docker and distribute jobs have completed.
distribute-formers:
  name: Distribute-stable-formers
  needs:
    - clone
    - build-docker
    - distribute
  uses: ./.github/workflows/_Distribute-stable-Formers.yml
  with:
    clone-can-skip: ${{ needs.clone.outputs.can-skip }}
    docker_distribute_image: ${{ needs.build-docker.outputs.docker_distribute_image }}
178 changes: 178 additions & 0 deletions .github/workflows/_Distribute-stable-Formers.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
name: Distribute-stable-Formers

# Reusable workflow: it is triggered only through `workflow_call`, i.e. when
# another workflow references this file with `uses:`.
on:
  workflow_call:
    inputs:
      # Image reference supplied by the caller; exported below as `docker_image`.
      docker_distribute_image:
        required: true
        type: string
      # String flag; the job is skipped when this equals 'true'.
      clone-can-skip:
        required: false
        type: string
        default: 'false'


# Workflow-level environment shared by every step of every job in this file.
env:
  # Pull-request identity taken from the triggering event.
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  BRANCH: ${{ github.event.pull_request.base.ref }}

  # Task naming; TASK is the prefix of the container name.
  TASK: 'paddle-CI-${{ github.event.pull_request.number }}-distribute-formers'
  CI_name: 'distribute'

  # Paths forwarded into the container.
  work_dir: '/paddle'
  PADDLE_ROOT: '/paddle'
  ci_scripts: '/paddle/ci'

  # Hosts that must bypass the proxy.
  no_proxy: 'bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn,paddlepaddle.org.cn'

  # NOTE(review): the job visible in this file starts its container from
  # `formers_docker`, not from `docker_image` - confirm this variable is still needed.
  docker_image: ${{ inputs.docker_distribute_image }}

# Every `run:` step uses bash unless it overrides the shell itself.
defaults:
  run:
    shell: bash


jobs:
  # Starts a GPU container, unpacks the PR source archive into it, runs
  # ci/formers_test.sh, uploads the produced logs and finally removes the container.
  formers-test:
    name: formers-Test
    # Skipped when the caller passes clone-can-skip == 'true'.
    if: ${{ inputs.clone-can-skip != 'true' }}
    runs-on:
      group: Distribute
    steps:
      - name: Check docker image and run container
        env:
          # Numeric-looking values are quoted so they reach the container
          # verbatim as strings instead of going through YAML number parsing.
          FLAGS_fraction_of_gpu_memory_to_use: "0.15"
          CTEST_OUTPUT_ON_FAILURE: "1"
          CTEST_PARALLEL_LEVEL: "4"
          WITH_GPU: "ON"
          WITH_AVX: "ON"
          WITH_DISTRIBUTE: "ON"
          WITH_TESTING: "ON"
          WITH_COVERAGE: "OFF"
          CMAKE_BUILD_TYPE: Release
          PADDLE_FRACTION_GPU_MEMORY_TO_USE: "0.15"
          PRECISION_TEST: "OFF"
          WITH_UNITY_BUILD: "ON"
          AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}
          AGILE_REVISION: ${{ github.event.pull_request.head.sha }}
          WITH_INCREMENTAL_COVERAGE: "OFF"
          WITH_ONNXRUNTIME: "OFF"
          COVERALLS_UPLOAD: "ON"
          PADDLE_VERSION: "0.0.0"
          GIT_PR_ID: ${{ github.event.pull_request.number }}
          PY_VERSION: "3.10"
          CUDA_ARCH_NAME: Auto
          WITH_CUDNN_FRONTEND: "ON"
          FLAGS_enable_cudnn_frontend: "1"
          CACHE_DIR: /root/.cache/build
          CCACHE_DIR: /root/.ccache/formers
          CFS_DIR: /home/data/cfs
          paddle_whl: /workspace/dist/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
          formers_docker: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda126-dev-latest
        run: |
          # Build "-v host:container" flags for the host CUDA/NVIDIA libraries,
          # the NVIDIA device nodes and nvidia-smi.
          export CUDA_SO="$(\ls -d /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls -d /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
          export DEVICES="$(\ls -d /dev/nvidia* | xargs -I{} echo "-v {}:{}") $(\ls /dev/nvidia-caps/* | xargs -I{} echo "-v {}:{}")"
          export SMI="-v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi"
          container_name=${TASK}-test-$(date +%Y%m%d-%H%M%S)
          # Publish the name so later steps can read it as env.container_name.
          echo "container_name=${container_name}" >> "${GITHUB_ENV}"
          docker run -d -t --name ${container_name} ${CUDA_SO} ${DEVICES} ${SMI} --runtime=nvidia --shm-size=32G --privileged \
            -v "/home/data/cfs:/home/data/cfs" \
            -v "/home/data/cfs/.cache/:/root/.cache" \
            -v "/home/data/cfs/.ccache:/root/.ccache" \
            -v "/ssd1/models:/home/models" \
            -v "/ssd1/root:/root" \
            -v "/dev/shm:/dev/shm" \
            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
            -v ${{ github.workspace }}:/workspace \
            -e BRANCH \
            -e PR_ID \
            -e COMMIT_ID \
            -e work_dir \
            -e PADDLE_ROOT \
            -e ci_scripts \
            -e CI_name \
            -e PF_HOME=/home/models \
            -e FLAGS_fraction_of_gpu_memory_to_use \
            -e CTEST_OUTPUT_ON_FAILURE \
            -e CTEST_PARALLEL_LEVEL \
            -e WITH_GPU \
            -e WITH_AVX \
            -e WITH_DISTRIBUTE \
            -e WITH_TESTING \
            -e WITH_COVERAGE \
            -e CMAKE_BUILD_TYPE \
            -e PADDLE_FRACTION_GPU_MEMORY_TO_USE \
            -e PRECISION_TEST \
            -e WITH_UNITY_BUILD \
            -e AGILE_COMPILE_BRANCH \
            -e AGILE_REVISION \
            -e WITH_INCREMENTAL_COVERAGE \
            -e WITH_ONNXRUNTIME \
            -e COVERALLS_UPLOAD \
            -e PADDLE_VERSION \
            -e GIT_PR_ID \
            -e PY_VERSION \
            -e CUDA_ARCH_NAME \
            -e WITH_CUDNN_FRONTEND \
            -e FLAGS_enable_cudnn_frontend \
            -e CACHE_DIR \
            -e CCACHE_DIR \
            -e CFS_DIR \
            -e paddle_whl \
            -e no_proxy \
            -w /workspace --network host ${formers_docker}

      # Empties the container working directory, fetches the source archive
      # stored for this PR/commit, unpacks it and checks out the `test` branch.
      - name: Download paddle.tar.gz and merge target branch
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
            rm -rf * .[^.]*
            echo "Downloading Paddle.tar.gz"
            wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/gpups/${{ env.PR_ID }}/${{ env.COMMIT_ID }}/Paddle.tar.gz --no-check-certificate
            echo "Extracting Paddle.tar.gz"
            tar --use-compress-program="pzstd" -xf Paddle.tar.gz --strip-components=1
            rm Paddle.tar.gz
            git config --global --add safe.directory /workspace
            git checkout test
          '

      # The proxy and token files are sourced before `set -ex` so their
      # contents are not traced into the job log.
      - name: Test
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
            source ${{ github.workspace }}/../../../proxy
            source ${{ github.workspace }}/../../../AISTUDIO_ACCESS_TOKEN
            set -ex
            bash /workspace/ci/formers_test.sh
          '

      # Runs even after a failed test step, so the log directories may not
      # exist; the upload helper below tolerates that.
      - name: Upload and display logs
        if: always()
        env:
          home_path: ${{ github.workspace }}/..
          bos_file: ${{ github.workspace }}/../bos_retry/BosClient.py
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
            export AK=paddle
            export SK=paddle
            if [ ! -f "${{ env.bos_file }}" ]; then
              wget -q --no-proxy -O ${{ env.home_path }}/bos_retry.tar.gz https://xly-devops.bj.bcebos.com/home/bos_retry.tar.gz --no-check-certificate
              # -p: the directory may already exist without BosClient.py in it.
              mkdir -p ${{ env.home_path }}/bos_retry
              tar xf ${{ env.home_path }}/bos_retry.tar.gz -C ${{ env.home_path }}/bos_retry
            fi
            if [ -n "$PR_ID" ] && [ "$PR_ID" != "0" ]; then
              bos_prefix="${PR_ID}/${COMMIT_ID}"
            else
              bos_prefix="schedule/$(date +%Y%m%d)"
            fi
            # upload_logs <local_dir> <remote_path>: upload every entry of
            # <local_dir> and print its public URL. A missing or empty directory
            # is skipped so that an unmatched glob is never handed to the uploader.
            upload_logs() {
              local log_dir="$1"
              local remote_path="$2"
              if [ ! -d "$log_dir" ]; then
                echo "Log directory not found, nothing to upload: $log_dir"
                return 0
              fi
              cd "$log_dir"
              for FILE in "$log_dir"/*; do
                [ -e "$FILE" ] || continue
                file=$(basename "$FILE")
                python ${{ env.bos_file }} "$file" paddle-github-action/${remote_path}
                echo "$file: https://paddle-github-action.bj.bcebos.com/${remote_path}/$file"
              done
            }
            # api test logs
            upload_logs /workspace/PaddleFormers/unittest_logs PR/PaddleFormers/unittest-gpu/${bos_prefix}/logs
            # models test logs
            upload_logs /workspace/PaddleFormers/model_unittest_logs PR/PaddleFormers/model-unittest-gpu/${bos_prefix}/logs
          '

      # Best-effort cleanup: `set +e` lets the container removal run even if
      # emptying the working directory fails.
      - name: Terminate and delete the container
        if: always()
        run: |
          set +e
          docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*'
          docker rm -f ${{ env.container_name }}
159 changes: 159 additions & 0 deletions .github/workflows/_Distribute-stable-Test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
name: Distribute-stable-Test

# Reusable workflow: it is triggered only through `workflow_call`, i.e. when
# another workflow references this file with `uses:`.
on:
  workflow_call:
    inputs:
      # Image reference supplied by the caller; exported below as `docker_image`.
      docker_distribute_image:
        required: true
        type: string
      # String flag; the job is skipped when this equals 'true'.
      clone-can-skip:
        required: false
        type: string
        default: 'false'


# Workflow-level environment shared by every step of every job in this file.
env:
  # Pull-request identity taken from the triggering event.
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  BRANCH: ${{ github.event.pull_request.base.ref }}

  # Task naming; TASK is the prefix of the container name.
  TASK: 'paddle-CI-${{ github.event.pull_request.number }}-distribute-test'
  CI_name: 'distribute'

  # Paths forwarded into the container; ci_scripts locates distribute_test.sh.
  work_dir: '/paddle'
  PADDLE_ROOT: '/paddle'
  ci_scripts: '/paddle/ci'

  # Hosts that must bypass the proxy.
  no_proxy: 'bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn,paddlepaddle.org.cn'

  # Image the test container is started from.
  docker_image: ${{ inputs.docker_distribute_image }}

# Every `run:` step uses bash unless it overrides the shell itself.
defaults:
  run:
    shell: bash


jobs:
  # Starts a GPU container from the distribute image, unpacks the PR source
  # archive into it, runs distribute_test.sh, uploads the case logs and finally
  # removes the container.
  test:
    name: Test
    # Skipped when the caller passes clone-can-skip == 'true'.
    if: ${{ inputs.clone-can-skip != 'true' }}
    runs-on:
      group: Distribute
    steps:
      - name: Check docker image and run container
        env:
          # Numeric-looking values are quoted so they reach the container
          # verbatim as strings instead of going through YAML number parsing.
          FLAGS_fraction_of_gpu_memory_to_use: "0.15"
          CTEST_OUTPUT_ON_FAILURE: "1"
          CTEST_PARALLEL_LEVEL: "4"
          WITH_GPU: "ON"
          WITH_AVX: "ON"
          WITH_DISTRIBUTE: "ON"
          WITH_TESTING: "ON"
          WITH_COVERAGE: "OFF"
          CMAKE_BUILD_TYPE: Release
          PADDLE_FRACTION_GPU_MEMORY_TO_USE: "0.15"
          PRECISION_TEST: "OFF"
          WITH_UNITY_BUILD: "ON"
          AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}
          AGILE_REVISION: ${{ github.event.pull_request.head.sha }}
          WITH_INCREMENTAL_COVERAGE: "OFF"
          WITH_ONNXRUNTIME: "OFF"
          COVERALLS_UPLOAD: "ON"
          PADDLE_VERSION: "0.0.0"
          GIT_PR_ID: ${{ github.event.pull_request.number }}
          PY_VERSION: "3.10"
          CUDA_ARCH_NAME: Auto
          WITH_CUDNN_FRONTEND: "ON"
          FLAGS_enable_cudnn_frontend: "1"
          CACHE_DIR: /root/.cache/build
          CCACHE_DIR: /root/.ccache/gpubox
        run: |
          # Build "-v host:container" flags for the host CUDA/NVIDIA libraries,
          # the NVIDIA device nodes and nvidia-smi.
          export CUDA_SO="$(\ls -d /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls -d /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
          export DEVICES="$(\ls -d /dev/nvidia* | xargs -I{} echo "-v {}:{}") $(\ls /dev/nvidia-caps/* | xargs -I{} echo "-v {}:{}")"
          export SMI="-v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi"
          container_name=${TASK}-test-$(date +%Y%m%d-%H%M%S)
          # Publish the name so later steps can read it as env.container_name.
          echo "container_name=${container_name}" >> "${GITHUB_ENV}"
          docker run -d -t --name ${container_name} ${CUDA_SO} ${DEVICES} ${SMI} --runtime=nvidia --shm-size=32G \
            -v "/home/data/cfs:/home/data/cfs" \
            -v "/home/data/cfs/.cache/:/root/.cache" \
            -v "/home/data/cfs/.ccache:/root/.ccache" \
            -v "/ssd1/root:/root" \
            -v "/dev/shm:/dev/shm" \
            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
            -v ${{ github.workspace }}:/paddle \
            -e BRANCH \
            -e PR_ID \
            -e COMMIT_ID \
            -e work_dir \
            -e PADDLE_ROOT \
            -e ci_scripts \
            -e CI_name \
            -e FLAGS_fraction_of_gpu_memory_to_use \
            -e CTEST_OUTPUT_ON_FAILURE \
            -e CTEST_PARALLEL_LEVEL \
            -e WITH_GPU \
            -e WITH_AVX \
            -e WITH_DISTRIBUTE \
            -e WITH_TESTING \
            -e WITH_COVERAGE \
            -e CMAKE_BUILD_TYPE \
            -e PADDLE_FRACTION_GPU_MEMORY_TO_USE \
            -e PRECISION_TEST \
            -e WITH_UNITY_BUILD \
            -e AGILE_COMPILE_BRANCH \
            -e AGILE_REVISION \
            -e WITH_INCREMENTAL_COVERAGE \
            -e WITH_ONNXRUNTIME \
            -e COVERALLS_UPLOAD \
            -e PADDLE_VERSION \
            -e GIT_PR_ID \
            -e PY_VERSION \
            -e CUDA_ARCH_NAME \
            -e WITH_CUDNN_FRONTEND \
            -e FLAGS_enable_cudnn_frontend \
            -e CACHE_DIR \
            -e CCACHE_DIR \
            -e no_proxy \
            -w /paddle --network host ${docker_image}

      # Empties the container working directory, fetches the source archive
      # stored for this PR/commit, unpacks it and checks out the `test` branch.
      - name: Download paddle.tar.gz and merge target branch
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
            rm -rf * .[^.]*
            echo "Downloading Paddle.tar.gz"
            wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/gpups/${{ env.PR_ID }}/${{ env.COMMIT_ID }}/Paddle.tar.gz --no-check-certificate
            echo "Extracting Paddle.tar.gz"
            tar --use-compress-program="pzstd" -xf Paddle.tar.gz --strip-components=1
            rm Paddle.tar.gz
            git checkout test
          '

      # ${ci_scripts} is expanded inside the container, where it was injected
      # with `-e ci_scripts`.
      - name: Test
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
            source ${{ github.workspace }}/../../../proxy
            bash ${ci_scripts}/distribute_test.sh
          '

      # Runs even after a failed test step, so /case_logs may not exist; the
      # loop below tolerates a missing or empty directory.
      - name: Upload and display logs
        if: always()
        env:
          home_path: ${{ github.workspace }}/..
          bos_file: ${{ github.workspace }}/../bos_retry/BosClient.py
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
            export AK=paddle
            export SK=paddle
            if [ ! -f "${{ env.bos_file }}" ]; then
              wget -q --no-proxy -O ${{ env.home_path }}/bos_retry.tar.gz https://xly-devops.bj.bcebos.com/home/bos_retry.tar.gz --no-check-certificate
              # -p: the directory may already exist without BosClient.py in it.
              mkdir -p ${{ env.home_path }}/bos_retry
              tar xf ${{ env.home_path }}/bos_retry.tar.gz -C ${{ env.home_path }}/bos_retry
            fi
            if [ -d /case_logs ]; then
              cd /case_logs
              for FILE in /case_logs/*; do
                # Skip the literal pattern left behind by an unmatched glob.
                [ -e "$FILE" ] || continue
                file=$(basename "$FILE")
                python ${{ env.bos_file }} "$file" paddle-github-action/PR/Distribute-Stable/${PR_ID}/${COMMIT_ID}/logs
                echo "$file: https://paddle-github-action.bj.bcebos.com/PR/Distribute-Stable/${PR_ID}/${COMMIT_ID}/logs/$file"
              done
            else
              echo "Log directory not found, nothing to upload: /case_logs"
            fi
          '

      # Best-effort cleanup: `set +e` lets the container removal run even if
      # emptying the working directory fails.
      - name: Terminate and delete the container
        if: always()
        run: |
          set +e
          docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*'
          docker rm -f ${{ env.container_name }}
Loading
Loading