Skip to content

Commit a690d22

Browse files
authored
Merge branch 'master' into loadams/torch19
2 parents e16e804 + 0c75f4a commit a690d22

File tree

7 files changed

+88
-0
lines changed

7 files changed

+88
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
name: CI failure report
3+
about: Report a DeepSpeed CI failure
4+
title: "{{ env.GITHUB_WORKFLOW }} CI test failure"
5+
labels: ci-failure
6+
assignees: ''
7+
8+
---
9+
10+
The Nightly CI for {{ env.GITHUB_SERVER_URL }}/{{ env.GITHUB_REPOSITORY }}/actions/runs/{{ env.GITHUB_RUN_ID }} failed.

.github/workflows/amd-mi200.yml

+13
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ concurrency:
88
group: ${{ github.workflow }}-${{ github.ref }}
99
cancel-in-progress: true
1010

11+
permissions:
12+
contents: read
13+
issues: write
14+
1115
jobs:
1216
amd-tests:
1317
# The type of runner that the job will run on
@@ -65,3 +69,12 @@ jobs:
6569
cd tests
6670
pytest $PYTEST_OPTS -n 4 --verbose unit/
6771
pytest $PYTEST_OPTS -m 'sequential' unit/
72+
73+
- name: Open GitHub issue if nightly CI fails
74+
if: failure()
75+
uses: JasonEtco/create-an-issue@v2
76+
env:
77+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
78+
with:
79+
filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
80+
update_existing: true

.github/workflows/nv-h100.yml

+13
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ concurrency:
88
group: ${{ github.workflow }}-${{ github.ref }}
99
cancel-in-progress: true
1010

11+
permissions:
12+
contents: read
13+
issues: write
14+
1115
jobs:
1216
unit-tests:
1317
runs-on: [self-hosted, nvidia, h100]
@@ -49,3 +53,12 @@ jobs:
4953
cd tests
5054
python -m pytest $PYTEST_OPTS -n 4 unit/ --torch_ver="2.0" --cuda_ver="12"
5155
python -m pytest $PYTEST_OPTS -m 'sequential' unit/ --torch_ver="2.0" --cuda_ver="12"
56+
57+
- name: Open GitHub issue if nightly CI fails
58+
if: failure()
59+
uses: JasonEtco/create-an-issue@v2
60+
env:
61+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
62+
with:
63+
filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
64+
update_existing: true

.github/workflows/nv-nightly.yml

+13
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ concurrency:
88
group: ${{ github.workflow }}-${{ github.ref }}
99
cancel-in-progress: true
1010

11+
permissions:
12+
contents: read
13+
issues: write
14+
1115
jobs:
1216
unit-tests:
1317
runs-on: [self-hosted, nvidia, cu116, v100]
@@ -47,3 +51,12 @@ jobs:
4751
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
4852
cd tests
4953
pytest $PYTEST_OPTS --forked -m 'nightly' unit/ --torch_ver="1.13" --cuda_ver="11.6"
54+
55+
- name: Open GitHub issue if nightly CI fails
56+
if: failure()
57+
uses: JasonEtco/create-an-issue@v2
58+
env:
59+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
60+
with:
61+
filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
62+
update_existing: true

.github/workflows/nv-torch-nightly-v100.yml

+13
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ concurrency:
88
group: ${{ github.workflow }}-${{ github.ref }}
99
cancel-in-progress: true
1010

11+
permissions:
12+
contents: read
13+
issues: write
14+
1115
jobs:
1216
unit-tests:
1317
runs-on: [self-hosted, nvidia, cu116, v100]
@@ -48,3 +52,12 @@ jobs:
4852
cd tests
4953
pytest $PYTEST_OPTS --forked -n 4 unit/
5054
pytest $PYTEST_OPTS --forked -m 'sequential' unit/
55+
56+
- name: Open GitHub issue if nightly CI fails
57+
if: failure()
58+
uses: JasonEtco/create-an-issue@v2
59+
env:
60+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
61+
with:
62+
filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
63+
update_existing: true

.github/workflows/nv-torch19-p40.yml

+13
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ concurrency:
1212
group: ${{ github.workflow }}-${{ github.ref }}
1313
cancel-in-progress: true
1414

15+
permissions:
16+
contents: read
17+
issues: write
18+
1519
jobs:
1620
unit-tests:
1721
runs-on: [self-hosted, nvidia, cu111, p40]
@@ -51,3 +55,12 @@ jobs:
5155
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
5256
cd tests
5357
pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="1.9" --cuda_ver="11.1"
58+
59+
- name: Open GitHub issue if nightly CI fails
60+
if: failure()
61+
uses: JasonEtco/create-an-issue@v2
62+
env:
63+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
64+
with:
65+
filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
66+
update_existing: true

.github/workflows/nv-torch19-v100.yml

+13
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ concurrency:
1212
group: ${{ github.workflow }}-${{ github.ref }}
1313
cancel-in-progress: true
1414

15+
permissions:
16+
contents: read
17+
issues: write
18+
1519
jobs:
1620
unit-tests:
1721
runs-on: [self-hosted, nvidia, cu111, v100]
@@ -52,3 +56,12 @@ jobs:
5256
cd tests
5357
pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="1.9" --cuda_ver="11"
5458
pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="1.9" --cuda_ver="11"
59+
60+
- name: Open GitHub issue if nightly CI fails
61+
if: failure()
62+
uses: JasonEtco/create-an-issue@v2
63+
env:
64+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
65+
with:
66+
filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
67+
update_existing: true

0 commit comments

Comments
 (0)