Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .github/workflows/fast_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ jobs:
start-runner:
name: Start self-hosted EC2 runner
needs: authorize
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
AWS_REGION: us-east-1
EC2_AMI_ID: ami-0a82d7d7ad5d25f56
EC2_AMI_ID: ami-0a2179742e502fdfe
EC2_INSTANCE_TYPE: dl1.24xlarge
EC2_SUBNET_ID: subnet-b7533b96
EC2_SECURITY_GROUP: sg-08af7938042271373
Expand Down Expand Up @@ -77,7 +77,7 @@ jobs:
ref: ${{ github.event.pull_request.merge_commit_sha }}
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -89,7 +89,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/fast_tests.sh
diffusers:
name: Run tests for optimum.habana.diffusers
Expand All @@ -113,7 +113,7 @@ jobs:
ref: ${{ github.event.pull_request.merge_commit_sha }}
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -125,7 +125,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/fast_tests_diffusers.sh
stop-runner:
name: Stop self-hosted EC2 runner
Expand All @@ -134,7 +134,7 @@ jobs:
- start-runner # required to get output from the start-runner job
- transformers # required to wait for the tests to be finished
- diffusers # required to wait for the tests to be finished
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
AWS_REGION: us-east-1
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
Expand Down
34 changes: 17 additions & 17 deletions .github/workflows/slow_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ concurrency:
jobs:
start-runner:
name: Start self-hosted EC2 runner
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
AWS_REGION: us-west-2
EC2_AMI_ID: ami-01b277257cd28a061
EC2_AMI_ID: ami-0961e95b539f72c46
EC2_INSTANCE_TYPE: dl1.24xlarge
EC2_SUBNET_ID: subnet-452c913d
EC2_SECURITY_GROUP: sg-0894f4f70dd6bd778
Expand Down Expand Up @@ -55,7 +55,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -67,7 +67,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/example_diff_tests.sh
stable-diffusion:
name: Test Stable Diffusion
Expand All @@ -83,7 +83,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -95,7 +95,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_diffusers.sh
deepspeed:
name: Test DeepSpeed models
Expand All @@ -112,7 +112,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -124,7 +124,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_deepspeed.sh
multi-card:
name: Test multi-card models
Expand All @@ -141,7 +141,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -153,7 +153,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_8x.sh
single-card:
name: Test single-card models
Expand All @@ -171,7 +171,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -183,7 +183,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_1x.sh
albert-xxl-single-card:
name: Test single-card ALBERT XXL
Expand All @@ -204,7 +204,7 @@ jobs:
- name: Pull image
if: github.event.schedule == '0 21 * * 6'
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run test
if: github.event.schedule == '0 21 * * 6'
run: |
Expand All @@ -217,7 +217,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/albert_xxl_1x.sh
- name: Warning
if: github.event.schedule != '0 21 * * 6'
Expand All @@ -240,7 +240,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -252,7 +252,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
stop-runner:
name: Stop self-hosted EC2 runner
Expand All @@ -264,7 +264,7 @@ jobs:
- single-card
- albert-xxl-single-card
- text-generation
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
AWS_REGION: us-west-2
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
Expand Down
20 changes: 10 additions & 10 deletions .github/workflows/slow_tests_gaudi2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -30,7 +30,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_diffusers.sh
deepspeed:
name: Test DeepSpeed models
Expand All @@ -43,7 +43,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -56,7 +56,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_deepspeed.sh
multi-card:
name: Test multi-card models
Expand All @@ -69,7 +69,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -82,7 +82,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_8x.sh
single-card:
name: Test single-card models
Expand All @@ -96,7 +96,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -110,7 +110,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
/bin/bash tests/ci/slow_tests_1x.sh
text-generation:
name: Test text-generation example
Expand All @@ -125,7 +125,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
docker pull vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest
- name: Run tests
run: |
docker run \
Expand All @@ -138,5 +138,5 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest \
make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ slow_tests_8x: test_installs

# Run DeepSpeed non-regression tests
slow_tests_deepspeed: test_installs
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
python -m pytest tests/test_examples.py -v -s -k "deepspeed"

slow_tests_diffusers: test_installs
Expand All @@ -58,7 +58,7 @@ slow_tests_diffusers: test_installs

# Run text-generation non-regression tests
slow_tests_text_generation_example: test_installs
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
python -m pytest tests/test_text_generation_example.py tests/test_encoder_decoder_text_summarization.py -v -s --token $(TOKEN)

# Check if examples are up to date with the Transformers library
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-habana` is up

> To use DeepSpeed on HPUs, you also need to run the following command:
>```bash
>pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
>pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
>```

Optimum Habana is a fast-moving project, and you may want to install it from source:
Expand Down Expand Up @@ -211,7 +211,7 @@ Please refer to Habana Gaudi's official [installation guide](https://docs.habana

> Tests should be run in a Docker container based on Habana Docker images.
>
> The current version has been validated for SynapseAI 1.13.
> The current version has been validated for SynapseAI 1.14.


## Development
Expand Down
10 changes: 2 additions & 8 deletions docs/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
FROM vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest

ARG commit_sha
ARG clone_url
Expand All @@ -7,13 +7,7 @@ ARG clone_url
RUN apt-get update && apt-get install -y \
software-properties-common \
npm

# Need node to build doc HTML. Taken from https://stackoverflow.com/a/67491580
RUN apt-get update && apt-get install -y \
software-properties-common \
npm
RUN npm install npm@9.8.1 -g && \
npm install n -g && \
RUN npm install n -g && \
n latest

RUN git clone $clone_url optimum-habana && cd optimum-habana && git checkout $commit_sha
Expand Down
2 changes: 1 addition & 1 deletion docs/source/installation.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ python -m pip install --upgrade-strategy eager optimum[habana]
To use DeepSpeed on HPUs, you also need to run the following command:

```bash
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
```

4 changes: 2 additions & 2 deletions docs/source/usage_guides/deepspeed.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ You can find more information about DeepSpeed Gaudi integration [here](https://d
To use DeepSpeed on Gaudi, you need to install Optimum Habana and [Habana's DeepSpeed fork](https://github.com/HabanaAI/DeepSpeed) with:
```bash
pip install optimum[habana]
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
```


Expand Down Expand Up @@ -78,7 +78,7 @@ It is strongly advised to read [this section](https://huggingface.co/docs/transf

</Tip>

Other examples of configurations for HPUs are proposed [here](https://github.com/HabanaAI/Model-References/tree/1.13.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts) by Habana.
Other examples of configurations for HPUs are proposed [here](https://github.com/HabanaAI/Model-References/tree/1.14.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts) by Habana.

The [Transformers documentation](https://huggingface.co/docs/transformers/main_classes/deepspeed#configuration) explains how to write a configuration from scratch very well.
A more complete description of all configuration possibilities is available [here](https://www.deepspeed.ai/docs/config-json/).
Expand Down
2 changes: 1 addition & 1 deletion examples/audio-classification/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ On 8 HPUs, this script should run in ~12 minutes and yield an accuracy of **80.4

> You need to install DeepSpeed with:
> ```bash
> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
> ```

DeepSpeed can be used with almost the same command as for a multi-card run:
Expand Down
2 changes: 1 addition & 1 deletion examples/gaudi_spawn.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def main():
if not is_deepspeed_available():
raise ImportError(
"--use_deepspeed requires deepspeed: `pip install"
" git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0`."
" git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0`."
)

# Patch sys.argv
Expand Down
Loading