Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: switch to custom docker images #2123

Merged
merged 20 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions .azure/gpu-integrations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,22 +65,22 @@ jobs:
set -e
pip install -q packaging fire requests wget
python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
python adjust-torch-versions.py requirements.txt $(torch-ver)
python adjust-torch-versions.py requirements/integrate.txt $(torch-ver)
python .github/assistant.py set-oldest-versions --req_files='["requirements/integrate.txt"]'
cat requirements/integrate.txt
python adjust-torch-versions.py requirements/base.txt $(torch-ver)
python adjust-torch-versions.py requirements/_integrate.txt $(torch-ver)
# FixMe: this should not be applied to all integrations/cases
python .github/assistant.py set-oldest-versions --req_files='["requirements/_integrate.txt"]'
cat requirements/_integrate.txt
displayName: "Adjust versions"

- bash: |
set -ex
pip install -q -r requirements/integrate.txt
pip install -q -r requirements/_integrate.txt
# force reinstall TM as it could be overwritten by integration's dependencies
pip install . -U -r requirements/test.txt --find-links ${TORCH_URL}
pip list
pip install . -U -r requirements/_tests.txt --find-links ${TORCH_URL}
displayName: "Install package & integrations"

- bash: |
set -e
pip list
python -c "from torch import __version__ as ver ; assert str(ver).split('+')[0] == '$(torch-ver)', f'PyTorch: {ver}'"
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'found GPUs: {mgpu}'"
displayName: "Sanity check"
Expand Down
56 changes: 11 additions & 45 deletions .azure/gpu-unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@ jobs:
matrix:
"PyTorch | old":
# Torch does not provide prebuilt wheels of old Torch versions for newer CUDA
docker-image: "nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04"
docker-image: "pytorchlightning/torchmetrics:ubuntu20.04-cuda11.1.1-py3.8-torch1.8.1"
agent-pool: "lit-rtx-3090"
torch-ver: "1.8.1"
"PyTorch | 1.X":
docker-image: "pytorch/pytorch:1.13.1-cuda11.6-cudnn8-runtime"
docker-image: "pytorchlightning/torchmetrics:ubuntu22.04-cuda11.8.0-py3.9-torch1.13"
agent-pool: "lit-rtx-3090"
torch-ver: "1.13.1"
"PyTorch | 2.X":
docker-image: "pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime"
docker-image: "pytorchlightning/torchmetrics:ubuntu22.04-cuda11.8.0-py3.10-torch2.0"
agent-pool: "lit-rtx-3090"
torch-ver: "2.0.0"
torch-ver: "2.0.1"
# how long to run the job before automatically cancelling
timeoutInMinutes: "120"
# how much time to give 'run always even if cancelled tasks' before stopping them
Expand All @@ -51,37 +51,12 @@ jobs:

container:
image: "$(docker-image)"
options: "--gpus=all --shm-size=8g -v /usr/bin/docker:/tmp/docker:ro -v /var/tmp:/var/tmp"
options: "--gpus=all --shm-size=8g -v /var/tmp:/var/tmp"

workspace:
clean: all

steps:
- script: |
set -ex
container_id=$(head -1 /proc/self/cgroup|cut -d/ -f3)
echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
/tmp/docker exec -t -u 0 $container_id \
sh -c "apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -o Dpkg::Options::="--force-confold" -y install sudo"
echo "##vso[task.setvariable variable=CONTAINER_ID]$container_id"
displayName: "Install Sudo in container (thanks Microsoft!)"

- script: |
sudo apt-get update -q --fix-missing
sudo apt-get install -q -y --no-install-recommends \
build-essential \
wget \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-dev \
python${PYTHON_VERSION}-distutils
sudo update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll --no-check-certificate
python get-pip.py
env:
PYTHON_VERSION: "3.8"
condition: startsWith(variables['docker-image'], 'nvidia/cuda:')
displayName: "install python & pip"

- bash: |
echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
CUDA_version=$(nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p')
Expand All @@ -96,7 +71,6 @@ jobs:
whereis nvidia
nvidia-smi
echo $CUDA_VISIBLE_DEVICES
echo $CONTAINER_ID
echo $TORCH_URL
python --version
pip --version
Expand All @@ -105,29 +79,22 @@ jobs:
displayName: "Image info & NVIDIA"

- bash: |
pip install -q packaging wget
python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
pip install -q packaging
wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
python adjust-torch-versions.py requirements.txt $(torch-ver)
for fpath in `ls requirements/*.txt`; do
python adjust-torch-versions.py $fpath $(torch-ver)
done
# FixMe: setting the minimal configurations for testing is still missing
displayName: "Adjust versions"

- bash: |
set -ex
sudo apt-get update -qq --fix-missing
sudo apt-get install -y --no-install-recommends \
build-essential gcc g++ cmake ffmpeg git libsndfile1 unzip
# pip install pip -U
pip install -q "numpy<1.24" # trying to resolve pesq installation
pip install . -U -r ./requirements/devel.txt \
--prefer-binary --find-links=${TORCH_URL}
pip install mkl-service==2.4.0 # needed for the gpu multiprocessing
pip list
pip install . -U -r ./requirements/_devel.txt --prefer-binary --find-links=${TORCH_URL}
displayName: "Install environment"

- bash: |
set -e
pip list
python -c "from torch import __version__ as ver ; assert str(ver).split('+')[0] == '$(torch-ver)', f'PyTorch: {ver}'"
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'found GPUs: {mgpu}'"
displayName: "Sanity check"
Expand All @@ -149,8 +116,7 @@ jobs:
displayName: "DocTesting"

- bash: |
# wget is simpler but does not work on Windows
python -c "from urllib.request import urlretrieve ; urlretrieve('https://pl-public-data.s3.amazonaws.com/metrics/data.zip', 'data.zip')"
wget https://pl-public-data.s3.amazonaws.com/metrics/data.zip
unzip -o data.zip
ls -l _data/*
workingDirectory: tests
Expand Down
8 changes: 3 additions & 5 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,18 @@ RUN if [ "${NODE_VERSION}" != "none" ]; then \
fi

COPY requirements/ /tmp/pip-tmp/requirements/
COPY requirements.txt /tmp/pip-tmp/
RUN \
pip3 install awscli && \
aws s3 sync --no-sign-request s3://sphinx-packages/ dist/ && \
pip3 --disable-pip-version-check --no-cache-dir install \
-r /tmp/pip-tmp/requirements/devel.txt \
-r /tmp/pip-tmp/requirements/docs.txt \
-r /tmp/pip-tmp/requirements/_devel.txt \
-r /tmp/pip-tmp/requirements/_docs.txt \
--find-links="https://download.pytorch.org/whl/cpu/torch_stable.html" \
--find-links="dist/" && \
rm -rf /tmp/pip-tmp

# [Optional] If your pip requirements rarely change, uncomment this section to add them to the image.
# COPY requirements.txt /tmp/pip-tmp/
# RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \
# RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements/base.txt \
# && rm -rf /tmp/pip-tmp

# [Optional] Uncomment this section to install additional OS packages.
Expand Down
2 changes: 1 addition & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// https://github.com/microsoft/vscode-dev-containers/tree/v0.194.0/containers/python-3
{
"name": "PyTorch Lightning Metrics",
"image": "pytorchlightning/metrics-dev",
"image": "pytorchlightning/torchmetrics:devcontainer-py3.9",
// If you want to use a different Python version, uncomment the build object below
// "build": {
// "dockerfile": "Dockerfile",
Expand Down
2 changes: 1 addition & 1 deletion .github/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ When you send a PR the continuous integration will run tests and build the docs.
To setup a local development environment, install both local and test dependencies:

```bash
python -m pip install -r requirements/test.txt
python -m pip install -r requirements/_tests.txt
python -m pip install pre-commit
```

Expand Down
2 changes: 1 addition & 1 deletion .github/actions/pull-caches/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ runs:
if: inputs.pytorch-version != ''
run: |
curl https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py -o adjust-torch-versions.py
python adjust-torch-versions.py requirements.txt ${{ inputs.pytorch-version }}
python adjust-torch-versions.py requirements/base.txt ${{ inputs.pytorch-version }}
shell: bash

- name: Set min. dependencies
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/push-caches/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ runs:

- name: Dump wheels
run: |
pip wheel -r requirements/devel.txt --prefer-binary \
pip wheel -r requirements/_devel.txt --prefer-binary \
--wheel-dir=.pip-wheels \
-f ${{ inputs.torch-url }} -f ${{ inputs.pypi-dir }}
ls -lh .pip-wheels
Expand Down
4 changes: 2 additions & 2 deletions .github/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ def prune_packages(req_file: str, *pkgs: str) -> None:
fp.writelines(lines)

@staticmethod
def set_min_torch_by_python(fpath: str = "requirements.txt") -> None:
def set_min_torch_by_python(fpath: str = "requirements/base.txt") -> None:
"""Set minimal torch version according to Python actual version.
>>> AssistantCLI.set_min_torch_by_python("../requirements.txt")
>>> AssistantCLI.set_min_torch_by_python("../requirements/base.txt")
"""
py_ver = f"{sys.version_info.major}.{sys.version_info.minor}"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/ci-integrate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ jobs:
run: |
set -e
curl https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py -o adjust-torch-versions.py
pip install -r requirements/test.txt -r requirements/integrate.txt \
pip install -r requirements/_tests.txt -r requirements/_integrate.txt \
--find-links $PYTORCH_URL -f $PYPI_CACHE --upgrade-strategy eager
python adjust-torch-versions.py requirements.txt
python adjust-torch-versions.py requirements/base.txt
python adjust-torch-versions.py requirements/image.txt
cat requirements.txt
cat requirements/base.txt
pip install -e . --find-links $PYTORCH_URL -f $PYPI_CACHE
pip list

Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ jobs:
run: |
pip --version
pip install -e . -U --find-links $PYTORCH_URL -f $PYPI_CACHE
pip install -r requirements/doctest.txt -U -f $PYPI_CACHE
pip install -r requirements/_doctest.txt -U -f $PYPI_CACHE
pip list

# todo: copy this to install checks
Expand All @@ -116,11 +116,10 @@ jobs:
run: |
curl https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py -o adjust-torch-versions.py
pip install -q cython # needed for installing `pycocotools` in latest config
python adjust-torch-versions.py requirements.txt
for fpath in `ls requirements/*.txt`; do
python adjust-torch-versions.py $fpath
done
pip install --requirement requirements/devel.txt -U \
pip install --requirement requirements/_devel.txt -U \
--find-links $PYTORCH_URL -f $PYPI_CACHE
pip list

Expand Down
57 changes: 47 additions & 10 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: "Build & Push Docker"
name: "Build (& Push) Dockers"

on: # Trigger the workflow on push or pull request, but only for the master branch
push:
Expand All @@ -7,10 +7,10 @@ on: # Trigger the workflow on push or pull request, but only for the master bran
branches: [master]
paths:
- "requirements/*"
- ".devcontainer/*"
- "environment.yml"
- "requirements.txt"
- ".github/workflows/*docker*.yml"
- ".devcontainer/*"
- "dockers/**"
- ".github/workflows/docker-build.yml"
- "setup.py"
workflow_dispatch: {}

Expand All @@ -19,21 +19,21 @@ concurrency:
cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}

env:
PUSH_RELEASE: ${{ github.ref == 'refs/heads/master' || github.event_name == 'workflow_dispatch' }}
PUSH_DOCKERHUB: ${{ github.ref == 'refs/heads/master' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }}

jobs:
build-Devcontainer:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
python_version: ["3.9"]
python: ["3.9", "3.10"]
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Login to DockerHub
if: env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI'
if: env.PUSH_DOCKERHUB == 'true' && github.repository_owner == 'Lightning-AI'
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
Expand All @@ -44,8 +44,45 @@ jobs:
uses: docker/build-push-action@v5
with:
build-args: |
VARIANT=${{ matrix.python_version }}
VARIANT=${{ matrix.python }}
file: .devcontainer/Dockerfile
push: ${{ env.PUSH_RELEASE }}
tags: pytorchlightning/metrics-dev
push: ${{ env.PUSH_DOCKERHUB }}
tags: "pytorchlightning/torchmetrics:devcontainer-py${{ matrix.python }}"
timeout-minutes: 50

build-cuda:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
# These are the base images for PL release docker images,
# so include at least all of the combinations in release-dockers.yml.
- { python: "3.8", pytorch: "1.8.1", cuda: "11.1.1", ubuntu: "20.04" }
- { python: "3.9", pytorch: "1.10", cuda: "11.8.0", ubuntu: "22.04" }
- { python: "3.9", pytorch: "1.11", cuda: "11.8.0", ubuntu: "22.04" }
- { python: "3.9", pytorch: "1.13", cuda: "11.8.0", ubuntu: "22.04" }
- { python: "3.10", pytorch: "2.0", cuda: "11.8.0", ubuntu: "22.04" }
steps:
- uses: actions/checkout@v4

- name: Login to DockerHub
uses: docker/login-action@v3
if: env.PUSH_DOCKERHUB == 'true' && github.repository_owner == 'Lightning-AI'
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

- name: Build (and Push) Devcontainer
uses: docker/build-push-action@v5
with:
build-args: |
UBUNTU_VERSION=${{ matrix.ubuntu }}
PYTHON_VERSION=${{ matrix.python }}
PYTORCH_VERSION=${{ matrix.pytorch }}
CUDA_VERSION=${{ matrix.cuda }}
file: dockers/ubuntu-cuda/Dockerfile
push: ${{ env.PUSH_DOCKERHUB }}
tags: "pytorchlightning/torchmetrics:ubuntu${{ matrix.ubuntu }}-cuda${{ matrix.cuda }}-py${{ matrix.python }}-torch${{ matrix.pytorch }}"
timeout-minutes: 55
2 changes: 1 addition & 1 deletion .github/workflows/docs-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
sudo apt-get update --fix-missing
sudo apt-get install -y cmake
pip --version
pip install . -U -r requirements/docs.txt \
pip install . -U -r requirements/_docs.txt \
--find-links="${PYPI_CACHE}" --find-links="${TORCH_URL}" --find-links="dist/"
pip list

Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ build:
- pip install -U pip awscli --user
- python -m awscli s3 sync --no-sign-request s3://sphinx-packages/ dist/ ; ls -lh dist/
- >
pip install -e . -q -r requirements/docs.txt \
pip install -e . -q -r requirements/_docs.txt \
-f 'https://download.pytorch.org/whl/cpu/torch_stable.html' -f dist/ ;
pip list
# this needs to be split so `sphinx-build` is picked up from the previous installation
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ test: clean env data
cd tests && python -m coverage report

docs: clean
pip install -e . --quiet -r requirements/docs.txt
pip install -e . --quiet -r requirements/_docs.txt
# apt-get install -y texlive-latex-extra dvipng texlive-pictures texlive-fonts-recommended cm-super
TOKENIZERS_PARALLELISM=false python -m sphinx -b html -W --keep-going docs/source docs/build

env:
pip install -e . -U -r requirements/devel.txt
pip install -e . -U -r requirements/_devel.txt

data:
python -c "from urllib.request import urlretrieve ; urlretrieve('https://pl-public-data.s3.amazonaws.com/metrics/data.zip', 'data.zip')"
Expand Down
Loading
Loading