Skip to content

Commit

Permalink
Merge branch 'master' into feature/3330_trainer_profiler_str
Browse files Browse the repository at this point in the history
  • Loading branch information
awaelchli authored Oct 27, 2020
2 parents aa9deca + 8e3faa2 commit 8b5b5ef
Show file tree
Hide file tree
Showing 25 changed files with 470 additions and 164 deletions.
34 changes: 3 additions & 31 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,44 +20,21 @@ name: torch-GPU

steps:
- name: testing
image: pytorchlightning/pytorch_lightning:cuda-extras-py3.7-torch1.5
image: pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.5

environment:
SLURM_LOCALID: 0
CODECOV_TOKEN:
from_secret: codecov_token
MKL_THREADING_LAYER: GNU
HOROVOD_GPU_OPERATIONS: NCCL
HOROVOD_WITH_PYTORCH: 1
HOROVOD_WITHOUT_TENSORFLOW: 1
HOROVOD_WITHOUT_MXNET: 1
HOROVOD_WITH_GLOO: 1
HOROVOD_WITHOUT_MPI: 1

#volumes:
# # Mount pip cache from host
# - name: pip_cache
# path: /opt/conda/lib/python3.7/site-packages

commands:
# todo: remove unsets as in correct image Horovod shall be set
- unset HOROVOD_GPU_ALLREDUCE
- unset HOROVOD_GPU_BROADCAST
- export PATH="$PATH:/root/.local/bin"
- python --version
- pip install pip -U
- pip --version
- nvidia-smi
#- bash ./requirements/install_AMP.sh
- apt-get update && apt-get install -y cmake
- pip uninstall -y horovod # todo: this shall not be needed
- pip install -r ./requirements/devel.txt --user -q --upgrade-strategy only-if-needed --no-cache-dir
#- pip install -r ./requirements/docs.txt --user -q
- pip install -r ./requirements/examples.txt --user -q --upgrade-strategy only-if-needed
- pip install -r ./requirements/devel.txt --upgrade-strategy only-if-needed -v --no-cache-dir
- pip list
- python -c "import torch ; print(' & '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]) if torch.cuda.is_available() else 'only CPU')"
- coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --color=yes --durations=25 # --flake8
- python -m py.test benchmarks pl_examples -v --color=yes --maxfail=2 --durations=0 # --flake8
- python -m pytest benchmarks pl_examples -v --color=yes --maxfail=2 --durations=0 # --flake8
#- cd docs; make doctest; make coverage
- coverage report
# see: https://docs.codecov.io/docs/merging-reports
Expand All @@ -73,8 +50,3 @@ trigger:
include:
- push
- pull_request

#volumes:
# - name: pip_cache
# host:
# path: /tmp/cache/drone/pip
84 changes: 61 additions & 23 deletions .github/workflows/ci_dockers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra
branches: [master]

jobs:
build-Conda:
build-PL:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
Expand All @@ -21,18 +21,16 @@ jobs:
uses: actions/checkout@v2

# https://github.com/docker/setup-buildx-action
# to use cache-from and cache-to argument of buildx command
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Build Conda Docker
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
- uses: docker/setup-buildx-action@v1
- name: Build PL Docker
# publish master
uses: docker/build-push-action@v2
with:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
PYTORCH_VERSION=${{ matrix.pytorch_version }}
file: dockers/conda/Dockerfile
file: dockers/release/Dockerfile
push: false
timeout-minutes: 50

Expand All @@ -48,10 +46,8 @@ jobs:
uses: actions/checkout@v2

# https://github.com/docker/setup-buildx-action
# to use cache-from and cache-to argument of buildx command
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
- uses: docker/setup-buildx-action@v1
- name: Build XLA Docker
# publish master
uses: docker/build-push-action@v2
Expand All @@ -70,33 +66,75 @@ jobs:
fail-fast: false
matrix:
include:
#- python_version: 3.7
# pytorch_version: 1.8 # todo
# pytorch_channel: pytorch-nightly
- python_version: 3.8
#- python_version: 3.8
# pytorch_version: 1.7 # todo
- python_version: 3.7
pytorch_version: 1.6
pytorch_channel: pytorch
- python_version: 3.6
pytorch_version: 1.5
pytorch_channel: pytorch
pytorch_version: 1.3
steps:
- name: Checkout
uses: actions/checkout@v2

# https://github.com/docker/setup-buildx-action
# to use cache-from and cache-to argument of buildx command
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
# for PT 1.3 and 1.4 we need to use CUDA 10.1
- run: |
cuda=$(python -c "print(10.2 if float(${{matrix.pytorch_version}}) > 1.4 else 10.1)" 2>&1)
echo "::set-output name=CUDA::$cuda"
id: extend
# https://github.com/docker/setup-buildx-action
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
- uses: docker/setup-buildx-action@v1
- name: Build CUDA Docker
# publish master
uses: docker/build-push-action@v2
with:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
PYTORCH_VERSION=${{ matrix.pytorch_version }}
PYTORCH_CHANNEL=${{ matrix.pytorch_channel }}
CUDA_VERSION=${{ steps.extend.outputs.CUDA }}
cache-from: pytorchlightning/pytorch_lightning:base-cuda-cache-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
file: dockers/base-cuda/Dockerfile
push: false
timeout-minutes: 50

build-conda:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
include:
- python_version: 3.8
pytorch_version: 1.6
- python_version: 3.6
pytorch_version: 1.4
#- python_version: 3.7
# pytorch_version: 1.8 # todo
steps:
- name: Checkout
uses: actions/checkout@v2

# for PT 1.3 and 1.4 we need to use CUDA 10.1
- run: |
cuda=$(python -c "print(10.2 if float(${{matrix.pytorch_version}}) > 1.4 else 10.1)" 2>&1)
echo "::set-output name=CUDA::$cuda"
channel=$(python -c "print('pytorch-nightly' if float(${{matrix.pytorch_version}}) > 1.7 else 'pytorch')" 2>&1)
echo "::set-output name=CHANNEL::$channel"
id: extend
# https://github.com/docker/setup-buildx-action
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
- uses: docker/setup-buildx-action@v1
- name: Build CUDA Docker
# publish master
uses: docker/build-push-action@v2
with:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
PYTORCH_VERSION=${{ matrix.pytorch_version }}
PYTORCH_CHANNEL=${{ steps.extend.outputs.CHANNEL }}
CUDA_VERSION=${{ steps.extend.outputs.CUDA }}
cache-from: pytorchlightning/pytorch_lightning:base-conda-cache-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
file: dockers/base-conda/Dockerfile
push: false
timeout-minutes: 50
8 changes: 4 additions & 4 deletions .github/workflows/ci_test-conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ on: # Trigger the workflow on push or pull request, but only for the master bra

jobs:
conda:
runs-on: ${{ matrix.os }}
container: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}
runs-on: ubuntu-20.04
container: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04]
# os: [ubuntu-20.04]
python-version: [3.7]
pytorch-version: [1.3, 1.4, 1.5, 1.6, 1.7]
pytorch-version: [1.3, 1.4, 1.5, 1.6] # , 1.7 # todo

# Timeout: https://stackoverflow.com/a/59076067/4521646
timeout-minutes: 35
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/docker-builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
types: [created]

jobs:
build-Conda:
build-PL:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
Expand Down Expand Up @@ -36,7 +36,7 @@ jobs:
repository: pytorchlightning/pytorch_lightning
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
dockerfile: dockers/conda/Dockerfile
dockerfile: dockers/release/Dockerfile
build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ env.RELEASE_VERSION }}
tags: "${{ env.RELEASE_VERSION }}-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }},latest-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}"
timeout-minutes: 55
50 changes: 24 additions & 26 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:

# based on https://github.com/pypa/gh-action-pypi-publish
jobs:

pypi-release:
runs-on: ubuntu-20.04

Expand Down Expand Up @@ -47,10 +48,8 @@ jobs:
uses: actions/checkout@v2

# https://github.com/docker/setup-buildx-action
# to use cache-from and cache-to argument of buildx command
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
- uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v1
with:
Expand Down Expand Up @@ -78,45 +77,40 @@ jobs:
matrix:
python_version: [3.6, 3.7, 3.8]
pytorch_version: [1.3, 1.4, 1.5, 1.6] # todo: , 1.7
pytorch_channel: ["pytorch", "pytorch-nightly"]
# https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#example-including-new-combinations
exclude:
- pytorch_version: 1.7
pytorch_channel: pytorch
- pytorch_version: 1.3
pytorch_channel: pytorch-nightly
- pytorch_version: 1.4
pytorch_channel: pytorch-nightly
- pytorch_version: 1.5
pytorch_channel: pytorch-nightly
- pytorch_version: 1.6
pytorch_channel: pytorch-nightly
- pytorch_version: 1.3
pytorch_channel: pytorch
python_version: 3.8
# excludes PT 1.3 as it is missing on pypi
- python_version: 3.8
pytorch_version: 1.3

steps:
- name: Checkout
uses: actions/checkout@v2

# https://github.com/docker/setup-buildx-action
# to use cache-from and cache-to argument of buildx command
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
- uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

# for PT 1.3 and 1.4 we need to use CUDA 10.1
- run: |
cuda=$(python -c "print(10.2 if float(${{matrix.pytorch_version}}) > 1.4 else 10.1)" 2>&1)
echo "::set-output name=CUDA::$cuda"
channel=$(python -c "print('pytorch-nightly' if float(${{matrix.pytorch_version}}) > 1.7 else 'pytorch')" 2>&1)
echo "::set-output name=CHANNEL::$channel"
id: extend
- name: Publish CUDA to Docker Hub
# publish master
uses: docker/build-push-action@v2
with:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
PYTORCH_VERSION=${{ matrix.pytorch_version }}
PYTORCH_CHANNEL=${{ matrix.pytorch_channel }}
CUDA_VERSION=${{ steps.extend.outputs.CUDA }}
cache-from: pytorchlightning/pytorch_lightning:base-cuda-cache-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
cache-to: pytorchlightning/pytorch_lightning:base-cuda-cache-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
file: dockers/base-cuda/Dockerfile
Expand All @@ -131,7 +125,11 @@ jobs:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
PYTORCH_VERSION=${{ matrix.pytorch_version }}
file: dockers/conda/Dockerfile
PYTORCH_CHANNEL=${{ steps.extend.outputs.CHANNEL }}
CUDA_VERSION=${{ steps.extend.outputs.CUDA }}
cache-from: pytorchlightning/pytorch_lightning:base-conda-cache-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
cache-to: pytorchlightning/pytorch_lightning:base-conda-cache-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
file: dockers/base-conda/Dockerfile
push: true
tags: pytorchlightning/pytorch_lightning:nightly-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
timeout-minutes: 55
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,4 @@ mlruns/
*.ckpt
pytorch\ lightning
test-reports/
wandb
22 changes: 22 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,38 +11,60 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Added `dirpath` and `filename` parameter in `ModelCheckpoint` ([#4213](https://github.com/PyTorchLightning/pytorch-lightning/pull/4213))


- Added plugins docs and DDPPlugin to customize ddp across all accelerators ([#4258](https://github.com/PyTorchLightning/pytorch-lightning/pull/4285))


- Added `strict` option to the scheduler dictionary ([#3586](https://github.com/PyTorchLightning/pytorch-lightning/pull/3586))


- Added `fsspec` support for profilers ([#4162](https://github.com/PyTorchLightning/pytorch-lightning/pull/4162))


- Added autogenerated helptext to `Trainer.add_argparse_args`. ([#4344](https://github.com/PyTorchLightning/pytorch-lightning/pull/4344))


- Added support for string values in `Trainer`'s `profiler` parameter ([#3656](https://github.com/PyTorchLightning/pytorch-lightning/pull/3656))

### Changed


- Improved error messages for invalid `configure_optimizers` returns ([#3587](https://github.com/PyTorchLightning/pytorch-lightning/pull/3587))


- Allow changing the logged step value in `validation_step` ([#4130](https://github.com/PyTorchLightning/pytorch-lightning/pull/4130))


- Allow setting `replace_sampler_ddp=True` with a distributed sampler already added ([#4273](https://github.com/PyTorchLightning/pytorch-lightning/pull/4273))


- Fixed sanitized parameters for `WandbLogger.log_hyperparams` ([#4320](https://github.com/PyTorchLightning/pytorch-lightning/pull/4320))


### Deprecated


- Deprecated `filepath` in `ModelCheckpoint` ([#4213](https://github.com/PyTorchLightning/pytorch-lightning/pull/4213))


- Deprecated `reorder` parameter of the `auc` metric ([#4237](https://github.com/PyTorchLightning/pytorch-lightning/pull/4237))


- Deprecated bool values in `Trainer`'s `profiler` parameter ([#3656](https://github.com/PyTorchLightning/pytorch-lightning/pull/3656))


### Removed



### Fixed

- Fixed setting device ids in DDP ([#4297](https://github.com/PyTorchLightning/pytorch-lightning/pull/4297))

- Fixed synchronization of best model path in `ddp_accelerator` ([#4323](https://github.com/PyTorchLightning/pytorch-lightning/pull/4323))

- Fixed WandbLogger not uploading checkpoint artifacts at the end of training ([#4341](https://github.com/PyTorchLightning/pytorch-lightning/pull/4341))

## [1.0.3] - 2020-10-20

### Added
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ Lightning can automatically export to ONNX or TorchScript for those cases.

| System / PyTorch ver. | 1.3 (min. req.)* | 1.4 | 1.5 | 1.6 (latest) | 1.7 (nightly) |
| :---: | :---: | :---: | :---: | :---: | :---: |
| Conda py3.7 [linux] | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) |
| Linux py3.7 [GPUs**] | - | - |[![Build Status](http://104.154.220.231/api/badges/PyTorchLightning/pytorch-lightning/status.svg)](http://104.154.220.231/PyTorchLightning/pytorch-lightning) | - | - |
| Conda py3.7 [linux] | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | - |
| Linux py3.7 [GPUs**] | - | - | [![Build Status](http://104.154.220.231/api/badges/PyTorchLightning/pytorch-lightning/status.svg)](http://104.154.220.231/PyTorchLightning/pytorch-lightning) | - | - |
| Linux py3.7 [TPUs***] | - | - | - | [![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22TPU+tests%22+branch%3Amaster) | - |
| Linux py3.6 / py3.7 / py3.8 | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - |
| OSX py3.6 / py3.7 | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - |
Expand Down
Loading

0 comments on commit 8b5b5ef

Please sign in to comment.