From a6aa1a0f82aa3c7dc6e9529002a3bfd52ae8d4ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Tue, 4 May 2021 11:56:27 +0200 Subject: [PATCH] make gpus=str in Trainer consistent with command line parsing of string (#6388) * string gpu input * update docs * deprecation warning * Revert "update docs" This reverts commit c5f38934133812280ae98f6489c008a39796dd4d. * deprecation * add changelog * update parser * update warning * implement v1.5 behavior ahead of time * formatting * set accelerator in test to avoid different warning * add warning * remove todo warn * Update pytorch_lightning/utilities/device_parser.py Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> * resolve flake8 Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Co-authored-by: Jirka Borovec Co-authored-by: tchaton --- CHANGELOG.md | 2 ++ docs/source/advanced/multi_gpu.rst | 6 +++- pytorch_lightning/utilities/device_parser.py | 32 +++++++++++++++----- tests/deprecated_api/test_remove_1-5.py | 27 +++++++++++++++-- tests/models/test_gpu.py | 7 +++-- 5 files changed, 61 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9a063510c544..b91d02ec1aa61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -249,6 +249,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated the `LightningModule.datamodule` getter and setter methods; access them through `Trainer.datamodule` instead ([#7168](https://github.com/PyTorchLightning/pytorch-lightning/pull/7168)) +- Deprecated the use of `Trainer(gpus="i")` (string) for selecting the i-th GPU; from v1.5 this will set the number of GPUs instead of the index ([#6388](https://github.com/PyTorchLightning/pytorch-lightning/pull/6388)) + ### Removed diff --git a/docs/source/advanced/multi_gpu.rst b/docs/source/advanced/multi_gpu.rst index 492f253214409..3e159f5ba79f1 100644 --- a/docs/source/advanced/multi_gpu.rst +++ b/docs/source/advanced/multi_gpu.rst @@ -226,13 +226,17 @@ Note in particular the difference between `gpus=0`, `gpus=[0]` and `gpus="0"`. +---------------+-----------+---------------------+---------------------------------+ | "0" | str | [0] | GPU 0 | +---------------+-----------+---------------------+---------------------------------+ -| "3" | str | [3] | GPU 3 | +| "3" | str | [3] | GPU 3 (will change in v1.5) | +---------------+-----------+---------------------+---------------------------------+ | "1, 3" | str | [1, 3] | GPUs 1 and 3 | +---------------+-----------+---------------------+---------------------------------+ | "-1" | str | [0, 1, 2, ...] | all available GPUs | +---------------+-----------+---------------------+---------------------------------+ +.. warning:: + The behavior for :code:`gpus="3"` (str) will change. Currently it selects the GPU with index 3, but will + select the first 3 GPUs from v1.5. + .. note:: When specifying number of gpus as an integer ``gpus=k``, setting the trainer flag diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py index f81a4ece1c6d0..511a91326953d 100644 --- a/pytorch_lightning/utilities/device_parser.py +++ b/pytorch_lightning/utilities/device_parser.py @@ -11,12 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import operator from typing import Any, List, MutableSequence, Optional, Tuple, Union import torch -from pytorch_lightning.utilities import _TPU_AVAILABLE +from pytorch_lightning.utilities import _TPU_AVAILABLE, rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.imports import _compare_version def determine_root_gpu_device(gpus: List[int]) -> Optional[int]: @@ -66,9 +68,12 @@ def parse_gpu_ids(gpus: Optional[Union[int, str, List[int]]]) -> Optional[List[i if gpus is None or isinstance(gpus, int) and gpus == 0: return None + if _compare_version("pytorch_lightning", operator.ge, "1.5") and isinstance(gpus, str) and gpus.strip() == "0": + # TODO: in v1.5 combine this with the above if statement + return None + # We know user requested GPUs therefore if some of the # requested GPUs are not available an exception is thrown. - gpus = _normalize_parse_gpu_string_input(gpus) gpus = _normalize_parse_gpu_input_to_list(gpus) if not gpus: @@ -107,13 +112,24 @@ def parse_tpu_cores(tpu_cores: Union[int, str, List]) -> Optional[Union[List[int def _normalize_parse_gpu_string_input(s: Union[int, str, List[int]]) -> Union[int, List[int]]: - if isinstance(s, str): - if s == '-1': - return -1 - else: - return [int(x.strip()) for x in s.split(',') if len(x) > 0] - else: + if not isinstance(s, str): return s + if s == '-1': + return -1 + elif ',' in s: + return [int(x.strip()) for x in s.split(',') if len(x) > 0] + else: + num_gpus = int(s.strip()) + if _compare_version("pytorch_lightning", operator.lt, "1.5"): + rank_zero_warn( + f"Parsing of the Trainer argument gpus='{s}' (string) will change in the future." + " In the current version of Lightning, this will select" + f" CUDA device with index {num_gpus}, but from v1.5 it will select gpus" + f" {list(range(num_gpus))} (same as gpus={s} (int)).", + DeprecationWarning, + ) + return [num_gpus] + return num_gpus def _sanitize_gpu_ids(gpus: List[int]) -> List[int]: diff --git a/tests/deprecated_api/test_remove_1-5.py b/tests/deprecated_api/test_remove_1-5.py index 47a76b8c6db80..f211fe08089df 100644 --- a/tests/deprecated_api/test_remove_1-5.py +++ b/tests/deprecated_api/test_remove_1-5.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Test deprecated functionality which will be removed in v1.5.0""" +import operator import os from typing import Any, Dict from unittest import mock @@ -26,8 +27,10 @@ from pytorch_lightning.loggers import WandbLogger from pytorch_lightning.profiler import AdvancedProfiler, BaseProfiler, PyTorchProfiler, SimpleProfiler from pytorch_lightning.trainer.callback_hook import warning_cache as callback_warning_cache +from pytorch_lightning.utilities import device_parser +from pytorch_lightning.utilities.imports import _compare_version from tests.deprecated_api import no_deprecated_call -from tests.helpers import BoringModel, BoringDataModule +from tests.helpers import BoringDataModule, BoringModel from tests.helpers.utils import no_warning_call @@ -48,7 +51,7 @@ def test_v1_5_0_model_checkpoint_save_function(): @mock.patch('pytorch_lightning.loggers.wandb.wandb') -def test_v1_5_0_wandb_unused_sync_step(tmpdir): +def test_v1_5_0_wandb_unused_sync_step(_): with pytest.deprecated_call(match=r"v1.2.1 and will be removed in v1.5"): WandbLogger(sync_step=True) @@ -382,6 +385,26 @@ def test_v1_5_0_lighting_module_grad_norm(tmpdir): model.grad_norm(2) +@pytest.mark.xfail( + condition=_compare_version("pytorch_lightning", operator.ge, "1.5"), + reason="parsing of string will change in v1.5", +) +@mock.patch('torch.cuda.device_count', return_value=4) +def test_v1_5_0_trainer_gpus_str_parsing(*_): + # TODO: when removing this, make sure docs in docs/advanced/multi-gpu.rst reflect the new + # behavior regarding GPU selection. Ping @awaelchli if unsure. + with pytest.deprecated_call(match=r"Parsing of the Trainer argument gpus='3' .* will change."): + Trainer(gpus="3", accelerator="ddp_spawn") + + with pytest.deprecated_call(match=r"Parsing of the Trainer argument gpus='3' .* will change."): + gpus = device_parser.parse_gpu_ids("3") + assert gpus == [3] + + with pytest.deprecated_call(match=r"Parsing of the Trainer argument gpus='0' .* will change."): + gpus = device_parser.parse_gpu_ids("0") + assert gpus == [0] + + def test_v1_5_0_datamodule_setter(): model = BoringModel() datamodule = BoringDataModule() diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index 7764754594a09..65a1e093a9e96 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import operator from collections import namedtuple from unittest.mock import patch @@ -22,12 +23,14 @@ from pytorch_lightning import Trainer from pytorch_lightning.utilities import device_parser from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.imports import _compare_version from tests.helpers import BoringModel from tests.helpers.datamodules import ClassifDataModule from tests.helpers.imports import Batch, Dataset, Example, Field, LabelField from tests.helpers.runif import RunIf from tests.helpers.simple_models import ClassificationModel +PL_VERSION_LT_1_5 = _compare_version("pytorch_lightning", operator.lt, "1.5") PRETEND_N_OF_GPUS = 16 @@ -171,8 +174,8 @@ def test_determine_root_gpu_device(gpus, expected_root_gpu): pytest.param([0], [0]), pytest.param([1, 3], [1, 3]), pytest.param((1, 3), [1, 3]), - pytest.param('0', [0]), - pytest.param('3', [3]), + pytest.param('0', None, marks=pytest.mark.skipif(PL_VERSION_LT_1_5, reason="available from v1.5")), + pytest.param('3', [0, 1, 2], marks=pytest.mark.skipif(PL_VERSION_LT_1_5, reason="available from v1.5")), pytest.param('1, 3', [1, 3]), pytest.param('2,', [2]), pytest.param('-1', list(range(PRETEND_N_OF_GPUS)), id="'-1' - use all gpus"),