From a6aa1a0f82aa3c7dc6e9529002a3bfd52ae8d4ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= <aedu.waelchli@gmail.com>
Date: Tue, 4 May 2021 11:56:27 +0200
Subject: [PATCH] make gpus=str in Trainer consistent with command line parsing
 of string (#6388)

* string gpu input

* update docs

* deprecation warning

* Revert "update docs"

This reverts commit c5f38934133812280ae98f6489c008a39796dd4d.

* deprecation

* add changelog

* update parser

* update warning

* implement v1.5 behavior ahead of time

* formatting

* set accelerator in test to avoid different warning

* add warning

* remove todo warn

* Update pytorch_lightning/utilities/device_parser.py

Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com>

* resolve flake8

Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com>
Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
Co-authored-by: tchaton <thomas@grid.ai>
---
 CHANGELOG.md                                 |  2 ++
 docs/source/advanced/multi_gpu.rst           |  6 +++-
 pytorch_lightning/utilities/device_parser.py | 32 +++++++++++++++-----
 tests/deprecated_api/test_remove_1-5.py      | 27 +++++++++++++++--
 tests/models/test_gpu.py                     |  7 +++--
 5 files changed, 61 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b9a063510c544..b91d02ec1aa61 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -249,6 +249,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Deprecated the `LightningModule.datamodule` getter and setter methods; access them through `Trainer.datamodule` instead ([#7168](https://github.com/PyTorchLightning/pytorch-lightning/pull/7168))
 
 
+- Deprecated the use of `Trainer(gpus="i")` (string) for selecting the i-th GPU; from v1.5 this will set the number of GPUs instead of the index ([#6388](https://github.com/PyTorchLightning/pytorch-lightning/pull/6388))
+
 ### Removed
 
 
diff --git a/docs/source/advanced/multi_gpu.rst b/docs/source/advanced/multi_gpu.rst
index 492f253214409..3e159f5ba79f1 100644
--- a/docs/source/advanced/multi_gpu.rst
+++ b/docs/source/advanced/multi_gpu.rst
@@ -226,13 +226,17 @@ Note in particular the difference between `gpus=0`, `gpus=[0]` and `gpus="0"`.
 +---------------+-----------+---------------------+---------------------------------+
 | "0"           | str       | [0]                 | GPU 0                           |
 +---------------+-----------+---------------------+---------------------------------+
-| "3"           | str       | [3]                 | GPU 3                           |
+| "3"           | str       | [3]                 | GPU 3 (will change in v1.5)     |
 +---------------+-----------+---------------------+---------------------------------+
 | "1, 3"        | str       | [1, 3]              | GPUs 1 and 3                    |
 +---------------+-----------+---------------------+---------------------------------+
 | "-1"          | str       | [0, 1, 2, ...]      | all available GPUs              |
 +---------------+-----------+---------------------+---------------------------------+
 
+.. warning::
+    The behavior for :code:`gpus="3"` (str) will change. Currently it selects the GPU with index 3, but from v1.5
+    it will select the first 3 GPUs (same as :code:`gpus=3`).
+
 .. note::
 
     When specifying number of gpus as an integer ``gpus=k``, setting the trainer flag
diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py
index f81a4ece1c6d0..511a91326953d 100644
--- a/pytorch_lightning/utilities/device_parser.py
+++ b/pytorch_lightning/utilities/device_parser.py
@@ -11,12 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import operator
 from typing import Any, List, MutableSequence, Optional, Tuple, Union
 
 import torch
 
-from pytorch_lightning.utilities import _TPU_AVAILABLE
+from pytorch_lightning.utilities import _TPU_AVAILABLE, rank_zero_warn
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from pytorch_lightning.utilities.imports import _compare_version
 
 
 def determine_root_gpu_device(gpus: List[int]) -> Optional[int]:
@@ -66,9 +68,12 @@ def parse_gpu_ids(gpus: Optional[Union[int, str, List[int]]]) -> Optional[List[i
     if gpus is None or isinstance(gpus, int) and gpus == 0:
         return None
 
+    if _compare_version("pytorch_lightning", operator.ge, "1.5") and isinstance(gpus, str) and gpus.strip() == "0":
+        # TODO: in v1.5 combine this with the above if statement
+        return None
+
     # We know user requested GPUs therefore if some of the
     # requested GPUs are not available an exception is thrown.
-
     gpus = _normalize_parse_gpu_string_input(gpus)
     gpus = _normalize_parse_gpu_input_to_list(gpus)
     if not gpus:
@@ -107,13 +112,24 @@ def parse_tpu_cores(tpu_cores: Union[int, str, List]) -> Optional[Union[List[int
 
 
 def _normalize_parse_gpu_string_input(s: Union[int, str, List[int]]) -> Union[int, List[int]]:
-    if isinstance(s, str):
-        if s == '-1':
-            return -1
-        else:
-            return [int(x.strip()) for x in s.split(',') if len(x) > 0]
-    else:
+    if not isinstance(s, str):
         return s
+    if s == '-1':
+        return -1
+    elif ',' in s:
+        return [int(x.strip()) for x in s.split(',') if len(x) > 0]
+    else:
+        num_gpus = int(s.strip())
+        if _compare_version("pytorch_lightning", operator.lt, "1.5"):
+            rank_zero_warn(
+                f"Parsing of the Trainer argument gpus='{s}' (string) will change in the future."
+                " In the current version of Lightning, this will select"
+                f" CUDA device with index {num_gpus}, but from v1.5 it will select gpus"
+                f" {list(range(num_gpus))} (same as gpus={s} (int)).",
+                DeprecationWarning,
+            )
+            return [num_gpus]
+        return num_gpus
 
 
 def _sanitize_gpu_ids(gpus: List[int]) -> List[int]:
diff --git a/tests/deprecated_api/test_remove_1-5.py b/tests/deprecated_api/test_remove_1-5.py
index 47a76b8c6db80..f211fe08089df 100644
--- a/tests/deprecated_api/test_remove_1-5.py
+++ b/tests/deprecated_api/test_remove_1-5.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Test deprecated functionality which will be removed in v1.5.0"""
+import operator
 import os
 from typing import Any, Dict
 from unittest import mock
@@ -26,8 +27,10 @@
 from pytorch_lightning.loggers import WandbLogger
 from pytorch_lightning.profiler import AdvancedProfiler, BaseProfiler, PyTorchProfiler, SimpleProfiler
 from pytorch_lightning.trainer.callback_hook import warning_cache as callback_warning_cache
+from pytorch_lightning.utilities import device_parser
+from pytorch_lightning.utilities.imports import _compare_version
 from tests.deprecated_api import no_deprecated_call
-from tests.helpers import BoringModel, BoringDataModule
+from tests.helpers import BoringDataModule, BoringModel
 from tests.helpers.utils import no_warning_call
 
 
@@ -48,7 +51,7 @@ def test_v1_5_0_model_checkpoint_save_function():
 
 
 @mock.patch('pytorch_lightning.loggers.wandb.wandb')
-def test_v1_5_0_wandb_unused_sync_step(tmpdir):
+def test_v1_5_0_wandb_unused_sync_step(_):
     with pytest.deprecated_call(match=r"v1.2.1 and will be removed in v1.5"):
         WandbLogger(sync_step=True)
 
@@ -382,6 +385,26 @@ def test_v1_5_0_lighting_module_grad_norm(tmpdir):
         model.grad_norm(2)
 
 
+@pytest.mark.xfail(
+    condition=_compare_version("pytorch_lightning", operator.ge, "1.5"),
+    reason="parsing of string will change in v1.5",
+)
+@mock.patch('torch.cuda.device_count', return_value=4)
+def test_v1_5_0_trainer_gpus_str_parsing(*_):
+    # TODO: when removing this, make sure the docs in docs/source/advanced/multi_gpu.rst reflect the new
+    #   behavior regarding GPU selection. Ping @awaelchli if unsure.
+    with pytest.deprecated_call(match=r"Parsing of the Trainer argument gpus='3' .* will change."):
+        Trainer(gpus="3", accelerator="ddp_spawn")
+
+    with pytest.deprecated_call(match=r"Parsing of the Trainer argument gpus='3' .* will change."):
+        gpus = device_parser.parse_gpu_ids("3")
+        assert gpus == [3]
+
+    with pytest.deprecated_call(match=r"Parsing of the Trainer argument gpus='0' .* will change."):
+        gpus = device_parser.parse_gpu_ids("0")
+        assert gpus == [0]
+
+
 def test_v1_5_0_datamodule_setter():
     model = BoringModel()
     datamodule = BoringDataModule()
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 7764754594a09..65a1e093a9e96 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import operator
 from collections import namedtuple
 from unittest.mock import patch
 
@@ -22,12 +23,14 @@
 from pytorch_lightning import Trainer
 from pytorch_lightning.utilities import device_parser
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from pytorch_lightning.utilities.imports import _compare_version
 from tests.helpers import BoringModel
 from tests.helpers.datamodules import ClassifDataModule
 from tests.helpers.imports import Batch, Dataset, Example, Field, LabelField
 from tests.helpers.runif import RunIf
 from tests.helpers.simple_models import ClassificationModel
 
+PL_VERSION_LT_1_5 = _compare_version("pytorch_lightning", operator.lt, "1.5")
 PRETEND_N_OF_GPUS = 16
 
 
@@ -171,8 +174,8 @@ def test_determine_root_gpu_device(gpus, expected_root_gpu):
     pytest.param([0], [0]),
     pytest.param([1, 3], [1, 3]),
     pytest.param((1, 3), [1, 3]),
-    pytest.param('0', [0]),
-    pytest.param('3', [3]),
+    pytest.param('0', None, marks=pytest.mark.skipif(PL_VERSION_LT_1_5, reason="available from v1.5")),
+    pytest.param('3', [0, 1, 2], marks=pytest.mark.skipif(PL_VERSION_LT_1_5, reason="available from v1.5")),
     pytest.param('1, 3', [1, 3]),
     pytest.param('2,', [2]),
     pytest.param('-1', list(range(PRETEND_N_OF_GPUS)), id="'-1' - use all gpus"),