From 88eebbfbb1fd32a30276c75faac770bc08fe9445 Mon Sep 17 00:00:00 2001
From: rohitgr7
Date: Thu, 28 Jul 2022 14:20:51 +0530
Subject: [PATCH 1/5] Fix deepspeed default APEX level

---
 .../plugins/precision/deepspeed.py         | 13 +++++++++++--
 .../connectors/accelerator_connector.py    |  3 +--
 .../precision/test_deepspeed_precision.py  | 18 ++++++++++++++++++
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/src/pytorch_lightning/plugins/precision/deepspeed.py b/src/pytorch_lightning/plugins/precision/deepspeed.py
index fa948520e1fd6..9e98360cf8582 100644
--- a/src/pytorch_lightning/plugins/precision/deepspeed.py
+++ b/src/pytorch_lightning/plugins/precision/deepspeed.py
@@ -20,9 +20,9 @@
 import pytorch_lightning as pl
 from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin
 from pytorch_lightning.utilities import GradClipAlgorithmType
-from pytorch_lightning.utilities.enums import PrecisionType
+from pytorch_lightning.utilities.enums import AMPType, PrecisionType
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from pytorch_lightning.utilities.imports import _RequirementAvailable
+from pytorch_lightning.utilities.imports import _APEX_AVAILABLE, _RequirementAvailable
 from pytorch_lightning.utilities.model_helpers import is_overridden
 from pytorch_lightning.utilities.warnings import WarningCache
 
@@ -51,6 +51,15 @@ class DeepSpeedPrecisionPlugin(PrecisionPlugin):
     """
 
     def __init__(self, precision: Union[str, int], amp_type: str, amp_level: Optional[str] = None) -> None:
+        if amp_type == AMPType.APEX:
+            if not _APEX_AVAILABLE:
+                raise MisconfigurationException(
+                    "You have asked for Apex AMP but you have not installed it."
+                    " Install `apex` using this guide: https://github.com/NVIDIA/apex"
+                )
+
+            amp_level = amp_level or "O2"
+
         supported_precision = (PrecisionType.HALF, PrecisionType.FLOAT, PrecisionType.BFLOAT, PrecisionType.MIXED)
         if precision not in supported_precision:
             raise ValueError(
diff --git a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py
index bd879cf85ff7a..8ed8069ad2064 100644
--- a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -185,7 +185,7 @@ def __init__(
         self._layer_sync: Optional[LayerSync] = NativeSyncBatchNorm() if sync_batchnorm else None
         self.checkpoint_io: Optional[CheckpointIO] = None
         self._amp_type_flag: Optional[LightningEnum] = None
-        self._amp_level_flag: Optional[str] = amp_level
+        self._amp_level_flag: str = amp_level or "O2"
         self._auto_select_gpus: bool = auto_select_gpus
 
         self._check_config_and_set_final_flags(
@@ -732,7 +732,6 @@ def _check_and_init_precision(self) -> PrecisionPlugin:
             return NativeMixedPrecisionPlugin(self._precision_flag, device)
 
         if self._amp_type_flag == AMPType.APEX:
-            self._amp_level_flag = self._amp_level_flag or "O2"
             return ApexMixedPrecisionPlugin(self._amp_level_flag)
 
         raise RuntimeError("No precision set")
diff --git a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py
index a4698e7c19c97..cb053e8ce44f5 100644
--- a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py
+++ b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py
@@ -11,11 +11,29 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from unittest import mock
+
 import pytest
 
 from pytorch_lightning.plugins.precision.deepspeed import DeepSpeedPrecisionPlugin
+from pytorch_lightning.trainer.trainer import Trainer
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
 
 def test_invalid_precision_with_deepspeed_precision():
     with pytest.raises(ValueError, match="is not supported. `precision` must be one of"):
         DeepSpeedPrecisionPlugin(precision=64, amp_type="native")
+
+
+@mock.patch("pytorch_lightning.utilities.imports._APEX_AVAILABLE", return_value=False)
+def test_deepspeed_precision_apex_not_installed(_):
+    with pytest.raises(MisconfigurationException, match="You have asked for Apex AMP but you have not installed it."):
+        DeepSpeedPrecisionPlugin(precision=16, amp_type="apex")
+
+
+@mock.patch("pytorch_lightning.plugins.precision.deepspeed._APEX_AVAILABLE", return_value=True)
+def test_deepspeed_precision_apex_default_level(_):
+    trainer = Trainer(strategy="deepspeed", amp_backend="apex", amp_level=None)
+    precision_plugin = trainer.strategy.precision_plugin
+    assert isinstance(precision_plugin, DeepSpeedPrecisionPlugin)
+    assert precision_plugin.amp_level == "O2"

From 60c3934bafd649869d91e93106e0ec2444e547c9 Mon Sep 17 00:00:00 2001
From: rohitgr7
Date: Thu, 28 Jul 2022 14:28:36 +0530
Subject: [PATCH 2/5] chlog

---
 src/pytorch_lightning/CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md
index baf01371fb8bc..33658a10c94ea 100644
--- a/src/pytorch_lightning/CHANGELOG.md
+++ b/src/pytorch_lightning/CHANGELOG.md
@@ -393,6 +393,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed an issue that caused the learning rate finder to set the model's learning rate to None when no suggestion was possible ([#13845](https://github.com/Lightning-AI/lightning/pull/13845))
 
+- Fixed default `amp_level` for `DeepSpeedPrecisionPlugin` to `O2` ([#13897](https://github.com/PyTorchLightning/pytorch-lightning/pull/13897))
+
+
 ## [1.6.5] - 2022-07-13

From f6537ea72932b7911ee51027585f06d5c6fbaa94 Mon Sep 17 00:00:00 2001
From: Rohit Gupta
Date: Thu, 28 Jul 2022 05:43:45 -0400
Subject: [PATCH 3/5] fix test

---
 .../plugins/precision/test_deepspeed_precision.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py
index cb053e8ce44f5..5cc30a5bc3aac 100644
--- a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py
+++ b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py
@@ -25,8 +25,10 @@ def test_invalid_precision_with_deepspeed_precision():
         DeepSpeedPrecisionPlugin(precision=64, amp_type="native")
 
 
-@mock.patch("pytorch_lightning.utilities.imports._APEX_AVAILABLE", return_value=False)
-def test_deepspeed_precision_apex_not_installed(_):
+def test_deepspeed_precision_apex_not_installed(monkeypatch):
+    import pytorch_lightning.plugins.precision.deepspeed as deepspeed_apex
+
+    monkeypatch.setattr(deepspeed_apex, "_APEX_AVAILABLE", False)
     with pytest.raises(MisconfigurationException, match="You have asked for Apex AMP but you have not installed it."):
         DeepSpeedPrecisionPlugin(precision=16, amp_type="apex")
 

From 333a956e3fa6467e831f298426fdcf52cbd39796 Mon Sep 17 00:00:00 2001
From: rohitgr7
Date: Thu, 28 Jul 2022 20:59:08 +0530
Subject: [PATCH 4/5] update tests

---
 src/pytorch_lightning/plugins/precision/apex_amp.py   | 2 +-
 src/pytorch_lightning/plugins/precision/deepspeed.py  | 2 +-
 .../trainer/connectors/accelerator_connector.py       | 3 ++-
 .../plugins/precision/test_deepspeed_precision.py     | 6 ++----
 tests/tests_pytorch/plugins/test_amp_plugins.py       | 2 +-
 5 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/pytorch_lightning/plugins/precision/apex_amp.py b/src/pytorch_lightning/plugins/precision/apex_amp.py
index e18f82dc27f6e..2077f2072ab95 100644
--- a/src/pytorch_lightning/plugins/precision/apex_amp.py
+++ b/src/pytorch_lightning/plugins/precision/apex_amp.py
@@ -35,7 +35,7 @@ class ApexMixedPrecisionPlugin(MixedPrecisionPlugin):
     def __init__(self, amp_level: str = "O2") -> None:
         if not _APEX_AVAILABLE:
             raise MisconfigurationException(
-                "You have asked for Apex AMP but you have not installed it."
+                "You have asked for Apex AMP but `apex` is not installed."
                 " Install `apex` using this guide: https://github.com/NVIDIA/apex"
             )
         super().__init__()
diff --git a/src/pytorch_lightning/plugins/precision/deepspeed.py b/src/pytorch_lightning/plugins/precision/deepspeed.py
index 9e98360cf8582..791a08a87d107 100644
--- a/src/pytorch_lightning/plugins/precision/deepspeed.py
+++ b/src/pytorch_lightning/plugins/precision/deepspeed.py
@@ -54,7 +54,7 @@ def __init__(self, precision: Union[str, int], amp_type: str, amp_level: Optiona
         if amp_type == AMPType.APEX:
             if not _APEX_AVAILABLE:
                 raise MisconfigurationException(
-                    "You have asked for Apex AMP but you have not installed it."
+                    "You have asked for Apex AMP but `apex` is not installed."
                     " Install `apex` using this guide: https://github.com/NVIDIA/apex"
                 )
 
diff --git a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py
index 8ed8069ad2064..850eff06b03d8 100644
--- a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -185,7 +185,7 @@ def __init__(
         self._layer_sync: Optional[LayerSync] = NativeSyncBatchNorm() if sync_batchnorm else None
         self.checkpoint_io: Optional[CheckpointIO] = None
         self._amp_type_flag: Optional[LightningEnum] = None
-        self._amp_level_flag: str = amp_level or "O2"
+        self._amp_level_flag: str = amp_level
         self._auto_select_gpus: bool = auto_select_gpus
 
         self._check_config_and_set_final_flags(
@@ -732,6 +732,7 @@ def _check_and_init_precision(self) -> PrecisionPlugin:
             return NativeMixedPrecisionPlugin(self._precision_flag, device)
 
         if self._amp_type_flag == AMPType.APEX:
+            self._amp_level_flag = self._amp_level_flag or "O2"
             return ApexMixedPrecisionPlugin(self._amp_level_flag)
 
         raise RuntimeError("No precision set")
diff --git a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py
index 5cc30a5bc3aac..c1f7979ea8482 100644
--- a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py
+++ b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py
@@ -16,7 +16,6 @@
 import pytest
 
 from pytorch_lightning.plugins.precision.deepspeed import DeepSpeedPrecisionPlugin
-from pytorch_lightning.trainer.trainer import Trainer
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
 
@@ -29,13 +28,12 @@ def test_deepspeed_precision_apex_not_installed(monkeypatch):
     import pytorch_lightning.plugins.precision.deepspeed as deepspeed_apex
 
     monkeypatch.setattr(deepspeed_apex, "_APEX_AVAILABLE", False)
-    with pytest.raises(MisconfigurationException, match="You have asked for Apex AMP but you have not installed it."):
+    with pytest.raises(MisconfigurationException, match="You have asked for Apex AMP but `apex` is not installed."):
         DeepSpeedPrecisionPlugin(precision=16, amp_type="apex")
 
 
 @mock.patch("pytorch_lightning.plugins.precision.deepspeed._APEX_AVAILABLE", return_value=True)
 def test_deepspeed_precision_apex_default_level(_):
-    trainer = Trainer(strategy="deepspeed", amp_backend="apex", amp_level=None)
-    precision_plugin = trainer.strategy.precision_plugin
+    precision_plugin = DeepSpeedPrecisionPlugin(precision=16, amp_type="apex")
     assert isinstance(precision_plugin, DeepSpeedPrecisionPlugin)
     assert precision_plugin.amp_level == "O2"
diff --git a/tests/tests_pytorch/plugins/test_amp_plugins.py b/tests/tests_pytorch/plugins/test_amp_plugins.py
index 132d13c054926..8bb98434e76d2 100644
--- a/tests/tests_pytorch/plugins/test_amp_plugins.py
+++ b/tests/tests_pytorch/plugins/test_amp_plugins.py
@@ -283,5 +283,5 @@ def test_precision_selection_raises(monkeypatch):
     monkeypatch.setattr(apex, "_APEX_AVAILABLE", False)
     with mock.patch("pytorch_lightning.utilities.device_parser.num_cuda_devices", return_value=1), mock.patch(
         "pytorch_lightning.utilities.device_parser.is_cuda_available", return_value=True
-    ), pytest.raises(MisconfigurationException, match="asked for Apex AMP but you have not installed it"):
+    ), pytest.raises(MisconfigurationException, match="asked for Apex AMP but `apex` is not installed"):
         Trainer(amp_backend="apex", precision=16, accelerator="gpu", devices=1)

From b9814ae7182eba7ac3c3763a12e3baca76fdf805 Mon Sep 17 00:00:00 2001
From: Rohit Gupta
Date: Fri, 29 Jul 2022 14:25:52 +0530
Subject: [PATCH 5/5] Update src/pytorch_lightning/trainer/connectors/accelerator_connector.py

Co-authored-by: Akihiro Nitta
---
 .../trainer/connectors/accelerator_connector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py
index 850eff06b03d8..bd879cf85ff7a 100644
--- a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -185,7 +185,7 @@ def __init__(
         self._layer_sync: Optional[LayerSync] = NativeSyncBatchNorm() if sync_batchnorm else None
         self.checkpoint_io: Optional[CheckpointIO] = None
         self._amp_type_flag: Optional[LightningEnum] = None
-        self._amp_level_flag: str = amp_level
+        self._amp_level_flag: Optional[str] = amp_level
         self._auto_select_gpus: bool = auto_select_gpus
 
         self._check_config_and_set_final_flags(
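
Taken together, the series leaves the `O2` fallback inside `DeepSpeedPrecisionPlugin.__init__` (mirroring `ApexMixedPrecisionPlugin`'s `amp_level: str = "O2"` default), while the accelerator connector keeps `amp_level` as `Optional[str]` and only resolves it when building the Apex plugin. A minimal sketch of the resulting behaviour (not part of the patches above), assuming `apex` is installed so the availability check in the constructor passes:

    # Sketch of the post-series behaviour; requires `apex` to be importable.
    from pytorch_lightning.plugins.precision.deepspeed import DeepSpeedPrecisionPlugin

    # No explicit amp_level: the plugin itself now falls back to "O2".
    plugin = DeepSpeedPrecisionPlugin(precision=16, amp_type="apex")
    assert plugin.amp_level == "O2"

    # An explicit level is passed through unchanged.
    plugin = DeepSpeedPrecisionPlugin(precision=16, amp_type="apex", amp_level="O1")
    assert plugin.amp_level == "O1"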