From b4e2e0181ea35dc8c4224360ae0d913da0dc6cf2 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Thu, 16 Feb 2023 15:06:24 +0100 Subject: [PATCH] update tests --- tests/tests_pytorch/accelerators/test_hpu.py | 2 +- tests/tests_pytorch/accelerators/test_ipu.py | 16 +++--- .../checkpointing/test_legacy_checkpoints.py | 2 +- .../helpers/deterministic_model.py | 2 +- tests/tests_pytorch/models/test_amp.py | 18 +++--- .../tests_pytorch/models/test_ddp_fork_amp.py | 2 +- tests/tests_pytorch/models/test_hooks.py | 6 +- tests/tests_pytorch/models/test_tpu.py | 6 +- .../plugins/precision/hpu/test_hpu.py | 16 +++--- .../plugins/precision/test_amp.py | 4 +- .../plugins/precision/test_amp_integration.py | 2 +- .../precision/test_deepspeed_precision.py | 2 +- .../tests_pytorch/plugins/test_amp_plugins.py | 10 ++-- .../plugins/test_double_plugin.py | 4 +- tests/tests_pytorch/strategies/test_ddp.py | 2 +- .../strategies/test_deepspeed_strategy.py | 56 +++++++++---------- tests/tests_pytorch/strategies/test_fsdp.py | 12 ++-- .../tests_pytorch/strategies/test_registry.py | 2 +- .../connectors/test_accelerator_connector.py | 18 +++--- .../optimization/test_manual_optimization.py | 16 +++--- tests/tests_pytorch/trainer/test_trainer.py | 8 +-- .../tuner/test_scale_batch_size.py | 2 +- .../test_deepspeed_collate_checkpoint.py | 2 +- .../utilities/test_deepspeed_model_summary.py | 2 +- .../utilities/test_torchdistx.py | 2 +- 25 files changed, 107 insertions(+), 107 deletions(-) diff --git a/tests/tests_pytorch/accelerators/test_hpu.py b/tests/tests_pytorch/accelerators/test_hpu.py index 6307a78b1c815..fc08b1ee069fa 100644 --- a/tests/tests_pytorch/accelerators/test_hpu.py +++ b/tests/tests_pytorch/accelerators/test_hpu.py @@ -61,7 +61,7 @@ def test_all_stages(tmpdir, hpus): fast_dev_run=True, accelerator="hpu", devices=hpus, - precision=16, + precision='16-mixed', ) trainer.fit(model) trainer.validate(model) diff --git a/tests/tests_pytorch/accelerators/test_ipu.py b/tests/tests_pytorch/accelerators/test_ipu.py index 01978eaeb9c8e..ab4d44f579491 100644 --- a/tests/tests_pytorch/accelerators/test_ipu.py +++ b/tests/tests_pytorch/accelerators/test_ipu.py @@ -178,15 +178,15 @@ def test_optimization(tmpdir): def test_half_precision(tmpdir): class TestCallback(Callback): def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> None: - assert trainer.precision == "16" + assert trainer.precision == "16-mixed" raise SystemExit model = IPUModel() trainer = Trainer( - default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=1, precision=16, callbacks=TestCallback() + default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=1, precision='16-mixed', callbacks=TestCallback() ) assert isinstance(trainer.strategy.precision_plugin, IPUPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "16" + assert trainer.strategy.precision_plugin.precision == "16-mixed" with pytest.raises(SystemExit): trainer.fit(model) @@ -195,7 +195,7 @@ def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> Non def test_pure_half_precision(tmpdir): class TestCallback(Callback): def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: - assert trainer.strategy.precision_plugin.precision == "16" + assert trainer.strategy.precision_plugin.precision == "16-mixed" for param in trainer.strategy.model.parameters(): assert param.dtype == torch.float16 raise SystemExit @@ -203,12 +203,12 @@ def on_train_start(self, trainer: Trainer, pl_module: 
LightningModule) -> None: model = IPUModel() model = model.half() trainer = Trainer( - default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=1, precision=16, callbacks=TestCallback() + default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=1, precision='16-mixed', callbacks=TestCallback() ) assert isinstance(trainer.strategy, IPUStrategy) assert isinstance(trainer.strategy.precision_plugin, IPUPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "16" + assert trainer.strategy.precision_plugin.precision == "16-mixed" changed_dtypes = [torch.float, torch.float64] data = [torch.zeros((1), dtype=dtype) for dtype in changed_dtypes] @@ -534,8 +534,8 @@ def configure_optimizers(self): def test_precision_plugin(): """Ensure precision plugin value is set correctly.""" - plugin = IPUPrecisionPlugin(precision=16) - assert plugin.precision == "16" + plugin = IPUPrecisionPlugin(precision='16-mixed') + assert plugin.precision == "16-mixed" @RunIf(ipu=True) diff --git a/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py b/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py index 829c498e1e7c5..4ede1b8baacd6 100644 --- a/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py +++ b/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py @@ -103,7 +103,7 @@ def test_resume_legacy_checkpoints(tmpdir, pl_version: str): default_root_dir=str(tmpdir), accelerator="auto", devices=1, - precision=(16 if torch.cuda.is_available() else 32), + precision=('16-mixed' if torch.cuda.is_available() else '32-true'), callbacks=[stop], max_epochs=21, accumulate_grad_batches=2, diff --git a/tests/tests_pytorch/helpers/deterministic_model.py b/tests/tests_pytorch/helpers/deterministic_model.py index b5a4b588881c2..158406b4b7435 100644 --- a/tests/tests_pytorch/helpers/deterministic_model.py +++ b/tests/tests_pytorch/helpers/deterministic_model.py @@ -98,7 +98,7 @@ def configure_optimizers__lr_on_plateau_step(self): def backward(self, loss, *args, **kwargs): if self.assert_backward: - if self.trainer.precision == "16": + if self.trainer.precision == "16-mixed": assert loss > 171 * 1000 else: assert loss == 171.0 diff --git a/tests/tests_pytorch/models/test_amp.py b/tests/tests_pytorch/models/test_amp.py index d7c6922362141..0bcffcd49c28e 100644 --- a/tests/tests_pytorch/models/test_amp.py +++ b/tests/tests_pytorch/models/test_amp.py @@ -29,7 +29,7 @@ class AMPTestModel(BoringModel): def step(self, batch): self._assert_autocast_enabled() output = self(batch) - is_bfloat16 = self.trainer.precision_plugin.precision == "bf16" + is_bfloat16 = self.trainer.precision_plugin.precision == "bf16-mixed" assert output.dtype == torch.float16 if not is_bfloat16 else torch.bfloat16 loss = self.loss(output) return loss @@ -37,7 +37,7 @@ def step(self, batch): def predict_step(self, batch, batch_idx, dataloader_idx=0): self._assert_autocast_enabled() output = self(batch) - is_bfloat16 = self.trainer.precision_plugin.precision == "bf16" + is_bfloat16 = self.trainer.precision_plugin.precision == "bf16-mixed" assert output.dtype == torch.float16 if not is_bfloat16 else torch.bfloat16 return output @@ -52,10 +52,10 @@ def _assert_autocast_enabled(self): @pytest.mark.parametrize( ("strategy", "precision", "devices"), ( - ("single_device", 16, 1), - ("single_device", "bf16", 1), - ("ddp_spawn", 16, 2), - ("ddp_spawn", "bf16", 2), + ("single_device", "16-mixed", 1), + ("single_device", "bf16-mixed", 1), + ("ddp_spawn", "16-mixed", 2), + ("ddp_spawn", "bf16-mixed", 2),
), ) def test_amp_cpus(tmpdir, strategy, precision, devices): @@ -83,7 +83,7 @@ def test_amp_cpus(tmpdir, strategy, precision, devices): @pytest.mark.parametrize("strategy", [None, "ddp_spawn"]) -@pytest.mark.parametrize("precision", [16, pytest.param("bf16", marks=RunIf(bf16_cuda=True))]) +@pytest.mark.parametrize("precision", ['16-mixed', pytest.param("bf16-mixed", marks=RunIf(bf16_cuda=True))]) @pytest.mark.parametrize( "devices", (pytest.param(1, marks=RunIf(min_cuda_gpus=1)), pytest.param(2, marks=RunIf(min_cuda_gpus=2))) ) @@ -135,7 +135,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir): accelerator="gpu", devices=[0], strategy="ddp_spawn", - precision=16, + precision='16-mixed', callbacks=[checkpoint], logger=logger, ) @@ -153,7 +153,7 @@ def test_precision_16_clip_gradients(mock_clip_grad_norm, clip_val, tmpdir): enable_progress_bar=False, max_epochs=1, devices=1, - precision=16, + precision='16-mixed', limit_train_batches=4, limit_val_batches=0, gradient_clip_val=clip_val, diff --git a/tests/tests_pytorch/models/test_ddp_fork_amp.py b/tests/tests_pytorch/models/test_ddp_fork_amp.py index ae873ccad6eb0..7fba705e507bf 100644 --- a/tests/tests_pytorch/models/test_ddp_fork_amp.py +++ b/tests/tests_pytorch/models/test_ddp_fork_amp.py @@ -24,7 +24,7 @@ def test_amp_gpus_ddp_fork(): """Ensure the use of AMP with `ddp_fork` (or associated alias strategies) does not generate CUDA initialization errors.""" - _ = MixedPrecisionPlugin(precision=16, device="cuda") + _ = MixedPrecisionPlugin(precision='16-mixed', device="cuda") with multiprocessing.get_context("fork").Pool(1) as pool: in_bad_fork = pool.apply(torch.cuda._is_in_bad_fork) assert not in_bad_fork diff --git a/tests/tests_pytorch/models/test_hooks.py b/tests/tests_pytorch/models/test_hooks.py index 44bbedc3a819d..41e715db3fcf7 100644 --- a/tests/tests_pytorch/models/test_hooks.py +++ b/tests/tests_pytorch/models/test_hooks.py @@ -401,9 +401,9 @@ def _predict_batch(trainer, model, batches): [ {}, # these precision plugins modify the optimization flow, so testing them explicitly - pytest.param(dict(accelerator="gpu", devices=1, precision=16), marks=RunIf(min_cuda_gpus=1)), + pytest.param(dict(accelerator="gpu", devices=1, precision='16-mixed'), marks=RunIf(min_cuda_gpus=1)), pytest.param( - dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), + dict(accelerator="gpu", devices=1, precision='16-mixed', strategy="deepspeed"), marks=RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True), ), ], @@ -453,7 +453,7 @@ def training_step(self, batch, batch_idx): "loops": ANY, } using_deepspeed = kwargs.get("strategy") == "deepspeed" - if kwargs.get("precision") == 16 and not using_deepspeed: + if kwargs.get("precision") == '16-mixed' and not using_deepspeed: saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY device = torch.device("cuda:0" if "accelerator" in kwargs and kwargs["accelerator"] == "gpu" else "cpu") expected = [ diff --git a/tests/tests_pytorch/models/test_tpu.py b/tests/tests_pytorch/models/test_tpu.py index 790f100fe3f58..ce6db19248f3d 100644 --- a/tests/tests_pytorch/models/test_tpu.py +++ b/tests/tests_pytorch/models/test_tpu.py @@ -104,7 +104,7 @@ def test_model_16bit_tpu_devices_1(tmpdir): """Make sure model trains on TPU.""" trainer_options = dict( default_root_dir=tmpdir, - precision=16, + precision='16-mixed', enable_progress_bar=False, max_epochs=2, accelerator="tpu", @@ -124,7 +124,7 @@ def test_model_16bit_tpu_index(tmpdir, tpu_core): """Make sure model trains on TPU.""" trainer_options = 
dict( default_root_dir=tmpdir, - precision=16, + precision='16-mixed', enable_progress_bar=False, max_epochs=2, accelerator="tpu", @@ -146,7 +146,7 @@ def test_model_16bit_tpu_devices_8(tmpdir): """Make sure model trains on TPU.""" trainer_options = dict( default_root_dir=tmpdir, - precision=16, + precision='16-mixed', enable_progress_bar=False, max_epochs=1, accelerator="tpu", diff --git a/tests/tests_pytorch/plugins/precision/hpu/test_hpu.py b/tests/tests_pytorch/plugins/precision/hpu/test_hpu.py index 718ef030eb507..5ddb295aa548b 100644 --- a/tests/tests_pytorch/plugins/precision/hpu/test_hpu.py +++ b/tests/tests_pytorch/plugins/precision/hpu/test_hpu.py @@ -34,15 +34,15 @@ def hmp_params(request): @RunIf(hpu=True) def test_precision_plugin(hmp_params): - plugin = HPUPrecisionPlugin(precision="bf16", **hmp_params) - assert plugin.precision == "bf16" + plugin = HPUPrecisionPlugin(precision="bf16-mixed", **hmp_params) + assert plugin.precision == "bf16-mixed" @RunIf(hpu=True) def test_mixed_precision(tmpdir, hmp_params: dict): class TestCallback(Callback): def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> None: - assert trainer.precision == "bf16" + assert trainer.precision == "bf16-mixed" raise SystemExit model = BoringModel() @@ -51,12 +51,12 @@ def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> Non fast_dev_run=True, accelerator="hpu", devices=1, - plugins=[HPUPrecisionPlugin(precision="bf16", **hmp_params)], + plugins=[HPUPrecisionPlugin(precision="bf16-mixed", **hmp_params)], callbacks=TestCallback(), ) assert isinstance(trainer.strategy, SingleHPUStrategy) assert isinstance(trainer.strategy.precision_plugin, HPUPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "bf16" + assert trainer.strategy.precision_plugin.precision == "bf16-mixed" with pytest.raises(SystemExit): trainer.fit(model) @@ -65,7 +65,7 @@ def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> Non def test_pure_half_precision(tmpdir, hmp_params: dict): class TestCallback(Callback): def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: - assert trainer.precision == "16" + assert trainer.precision == "16-mixed" for param in trainer.strategy.model.parameters(): assert param.dtype == torch.float16 raise SystemExit @@ -77,13 +77,13 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: fast_dev_run=True, accelerator="hpu", devices=1, - plugins=[HPUPrecisionPlugin(precision=16, **hmp_params)], + plugins=[HPUPrecisionPlugin(precision='16-mixed', **hmp_params)], callbacks=TestCallback(), ) assert isinstance(trainer.strategy, SingleHPUStrategy) assert isinstance(trainer.strategy.precision_plugin, HPUPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "16" + assert trainer.strategy.precision_plugin.precision == "16-mixed" with pytest.raises(RuntimeError, match=r"float16/half is not supported on Gaudi."): trainer.fit(model) diff --git a/tests/tests_pytorch/plugins/precision/test_amp.py b/tests/tests_pytorch/plugins/precision/test_amp.py index 189386cb90502..fed890e471175 100644 --- a/tests/tests_pytorch/plugins/precision/test_amp.py +++ b/tests/tests_pytorch/plugins/precision/test_amp.py @@ -23,7 +23,7 @@ def test_clip_gradients(): """Test that `.clip_gradients()` is a no-op when clipping is disabled.""" optimizer = Mock(spec=Optimizer) - precision = MixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) + precision = 
MixedPrecisionPlugin(precision='16-mixed', device="cuda:0", scaler=Mock()) precision.clip_grad_by_value = Mock() precision.clip_grad_by_norm = Mock() precision.clip_gradients(optimizer) @@ -47,7 +47,7 @@ def test_optimizer_amp_scaling_support_in_step_method(): gradient clipping (example: fused Adam).""" optimizer = Mock(_step_supports_amp_scaling=True) - precision = MixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) + precision = MixedPrecisionPlugin(precision='16-mixed', device="cuda:0", scaler=Mock()) with pytest.raises(RuntimeError, match="The current optimizer.*does not allow for gradient clipping"): precision.clip_gradients(optimizer, clip_val=1.0) diff --git a/tests/tests_pytorch/plugins/precision/test_amp_integration.py b/tests/tests_pytorch/plugins/precision/test_amp_integration.py index 0d7fb3f8e2bc0..895a67c6fecc2 100644 --- a/tests/tests_pytorch/plugins/precision/test_amp_integration.py +++ b/tests/tests_pytorch/plugins/precision/test_amp_integration.py @@ -38,7 +38,7 @@ def run(fused=False): default_root_dir=tmpdir, accelerator="cuda", devices=1, - precision=16, + precision='16-mixed', max_steps=5, logger=False, enable_checkpointing=False, diff --git a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py index 8420c5c793aec..2b31972cc88d4 100644 --- a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py +++ b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py @@ -19,4 +19,4 @@ def test_invalid_precision_with_deepspeed_precision(): with pytest.raises(ValueError, match="is not supported. `precision` must be one of"): - DeepSpeedPrecisionPlugin(precision=64) + DeepSpeedPrecisionPlugin(precision='64-true') diff --git a/tests/tests_pytorch/plugins/test_amp_plugins.py b/tests/tests_pytorch/plugins/test_amp_plugins.py index e542c01967cf7..c20656f8c3a96 100644 --- a/tests/tests_pytorch/plugins/test_amp_plugins.py +++ b/tests/tests_pytorch/plugins/test_amp_plugins.py @@ -54,10 +54,10 @@ class MyAMP(MixedPrecisionPlugin): def test_amp_ddp(cuda_count_2, strategy, devices, custom_plugin, plugin_cls): plugin = None if custom_plugin: - plugin = plugin_cls(16, "cpu") + plugin = plugin_cls('16-mixed', "cpu") trainer = Trainer( fast_dev_run=True, - precision=16, + precision='16-mixed', accelerator="gpu", devices=devices, strategy=strategy, @@ -137,7 +137,7 @@ def test_amp_gradient_unscale(tmpdir, accum: int): strategy="ddp_spawn", accelerator="gpu", devices=2, - precision=16, + precision='16-mixed', # use a tiny value to make sure it works gradient_clip_val=1e-3, gradient_clip_algorithm="value", @@ -179,14 +179,14 @@ def configure_optimizers(self): torch.optim.SGD(self.layer2.parameters(), lr=0.1), ] - trainer = Trainer(default_root_dir=tmpdir, accelerator="gpu", devices=1, fast_dev_run=1, precision=16) + trainer = Trainer(default_root_dir=tmpdir, accelerator="gpu", devices=1, fast_dev_run=1, precision='16-mixed') model = CustomBoringModel() trainer.fit(model) def test_cpu_amp_precision_context_manager(tmpdir): """Test to ensure that the context manager correctly is set to CPU + bfloat16.""" - plugin = MixedPrecisionPlugin("bf16", "cpu") + plugin = MixedPrecisionPlugin("bf16-mixed", "cpu") assert plugin.device == "cpu" assert plugin.scaler is None context_manager = plugin.autocast_context_manager() diff --git a/tests/tests_pytorch/plugins/test_double_plugin.py b/tests/tests_pytorch/plugins/test_double_plugin.py index 9c93f09cad221..b03a660b962b0 100644 --- 
a/tests/tests_pytorch/plugins/test_double_plugin.py +++ b/tests/tests_pytorch/plugins/test_double_plugin.py @@ -135,7 +135,7 @@ def on_fit_start(self): def test_double_precision(tmpdir, boring_model): model = boring_model() - trainer = Trainer(max_epochs=2, default_root_dir=tmpdir, fast_dev_run=2, precision=64, log_every_n_steps=1) + trainer = Trainer(max_epochs=2, default_root_dir=tmpdir, fast_dev_run=2, precision='64-true', log_every_n_steps=1) trainer.fit(model) trainer.test(model) trainer.predict(model) @@ -152,7 +152,7 @@ def test_double_precision_ddp(tmpdir): accelerator="gpu", devices=2, fast_dev_run=2, - precision=64, + precision='64-true', log_every_n_steps=1, ) trainer.fit(model) diff --git a/tests/tests_pytorch/strategies/test_ddp.py b/tests/tests_pytorch/strategies/test_ddp.py index 248e42bd7e69d..833a53f8e799d 100644 --- a/tests/tests_pytorch/strategies/test_ddp.py +++ b/tests/tests_pytorch/strategies/test_ddp.py @@ -96,7 +96,7 @@ def setup(self, stage: str) -> None: @RunIf(min_cuda_gpus=2, standalone=True) -@pytest.mark.parametrize("precision", (16, 32)) +@pytest.mark.parametrize("precision", ('16-mixed', '32-true')) def test_ddp_wrapper(tmpdir, precision): """Test parameters to ignore are carried over for DDP.""" diff --git a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py index e6eeff8c36f5f..c5b67c0bcb6bb 100644 --- a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py +++ b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py @@ -139,12 +139,12 @@ def test_deepspeed_precision_choice(cuda_count_1, tmpdir): default_root_dir=tmpdir, accelerator="gpu", strategy="deepspeed", - precision=16, + precision='16-mixed', ) assert isinstance(trainer.strategy, DeepSpeedStrategy) assert isinstance(trainer.strategy.precision_plugin, DeepSpeedPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "16" + assert trainer.strategy.precision_plugin.precision == "16-mixed" @RunIf(deepspeed=True) @@ -189,7 +189,7 @@ def backward(self, loss: Tensor, *args, **kwargs) -> None: strategy=DeepSpeedStrategy(), accelerator="gpu", devices=1, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -264,7 +264,7 @@ def configure_optimizers(self): accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', callbacks=[TestCB(), lr_monitor], logger=CSVLogger(tmpdir), enable_progress_bar=False, @@ -303,7 +303,7 @@ def on_train_start(self, trainer, pl_module) -> None: limit_val_batches=4, limit_test_batches=4, max_epochs=2, - precision=16, + precision='16-mixed', callbacks=[TestCB(), lr_monitor], logger=CSVLogger(tmpdir), enable_progress_bar=False, @@ -337,7 +337,7 @@ def on_train_start(self, trainer, pl_module) -> None: trainer = Trainer( default_root_dir=tmpdir, strategy=ds, - precision=16, + precision='16-mixed', accelerator="gpu", devices=1, callbacks=[TestCB()], @@ -380,7 +380,7 @@ def test_deepspeed_custom_activation_checkpointing_params_forwarded(tmpdir): default_root_dir=tmpdir, fast_dev_run=1, strategy=ds, - precision=16, + precision='16-mixed', accelerator="gpu", devices=1, enable_progress_bar=False, @@ -413,7 +413,7 @@ def setup(self, trainer, pl_module, stage=None) -> None: enable_progress_bar=False, max_epochs=1, strategy=DeepSpeedStrategy(config=deepspeed_zero_config), - precision=16, + precision='16-mixed', accelerator="gpu", devices=1, callbacks=[TestCallback()], @@ -433,7 +433,7 @@ def test_deepspeed_multigpu(tmpdir): 
accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -476,7 +476,7 @@ def test_deepspeed_stage_3_save_warning(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -508,7 +508,7 @@ def test_deepspeed_multigpu_single_file(tmpdir): accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -524,7 +524,7 @@ def test_deepspeed_multigpu_single_file(tmpdir): accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -626,7 +626,7 @@ def test_deepspeed_multigpu_stage_3(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -646,7 +646,7 @@ def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -672,7 +672,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization strategy=DeepSpeedStrategy(stage=3), accelerator="gpu", devices=2, - precision=16, + precision='16-mixed', accumulate_grad_batches=accumulate_grad_batches, callbacks=[ck], enable_progress_bar=False, @@ -693,7 +693,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization accelerator="gpu", devices=2, strategy=DeepSpeedStrategy(stage=3), - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -722,7 +722,7 @@ def test_deepspeed_multigpu_stage_3_warns_resume_training(tmpdir): strategy=DeepSpeedStrategy(stage=3, load_full_weights=True), accelerator="gpu", devices=1, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -751,7 +751,7 @@ def test_deepspeed_multigpu_stage_3_resume_training(tmpdir): strategy=DeepSpeedStrategy(stage=3), accelerator="gpu", devices=1, - precision=16, + precision='16-mixed', callbacks=[ck], enable_progress_bar=False, enable_model_summary=False, @@ -792,7 +792,7 @@ def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) -> max_epochs=2, limit_train_batches=1, limit_val_batches=0, - precision=16, + precision='16-mixed', callbacks=TestCallback(), enable_progress_bar=False, enable_model_summary=False, @@ -828,7 +828,7 @@ def on_train_batch_start(self, trainer, pl_module: LightningModule, batch: Any, devices=2, limit_train_batches=5, limit_val_batches=2, - precision=16, + precision='16-mixed', accumulate_grad_batches=2, callbacks=[verification_callback], enable_progress_bar=False, @@ -849,7 +849,7 @@ def test_deepspeed_multigpu_test(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -885,7 +885,7 @@ def on_train_epoch_start(self) -> None: accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -912,7 +912,7 @@ def on_train_epoch_start(self) -> None: accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -976,7 +976,7 @@ def 
test_deepspeed_multigpu_no_schedulers(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -998,7 +998,7 @@ def training_step(self, batch, batch_idx): accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -1212,7 +1212,7 @@ def test_deepspeed_with_bfloat16_precision(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision="bf16", + precision="bf16-mixed", num_sanity_val_steps=0, enable_progress_bar=False, enable_model_summary=False, @@ -1220,7 +1220,7 @@ def test_deepspeed_with_bfloat16_precision(tmpdir): trainer.fit(model) assert isinstance(trainer.strategy.precision_plugin, DeepSpeedPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "bf16" + assert trainer.strategy.precision_plugin.precision == "bf16-mixed" assert trainer.strategy.config["zero_optimization"]["stage"] == 3 assert trainer.strategy.config["bf16"]["enabled"] assert model.layer.weight.dtype == torch.bfloat16 @@ -1271,7 +1271,7 @@ def transfer_batch_to_device(self, batch, *args, **kwargs): return super().transfer_batch_to_device(batch, *args, **kwargs) model = CustomBoringModel() - trainer = Trainer(strategy="deepspeed", devices=1, accelerator="cuda", precision=16) + trainer = Trainer(strategy="deepspeed", devices=1, accelerator="cuda", precision='16-mixed') trainer.strategy.connect(model) batch = torch.zeros((1), dtype=torch.float32) batch = trainer.strategy.batch_to_device(batch) diff --git a/tests/tests_pytorch/strategies/test_fsdp.py b/tests/tests_pytorch/strategies/test_fsdp.py index 42425f581765f..9cef1fe13f6f6 100644 --- a/tests/tests_pytorch/strategies/test_fsdp.py +++ b/tests/tests_pytorch/strategies/test_fsdp.py @@ -64,7 +64,7 @@ def on_predict_batch_end(self, *_) -> None: def _assert_layer_fsdp_instance(self) -> None: assert isinstance(self.layer, FullyShardedDataParallel) assert isinstance(self.trainer.strategy.precision_plugin, FSDPMixedPrecisionPlugin) - precision = torch.float16 if self.trainer.precision == "16" else torch.bfloat16 + precision = torch.float16 if self.trainer.precision == "16-mixed" else torch.bfloat16 assert self.layer.mixed_precision.param_dtype == precision assert self.layer.mixed_precision.reduce_dtype == precision assert self.layer.mixed_precision.buffer_dtype == precision @@ -100,7 +100,7 @@ def _assert_layer_fsdp_instance(self) -> None: assert isinstance(self.layer, torch.nn.Sequential) assert isinstance(self.trainer.strategy.precision_plugin, FSDPMixedPrecisionPlugin) - precision = torch.float16 if self.trainer.precision == "16" else torch.bfloat16 + precision = torch.float16 if self.trainer.precision == "16-mixed" else torch.bfloat16 for layer_num in [0, 2]: assert isinstance(self.layer[layer_num], FullyShardedDataParallel) assert self.layer[layer_num].mixed_precision.param_dtype == precision @@ -164,7 +164,7 @@ def test_invalid_on_cpu(tmpdir): @RunIf(min_torch="1.12", min_cuda_gpus=1) -@pytest.mark.parametrize("precision, expected", [(16, torch.float16), ("bf16", torch.bfloat16)]) +@pytest.mark.parametrize("precision, expected", [('16-mixed', torch.float16), ("bf16-mixed", torch.bfloat16)]) def test_precision_plugin_config(precision, expected): plugin = FSDPMixedPrecisionPlugin(precision=precision, device="cuda") config = plugin.mixed_precision_config @@ -191,7 +191,7 @@ def test_fsdp_strategy_sync_batchnorm(tmpdir): accelerator="gpu", devices=2, 
strategy="fsdp", - precision=16, + precision='16-mixed', max_epochs=1, sync_batchnorm=True, ) @@ -199,7 +199,7 @@ def test_fsdp_strategy_sync_batchnorm(tmpdir): @RunIf(min_cuda_gpus=1, skip_windows=True, standalone=True, min_torch="1.12") -@pytest.mark.parametrize("precision", (16, pytest.param("bf16", marks=RunIf(bf16_cuda=True)))) +@pytest.mark.parametrize("precision", ('16-mixed', pytest.param("bf16-mixed", marks=RunIf(bf16_cuda=True)))) def test_fsdp_strategy_checkpoint(tmpdir, precision): """Test to ensure that checkpoint is saved correctly when using a single GPU, and all stages can be run.""" model = TestFSDPModel() @@ -230,7 +230,7 @@ def test_fsdp_checkpoint_multi_gpus(tmpdir, model, strategy): accelerator="gpu", devices=2, strategy=strategy, - precision=16, + precision='16-mixed', max_epochs=1, limit_train_batches=2, limit_val_batches=2, diff --git a/tests/tests_pytorch/strategies/test_registry.py b/tests/tests_pytorch/strategies/test_registry.py index 270fb028fad7f..9fa2e26f95008 100644 --- a/tests/tests_pytorch/strategies/test_registry.py +++ b/tests/tests_pytorch/strategies/test_registry.py @@ -48,7 +48,7 @@ def test_strategy_registry_with_deepspeed_strategies(strategy_name, init_params) @pytest.mark.parametrize("strategy", ["deepspeed", "deepspeed_stage_2_offload", "deepspeed_stage_3"]) def test_deepspeed_strategy_registry_with_trainer(tmpdir, strategy): - trainer = Trainer(default_root_dir=tmpdir, strategy=strategy, precision=16) + trainer = Trainer(default_root_dir=tmpdir, strategy=strategy, precision='16-mixed') assert isinstance(trainer.strategy, DeepSpeedStrategy) diff --git a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py index 4621648e7c201..35f89bc1d882b 100644 --- a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py +++ b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py @@ -596,14 +596,14 @@ def test_check_fsdp_strategy_and_fallback(): def test_unsupported_tpu_choice(tpu_available): - with pytest.raises(MisconfigurationException, match=r"accelerator='tpu', precision=64\)` is not implemented"): - Trainer(accelerator="tpu", precision=64) + with pytest.raises(MisconfigurationException, match=r"accelerator='tpu', precision='64-true'\)` is not implemented"): + Trainer(accelerator="tpu", precision='64-true') # if user didn't set strategy, AcceleratorConnector will choose the TPUSingleStrategy or TPUSpawnStrategy with pytest.raises(ValueError, match="TPUAccelerator` can only be used with a `SingleTPUStrategy`"), pytest.warns( - UserWarning, match=r"accelerator='tpu', precision=16\)` but AMP is not supported" + UserWarning, match=r"accelerator='tpu', precision=16-mixed\)` but AMP with fp16 is not supported" ): - Trainer(accelerator="tpu", precision=16, strategy="ddp") + Trainer(accelerator="tpu", precision='16-mixed', strategy="ddp") @mock.patch("lightning.pytorch.accelerators.ipu.IPUAccelerator.is_available", return_value=True) @@ -613,10 +613,10 @@ def test_unsupported_ipu_choice(mock_ipu_acc_avail, monkeypatch): monkeypatch.setattr(ipu_, "_IPU_AVAILABLE", True) monkeypatch.setattr(ipu, "_IPU_AVAILABLE", True) - with pytest.raises(ValueError, match=r"accelerator='ipu', precision='bf16'\)` is not supported"): - Trainer(accelerator="ipu", precision="bf16") - with pytest.raises(ValueError, match=r"accelerator='ipu', precision='64'\)` is not supported"): - Trainer(accelerator="ipu", precision=64) + with pytest.raises(ValueError, 
match=r"accelerator='ipu', precision='bf16-mixed'\)` is not supported"): + Trainer(accelerator="ipu", precision="bf16-mixed") + with pytest.raises(ValueError, match=r"accelerator='ipu', precision='64-true'\)` is not supported"): + Trainer(accelerator="ipu", precision="64-true") @mock.patch("lightning.pytorch.accelerators.tpu._XLA_AVAILABLE", return_value=False) @@ -846,5 +846,5 @@ def test_colossalai_external_strategy(monkeypatch): from lightning_colossalai import ColossalAIStrategy - trainer = Trainer(strategy="colossalai", precision=16) + trainer = Trainer(strategy="colossalai", precision='16-mixed') assert isinstance(trainer.strategy, ColossalAIStrategy) diff --git a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py index 8a5bedf8efabe..d181dfa76067e 100644 --- a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py +++ b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py @@ -72,7 +72,7 @@ def configure_optimizers(self): @pytest.mark.parametrize( - "kwargs", [{}, pytest.param({"accelerator": "gpu", "devices": 1, "precision": 16}, marks=RunIf(min_cuda_gpus=1))] + "kwargs", [{}, pytest.param({"accelerator": "gpu", "devices": 1, "precision": '16-mixed'}, marks=RunIf(min_cuda_gpus=1))] ) def test_multiple_optimizers_manual_call_counts(tmpdir, kwargs): model = ManualOptModel() @@ -87,7 +87,7 @@ def test_multiple_optimizers_manual_call_counts(tmpdir, kwargs): **kwargs, ) - if kwargs.get("precision") == 16: + if kwargs.get("precision") == '16-mixed': # mock the scaler instead of the optimizer step because it can be skipped with NaNs scaler_step_patch = mock.patch.object( trainer.precision_plugin.scaler, "step", wraps=trainer.precision_plugin.scaler.step @@ -99,7 +99,7 @@ def test_multiple_optimizers_manual_call_counts(tmpdir, kwargs): assert bwd_mock.call_count == limit_train_batches * 3 assert trainer.global_step == limit_train_batches * 2 - if kwargs.get("precision") == 16: + if kwargs.get("precision") == '16-mixed': scaler_step_patch.stop() assert scaler_step.call_count == len(model.optimizers()) * limit_train_batches @@ -141,7 +141,7 @@ def test_multiple_optimizers_manual_amp(tmpdir, accelerator): max_epochs=1, log_every_n_steps=1, enable_model_summary=False, - precision=16, + precision='16-mixed', accelerator=accelerator, devices=1, ) @@ -224,7 +224,7 @@ def test_manual_optimization_and_return_tensor(tmpdir): limit_train_batches=10, limit_test_batches=0, limit_val_batches=0, - precision=16, + precision='16-mixed', strategy="ddp_spawn", accelerator="gpu", devices=2, @@ -309,7 +309,7 @@ def on_train_epoch_end(self, *_, **__): limit_train_batches=20, limit_test_batches=0, limit_val_batches=0, - precision=16, + precision='16-mixed', accelerator="gpu", devices=1, ) @@ -383,7 +383,7 @@ def on_before_optimizer_step(self, optimizer, *_): max_epochs=1, log_every_n_steps=1, enable_model_summary=False, - precision=16, + precision='16-mixed', accelerator="gpu", devices=1, ) @@ -848,7 +848,7 @@ def test_lr_scheduler_step_not_called(tmpdir): @RunIf(min_cuda_gpus=1) -@pytest.mark.parametrize("precision", [16, 32]) +@pytest.mark.parametrize("precision", ['16-mixed', '32-true']) def test_multiple_optimizers_logging(precision, tmpdir): """Tests that metrics are properly being logged.""" diff --git a/tests/tests_pytorch/trainer/test_trainer.py b/tests/tests_pytorch/trainer/test_trainer.py index 8d7b044d24a12..d1c42fa3499d0 100644 --- a/tests/tests_pytorch/trainer/test_trainer.py +++ 
b/tests/tests_pytorch/trainer/test_trainer.py @@ -1019,7 +1019,7 @@ def on_exception(self, trainer, pl_module, exception): assert isinstance(handle_interrupt_callback.exception, MisconfigurationException) -@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_cuda_gpus=1))]) +@pytest.mark.parametrize("precision", ['32-true', pytest.param('16-mixed', marks=RunIf(min_cuda_gpus=1))]) @RunIf(sklearn=True) def test_gradient_clipping_by_norm(tmpdir, precision): """Test gradient clipping by norm.""" @@ -1048,7 +1048,7 @@ def configure_gradient_clipping(self, *args, **kwargs): assert model.assertion_called -@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_cuda_gpus=1))]) +@pytest.mark.parametrize("precision", ['32-true', pytest.param('16-mixed', marks=RunIf(min_cuda_gpus=1))]) def test_gradient_clipping_by_value(tmpdir, precision): """Test gradient clipping by value.""" trainer = Trainer( @@ -1444,7 +1444,7 @@ def test_spawn_predict_return_predictions(tmpdir): @pytest.mark.parametrize("return_predictions", [None, False, True]) -@pytest.mark.parametrize("precision", [32, 64]) +@pytest.mark.parametrize("precision", ['32-true', '64-true']) def test_predict_return_predictions_cpu(return_predictions, precision, tmpdir): """Test that `return_predictions=True`.""" seed_everything(42) @@ -1455,7 +1455,7 @@ def test_predict_return_predictions_cpu(return_predictions, precision, tmpdir): if return_predictions or return_predictions is None: assert len(preds) == 1 assert preds[0].shape == torch.Size([1, 2]) - assert preds[0].dtype == (torch.float64 if precision == 64 else torch.float32) + assert preds[0].dtype == (torch.float64 if precision == '64-true' else torch.float32) @pytest.mark.parametrize(["max_steps", "max_epochs", "global_step"], [(10, 5, 10), (20, None, 20)]) diff --git a/tests/tests_pytorch/tuner/test_scale_batch_size.py b/tests/tests_pytorch/tuner/test_scale_batch_size.py index 08b94a4763a8f..ea55b513d1ab4 100644 --- a/tests/tests_pytorch/tuner/test_scale_batch_size.py +++ b/tests/tests_pytorch/tuner/test_scale_batch_size.py @@ -254,7 +254,7 @@ def test_error_on_dataloader_passed_to_fit(tmpdir): def test_auto_scale_batch_size_with_amp(tmpdir): before_batch_size = 2 model = BatchSizeModel(batch_size=before_batch_size) - trainer = Trainer(default_root_dir=tmpdir, max_steps=1, accelerator="gpu", devices=1, precision=16) + trainer = Trainer(default_root_dir=tmpdir, max_steps=1, accelerator="gpu", devices=1, precision='16-mixed') tuner = Tuner(trainer) tuner.scale_batch_size(model) after_batch_size = model.batch_size diff --git a/tests/tests_pytorch/utilities/test_deepspeed_collate_checkpoint.py b/tests/tests_pytorch/utilities/test_deepspeed_collate_checkpoint.py index 5c0cb588ebe5b..314501ef0e8ee 100644 --- a/tests/tests_pytorch/utilities/test_deepspeed_collate_checkpoint.py +++ b/tests/tests_pytorch/utilities/test_deepspeed_collate_checkpoint.py @@ -32,7 +32,7 @@ def test_deepspeed_collate_checkpoint(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) diff --git a/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py b/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py index f06c101db8246..8d734ad74649f 100644 --- a/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py +++ b/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py @@ -45,7 +45,7 @@ def on_fit_start(self, trainer: "pl.Trainer", pl_module: 
"pl.LightningModule") - accelerator="gpu", fast_dev_run=True, devices=2, - precision=16, + precision='16-mixed', enable_model_summary=True, callbacks=[TestCallback()], ) diff --git a/tests/tests_pytorch/utilities/test_torchdistx.py b/tests/tests_pytorch/utilities/test_torchdistx.py index 9fee068cee9ab..1491dc5106443 100644 --- a/tests/tests_pytorch/utilities/test_torchdistx.py +++ b/tests/tests_pytorch/utilities/test_torchdistx.py @@ -55,7 +55,7 @@ def test_deferred_init_with_lightning_module(): ( {"accelerator": "auto", "devices": 1}, pytest.param( - {"strategy": "deepspeed_stage_3", "accelerator": "gpu", "devices": 2, "precision": 16}, + {"strategy": "deepspeed_stage_3", "accelerator": "gpu", "devices": 2, "precision": '16-mixed'}, marks=RunIf(min_cuda_gpus=2, deepspeed=True), ), ),