From b4e2e0181ea35dc8c4224360ae0d913da0dc6cf2 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Thu, 16 Feb 2023 15:06:24 +0100 Subject: [PATCH] update tests --- tests/tests_pytorch/accelerators/test_hpu.py | 2 +- tests/tests_pytorch/accelerators/test_ipu.py | 16 +++--- .../checkpointing/test_legacy_checkpoints.py | 2 +- .../helpers/deterministic_model.py | 2 +- tests/tests_pytorch/models/test_amp.py | 18 +++--- .../tests_pytorch/models/test_ddp_fork_amp.py | 2 +- tests/tests_pytorch/models/test_hooks.py | 6 +- tests/tests_pytorch/models/test_tpu.py | 6 +- .../plugins/precision/hpu/test_hpu.py | 16 +++--- .../plugins/precision/test_amp.py | 4 +- .../plugins/precision/test_amp_integration.py | 2 +- .../precision/test_deepspeed_precision.py | 2 +- .../tests_pytorch/plugins/test_amp_plugins.py | 10 ++-- .../plugins/test_double_plugin.py | 4 +- tests/tests_pytorch/strategies/test_ddp.py | 2 +- .../strategies/test_deepspeed_strategy.py | 56 +++++++++---------- tests/tests_pytorch/strategies/test_fsdp.py | 12 ++-- .../tests_pytorch/strategies/test_registry.py | 2 +- .../connectors/test_accelerator_connector.py | 18 +++--- .../optimization/test_manual_optimization.py | 16 +++--- tests/tests_pytorch/trainer/test_trainer.py | 8 +-- .../tuner/test_scale_batch_size.py | 2 +- .../test_deepspeed_collate_checkpoint.py | 2 +- .../utilities/test_deepspeed_model_summary.py | 2 +- .../utilities/test_torchdistx.py | 2 +- 25 files changed, 107 insertions(+), 107 deletions(-) diff --git a/tests/tests_pytorch/accelerators/test_hpu.py b/tests/tests_pytorch/accelerators/test_hpu.py index 6307a78b1c815..fc08b1ee069fa 100644 --- a/tests/tests_pytorch/accelerators/test_hpu.py +++ b/tests/tests_pytorch/accelerators/test_hpu.py @@ -61,7 +61,7 @@ def test_all_stages(tmpdir, hpus): fast_dev_run=True, accelerator="hpu", devices=hpus, - precision=16, + precision='16-mixed', ) trainer.fit(model) trainer.validate(model) diff --git a/tests/tests_pytorch/accelerators/test_ipu.py b/tests/tests_pytorch/accelerators/test_ipu.py index 01978eaeb9c8e..ab4d44f579491 100644 --- a/tests/tests_pytorch/accelerators/test_ipu.py +++ b/tests/tests_pytorch/accelerators/test_ipu.py @@ -178,15 +178,15 @@ def test_optimization(tmpdir): def test_half_precision(tmpdir): class TestCallback(Callback): def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> None: - assert trainer.precision == "16" + assert trainer.precision == "16-mixed" raise SystemExit model = IPUModel() trainer = Trainer( - default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=1, precision=16, callbacks=TestCallback() + default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=1, precision='16-mixed', callbacks=TestCallback() ) assert isinstance(trainer.strategy.precision_plugin, IPUPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "16" + assert trainer.strategy.precision_plugin.precision == "16-mixed" with pytest.raises(SystemExit): trainer.fit(model) @@ -195,7 +195,7 @@ def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> Non def test_pure_half_precision(tmpdir): class TestCallback(Callback): def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: - assert trainer.strategy.precision_plugin.precision == "16" + assert trainer.strategy.precision_plugin.precision == "16-mixed" for param in trainer.strategy.model.parameters(): assert param.dtype == torch.float16 raise SystemExit @@ -203,12 +203,12 @@ def on_train_start(self, trainer: Trainer, pl_module: 
LightningModule) -> None: model = IPUModel() model = model.half() trainer = Trainer( - default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=1, precision=16, callbacks=TestCallback() + default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=1, precision='16-mixed', callbacks=TestCallback() ) assert isinstance(trainer.strategy, IPUStrategy) assert isinstance(trainer.strategy.precision_plugin, IPUPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "16" + assert trainer.strategy.precision_plugin.precision == "16-mixed" changed_dtypes = [torch.float, torch.float64] data = [torch.zeros((1), dtype=dtype) for dtype in changed_dtypes] @@ -534,8 +534,8 @@ def configure_optimizers(self): def test_precision_plugin(): """Ensure precision plugin value is set correctly.""" - plugin = IPUPrecisionPlugin(precision=16) - assert plugin.precision == "16" + plugin = IPUPrecisionPlugin(precision='16-mixed') + assert plugin.precision == "16-mixed" @RunIf(ipu=True) diff --git a/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py b/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py index 829c498e1e7c5..4ede1b8baacd6 100644 --- a/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py +++ b/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py @@ -103,7 +103,7 @@ def test_resume_legacy_checkpoints(tmpdir, pl_version: str): default_root_dir=str(tmpdir), accelerator="auto", devices=1, - precision=(16 if torch.cuda.is_available() else 32), + precision=('16-mixed' if torch.cuda.is_available() else '32-true'), callbacks=[stop], max_epochs=21, accumulate_grad_batches=2, diff --git a/tests/tests_pytorch/helpers/deterministic_model.py b/tests/tests_pytorch/helpers/deterministic_model.py index b5a4b588881c2..158406b4b7435 100644 --- a/tests/tests_pytorch/helpers/deterministic_model.py +++ b/tests/tests_pytorch/helpers/deterministic_model.py @@ -98,7 +98,7 @@ def configure_optimizers__lr_on_plateau_step(self): def backward(self, loss, *args, **kwargs): if self.assert_backward: - if self.trainer.precision == "16": + if self.trainer.precision == "16-mixed": assert loss > 171 * 1000 else: assert loss == 171.0 diff --git a/tests/tests_pytorch/models/test_amp.py b/tests/tests_pytorch/models/test_amp.py index d7c6922362141..0bcffcd49c28e 100644 --- a/tests/tests_pytorch/models/test_amp.py +++ b/tests/tests_pytorch/models/test_amp.py @@ -29,7 +29,7 @@ class AMPTestModel(BoringModel): def step(self, batch): self._assert_autocast_enabled() output = self(batch) - is_bfloat16 = self.trainer.precision_plugin.precision == "bf16" + is_bfloat16 = self.trainer.precision_plugin.precision == "bf16-mixed" assert output.dtype == torch.float16 if not is_bfloat16 else torch.bfloat16 loss = self.loss(output) return loss @@ -37,7 +37,7 @@ def step(self, batch): def predict_step(self, batch, batch_idx, dataloader_idx=0): self._assert_autocast_enabled() output = self(batch) - is_bfloat16 = self.trainer.precision_plugin.precision == "bf16" + is_bfloat16 = self.trainer.precision_plugin.precision == "bf16-mixed" assert output.dtype == torch.float16 if not is_bfloat16 else torch.bfloat16 return output @@ -52,10 +52,10 @@ def _assert_autocast_enabled(self): @pytest.mark.parametrize( ("strategy", "precision", "devices"), ( - ("single_device", 16, 1), - ("single_device", "bf16", 1), - ("ddp_spawn", 16, 2), - ("ddp_spawn", "bf16", 2), + ("single_device", "16-mixed", 1), + ("single_device", "bf16-mixed", 1), + ("ddp_spawn", "16-mixed", 2), + ("ddp_spawn", "bf16-mixed", 2),
), ) def test_amp_cpus(tmpdir, strategy, precision, devices): @@ -83,7 +83,7 @@ def test_amp_cpus(tmpdir, strategy, precision, devices): @pytest.mark.parametrize("strategy", [None, "ddp_spawn"]) -@pytest.mark.parametrize("precision", [16, pytest.param("bf16", marks=RunIf(bf16_cuda=True))]) +@pytest.mark.parametrize("precision", ['16-mixed', pytest.param("bf16-mixed", marks=RunIf(bf16_cuda=True))]) @pytest.mark.parametrize( "devices", (pytest.param(1, marks=RunIf(min_cuda_gpus=1)), pytest.param(2, marks=RunIf(min_cuda_gpus=2))) ) @@ -135,7 +135,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir): accelerator="gpu", devices=[0], strategy="ddp_spawn", - precision=16, + precision='16-mixed', callbacks=[checkpoint], logger=logger, ) @@ -153,7 +153,7 @@ def test_precision_16_clip_gradients(mock_clip_grad_norm, clip_val, tmpdir): enable_progress_bar=False, max_epochs=1, devices=1, - precision=16, + precision='16-mixed', limit_train_batches=4, limit_val_batches=0, gradient_clip_val=clip_val, diff --git a/tests/tests_pytorch/models/test_ddp_fork_amp.py b/tests/tests_pytorch/models/test_ddp_fork_amp.py index ae873ccad6eb0..7fba705e507bf 100644 --- a/tests/tests_pytorch/models/test_ddp_fork_amp.py +++ b/tests/tests_pytorch/models/test_ddp_fork_amp.py @@ -24,7 +24,7 @@ def test_amp_gpus_ddp_fork(): """Ensure the use of AMP with `ddp_fork` (or associated alias strategies) does not generate CUDA initialization errors.""" - _ = MixedPrecisionPlugin(precision=16, device="cuda") + _ = MixedPrecisionPlugin(precision='16-mixed', device="cuda") with multiprocessing.get_context("fork").Pool(1) as pool: in_bad_fork = pool.apply(torch.cuda._is_in_bad_fork) assert not in_bad_fork diff --git a/tests/tests_pytorch/models/test_hooks.py b/tests/tests_pytorch/models/test_hooks.py index 44bbedc3a819d..41e715db3fcf7 100644 --- a/tests/tests_pytorch/models/test_hooks.py +++ b/tests/tests_pytorch/models/test_hooks.py @@ -401,9 +401,9 @@ def _predict_batch(trainer, model, batches): [ {}, # these precision plugins modify the optimization flow, so testing them explicitly - pytest.param(dict(accelerator="gpu", devices=1, precision=16), marks=RunIf(min_cuda_gpus=1)), + pytest.param(dict(accelerator="gpu", devices=1, precision='16-mixed'), marks=RunIf(min_cuda_gpus=1)), pytest.param( - dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), + dict(accelerator="gpu", devices=1, precision='16-mixed', strategy="deepspeed"), marks=RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True), ), ], @@ -453,7 +453,7 @@ def training_step(self, batch, batch_idx): "loops": ANY, } using_deepspeed = kwargs.get("strategy") == "deepspeed" - if kwargs.get("precision") == 16 and not using_deepspeed: + if kwargs.get("precision") == '16-mixed' and not using_deepspeed: saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY device = torch.device("cuda:0" if "accelerator" in kwargs and kwargs["accelerator"] == "gpu" else "cpu") expected = [ diff --git a/tests/tests_pytorch/models/test_tpu.py b/tests/tests_pytorch/models/test_tpu.py index 790f100fe3f58..ce6db19248f3d 100644 --- a/tests/tests_pytorch/models/test_tpu.py +++ b/tests/tests_pytorch/models/test_tpu.py @@ -104,7 +104,7 @@ def test_model_16bit_tpu_devices_1(tmpdir): """Make sure model trains on TPU.""" trainer_options = dict( default_root_dir=tmpdir, - precision=16, + precision='16-mixed', enable_progress_bar=False, max_epochs=2, accelerator="tpu", @@ -124,7 +124,7 @@ def test_model_16bit_tpu_index(tmpdir, tpu_core): """Make sure model trains on TPU.""" trainer_options = 
dict( default_root_dir=tmpdir, - precision=16, + precision='16-mixed', enable_progress_bar=False, max_epochs=2, accelerator="tpu", @@ -146,7 +146,7 @@ def test_model_16bit_tpu_devices_8(tmpdir): """Make sure model trains on TPU.""" trainer_options = dict( default_root_dir=tmpdir, - precision=16, + precision='16-mixed', enable_progress_bar=False, max_epochs=1, accelerator="tpu", diff --git a/tests/tests_pytorch/plugins/precision/hpu/test_hpu.py b/tests/tests_pytorch/plugins/precision/hpu/test_hpu.py index 718ef030eb507..5ddb295aa548b 100644 --- a/tests/tests_pytorch/plugins/precision/hpu/test_hpu.py +++ b/tests/tests_pytorch/plugins/precision/hpu/test_hpu.py @@ -34,15 +34,15 @@ def hmp_params(request): @RunIf(hpu=True) def test_precision_plugin(hmp_params): - plugin = HPUPrecisionPlugin(precision="bf16", **hmp_params) - assert plugin.precision == "bf16" + plugin = HPUPrecisionPlugin(precision="bf16-mixed", **hmp_params) + assert plugin.precision == "bf16-mixed" @RunIf(hpu=True) def test_mixed_precision(tmpdir, hmp_params: dict): class TestCallback(Callback): def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> None: - assert trainer.precision == "bf16" + assert trainer.precision == "bf16-mixed" raise SystemExit model = BoringModel() @@ -51,12 +51,12 @@ def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> Non fast_dev_run=True, accelerator="hpu", devices=1, - plugins=[HPUPrecisionPlugin(precision="bf16", **hmp_params)], + plugins=[HPUPrecisionPlugin(precision="bf16-mixed", **hmp_params)], callbacks=TestCallback(), ) assert isinstance(trainer.strategy, SingleHPUStrategy) assert isinstance(trainer.strategy.precision_plugin, HPUPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "bf16" + assert trainer.strategy.precision_plugin.precision == "bf16-mixed" with pytest.raises(SystemExit): trainer.fit(model) @@ -65,7 +65,7 @@ def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> Non def test_pure_half_precision(tmpdir, hmp_params: dict): class TestCallback(Callback): def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: - assert trainer.precision == "16" + assert trainer.precision == "16-mixed" for param in trainer.strategy.model.parameters(): assert param.dtype == torch.float16 raise SystemExit @@ -77,13 +77,13 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: fast_dev_run=True, accelerator="hpu", devices=1, - plugins=[HPUPrecisionPlugin(precision=16, **hmp_params)], + plugins=[HPUPrecisionPlugin(precision='16-mixed', **hmp_params)], callbacks=TestCallback(), ) assert isinstance(trainer.strategy, SingleHPUStrategy) assert isinstance(trainer.strategy.precision_plugin, HPUPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "16" + assert trainer.strategy.precision_plugin.precision == "16-mixed" with pytest.raises(RuntimeError, match=r"float16/half is not supported on Gaudi."): trainer.fit(model) diff --git a/tests/tests_pytorch/plugins/precision/test_amp.py b/tests/tests_pytorch/plugins/precision/test_amp.py index 189386cb90502..fed890e471175 100644 --- a/tests/tests_pytorch/plugins/precision/test_amp.py +++ b/tests/tests_pytorch/plugins/precision/test_amp.py @@ -23,7 +23,7 @@ def test_clip_gradients(): """Test that `.clip_gradients()` is a no-op when clipping is disabled.""" optimizer = Mock(spec=Optimizer) - precision = MixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) + precision = 
MixedPrecisionPlugin(precision='16-mixed', device="cuda:0", scaler=Mock()) precision.clip_grad_by_value = Mock() precision.clip_grad_by_norm = Mock() precision.clip_gradients(optimizer) @@ -47,7 +47,7 @@ def test_optimizer_amp_scaling_support_in_step_method(): gradient clipping (example: fused Adam).""" optimizer = Mock(_step_supports_amp_scaling=True) - precision = MixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) + precision = MixedPrecisionPlugin(precision='16-mixed', device="cuda:0", scaler=Mock()) with pytest.raises(RuntimeError, match="The current optimizer.*does not allow for gradient clipping"): precision.clip_gradients(optimizer, clip_val=1.0) diff --git a/tests/tests_pytorch/plugins/precision/test_amp_integration.py b/tests/tests_pytorch/plugins/precision/test_amp_integration.py index 0d7fb3f8e2bc0..895a67c6fecc2 100644 --- a/tests/tests_pytorch/plugins/precision/test_amp_integration.py +++ b/tests/tests_pytorch/plugins/precision/test_amp_integration.py @@ -38,7 +38,7 @@ def run(fused=False): default_root_dir=tmpdir, accelerator="cuda", devices=1, - precision=16, + precision='16-mixed', max_steps=5, logger=False, enable_checkpointing=False, diff --git a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py index 8420c5c793aec..2b31972cc88d4 100644 --- a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py +++ b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py @@ -19,4 +19,4 @@ def test_invalid_precision_with_deepspeed_precision(): with pytest.raises(ValueError, match="is not supported. `precision` must be one of"): - DeepSpeedPrecisionPlugin(precision=64) + DeepSpeedPrecisionPlugin(precision='64-true') diff --git a/tests/tests_pytorch/plugins/test_amp_plugins.py b/tests/tests_pytorch/plugins/test_amp_plugins.py index e542c01967cf7..c20656f8c3a96 100644 --- a/tests/tests_pytorch/plugins/test_amp_plugins.py +++ b/tests/tests_pytorch/plugins/test_amp_plugins.py @@ -54,10 +54,10 @@ class MyAMP(MixedPrecisionPlugin): def test_amp_ddp(cuda_count_2, strategy, devices, custom_plugin, plugin_cls): plugin = None if custom_plugin: - plugin = plugin_cls(16, "cpu") + plugin = plugin_cls('16-mixed', "cpu") trainer = Trainer( fast_dev_run=True, - precision=16, + precision='16-mixed', accelerator="gpu", devices=devices, strategy=strategy, @@ -137,7 +137,7 @@ def test_amp_gradient_unscale(tmpdir, accum: int): strategy="ddp_spawn", accelerator="gpu", devices=2, - precision=16, + precision='16-mixed', # use a tiny value to make sure it works gradient_clip_val=1e-3, gradient_clip_algorithm="value", @@ -179,14 +179,14 @@ def configure_optimizers(self): torch.optim.SGD(self.layer2.parameters(), lr=0.1), ] - trainer = Trainer(default_root_dir=tmpdir, accelerator="gpu", devices=1, fast_dev_run=1, precision=16) + trainer = Trainer(default_root_dir=tmpdir, accelerator="gpu", devices=1, fast_dev_run=1, precision='16-mixed') model = CustomBoringModel() trainer.fit(model) def test_cpu_amp_precision_context_manager(tmpdir): """Test to ensure that the context manager correctly is set to CPU + bfloat16.""" - plugin = MixedPrecisionPlugin("bf16", "cpu") + plugin = MixedPrecisionPlugin("bf16-mixed", "cpu") assert plugin.device == "cpu" assert plugin.scaler is None context_manager = plugin.autocast_context_manager() diff --git a/tests/tests_pytorch/plugins/test_double_plugin.py b/tests/tests_pytorch/plugins/test_double_plugin.py index 9c93f09cad221..b03a660b962b0 100644 --- 
a/tests/tests_pytorch/plugins/test_double_plugin.py +++ b/tests/tests_pytorch/plugins/test_double_plugin.py @@ -135,7 +135,7 @@ def on_fit_start(self): def test_double_precision(tmpdir, boring_model): model = boring_model() - trainer = Trainer(max_epochs=2, default_root_dir=tmpdir, fast_dev_run=2, precision=64, log_every_n_steps=1) + trainer = Trainer(max_epochs=2, default_root_dir=tmpdir, fast_dev_run=2, precision='64-true', log_every_n_steps=1) trainer.fit(model) trainer.test(model) trainer.predict(model) @@ -152,7 +152,7 @@ def test_double_precision_ddp(tmpdir): accelerator="gpu", devices=2, fast_dev_run=2, - precision=64, + precision='64-true', log_every_n_steps=1, ) trainer.fit(model) diff --git a/tests/tests_pytorch/strategies/test_ddp.py b/tests/tests_pytorch/strategies/test_ddp.py index 248e42bd7e69d..833a53f8e799d 100644 --- a/tests/tests_pytorch/strategies/test_ddp.py +++ b/tests/tests_pytorch/strategies/test_ddp.py @@ -96,7 +96,7 @@ def setup(self, stage: str) -> None: @RunIf(min_cuda_gpus=2, standalone=True) -@pytest.mark.parametrize("precision", (16, 32)) +@pytest.mark.parametrize("precision", ('16-mixed', '32-true')) def test_ddp_wrapper(tmpdir, precision): """Test parameters to ignore are carried over for DDP.""" diff --git a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py index e6eeff8c36f5f..c5b67c0bcb6bb 100644 --- a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py +++ b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py @@ -139,12 +139,12 @@ def test_deepspeed_precision_choice(cuda_count_1, tmpdir): default_root_dir=tmpdir, accelerator="gpu", strategy="deepspeed", - precision=16, + precision='16-mixed', ) assert isinstance(trainer.strategy, DeepSpeedStrategy) assert isinstance(trainer.strategy.precision_plugin, DeepSpeedPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "16" + assert trainer.strategy.precision_plugin.precision == "16-mixed" @RunIf(deepspeed=True) @@ -189,7 +189,7 @@ def backward(self, loss: Tensor, *args, **kwargs) -> None: strategy=DeepSpeedStrategy(), accelerator="gpu", devices=1, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -264,7 +264,7 @@ def configure_optimizers(self): accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', callbacks=[TestCB(), lr_monitor], logger=CSVLogger(tmpdir), enable_progress_bar=False, @@ -303,7 +303,7 @@ def on_train_start(self, trainer, pl_module) -> None: limit_val_batches=4, limit_test_batches=4, max_epochs=2, - precision=16, + precision='16-mixed', callbacks=[TestCB(), lr_monitor], logger=CSVLogger(tmpdir), enable_progress_bar=False, @@ -337,7 +337,7 @@ def on_train_start(self, trainer, pl_module) -> None: trainer = Trainer( default_root_dir=tmpdir, strategy=ds, - precision=16, + precision='16-mixed', accelerator="gpu", devices=1, callbacks=[TestCB()], @@ -380,7 +380,7 @@ def test_deepspeed_custom_activation_checkpointing_params_forwarded(tmpdir): default_root_dir=tmpdir, fast_dev_run=1, strategy=ds, - precision=16, + precision='16-mixed', accelerator="gpu", devices=1, enable_progress_bar=False, @@ -413,7 +413,7 @@ def setup(self, trainer, pl_module, stage=None) -> None: enable_progress_bar=False, max_epochs=1, strategy=DeepSpeedStrategy(config=deepspeed_zero_config), - precision=16, + precision='16-mixed', accelerator="gpu", devices=1, callbacks=[TestCallback()], @@ -433,7 +433,7 @@ def test_deepspeed_multigpu(tmpdir): 
accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -476,7 +476,7 @@ def test_deepspeed_stage_3_save_warning(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -508,7 +508,7 @@ def test_deepspeed_multigpu_single_file(tmpdir): accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -524,7 +524,7 @@ def test_deepspeed_multigpu_single_file(tmpdir): accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -626,7 +626,7 @@ def test_deepspeed_multigpu_stage_3(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -646,7 +646,7 @@ def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -672,7 +672,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization strategy=DeepSpeedStrategy(stage=3), accelerator="gpu", devices=2, - precision=16, + precision='16-mixed', accumulate_grad_batches=accumulate_grad_batches, callbacks=[ck], enable_progress_bar=False, @@ -693,7 +693,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization accelerator="gpu", devices=2, strategy=DeepSpeedStrategy(stage=3), - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -722,7 +722,7 @@ def test_deepspeed_multigpu_stage_3_warns_resume_training(tmpdir): strategy=DeepSpeedStrategy(stage=3, load_full_weights=True), accelerator="gpu", devices=1, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -751,7 +751,7 @@ def test_deepspeed_multigpu_stage_3_resume_training(tmpdir): strategy=DeepSpeedStrategy(stage=3), accelerator="gpu", devices=1, - precision=16, + precision='16-mixed', callbacks=[ck], enable_progress_bar=False, enable_model_summary=False, @@ -792,7 +792,7 @@ def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) -> max_epochs=2, limit_train_batches=1, limit_val_batches=0, - precision=16, + precision='16-mixed', callbacks=TestCallback(), enable_progress_bar=False, enable_model_summary=False, @@ -828,7 +828,7 @@ def on_train_batch_start(self, trainer, pl_module: LightningModule, batch: Any, devices=2, limit_train_batches=5, limit_val_batches=2, - precision=16, + precision='16-mixed', accumulate_grad_batches=2, callbacks=[verification_callback], enable_progress_bar=False, @@ -849,7 +849,7 @@ def test_deepspeed_multigpu_test(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -885,7 +885,7 @@ def on_train_epoch_start(self) -> None: accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -912,7 +912,7 @@ def on_train_epoch_start(self) -> None: accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -976,7 +976,7 @@ def 
test_deepspeed_multigpu_no_schedulers(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -998,7 +998,7 @@ def training_step(self, batch, batch_idx): accelerator="gpu", devices=1, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) @@ -1212,7 +1212,7 @@ def test_deepspeed_with_bfloat16_precision(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision="bf16", + precision="bf16-mixed", num_sanity_val_steps=0, enable_progress_bar=False, enable_model_summary=False, @@ -1220,7 +1220,7 @@ def test_deepspeed_with_bfloat16_precision(tmpdir): trainer.fit(model) assert isinstance(trainer.strategy.precision_plugin, DeepSpeedPrecisionPlugin) - assert trainer.strategy.precision_plugin.precision == "bf16" + assert trainer.strategy.precision_plugin.precision == "bf16-mixed" assert trainer.strategy.config["zero_optimization"]["stage"] == 3 assert trainer.strategy.config["bf16"]["enabled"] assert model.layer.weight.dtype == torch.bfloat16 @@ -1271,7 +1271,7 @@ def transfer_batch_to_device(self, batch, *args, **kwargs): return super().transfer_batch_to_device(batch, *args, **kwargs) model = CustomBoringModel() - trainer = Trainer(strategy="deepspeed", devices=1, accelerator="cuda", precision=16) + trainer = Trainer(strategy="deepspeed", devices=1, accelerator="cuda", precision='16-mixed') trainer.strategy.connect(model) batch = torch.zeros((1), dtype=torch.float32) batch = trainer.strategy.batch_to_device(batch) diff --git a/tests/tests_pytorch/strategies/test_fsdp.py b/tests/tests_pytorch/strategies/test_fsdp.py index 42425f581765f..9cef1fe13f6f6 100644 --- a/tests/tests_pytorch/strategies/test_fsdp.py +++ b/tests/tests_pytorch/strategies/test_fsdp.py @@ -64,7 +64,7 @@ def on_predict_batch_end(self, *_) -> None: def _assert_layer_fsdp_instance(self) -> None: assert isinstance(self.layer, FullyShardedDataParallel) assert isinstance(self.trainer.strategy.precision_plugin, FSDPMixedPrecisionPlugin) - precision = torch.float16 if self.trainer.precision == "16" else torch.bfloat16 + precision = torch.float16 if self.trainer.precision == "16-mixed" else torch.bfloat16 assert self.layer.mixed_precision.param_dtype == precision assert self.layer.mixed_precision.reduce_dtype == precision assert self.layer.mixed_precision.buffer_dtype == precision @@ -100,7 +100,7 @@ def _assert_layer_fsdp_instance(self) -> None: assert isinstance(self.layer, torch.nn.Sequential) assert isinstance(self.trainer.strategy.precision_plugin, FSDPMixedPrecisionPlugin) - precision = torch.float16 if self.trainer.precision == "16" else torch.bfloat16 + precision = torch.float16 if self.trainer.precision == "16-mixed" else torch.bfloat16 for layer_num in [0, 2]: assert isinstance(self.layer[layer_num], FullyShardedDataParallel) assert self.layer[layer_num].mixed_precision.param_dtype == precision @@ -164,7 +164,7 @@ def test_invalid_on_cpu(tmpdir): @RunIf(min_torch="1.12", min_cuda_gpus=1) -@pytest.mark.parametrize("precision, expected", [(16, torch.float16), ("bf16", torch.bfloat16)]) +@pytest.mark.parametrize("precision, expected", [('16-mixed', torch.float16), ("bf16-mixed", torch.bfloat16)]) def test_precision_plugin_config(precision, expected): plugin = FSDPMixedPrecisionPlugin(precision=precision, device="cuda") config = plugin.mixed_precision_config @@ -191,7 +191,7 @@ def test_fsdp_strategy_sync_batchnorm(tmpdir): accelerator="gpu", devices=2, 
strategy="fsdp", - precision=16, + precision='16-mixed', max_epochs=1, sync_batchnorm=True, ) @@ -199,7 +199,7 @@ def test_fsdp_strategy_sync_batchnorm(tmpdir): @RunIf(min_cuda_gpus=1, skip_windows=True, standalone=True, min_torch="1.12") -@pytest.mark.parametrize("precision", (16, pytest.param("bf16", marks=RunIf(bf16_cuda=True)))) +@pytest.mark.parametrize("precision", ('16-mixed', pytest.param("bf16-mixed", marks=RunIf(bf16_cuda=True)))) def test_fsdp_strategy_checkpoint(tmpdir, precision): """Test to ensure that checkpoint is saved correctly when using a single GPU, and all stages can be run.""" model = TestFSDPModel() @@ -230,7 +230,7 @@ def test_fsdp_checkpoint_multi_gpus(tmpdir, model, strategy): accelerator="gpu", devices=2, strategy=strategy, - precision=16, + precision='16-mixed', max_epochs=1, limit_train_batches=2, limit_val_batches=2, diff --git a/tests/tests_pytorch/strategies/test_registry.py b/tests/tests_pytorch/strategies/test_registry.py index 270fb028fad7f..9fa2e26f95008 100644 --- a/tests/tests_pytorch/strategies/test_registry.py +++ b/tests/tests_pytorch/strategies/test_registry.py @@ -48,7 +48,7 @@ def test_strategy_registry_with_deepspeed_strategies(strategy_name, init_params) @pytest.mark.parametrize("strategy", ["deepspeed", "deepspeed_stage_2_offload", "deepspeed_stage_3"]) def test_deepspeed_strategy_registry_with_trainer(tmpdir, strategy): - trainer = Trainer(default_root_dir=tmpdir, strategy=strategy, precision=16) + trainer = Trainer(default_root_dir=tmpdir, strategy=strategy, precision='16-mixed') assert isinstance(trainer.strategy, DeepSpeedStrategy) diff --git a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py index 4621648e7c201..35f89bc1d882b 100644 --- a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py +++ b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py @@ -596,14 +596,14 @@ def test_check_fsdp_strategy_and_fallback(): def test_unsupported_tpu_choice(tpu_available): - with pytest.raises(MisconfigurationException, match=r"accelerator='tpu', precision=64\)` is not implemented"): - Trainer(accelerator="tpu", precision=64) + with pytest.raises(MisconfigurationException, match=r"accelerator='tpu', precision='64-true'\)` is not implemented"): + Trainer(accelerator="tpu", precision='64-true') # if user didn't set strategy, AcceleratorConnector will choose the TPUSingleStrategy or TPUSpawnStrategy with pytest.raises(ValueError, match="TPUAccelerator` can only be used with a `SingleTPUStrategy`"), pytest.warns( - UserWarning, match=r"accelerator='tpu', precision=16\)` but AMP is not supported" + UserWarning, match=r"accelerator='tpu', precision=16-mixed\)` but AMP with fp16 is not supported" ): - Trainer(accelerator="tpu", precision=16, strategy="ddp") + Trainer(accelerator="tpu", precision='16-mixed', strategy="ddp") @mock.patch("lightning.pytorch.accelerators.ipu.IPUAccelerator.is_available", return_value=True) @@ -613,10 +613,10 @@ def test_unsupported_ipu_choice(mock_ipu_acc_avail, monkeypatch): monkeypatch.setattr(ipu_, "_IPU_AVAILABLE", True) monkeypatch.setattr(ipu, "_IPU_AVAILABLE", True) - with pytest.raises(ValueError, match=r"accelerator='ipu', precision='bf16'\)` is not supported"): - Trainer(accelerator="ipu", precision="bf16") - with pytest.raises(ValueError, match=r"accelerator='ipu', precision='64'\)` is not supported"): - Trainer(accelerator="ipu", precision=64) + with pytest.raises(ValueError, 
match=r"accelerator='ipu', precision='bf16-mixed'\)` is not supported"): + Trainer(accelerator="ipu", precision="bf16-mixed") + with pytest.raises(ValueError, match=r"accelerator='ipu', precision='64-true'\)` is not supported"): + Trainer(accelerator="ipu", precision="64-true") @mock.patch("lightning.pytorch.accelerators.tpu._XLA_AVAILABLE", return_value=False) @@ -846,5 +846,5 @@ def test_colossalai_external_strategy(monkeypatch): from lightning_colossalai import ColossalAIStrategy - trainer = Trainer(strategy="colossalai", precision=16) + trainer = Trainer(strategy="colossalai", precision='16-mixed') assert isinstance(trainer.strategy, ColossalAIStrategy) diff --git a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py index 8a5bedf8efabe..d181dfa76067e 100644 --- a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py +++ b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py @@ -72,7 +72,7 @@ def configure_optimizers(self): @pytest.mark.parametrize( - "kwargs", [{}, pytest.param({"accelerator": "gpu", "devices": 1, "precision": 16}, marks=RunIf(min_cuda_gpus=1))] + "kwargs", [{}, pytest.param({"accelerator": "gpu", "devices": 1, "precision": '16-mixed'}, marks=RunIf(min_cuda_gpus=1))] ) def test_multiple_optimizers_manual_call_counts(tmpdir, kwargs): model = ManualOptModel() @@ -87,7 +87,7 @@ def test_multiple_optimizers_manual_call_counts(tmpdir, kwargs): **kwargs, ) - if kwargs.get("precision") == 16: + if kwargs.get("precision") == '16-mixed': # mock the scaler instead of the optimizer step because it can be skipped with NaNs scaler_step_patch = mock.patch.object( trainer.precision_plugin.scaler, "step", wraps=trainer.precision_plugin.scaler.step @@ -99,7 +99,7 @@ def test_multiple_optimizers_manual_call_counts(tmpdir, kwargs): assert bwd_mock.call_count == limit_train_batches * 3 assert trainer.global_step == limit_train_batches * 2 - if kwargs.get("precision") == 16: + if kwargs.get("precision") == '16-mixed': scaler_step_patch.stop() assert scaler_step.call_count == len(model.optimizers()) * limit_train_batches @@ -141,7 +141,7 @@ def test_multiple_optimizers_manual_amp(tmpdir, accelerator): max_epochs=1, log_every_n_steps=1, enable_model_summary=False, - precision=16, + precision='16-mixed', accelerator=accelerator, devices=1, ) @@ -224,7 +224,7 @@ def test_manual_optimization_and_return_tensor(tmpdir): limit_train_batches=10, limit_test_batches=0, limit_val_batches=0, - precision=16, + precision='16-mixed', strategy="ddp_spawn", accelerator="gpu", devices=2, @@ -309,7 +309,7 @@ def on_train_epoch_end(self, *_, **__): limit_train_batches=20, limit_test_batches=0, limit_val_batches=0, - precision=16, + precision='16-mixed', accelerator="gpu", devices=1, ) @@ -383,7 +383,7 @@ def on_before_optimizer_step(self, optimizer, *_): max_epochs=1, log_every_n_steps=1, enable_model_summary=False, - precision=16, + precision='16-mixed', accelerator="gpu", devices=1, ) @@ -848,7 +848,7 @@ def test_lr_scheduler_step_not_called(tmpdir): @RunIf(min_cuda_gpus=1) -@pytest.mark.parametrize("precision", [16, 32]) +@pytest.mark.parametrize("precision", ['16-mixed', '32-true']) def test_multiple_optimizers_logging(precision, tmpdir): """Tests that metrics are properly being logged.""" diff --git a/tests/tests_pytorch/trainer/test_trainer.py b/tests/tests_pytorch/trainer/test_trainer.py index 8d7b044d24a12..d1c42fa3499d0 100644 --- a/tests/tests_pytorch/trainer/test_trainer.py +++ 
b/tests/tests_pytorch/trainer/test_trainer.py @@ -1019,7 +1019,7 @@ def on_exception(self, trainer, pl_module, exception): assert isinstance(handle_interrupt_callback.exception, MisconfigurationException) -@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_cuda_gpus=1))]) +@pytest.mark.parametrize("precision", ['32-true', pytest.param('16-mixed', marks=RunIf(min_cuda_gpus=1))]) @RunIf(sklearn=True) def test_gradient_clipping_by_norm(tmpdir, precision): """Test gradient clipping by norm.""" @@ -1048,7 +1048,7 @@ def configure_gradient_clipping(self, *args, **kwargs): assert model.assertion_called -@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_cuda_gpus=1))]) +@pytest.mark.parametrize("precision", ['32-true', pytest.param('16-mixed', marks=RunIf(min_cuda_gpus=1))]) def test_gradient_clipping_by_value(tmpdir, precision): """Test gradient clipping by value.""" trainer = Trainer( @@ -1444,7 +1444,7 @@ def test_spawn_predict_return_predictions(tmpdir): @pytest.mark.parametrize("return_predictions", [None, False, True]) -@pytest.mark.parametrize("precision", [32, 64]) +@pytest.mark.parametrize("precision", ['32-true', '64-true']) def test_predict_return_predictions_cpu(return_predictions, precision, tmpdir): """Test that `return_predictions=True`.""" seed_everything(42) @@ -1455,7 +1455,7 @@ def test_predict_return_predictions_cpu(return_predictions, precision, tmpdir): if return_predictions or return_predictions is None: assert len(preds) == 1 assert preds[0].shape == torch.Size([1, 2]) - assert preds[0].dtype == (torch.float64 if precision == 64 else torch.float32) + assert preds[0].dtype == (torch.float64 if precision == '64-true' else torch.float32) @pytest.mark.parametrize(["max_steps", "max_epochs", "global_step"], [(10, 5, 10), (20, None, 20)]) diff --git a/tests/tests_pytorch/tuner/test_scale_batch_size.py b/tests/tests_pytorch/tuner/test_scale_batch_size.py index 08b94a4763a8f..ea55b513d1ab4 100644 --- a/tests/tests_pytorch/tuner/test_scale_batch_size.py +++ b/tests/tests_pytorch/tuner/test_scale_batch_size.py @@ -254,7 +254,7 @@ def test_error_on_dataloader_passed_to_fit(tmpdir): def test_auto_scale_batch_size_with_amp(tmpdir): before_batch_size = 2 model = BatchSizeModel(batch_size=before_batch_size) - trainer = Trainer(default_root_dir=tmpdir, max_steps=1, accelerator="gpu", devices=1, precision=16) + trainer = Trainer(default_root_dir=tmpdir, max_steps=1, accelerator="gpu", devices=1, precision='16-mixed') tuner = Tuner(trainer) tuner.scale_batch_size(model) after_batch_size = model.batch_size diff --git a/tests/tests_pytorch/utilities/test_deepspeed_collate_checkpoint.py b/tests/tests_pytorch/utilities/test_deepspeed_collate_checkpoint.py index 5c0cb588ebe5b..314501ef0e8ee 100644 --- a/tests/tests_pytorch/utilities/test_deepspeed_collate_checkpoint.py +++ b/tests/tests_pytorch/utilities/test_deepspeed_collate_checkpoint.py @@ -32,7 +32,7 @@ def test_deepspeed_collate_checkpoint(tmpdir): accelerator="gpu", devices=2, fast_dev_run=True, - precision=16, + precision='16-mixed', enable_progress_bar=False, enable_model_summary=False, ) diff --git a/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py b/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py index f06c101db8246..8d734ad74649f 100644 --- a/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py +++ b/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py @@ -45,7 +45,7 @@ def on_fit_start(self, trainer: "pl.Trainer", pl_module: 
"pl.LightningModule") - accelerator="gpu", fast_dev_run=True, devices=2, - precision=16, + precision='16-mixed', enable_model_summary=True, callbacks=[TestCallback()], ) diff --git a/tests/tests_pytorch/utilities/test_torchdistx.py b/tests/tests_pytorch/utilities/test_torchdistx.py index 9fee068cee9ab..1491dc5106443 100644 --- a/tests/tests_pytorch/utilities/test_torchdistx.py +++ b/tests/tests_pytorch/utilities/test_torchdistx.py @@ -55,7 +55,7 @@ def test_deferred_init_with_lightning_module(): ( {"accelerator": "auto", "devices": 1}, pytest.param( - {"strategy": "deepspeed_stage_3", "accelerator": "gpu", "devices": 2, "precision": 16}, + {"strategy": "deepspeed_stage_3", "accelerator": "gpu", "devices": 2, "precision": '16-mixed'}, marks=RunIf(min_cuda_gpus=2, deepspeed=True), ), ),