From 2cd6f6a169a87abf0ee801ceabdf8a9724fdfac2 Mon Sep 17 00:00:00 2001 From: Jirka B Date: Mon, 9 Sep 2024 10:35:08 +0200 Subject: [PATCH 01/11] docs: fix link to WIP --- docs/source/links.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/links.rst b/docs/source/links.rst index 1fc3ab5755d..14597ae3f37 100644 --- a/docs/source/links.rst +++ b/docs/source/links.rst @@ -91,7 +91,7 @@ .. _CER: https://rechtsprechung-im-ostseeraum.archiv.uni-greifswald.de/word-error-rate-character-error-rate-how-to-evaluate-a-model .. _MER: https://www.isca-speech.org/archive/interspeech_2004/morris04_interspeech.html .. _WIL: https://www.isca-speech.org/archive/interspeech_2004/morris04_interspeech.html -.. _WIP: https://infoscience.epfl.ch/entities/publication/9983d013-8239-422e-a3f7-a1500d309474 +.. _WIP: https://www.isca-archive.org/interspeech_2004/morris04_interspeech.pdf .. _TV: https://en.wikipedia.org/wiki/Total_variation_denoising .. _InfoLM: https://arxiv.org/abs/2112.01589 .. _alpha divergence: https://static.renyi.hu/renyi_cikkek/1961_on_measures_of_entropy_and_information.pdf From 144f6d6290a1e78f49c49991d34eebed2e0906e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 10 Sep 2024 10:03:09 +0200 Subject: [PATCH 02/11] build(deps): bump torch from 2.4.0 to 2.4.1 in /requirements (#2729) Bumps [torch](https://github.com/pytorch/pytorch) from 2.4.0 to 2.4.1. - [Release notes](https://github.com/pytorch/pytorch/releases) - [Changelog](https://github.com/pytorch/pytorch/blob/main/RELEASE.md) - [Commits](https://github.com/pytorch/pytorch/compare/v2.4.0...v2.4.1) --- updated-dependencies: - dependency-name: torch dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements/typing.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/typing.txt b/requirements/typing.txt index 9b14e1e9761..2b8cf0b5082 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,5 +1,5 @@ mypy ==1.11.2 -torch ==2.4.0 +torch ==2.4.1 types-PyYAML types-emoji From eecc55bc59395f66a4d50eb1e359ba38d59da30e Mon Sep 17 00:00:00 2001 From: Nicki Skafte Detlefsen Date: Tue, 10 Sep 2024 20:15:57 +0200 Subject: [PATCH 03/11] Fix how `prefix`/`posfix` works in `MultitaskWrapper` (#2722) * implementation * tests * changelog * fix mypy --- CHANGELOG.md | 3 + src/torchmetrics/wrappers/multitask.py | 102 ++++++++++++++------- tests/unittests/wrappers/test_multitask.py | 28 ++++-- 3 files changed, 93 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bd51af0290..f2f3def0013 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Correct the padding related calculation errors in SSIM ([#2721](https://github.com/Lightning-AI/torchmetrics/pull/2721)) +- Fixed how `prefix`/`postfix` works in `MultitaskWrapper` ([#2722](https://github.com/Lightning-AI/torchmetrics/pull/2722)) + + ## [1.4.1] - 2024-08-02 ### Changed diff --git a/src/torchmetrics/wrappers/multitask.py b/src/torchmetrics/wrappers/multitask.py index 556a7183638..fa2f04db97d 100644 --- a/src/torchmetrics/wrappers/multitask.py +++ b/src/torchmetrics/wrappers/multitask.py @@ -38,12 +38,27 @@ class MultitaskWrapper(WrapperMetric): task_metrics: Dictionary associating each task to a Metric or a MetricCollection. The keys of the dictionary represent the names of the tasks, and the values represent the metrics to use for each task. + prefix: + A string to append in front of the metric keys. If not provided, will default to an empty string. + postfix: + A string to append after the keys of the output dict. If not provided, will default to an empty string. + + .. note:: + The use pre prefix and postfix allows for easily creating task wrappers for training, validation and test. + The arguments are only changing the output keys of the computed metrics and not the input keys. This means + that a ``MultitaskWrapper`` initialized as ``MultitaskWrapper({"task": Metric()}, prefix="train_")`` will + still expect the input to be a dictionary with the key "task", but the output will be a dictionary with the key + "train_task". 
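For illustration, a minimal sketch of the behaviour described in the note above (the task name ``"task"``, the prefix ``"train_"`` and the tensors are arbitrary; the key handling follows the code added in this patch):

>>> import torch
>>> from torchmetrics.wrappers import MultitaskWrapper
>>> from torchmetrics.classification import BinaryAccuracy
>>> wrapper = MultitaskWrapper({"task": BinaryAccuracy()}, prefix="train_")
>>> # the *input* dictionaries still use the bare task name ...
>>> wrapper.update({"task": torch.tensor([0, 1, 1])}, {"task": torch.tensor([0, 1, 0])})
>>> wrapper.compute()  # ... while the *output* key carries the prefix
{'train_task': tensor(0.6667)}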
Raises: TypeError: If argument `task_metrics` is not an dictionary TypeError: If not all values in the `task_metrics` dictionary is instances of `Metric` or `MetricCollection` + ValueError: + If `prefix` is not a string + ValueError: + If `postfix` is not a string Example (with a single metric per class): >>> import torch @@ -91,18 +106,59 @@ class MultitaskWrapper(WrapperMetric): {'Classification': {'BinaryAccuracy': tensor(0.3333), 'BinaryF1Score': tensor(0.)}, 'Regression': {'MeanSquaredError': tensor(0.8333), 'MeanAbsoluteError': tensor(0.6667)}} + Example (with a prefix and postfix): + >>> import torch + >>> from torchmetrics.wrappers import MultitaskWrapper + >>> from torchmetrics.regression import MeanSquaredError + >>> from torchmetrics.classification import BinaryAccuracy + >>> + >>> classification_target = torch.tensor([0, 1, 0]) + >>> regression_target = torch.tensor([2.5, 5.0, 4.0]) + >>> targets = {"Classification": classification_target, "Regression": regression_target} + >>> classification_preds = torch.tensor([0, 0, 1]) + >>> regression_preds = torch.tensor([3.0, 5.0, 2.5]) + >>> preds = {"Classification": classification_preds, "Regression": regression_preds} + >>> + >>> metrics = MultitaskWrapper({ + ... "Classification": BinaryAccuracy(), + ... "Regression": MeanSquaredError() + ... }, prefix="train_") + >>> metrics.update(preds, targets) + >>> metrics.compute() + {'train_Classification': tensor(0.3333), 'train_Regression': tensor(0.8333)} + """ - is_differentiable = False + is_differentiable: bool = False def __init__( self, task_metrics: Dict[str, Union[Metric, MetricCollection]], + prefix: Optional[str] = None, + postfix: Optional[str] = None, ) -> None: - self._check_task_metrics_type(task_metrics) super().__init__() + + if not isinstance(task_metrics, dict): + raise TypeError(f"Expected argument `task_metrics` to be a dict. Found task_metrics = {task_metrics}") + + for metric in task_metrics.values(): + if not (isinstance(metric, (Metric, MetricCollection))): + raise TypeError( + "Expected each task's metric to be a Metric or a MetricCollection. " + f"Found a metric of type {type(metric)}" + ) + self.task_metrics = nn.ModuleDict(task_metrics) + if prefix is not None and not isinstance(prefix, str): + raise ValueError(f"Expected argument `prefix` to either be `None` or a string but got {prefix}") + self._prefix = prefix or "" + + if postfix is not None and not isinstance(postfix, str): + raise ValueError(f"Expected argument `postfix` to either be `None` or a string but got {postfix}") + self._postfix = postfix or "" + def items(self, flatten: bool = True) -> Iterable[Tuple[str, nn.Module]]: """Iterate over task and task metrics. @@ -114,9 +170,9 @@ def items(self, flatten: bool = True) -> Iterable[Tuple[str, nn.Module]]: for task_name, metric in self.task_metrics.items(): if flatten and isinstance(metric, MetricCollection): for sub_metric_name, sub_metric in metric.items(): - yield f"{task_name}_{sub_metric_name}", sub_metric + yield f"{self._prefix}{task_name}_{sub_metric_name}{self._postfix}", sub_metric else: - yield task_name, metric + yield f"{self._prefix}{task_name}{self._postfix}", metric def keys(self, flatten: bool = True) -> Iterable[str]: """Iterate over task names. 
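A rough sketch of how key iteration behaves once a task holds a ``MetricCollection`` (the metric names and their ordering are only illustrative; the key pattern follows the ``items``/``keys`` code in this hunk):

>>> from torchmetrics import MetricCollection
>>> from torchmetrics.classification import BinaryAccuracy, BinaryF1Score
>>> from torchmetrics.wrappers import MultitaskWrapper
>>> metrics = MultitaskWrapper(
...     {"Classification": MetricCollection([BinaryAccuracy(), BinaryF1Score()])}, prefix="val_"
... )
>>> list(metrics.keys(flatten=True))
['val_Classification_BinaryAccuracy', 'val_Classification_BinaryF1Score']
>>> list(metrics.keys(flatten=False))
['val_Classification']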
@@ -129,9 +185,9 @@ def keys(self, flatten: bool = True) -> Iterable[str]: for task_name, metric in self.task_metrics.items(): if flatten and isinstance(metric, MetricCollection): for sub_metric_name in metric: - yield f"{task_name}_{sub_metric_name}" + yield f"{self._prefix}{task_name}_{sub_metric_name}{self._postfix}" else: - yield task_name + yield f"{self._prefix}{task_name}{self._postfix}" def values(self, flatten: bool = True) -> Iterable[nn.Module]: """Iterate over task metrics. @@ -147,18 +203,6 @@ def values(self, flatten: bool = True) -> Iterable[nn.Module]: else: yield metric - @staticmethod - def _check_task_metrics_type(task_metrics: Dict[str, Union[Metric, MetricCollection]]) -> None: - if not isinstance(task_metrics, dict): - raise TypeError(f"Expected argument `task_metrics` to be a dict. Found task_metrics = {task_metrics}") - - for metric in task_metrics.values(): - if not (isinstance(metric, (Metric, MetricCollection))): - raise TypeError( - "Expected each task's metric to be a Metric or a MetricCollection. " - f"Found a metric of type {type(metric)}" - ) - def update(self, task_preds: Dict[str, Any], task_targets: Dict[str, Any]) -> None: """Update each task's metric with its corresponding pred and target. @@ -179,9 +223,13 @@ def update(self, task_preds: Dict[str, Any], task_targets: Dict[str, Any]) -> No target = task_targets[task_name] metric.update(pred, target) + def _convert_output(self, output: Dict[str, Any]) -> Dict[str, Any]: + """Convert the output of the underlying metrics to a dictionary with the task names as keys.""" + return {f"{self._prefix}{task_name}{self._postfix}": task_output for task_name, task_output in output.items()} + def compute(self) -> Dict[str, Any]: """Compute metrics for all tasks.""" - return {task_name: metric.compute() for task_name, metric in self.task_metrics.items()} + return self._convert_output({task_name: metric.compute() for task_name, metric in self.task_metrics.items()}) def forward(self, task_preds: Dict[str, Tensor], task_targets: Dict[str, Tensor]) -> Dict[str, Any]: """Call underlying forward methods for all tasks and return the result as a dictionary.""" @@ -189,10 +237,10 @@ def forward(self, task_preds: Dict[str, Tensor], task_targets: Dict[str, Tensor] # value of full_state_update, and that also accumulates the results. Here, all computations are handled by the # underlying metrics, which all have their own value of full_state_update, and which all accumulate the results # by themselves. 
- return { + return self._convert_output({ task_name: metric(task_preds[task_name], task_targets[task_name]) for task_name, metric in self.task_metrics.items() - } + }) def reset(self) -> None: """Reset all underlying metrics.""" @@ -215,16 +263,8 @@ def clone(self, prefix: Optional[str] = None, postfix: Optional[str] = None) -> """ multitask_copy = deepcopy(self) - if prefix is not None: - prefix = self._check_arg(prefix, "prefix") - multitask_copy.task_metrics = nn.ModuleDict({ - prefix + key: value for key, value in multitask_copy.task_metrics.items() - }) - if postfix is not None: - postfix = self._check_arg(postfix, "postfix") - multitask_copy.task_metrics = nn.ModuleDict({ - key + postfix: value for key, value in multitask_copy.task_metrics.items() - }) + multitask_copy._prefix = self._check_arg(prefix, "prefix") or "" + multitask_copy._postfix = self._check_arg(postfix, "prefix") or "" return multitask_copy def plot( diff --git a/tests/unittests/wrappers/test_multitask.py b/tests/unittests/wrappers/test_multitask.py index 63af6f31b35..069a4472d64 100644 --- a/tests/unittests/wrappers/test_multitask.py +++ b/tests/unittests/wrappers/test_multitask.py @@ -248,14 +248,24 @@ def test_key_value_items_method(method, flatten): def test_clone_with_prefix_and_postfix(): """Check that the clone method works with prefix and postfix arguments.""" - multitask_metrics = MultitaskWrapper({"Classification": BinaryAccuracy(), "Regression": MeanSquaredError()}) - cloned_metrics_with_prefix = multitask_metrics.clone(prefix="prefix_") - cloned_metrics_with_postfix = multitask_metrics.clone(postfix="_postfix") + multitask_metrics = MultitaskWrapper( + {"Classification": BinaryAccuracy(), "Regression": MeanSquaredError()}, + prefix="prefix_", + postfix="_postfix", + ) + assert set(multitask_metrics.keys()) == {"prefix_Classification_postfix", "prefix_Regression_postfix"} - # Check if the cloned metrics have the expected keys - assert set(cloned_metrics_with_prefix.task_metrics.keys()) == {"prefix_Classification", "prefix_Regression"} - assert set(cloned_metrics_with_postfix.task_metrics.keys()) == {"Classification_postfix", "Regression_postfix"} + output = multitask_metrics( + {"Classification": _classification_preds, "Regression": _regression_preds}, + {"Classification": _classification_target, "Regression": _regression_target}, + ) + assert set(output.keys()) == {"prefix_Classification_postfix", "prefix_Regression_postfix"} - # Check if the cloned metrics have the expected values - assert isinstance(cloned_metrics_with_prefix.task_metrics["prefix_Classification"], BinaryAccuracy) - assert isinstance(cloned_metrics_with_prefix.task_metrics["prefix_Regression"], MeanSquaredError) + cloned_metrics = multitask_metrics.clone(prefix="new_prefix_", postfix="_new_postfix") + assert set(cloned_metrics.keys()) == {"new_prefix_Classification_new_postfix", "new_prefix_Regression_new_postfix"} + + output = cloned_metrics( + {"Classification": _classification_preds, "Regression": _regression_preds}, + {"Classification": _classification_target, "Regression": _regression_target}, + ) + assert set(output.keys()) == {"new_prefix_Classification_new_postfix", "new_prefix_Regression_new_postfix"} From 80929b53db36cc045da4b8b017db82e46a5159bb Mon Sep 17 00:00:00 2001 From: Rittik Panda <99414608+rittik9@users.noreply.github.com> Date: Tue, 10 Sep 2024 23:49:16 +0530 Subject: [PATCH 04/11] Fix: handle zero division error in binary IoU calculation (#2726) * Fix: Handle zero division error in binary IoU (Jaccard index) 
calculation * chlog --------- Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> --- CHANGELOG.md | 3 +++ .../functional/classification/jaccard.py | 2 +- .../unittests/classification/test_jaccard.py | 21 +++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2f3def0013..4bac68f736f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed handling zero division error in binary IoU (Jaccard index) calculation ([#2726](https://github.com/Lightning-AI/torchmetrics/pull/2726)) + + - Correct the padding related calculation errors in SSIM ([#2721](https://github.com/Lightning-AI/torchmetrics/pull/2721)) diff --git a/src/torchmetrics/functional/classification/jaccard.py b/src/torchmetrics/functional/classification/jaccard.py index 1d240df68af..dfddd68255f 100644 --- a/src/torchmetrics/functional/classification/jaccard.py +++ b/src/torchmetrics/functional/classification/jaccard.py @@ -67,7 +67,7 @@ def _jaccard_index_reduce( raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") confmat = confmat.float() if average == "binary": - return confmat[1, 1] / (confmat[0, 1] + confmat[1, 0] + confmat[1, 1]) + return _safe_divide(confmat[1, 1], (confmat[0, 1] + confmat[1, 0] + confmat[1, 1]), zero_division=zero_division) ignore_index_cond = ignore_index is not None and 0 <= ignore_index < confmat.shape[0] multilabel = confmat.ndim == 3 diff --git a/tests/unittests/classification/test_jaccard.py b/tests/unittests/classification/test_jaccard.py index 6901868eac9..e7afdb557a6 100644 --- a/tests/unittests/classification/test_jaccard.py +++ b/tests/unittests/classification/test_jaccard.py @@ -26,6 +26,7 @@ MultilabelJaccardIndex, ) from torchmetrics.functional.classification.jaccard import ( + _jaccard_index_reduce, binary_jaccard_index, multiclass_jaccard_index, multilabel_jaccard_index, @@ -403,6 +404,26 @@ def test_corner_case(): assert torch.allclose(res, out) +def test_jaccard_index_zero_division(): + """Issue: https://github.com/Lightning-AI/torchmetrics/issues/2658.""" + # Test case where all pixels are background (zeros) + confmat = torch.tensor([[4, 0], [0, 0]]) + + # Test with zero_division=0.0 + result = _jaccard_index_reduce(confmat, average="binary", zero_division=0.0) + assert result == 0.0, f"Expected 0.0, but got {result}" + + # Test with zero_division=1.0 + result = _jaccard_index_reduce(confmat, average="binary", zero_division=1.0) + assert result == 1.0, f"Expected 1.0, but got {result}" + + # Test case with some foreground pixels + confmat = torch.tensor([[2, 1], [1, 1]]) + result = _jaccard_index_reduce(confmat, average="binary", zero_division=0.0) + expected = 1 / 3 + assert torch.isclose(result, torch.tensor(expected)), f"Expected {expected}, but got {result}" + + @pytest.mark.parametrize( ("metric", "kwargs"), [ From f12e7af65ef14baec63c199af9a7e69a403b3c04 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Wed, 11 Sep 2024 14:30:49 +0200 Subject: [PATCH 05/11] fix: compatibility audio do with new `scipy` (#2733) * compatibility audio do with new `scipy` * smaller array to fix torch.unique case --------- Co-authored-by: Nicki Skafte Detlefsen --- CHANGELOG.md | 3 +++ src/torchmetrics/__init__.py | 7 +++++++ src/torchmetrics/functional/nominal/__init__.py | 1 + src/torchmetrics/nominal/__init__.py | 1 + 
src/torchmetrics/utilities/imports.py | 1 + tests/unittests/classification/test_stat_scores.py | 4 ++-- 6 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bac68f736f..0fc6c936492 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Correct the padding related calculation errors in SSIM ([#2721](https://github.com/Lightning-AI/torchmetrics/pull/2721)) +- Fixed compatibility of audio domain with new `scipy` ([#2733](https://github.com/Lightning-AI/torchmetrics/pull/2733)) + + - Fixed how `prefix`/`postfix` works in `MultitaskWrapper` ([#2722](https://github.com/Lightning-AI/torchmetrics/pull/2722)) diff --git a/src/torchmetrics/__init__.py b/src/torchmetrics/__init__.py index b1549dfaf8b..2fa370cb1c9 100644 --- a/src/torchmetrics/__init__.py +++ b/src/torchmetrics/__init__.py @@ -20,6 +20,13 @@ if not hasattr(PIL, "PILLOW_VERSION"): PIL.PILLOW_VERSION = PIL.__version__ +if package_available("scipy"): + import scipy.signal + + # back compatibility patch due to SMRMpy using scipy.signal.hamming + if not hasattr(scipy.signal, "hamming"): + scipy.signal.hamming = scipy.signal.windows.hamming + from torchmetrics import functional # noqa: E402 from torchmetrics.aggregation import ( # noqa: E402 CatMetric, diff --git a/src/torchmetrics/functional/nominal/__init__.py b/src/torchmetrics/functional/nominal/__init__.py index f29dd9302f0..772cb395895 100644 --- a/src/torchmetrics/functional/nominal/__init__.py +++ b/src/torchmetrics/functional/nominal/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from torchmetrics.functional.nominal.cramers import cramers_v, cramers_v_matrix from torchmetrics.functional.nominal.fleiss_kappa import fleiss_kappa from torchmetrics.functional.nominal.pearson import ( diff --git a/src/torchmetrics/nominal/__init__.py b/src/torchmetrics/nominal/__init__.py index f23a7eb8c6b..e36da870308 100644 --- a/src/torchmetrics/nominal/__init__.py +++ b/src/torchmetrics/nominal/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ from torchmetrics.nominal.cramers import CramersV from torchmetrics.nominal.fleiss_kappa import FleissKappa from torchmetrics.nominal.pearson import PearsonsContingencyCoefficient diff --git a/src/torchmetrics/utilities/imports.py b/src/torchmetrics/utilities/imports.py index b40a334558f..10affebf579 100644 --- a/src/torchmetrics/utilities/imports.py +++ b/src/torchmetrics/utilities/imports.py @@ -64,6 +64,7 @@ _MECAB_KO_DIC_AVAILABLE = RequirementCache("mecab_ko_dic") _IPADIC_AVAILABLE = RequirementCache("ipadic") _SENTENCEPIECE_AVAILABLE = RequirementCache("sentencepiece") +_SCIPI_AVAILABLE = RequirementCache("scipy") _SKLEARN_GREATER_EQUAL_1_3 = RequirementCache("scikit-learn>=1.3.0") _LATEX_AVAILABLE: bool = shutil.which("latex") is not None diff --git a/tests/unittests/classification/test_stat_scores.py b/tests/unittests/classification/test_stat_scores.py index 53fa78d0368..5ea4c206bc0 100644 --- a/tests/unittests/classification/test_stat_scores.py +++ b/tests/unittests/classification/test_stat_scores.py @@ -582,8 +582,8 @@ def test_support_for_int(): """See issue: https://github.com/Lightning-AI/torchmetrics/issues/1970.""" seed_all(42) metric = MulticlassStatScores(num_classes=4, average="none", multidim_average="samplewise", ignore_index=0) - prediction = torch.randint(low=0, high=4, size=(1, 224, 224)).to(torch.uint8) - label = torch.randint(low=0, high=4, size=(1, 224, 224)).to(torch.uint8) + prediction = torch.randint(low=0, high=4, size=(1, 50, 50)).to(torch.uint8) + label = torch.randint(low=0, high=4, size=(1, 50, 50)).to(torch.uint8) score = metric(preds=prediction, target=label) assert score.shape == (1, 4, 5) From 708f11d0004cc7a6ea493980a733e6066a9bd6b2 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Wed, 11 Sep 2024 22:03:31 +0200 Subject: [PATCH 06/11] bump some testing requirements (#2736) * bump some testing requirements + matplotlib >=3.6.0 + dython ~=0.7.6 --- requirements/classification_test.txt | 2 +- requirements/nominal_test.txt | 4 ++-- requirements/visual.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/classification_test.txt b/requirements/classification_test.txt index 688067c482a..8cadb2b3e3e 100644 --- a/requirements/classification_test.txt +++ b/requirements/classification_test.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -pandas >=1.4.0, <=2.2.2 +pandas >1.4.0, <=2.2.2 netcal >1.0.0, <1.4.0 # calibration_error numpy <2.2.0 fairlearn # group_fairness diff --git a/requirements/nominal_test.txt b/requirements/nominal_test.txt index 09f77b20475..7ee5809af6f 100644 --- a/requirements/nominal_test.txt +++ b/requirements/nominal_test.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -pandas >1.0.0, <=2.2.2 # cannot pin version due to numpy version incompatibility -dython <=0.7.7 +pandas >1.4.0, <=2.2.2 # cannot pin version due to numpy version incompatibility +dython ~=0.7.6 scipy >1.0.0, <1.15.0 # cannot pin version due to some version conflicts with `oldest` CI configuration statsmodels >0.13.5, <0.15.0 diff --git a/requirements/visual.txt 
b/requirements/visual.txt index 269a45fc7bb..1cdc4060a8b 100644 --- a/requirements/visual.txt +++ b/requirements/visual.txt @@ -1,5 +1,5 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -matplotlib >=3.3.0, <3.10.0 +matplotlib >=3.6.0, <3.10.0 SciencePlots >= 2.0.0, <2.2.0 From 4ce2278166d453e33ae197f69e1a3bcb5409b7c2 Mon Sep 17 00:00:00 2001 From: Nicki Skafte Detlefsen Date: Thu, 12 Sep 2024 00:07:57 +0200 Subject: [PATCH 07/11] Deprecate `num_outputs` in R2 because it is no longer needed (#2705) * trying to see what happens * fix doctests * add deprecation test * Apply suggestions from code review --------- Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> --- CHANGELOG.md | 5 ++++ src/torchmetrics/regression/r2.py | 47 ++++++++++++++++++++---------- tests/unittests/test_deprecated.py | 8 ++++- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fc6c936492..d3eafaf8be5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - update `InfoLM` class to dynamically set `higher_is_better` ([#2674](https://github.com/Lightning-AI/torchmetrics/pull/2674)) +### Deprecated + +- Deprecated `num_outputs` in `R2Score` ([#2705](https://github.com/Lightning-AI/torchmetrics/pull/2705)) + + ### Removed - diff --git a/src/torchmetrics/regression/r2.py b/src/torchmetrics/regression/r2.py index 611d62a745c..be6b2af33d8 100644 --- a/src/torchmetrics/regression/r2.py +++ b/src/torchmetrics/regression/r2.py @@ -13,11 +13,11 @@ # limitations under the License. from typing import Any, Optional, Sequence, Union -import torch from torch import Tensor, tensor from torchmetrics.functional.regression.r2 import _r2_score_compute, _r2_score_update from torchmetrics.metric import Metric +from torchmetrics.utilities import rank_zero_warn from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE @@ -65,23 +65,32 @@ class R2Score(Metric): * ``'variance_weighted'`` scores are weighted by their individual variances kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. + .. warning:: + Argument ``num_outputs`` in ``R2Score`` has been deprecated because it is no longer necessary and will be + removed in v1.6.0 of TorchMetrics. The number of outputs is now automatically inferred from the shape + of the input tensors. + Raises: ValueError: If ``adjusted`` parameter is not an integer larger or equal to 0. ValueError: If ``multioutput`` is not one of ``"raw_values"``, ``"uniform_average"`` or ``"variance_weighted"``. 
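A small sketch of the deprecation path; the printed score is simply the uniform average of the two per-output values (0.9654 and 0.9082) shown in the multioutput example below:

>>> import torch
>>> from torchmetrics.regression import R2Score
>>> r2score = R2Score(num_outputs=2)  # still accepted, but emits a DeprecationWarning and is otherwise ignored
>>> target = torch.tensor([[0.5, 1], [-1, 1], [7, -6]])
>>> preds = torch.tensor([[0, 2], [-1, 2], [8, -5]])
>>> r2score(preds, target)  # the number of outputs is now inferred from the input shape
tensor(0.9368)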
- Example: + Example (single output): + >>> from torch import tensor >>> from torchmetrics.regression import R2Score - >>> target = torch.tensor([3, -0.5, 2, 7]) - >>> preds = torch.tensor([2.5, 0.0, 2, 8]) + >>> target = tensor([3, -0.5, 2, 7]) + >>> preds = tensor([2.5, 0.0, 2, 8]) >>> r2score = R2Score() >>> r2score(preds, target) tensor(0.9486) - >>> target = torch.tensor([[0.5, 1], [-1, 1], [7, -6]]) - >>> preds = torch.tensor([[0, 2], [-1, 2], [8, -5]]) - >>> r2score = R2Score(num_outputs=2, multioutput='raw_values') + Example (multioutput): + >>> from torch import tensor + >>> from torchmetrics.regression import R2Score + >>> target = tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = tensor([[0, 2], [-1, 2], [8, -5]]) + >>> r2score = R2Score(multioutput='raw_values') >>> r2score(preds, target) tensor([0.9654, 0.9082]) @@ -100,14 +109,20 @@ class R2Score(Metric): def __init__( self, - num_outputs: int = 1, + num_outputs: Optional[int] = None, adjusted: int = 0, multioutput: str = "uniform_average", **kwargs: Any, ) -> None: super().__init__(**kwargs) - self.num_outputs = num_outputs + if num_outputs is not None: + rank_zero_warn( + "Argument `num_outputs` in `R2Score` has been deprecated because it is no longer necessary and will be" + "removed in v1.6.0 of TorchMetrics. The number of outputs is now automatically inferred from the shape" + "of the input tensors.", + DeprecationWarning, + ) if adjusted < 0 or not isinstance(adjusted, int): raise ValueError("`adjusted` parameter should be an integer larger or equal to 0.") @@ -120,19 +135,19 @@ def __init__( ) self.multioutput = multioutput - self.add_state("sum_squared_error", default=torch.zeros(self.num_outputs), dist_reduce_fx="sum") - self.add_state("sum_error", default=torch.zeros(self.num_outputs), dist_reduce_fx="sum") - self.add_state("residual", default=torch.zeros(self.num_outputs), dist_reduce_fx="sum") + self.add_state("sum_squared_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("sum_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("residual", default=tensor(0.0), dist_reduce_fx="sum") self.add_state("total", default=tensor(0), dist_reduce_fx="sum") def update(self, preds: Tensor, target: Tensor) -> None: """Update state with predictions and targets.""" sum_squared_error, sum_error, residual, total = _r2_score_update(preds, target) - self.sum_squared_error += sum_squared_error - self.sum_error += sum_error - self.residual += residual - self.total += total + self.sum_squared_error = self.sum_squared_error + sum_squared_error + self.sum_error = self.sum_error + sum_error + self.residual = self.residual + residual + self.total = self.total + total def compute(self) -> Tensor: """Compute r2 score over the metric states.""" diff --git a/tests/unittests/test_deprecated.py b/tests/unittests/test_deprecated.py index f126fa06561..49c153c4a52 100644 --- a/tests/unittests/test_deprecated.py +++ b/tests/unittests/test_deprecated.py @@ -1,7 +1,7 @@ import pytest import torch from torchmetrics.functional.regression import kl_divergence -from torchmetrics.regression import KLDivergence +from torchmetrics.regression import KLDivergence, R2Score def test_deprecated_kl_divergence_input_order(): @@ -14,3 +14,9 @@ def test_deprecated_kl_divergence_input_order(): with pytest.deprecated_call(match="The input order and naming in metric `KLDivergence` is set to be deprecated.*"): KLDivergence() + + +def test_deprecated_r2_score_num_outputs(): + """Ensure that the deprecated num_outputs argument in R2Score 
raises a warning.""" + with pytest.deprecated_call(match="Argument `num_outputs` in `R2Score` has been deprecated"): + R2Score(num_outputs=2) From cb1ab3798000cdfaaaf7b55905895c2445cdc0eb Mon Sep 17 00:00:00 2001 From: Vitaliy Kinakh Date: Thu, 12 Sep 2024 00:12:55 +0200 Subject: [PATCH 08/11] Fix `segmentation.MeanIoU` (#2698) - use sum reduce function for score - add state `num_batches` to keep number of processed batches - add increment of `num_batches` in every `update` call - in `compute` return sum of scores divided by number of processed batches --------- Co-authored-by: Nicki Skafte --- CHANGELOG.md | 3 +++ src/torchmetrics/metric.py | 12 +++++------ src/torchmetrics/segmentation/mean_iou.py | 8 +++++--- tests/unittests/_helpers/testers.py | 25 +++++++++++++++++++++-- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3eafaf8be5..ef9ed3c896f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed wrong aggregation in `segmentation.MeanIoU` ([#2698](https://github.com/Lightning-AI/torchmetrics/pull/2698)) + + - Fixed handling zero division error in binary IoU (Jaccard index) calculation ([#2726](https://github.com/Lightning-AI/torchmetrics/pull/2726)) diff --git a/src/torchmetrics/metric.py b/src/torchmetrics/metric.py index ef9055ed560..940e393c6d1 100644 --- a/src/torchmetrics/metric.py +++ b/src/torchmetrics/metric.py @@ -284,7 +284,7 @@ def forward(self, *args: Any, **kwargs: Any) -> Any: """Aggregate and evaluate batch input directly. Serves the dual purpose of both computing the metric on the current batch of inputs but also add the batch - statistics to the overall accumululating metric state. Input arguments are the exact same as corresponding + statistics to the overall accumulating metric state. Input arguments are the exact same as corresponding ``update`` method. The returned output is the exact same as the output of ``compute``. Args: @@ -361,7 +361,7 @@ def _forward_full_state_update(self, *args: Any, **kwargs: Any) -> Any: def _forward_reduce_state_update(self, *args: Any, **kwargs: Any) -> Any: """Forward computation using single call to `update`. - This can be done when the global metric state is a sinple reduction of batch states. This can be unsafe for + This can be done when the global metric state is a simple reduction of batch states. This can be unsafe for certain metric cases but is also the fastest way to both accumulate globally and compute locally. """ @@ -802,7 +802,7 @@ def _apply(self, fn: Callable, exclude_state: Sequence[str] = "") -> Module: """Overwrite `_apply` function such that we can also move metric states to the correct device. This method is called by the base ``nn.Module`` class whenever `.to`, `.cuda`, `.float`, `.half` etc. methods - are called. Dtype conversion is garded and will only happen through the special `set_dtype` method. + are called. Dtype conversion is guarded and will only happen through the special `set_dtype` method. 
Args: fn: the function to apply @@ -1166,7 +1166,7 @@ def _sync_dist(self, dist_sync_fn: Optional[Callable] = None, process_group: Opt """ def update(self, *args: Any, **kwargs: Any) -> None: - """Redirect the call to the input which the conposition was formed from.""" + """Redirect the call to the input which the composition was formed from.""" if isinstance(self.metric_a, Metric): self.metric_a.update(*args, **self.metric_a._filter_kwargs(**kwargs)) @@ -1174,7 +1174,7 @@ def update(self, *args: Any, **kwargs: Any) -> None: self.metric_b.update(*args, **self.metric_b._filter_kwargs(**kwargs)) def compute(self) -> Any: - """Redirect the call to the input which the conposition was formed from.""" + """Redirect the call to the input which the composition was formed from.""" # also some parsing for kwargs? val_a = self.metric_a.compute() if isinstance(self.metric_a, Metric) else self.metric_a val_b = self.metric_b.compute() if isinstance(self.metric_b, Metric) else self.metric_b @@ -1216,7 +1216,7 @@ def forward(self, *args: Any, **kwargs: Any) -> Any: return self._forward_cache def reset(self) -> None: - """Redirect the call to the input which the conposition was formed from.""" + """Redirect the call to the input which the composition was formed from.""" if isinstance(self.metric_a, Metric): self.metric_a.reset() diff --git a/src/torchmetrics/segmentation/mean_iou.py b/src/torchmetrics/segmentation/mean_iou.py index c298254585a..0fe831f5231 100644 --- a/src/torchmetrics/segmentation/mean_iou.py +++ b/src/torchmetrics/segmentation/mean_iou.py @@ -110,7 +110,8 @@ def __init__( self.input_format = input_format num_classes = num_classes - 1 if not include_background else num_classes - self.add_state("score", default=torch.zeros(num_classes if per_class else 1), dist_reduce_fx="mean") + self.add_state("score", default=torch.zeros(num_classes if per_class else 1), dist_reduce_fx="sum") + self.add_state("num_batches", default=torch.tensor(0), dist_reduce_fx="sum") def update(self, preds: Tensor, target: Tensor) -> None: """Update the state with the new data.""" @@ -119,10 +120,11 @@ def update(self, preds: Tensor, target: Tensor) -> None: ) score = _mean_iou_compute(intersection, union, per_class=self.per_class) self.score += score.mean(0) if self.per_class else score.mean() + self.num_batches += 1 def compute(self) -> Tensor: - """Update the state with the new data.""" - return self.score # / self.num_batches + """Compute the final Mean Intersection over Union (mIoU).""" + return self.score / self.num_batches def plot(self, val: Union[Tensor, Sequence[Tensor], None] = None, ax: Optional[_AX_TYPE] = None) -> _PLOT_OUT_TYPE: """Plot a single or multiple values from the metric. 
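To make the corrected aggregation concrete, a short worked sketch with hypothetical per-batch scores (the numbers are illustrative, not taken from any test in this patch):

# two update() calls with per-batch mean IoU of 0.50 and 0.70
# state after the updates:  score = 0.50 + 0.70 = 1.20,  num_batches = 2
# compute():                score / num_batches = 1.20 / 2 = 0.60
# before this fix, compute() returned the accumulated ``score`` (here 1.20) directly,
# so the reported value grew with the number of processed batches instead of averaging over them.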
diff --git a/tests/unittests/_helpers/testers.py b/tests/unittests/_helpers/testers.py index deb4c12324e..c5a69077f3c 100644 --- a/tests/unittests/_helpers/testers.py +++ b/tests/unittests/_helpers/testers.py @@ -32,7 +32,12 @@ def _assert_allclose(tm_result: Any, ref_result: Any, atol: float = 1e-8, key: O """Recursively assert that two results are within a certain tolerance.""" # single output compare if isinstance(tm_result, Tensor): - assert np.allclose(tm_result.detach().cpu().numpy(), ref_result, atol=atol, equal_nan=True) + assert np.allclose( + tm_result.detach().cpu().numpy() if isinstance(tm_result, Tensor) else tm_result, + ref_result.detach().cpu().numpy() if isinstance(ref_result, Tensor) else ref_result, + atol=atol, + equal_nan=True, + ) # multi output compare elif isinstance(tm_result, Sequence): for pl_res, ref_res in zip(tm_result, ref_result): @@ -40,7 +45,12 @@ def _assert_allclose(tm_result: Any, ref_result: Any, atol: float = 1e-8, key: O elif isinstance(tm_result, Dict): if key is None: raise KeyError("Provide Key for Dict based metric results.") - assert np.allclose(tm_result[key].detach().cpu().numpy(), ref_result, atol=atol, equal_nan=True) + assert np.allclose( + tm_result[key].detach().cpu().numpy() if isinstance(tm_result[key], Tensor) else tm_result[key], + ref_result.detach().cpu().numpy() if isinstance(ref_result, Tensor) else ref_result, + atol=atol, + equal_nan=True, + ) else: raise ValueError("Unknown format for comparison") @@ -147,6 +157,7 @@ def _class_test( # verify metrics work after being loaded from pickled state pickled_metric = pickle.dumps(metric) metric = pickle.loads(pickled_metric) + metric_clone = deepcopy(metric) for i in range(rank, num_batches, world_size): batch_kwargs_update = {k: v[i] if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} @@ -154,6 +165,16 @@ def _class_test( # compute batch stats and aggregate for global stats batch_result = metric(preds[i], target[i], **batch_kwargs_update) + if rank == 0 and world_size == 1 and i == 0: # check only in non-ddp mode and first batch + # dummy check to make sure that forward/update works as expected + metric_clone.update(preds[i], target[i], **batch_kwargs_update) + update_result = metric_clone.compute() + if isinstance(batch_result, dict): + for key in batch_result: + _assert_allclose(batch_result, update_result[key], key=key) + else: + _assert_allclose(batch_result, update_result) + if metric.dist_sync_on_step and check_dist_sync_on_step and rank == 0: if isinstance(preds, Tensor): ddp_preds = torch.cat([preds[i + r] for r in range(world_size)]).cpu() From fa351e8bcc177132964d71d8718ef911587fc674 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Fri, 13 Sep 2024 17:23:25 +0200 Subject: [PATCH 09/11] ci/doc: install with `-e` to resolve source links (#2740) --- .github/workflows/docs-build.yml | 16 ++++++---------- docs/source/conf.py | 8 ++++++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/docs-build.yml b/.github/workflows/docs-build.yml index 5aeef255bfe..16eb7e72e4a 100644 --- a/.github/workflows/docs-build.yml +++ b/.github/workflows/docs-build.yml @@ -48,18 +48,11 @@ jobs: pytorch-version: ${{ matrix.pytorch-version }} pypi-dir: ${{ env.PYPI_CACHE }} - - name: Install Latex - if: ${{ matrix.target == 'html' }} - # install Texlive, see https://linuxconfig.org/how-to-install-latex-on-ubuntu-20-04-focal-fossa-linux - run: | - sudo apt-get update --fix-missing - sudo apt-get install -y \ - 
texlive-latex-extra texlive-pictures texlive-fonts-recommended dvipng cm-super - - name: Install package & dependencies run: | make get-sphinx-template - pip install . -U -r requirements/_docs.txt \ + # install with -e so the path to source link comes from this project not from the installed package + pip install -e . -U -r requirements/_docs.txt \ --find-links="${PYPI_CACHE}" --find-links="${TORCH_URL}" - run: pip list - name: Full build for deployment @@ -70,7 +63,10 @@ jobs: run: echo "SPHINX_ENABLE_GALLERY=0" >> $GITHUB_ENV - name: make ${{ matrix.target }} working-directory: ./docs - run: make ${{ matrix.target }} --debug --jobs $(nproc) SPHINXOPTS="-W --keep-going" + run: | + pwd + ls -la + make ${{ matrix.target }} --debug --jobs $(nproc) SPHINXOPTS="-W --keep-going" - name: Upload built docs if: ${{ matrix.target == 'html' && github.event_name != 'pull_request' }} diff --git a/docs/source/conf.py b/docs/source/conf.py index 9484761ba7a..d7a68156a9e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -269,6 +269,11 @@ def _set_root_image_path(page_path: str) -> None: ), ] +# MathJax configuration +mathjax3_config = { + "tex": {"packages": {"[+]": ["ams", "newcommand", "configMacros"]}}, +} + # -- Options for Epub output ------------------------------------------------- # Bibliographic Dublin Core info. @@ -358,8 +363,7 @@ def package_list_from_file(file: str) -> list[str]: autodoc_mock_imports = MOCK_PACKAGES -# Resolve function -# This function is used to populate the (source) links in the API +# Resolve function - this function is used to populate the (source) links in the API def linkcode_resolve(domain, info) -> Optional[str]: # noqa: ANN001 return _linkcode_resolve(domain, info=info, github_user="Lightning-AI", github_repo="torchmetrics") From ead5cbb64bd7622b1750fe559ad65cc6966ce10c Mon Sep 17 00:00:00 2001 From: Jirka B Date: Fri, 13 Sep 2024 19:58:57 +0200 Subject: [PATCH 10/11] test: freeze `faster-coco-eval==1.5.*` (cherry picked from commit c4b32aad3af797a922239d3b1fbbee0becf8d48c) --- requirements/detection_test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/detection_test.txt b/requirements/detection_test.txt index 6515620c715..edfc1b97fff 100644 --- a/requirements/detection_test.txt +++ b/requirements/detection_test.txt @@ -1,4 +1,4 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -faster-coco-eval >=1.3.3 +faster-coco-eval ==1.5.* From 9baf6e6c68ba26e173d4945dcc4a712ae5465738 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Fri, 13 Sep 2024 20:15:20 +0200 Subject: [PATCH 11/11] Apply suggestions from code review --- src/torchmetrics/classification/dice.py | 6 +++--- src/torchmetrics/functional/classification/dice.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/torchmetrics/classification/dice.py b/src/torchmetrics/classification/dice.py index eb6b228778a..cbc8a84987e 100644 --- a/src/torchmetrics/classification/dice.py +++ b/src/torchmetrics/classification/dice.py @@ -116,9 +116,9 @@ class Dice(Metric): kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. .. warning:: - The `dice` metrics is being deprecated from the classification subpackage in v1.6.0 of torchmetrics and will be - removed in v1.7.0. 
Please instead consider using `f1score` metric from the classification subpackage as it - provides the same functionality. Additionally, we are going to re-add the `dice` metric in the segmentation + The ``dice`` metrics is being deprecated from the classification subpackage in v1.6.0 of torchmetrics and will be + removed in v1.7.0. Please instead consider using ``f1score`` metric from the classification subpackage as it + provides the same functionality. Additionally, we are going to re-add the ``dice`` metric in the segmentation domain in v1.6.0 with slight modifications to functionality. Raises: diff --git a/src/torchmetrics/functional/classification/dice.py b/src/torchmetrics/functional/classification/dice.py index 3aa26212fa5..5c08a028572 100644 --- a/src/torchmetrics/functional/classification/dice.py +++ b/src/torchmetrics/functional/classification/dice.py @@ -152,9 +152,9 @@ def dice( than what they appear to be. .. warning:: - The `dice` metrics is being deprecated from the classification subpackage in v1.6.0 of torchmetrics and will be - removed in v1.7.0. Please instead consider using `f1score` metric from the classification subpackage as it - provides the same functionality. Additionally, we are going to re-add the `dice` metric in the segmentation + The ``dice`` metrics is being deprecated from the classification subpackage in v1.6.0 of torchmetrics and will be + removed in v1.7.0. Please instead consider using ``f1score`` metric from the classification subpackage as it + provides the same functionality. Additionally, we are going to re-add the ``dice`` metric in the segmentation domain in v1.6.0 with slight modifications to functionality. Return: