Fix how auc scores are calculated in PrecisionRecallCurve.plot methods (#2437)

Co-authored-by: Jirka Borovec <[email protected]> (cherry picked from commit 0a6ad01)
Lightning-AI · Mar 16, 2024 · dc477a2 · dc477a2
1 parent 81f86e4
commit dc477a2
Show file tree

Hide file tree

Showing 5 changed files with 26 additions and 13 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -36,6 +36,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed bug when `top_k>1` and `average="macro"` for classification metrics ([#2423](https://github.com/Lightning-AI/torchmetrics/pull/2423))
 
 
+- Fixed how auc scores are calculated in `PrecisionRecallCurve.plot` methods ([#2437](https://github.com/Lightning-AI/torchmetrics/pull/2437))
+
 ## [1.3.1] - 2024-02-12
 
 ### Fixed

diff --git a/src/torchmetrics/classification/precision_recall_curve.py b/src/torchmetrics/classification/precision_recall_curve.py
@@ -188,7 +188,8 @@ def plot(
             curve: the output of either `metric.compute` or `metric.forward`. If no value is provided, will
                 automatically call `metric.compute` and plot that result.
             score: Provide a area-under-the-curve score to be displayed on the plot. If `True` and no curve is provided,
-                will automatically compute the score.
+                will automatically compute the score. The score is computed by using the trapezoidal rule to compute the
+                area under the curve.
             ax: An matplotlib axis object. If provided will add plot to that axis
 
         Returns:
@@ -215,7 +216,7 @@ def plot(
         curve_computed = (curve_computed[1], curve_computed[0], curve_computed[2])
 
         score = (
-            _auc_compute_without_check(curve_computed[0], curve_computed[1], 1.0)
+            _auc_compute_without_check(curve_computed[0], curve_computed[1], direction=-1.0)
             if not curve and score is True
             else None
         )
@@ -390,7 +391,8 @@ def plot(
             curve: the output of either `metric.compute` or `metric.forward`. If no value is provided, will
                 automatically call `metric.compute` and plot that result.
             score: Provide a area-under-the-curve score to be displayed on the plot. If `True` and no curve is provided,
-                will automatically compute the score.
+                will automatically compute the score. The score is computed by using the trapezoidal rule to compute the
+                area under the curve.
             ax: An matplotlib axis object. If provided will add plot to that axis
 
         Returns:
@@ -416,7 +418,9 @@ def plot(
         # switch order as the standard way is recall along x-axis and precision along y-axis
         curve_computed = (curve_computed[1], curve_computed[0], curve_computed[2])
         score = (
-            _reduce_auroc(curve_computed[0], curve_computed[1], average=None) if not curve and score is True else None
+            _reduce_auroc(curve_computed[0], curve_computed[1], average=None, direction=-1.0)
+            if not curve and score is True
+            else None
         )
         return plot_curve(
             curve_computed, score=score, ax=ax, label_names=("Recall", "Precision"), name=self.__class__.__name__
@@ -583,7 +587,8 @@ def plot(
             curve: the output of either `metric.compute` or `metric.forward`. If no value is provided, will
                 automatically call `metric.compute` and plot that result.
             score: Provide a area-under-the-curve score to be displayed on the plot. If `True` and no curve is provided,
-                will automatically compute the score.
+                will automatically compute the score. The score is computed by using the trapezoidal rule to compute the
+                area under the curve.
             ax: An matplotlib axis object. If provided will add plot to that axis
 
         Returns:
@@ -609,7 +614,9 @@ def plot(
         # switch order as the standard way is recall along x-axis and precision along y-axis
         curve_computed = (curve_computed[1], curve_computed[0], curve_computed[2])
         score = (
-            _reduce_auroc(curve_computed[0], curve_computed[1], average=None) if not curve and score is True else None
+            _reduce_auroc(curve_computed[0], curve_computed[1], average=None, direction=-1.0)
+            if not curve and score is True
+            else None
         )
         return plot_curve(
             curve_computed, score=score, ax=ax, label_names=("Recall", "Precision"), name=self.__class__.__name__

diff --git a/src/torchmetrics/classification/roc.py b/src/torchmetrics/classification/roc.py
@@ -134,7 +134,8 @@ def plot(
             curve: the output of either `metric.compute` or `metric.forward`. If no value is provided, will
                 automatically call `metric.compute` and plot that result.
             score: Provide a area-under-the-curve score to be displayed on the plot. If `True` and no curve is provided,
-                will automatically compute the score.
+                will automatically compute the score. The score is computed by using the trapezoidal rule to compute the
+                area under the curve.
             ax: An matplotlib axis object. If provided will add plot to that axis
 
         Returns:
@@ -303,7 +304,8 @@ def plot(
             curve: the output of either `metric.compute` or `metric.forward`. If no value is provided, will
                 automatically call `metric.compute` and plot that result.
             score: Provide a area-under-the-curve score to be displayed on the plot. If `True` and no curve is provided,
-                will automatically compute the score.
+                will automatically compute the score. The score is computed by using the trapezoidal rule to compute the
+                area under the curve.
             ax: An matplotlib axis object. If provided will add plot to that axis
 
         Returns:
@@ -461,7 +463,8 @@ def plot(
             curve: the output of either `metric.compute` or `metric.forward`. If no value is provided, will
                 automatically call `metric.compute` and plot that result.
             score: Provide a area-under-the-curve score to be displayed on the plot. If `True` and no curve is provided,
-                will automatically compute the score.
+                will automatically compute the score. The score is computed by using the trapezoidal rule to compute the
+                area under the curve.
             ax: An matplotlib axis object. If provided will add plot to that axis
 
         Returns:

diff --git a/src/torchmetrics/functional/classification/auroc.py b/src/torchmetrics/functional/classification/auroc.py
@@ -47,12 +47,13 @@ def _reduce_auroc(
     tpr: Union[Tensor, List[Tensor]],
     average: Optional[Literal["macro", "weighted", "none"]] = "macro",
     weights: Optional[Tensor] = None,
+    direction: float = 1.0,
 ) -> Tensor:
     """Reduce multiple average precision score into one number."""
     if isinstance(fpr, Tensor) and isinstance(tpr, Tensor):
-        res = _auc_compute_without_check(fpr, tpr, 1.0, axis=1)
+        res = _auc_compute_without_check(fpr, tpr, direction=direction, axis=1)
     else:
-        res = torch.stack([_auc_compute_without_check(x, y, 1.0) for x, y in zip(fpr, tpr)])
+        res = torch.stack([_auc_compute_without_check(x, y, direction=direction) for x, y in zip(fpr, tpr)])
     if average is None or average == "none":
         return res
     if torch.isnan(res).any():

diff --git a/src/torchmetrics/utilities/compute.py b/src/torchmetrics/utilities/compute.py
@@ -92,8 +92,8 @@ def _auc_compute_without_check(x: Tensor, y: Tensor, direction: float, axis: int
 
     """
     with torch.no_grad():
-        auc_: Tensor = torch.trapz(y, x, dim=axis) * direction
-    return auc_
+        auc_score: Tensor = torch.trapz(y, x, dim=axis) * direction
+    return auc_score
 
 
 def _auc_compute(x: Tensor, y: Tensor, reorder: bool = False) -> Tensor: