Merge branch 'master' into bugfix/dtype_auroc

Borda · web-flow · commit e67148b364e8 · 2023-03-21T19:11:14.000+01:00
diff --git a/requirements/test.txt b/requirements/test.txt
@@ -14,4 +14,4 @@ requests <=2.28.2
 fire <=0.5.0
 
 cloudpickle >1.3, <=2.2.1
-scikit-learn >1.0, <1.1.1
+scikit-learn >1.0, <1.2.2
diff --git a/src/torchmetrics/classification/precision_recall_curve.py b/src/torchmetrics/classification/precision_recall_curve.py
@@ -101,9 +101,9 @@ class BinaryPrecisionRecallCurve(Metric):
         >>> target = torch.tensor([0, 1, 1, 0])
         >>> bprc = BinaryPrecisionRecallCurve(thresholds=None)
         >>> bprc(preds, target)  # doctest: +NORMALIZE_WHITESPACE
-        (tensor([0.6667, 0.5000, 0.0000, 1.0000]),
-         tensor([1.0000, 0.5000, 0.0000, 0.0000]),
-         tensor([0.5000, 0.7000, 0.8000]))
+        (tensor([0.5000, 0.6667, 0.5000, 0.0000, 1.0000]),
+         tensor([1.0000, 1.0000, 0.5000, 0.0000, 0.0000]),
+         tensor([0.0000, 0.5000, 0.7000, 0.8000]))
         >>> bprc = BinaryPrecisionRecallCurve(thresholds=5)
         >>> bprc(preds, target)  # doctest: +NORMALIZE_WHITESPACE
         (tensor([0.5000, 0.6667, 0.6667, 0.0000, 0.0000, 1.0000]),
@@ -215,12 +215,13 @@ class MulticlassPrecisionRecallCurve(Metric):
         >>> mcprc = MulticlassPrecisionRecallCurve(num_classes=5, thresholds=None)
         >>> precision, recall, thresholds = mcprc(preds, target)
         >>> precision  # doctest: +NORMALIZE_WHITESPACE
-        [tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]),
+        [tensor([0.2500, 1.0000, 1.0000]), tensor([0.2500, 1.0000, 1.0000]), tensor([0.2500, 0.0000, 1.0000]),
          tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])]
         >>> recall
-        [tensor([1., 0.]), tensor([1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])]
+        [tensor([1., 1., 0.]), tensor([1., 1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])]
         >>> thresholds
-        [tensor(0.7500), tensor(0.7500), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor(0.0500)]
+        [tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]),
+         tensor(0.0500)]
         >>> mcprc = MulticlassPrecisionRecallCurve(num_classes=5, thresholds=5)
         >>> mcprc(preds, target)  # doctest: +NORMALIZE_WHITESPACE
         (tensor([[0.2500, 1.0000, 1.0000, 1.0000, 0.0000, 1.0000],
@@ -359,14 +360,13 @@ class MultilabelPrecisionRecallCurve(Metric):
         >>> mlprc = MultilabelPrecisionRecallCurve(num_labels=3, thresholds=None)
         >>> precision, recall, thresholds = mlprc(preds, target)
         >>> precision  # doctest: +NORMALIZE_WHITESPACE
-        [tensor([0.5000, 0.5000, 1.0000, 1.0000]), tensor([0.6667, 0.5000, 0.0000, 1.0000]),
+        [tensor([0.5000, 0.5000, 1.0000, 1.0000]), tensor([0.5000, 0.6667, 0.5000, 0.0000, 1.0000]),
          tensor([0.7500, 1.0000, 1.0000, 1.0000])]
         >>> recall  # doctest: +NORMALIZE_WHITESPACE
-        [tensor([1.0000, 0.5000, 0.5000, 0.0000]), tensor([1.0000, 0.5000, 0.0000, 0.0000]),
+        [tensor([1.0000, 0.5000, 0.5000, 0.0000]), tensor([1.0000, 1.0000, 0.5000, 0.0000, 0.0000]),
          tensor([1.0000, 0.6667, 0.3333, 0.0000])]
         >>> thresholds  # doctest: +NORMALIZE_WHITESPACE
-        [tensor([0.0500, 0.4500, 0.7500]), tensor([0.5500, 0.6500, 0.7500]),
-         tensor([0.0500, 0.3500, 0.7500])]
+        [tensor([0.0500, 0.4500, 0.7500]), tensor([0.0500, 0.5500, 0.6500, 0.7500]), tensor([0.0500, 0.3500, 0.7500])]
         >>> mlprc = MultilabelPrecisionRecallCurve(num_labels=3, thresholds=5)
         >>> mlprc(preds, target)  # doctest: +NORMALIZE_WHITESPACE
         (tensor([[0.5000, 0.5000, 1.0000, 1.0000, 0.0000, 1.0000],
@@ -447,11 +447,11 @@ class PrecisionRecallCurve:
         >>> pr_curve = PrecisionRecallCurve(task="binary")
         >>> precision, recall, thresholds = pr_curve(pred, target)
         >>> precision
-        tensor([0.6667, 0.5000, 1.0000, 1.0000])
+        tensor([0.5000, 0.6667, 0.5000, 1.0000, 1.0000])
         >>> recall
-        tensor([1.0000, 0.5000, 0.5000, 0.0000])
+        tensor([1.0000, 1.0000, 0.5000, 0.5000, 0.0000])
         >>> thresholds
-        tensor([0.1000, 0.4000, 0.8000])
+        tensor([0.0000, 0.1000, 0.4000, 0.8000])
 
         >>> pred = torch.tensor([[0.75, 0.05, 0.05, 0.05, 0.05],
         ...                      [0.05, 0.75, 0.05, 0.05, 0.05],
@@ -461,12 +461,13 @@ class PrecisionRecallCurve:
         >>> pr_curve = PrecisionRecallCurve(task="multiclass", num_classes=5)
         >>> precision, recall, thresholds = pr_curve(pred, target)
         >>> precision
-        [tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]),
+        [tensor([0.2500, 1.0000, 1.0000]), tensor([0.2500, 1.0000, 1.0000]), tensor([0.2500, 0.0000, 1.0000]),
          tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])]
         >>> recall
-        [tensor([1., 0.]), tensor([1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])]
+        [tensor([1., 1., 0.]), tensor([1., 1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])]
         >>> thresholds
-        [tensor(0.7500), tensor(0.7500), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor(0.0500)]
+        [tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]),
+         tensor(0.0500)]
     """
 
     def __new__(
diff --git a/src/torchmetrics/functional/classification/precision_recall_curve.py b/src/torchmetrics/functional/classification/precision_recall_curve.py
@@ -267,15 +267,11 @@ def _binary_precision_recall_curve_compute(
     precision = tps / (tps + fps)
     recall = tps / tps[-1]
 
-    # stop when full recall attained and reverse the outputs so recall is decreasing
-    last_ind = torch.where(tps == tps[-1])[0][0]
-    sl = slice(0, last_ind.item() + 1)
-
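-    # need to call reversed explicitly, since including that to slice would
-    # introduce negative strides that are not yet supported in pytorch
+    # reverse the outputs so recall is decreasing, then append the final
+    # (precision=1, recall=0) point; thresholds are reversed the same way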
-    precision = torch.cat([reversed(precision[sl]), torch.ones(1, dtype=precision.dtype, device=precision.device)])
-    recall = torch.cat([reversed(recall[sl]), torch.zeros(1, dtype=recall.dtype, device=recall.device)])
-    thresholds = reversed(thresholds[sl]).detach().clone()
+    precision = torch.cat([precision.flip(0), torch.ones(1, dtype=precision.dtype, device=precision.device)])
+    recall = torch.cat([recall.flip(0), torch.zeros(1, dtype=recall.dtype, device=recall.device)])
+    thresholds = thresholds.flip(0).detach().clone()
     return precision, recall, thresholds
 
 
@@ -338,9 +334,9 @@ def binary_precision_recall_curve(
         >>> preds = torch.tensor([0, 0.5, 0.7, 0.8])
         >>> target = torch.tensor([0, 1, 1, 0])
         >>> binary_precision_recall_curve(preds, target, thresholds=None)  # doctest: +NORMALIZE_WHITESPACE
-        (tensor([0.6667, 0.5000, 0.0000, 1.0000]),
-         tensor([1.0000, 0.5000, 0.0000, 0.0000]),
-         tensor([0.5000, 0.7000, 0.8000]))
+        (tensor([0.5000, 0.6667, 0.5000, 0.0000, 1.0000]),
+         tensor([1.0000, 1.0000, 0.5000, 0.0000, 0.0000]),
+         tensor([0.0000, 0.5000, 0.7000, 0.8000]))
         >>> binary_precision_recall_curve(preds, target, thresholds=5)  # doctest: +NORMALIZE_WHITESPACE
         (tensor([0.5000, 0.6667, 0.6667, 0.0000, 0.0000, 1.0000]),
          tensor([1., 1., 1., 0., 0., 0.]),
@@ -607,12 +603,13 @@ def multiclass_precision_recall_curve(
         ...    preds, target, num_classes=5, thresholds=None
         ... )
         >>> precision  # doctest: +NORMALIZE_WHITESPACE
-        [tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]),
+        [tensor([0.2500, 1.0000, 1.0000]), tensor([0.2500, 1.0000, 1.0000]), tensor([0.2500, 0.0000, 1.0000]),
          tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])]
         >>> recall
-        [tensor([1., 0.]), tensor([1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])]
+        [tensor([1., 1., 0.]), tensor([1., 1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])]
         >>> thresholds
-        [tensor([0.7500]), tensor([0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500])]
+        [tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]),
+         tensor([0.0500])]
         >>> multiclass_precision_recall_curve(
         ...     preds, target, num_classes=5, thresholds=5
         ... )  # doctest: +NORMALIZE_WHITESPACE
@@ -837,14 +834,13 @@ def multilabel_precision_recall_curve(
         ...    preds, target, num_labels=3, thresholds=None
         ... )
         >>> precision  # doctest: +NORMALIZE_WHITESPACE
-        [tensor([0.5000, 0.5000, 1.0000, 1.0000]), tensor([0.6667, 0.5000, 0.0000, 1.0000]),
+        [tensor([0.5000, 0.5000, 1.0000, 1.0000]), tensor([0.5000, 0.6667, 0.5000, 0.0000, 1.0000]),
          tensor([0.7500, 1.0000, 1.0000, 1.0000])]
         >>> recall  # doctest: +NORMALIZE_WHITESPACE
-        [tensor([1.0000, 0.5000, 0.5000, 0.0000]), tensor([1.0000, 0.5000, 0.0000, 0.0000]),
+        [tensor([1.0000, 0.5000, 0.5000, 0.0000]), tensor([1.0000, 1.0000, 0.5000, 0.0000, 0.0000]),
          tensor([1.0000, 0.6667, 0.3333, 0.0000])]
         >>> thresholds  # doctest: +NORMALIZE_WHITESPACE
-        [tensor([0.0500, 0.4500, 0.7500]), tensor([0.5500, 0.6500, 0.7500]),
-         tensor([0.0500, 0.3500, 0.7500])]
+        [tensor([0.0500, 0.4500, 0.7500]), tensor([0.0500, 0.5500, 0.6500, 0.7500]), tensor([0.0500, 0.3500, 0.7500])]
         >>> multilabel_precision_recall_curve(
         ...     preds, target, num_labels=3, thresholds=5
         ... )  # doctest: +NORMALIZE_WHITESPACE
@@ -887,15 +883,15 @@ def precision_recall_curve(
     :func:`multilabel_precision_recall_curve` for the specific details of each argument influence and examples.
 
     Legacy Example:
-        >>> pred = torch.tensor([0.0, 1.0, 2.0, 3.0])
+        >>> pred = torch.tensor([0, 0.1, 0.8, 0.4])
         >>> target = torch.tensor([0, 1, 1, 0])
         >>> precision, recall, thresholds = precision_recall_curve(pred, target, task='binary')
         >>> precision
-        tensor([0.6667, 0.5000, 0.0000, 1.0000])
+        tensor([0.5000, 0.6667, 0.5000, 1.0000, 1.0000])
         >>> recall
-        tensor([1.0000, 0.5000, 0.0000, 0.0000])
+        tensor([1.0000, 1.0000, 0.5000, 0.5000, 0.0000])
         >>> thresholds
-        tensor([0.7311, 0.8808, 0.9526])
+        tensor([0.0000, 0.1000, 0.4000, 0.8000])
 
         >>> pred = torch.tensor([[0.75, 0.05, 0.05, 0.05, 0.05],
         ...                      [0.05, 0.75, 0.05, 0.05, 0.05],
@@ -904,12 +900,13 @@ def precision_recall_curve(
         >>> target = torch.tensor([0, 1, 3, 2])
         >>> precision, recall, thresholds = precision_recall_curve(pred, target, task='multiclass', num_classes=5)
         >>> precision
-        [tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]),
+        [tensor([0.2500, 1.0000, 1.0000]), tensor([0.2500, 1.0000, 1.0000]), tensor([0.2500, 0.0000, 1.0000]),
          tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])]
         >>> recall
-        [tensor([1., 0.]), tensor([1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])]
+        [tensor([1., 1., 0.]), tensor([1., 1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])]
         >>> thresholds
-        [tensor([0.7500]), tensor([0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500])]
+        [tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]),
+         tensor([0.0500])]
     """
     task = ClassificationTask.from_str(task)
     if task == ClassificationTask.BINARY:
diff --git a/tests/unittests/classification/test_precision_recall_curve.py b/tests/unittests/classification/test_precision_recall_curve.py
@@ -11,11 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import operator
 from functools import partial
 
 import numpy as np
 import pytest
 import torch
+from lightning_utilities import compare_version
 from scipy.special import expit as sigmoid
 from scipy.special import softmax
 from sklearn.metrics import precision_recall_curve as sk_precision_recall_curve
@@ -53,6 +55,7 @@ class TestBinaryPrecisionRecallCurve(MetricTester):
 
     @pytest.mark.parametrize("ignore_index", [None, -1, 0])
     @pytest.mark.parametrize("ddp", [True, False])
+    @pytest.mark.skipif(compare_version("sklearn", operator.lt, "1.1.0"), reason="Restricted to latest `sklearn`")
     def test_binary_precision_recall_curve(self, input, ddp, ignore_index):
         """Test class implementation of metric."""
         preds, target = input
@@ -71,6 +74,7 @@ def test_binary_precision_recall_curve(self, input, ddp, ignore_index):
         )
 
     @pytest.mark.parametrize("ignore_index", [None, -1, 0])
+    @pytest.mark.skipif(compare_version("sklearn", operator.lt, "1.1.0"), reason="Restricted to latest `sklearn`")
     def test_binary_precision_recall_curve_functional(self, input, ignore_index):
         """Test functional implementation of metric."""
         preds, target = input
@@ -178,6 +182,7 @@ class TestMulticlassPrecisionRecallCurve(MetricTester):
 
     @pytest.mark.parametrize("ignore_index", [None, -1])
     @pytest.mark.parametrize("ddp", [True, False])
+    @pytest.mark.skipif(compare_version("sklearn", operator.lt, "1.1.0"), reason="Restricted to latest `sklearn`")
     def test_multiclass_precision_recall_curve(self, input, ddp, ignore_index):
         """Test class implementation of metric."""
         preds, target = input
@@ -197,6 +202,7 @@ def test_multiclass_precision_recall_curve(self, input, ddp, ignore_index):
         )
 
     @pytest.mark.parametrize("ignore_index", [None, -1])
+    @pytest.mark.skipif(compare_version("sklearn", operator.lt, "1.1.0"), reason="Restricted to latest `sklearn`")
     def test_multiclass_precision_recall_curve_functional(self, input, ignore_index):
         """Test functional implementation of metric."""
         preds, target = input
@@ -298,6 +304,7 @@ class TestMultilabelPrecisionRecallCurve(MetricTester):
 
     @pytest.mark.parametrize("ignore_index", [None, -1, 0])
     @pytest.mark.parametrize("ddp", [True, False])
+    @pytest.mark.skipif(compare_version("sklearn", operator.lt, "1.1.0"), reason="Restricted to latest `sklearn`")
     def test_multilabel_precision_recall_curve(self, input, ddp, ignore_index):
         """Test class implementation of metric."""
         preds, target = input
@@ -317,6 +324,7 @@ def test_multilabel_precision_recall_curve(self, input, ddp, ignore_index):
         )
 
     @pytest.mark.parametrize("ignore_index", [None, -1, 0])
+    @pytest.mark.skipif(compare_version("sklearn", operator.lt, "1.1.0"), reason="Restricted to latest `sklearn`")
     def test_multilabel_precision_recall_curve_functional(self, input, ignore_index):
         """Test functional implementation of metric."""
         preds, target = input