Binned PR-related metrics #128

Merged
merged 43 commits on Apr 13, 2021
Changes from 10 commits
Commits (43)
f45dd63
WIP: Binned PR-related metrics
maximsch2 Mar 24, 2021
4bb887d
attempt to fix types
maximsch2 Mar 25, 2021
c3a4174
switch to linspace to make old pytorch happy
maximsch2 Mar 26, 2021
df125ef
make flake happy
maximsch2 Mar 26, 2021
0fa6881
Merge branch 'master' of https://github.com/PyTorchLightning/metrics …
maximsch2 Mar 29, 2021
6ac4b34
clean up
maximsch2 Mar 29, 2021
8205ee3
Add more testing, move test input generation to the appropriate place
maximsch2 Apr 7, 2021
cdadbae
Merge branch 'master' of https://github.com/PyTorchLightning/metrics …
maximsch2 Apr 7, 2021
eb70c49
bugfixes and more stable and thorough tests
maximsch2 Apr 7, 2021
15c07f2
flake8
maximsch2 Apr 7, 2021
e1bb5dc
Reuse python zip-based implementation as it can't be reproduced with …
maximsch2 Apr 7, 2021
c39384a
address comments
maximsch2 Apr 7, 2021
b6b289e
isort
maximsch2 Apr 7, 2021
a3c5dd2
Add docs and doctests, make APIs same as non-binned versions
maximsch2 Apr 8, 2021
6e568d9
pep8
maximsch2 Apr 8, 2021
d3a5d9f
isort
maximsch2 Apr 8, 2021
6d5b8b2
doctests likes longer title underlines :O
maximsch2 Apr 8, 2021
a1a7294
use numpy's nan_to_num
maximsch2 Apr 8, 2021
4e276be
add atol to bleu tests to make them more stable
maximsch2 Apr 8, 2021
9ce9745
atol=1e-2 for bleu
maximsch2 Apr 8, 2021
d19e52e
add more docs
maximsch2 Apr 8, 2021
704e7f6
pep8
maximsch2 Apr 8, 2021
e64d69a
Merge branch 'master' into binned_metrics
Borda Apr 9, 2021
00e2d84
remove nlp test hack
maximsch2 Apr 9, 2021
5bdb03f
Merge branch 'binned_metrics' of github.com:maximsch2/metrics into bi…
maximsch2 Apr 9, 2021
42fff5b
Merge branch 'master' into binned_metrics
SkafteNicki Apr 13, 2021
88f83d6
abc
Borda Apr 13, 2021
ee0d541
abc
Borda Apr 13, 2021
1dfea94
Merge branch 'master' of https://github.com/PyTorchLightning/metrics …
maximsch2 Apr 13, 2021
7e673c1
address comments
maximsch2 Apr 13, 2021
d6ac39d
Merge branch 'binned_metrics' of github.com:maximsch2/metrics into bi…
maximsch2 Apr 13, 2021
322e7e3
pep8
maximsch2 Apr 13, 2021
291f04b
abc
maximsch2 Apr 13, 2021
c1ae93e
format
Borda Apr 13, 2021
6218705
Merge branch 'binned_metrics' of https://github.com/maximsch2/metrics…
Borda Apr 13, 2021
72052f4
format
Borda Apr 13, 2021
82abbbf
format
Borda Apr 13, 2021
1b6acd6
format
Borda Apr 13, 2021
31c245a
flake8
maximsch2 Apr 13, 2021
1787d82
Merge branch 'binned_metrics' of github.com:maximsch2/metrics into bi…
maximsch2 Apr 13, 2021
2d54032
remove typecheck
maximsch2 Apr 13, 2021
469f205
chlog
Borda Apr 13, 2021
6c3ec24
Merge branch 'binned_metrics' of https://github.com/maximsch2/metrics…
Borda Apr 13, 2021
26 changes: 26 additions & 0 deletions tests/classification/inputs.py
@@ -77,3 +77,29 @@
    preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)),
    target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM))
)


# Generate plausible-looking inputs
def generate_plausible_inputs_multilabel():
    correct_targets = torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE))
    preds = torch.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)
    targets = torch.zeros_like(preds, dtype=torch.long)
    for i in range(preds.shape[0]):
        for j in range(preds.shape[1]):
            targets[i, j, correct_targets[i, j]] = 1
    preds += torch.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES) * targets / 3

    preds = preds / preds.sum(dim=2, keepdim=True)

    return Input(preds=preds, target=targets)


def generate_plausible_inputs_binary():
    targets = torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE))
    preds = torch.rand(NUM_BATCHES, BATCH_SIZE) + torch.rand(NUM_BATCHES, BATCH_SIZE) * targets / 3
    return Input(preds=preds, target=targets)


_input_multilabel_prob_plausible = generate_plausible_inputs_multilabel()

_input_binary_prob_plausible = generate_plausible_inputs_binary()
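
For context, a quick sanity check along these lines (an editor's illustrative sketch, not part of the diff) shows why the "plausible" fixtures matter: they skew probability mass toward the true class, so reference metrics score them well above chance, which exercises the non-degenerate parts of the precision-recall curve.

    # Illustrative sketch only; assumes the fixtures above are importable as shown in this diff.
    from sklearn.metrics import average_precision_score
    from tests.classification.inputs import _input_binary_prob_plausible

    preds = _input_binary_prob_plausible.preds.flatten().numpy()
    target = _input_binary_prob_plausible.target.flatten().numpy()
    # For these random-but-skewed inputs this lands noticeably above the ~0.5 positive rate.
    print(average_precision_score(target, preds))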
162 changes: 162 additions & 0 deletions tests/classification/test_binned_precision_recall.py
@@ -0,0 +1,162 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import partial
from typing import Tuple

import numpy as np
import pytest
import torch
from sklearn.metrics import average_precision_score as _sk_average_precision_score
from sklearn.metrics import precision_recall_curve as _sk_precision_recall_curve

from tests.classification.inputs import (
    _input_binary_prob,
    _input_binary_prob_plausible,
    _input_multilabel_prob,
    _input_multilabel_prob_plausible,
)
from tests.helpers import seed_all
from tests.helpers.testers import NUM_CLASSES, MetricTester
from torchmetrics.classification.binned_precision_recall import BinnedAveragePrecision, BinnedRecallAtFixedPrecision

seed_all(42)


def recall_at_precision_x_multilabel(
    predictions: torch.Tensor, targets: torch.Tensor, min_precision: float
) -> Tuple[float, float]:
    precision, recall, thresholds = _sk_precision_recall_curve(
        targets, predictions,
    )

    try:
        max_recall, max_precision, best_threshold = max(
            (r, p, t)
            for p, r, t in zip(precision, recall, thresholds)
            if p >= min_precision
        )
    except ValueError:
        max_recall, best_threshold = 0, 1e6

    return float(max_recall), float(best_threshold)


def _multiclass_prob_sk_metric(predictions, targets, num_classes, min_precision):
    max_recalls = torch.zeros(num_classes)
    best_thresholds = torch.zeros(num_classes)

    for i in range(num_classes):
        max_recalls[i], best_thresholds[i] = recall_at_precision_x_multilabel(
            predictions[:, i], targets[:, i], min_precision
        )
    return max_recalls, best_thresholds


def _binary_prob_sk_metric(predictions, targets, num_classes, min_precision):
    return recall_at_precision_x_multilabel(
        predictions, targets, min_precision
    )


def _multiclass_average_precision_sk_metric(predictions, targets, num_classes):
    return _sk_average_precision_score(targets, predictions, average=None)


@pytest.mark.parametrize(
"preds, target, sk_metric, num_classes",
[
(_input_binary_prob.preds, _input_binary_prob.target, _binary_prob_sk_metric, 1),
(_input_binary_prob_plausible.preds, _input_binary_prob_plausible.target, _binary_prob_sk_metric, 1),
(
_input_multilabel_prob_plausible.preds,
_input_multilabel_prob_plausible.target,
_multiclass_prob_sk_metric,
NUM_CLASSES,
),
(
_input_multilabel_prob.preds,
_input_multilabel_prob.target,
_multiclass_prob_sk_metric,
NUM_CLASSES,
),
],
)
class TestBinnedRecallAtPrecision(MetricTester):
@pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("min_precision", [0.1, 0.3, 0.5, 0.8])
def test_binned_pr(self, preds, target, sk_metric, num_classes, ddp, min_precision):
self.atol = 0.01
# rounding will simulate binning for both implementations
preds = torch.Tensor(np.round(preds.numpy(), 2)) + 1e-6
self.run_class_metric_test(
ddp=ddp,
preds=preds,
target=target,
metric_class=BinnedRecallAtFixedPrecision,
sk_metric=partial(sk_metric, num_classes=num_classes, min_precision=min_precision),
dist_sync_on_step=False,
check_dist_sync_on_step=False,
check_batch=False,
metric_args={
"num_classes": num_classes,
"min_precision": min_precision,
"num_thresholds": 101,
},
)


@pytest.mark.parametrize(
"preds, target, sk_metric, num_classes",
[
(_input_binary_prob.preds, _input_binary_prob.target, _multiclass_average_precision_sk_metric, 1),
(
_input_binary_prob_plausible.preds,
_input_binary_prob_plausible.target,
_multiclass_average_precision_sk_metric,
1,
),
(
_input_multilabel_prob_plausible.preds,
_input_multilabel_prob_plausible.target,
_multiclass_average_precision_sk_metric,
NUM_CLASSES,
),
(
_input_multilabel_prob.preds,
_input_multilabel_prob.target,
_multiclass_average_precision_sk_metric,
NUM_CLASSES,
),
],
)
class TestBinnedAveragePrecision(MetricTester):
@pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("num_thresholds", [200, 300])
def test_binned_pr(self, preds, target, sk_metric, num_classes, ddp, num_thresholds):
self.atol = 0.01
self.run_class_metric_test(
ddp=ddp,
preds=preds,
target=target,
metric_class=BinnedAveragePrecision,
sk_metric=partial(sk_metric, num_classes=num_classes),
dist_sync_on_step=False,
check_dist_sync_on_step=False,
check_batch=False,
metric_args={
"num_classes": num_classes,
"num_thresholds": num_thresholds,
},
)
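
For intuition about the sklearn-based reference (an illustrative sketch, not part of the diff): recall_at_precision_x_multilabel walks the precision-recall curve, keeps only points whose precision clears min_precision, and returns the highest recall among them together with its threshold, falling back to recall 0 and threshold 1e6 when no point qualifies. The rounding in the test above makes the continuous sklearn curve and the 101-bin implementation evaluate essentially the same threshold grid.

    # Tiny hand-checkable example (hypothetical numbers).
    import torch
    from sklearn.metrics import precision_recall_curve

    target = torch.tensor([0, 1, 1, 0, 1])
    preds = torch.tensor([0.1, 0.9, 0.8, 0.4, 0.3])
    precision, recall, thresholds = precision_recall_curve(target, preds)
    # With min_precision=1.0, only the two top-scored positives qualify,
    # so the best recall is 2/3, reached at threshold 0.8.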
146 changes: 146 additions & 0 deletions torchmetrics/classification/binned_precision_recall.py
@@ -0,0 +1,146 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Tuple, Union

import torch

from torchmetrics.functional.classification.average_precision import _average_precision_compute_with_precision_recall
from torchmetrics.metric import Metric
from torchmetrics.utilities.data import METRIC_EPS, to_onehot


class BinnedPrecisionRecallCurve(Metric):
"""Returns a tensor of recalls for a fixed precision threshold.
It is a tensor instead of a single number, because it applies to multi-label inputs.
"""

    TPs: torch.Tensor
    FPs: torch.Tensor
    FNs: torch.Tensor
    thresholds: torch.Tensor

    def __init__(
        self,
        num_classes: int,
        num_thresholds: int = 100,
        compute_on_step: bool = False,  # will ignore this
        **kwargs
    ):
        assert not compute_on_step, "computation on each step is not supported"
        super().__init__(compute_on_step=False, **kwargs)
        self.num_classes = num_classes
        self.num_thresholds = num_thresholds
        thresholds = torch.linspace(0, 1.0 + METRIC_EPS, num_thresholds)
        self.register_buffer("thresholds", thresholds)

        for name in ("TPs", "FPs", "FNs"):
            self.add_state(
                name=name,
                default=torch.zeros(num_classes, num_thresholds),
                dist_reduce_fx="sum",
            )

    def update(self, preds: torch.Tensor, targets: torch.Tensor) -> None:
        """
        Args:
            preds: (n_samples, n_classes) tensor
            targets: (n_samples, n_classes) tensor
        """
        # binary case
        if len(preds.shape) == len(targets.shape) == 1:
            preds = preds.reshape(-1, 1)
            targets = targets.reshape(-1, 1)

        if len(preds.shape) == len(targets.shape) + 1:
            targets = to_onehot(targets, num_classes=self.num_classes)

        targets = targets == 1
        # Iterate one threshold at a time to conserve memory
        for i in range(self.num_thresholds):
            predictions = preds >= self.thresholds[i]
            self.TPs[:, i] += (targets & predictions).sum(dim=0)
            self.FPs[:, i] += ((~targets) & (predictions)).sum(dim=0)
            self.FNs[:, i] += ((targets) & (~predictions)).sum(dim=0)

    def compute(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Returns float tensors of precisions, recalls and thresholds (per class for multi-label input)"""
        precisions = (self.TPs + METRIC_EPS) / (self.TPs + self.FPs + METRIC_EPS)
        recalls = self.TPs / (self.TPs + self.FNs + METRIC_EPS)
        # Need to guarantee that last precision=1 and recall=0
        precisions = torch.cat([precisions, torch.ones(self.num_classes, 1,
                                                       dtype=precisions.dtype, device=precisions.device)], dim=1)
        recalls = torch.cat([recalls, torch.zeros(self.num_classes, 1,
                                                  dtype=recalls.dtype, device=recalls.device)], dim=1)
        thresholds = torch.cat([self.thresholds, torch.ones(1, dtype=recalls.dtype, device=recalls.device)], dim=0)
        if self.num_classes == 1:
            return (precisions[0, :], recalls[0, :], thresholds)
        else:
            return (precisions, recalls, thresholds)


class BinnedAveragePrecision(BinnedPrecisionRecallCurve):
    def compute(self) -> Union[List[torch.Tensor], torch.Tensor]:
        precisions, recalls, _ = super().compute()
        return _average_precision_compute_with_precision_recall(precisions, recalls, self.num_classes)


class BinnedRecallAtFixedPrecision(BinnedPrecisionRecallCurve):
    def __init__(
        self,
        num_classes: int,
        min_precision: float,
        num_thresholds: int = 100,
        compute_on_step: bool = False,  # will ignore this
        **kwargs
    ):
        super().__init__(
            num_classes=num_classes,
            num_thresholds=num_thresholds,
            compute_on_step=compute_on_step,
            **kwargs
        )
        self.min_precision = min_precision

    def compute(self) -> Tuple[torch.Tensor, torch.Tensor]:
        """Returns float tensors of recalls and thresholds, each of size n_classes"""
        precisions, recalls, thresholds = super().compute()
        condition = precisions >= self.min_precision

        if self.num_classes == 1:
            recall_at_p, index = (
                torch.where(
                    condition, recalls, torch.scalar_tensor(0.0, device=condition.device)
                )
                .max(dim=0)
            )
            if recall_at_p == 0.0:
                return recall_at_p, torch.scalar_tensor(1e6, device=condition.device)
            else:
                return recall_at_p, thresholds[index]

        recalls_at_p, indices = (
            torch.where(
                condition, recalls, torch.scalar_tensor(0.0, device=condition.device)
            )
            .max(dim=1)
        )

        thresholds_at_p = torch.zeros_like(recalls_at_p, device=condition.device, dtype=thresholds.dtype)
        for i in range(self.num_classes):
            if recalls_at_p[i] == 0.0:
                thresholds_at_p[i] = 1e6
            else:
                thresholds_at_p[i] = thresholds[indices[i]]

        return (recalls_at_p, thresholds_at_p)
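
For orientation, a minimal usage sketch of the two new modules (an editor's illustration based on the APIs in this diff; the data and numbers are made up):

    import torch
    from torchmetrics.classification.binned_precision_recall import (
        BinnedAveragePrecision,
        BinnedRecallAtFixedPrecision,
    )

    preds = torch.rand(64, 3)                  # per-label probabilities
    target = (torch.rand(64, 3) > 0.5).long()  # multi-label ground truth

    ap = BinnedAveragePrecision(num_classes=3, num_thresholds=101)
    ap.update(preds, target)
    print(ap.compute())  # one binned average-precision value per class

    r_at_p = BinnedRecallAtFixedPrecision(num_classes=3, min_precision=0.8, num_thresholds=101)
    r_at_p.update(preds, target)
    recalls, thresholds = r_at_p.compute()  # best recall per class and the threshold achieving it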
8 changes: 8 additions & 0 deletions torchmetrics/functional/classification/average_precision.py
@@ -40,6 +40,14 @@ def _average_precision_compute(
) -> Union[List[Tensor], Tensor]:
    # todo: `sample_weights` is unused
    precision, recall, _ = _precision_recall_curve_compute(preds, target, num_classes, pos_label)
    return _average_precision_compute_with_precision_recall(precision, recall, num_classes)


def _average_precision_compute_with_precision_recall(
    precision: Tensor,
    recall: Tensor,
    num_classes: int,
) -> Union[List[Tensor], Tensor]:
    # Return the step function integral
    # The following works because the last entry of precision is
    # guaranteed to be 1, as returned by precision_recall_curve
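    # (Editor's note, illustrative only: the remainder of this function is collapsed
    # in the diff view. The step-function integral referred to above is the standard
    # one used by sklearn's average_precision_score, roughly
    #     -torch.sum((recall[1:] - recall[:-1]) * precision[:-1])
    # for a single class, i.e. precision weighted by the drop in recall between
    # consecutive thresholds.)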