Fix wrong accumulation with average='none' #1046

Merged
merged 13 commits on May 27, 2022
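
In short: with `average='none'`, the `_*_compute` functions bound `numerator = tp`, so `numerator` aliased the metric's accumulated state tensor rather than copying it. The in-place masking of absent classes that follows then wrote straight into that state, and every later `compute()` or `forward()` call saw corrupted counts. The fix clones the state before masking; the tests below pin the behaviour down. A minimal sketch of the aliasing problem itself, independent of torchmetrics:

import torch

# Accumulated metric state, standing in for the `tp` tensor a Metric keeps.
tp = torch.tensor([2.0, 1.0, 0.0])

# Buggy pattern: `numerator` is the same tensor object, not a copy.
numerator = tp
numerator[tp == 0] = float("nan")  # in-place masking of absent classes
print(tp)                          # tensor([2., 1., nan]) -- state corrupted

# Fixed pattern: clone first, then mask freely.
tp = torch.tensor([2.0, 1.0, 0.0])
numerator = tp.clone()
numerator[tp == 0] = float("nan")
print(tp)                          # tensor([2., 1., 0.]) -- state intact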
43 changes: 42 additions & 1 deletion tests/classification/test_accuracy.py
@@ -34,7 +34,7 @@
from tests.classification.inputs import _input_multilabel_prob as _input_mlb_prob
from tests.helpers import seed_all
from tests.helpers.testers import NUM_BATCHES, NUM_CLASSES, THRESHOLD, MetricTester
- from torchmetrics import Accuracy
+ from torchmetrics import Accuracy, Metric
from torchmetrics.functional import accuracy
from torchmetrics.utilities.checks import _input_format_classification
from torchmetrics.utilities.enums import AverageMethod, DataType
@@ -158,6 +158,15 @@ def test_accuracy_differentiability(self, preds, target, subset_accuracy, mdmc_a
result=torch.tensor(0.75),
)

_negmetric_noneavg = {
"pred1": torch.tensor([[0.0, 1.0, 0.0], [1.0, 0.0, 0.0]]),
"target1": torch.tensor([0, 1]),
"res1": torch.tensor([0.0, 0.0, float("nan")]),
"pred2": torch.tensor([[0.0, 1.0, 0.0], [1.0, 0.0, 0.0]]),
"target2": torch.tensor([0, 2]),
"res2": torch.tensor([0.0, 0.0, 0.0]),
}


# Replace with a proper sk_metric test once sklearn 0.24 hits :)
@pytest.mark.parametrize(
@@ -438,3 +447,35 @@ def test_negative_ignore_index(preds, target, ignore_index, result):
# Test functional
with pytest.raises(ValueError, match="^[The `target` has to be a non-negative tensor.]"):
acc_score = accuracy(preds, target, num_classes=num_classes, ignore_index=ignore_index)


@pytest.mark.parametrize(
"pred1, target1, res1, pred2, target2, res2",
[
(
_negmetric_noneavg["pred1"],
_negmetric_noneavg["target1"],
_negmetric_noneavg["res1"],
_negmetric_noneavg["pred2"],
_negmetric_noneavg["target2"],
_negmetric_noneavg["res2"],
)
],
)
def test_negmetric_noneavg(pred1, target1, res1, pred2, target2, res2):
class MetricWrapper(Metric):
def __init__(self, metric):
super().__init__()
self.metric = metric

def update(self, *args, **kwargs):
self.metric.update(*args, **kwargs)

def compute(self, *args, **kwargs):
return self.metric.compute(*args, **kwargs)

acc = MetricWrapper(Accuracy(average="none", num_classes=pred1.shape[1]))
result1 = acc(pred1, target1)
assert torch.allclose(res1, result1, equal_nan=True)
result2 = acc(pred2, target2)
assert torch.allclose(res2, result2, equal_nan=True)
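
The fixture pins down the regression: after the first batch, class 2 has no support, so its slot in the `average='none'` result is NaN; once the second batch introduces class 2, its score must be a plain 0.0. Before the fix, the NaN masking performed during the first compute leaked into the accumulated state, so the second result came back wrong. The `MetricWrapper` indirection reproduces the reported setup: nesting the metric inside another `Metric` is a case where `forward`'s internal state caching does not shield the inner metric's state, which is what makes the in-place mutation observable.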
74 changes: 74 additions & 0 deletions tests/classification/test_precision_recall.py
@@ -457,3 +457,77 @@ def test_same_input(metric_class, metric_functional, sk_fn, average):

assert torch.allclose(class_res, torch.tensor(sk_res).float())
assert torch.allclose(func_res, torch.tensor(sk_res).float())


_negmetric_noneavg = {
"pred1": torch.tensor([[0.0, 1.0, 0.0], [1.0, 0.0, 0.0]]),
"target1": torch.tensor([0, 1]),
"res1": torch.tensor([0.0, 0.0, float("nan")]),
"pred2": torch.tensor([[0.0, 1.0, 0.0], [1.0, 0.0, 0.0]]),
"target2": torch.tensor([0, 2]),
"res2": torch.tensor([0.0, 0.0, 0.0]),
}


@pytest.mark.parametrize(
"pred1, target1, res1, pred2, target2, res2",
[
(
_negmetric_noneavg["pred1"],
_negmetric_noneavg["target1"],
_negmetric_noneavg["res1"],
_negmetric_noneavg["pred2"],
_negmetric_noneavg["target2"],
_negmetric_noneavg["res2"],
)
],
)
def test_negprecision_noneavg(pred1, target1, res1, pred2, target2, res2):
class MetricWrapper(Metric):
def __init__(self, metric):
super().__init__()
self.metric = metric

def update(self, *args, **kwargs):
self.metric.update(*args, **kwargs)

def compute(self, *args, **kwargs):
return self.metric.compute(*args, **kwargs)

prec = MetricWrapper(Precision(average="none", num_classes=pred1.shape[1]))
result1 = prec(pred1, target1)
assert torch.allclose(res1, result1, equal_nan=True)
result2 = prec(pred2, target2)
assert torch.allclose(res2, result2, equal_nan=True)


@pytest.mark.parametrize(
"pred1, target1, res1, pred2, target2, res2",
[
(
_negmetric_noneavg["pred1"],
_negmetric_noneavg["target1"],
_negmetric_noneavg["res1"],
_negmetric_noneavg["pred2"],
_negmetric_noneavg["target2"],
_negmetric_noneavg["res2"],
)
],
)
def test_negrecall_noneavg(pred1, target1, res1, pred2, target2, res2):
class MetricWrapper(Metric):
def __init__(self, metric):
super().__init__()
self.metric = metric

def update(self, *args, **kwargs):
self.metric.update(*args, **kwargs)

def compute(self, *args, **kwargs):
return self.metric.compute(*args, **kwargs)

rec = MetricWrapper(Recall(average="none", num_classes=pred1.shape[1]))
result1 = rec(pred1, target1)
assert torch.allclose(res1, result1, equal_nan=True)
result2 = rec(pred2, target2)
assert torch.allclose(res2, result2, equal_nan=True)
2 changes: 1 addition & 1 deletion torchmetrics/functional/classification/accuracy.py
@@ -178,7 +178,7 @@ def _accuracy_compute(
numerator = tp + tn
denominator = tp + tn + fp + fn
else:
- numerator = tp
+ numerator = tp.clone()
denominator = tp + fn

if mdmc_average != MDMCAverageMethod.SAMPLEWISE:
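
The one-word change above is the heart of the fix. `tp` is the state tensor accumulated across `update` calls; `numerator = tp` made the working variable an alias of that state, and the masking applied further down for absent classes (the same branch is visible in the specificity hunk below) then wrote into it in place. `denominator = tp + fn` was never affected, since addition already yields a fresh tensor. A minimal sketch of the difference, with illustrative values (the masking value and variable names are assumptions, not the exact library code):

import torch

# Stand-ins for accumulated true-positive / false-positive / false-negative states.
tp = torch.tensor([1, 0, 0])
fp = torch.tensor([0, 1, 0])
fn = torch.tensor([1, 0, 0])

numerator = tp.clone()  # the fix: a private copy of the state tensor
denominator = tp + fn   # already a fresh tensor, so no clone is needed

# A class is "not present" if it has no TPs, no FPs, and no FNs; mask it in place.
absent = (tp == 0) & (fp == 0) & (fn == 0)
numerator[absent] = -1
denominator[absent] = -1

print(tp)  # tensor([1, 0, 0]) -- the state survives compute() untouched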
4 changes: 2 additions & 2 deletions torchmetrics/functional/classification/precision_recall.py
@@ -49,7 +49,7 @@ def _precision_compute(
tensor(0.2500)
"""

- numerator = tp
+ numerator = tp.clone()
denominator = tp + fp

if average == AverageMethod.MACRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE:
@@ -241,7 +241,7 @@ def _recall_compute(
>>> _recall_compute(tp, fp, fn, average='micro', mdmc_average=None)
tensor(0.2500)
"""
- numerator = tp
+ numerator = tp.clone()
denominator = tp + fn

if average == AverageMethod.MACRO and mdmc_average != MDMCAverageMethod.SAMPLEWISE:
2 changes: 1 addition & 1 deletion torchmetrics/functional/classification/specificity.py
@@ -51,7 +51,7 @@ def _specificity_compute(
tensor(0.6250)
"""

- numerator = tn
+ numerator = tn.clone()
denominator = tn + fp
if average == AverageMethod.NONE and mdmc_average != MDMCAverageMethod.SAMPLEWISE:
# a class is not present if there exists no TPs, no FPs, and no FNs
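
A cheap guard against this whole class of bug, independent of which metric is involved: `compute()` must be idempotent, so calling it twice without an intervening `update` has to return the same tensor. A minimal sketch of such a check (assuming the torchmetrics API used above):

import torch
from torchmetrics import Accuracy

metric = Accuracy(average="none", num_classes=3)
metric.update(torch.tensor([[0.0, 1.0, 0.0], [1.0, 0.0, 0.0]]), torch.tensor([0, 1]))

first = metric.compute()
second = metric.compute()  # must not differ: compute() may not mutate state
assert torch.allclose(first, second, equal_nan=True)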