From 335ebe6cb4d97849ea23de65b78cb373f3ba000f Mon Sep 17 00:00:00 2001 From: i-aki-y Date: Fri, 3 May 2024 18:44:35 +0900 Subject: [PATCH] Add `zero_division` option to the precision, recall, f1, fbeta. (#2198) * Add support of zero_division parameter * fix overlooked * Fix type error * Fix type error * Fix missing comma * Doc fix wrong math expression * Fixed StatScores to have zero_division * fix missing zero_division arg * fix device mismatch * use scikit-learn 1.4.0 * fix scikit-learn min ver * fix for new sklearn version * fix scikit-learn requirements * fix incorrect requirements condition * fix test code to pass in multiple sklearn versions * changelog * better docstring * add jaccardindex * fix tests * skip for old sklearn versions --------- Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Nicki Skafte Co-authored-by: Jirka --- CHANGELOG.md | 3 + requirements/_tests.txt | 3 +- src/torchmetrics/classification/f_beta.py | 90 ++++++++++++--- src/torchmetrics/classification/jaccard.py | 20 +++- .../classification/precision_recall.py | 88 +++++++++++--- .../classification/stat_scores.py | 16 ++- .../functional/classification/f_beta.py | 103 ++++++++++++++--- .../functional/classification/jaccard.py | 32 ++++-- .../classification/precision_recall.py | 81 ++++++++++--- .../functional/classification/stat_scores.py | 12 ++ src/torchmetrics/utilities/compute.py | 13 ++- src/torchmetrics/utilities/imports.py | 1 + tests/unittests/classification/test_f_beta.py | 100 ++++++++++++---- .../unittests/classification/test_jaccard.py | 84 +++++++++----- .../classification/test_precision_recall.py | 108 ++++++++++++++---- .../test_sensitivity_specificity.py | 5 +- tests/unittests/retrieval/test_ndcg.py | 5 + 17 files changed, 606 insertions(+), 158 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9ba95ce852..18b4681105c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added support for `torch.float` weighted networks for FID and KID calculations ([#2483](https://github.com/Lightning-AI/torchmetrics/pull/2483)) +- Added `zero_division` argument to selected classification metrics ([#2198](https://github.com/Lightning-AI/torchmetrics/pull/2198)) + + ### Changed - Made `__getattr__` and `__setattr__` of `ClasswiseWrapper` more general ([#2424](https://github.com/Lightning-AI/torchmetrics/pull/2424)) diff --git a/requirements/_tests.txt b/requirements/_tests.txt index 69f82700725..6bf2a66a3d4 100644 --- a/requirements/_tests.txt +++ b/requirements/_tests.txt @@ -15,5 +15,6 @@ pyGithub ==2.3.0 fire <=0.6.0 cloudpickle >1.3, <=3.0.0 -scikit-learn >=1.1.1, <1.4.0 +scikit-learn >=1.1.1, <1.3.0; python_version < "3.9" +scikit-learn >=1.4.0, <1.5.0; python_version >= "3.9" cachier ==3.0.0 diff --git a/src/torchmetrics/classification/f_beta.py b/src/torchmetrics/classification/f_beta.py index 93f26441c2a..526ad1ae0da 100644 --- a/src/torchmetrics/classification/f_beta.py +++ b/src/torchmetrics/classification/f_beta.py @@ -49,7 +49,8 @@ class BinaryFBetaScore(BinaryStatScores): The metric is only proper defined when :math:`\text{TP} + \text{FP} \neq 0 \wedge \text{TP} + \text{FN} \neq 0` where :math:`\text{TP}`, :math:`\text{FP}` and :math:`\text{FN}` represent the number of true positives, false - positives and false negatives respectively. If this case is encountered a score of 0 is returned. + positives and false negatives respectively. If this case is encountered a score of `zero_division` + (0 or 1, default is 0) is returned. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -83,6 +84,8 @@ class BinaryFBetaScore(BinaryStatScores): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -125,6 +128,7 @@ def __init__( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, **kwargs: Any, ) -> None: super().__init__( @@ -135,14 +139,24 @@ def __init__( **kwargs, ) if validate_args: - _binary_fbeta_score_arg_validation(beta, threshold, multidim_average, ignore_index) + _binary_fbeta_score_arg_validation(beta, threshold, multidim_average, ignore_index, zero_division) self.validate_args = validate_args + self.zero_division = zero_division self.beta = beta def compute(self) -> Tensor: """Compute metric.""" tp, fp, tn, fn = self._final_state() - return _fbeta_reduce(tp, fp, tn, fn, self.beta, average="binary", multidim_average=self.multidim_average) + return _fbeta_reduce( + tp, + fp, + tn, + fn, + self.beta, + average="binary", + multidim_average=self.multidim_average, + zero_division=self.zero_division, + ) def plot( self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None @@ -197,7 +211,7 @@ class MulticlassFBetaScore(MulticlassStatScores): The metric is only proper defined when :math:`\text{TP} + \text{FP} \neq 0 \wedge \text{TP} + \text{FN} \neq 0` where :math:`\text{TP}`, :math:`\text{FP}` and :math:`\text{FN}` represent the number of true positives, false positives and false negatives respectively. If this case is encountered for any class, the metric for that class - will be set to 0 and the overall metric may therefore be affected in turn. + will be set to `zero_division` (0 or 1, default is 0) and the overall metric may therefore be affected in turn. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -249,6 +263,8 @@ class MulticlassFBetaScore(MulticlassStatScores): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -306,6 +322,7 @@ def __init__( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, **kwargs: Any, ) -> None: super().__init__( @@ -318,14 +335,26 @@ def __init__( **kwargs, ) if validate_args: - _multiclass_fbeta_score_arg_validation(beta, num_classes, top_k, average, multidim_average, ignore_index) + _multiclass_fbeta_score_arg_validation( + beta, num_classes, top_k, average, multidim_average, ignore_index, zero_division + ) self.validate_args = validate_args + self.zero_division = zero_division self.beta = beta def compute(self) -> Tensor: """Compute metric.""" tp, fp, tn, fn = self._final_state() - return _fbeta_reduce(tp, fp, tn, fn, self.beta, average=self.average, multidim_average=self.multidim_average) + return _fbeta_reduce( + tp, + fp, + tn, + fn, + self.beta, + average=self.average, + multidim_average=self.multidim_average, + zero_division=self.zero_division, + ) def plot( self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None @@ -380,7 +409,7 @@ class MultilabelFBetaScore(MultilabelStatScores): The metric is only proper defined when :math:`\text{TP} + \text{FP} \neq 0 \wedge \text{TP} + \text{FN} \neq 0` where :math:`\text{TP}`, :math:`\text{FP}` and :math:`\text{FN}` represent the number of true positives, false positives and false negatives respectively. If this case is encountered for any label, the metric for that label - will be set to 0 and the overall metric may therefore be affected in turn. + will be set to `zero_division` (0 or 1, default is 0) and the overall metric may therefore be affected in turn. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -430,6 +459,8 @@ class MultilabelFBetaScore(MultilabelStatScores): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -485,6 +516,7 @@ def __init__( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, **kwargs: Any, ) -> None: super().__init__( @@ -497,15 +529,26 @@ def __init__( **kwargs, ) if validate_args: - _multilabel_fbeta_score_arg_validation(beta, num_labels, threshold, average, multidim_average, ignore_index) + _multilabel_fbeta_score_arg_validation( + beta, num_labels, threshold, average, multidim_average, ignore_index, zero_division + ) self.validate_args = validate_args + self.zero_division = zero_division self.beta = beta def compute(self) -> Tensor: """Compute metric.""" tp, fp, tn, fn = self._final_state() return _fbeta_reduce( - tp, fp, tn, fn, self.beta, average=self.average, multidim_average=self.multidim_average, multilabel=True + tp, + fp, + tn, + fn, + self.beta, + average=self.average, + multidim_average=self.multidim_average, + multilabel=True, + zero_division=self.zero_division, ) def plot( @@ -559,7 +602,8 @@ class BinaryF1Score(BinaryFBetaScore): The metric is only proper defined when :math:`\text{TP} + \text{FP} \neq 0 \wedge \text{TP} + \text{FN} \neq 0` where :math:`\text{TP}`, :math:`\text{FP}` and :math:`\text{FN}` represent the number of true positives, false - positives and false negatives respectively. If this case is encountered a score of 0 is returned. + positives and false negatives respectively. If this case is encountered a score of `zero_division` + (0 or 1, default is 0) is returned. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -592,6 +636,8 @@ class BinaryF1Score(BinaryFBetaScore): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -633,6 +679,7 @@ def __init__( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, **kwargs: Any, ) -> None: super().__init__( @@ -641,6 +688,7 @@ def __init__( multidim_average=multidim_average, ignore_index=ignore_index, validate_args=validate_args, + zero_division=zero_division, **kwargs, ) @@ -696,7 +744,7 @@ class MulticlassF1Score(MulticlassFBetaScore): The metric is only proper defined when :math:`\text{TP} + \text{FP} \neq 0 \wedge \text{TP} + \text{FN} \neq 0` where :math:`\text{TP}`, :math:`\text{FP}` and :math:`\text{FN}` represent the number of true positives, false positives and false negatives respectively. If this case is encountered for any class, the metric for that class - will be set to 0 and the overall metric may therefore be affected in turn. + will be set to `zero_division` (0 or 1, default is 0) and the overall metric may therefore be affected in turn. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -748,6 +796,8 @@ class MulticlassF1Score(MulticlassFBetaScore): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -804,6 +854,7 @@ def __init__( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, **kwargs: Any, ) -> None: super().__init__( @@ -814,6 +865,7 @@ def __init__( multidim_average=multidim_average, ignore_index=ignore_index, validate_args=validate_args, + zero_division=zero_division, **kwargs, ) @@ -869,7 +921,7 @@ class MultilabelF1Score(MultilabelFBetaScore): The metric is only proper defined when :math:`\text{TP} + \text{FP} \neq 0 \wedge \text{TP} + \text{FN} \neq 0` where :math:`\text{TP}`, :math:`\text{FP}` and :math:`\text{FN}` represent the number of true positives, false positives and false negatives respectively. If this case is encountered for any label, the metric for that label - will be set to 0 and the overall metric may therefore be affected in turn. + will be set to `zero_division` (0 or 1, default is 0) and the overall metric may therefore be affected in turn. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -919,6 +971,8 @@ class MultilabelF1Score(MultilabelFBetaScore): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -973,6 +1027,7 @@ def __init__( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, **kwargs: Any, ) -> None: super().__init__( @@ -983,6 +1038,7 @@ def __init__( multidim_average=multidim_average, ignore_index=ignore_index, validate_args=validate_args, + zero_division=zero_division, **kwargs, ) @@ -1039,7 +1095,8 @@ class FBetaScore(_ClassificationTaskWrapper): The metric is only proper defined when :math:`\text{TP} + \text{FP} \neq 0 \wedge \text{TP} + \text{FN} \neq 0` where :math:`\text{TP}`, :math:`\text{FP}` and :math:`\text{FN}` represent the number of true positives, false positives and false negatives respectively. If this case is encountered for any class/label, the metric for that - class/label will be set to 0 and the overall metric may therefore be affected in turn. + class/label will be set to `zero_division` (0 or 1, default is 0) and the overall metric may therefore be + affected in turn. This function is a simple wrapper to get the task specific versions of this metric, which is done by setting the ``task`` argument to either ``'binary'``, ``'multiclass'`` or ``multilabel``. See the documentation of @@ -1070,6 +1127,7 @@ def __new__( # type: ignore[misc] top_k: Optional[int] = 1, ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, **kwargs: Any, ) -> Metric: """Initialize task metric.""" @@ -1079,6 +1137,7 @@ def __new__( # type: ignore[misc] "multidim_average": multidim_average, "ignore_index": ignore_index, "validate_args": validate_args, + "zero_division": zero_division, }) if task == ClassificationTask.BINARY: return BinaryFBetaScore(beta, threshold, **kwargs) @@ -1104,7 +1163,8 @@ class F1Score(_ClassificationTaskWrapper): The metric is only proper defined when :math:`\text{TP} + \text{FP} \neq 0 \wedge \text{TP} + \text{FN} \neq 0` where :math:`\text{TP}`, :math:`\text{FP}` and :math:`\text{FN}` represent the number of true positives, false positives and false negatives respectively. If this case is encountered for any class/label, the metric for that - class/label will be set to 0 and the overall metric may therefore be affected in turn. + class/label will be set to `zero_division` (0 or 1, default is 0) and the overall metric may therefore be + affected in turn. This function is a simple wrapper to get the task specific versions of this metric, which is done by setting the ``task`` argument to either ``'binary'``, ``'multiclass'`` or ``multilabel``. See the documentation of @@ -1133,6 +1193,7 @@ def __new__( # type: ignore[misc] top_k: Optional[int] = 1, ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, **kwargs: Any, ) -> Metric: """Initialize task metric.""" @@ -1142,6 +1203,7 @@ def __new__( # type: ignore[misc] "multidim_average": multidim_average, "ignore_index": ignore_index, "validate_args": validate_args, + "zero_division": zero_division, }) if task == ClassificationTask.BINARY: return BinaryF1Score(threshold, **kwargs) diff --git a/src/torchmetrics/classification/jaccard.py b/src/torchmetrics/classification/jaccard.py index 4a230122b1b..0ea04849696 100644 --- a/src/torchmetrics/classification/jaccard.py +++ b/src/torchmetrics/classification/jaccard.py @@ -65,6 +65,8 @@ class BinaryJaccardIndex(BinaryConfusionMatrix): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: + Value to replace when there is a division by zero. Should be `0` or `1`. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. Example (preds is int tensor): @@ -97,15 +99,17 @@ def __init__( threshold: float = 0.5, ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, **kwargs: Any, ) -> None: super().__init__( threshold=threshold, ignore_index=ignore_index, normalize=None, validate_args=validate_args, **kwargs ) + self.zero_division = zero_division def compute(self) -> Tensor: """Compute metric.""" - return _jaccard_index_reduce(self.confmat, average="binary") + return _jaccard_index_reduce(self.confmat, average="binary", zero_division=self.zero_division) def plot( # type: ignore[override] self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None @@ -187,6 +191,8 @@ class MulticlassJaccardIndex(MulticlassConfusionMatrix): validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: + Value to replace when there is a division by zero. Should be `0` or `1`. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. Example (pred is integer tensor): @@ -224,6 +230,7 @@ def __init__( average: Optional[Literal["micro", "macro", "weighted", "none"]] = "macro", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, **kwargs: Any, ) -> None: super().__init__( @@ -233,10 +240,13 @@ def __init__( _multiclass_jaccard_index_arg_validation(num_classes, ignore_index, average) self.validate_args = validate_args self.average = average + self.zero_division = zero_division def compute(self) -> Tensor: """Compute metric.""" - return _jaccard_index_reduce(self.confmat, average=self.average, ignore_index=self.ignore_index) + return _jaccard_index_reduce( + self.confmat, average=self.average, ignore_index=self.ignore_index, zero_division=self.zero_division + ) def plot( # type: ignore[override] self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None @@ -319,6 +329,8 @@ class MultilabelJaccardIndex(MultilabelConfusionMatrix): validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: + Value to replace when there is a division by zero. Should be `0` or `1`. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. Example (preds is int tensor): @@ -354,6 +366,7 @@ def __init__( average: Optional[Literal["micro", "macro", "weighted", "none"]] = "macro", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, **kwargs: Any, ) -> None: super().__init__( @@ -368,10 +381,11 @@ def __init__( _multilabel_jaccard_index_arg_validation(num_labels, threshold, ignore_index, average) self.validate_args = validate_args self.average = average + self.zero_division = zero_division def compute(self) -> Tensor: """Compute metric.""" - return _jaccard_index_reduce(self.confmat, average=self.average) + return _jaccard_index_reduce(self.confmat, average=self.average, zero_division=self.zero_division) def plot( # type: ignore[override] self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None diff --git a/src/torchmetrics/classification/precision_recall.py b/src/torchmetrics/classification/precision_recall.py index 124215f4e03..0380545b5ac 100644 --- a/src/torchmetrics/classification/precision_recall.py +++ b/src/torchmetrics/classification/precision_recall.py @@ -18,7 +18,9 @@ from torchmetrics.classification.base import _ClassificationTaskWrapper from torchmetrics.classification.stat_scores import BinaryStatScores, MulticlassStatScores, MultilabelStatScores -from torchmetrics.functional.classification.precision_recall import _precision_recall_reduce +from torchmetrics.functional.classification.precision_recall import ( + _precision_recall_reduce, +) from torchmetrics.metric import Metric from torchmetrics.utilities.enums import ClassificationTask from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE @@ -42,7 +44,7 @@ class BinaryPrecision(BinaryStatScores): Where :math:`\text{TP}` and :math:`\text{FP}` represent the number of true positives and false positives respectively. The metric is only proper defined when :math:`\text{TP} + \text{FP} \neq 0`. If this case is - encountered a score of 0 is returned. + encountered a score of `zero_division` (0 or 1, default is 0) is returned. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -73,6 +75,7 @@ class BinaryPrecision(BinaryStatScores): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FP} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -112,7 +115,14 @@ def compute(self) -> Tensor: """Compute metric.""" tp, fp, tn, fn = self._final_state() return _precision_recall_reduce( - "precision", tp, fp, tn, fn, average="binary", multidim_average=self.multidim_average + "precision", + tp, + fp, + tn, + fn, + average="binary", + multidim_average=self.multidim_average, + zero_division=self.zero_division, ) def plot( @@ -165,8 +175,8 @@ class MulticlassPrecision(MulticlassStatScores): Where :math:`\text{TP}` and :math:`\text{FP}` represent the number of true positives and false positives respectively. The metric is only proper defined when :math:`\text{TP} + \text{FP} \neq 0`. If this case is - encountered for any class, the metric for that class will be set to 0 and the overall metric may therefore be - affected in turn. + encountered for any class, the metric for that class will be set to `zero_division` (0 or 1, default is 0) and + the overall metric may therefore be affected in turn. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -217,6 +227,7 @@ class MulticlassPrecision(MulticlassStatScores): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FP} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -269,7 +280,15 @@ def compute(self) -> Tensor: """Compute metric.""" tp, fp, tn, fn = self._final_state() return _precision_recall_reduce( - "precision", tp, fp, tn, fn, average=self.average, multidim_average=self.multidim_average, top_k=self.top_k + "precision", + tp, + fp, + tn, + fn, + average=self.average, + multidim_average=self.multidim_average, + top_k=self.top_k, + zero_division=self.zero_division, ) def plot( @@ -322,8 +341,8 @@ class MultilabelPrecision(MultilabelStatScores): Where :math:`\text{TP}` and :math:`\text{FP}` represent the number of true positives and false positives respectively. The metric is only proper defined when :math:`\text{TP} + \text{FP} \neq 0`. If this case is - encountered for any label, the metric for that label will be set to 0 and the overall metric may therefore be - affected in turn. + encountered for any label, the metric for that label will be set to `zero_division` (0 or 1, default is 0) and + the overall metric may therefore be affected in turn. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -373,6 +392,7 @@ class MultilabelPrecision(MultilabelStatScores): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FP} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -423,7 +443,15 @@ def compute(self) -> Tensor: """Compute metric.""" tp, fp, tn, fn = self._final_state() return _precision_recall_reduce( - "precision", tp, fp, tn, fn, average=self.average, multidim_average=self.multidim_average, multilabel=True + "precision", + tp, + fp, + tn, + fn, + average=self.average, + multidim_average=self.multidim_average, + multilabel=True, + zero_division=self.zero_division, ) def plot( @@ -476,7 +504,7 @@ class BinaryRecall(BinaryStatScores): Where :math:`\text{TP}` and :math:`\text{FN}` represent the number of true positives and false negatives respectively. The metric is only proper defined when :math:`\text{TP} + \text{FN} \neq 0`. If this case is - encountered a score of 0 is returned. + encountered a score of `zero_division` (0 or 1, default is 0) is returned. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -507,6 +535,7 @@ class BinaryRecall(BinaryStatScores): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FN} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -546,7 +575,14 @@ def compute(self) -> Tensor: """Compute metric.""" tp, fp, tn, fn = self._final_state() return _precision_recall_reduce( - "recall", tp, fp, tn, fn, average="binary", multidim_average=self.multidim_average + "recall", + tp, + fp, + tn, + fn, + average="binary", + multidim_average=self.multidim_average, + zero_division=self.zero_division, ) def plot( @@ -599,8 +635,8 @@ class MulticlassRecall(MulticlassStatScores): Where :math:`\text{TP}` and :math:`\text{FN}` represent the number of true positives and false negatives respectively. The metric is only proper defined when :math:`\text{TP} + \text{FN} \neq 0`. If this case is - encountered for any class, the metric for that class will be set to 0 and the overall metric may therefore be - affected in turn. + encountered for any class, the metric for that class will be set to `zero_division` (0 or 1, default is 0) and + the overall metric may therefore be affected in turn. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -650,6 +686,7 @@ class MulticlassRecall(MulticlassStatScores): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FN} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -702,7 +739,15 @@ def compute(self) -> Tensor: """Compute metric.""" tp, fp, tn, fn = self._final_state() return _precision_recall_reduce( - "recall", tp, fp, tn, fn, average=self.average, multidim_average=self.multidim_average, top_k=self.top_k + "recall", + tp, + fp, + tn, + fn, + average=self.average, + multidim_average=self.multidim_average, + top_k=self.top_k, + zero_division=self.zero_division, ) def plot( @@ -755,8 +800,8 @@ class MultilabelRecall(MultilabelStatScores): Where :math:`\text{TP}` and :math:`\text{FN}` represent the number of true positives and false negatives respectively. The metric is only proper defined when :math:`\text{TP} + \text{FN} \neq 0`. If this case is - encountered for any label, the metric for that label will be set to 0 and the overall metric may therefore be - affected in turn. + encountered for any label, the metric for that label will be set to `zero_division` (0 or 1, default is 0) and + the overall metric may therefore be affected in turn. As input to ``forward`` and ``update`` the metric accepts the following input: @@ -805,6 +850,7 @@ class MultilabelRecall(MultilabelStatScores): Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FN} = 0`. Example (preds is int tensor): >>> from torch import tensor @@ -855,7 +901,15 @@ def compute(self) -> Tensor: """Compute metric.""" tp, fp, tn, fn = self._final_state() return _precision_recall_reduce( - "recall", tp, fp, tn, fn, average=self.average, multidim_average=self.multidim_average, multilabel=True + "recall", + tp, + fp, + tn, + fn, + average=self.average, + multidim_average=self.multidim_average, + multilabel=True, + zero_division=self.zero_division, ) def plot( diff --git a/src/torchmetrics/classification/stat_scores.py b/src/torchmetrics/classification/stat_scores.py index 1ae0d4285e6..96d797fd5d6 100644 --- a/src/torchmetrics/classification/stat_scores.py +++ b/src/torchmetrics/classification/stat_scores.py @@ -169,13 +169,15 @@ def __init__( validate_args: bool = True, **kwargs: Any, ) -> None: + zero_division = kwargs.pop("zero_division", 0) super(_AbstractStatScores, self).__init__(**kwargs) if validate_args: - _binary_stat_scores_arg_validation(threshold, multidim_average, ignore_index) + _binary_stat_scores_arg_validation(threshold, multidim_average, ignore_index, zero_division) self.threshold = threshold self.multidim_average = multidim_average self.ignore_index = ignore_index self.validate_args = validate_args + self.zero_division = zero_division self._create_state(size=1, multidim_average=multidim_average) @@ -313,15 +315,19 @@ def __init__( validate_args: bool = True, **kwargs: Any, ) -> None: + zero_division = kwargs.pop("zero_division", 0) super(_AbstractStatScores, self).__init__(**kwargs) if validate_args: - _multiclass_stat_scores_arg_validation(num_classes, top_k, average, multidim_average, ignore_index) + _multiclass_stat_scores_arg_validation( + num_classes, top_k, average, multidim_average, ignore_index, zero_division + ) self.num_classes = num_classes self.top_k = top_k self.average = average self.multidim_average = multidim_average self.ignore_index = ignore_index self.validate_args = validate_args + self.zero_division = zero_division self._create_state( size=1 if (average == "micro" and top_k == 1) else num_classes, multidim_average=multidim_average @@ -461,15 +467,19 @@ def __init__( validate_args: bool = True, **kwargs: Any, ) -> None: + zero_division = kwargs.pop("zero_division", 0) super(_AbstractStatScores, self).__init__(**kwargs) if validate_args: - _multilabel_stat_scores_arg_validation(num_labels, threshold, average, multidim_average, ignore_index) + _multilabel_stat_scores_arg_validation( + num_labels, threshold, average, multidim_average, ignore_index, zero_division + ) self.num_labels = num_labels self.threshold = threshold self.average = average self.multidim_average = multidim_average self.ignore_index = ignore_index self.validate_args = validate_args + self.zero_division = zero_division self._create_state(size=num_labels, multidim_average=multidim_average) diff --git a/src/torchmetrics/functional/classification/f_beta.py b/src/torchmetrics/functional/classification/f_beta.py index 0f0e883266c..83f61955960 100644 --- a/src/torchmetrics/functional/classification/f_beta.py +++ b/src/torchmetrics/functional/classification/f_beta.py @@ -43,17 +43,18 @@ def _fbeta_reduce( average: Optional[Literal["binary", "micro", "macro", "weighted", "none"]], multidim_average: Literal["global", "samplewise"] = "global", multilabel: bool = False, + zero_division: float = 0, ) -> Tensor: beta2 = beta**2 if average == "binary": - return _safe_divide((1 + beta2) * tp, (1 + beta2) * tp + beta2 * fn + fp) + return _safe_divide((1 + beta2) * tp, (1 + beta2) * tp + beta2 * fn + fp, zero_division) if average == "micro": tp = tp.sum(dim=0 if multidim_average == "global" else 1) fn = fn.sum(dim=0 if multidim_average == "global" else 1) fp = fp.sum(dim=0 if multidim_average == "global" else 1) - return _safe_divide((1 + beta2) * tp, (1 + beta2) * tp + beta2 * fn + fp) + return _safe_divide((1 + beta2) * tp, (1 + beta2) * tp + beta2 * fn + fp, zero_division) - fbeta_score = _safe_divide((1 + beta2) * tp, (1 + beta2) * tp + beta2 * fn + fp) + fbeta_score = _safe_divide((1 + beta2) * tp, (1 + beta2) * tp + beta2 * fn + fp, zero_division) return _adjust_weights_safe_divide(fbeta_score, average, multilabel, tp, fp, fn) @@ -62,10 +63,11 @@ def _binary_fbeta_score_arg_validation( threshold: float = 0.5, multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, + zero_division: float = 0, ) -> None: if not (isinstance(beta, float) and beta > 0): raise ValueError(f"Expected argument `beta` to be a float larger than 0, but got {beta}.") - _binary_stat_scores_arg_validation(threshold, multidim_average, ignore_index) + _binary_stat_scores_arg_validation(threshold, multidim_average, ignore_index, zero_division) def binary_fbeta_score( @@ -76,6 +78,7 @@ def binary_fbeta_score( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `F-score`_ metric for binary tasks. @@ -106,6 +109,8 @@ def binary_fbeta_score( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Returns: If ``multidim_average`` is set to ``global``, the metric returns a scalar value. If ``multidim_average`` @@ -136,11 +141,13 @@ def binary_fbeta_score( """ if validate_args: - _binary_fbeta_score_arg_validation(beta, threshold, multidim_average, ignore_index) + _binary_fbeta_score_arg_validation(beta, threshold, multidim_average, ignore_index, zero_division) _binary_stat_scores_tensor_validation(preds, target, multidim_average, ignore_index) preds, target = _binary_stat_scores_format(preds, target, threshold, ignore_index) tp, fp, tn, fn = _binary_stat_scores_update(preds, target, multidim_average) - return _fbeta_reduce(tp, fp, tn, fn, beta, average="binary", multidim_average=multidim_average) + return _fbeta_reduce( + tp, fp, tn, fn, beta, average="binary", multidim_average=multidim_average, zero_division=zero_division + ) def _multiclass_fbeta_score_arg_validation( @@ -150,10 +157,11 @@ def _multiclass_fbeta_score_arg_validation( average: Optional[Literal["micro", "macro", "weighted", "none"]] = "macro", multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, + zero_division: float = 0, ) -> None: if not (isinstance(beta, float) and beta > 0): raise ValueError(f"Expected argument `beta` to be a float larger than 0, but got {beta}.") - _multiclass_stat_scores_arg_validation(num_classes, top_k, average, multidim_average, ignore_index) + _multiclass_stat_scores_arg_validation(num_classes, top_k, average, multidim_average, ignore_index, zero_division) def multiclass_fbeta_score( @@ -166,6 +174,7 @@ def multiclass_fbeta_score( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `F-score`_ metric for multiclass tasks. @@ -206,6 +215,8 @@ def multiclass_fbeta_score( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Returns: The returned shape depends on the ``average`` and ``multidim_average`` arguments: @@ -254,13 +265,17 @@ def multiclass_fbeta_score( """ if validate_args: - _multiclass_fbeta_score_arg_validation(beta, num_classes, top_k, average, multidim_average, ignore_index) + _multiclass_fbeta_score_arg_validation( + beta, num_classes, top_k, average, multidim_average, ignore_index, zero_division + ) _multiclass_stat_scores_tensor_validation(preds, target, num_classes, multidim_average, ignore_index) preds, target = _multiclass_stat_scores_format(preds, target, top_k) tp, fp, tn, fn = _multiclass_stat_scores_update( preds, target, num_classes, top_k, average, multidim_average, ignore_index ) - return _fbeta_reduce(tp, fp, tn, fn, beta, average=average, multidim_average=multidim_average) + return _fbeta_reduce( + tp, fp, tn, fn, beta, average=average, multidim_average=multidim_average, zero_division=zero_division + ) def _multilabel_fbeta_score_arg_validation( @@ -270,10 +285,13 @@ def _multilabel_fbeta_score_arg_validation( average: Optional[Literal["micro", "macro", "weighted", "none"]] = "macro", multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, + zero_division: float = 0, ) -> None: if not (isinstance(beta, float) and beta > 0): raise ValueError(f"Expected argument `beta` to be a float larger than 0, but got {beta}.") - _multilabel_stat_scores_arg_validation(num_labels, threshold, average, multidim_average, ignore_index) + _multilabel_stat_scores_arg_validation( + num_labels, threshold, average, multidim_average, ignore_index, zero_division + ) def multilabel_fbeta_score( @@ -286,6 +304,7 @@ def multilabel_fbeta_score( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `F-score`_ metric for multilabel tasks. @@ -325,6 +344,8 @@ def multilabel_fbeta_score( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Returns: The returned shape depends on the ``average`` and ``multidim_average`` arguments: @@ -371,11 +392,23 @@ def multilabel_fbeta_score( """ if validate_args: - _multilabel_fbeta_score_arg_validation(beta, num_labels, threshold, average, multidim_average, ignore_index) + _multilabel_fbeta_score_arg_validation( + beta, num_labels, threshold, average, multidim_average, ignore_index, zero_division + ) _multilabel_stat_scores_tensor_validation(preds, target, num_labels, multidim_average, ignore_index) preds, target = _multilabel_stat_scores_format(preds, target, num_labels, threshold, ignore_index) tp, fp, tn, fn = _multilabel_stat_scores_update(preds, target, multidim_average) - return _fbeta_reduce(tp, fp, tn, fn, beta, average=average, multidim_average=multidim_average, multilabel=True) + return _fbeta_reduce( + tp, + fp, + tn, + fn, + beta, + average=average, + multidim_average=multidim_average, + multilabel=True, + zero_division=zero_division, + ) def binary_f1_score( @@ -385,6 +418,7 @@ def binary_f1_score( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute F-1 score for binary tasks. @@ -413,6 +447,8 @@ def binary_f1_score( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Returns: If ``multidim_average`` is set to ``global``, the metric returns a scalar value. If ``multidim_average`` @@ -450,6 +486,7 @@ def binary_f1_score( multidim_average=multidim_average, ignore_index=ignore_index, validate_args=validate_args, + zero_division=zero_division, ) @@ -462,6 +499,7 @@ def multiclass_f1_score( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute F-1 score for multiclass tasks. @@ -500,6 +538,8 @@ def multiclass_f1_score( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Returns: The returned shape depends on the ``average`` and ``multidim_average`` arguments: @@ -557,6 +597,7 @@ def multiclass_f1_score( multidim_average=multidim_average, ignore_index=ignore_index, validate_args=validate_args, + zero_division=zero_division, ) @@ -569,6 +610,7 @@ def multilabel_f1_score( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute F-1 score for multilabel tasks. @@ -606,6 +648,8 @@ def multilabel_f1_score( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when + :math:`\text{TP} + \text{FP} = 0 \wedge \text{TP} + \text{FN} = 0`. Returns: The returned shape depends on the ``average`` and ``multidim_average`` arguments: @@ -661,6 +705,7 @@ def multilabel_f1_score( multidim_average=multidim_average, ignore_index=ignore_index, validate_args=validate_args, + zero_division=zero_division, ) @@ -677,6 +722,7 @@ def fbeta_score( top_k: Optional[int] = 1, ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `F-score`_ metric. @@ -702,20 +748,40 @@ def fbeta_score( task = ClassificationTask.from_str(task) assert multidim_average is not None # noqa: S101 # needed for mypy if task == ClassificationTask.BINARY: - return binary_fbeta_score(preds, target, beta, threshold, multidim_average, ignore_index, validate_args) + return binary_fbeta_score( + preds, target, beta, threshold, multidim_average, ignore_index, validate_args, zero_division + ) if task == ClassificationTask.MULTICLASS: if not isinstance(num_classes, int): raise ValueError(f"`num_classes` is expected to be `int` but `{type(num_classes)} was passed.`") if not isinstance(top_k, int): raise ValueError(f"`top_k` is expected to be `int` but `{type(top_k)} was passed.`") return multiclass_fbeta_score( - preds, target, beta, num_classes, average, top_k, multidim_average, ignore_index, validate_args + preds, + target, + beta, + num_classes, + average, + top_k, + multidim_average, + ignore_index, + validate_args, + zero_division, ) if task == ClassificationTask.MULTILABEL: if not isinstance(num_labels, int): raise ValueError(f"`num_labels` is expected to be `int` but `{type(num_labels)} was passed.`") return multilabel_fbeta_score( - preds, target, beta, num_labels, threshold, average, multidim_average, ignore_index, validate_args + preds, + target, + beta, + num_labels, + threshold, + average, + multidim_average, + ignore_index, + validate_args, + zero_division, ) raise ValueError(f"Unsupported task `{task}` passed.") @@ -732,6 +798,7 @@ def f1_score( top_k: Optional[int] = 1, ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute F-1 score. @@ -756,19 +823,19 @@ def f1_score( task = ClassificationTask.from_str(task) assert multidim_average is not None # noqa: S101 # needed for mypy if task == ClassificationTask.BINARY: - return binary_f1_score(preds, target, threshold, multidim_average, ignore_index, validate_args) + return binary_f1_score(preds, target, threshold, multidim_average, ignore_index, validate_args, zero_division) if task == ClassificationTask.MULTICLASS: if not isinstance(num_classes, int): raise ValueError(f"`num_classes` is expected to be `int` but `{type(num_classes)} was passed.`") if not isinstance(top_k, int): raise ValueError(f"`top_k` is expected to be `int` but `{type(top_k)} was passed.`") return multiclass_f1_score( - preds, target, num_classes, average, top_k, multidim_average, ignore_index, validate_args + preds, target, num_classes, average, top_k, multidim_average, ignore_index, validate_args, zero_division ) if task == ClassificationTask.MULTILABEL: if not isinstance(num_labels, int): raise ValueError(f"`num_labels` is expected to be `int` but `{type(num_labels)} was passed.`") return multilabel_f1_score( - preds, target, num_labels, threshold, average, multidim_average, ignore_index, validate_args + preds, target, num_labels, threshold, average, multidim_average, ignore_index, validate_args, zero_division ) raise ValueError(f"Unsupported task `{task}` passed.") diff --git a/src/torchmetrics/functional/classification/jaccard.py b/src/torchmetrics/functional/classification/jaccard.py index 7e928525ad8..1d240df68af 100644 --- a/src/torchmetrics/functional/classification/jaccard.py +++ b/src/torchmetrics/functional/classification/jaccard.py @@ -39,6 +39,7 @@ def _jaccard_index_reduce( confmat: Tensor, average: Optional[Literal["micro", "macro", "weighted", "none", "binary"]], ignore_index: Optional[int] = None, + zero_division: float = 0.0, ) -> Tensor: """Perform reduction of an un-normalized confusion matrix into jaccard score. @@ -57,6 +58,8 @@ def _jaccard_index_reduce( ignore_index: Specifies a target value that is ignored and does not contribute to the metric calculation + zero_division: + Value to replace when there is a division by zero. Should be `0` or `1`. """ allowed_average = ["binary", "micro", "macro", "weighted", "none", None] @@ -79,7 +82,7 @@ def _jaccard_index_reduce( num = num.sum() denom = denom.sum() - (denom[ignore_index] if ignore_index_cond else 0.0) - jaccard = _safe_divide(num, denom) + jaccard = _safe_divide(num, denom, zero_division=zero_division) if average is None or average == "none" or average == "micro": return jaccard @@ -100,6 +103,7 @@ def binary_jaccard_index( threshold: float = 0.5, ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0.0, ) -> Tensor: r"""Calculate the Jaccard index for binary tasks. @@ -126,7 +130,8 @@ def binary_jaccard_index( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. + zero_division: + Value to replace when there is a division by zero. Should be `0` or `1`. Example (preds is int tensor): >>> from torch import tensor @@ -149,7 +154,7 @@ def binary_jaccard_index( _binary_confusion_matrix_tensor_validation(preds, target, ignore_index) preds, target = _binary_confusion_matrix_format(preds, target, threshold, ignore_index) confmat = _binary_confusion_matrix_update(preds, target) - return _jaccard_index_reduce(confmat, average="binary") + return _jaccard_index_reduce(confmat, average="binary", zero_division=zero_division) def _multiclass_jaccard_index_arg_validation( @@ -170,6 +175,7 @@ def multiclass_jaccard_index( average: Optional[Literal["micro", "macro", "weighted", "none"]] = "macro", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0.0, ) -> Tensor: r"""Calculate the Jaccard index for multiclass tasks. @@ -204,7 +210,8 @@ def multiclass_jaccard_index( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. + zero_division: + Value to replace when there is a division by zero. Should be `0` or `1`. Example (pred is integer tensor): >>> from torch import tensor @@ -230,7 +237,7 @@ def multiclass_jaccard_index( _multiclass_confusion_matrix_tensor_validation(preds, target, num_classes, ignore_index) preds, target = _multiclass_confusion_matrix_format(preds, target, ignore_index) confmat = _multiclass_confusion_matrix_update(preds, target, num_classes) - return _jaccard_index_reduce(confmat, average=average, ignore_index=ignore_index) + return _jaccard_index_reduce(confmat, average=average, ignore_index=ignore_index, zero_division=zero_division) def _multilabel_jaccard_index_arg_validation( @@ -253,6 +260,7 @@ def multilabel_jaccard_index( average: Optional[Literal["micro", "macro", "weighted", "none"]] = "macro", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0.0, ) -> Tensor: r"""Calculate the Jaccard index for multilabel tasks. @@ -288,7 +296,8 @@ def multilabel_jaccard_index( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. + zero_division: + Value to replace when there is a division by zero. Should be `0` or `1`. Example (preds is int tensor): >>> from torch import tensor @@ -311,7 +320,7 @@ def multilabel_jaccard_index( _multilabel_confusion_matrix_tensor_validation(preds, target, num_labels, ignore_index) preds, target = _multilabel_confusion_matrix_format(preds, target, num_labels, threshold, ignore_index) confmat = _multilabel_confusion_matrix_update(preds, target, num_labels) - return _jaccard_index_reduce(confmat, average=average, ignore_index=ignore_index) + return _jaccard_index_reduce(confmat, average=average, ignore_index=ignore_index, zero_division=zero_division) def jaccard_index( @@ -324,6 +333,7 @@ def jaccard_index( average: Optional[Literal["micro", "macro", "weighted", "none"]] = "macro", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0.0, ) -> Tensor: r"""Calculate the Jaccard index. @@ -351,13 +361,15 @@ def jaccard_index( """ task = ClassificationTask.from_str(task) if task == ClassificationTask.BINARY: - return binary_jaccard_index(preds, target, threshold, ignore_index, validate_args) + return binary_jaccard_index(preds, target, threshold, ignore_index, validate_args, zero_division) if task == ClassificationTask.MULTICLASS: if not isinstance(num_classes, int): raise ValueError(f"`num_classes` is expected to be `int` but `{type(num_classes)} was passed.`") - return multiclass_jaccard_index(preds, target, num_classes, average, ignore_index, validate_args) + return multiclass_jaccard_index(preds, target, num_classes, average, ignore_index, validate_args, zero_division) if task == ClassificationTask.MULTILABEL: if not isinstance(num_labels, int): raise ValueError(f"`num_labels` is expected to be `int` but `{type(num_labels)} was passed.`") - return multilabel_jaccard_index(preds, target, num_labels, threshold, average, ignore_index, validate_args) + return multilabel_jaccard_index( + preds, target, num_labels, threshold, average, ignore_index, validate_args, zero_division + ) raise ValueError(f"Not handled value: {task}") diff --git a/src/torchmetrics/functional/classification/precision_recall.py b/src/torchmetrics/functional/classification/precision_recall.py index beb70d54bbc..96214c82274 100644 --- a/src/torchmetrics/functional/classification/precision_recall.py +++ b/src/torchmetrics/functional/classification/precision_recall.py @@ -44,17 +44,18 @@ def _precision_recall_reduce( multidim_average: Literal["global", "samplewise"] = "global", multilabel: bool = False, top_k: int = 1, + zero_division: float = 0, ) -> Tensor: different_stat = fp if stat == "precision" else fn # this is what differs between the two scores if average == "binary": - return _safe_divide(tp, tp + different_stat) + return _safe_divide(tp, tp + different_stat, zero_division) if average == "micro": tp = tp.sum(dim=0 if multidim_average == "global" else 1) fn = fn.sum(dim=0 if multidim_average == "global" else 1) different_stat = different_stat.sum(dim=0 if multidim_average == "global" else 1) - return _safe_divide(tp, tp + different_stat) + return _safe_divide(tp, tp + different_stat, zero_division) - score = _safe_divide(tp, tp + different_stat) + score = _safe_divide(tp, tp + different_stat, zero_division) return _adjust_weights_safe_divide(score, average, multilabel, tp, fp, fn, top_k=top_k) @@ -65,6 +66,7 @@ def binary_precision( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `Precision`_ for binary tasks. @@ -95,6 +97,7 @@ def binary_precision( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FP} = 0`. Returns: If ``multidim_average`` is set to ``global``, the metric returns a scalar value. If ``multidim_average`` @@ -129,7 +132,9 @@ def binary_precision( _binary_stat_scores_tensor_validation(preds, target, multidim_average, ignore_index) preds, target = _binary_stat_scores_format(preds, target, threshold, ignore_index) tp, fp, tn, fn = _binary_stat_scores_update(preds, target, multidim_average) - return _precision_recall_reduce("precision", tp, fp, tn, fn, average="binary", multidim_average=multidim_average) + return _precision_recall_reduce( + "precision", tp, fp, tn, fn, average="binary", multidim_average=multidim_average, zero_division=zero_division + ) def multiclass_precision( @@ -141,6 +146,7 @@ def multiclass_precision( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `Precision`_ for multiclass tasks. @@ -182,6 +188,7 @@ def multiclass_precision( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FP} = 0`. Returns: The returned shape depends on the ``average`` and ``multidim_average`` arguments: @@ -237,7 +244,15 @@ def multiclass_precision( preds, target, num_classes, top_k, average, multidim_average, ignore_index ) return _precision_recall_reduce( - "precision", tp, fp, tn, fn, average=average, multidim_average=multidim_average, top_k=top_k + "precision", + tp, + fp, + tn, + fn, + average=average, + multidim_average=multidim_average, + top_k=top_k, + zero_division=zero_division, ) @@ -250,6 +265,7 @@ def multilabel_precision( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `Precision`_ for multilabel tasks. @@ -289,6 +305,7 @@ def multilabel_precision( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FP} = 0`. Returns: The returned shape depends on the ``average`` and ``multidim_average`` arguments: @@ -340,7 +357,15 @@ def multilabel_precision( preds, target = _multilabel_stat_scores_format(preds, target, num_labels, threshold, ignore_index) tp, fp, tn, fn = _multilabel_stat_scores_update(preds, target, multidim_average) return _precision_recall_reduce( - "precision", tp, fp, tn, fn, average=average, multidim_average=multidim_average, multilabel=True + "precision", + tp, + fp, + tn, + fn, + average=average, + multidim_average=multidim_average, + multilabel=True, + zero_division=zero_division, ) @@ -351,6 +376,7 @@ def binary_recall( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `Recall`_ for binary tasks. @@ -381,6 +407,7 @@ def binary_recall( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FN} = 0`. Returns: If ``multidim_average`` is set to ``global``, the metric returns a scalar value. If ``multidim_average`` @@ -415,7 +442,9 @@ def binary_recall( _binary_stat_scores_tensor_validation(preds, target, multidim_average, ignore_index) preds, target = _binary_stat_scores_format(preds, target, threshold, ignore_index) tp, fp, tn, fn = _binary_stat_scores_update(preds, target, multidim_average) - return _precision_recall_reduce("recall", tp, fp, tn, fn, average="binary", multidim_average=multidim_average) + return _precision_recall_reduce( + "recall", tp, fp, tn, fn, average="binary", multidim_average=multidim_average, zero_division=zero_division + ) def multiclass_recall( @@ -427,6 +456,7 @@ def multiclass_recall( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `Recall`_ for multiclass tasks. @@ -468,6 +498,7 @@ def multiclass_recall( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FN} = 0`. Returns: The returned shape depends on the ``average`` and ``multidim_average`` arguments: @@ -523,7 +554,15 @@ def multiclass_recall( preds, target, num_classes, top_k, average, multidim_average, ignore_index ) return _precision_recall_reduce( - "recall", tp, fp, tn, fn, average=average, multidim_average=multidim_average, top_k=top_k + "recall", + tp, + fp, + tn, + fn, + average=average, + multidim_average=multidim_average, + top_k=top_k, + zero_division=zero_division, ) @@ -536,6 +575,7 @@ def multilabel_recall( multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `Recall`_ for multilabel tasks. @@ -575,6 +615,7 @@ def multilabel_recall( Specifies a target value that is ignored and does not contribute to the metric calculation validate_args: bool indicating if input arguments and tensors should be validated for correctness. Set to ``False`` for faster computations. + zero_division: Should be `0` or `1`. The value returned when :math:`\text{TP} + \text{FN} = 0`. Returns: The returned shape depends on the ``average`` and ``multidim_average`` arguments: @@ -626,7 +667,15 @@ def multilabel_recall( preds, target = _multilabel_stat_scores_format(preds, target, num_labels, threshold, ignore_index) tp, fp, tn, fn = _multilabel_stat_scores_update(preds, target, multidim_average) return _precision_recall_reduce( - "recall", tp, fp, tn, fn, average=average, multidim_average=multidim_average, multilabel=True + "recall", + tp, + fp, + tn, + fn, + average=average, + multidim_average=multidim_average, + multilabel=True, + zero_division=zero_division, ) @@ -642,6 +691,7 @@ def precision( top_k: Optional[int] = 1, ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `Precision`_. @@ -669,20 +719,20 @@ def precision( """ assert multidim_average is not None # noqa: S101 # needed for mypy if task == ClassificationTask.BINARY: - return binary_precision(preds, target, threshold, multidim_average, ignore_index, validate_args) + return binary_precision(preds, target, threshold, multidim_average, ignore_index, validate_args, zero_division) if task == ClassificationTask.MULTICLASS: if not isinstance(num_classes, int): raise ValueError(f"`num_classes` is expected to be `int` but `{type(num_classes)} was passed.`") if not isinstance(top_k, int): raise ValueError(f"`top_k` is expected to be `int` but `{type(top_k)} was passed.`") return multiclass_precision( - preds, target, num_classes, average, top_k, multidim_average, ignore_index, validate_args + preds, target, num_classes, average, top_k, multidim_average, ignore_index, validate_args, zero_division ) if task == ClassificationTask.MULTILABEL: if not isinstance(num_labels, int): raise ValueError(f"`num_labels` is expected to be `int` but `{type(num_labels)} was passed.`") return multilabel_precision( - preds, target, num_labels, threshold, average, multidim_average, ignore_index, validate_args + preds, target, num_labels, threshold, average, multidim_average, ignore_index, validate_args, zero_division ) raise ValueError( f"Expected argument `task` to either be `'binary'`, `'multiclass'` or `'multilabel'` but got {task}" @@ -701,6 +751,7 @@ def recall( top_k: Optional[int] = 1, ignore_index: Optional[int] = None, validate_args: bool = True, + zero_division: float = 0, ) -> Tensor: r"""Compute `Recall`_. @@ -729,19 +780,19 @@ def recall( task = ClassificationTask.from_str(task) assert multidim_average is not None # noqa: S101 # needed for mypy if task == ClassificationTask.BINARY: - return binary_recall(preds, target, threshold, multidim_average, ignore_index, validate_args) + return binary_recall(preds, target, threshold, multidim_average, ignore_index, validate_args, zero_division) if task == ClassificationTask.MULTICLASS: if not isinstance(num_classes, int): raise ValueError(f"`num_classes` is expected to be `int` but `{type(num_classes)} was passed.`") if not isinstance(top_k, int): raise ValueError(f"`top_k` is expected to be `int` but `{type(top_k)} was passed.`") return multiclass_recall( - preds, target, num_classes, average, top_k, multidim_average, ignore_index, validate_args + preds, target, num_classes, average, top_k, multidim_average, ignore_index, validate_args, zero_division ) if task == ClassificationTask.MULTILABEL: if not isinstance(num_labels, int): raise ValueError(f"`num_labels` is expected to be `int` but `{type(num_labels)} was passed.`") return multilabel_recall( - preds, target, num_labels, threshold, average, multidim_average, ignore_index, validate_args + preds, target, num_labels, threshold, average, multidim_average, ignore_index, validate_args, zero_division ) raise ValueError(f"Not handled value: {task}") diff --git a/src/torchmetrics/functional/classification/stat_scores.py b/src/torchmetrics/functional/classification/stat_scores.py index aa8e0bf5016..a412efc180f 100644 --- a/src/torchmetrics/functional/classification/stat_scores.py +++ b/src/torchmetrics/functional/classification/stat_scores.py @@ -26,12 +26,14 @@ def _binary_stat_scores_arg_validation( threshold: float = 0.5, multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, + zero_division: float = 0, ) -> None: """Validate non tensor input. - ``threshold`` has to be a float in the [0,1] range - ``multidim_average`` has to be either "global" or "samplewise" - ``ignore_index`` has to be None or int + - ``zero_division`` has to be 0 or 1 """ if not (isinstance(threshold, float) and (0 <= threshold <= 1)): @@ -43,6 +45,8 @@ def _binary_stat_scores_arg_validation( ) if ignore_index is not None and not isinstance(ignore_index, int): raise ValueError(f"Expected argument `ignore_index` to either be `None` or an integer, but got {ignore_index}") + if zero_division not in [0, 1]: + raise ValueError(f"Expected argument `zero_division` to be 0 or 1, but got {zero_division}.") def _binary_stat_scores_tensor_validation( @@ -220,6 +224,7 @@ def _multiclass_stat_scores_arg_validation( average: Optional[Literal["micro", "macro", "weighted", "none"]] = "macro", multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, + zero_division: float = 0, ) -> None: """Validate non tensor input. @@ -228,6 +233,7 @@ def _multiclass_stat_scores_arg_validation( - ``average`` has to be "micro" | "macro" | "weighted" | "none" - ``multidim_average`` has to be either "global" or "samplewise" - ``ignore_index`` has to be None or int + - ``zero_division`` has to be 0 or 1 """ if not isinstance(num_classes, int) or num_classes < 2: @@ -248,6 +254,8 @@ def _multiclass_stat_scores_arg_validation( ) if ignore_index is not None and not isinstance(ignore_index, int): raise ValueError(f"Expected argument `ignore_index` to either be `None` or an integer, but got {ignore_index}") + if zero_division not in [0, 1]: + raise ValueError(f"Expected argument `zero_division` to be 0 or 1, but got {zero_division}.") def _multiclass_stat_scores_tensor_validation( @@ -560,6 +568,7 @@ def _multilabel_stat_scores_arg_validation( average: Optional[Literal["micro", "macro", "weighted", "none"]] = "macro", multidim_average: Literal["global", "samplewise"] = "global", ignore_index: Optional[int] = None, + zero_division: float = 0, ) -> None: """Validate non tensor input. @@ -568,6 +577,7 @@ def _multilabel_stat_scores_arg_validation( - ``average`` has to be "micro" | "macro" | "weighted" | "none" - ``multidim_average`` has to be either "global" or "samplewise" - ``ignore_index`` has to be None or int + - ``zero_division`` has to be 0 or 1 """ if not isinstance(num_labels, int) or num_labels < 2: @@ -584,6 +594,8 @@ def _multilabel_stat_scores_arg_validation( ) if ignore_index is not None and not isinstance(ignore_index, int): raise ValueError(f"Expected argument `ignore_index` to either be `None` or an integer, but got {ignore_index}") + if zero_division not in [0, 1]: + raise ValueError(f"Expected argument `zero_division` to be 0 or 1, but got {zero_division}.") def _multilabel_stat_scores_tensor_validation( diff --git a/src/torchmetrics/utilities/compute.py b/src/torchmetrics/utilities/compute.py index 12613103ca6..68cd344877d 100644 --- a/src/torchmetrics/utilities/compute.py +++ b/src/torchmetrics/utilities/compute.py @@ -43,16 +43,21 @@ def _safe_xlogy(x: Tensor, y: Tensor) -> Tensor: return res -def _safe_divide(num: Tensor, denom: Tensor) -> Tensor: +def _safe_divide(num: Tensor, denom: Tensor, zero_division: float = 0.0) -> Tensor: """Safe division, by preventing division by zero. - Additionally casts to float if input is not already to secure backwards compatibility. + Function will cast to float if input is not already to secure backwards compatibility. + + Args: + num: numerator tensor + denom: denominator tensor, which may contain zeros + zero_division: value to replace elements divided by zero """ - denom[denom == 0.0] = 1 num = num if num.is_floating_point() else num.float() denom = denom if denom.is_floating_point() else denom.float() - return num / denom + zero_division = torch.tensor(zero_division).float().to(num.device) + return torch.where(denom != 0, num / denom, zero_division) def _adjust_weights_safe_divide( diff --git a/src/torchmetrics/utilities/imports.py b/src/torchmetrics/utilities/imports.py index 6e80411f5c1..f21f29f8152 100644 --- a/src/torchmetrics/utilities/imports.py +++ b/src/torchmetrics/utilities/imports.py @@ -60,5 +60,6 @@ _MECAB_KO_DIC_AVAILABLE = RequirementCache("mecab_ko_dic") _IPADIC_AVAILABLE = RequirementCache("ipadic") _SENTENCEPIECE_AVAILABLE = RequirementCache("sentencepiece") +_SKLEARN_GREATER_EQUAL_1_3 = RequirementCache("scikit-learn>=1.3.0") _LATEX_AVAILABLE: bool = shutil.which("latex") is not None diff --git a/tests/unittests/classification/test_f_beta.py b/tests/unittests/classification/test_f_beta.py index 3a334708485..03c39d336fe 100644 --- a/tests/unittests/classification/test_f_beta.py +++ b/tests/unittests/classification/test_f_beta.py @@ -49,7 +49,7 @@ seed_all(42) -def _reference_sklearn_fbeta_score_binary(preds, target, sk_fn, ignore_index, multidim_average): +def _reference_sklearn_fbeta_score_binary(preds, target, sk_fn, ignore_index, multidim_average, zero_division=0): if multidim_average == "global": preds = preds.view(-1).numpy() target = target.view(-1).numpy() @@ -64,14 +64,14 @@ def _reference_sklearn_fbeta_score_binary(preds, target, sk_fn, ignore_index, mu if multidim_average == "global": target, preds = remove_ignore_index(target, preds, ignore_index) - return sk_fn(target, preds) + return sk_fn(target, preds, zero_division=zero_division) res = [] for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() true, pred = remove_ignore_index(true, pred, ignore_index) - res.append(sk_fn(true, pred)) + res.append(sk_fn(true, pred, zero_division=zero_division)) return np.stack(res) @@ -90,7 +90,10 @@ class TestBinaryFBetaScore(MetricTester): @pytest.mark.parametrize("ignore_index", [None, -1]) @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) @pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False]) - def test_binary_fbeta_score(self, ddp, inputs, module, functional, compare, ignore_index, multidim_average): + @pytest.mark.parametrize("zero_division", [0, 1]) + def test_binary_fbeta_score( + self, ddp, inputs, module, functional, compare, ignore_index, multidim_average, zero_division + ): """Test class implementation of metric.""" preds, target = inputs if ignore_index == -1: @@ -110,13 +113,22 @@ def test_binary_fbeta_score(self, ddp, inputs, module, functional, compare, igno sk_fn=compare, ignore_index=ignore_index, multidim_average=multidim_average, + zero_division=zero_division, ), - metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index, "multidim_average": multidim_average}, + metric_args={ + "threshold": THRESHOLD, + "ignore_index": ignore_index, + "multidim_average": multidim_average, + "zero_division": zero_division, + }, ) @pytest.mark.parametrize("ignore_index", [None, -1]) @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) - def test_binary_fbeta_score_functional(self, inputs, module, functional, compare, ignore_index, multidim_average): + @pytest.mark.parametrize("zero_division", [0, 1]) + def test_binary_fbeta_score_functional( + self, inputs, module, functional, compare, ignore_index, multidim_average, zero_division + ): """Test functional implementation of metric.""" preds, target = inputs if ignore_index == -1: @@ -133,11 +145,13 @@ def test_binary_fbeta_score_functional(self, inputs, module, functional, compare sk_fn=compare, ignore_index=ignore_index, multidim_average=multidim_average, + zero_division=zero_division, ), metric_args={ "threshold": THRESHOLD, "ignore_index": ignore_index, "multidim_average": multidim_average, + "zero_division": zero_division, }, ) @@ -183,14 +197,22 @@ def test_binary_fbeta_score_half_gpu(self, inputs, module, functional, compare, ) -def _reference_sklearn_fbeta_score_multiclass(preds, target, sk_fn, ignore_index, multidim_average, average): +def _reference_sklearn_fbeta_score_multiclass( + preds, target, sk_fn, ignore_index, multidim_average, average, zero_division=0 +): if preds.ndim == target.ndim + 1: preds = torch.argmax(preds, 1) if multidim_average == "global": preds = preds.numpy().flatten() target = target.numpy().flatten() target, preds = remove_ignore_index(target, preds, ignore_index) - return sk_fn(target, preds, average=average, labels=list(range(NUM_CLASSES)) if average is None else None) + return sk_fn( + target, + preds, + average=average, + labels=list(range(NUM_CLASSES)) if average is None else None, + zero_division=zero_division, + ) preds = preds.numpy() target = target.numpy() @@ -199,7 +221,24 @@ def _reference_sklearn_fbeta_score_multiclass(preds, target, sk_fn, ignore_index pred = pred.flatten() true = true.flatten() true, pred = remove_ignore_index(true, pred, ignore_index) - r = sk_fn(true, pred, average=average, labels=list(range(NUM_CLASSES)) if average is None else None) + + if len(pred) == 0 and average == "weighted": + # The result of sk_fn([], [], labels=None, average="weighted", zero_division=zero_division) + # varies depending on the sklearn version: + # 1.2 -> the value of zero_division + # 1.3 -> nan + # 1.4 -> nan + # To avoid breaking some test cases by this behavior, + # hard coded to return 0 in this special case. + r = 0.0 + else: + r = sk_fn( + true, + pred, + average=average, + labels=list(range(NUM_CLASSES)) if average is None else None, + zero_division=zero_division, + ) res.append(0.0 if np.isnan(r).any() else r) return np.stack(res, 0) @@ -224,8 +263,9 @@ class TestMulticlassFBetaScore(MetricTester): @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) @pytest.mark.parametrize("average", ["micro", "macro", "weighted", None]) @pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False]) + @pytest.mark.parametrize("zero_division", [0, 1]) def test_multiclass_fbeta_score( - self, ddp, inputs, module, functional, compare, ignore_index, multidim_average, average + self, ddp, inputs, module, functional, compare, ignore_index, multidim_average, average, zero_division ): """Test class implementation of metric.""" preds, target = inputs @@ -247,20 +287,23 @@ def test_multiclass_fbeta_score( ignore_index=ignore_index, multidim_average=multidim_average, average=average, + zero_division=zero_division, ), metric_args={ "ignore_index": ignore_index, "multidim_average": multidim_average, "average": average, "num_classes": NUM_CLASSES, + "zero_division": zero_division, }, ) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) @pytest.mark.parametrize("average", ["micro", "macro", "weighted", None]) + @pytest.mark.parametrize("zero_division", [0, 1]) def test_multiclass_fbeta_score_functional( - self, inputs, module, functional, compare, ignore_index, multidim_average, average + self, inputs, module, functional, compare, ignore_index, multidim_average, average, zero_division ): """Test functional implementation of metric.""" preds, target = inputs @@ -279,12 +322,14 @@ def test_multiclass_fbeta_score_functional( ignore_index=ignore_index, multidim_average=multidim_average, average=average, + zero_division=zero_division, ), metric_args={ "ignore_index": ignore_index, "multidim_average": multidim_average, "average": average, "num_classes": NUM_CLASSES, + "zero_division": zero_division, }, ) @@ -368,18 +413,18 @@ def test_top_k( assert torch.isclose(metric_fn(preds, target, top_k=k, average=average, num_classes=3), result) -def _reference_sklearn_fbeta_score_multilabel_global(preds, target, sk_fn, ignore_index, average): +def _reference_sklearn_fbeta_score_multilabel_global(preds, target, sk_fn, ignore_index, average, zero_division): if average == "micro": preds = preds.flatten() target = target.flatten() target, preds = remove_ignore_index(target, preds, ignore_index) - return sk_fn(target, preds) + return sk_fn(target, preds, zero_division=zero_division) fbeta_score, weights = [], [] for i in range(preds.shape[1]): pred, true = preds[:, i].flatten(), target[:, i].flatten() true, pred = remove_ignore_index(true, pred, ignore_index) - fbeta_score.append(sk_fn(true, pred)) + fbeta_score.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) weights.append(confmat[1, 1] + confmat[1, 0]) res = np.stack(fbeta_score, axis=0) @@ -396,13 +441,13 @@ def _reference_sklearn_fbeta_score_multilabel_global(preds, target, sk_fn, ignor return None -def _reference_sklearn_fbeta_score_multilabel_local(preds, target, sk_fn, ignore_index, average): +def _reference_sklearn_fbeta_score_multilabel_local(preds, target, sk_fn, ignore_index, average, zero_division): fbeta_score, weights = [], [] for i in range(preds.shape[0]): if average == "micro": pred, true = preds[i].flatten(), target[i].flatten() true, pred = remove_ignore_index(true, pred, ignore_index) - fbeta_score.append(sk_fn(true, pred)) + fbeta_score.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) weights.append(confmat[1, 1] + confmat[1, 0]) else: @@ -410,7 +455,7 @@ def _reference_sklearn_fbeta_score_multilabel_local(preds, target, sk_fn, ignore for j in range(preds.shape[1]): pred, true = preds[i, j], target[i, j] true, pred = remove_ignore_index(true, pred, ignore_index) - scores.append(sk_fn(true, pred)) + scores.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) w.append(confmat[1, 1] + confmat[1, 0]) fbeta_score.append(np.stack(scores)) @@ -430,7 +475,9 @@ def _reference_sklearn_fbeta_score_multilabel_local(preds, target, sk_fn, ignore return None -def _reference_sklearn_fbeta_score_multilabel(preds, target, sk_fn, ignore_index, multidim_average, average): +def _reference_sklearn_fbeta_score_multilabel( + preds, target, sk_fn, ignore_index, multidim_average, average, zero_division=0 +): preds = preds.numpy() target = target.numpy() if np.issubdtype(preds.dtype, np.floating): @@ -444,10 +491,13 @@ def _reference_sklearn_fbeta_score_multilabel(preds, target, sk_fn, ignore_index target.transpose(0, 2, 1).reshape(-1, NUM_CLASSES), preds.transpose(0, 2, 1).reshape(-1, NUM_CLASSES), average=average, + zero_division=zero_division, ) if multidim_average == "global": - return _reference_sklearn_fbeta_score_multilabel_global(preds, target, sk_fn, ignore_index, average) - return _reference_sklearn_fbeta_score_multilabel_local(preds, target, sk_fn, ignore_index, average) + return _reference_sklearn_fbeta_score_multilabel_global( + preds, target, sk_fn, ignore_index, average, zero_division + ) + return _reference_sklearn_fbeta_score_multilabel_local(preds, target, sk_fn, ignore_index, average, zero_division) @pytest.mark.parametrize("inputs", _multilabel_cases) @@ -470,8 +520,9 @@ class TestMultilabelFBetaScore(MetricTester): @pytest.mark.parametrize("ignore_index", [None, -1]) @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) @pytest.mark.parametrize("average", ["micro", "macro", "weighted", None]) + @pytest.mark.parametrize("zero_division", [0, 1]) def test_multilabel_fbeta_score( - self, ddp, inputs, module, functional, compare, ignore_index, multidim_average, average + self, ddp, inputs, module, functional, compare, ignore_index, multidim_average, average, zero_division ): """Test class implementation of metric.""" preds, target = inputs @@ -493,6 +544,7 @@ def test_multilabel_fbeta_score( ignore_index=ignore_index, multidim_average=multidim_average, average=average, + zero_division=zero_division, ), metric_args={ "num_labels": NUM_CLASSES, @@ -500,14 +552,16 @@ def test_multilabel_fbeta_score( "ignore_index": ignore_index, "multidim_average": multidim_average, "average": average, + "zero_division": zero_division, }, ) @pytest.mark.parametrize("ignore_index", [None, -1]) @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) @pytest.mark.parametrize("average", ["micro", "macro", "weighted", None]) + @pytest.mark.parametrize("zero_division", [0, 1]) def test_multilabel_fbeta_score_functional( - self, inputs, module, functional, compare, ignore_index, multidim_average, average + self, inputs, module, functional, compare, ignore_index, multidim_average, average, zero_division ): """Test functional implementation of metric.""" preds, target = inputs @@ -526,6 +580,7 @@ def test_multilabel_fbeta_score_functional( ignore_index=ignore_index, multidim_average=multidim_average, average=average, + zero_division=zero_division, ), metric_args={ "num_labels": NUM_CLASSES, @@ -533,6 +588,7 @@ def test_multilabel_fbeta_score_functional( "ignore_index": ignore_index, "multidim_average": multidim_average, "average": average, + "zero_division": zero_division, }, ) diff --git a/tests/unittests/classification/test_jaccard.py b/tests/unittests/classification/test_jaccard.py index 8fa17ca1d32..6901868eac9 100644 --- a/tests/unittests/classification/test_jaccard.py +++ b/tests/unittests/classification/test_jaccard.py @@ -37,7 +37,7 @@ from unittests.classification._inputs import _binary_cases, _multiclass_cases, _multilabel_cases -def _reference_sklearn_jaccard_index_binary(preds, target, ignore_index=None): +def _reference_sklearn_jaccard_index_binary(preds, target, ignore_index=None, zero_division=0): preds = preds.view(-1).numpy() target = target.view(-1).numpy() if np.issubdtype(preds.dtype, np.floating): @@ -45,7 +45,7 @@ def _reference_sklearn_jaccard_index_binary(preds, target, ignore_index=None): preds = sigmoid(preds) preds = (preds >= THRESHOLD).astype(np.uint8) target, preds = remove_ignore_index(target, preds, ignore_index) - return sk_jaccard_index(y_true=target, y_pred=preds) + return sk_jaccard_index(y_true=target, y_pred=preds, zero_division=zero_division) @pytest.mark.parametrize("inputs", _binary_cases) @@ -53,8 +53,9 @@ class TestBinaryJaccardIndex(MetricTester): """Test class for `BinaryJaccardIndex` metric.""" @pytest.mark.parametrize("ignore_index", [None, -1, 0]) + @pytest.mark.parametrize("zero_division", [0, 1]) @pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False]) - def test_binary_jaccard_index(self, inputs, ddp, ignore_index): + def test_binary_jaccard_index(self, inputs, ddp, ignore_index, zero_division): """Test class implementation of metric.""" preds, target = inputs if ignore_index is not None: @@ -64,15 +65,19 @@ def test_binary_jaccard_index(self, inputs, ddp, ignore_index): preds=preds, target=target, metric_class=BinaryJaccardIndex, - reference_metric=partial(_reference_sklearn_jaccard_index_binary, ignore_index=ignore_index), + reference_metric=partial( + _reference_sklearn_jaccard_index_binary, ignore_index=ignore_index, zero_division=zero_division + ), metric_args={ "threshold": THRESHOLD, "ignore_index": ignore_index, + "zero_division": zero_division, }, ) @pytest.mark.parametrize("ignore_index", [None, -1, 0]) - def test_binary_jaccard_index_functional(self, inputs, ignore_index): + @pytest.mark.parametrize("zero_division", [0, 1]) + def test_binary_jaccard_index_functional(self, inputs, ignore_index, zero_division): """Test functional implementation of metric.""" preds, target = inputs if ignore_index is not None: @@ -81,11 +86,10 @@ def test_binary_jaccard_index_functional(self, inputs, ignore_index): preds=preds, target=target, metric_functional=binary_jaccard_index, - reference_metric=partial(_reference_sklearn_jaccard_index_binary, ignore_index=ignore_index), - metric_args={ - "threshold": THRESHOLD, - "ignore_index": ignore_index, - }, + reference_metric=partial( + _reference_sklearn_jaccard_index_binary, ignore_index=ignore_index, zero_division=zero_division + ), + metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index, "zero_division": zero_division}, ) def test_binary_jaccard_index_differentiability(self, inputs): @@ -129,7 +133,7 @@ def test_binary_jaccard_index_dtype_gpu(self, inputs, dtype): ) -def _reference_sklearn_jaccard_index_multiclass(preds, target, ignore_index=None, average="macro"): +def _reference_sklearn_jaccard_index_multiclass(preds, target, ignore_index=None, average="macro", zero_division=0): preds = preds.numpy() target = target.numpy() if np.issubdtype(preds.dtype, np.floating): @@ -137,13 +141,15 @@ def _reference_sklearn_jaccard_index_multiclass(preds, target, ignore_index=None preds = preds.flatten() target = target.flatten() target, preds = remove_ignore_index(target, preds, ignore_index) + if average is None: + return sk_jaccard_index( + y_true=target, y_pred=preds, average=average, labels=list(range(NUM_CLASSES)), zero_division=zero_division + ) if ignore_index is not None and 0 <= ignore_index < NUM_CLASSES: labels = [i for i in range(NUM_CLASSES) if i != ignore_index] - res = sk_jaccard_index(y_true=target, y_pred=preds, average=average, labels=labels) - return np.insert(res, ignore_index, 0.0) if average is None else res - if average is None: - return sk_jaccard_index(y_true=target, y_pred=preds, average=average, labels=list(range(NUM_CLASSES))) - return sk_jaccard_index(y_true=target, y_pred=preds, average=average) + res = sk_jaccard_index(y_true=target, y_pred=preds, average=average, labels=labels, zero_division=zero_division) + return np.insert(res, ignore_index, 0) if average is None else res + return sk_jaccard_index(y_true=target, y_pred=preds, average=average, zero_division=zero_division) @pytest.mark.parametrize("inputs", _multiclass_cases) @@ -152,8 +158,9 @@ class TestMulticlassJaccardIndex(MetricTester): @pytest.mark.parametrize("average", ["macro", "micro", "weighted", None]) @pytest.mark.parametrize("ignore_index", [None, -1, 0]) + @pytest.mark.parametrize("zero_division", [0, 1]) @pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False]) - def test_multiclass_jaccard_index(self, inputs, ddp, ignore_index, average): + def test_multiclass_jaccard_index(self, inputs, ddp, ignore_index, average, zero_division): """Test class implementation of metric.""" preds, target = inputs if ignore_index is not None: @@ -164,18 +171,23 @@ def test_multiclass_jaccard_index(self, inputs, ddp, ignore_index, average): target=target, metric_class=MulticlassJaccardIndex, reference_metric=partial( - _reference_sklearn_jaccard_index_multiclass, ignore_index=ignore_index, average=average + _reference_sklearn_jaccard_index_multiclass, + ignore_index=ignore_index, + average=average, + zero_division=zero_division, ), metric_args={ "num_classes": NUM_CLASSES, "ignore_index": ignore_index, "average": average, + "zero_division": zero_division, }, ) @pytest.mark.parametrize("average", ["macro", "micro", "weighted", None]) @pytest.mark.parametrize("ignore_index", [None, -1, 0]) - def test_multiclass_jaccard_index_functional(self, inputs, ignore_index, average): + @pytest.mark.parametrize("zero_division", [0, 1]) + def test_multiclass_jaccard_index_functional(self, inputs, ignore_index, average, zero_division): """Test functional implementation of metric.""" preds, target = inputs if ignore_index is not None: @@ -185,12 +197,16 @@ def test_multiclass_jaccard_index_functional(self, inputs, ignore_index, average target=target, metric_functional=multiclass_jaccard_index, reference_metric=partial( - _reference_sklearn_jaccard_index_multiclass, ignore_index=ignore_index, average=average + _reference_sklearn_jaccard_index_multiclass, + ignore_index=ignore_index, + average=average, + zero_division=zero_division, ), metric_args={ "num_classes": NUM_CLASSES, "ignore_index": ignore_index, "average": average, + "zero_division": zero_division, }, ) @@ -233,7 +249,7 @@ def test_multiclass_jaccard_index_dtype_gpu(self, inputs, dtype): ) -def _reference_sklearn_jaccard_index_multilabel(preds, target, ignore_index=None, average="macro"): +def _reference_sklearn_jaccard_index_multilabel(preds, target, ignore_index=None, average="macro", zero_division=0): preds = preds.numpy() target = target.numpy() if np.issubdtype(preds.dtype, np.floating): @@ -243,16 +259,18 @@ def _reference_sklearn_jaccard_index_multilabel(preds, target, ignore_index=None preds = np.moveaxis(preds, 1, -1).reshape((-1, preds.shape[1])) target = np.moveaxis(target, 1, -1).reshape((-1, target.shape[1])) if ignore_index is None: - return sk_jaccard_index(y_true=target, y_pred=preds, average=average) + return sk_jaccard_index(y_true=target, y_pred=preds, average=average, zero_division=zero_division) if average == "micro": - return _reference_sklearn_jaccard_index_binary(torch.tensor(preds), torch.tensor(target), ignore_index) + return _reference_sklearn_jaccard_index_binary( + torch.tensor(preds), torch.tensor(target), ignore_index, zero_division=zero_division + ) scores, weights = [], [] for i in range(preds.shape[1]): pred, true = preds[:, i], target[:, i] true, pred = remove_ignore_index(true, pred, ignore_index) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) - scores.append(sk_jaccard_index(true, pred)) + scores.append(sk_jaccard_index(true, pred, zero_division=zero_division)) weights.append(confmat[1, 0] + confmat[1, 1]) scores = np.stack(scores, axis=0) weights = np.stack(weights, axis=0) @@ -269,8 +287,9 @@ class TestMultilabelJaccardIndex(MetricTester): @pytest.mark.parametrize("average", ["macro", "micro", "weighted", None]) @pytest.mark.parametrize("ignore_index", [None, -1]) + @pytest.mark.parametrize("zero_division", [0, 1]) @pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False]) - def test_multilabel_jaccard_index(self, inputs, ddp, ignore_index, average): + def test_multilabel_jaccard_index(self, inputs, ddp, ignore_index, average, zero_division): """Test class implementation of metric.""" preds, target = inputs if ignore_index is not None: @@ -281,18 +300,23 @@ def test_multilabel_jaccard_index(self, inputs, ddp, ignore_index, average): target=target, metric_class=MultilabelJaccardIndex, reference_metric=partial( - _reference_sklearn_jaccard_index_multilabel, ignore_index=ignore_index, average=average + _reference_sklearn_jaccard_index_multilabel, + ignore_index=ignore_index, + average=average, + zero_division=zero_division, ), metric_args={ "num_labels": NUM_CLASSES, "ignore_index": ignore_index, "average": average, + "zero_division": zero_division, }, ) @pytest.mark.parametrize("average", ["macro", "micro", "weighted", None]) @pytest.mark.parametrize("ignore_index", [None, -1]) - def test_multilabel_jaccard_index_functional(self, inputs, ignore_index, average): + @pytest.mark.parametrize("zero_division", [0, 1]) + def test_multilabel_jaccard_index_functional(self, inputs, ignore_index, average, zero_division): """Test functional implementation of metric.""" preds, target = inputs if ignore_index is not None: @@ -302,12 +326,16 @@ def test_multilabel_jaccard_index_functional(self, inputs, ignore_index, average target=target, metric_functional=multilabel_jaccard_index, reference_metric=partial( - _reference_sklearn_jaccard_index_multilabel, ignore_index=ignore_index, average=average + _reference_sklearn_jaccard_index_multilabel, + ignore_index=ignore_index, + average=average, + zero_division=zero_division, ), metric_args={ "num_labels": NUM_CLASSES, "ignore_index": ignore_index, "average": average, + "zero_division": zero_division, }, ) diff --git a/tests/unittests/classification/test_precision_recall.py b/tests/unittests/classification/test_precision_recall.py index 86fbe262aea..00eee202cc0 100644 --- a/tests/unittests/classification/test_precision_recall.py +++ b/tests/unittests/classification/test_precision_recall.py @@ -49,7 +49,7 @@ seed_all(42) -def _reference_sklearn_precision_recall_binary(preds, target, sk_fn, ignore_index, multidim_average): +def _reference_sklearn_precision_recall_binary(preds, target, sk_fn, ignore_index, multidim_average, zero_division=0): if multidim_average == "global": preds = preds.view(-1).numpy() target = target.view(-1).numpy() @@ -64,14 +64,14 @@ def _reference_sklearn_precision_recall_binary(preds, target, sk_fn, ignore_inde if multidim_average == "global": target, preds = remove_ignore_index(target, preds, ignore_index) - return sk_fn(target, preds) + return sk_fn(target, preds, zero_division=zero_division) res = [] for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() true, pred = remove_ignore_index(true, pred, ignore_index) - res.append(sk_fn(true, pred)) + res.append(sk_fn(true, pred, zero_division=zero_division)) return np.stack(res) @@ -90,7 +90,10 @@ class TestBinaryPrecisionRecall(MetricTester): @pytest.mark.parametrize("ignore_index", [None, -1]) @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) @pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False]) - def test_binary_precision_recall(self, ddp, inputs, module, functional, compare, ignore_index, multidim_average): + @pytest.mark.parametrize("zero_division", [0, 1]) + def test_binary_precision_recall( + self, ddp, inputs, module, functional, compare, ignore_index, multidim_average, zero_division + ): """Test class implementation of metric.""" preds, target = inputs if ignore_index == -1: @@ -110,14 +113,21 @@ def test_binary_precision_recall(self, ddp, inputs, module, functional, compare, sk_fn=compare, ignore_index=ignore_index, multidim_average=multidim_average, + zero_division=zero_division, ), - metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index, "multidim_average": multidim_average}, + metric_args={ + "threshold": THRESHOLD, + "ignore_index": ignore_index, + "multidim_average": multidim_average, + "zero_division": zero_division, + }, ) @pytest.mark.parametrize("ignore_index", [None, -1]) @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) + @pytest.mark.parametrize("zero_division", [0, 1]) def test_binary_precision_recall_functional( - self, inputs, module, functional, compare, ignore_index, multidim_average + self, inputs, module, functional, compare, ignore_index, multidim_average, zero_division ): """Test functional implementation of metric.""" preds, target = inputs @@ -135,11 +145,13 @@ def test_binary_precision_recall_functional( sk_fn=compare, ignore_index=ignore_index, multidim_average=multidim_average, + zero_division=zero_division, ), metric_args={ "threshold": THRESHOLD, "ignore_index": ignore_index, "multidim_average": multidim_average, + "zero_division": zero_division, }, ) @@ -184,7 +196,9 @@ def test_binary_precision_recall_half_gpu(self, inputs, module, functional, comp ) -def _reference_sklearn_precision_recall_multiclass(preds, target, sk_fn, ignore_index, multidim_average, average): +def _reference_sklearn_precision_recall_multiclass( + preds, target, sk_fn, ignore_index, multidim_average, average, zero_division=0 +): if preds.ndim == target.ndim + 1: preds = torch.argmax(preds, 1) @@ -192,7 +206,13 @@ def _reference_sklearn_precision_recall_multiclass(preds, target, sk_fn, ignore_ preds = preds.numpy().flatten() target = target.numpy().flatten() target, preds = remove_ignore_index(target, preds, ignore_index) - return sk_fn(target, preds, average=average, labels=list(range(NUM_CLASSES)) if average is None else None) + return sk_fn( + target, + preds, + average=average, + labels=list(range(NUM_CLASSES)) if average is None else None, + zero_division=zero_division, + ) preds = preds.numpy() target = target.numpy() @@ -201,7 +221,23 @@ def _reference_sklearn_precision_recall_multiclass(preds, target, sk_fn, ignore_ pred = pred.flatten() true = true.flatten() true, pred = remove_ignore_index(true, pred, ignore_index) - r = sk_fn(true, pred, average=average, labels=list(range(NUM_CLASSES)) if average is None else None) + if len(pred) == 0 and average == "weighted": + # The result of sk_fn([], [], labels=None, average="weighted", zero_division=zero_division) + # varies depending on the sklearn version: + # 1.2 -> the value of zero_division + # 1.3 -> nan + # 1.4 -> nan + # To avoid breaking some test cases by this behavior, + # hard coded to return 0 in this special case. + r = 0.0 + else: + r = sk_fn( + true, + pred, + average=average, + labels=list(range(NUM_CLASSES)) if average is None else None, + zero_division=zero_division, + ) res.append(0.0 if np.isnan(r).any() else r) return np.stack(res, 0) @@ -223,8 +259,18 @@ class TestMulticlassPrecisionRecall(MetricTester): @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) @pytest.mark.parametrize("average", ["micro", "macro", "weighted", None]) @pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False]) + @pytest.mark.parametrize("zero_division", [0, 1]) def test_multiclass_precision_recall( - self, ddp, inputs, module, functional, compare, ignore_index, multidim_average, average + self, + ddp, + inputs, + module, + functional, + compare, + ignore_index, + multidim_average, + average, + zero_division, ): """Test class implementation of metric.""" preds, target = inputs @@ -246,20 +292,23 @@ def test_multiclass_precision_recall( ignore_index=ignore_index, multidim_average=multidim_average, average=average, + zero_division=zero_division, ), metric_args={ "ignore_index": ignore_index, "multidim_average": multidim_average, "average": average, "num_classes": NUM_CLASSES, + "zero_division": zero_division, }, ) @pytest.mark.parametrize("ignore_index", [None, 0, -1]) @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) @pytest.mark.parametrize("average", ["micro", "macro", "weighted", None]) + @pytest.mark.parametrize("zero_division", [0, 1]) def test_multiclass_precision_recall_functional( - self, inputs, module, functional, compare, ignore_index, multidim_average, average + self, inputs, module, functional, compare, ignore_index, multidim_average, average, zero_division ): """Test functional implementation of metric.""" preds, target = inputs @@ -278,12 +327,14 @@ def test_multiclass_precision_recall_functional( ignore_index=ignore_index, multidim_average=multidim_average, average=average, + zero_division=zero_division, ), metric_args={ "ignore_index": ignore_index, "multidim_average": multidim_average, "average": average, "num_classes": NUM_CLASSES, + "zero_division": zero_division, }, ) @@ -367,18 +418,18 @@ def test_top_k( assert torch.equal(metric_fn(preds, target, top_k=k, average=average, num_classes=3), result) -def _reference_sklearn_precision_recall_multilabel_global(preds, target, sk_fn, ignore_index, average): +def _reference_sklearn_precision_recall_multilabel_global(preds, target, sk_fn, ignore_index, average, zero_division): if average == "micro": preds = preds.flatten() target = target.flatten() target, preds = remove_ignore_index(target, preds, ignore_index) - return sk_fn(target, preds) + return sk_fn(target, preds, zero_division=zero_division) precision_recall, weights = [], [] for i in range(preds.shape[1]): pred, true = preds[:, i].flatten(), target[:, i].flatten() true, pred = remove_ignore_index(true, pred, ignore_index) - precision_recall.append(sk_fn(true, pred)) + precision_recall.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) weights.append(confmat[1, 1] + confmat[1, 0]) res = np.stack(precision_recall, axis=0) @@ -395,13 +446,13 @@ def _reference_sklearn_precision_recall_multilabel_global(preds, target, sk_fn, return None -def _reference_sklearn_precision_recall_multilabel_local(preds, target, sk_fn, ignore_index, average): +def _reference_sklearn_precision_recall_multilabel_local(preds, target, sk_fn, ignore_index, average, zero_division): precision_recall, weights = [], [] for i in range(preds.shape[0]): if average == "micro": pred, true = preds[i].flatten(), target[i].flatten() true, pred = remove_ignore_index(true, pred, ignore_index) - precision_recall.append(sk_fn(true, pred)) + precision_recall.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) weights.append(confmat[1, 1] + confmat[1, 0]) else: @@ -409,7 +460,7 @@ def _reference_sklearn_precision_recall_multilabel_local(preds, target, sk_fn, i for j in range(preds.shape[1]): pred, true = preds[i, j], target[i, j] true, pred = remove_ignore_index(true, pred, ignore_index) - scores.append(sk_fn(true, pred)) + scores.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) w.append(confmat[1, 1] + confmat[1, 0]) precision_recall.append(np.stack(scores)) @@ -429,7 +480,9 @@ def _reference_sklearn_precision_recall_multilabel_local(preds, target, sk_fn, i return None -def _reference_sklearn_precision_recall_multilabel(preds, target, sk_fn, ignore_index, multidim_average, average): +def _reference_sklearn_precision_recall_multilabel( + preds, target, sk_fn, ignore_index, multidim_average, average, zero_division=0 +): preds = preds.numpy() target = target.numpy() if np.issubdtype(preds.dtype, np.floating): @@ -443,10 +496,15 @@ def _reference_sklearn_precision_recall_multilabel(preds, target, sk_fn, ignore_ target.transpose(0, 2, 1).reshape(-1, NUM_CLASSES), preds.transpose(0, 2, 1).reshape(-1, NUM_CLASSES), average=average, + zero_division=zero_division, ) if multidim_average == "global": - return _reference_sklearn_precision_recall_multilabel_global(preds, target, sk_fn, ignore_index, average) - return _reference_sklearn_precision_recall_multilabel_local(preds, target, sk_fn, ignore_index, average) + return _reference_sklearn_precision_recall_multilabel_global( + preds, target, sk_fn, ignore_index, average, zero_division + ) + return _reference_sklearn_precision_recall_multilabel_local( + preds, target, sk_fn, ignore_index, average, zero_division + ) @pytest.mark.parametrize("inputs", _multilabel_cases) @@ -465,8 +523,9 @@ class TestMultilabelPrecisionRecall(MetricTester): @pytest.mark.parametrize("ignore_index", [None, -1]) @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) @pytest.mark.parametrize("average", ["micro", "macro", "weighted", None]) + @pytest.mark.parametrize("zero_division", [0, 1]) def test_multilabel_precision_recall( - self, ddp, inputs, module, functional, compare, ignore_index, multidim_average, average + self, ddp, inputs, module, functional, compare, ignore_index, multidim_average, average, zero_division ): """Test class implementation of metric.""" preds, target = inputs @@ -488,6 +547,7 @@ def test_multilabel_precision_recall( ignore_index=ignore_index, multidim_average=multidim_average, average=average, + zero_division=zero_division, ), metric_args={ "num_labels": NUM_CLASSES, @@ -495,14 +555,16 @@ def test_multilabel_precision_recall( "ignore_index": ignore_index, "multidim_average": multidim_average, "average": average, + "zero_division": zero_division, }, ) @pytest.mark.parametrize("ignore_index", [None, -1]) @pytest.mark.parametrize("multidim_average", ["global", "samplewise"]) @pytest.mark.parametrize("average", ["micro", "macro", "weighted", None]) + @pytest.mark.parametrize("zero_division", [0, 1]) def test_multilabel_precision_recall_functional( - self, inputs, module, functional, compare, ignore_index, multidim_average, average + self, inputs, module, functional, compare, ignore_index, multidim_average, average, zero_division ): """Test functional implementation of metric.""" preds, target = inputs @@ -521,6 +583,7 @@ def test_multilabel_precision_recall_functional( ignore_index=ignore_index, multidim_average=multidim_average, average=average, + zero_division=zero_division, ), metric_args={ "num_labels": NUM_CLASSES, @@ -528,6 +591,7 @@ def test_multilabel_precision_recall_functional( "ignore_index": ignore_index, "multidim_average": multidim_average, "average": average, + "zero_division": zero_division, }, ) diff --git a/tests/unittests/classification/test_sensitivity_specificity.py b/tests/unittests/classification/test_sensitivity_specificity.py index d629c86583a..df01b67e4ab 100644 --- a/tests/unittests/classification/test_sensitivity_specificity.py +++ b/tests/unittests/classification/test_sensitivity_specificity.py @@ -33,7 +33,7 @@ multilabel_sensitivity_at_specificity, ) from torchmetrics.metric import Metric -from torchmetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_11 +from torchmetrics.utilities.imports import _SKLEARN_GREATER_EQUAL_1_3, _TORCH_GREATER_EQUAL_1_11 from unittests import NUM_CLASSES from unittests._helpers import seed_all @@ -83,6 +83,7 @@ def _reference_sklearn_sensitivity_at_specificity_binary(preds, target, min_spec return _sensitivity_at_specificity_x_multilabel(preds, target, min_specificity) +@pytest.mark.skipif(not _SKLEARN_GREATER_EQUAL_1_3, reason="metric does not support scikit-learn versions below 1.3") @pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_11, reason="metric does not support torch versions below 1.11") @pytest.mark.parametrize("inputs", (_binary_cases[1], _binary_cases[2], _binary_cases[4], _binary_cases[5])) class TestBinarySensitivityAtSpecificity(MetricTester): @@ -209,6 +210,7 @@ def _reference_sklearn_sensitivity_at_specificity_multiclass(preds, target, min_ return sensitivity, thresholds +@pytest.mark.skipif(not _SKLEARN_GREATER_EQUAL_1_3, reason="metric does not support scikit-learn versions below 1.3") @pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_11, reason="metric does not support torch versions below 1.11") @pytest.mark.parametrize( "inputs", (_multiclass_cases[1], _multiclass_cases[2], _multiclass_cases[4], _multiclass_cases[5]) @@ -340,6 +342,7 @@ def _reference_sklearn_sensitivity_at_specificity_multilabel(preds, target, min_ return sensitivity, thresholds +@pytest.mark.skipif(not _SKLEARN_GREATER_EQUAL_1_3, reason="metric does not support scikit-learn versions below 1.3") @pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_11, reason="metric does not support torch versions below 1.11") @pytest.mark.parametrize( "inputs", (_multilabel_cases[1], _multilabel_cases[2], _multilabel_cases[4], _multilabel_cases[5]) diff --git a/tests/unittests/retrieval/test_ndcg.py b/tests/unittests/retrieval/test_ndcg.py index 48e0c679195..1f68839eb4e 100644 --- a/tests/unittests/retrieval/test_ndcg.py +++ b/tests/unittests/retrieval/test_ndcg.py @@ -80,6 +80,7 @@ def test_class_metric( "ignore_index": ignore_index, "aggregation": aggregation, } + target = target if target.min() >= 0 else target - target.min() self.run_class_metric_test( ddp=ddp, @@ -107,6 +108,7 @@ def test_class_metric_ignore_index( """Test class implementation of metric with ignore_index argument.""" metric_args = {"empty_target_action": empty_target_action, "top_k": k, "ignore_index": -100} + target = target if target.min() >= 0 else target - target.min() self.run_class_metric_test( ddp=ddp, indexes=indexes, @@ -121,6 +123,7 @@ def test_class_metric_ignore_index( @pytest.mark.parametrize("k", [None, 1, 4, 10]) def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): """Test functional implementation of metric.""" + target = target if target.min() >= 0 else target - target.min() self.run_functional_metric_test( preds=preds, target=target, @@ -133,6 +136,7 @@ def test_functional_metric(self, preds: Tensor, target: Tensor, k: int): @pytest.mark.parametrize(**_default_metric_class_input_arguments_with_non_binary_target) def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): """Test dtype support of the metric on CPU.""" + target = target if target.min() >= 0 else target - target.min() self.run_precision_test_cpu( indexes=indexes, preds=preds, @@ -144,6 +148,7 @@ def test_precision_cpu(self, indexes: Tensor, preds: Tensor, target: Tensor): @pytest.mark.parametrize(**_default_metric_class_input_arguments_with_non_binary_target) def test_precision_gpu(self, indexes: Tensor, preds: Tensor, target: Tensor): """Test dtype support of the metric on GPU.""" + target = target if target.min() >= 0 else target - target.min() self.run_precision_test_gpu( indexes=indexes, preds=preds,