diff --git a/CHANGELOG.md b/CHANGELOG.md index 8736f561615..99741831e6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- +- Added `NormalizedRootMeanSquaredError` metric to regression subpackage ([#2442](https://github.com/Lightning-AI/torchmetrics/pull/2442)) ### Changed diff --git a/docs/source/conf.py b/docs/source/conf.py index 5442f9641a9..81f842e7a12 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -447,6 +447,9 @@ def linkcode_resolve(domain, info) -> Optional[str]: # noqa: ANN001 "https://aclanthology.org/W17-4770", # A wavelet transform method to merge Landsat TM and SPOT panchromatic data "https://www.ingentaconnect.com/content/tandf/tres/1998/00000019/00000004/art00013", + # Improved normalization of time-lapse seismic data using normalized root mean square repeatability data ... + # ... to improve automatic production and seismic history matching in the Nelson field + "https://onlinelibrary.wiley.com/doi/abs/10.1111/1365-2478.12109", # todo: these links seems to be unstable, referring to .devcontainer "https://code.visualstudio.com", "https://code.visualstudio.com/.*", diff --git a/docs/source/links.rst b/docs/source/links.rst index 2e9b222f28f..b7a4f63565e 100644 --- a/docs/source/links.rst +++ b/docs/source/links.rst @@ -171,6 +171,7 @@ .. _FLORES-200: https://arxiv.org/abs/2207.04672 .. _averaging curve objects: https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html .. _SCC: https://www.ingentaconnect.com/content/tandf/tres/1998/00000019/00000004/art00013 +.. _Normalized Root Mean Squared Error: https://onlinelibrary.wiley.com/doi/abs/10.1111/1365-2478.12109 .. _Generalized Dice Score: https://arxiv.org/abs/1707.03237 .. _Hausdorff Distance: https://en.wikipedia.org/wiki/Hausdorff_distance .. 
_averaging curve objects: https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html diff --git a/docs/source/regression/normalized_root_mean_squared_error.rst b/docs/source/regression/normalized_root_mean_squared_error.rst new file mode 100644 index 00000000000..7bbc2f392d5 --- /dev/null +++ b/docs/source/regression/normalized_root_mean_squared_error.rst @@ -0,0 +1,21 @@ +.. customcarditem:: + :header: Normalized Root Mean Squared Error (NRMSE) + :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/tabular_classification.svg + :tags: Regression + +.. include:: ../links.rst + +########################################## +Normalized Root Mean Squared Error (NRMSE) +########################################## + +Module Interface +________________ + +.. autoclass:: torchmetrics.NormalizedRootMeanSquaredError + :exclude-members: update, compute + +Functional Interface +____________________ + +.. autofunction:: torchmetrics.functional.normalized_root_mean_squared_error diff --git a/requirements/_devel.txt b/requirements/_devel.txt index 596cc138133..6a8ea2b8e7f 100644 --- a/requirements/_devel.txt +++ b/requirements/_devel.txt @@ -20,3 +20,4 @@ -r classification_test.txt -r nominal_test.txt -r segmentation_test.txt +-r regression_test.txt diff --git a/requirements/regression_test.txt b/requirements/regression_test.txt new file mode 100644 index 00000000000..859605fda3b --- /dev/null +++ b/requirements/regression_test.txt @@ -0,0 +1 @@ +permetrics==2.0.0 diff --git a/src/torchmetrics/__init__.py b/src/torchmetrics/__init__.py index 2fa370cb1c9..a6105df3480 100644 --- a/src/torchmetrics/__init__.py +++ b/src/torchmetrics/__init__.py @@ -114,6 +114,7 @@ MeanSquaredError, MeanSquaredLogError, MinkowskiDistance, + NormalizedRootMeanSquaredError, PearsonCorrCoef, R2Score, RelativeSquaredError, @@ -158,25 +159,23 @@ ) __all__ = [ - "functional", - "Accuracy", "AUROC", + "Accuracy", "AveragePrecision", "BLEUScore", "BootStrapper", + "CHRFScore", 
"CalibrationError", "CatMetric", - "ClasswiseWrapper", "CharErrorRate", - "CHRFScore", - "ConcordanceCorrCoef", + "ClasswiseWrapper", "CohenKappa", + "ConcordanceCorrCoef", "ConfusionMatrix", "CosineSimilarity", "CramersV", "CriticalSuccessIndex", "Dice", - "TweedieDevianceScore", "ErrorRelativeGlobalDimensionlessSynthesis", "ExactMatch", "ExplainedVariance", @@ -187,8 +186,8 @@ "HammingDistance", "HingeLoss", "JaccardIndex", - "KendallRankCorrCoef", "KLDivergence", + "KendallRankCorrCoef", "LogCoshError", "MatchErrorRate", "MatthewsCorrCoef", @@ -201,14 +200,16 @@ "Metric", "MetricCollection", "MetricTracker", - "MinkowskiDistance", "MinMaxMetric", "MinMetric", + "MinkowskiDistance", "ModifiedPanopticQuality", + "MultiScaleStructuralSimilarityIndexMeasure", "MultioutputWrapper", "MultitaskWrapper", - "MultiScaleStructuralSimilarityIndexMeasure", + "NormalizedRootMeanSquaredError", "PanopticQuality", + "PeakSignalNoiseRatio", "PearsonCorrCoef", "PearsonsContingencyCoefficient", "PermutationInvariantTraining", @@ -216,8 +217,8 @@ "Precision", "PrecisionAtFixedRecall", "PrecisionRecallCurve", - "PeakSignalNoiseRatio", "R2Score", + "ROC", "Recall", "RecallAtFixedPrecision", "RelativeAverageSpectralError", @@ -228,37 +229,38 @@ "RetrievalMRR", "RetrievalNormalizedDCG", "RetrievalPrecision", - "RetrievalRecall", - "RetrievalRPrecision", "RetrievalPrecisionRecallCurve", + "RetrievalRPrecision", + "RetrievalRecall", "RetrievalRecallAtFixedPrecision", - "ROC", "RootMeanSquaredErrorUsingSlidingWindow", "RunningMean", "RunningSum", + "SQuAD", "SacreBLEUScore", - "SignalDistortionRatio", "ScaleInvariantSignalDistortionRatio", "ScaleInvariantSignalNoiseRatio", + "SensitivityAtSpecificity", + "SignalDistortionRatio", "SignalNoiseRatio", "SpearmanCorrCoef", "Specificity", "SpecificityAtSensitivity", - "SensitivityAtSpecificity", "SpectralAngleMapper", "SpectralDistortionIndex", - "SQuAD", - "StructuralSimilarityIndexMeasure", "StatScores", + "StructuralSimilarityIndexMeasure", 
"SumMetric", "SymmetricMeanAbsolutePercentageError", "TheilsU", "TotalVariation", "TranslationEditRate", "TschuprowsT", + "TweedieDevianceScore", "UniversalImageQualityIndex", "WeightedMeanAbsolutePercentageError", "WordErrorRate", "WordInfoLost", "WordInfoPreserved", + "functional", ] diff --git a/src/torchmetrics/functional/__init__.py b/src/torchmetrics/functional/__init__.py index 30a7145aa71..7de7f261867 100644 --- a/src/torchmetrics/functional/__init__.py +++ b/src/torchmetrics/functional/__init__.py @@ -100,6 +100,7 @@ mean_squared_error, mean_squared_log_error, minkowski_distance, + normalized_root_mean_squared_error, pearson_corrcoef, r2_score, relative_squared_error, @@ -146,14 +147,13 @@ "calibration_error", "char_error_rate", "chrf_score", - "concordance_corrcoef", "cohen_kappa", + "concordance_corrcoef", "confusion_matrix", "cosine_similarity", "cramers_v", "cramers_v_matrix", "critical_success_index", - "tweedie_deviance_score", "dice", "error_relative_global_dimensionless_synthesis", "exact_match", @@ -177,12 +177,14 @@ "mean_squared_log_error", "minkowski_distance", "multiscale_structural_similarity_index_measure", + "normalized_root_mean_squared_error", "pairwise_cosine_similarity", "pairwise_euclidean_distance", "pairwise_linear_similarity", "pairwise_manhattan_distance", "pairwise_minkowski_distance", "panoptic_quality", + "peak_signal_noise_ratio", "pearson_corrcoef", "pearsons_contingency_coefficient", "pearsons_contingency_coefficient_matrix", @@ -190,10 +192,11 @@ "perplexity", "pit_permutate", "precision", + "precision_at_fixed_recall", "precision_recall_curve", - "peak_signal_noise_ratio", "r2_score", "recall", + "recall_at_fixed_precision", "relative_average_spectral_error", "relative_squared_error", "retrieval_average_precision", @@ -201,24 +204,27 @@ "retrieval_hit_rate", "retrieval_normalized_dcg", "retrieval_precision", + "retrieval_precision_recall_curve", "retrieval_r_precision", "retrieval_recall", "retrieval_reciprocal_rank", - 
"retrieval_precision_recall_curve", "roc", "root_mean_squared_error_using_sliding_window", "rouge_score", "sacre_bleu_score", - "signal_distortion_ratio", "scale_invariant_signal_distortion_ratio", "scale_invariant_signal_noise_ratio", + "sensitivity_at_specificity", + "signal_distortion_ratio", "signal_noise_ratio", "spearman_corrcoef", "specificity", + "specificity_at_sensitivity", + "spectral_angle_mapper", "spectral_distortion_index", "squad", - "structural_similarity_index_measure", "stat_scores", + "structural_similarity_index_measure", "symmetric_mean_absolute_percentage_error", "theils_u", "theils_u_matrix", @@ -226,14 +232,10 @@ "translation_edit_rate", "tschuprows_t", "tschuprows_t_matrix", + "tweedie_deviance_score", "universal_image_quality_index", - "spectral_angle_mapper", "weighted_mean_absolute_percentage_error", "word_error_rate", "word_information_lost", "word_information_preserved", - "precision_at_fixed_recall", - "recall_at_fixed_precision", - "sensitivity_at_specificity", - "specificity_at_sensitivity", ] diff --git a/src/torchmetrics/functional/regression/__init__.py b/src/torchmetrics/functional/regression/__init__.py index c2dab8c5f59..063fbc059e3 100644 --- a/src/torchmetrics/functional/regression/__init__.py +++ b/src/torchmetrics/functional/regression/__init__.py @@ -23,6 +23,7 @@ from torchmetrics.functional.regression.mape import mean_absolute_percentage_error from torchmetrics.functional.regression.minkowski import minkowski_distance from torchmetrics.functional.regression.mse import mean_squared_error +from torchmetrics.functional.regression.nrmse import normalized_root_mean_squared_error from torchmetrics.functional.regression.pearson import pearson_corrcoef from torchmetrics.functional.regression.r2 import r2_score from torchmetrics.functional.regression.rse import relative_squared_error @@ -39,13 +40,14 @@ "kendall_rank_corrcoef", "kl_divergence", "log_cosh_error", - "mean_squared_log_error", "mean_absolute_error", - 
"mean_squared_error", - "pearson_corrcoef", "mean_absolute_percentage_error", "mean_absolute_percentage_error", + "mean_squared_error", + "mean_squared_log_error", "minkowski_distance", + "normalized_root_mean_squared_error", + "pearson_corrcoef", "r2_score", "relative_squared_error", "spearman_corrcoef", diff --git a/src/torchmetrics/functional/regression/nrmse.py b/src/torchmetrics/functional/regression/nrmse.py new file mode 100644 index 00000000000..52cae36adb0 --- /dev/null +++ b/src/torchmetrics/functional/regression/nrmse.py @@ -0,0 +1,106 @@ +# Copyright The Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple, Union + +import torch +from torch import Tensor +from typing_extensions import Literal + +from torchmetrics.functional.regression.mse import _mean_squared_error_update + + +def _normalized_root_mean_squared_error_update( + preds: Tensor, target: Tensor, num_outputs: int, normalization: Literal["mean", "range", "std", "l2"] = "mean" +) -> Tuple[Tensor, int, Tensor]: + """Updates and returns the sum of squared errors and the number of observations for NRMSE computation. + + Args: + preds: Predicted tensor + target: Ground truth tensor + num_outputs: Number of outputs in multioutput setting + normalization: type of normalization to be applied. 
Choose from "mean", "range", "std", "l2" + + """ + sum_squared_error, num_obs = _mean_squared_error_update(preds, target, num_outputs) + + target = target.view(-1) if num_outputs == 1 else target + if normalization == "mean": + denom = torch.mean(target, dim=0) + elif normalization == "range": + denom = torch.max(target, dim=0).values - torch.min(target, dim=0).values + elif normalization == "std": + denom = torch.std(target, correction=0, dim=0) + elif normalization == "l2": + denom = torch.norm(target, p=2, dim=0) + else: + raise ValueError( + f"Argument `normalization` should be either 'mean', 'range', 'std' or 'l2' but got {normalization}" + ) + return sum_squared_error, num_obs, denom + + +def _normalized_root_mean_squared_error_compute( + sum_squared_error: Tensor, num_obs: Union[int, Tensor], denom: Tensor +) -> Tensor: + """Calculates RMSE and normalizes it.""" + rmse = torch.sqrt(sum_squared_error / num_obs) + return rmse / denom + + +def normalized_root_mean_squared_error( + preds: Tensor, + target: Tensor, + normalization: Literal["mean", "range", "std", "l2"] = "mean", + num_outputs: int = 1, +) -> Tensor: + """Calculates the `Normalized Root Mean Squared Error`_ (NRMSE) also known as scatter index. + + Args: + preds: estimated labels + target: ground truth labels + normalization: type of normalization to be applied. Choose from "mean", "range", "std", "l2" which corresponds + to normalizing the RMSE by the mean of the target, the range of the target, the standard deviation of the + target or the L2 norm of the target.
+ num_outputs: Number of outputs in multioutput setting + + Return: + Tensor with the NRMSE score + + Example: + >>> import torch + >>> from torchmetrics.functional.regression import normalized_root_mean_squared_error + >>> preds = torch.tensor([0., 1, 2, 3]) + >>> target = torch.tensor([0., 1, 2, 2]) + >>> normalized_root_mean_squared_error(preds, target, normalization="mean") + tensor(0.4000) + >>> normalized_root_mean_squared_error(preds, target, normalization="range") + tensor(0.2500) + >>> normalized_root_mean_squared_error(preds, target, normalization="std") + tensor(0.6030) + >>> normalized_root_mean_squared_error(preds, target, normalization="l2") + tensor(0.1667) + + Example (multioutput): + >>> import torch + >>> from torchmetrics.functional.regression import normalized_root_mean_squared_error + >>> preds = torch.tensor([[0., 1], [2, 3], [4, 5], [6, 7]]) + >>> target = torch.tensor([[0., 1], [3, 3], [4, 5], [8, 9]]) + >>> normalized_root_mean_squared_error(preds, target, normalization="mean", num_outputs=2) + tensor([0.2981, 0.2222]) + + """ + sum_squared_error, num_obs, denom = _normalized_root_mean_squared_error_update( + preds, target, num_outputs=num_outputs, normalization=normalization + ) + return _normalized_root_mean_squared_error_compute(sum_squared_error, num_obs, denom) diff --git a/src/torchmetrics/regression/__init__.py b/src/torchmetrics/regression/__init__.py index 03ba8023a10..6a41c01bcdb 100644 --- a/src/torchmetrics/regression/__init__.py +++ b/src/torchmetrics/regression/__init__.py @@ -23,6 +23,7 @@ from torchmetrics.regression.mape import MeanAbsolutePercentageError from torchmetrics.regression.minkowski import MinkowskiDistance from torchmetrics.regression.mse import MeanSquaredError +from torchmetrics.regression.nrmse import NormalizedRootMeanSquaredError from torchmetrics.regression.pearson import PearsonCorrCoef from torchmetrics.regression.r2 import R2Score from torchmetrics.regression.rse import RelativeSquaredError @@ -36,14 
+37,15 @@ "CosineSimilarity", "CriticalSuccessIndex", "ExplainedVariance", - "KendallRankCorrCoef", "KLDivergence", + "KendallRankCorrCoef", "LogCoshError", - "MeanSquaredLogError", "MeanAbsoluteError", "MeanAbsolutePercentageError", - "MinkowskiDistance", "MeanSquaredError", + "MeanSquaredLogError", + "MinkowskiDistance", + "NormalizedRootMeanSquaredError", "PearsonCorrCoef", "R2Score", "RelativeSquaredError", diff --git a/src/torchmetrics/regression/nrmse.py b/src/torchmetrics/regression/nrmse.py new file mode 100644 index 00000000000..62562803542 --- /dev/null +++ b/src/torchmetrics/regression/nrmse.py @@ -0,0 +1,279 @@ +# Copyright The Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import Any, Optional, Sequence, Union + +import torch +from torch import Tensor +from typing_extensions import Literal + +from torchmetrics.functional.regression.nrmse import ( + _mean_squared_error_update, + _normalized_root_mean_squared_error_compute, +) +from torchmetrics.metric import Metric +from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE +from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE + +if not _MATPLOTLIB_AVAILABLE: + __doctest_skip__ = ["NormalizedRootMeanSquaredError.plot"] + + +def _final_aggregation( + min_val: Tensor, + max_val: Tensor, + mean_val: Tensor, + var_val: Tensor, + target_squared: Tensor, + total: Tensor, + normalization: Literal["mean", "range", "std", "l2"] = "mean", +) -> Tensor: + """In the case of multiple devices we need to aggregate the statistics from the different devices.""" + if len(min_val) == 1: + if normalization == "mean": + return mean_val[0] + if normalization == "range": + return max_val[0] - min_val[0] + if normalization == "std": + return var_val[0] + if normalization == "l2": + return target_squared[0] + + min_val_1, max_val_1, mean_val_1, var_val_1, target_squared_1, total_1 = ( + min_val[0], + max_val[0], + mean_val[0], + var_val[0], + target_squared[0], + total[0], + ) + for i in range(1, len(min_val)): + min_val_2, max_val_2, mean_val_2, var_val_2, target_squared_2, total_2 = ( + min_val[i], + max_val[i], + mean_val[i], + var_val[i], + target_squared[i], + total[i], + ) + # update total and mean + total = total_1 + total_2 + mean = (total_1 * mean_val_1 + total_2 * mean_val_2) / total + + # update variance + _temp = (total_1 + 1) * mean - total_1 * mean_val_1 + var_val_1 += (_temp - mean_val_1) * (_temp - mean) - (_temp - mean) ** 2 + _temp = (total_2 + 1) * mean - total_2 * mean_val_2 + var_val_2 += (_temp - mean_val_2) * (_temp - mean) - (_temp - mean) ** 2 + var = var_val_1 + var_val_2 + + # update min and max and target squared + min_val = torch.min(min_val_1, 
min_val_2) + max_val = torch.max(max_val_1, max_val_2) + target_squared = target_squared_1 + target_squared_2 + + if normalization == "mean": + return mean + if normalization == "range": + return max_val - min_val + if normalization == "std": + return (var / total).sqrt() + return target_squared.sqrt() + + +class NormalizedRootMeanSquaredError(Metric): + r"""Calculates the `Normalized Root Mean Squared Error`_ (NRMSE) also known as scatter index. + + The metric is defined as: + + .. math:: + \text{NRMSE} = \frac{\text{RMSE}}{\text{denom}} + + where RMSE is the root mean squared error and `denom` is the normalization factor. The normalization factor can + either be the mean, range, standard deviation or L2 norm of the target, which can be set using the `normalization` + argument. + + As input to ``forward`` and ``update`` the metric accepts the following input: + + - ``preds`` (:class:`~torch.Tensor`): Predictions from model + - ``target`` (:class:`~torch.Tensor`): Ground truth values + + As output of ``forward`` and ``compute`` the metric returns the following output: + + - ``nrmse`` (:class:`~torch.Tensor`): A tensor with the normalized root mean squared error + + Args: + normalization: type of normalization to be applied. Choose from "mean", "range", "std", "l2" which corresponds + to normalizing the RMSE by the mean of the target, the range of the target, the standard deviation of the + target or the L2 norm of the target. + num_outputs: Number of outputs in multioutput setting + kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.
+ + Example:: + Single output normalized root mean squared error computation: + + >>> import torch + >>> from torchmetrics import NormalizedRootMeanSquaredError + >>> target = torch.tensor([2.5, 5.0, 4.0, 8.0]) + >>> preds = torch.tensor([3.0, 5.0, 2.5, 7.0]) + >>> nrmse = NormalizedRootMeanSquaredError(normalization="mean") + >>> nrmse(preds, target) + tensor(0.1919) + >>> nrmse = NormalizedRootMeanSquaredError(normalization="range") + >>> nrmse(preds, target) + tensor(0.1701) + + Example:: + Multioutput normalized root mean squared error computation: + + >>> import torch + >>> from torchmetrics import NormalizedRootMeanSquaredError + >>> preds = torch.tensor([[0., 1], [2, 3], [4, 5], [6, 7]]) + >>> target = torch.tensor([[0., 1], [3, 3], [4, 5], [8, 9]]) + >>> nrmse = NormalizedRootMeanSquaredError(num_outputs=2) + >>> nrmse(preds, target) + tensor([0.2981, 0.2222]) + + """ + + is_differentiable: bool = True + higher_is_better: bool = False + full_state_update: bool = True + plot_lower_bound: float = 0.0 + + sum_squared_error: Tensor + total: Tensor + min_val: Tensor + max_val: Tensor + target_squared: Tensor + mean_val: Tensor + var_val: Tensor + + def __init__( + self, + normalization: Literal["mean", "range", "std", "l2"] = "mean", + num_outputs: int = 1, + **kwargs: Any, + ) -> None: + super().__init__(**kwargs) + + if normalization not in ("mean", "range", "std", "l2"): + raise ValueError( + f"Argument `normalization` should be either 'mean', 'range', 'std' or 'l2', but got {normalization}" + ) + self.normalization = normalization + + if not (isinstance(num_outputs, int) and num_outputs > 0): + raise ValueError(f"Expected num_outputs to be a positive integer but got {num_outputs}") + self.num_outputs = num_outputs + + self.add_state("sum_squared_error", default=torch.zeros(num_outputs), dist_reduce_fx="sum") + self.add_state("total", default=torch.zeros(num_outputs), dist_reduce_fx=None) + self.add_state("min_val", default=float("Inf") * 
torch.ones(self.num_outputs), dist_reduce_fx=None) + self.add_state("max_val", default=-float("Inf") * torch.ones(self.num_outputs), dist_reduce_fx=None) + self.add_state("mean_val", default=torch.zeros(self.num_outputs), dist_reduce_fx=None) + self.add_state("var_val", default=torch.zeros(self.num_outputs), dist_reduce_fx=None) + self.add_state("target_squared", default=torch.zeros(self.num_outputs), dist_reduce_fx=None) + + def update(self, preds: Tensor, target: Tensor) -> None: + """Update state with predictions and targets. + + See `mean_squared_error_update` for details. + + """ + sum_squared_error, num_obs = _mean_squared_error_update(preds, target, self.num_outputs) + self.sum_squared_error += sum_squared_error + target = target.view(-1) if self.num_outputs == 1 else target + + # Update min and max and target squared + self.min_val = torch.minimum(target.min(dim=0).values, self.min_val) + self.max_val = torch.maximum(target.max(dim=0).values, self.max_val) + self.target_squared += (target**2).sum(dim=0) + + # Update mean and variance + new_mean = (self.total * self.mean_val + target.sum(dim=0)) / (self.total + num_obs) + self.total += num_obs + new_var = ((target - new_mean) * (target - self.mean_val)).sum(dim=0) + self.mean_val = new_mean + self.var_val += new_var + + def compute(self) -> Tensor: + """Computes NRMSE over state. + + See `mean_squared_error_compute` for details. 
+ + """ + if (self.num_outputs == 1 and self.mean_val.numel() > 1) or (self.num_outputs > 1 and self.mean_val.ndim > 1): + denom = _final_aggregation( + min_val=self.min_val, + max_val=self.max_val, + mean_val=self.mean_val, + var_val=self.var_val, + target_squared=self.target_squared, + total=self.total, + normalization=self.normalization, + ) + total = self.total.squeeze().sum(dim=0) + else: + if self.normalization == "mean": + denom = self.mean_val + elif self.normalization == "range": + denom = self.max_val - self.min_val + elif self.normalization == "std": + denom = torch.sqrt(self.var_val / self.total) + else: + denom = torch.sqrt(self.target_squared) + total = self.total + return _normalized_root_mean_squared_error_compute(self.sum_squared_error, total, denom) + + def plot( + self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None + ) -> _PLOT_OUT_TYPE: + """Plot a single or multiple values from the metric. + + Args: + val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results. + If no value is provided, will automatically call `metric.compute` and plot that result. + ax: A matplotlib axis object. If provided will add plot to that axis + + Returns: + Figure and Axes object + + Raises: + ModuleNotFoundError: + If `matplotlib` is not installed + + .. plot:: + :scale: 75 + + >>> from torch import randn + >>> # Example plotting a single value + >>> from torchmetrics.regression import NormalizedRootMeanSquaredError + >>> metric = NormalizedRootMeanSquaredError() + >>> metric.update(randn(10,), randn(10,)) + >>> fig_, ax_ = metric.plot() + + .. plot:: + :scale: 75 + + >>> from torch import randn + >>> # Example plotting multiple values + >>> from torchmetrics.regression import NormalizedRootMeanSquaredError + >>> metric = NormalizedRootMeanSquaredError() + >>> values = [] + >>> for _ in range(10): + ...
values.append(metric(randn(10,), randn(10,))) + >>> fig, ax = metric.plot(values) + + """ + return self._plot(val, ax) diff --git a/tests/README.md b/tests/README.md index 7f5cbd4e98a..6fce25567ef 100644 --- a/tests/README.md +++ b/tests/README.md @@ -7,16 +7,16 @@ the following command in the root directory of the project: pip install . -r requirements/_devel.txt ``` -Then for windows users, to execute the tests (unit tests and integration tests) run the following command (will only run non-DDP tests): +Then for Windows users, to execute the tests (unit tests and integration tests) run the following command (will only run non-DDP tests): ```bash pytest tests/ ``` -For linux/Mac users you will need to provide the `-m` argument to indicate if `ddp` tests should also be executed: +For Linux/Mac users you will need to provide the `-m` argument to indicate if `ddp` tests should also be executed: ```bash -pytest -m DDP tests/ # to run only DDP tests +USE_PYTEST_POOL="1" pytest -m DDP tests/ # to run only DDP tests pytest -m "not DDP" tests/ # to run all tests except DDP tests ``` diff --git a/tests/unittests/regression/test_mean_error.py b/tests/unittests/regression/test_mean_error.py index f37e80e4d16..38c86817184 100644 --- a/tests/unittests/regression/test_mean_error.py +++ b/tests/unittests/regression/test_mean_error.py @@ -18,6 +18,7 @@ import numpy as np import pytest import torch +from permetrics.regression import RegressionMetric from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error from sklearn.metrics import mean_absolute_percentage_error as sk_mean_abs_percentage_error from sklearn.metrics import mean_squared_error as sk_mean_squared_error @@ -29,6 +30,7 @@ mean_absolute_percentage_error, mean_squared_error, mean_squared_log_error, + normalized_root_mean_squared_error, weighted_mean_absolute_percentage_error, ) from torchmetrics.functional.regression.symmetric_mape import symmetric_mean_absolute_percentage_error @@ -39,6 +41,7 @@ 
MeanSquaredLogError, WeightedMeanAbsolutePercentageError, ) +from torchmetrics.regression.nrmse import NormalizedRootMeanSquaredError from torchmetrics.regression.symmetric_mape import SymmetricMeanAbsolutePercentageError from unittests import BATCH_SIZE, NUM_BATCHES, _Input @@ -114,66 +117,179 @@ def _reference_symmetric_mape( return np.average(output_errors, weights=multioutput) +def _reference_normalized_root_mean_squared_error( + y_true: np.ndarray, y_pred: np.ndarray, normalization: str = "mean", num_outputs: int = 1 +): + """Reference implementation of Normalized Root Mean Squared Error (NRMSE) metric.""" + if num_outputs == 1: + y_true = y_true.flatten() + y_pred = y_pred.flatten() + if normalization != "l2": + evaluator = RegressionMetric(y_true, y_pred) if normalization == "range" else RegressionMetric(y_pred, y_true) + arg_mapping = {"mean": 1, "range": 2, "std": 4} + return evaluator.normalized_root_mean_square_error(model=arg_mapping[normalization]) + # for l2 normalization we do not have a reference implementation + return np.sqrt(np.mean(np.square(y_true - y_pred), axis=0)) / np.linalg.norm(y_true, axis=0) + + def _reference_weighted_mean_abs_percentage_error(target, preds): + """Reference implementation of Weighted Mean Absolute Percentage Error (WMAPE) metric.""" return np.sum(np.abs(target - preds)) / np.sum(np.abs(target)) def _single_target_ref_wrapper(preds, target, sk_fn, metric_args): + """Reference implementation of single-target metrics.""" sk_preds = preds.view(-1).numpy() sk_target = target.view(-1).numpy() - res = sk_fn(sk_target, sk_preds) - - return math.sqrt(res) if (metric_args and "squared" in metric_args and not metric_args["squared"]) else res + if metric_args and "normalization" in metric_args: + res = sk_fn(sk_target, sk_preds, normalization=metric_args["normalization"]) + else: + res = sk_fn(sk_target, sk_preds) + if metric_args and "squared" in metric_args and not metric_args["squared"]: + res = math.sqrt(res) + return res def 
_multi_target_ref_wrapper(preds, target, sk_fn, metric_args): + """Reference implementation of multi-target metrics.""" sk_preds = preds.view(-1, NUM_TARGETS).numpy() sk_target = target.view(-1, NUM_TARGETS).numpy() sk_kwargs = {"multioutput": "raw_values"} if metric_args and "num_outputs" in metric_args else {} - res = sk_fn(sk_target, sk_preds, **sk_kwargs) - return math.sqrt(res) if (metric_args and "squared" in metric_args and not metric_args["squared"]) else res + if metric_args and "normalization" in metric_args: + res = sk_fn(sk_target, sk_preds, **metric_args) + else: + res = sk_fn(sk_target, sk_preds, **sk_kwargs) + if metric_args and "squared" in metric_args and not metric_args["squared"]: + res = math.sqrt(res) + return res @pytest.mark.parametrize( - "preds, target, ref_metric", + ("preds", "target", "ref_metric"), [ (_single_target_inputs.preds, _single_target_inputs.target, _single_target_ref_wrapper), (_multi_target_inputs.preds, _multi_target_inputs.target, _multi_target_ref_wrapper), ], ) @pytest.mark.parametrize( - "metric_class, metric_functional, sk_fn, metric_args", + ("metric_class", "metric_functional", "sk_fn", "metric_args"), [ - (MeanSquaredError, mean_squared_error, sk_mean_squared_error, {"squared": True}), - (MeanSquaredError, mean_squared_error, sk_mean_squared_error, {"squared": False}), - (MeanSquaredError, mean_squared_error, sk_mean_squared_error, {"squared": True, "num_outputs": NUM_TARGETS}), - (MeanAbsoluteError, mean_absolute_error, sk_mean_absolute_error, {}), - (MeanAbsoluteError, mean_absolute_error, sk_mean_absolute_error, {"num_outputs": NUM_TARGETS}), - (MeanAbsolutePercentageError, mean_absolute_percentage_error, sk_mean_abs_percentage_error, {}), - ( + pytest.param( + MeanSquaredError, mean_squared_error, sk_mean_squared_error, {"squared": True}, id="mse_singleoutput" + ), + pytest.param( + MeanSquaredError, mean_squared_error, sk_mean_squared_error, {"squared": False}, id="rmse_singleoutput" + ), + pytest.param( + 
MeanSquaredError, + mean_squared_error, + sk_mean_squared_error, + {"squared": True, "num_outputs": NUM_TARGETS}, + id="mse_multioutput", + ), + pytest.param(MeanAbsoluteError, mean_absolute_error, sk_mean_absolute_error, {}, id="mae_singleoutput"), + pytest.param( + MeanAbsoluteError, + mean_absolute_error, + sk_mean_absolute_error, + {"num_outputs": NUM_TARGETS}, + id="mae_multioutput", + ), + pytest.param( + MeanAbsolutePercentageError, + mean_absolute_percentage_error, + sk_mean_abs_percentage_error, + {}, + id="mape_singleoutput", + ), + pytest.param( SymmetricMeanAbsolutePercentageError, symmetric_mean_absolute_percentage_error, _reference_symmetric_mape, {}, + id="symmetric_mean_absolute_percentage_error", ), - (MeanSquaredLogError, mean_squared_log_error, sk_mean_squared_log_error, {}), - ( + pytest.param( + MeanSquaredLogError, mean_squared_log_error, sk_mean_squared_log_error, {}, id="mean_squared_log_error" + ), + pytest.param( WeightedMeanAbsolutePercentageError, weighted_mean_absolute_percentage_error, _reference_weighted_mean_abs_percentage_error, {}, + id="weighted_mean_absolute_percentage_error", + ), + pytest.param( + NormalizedRootMeanSquaredError, + normalized_root_mean_squared_error, + _reference_normalized_root_mean_squared_error, + {"normalization": "mean", "num_outputs": 1}, + id="nrmse_singleoutput_mean", + ), + pytest.param( + NormalizedRootMeanSquaredError, + normalized_root_mean_squared_error, + _reference_normalized_root_mean_squared_error, + {"normalization": "range", "num_outputs": 1}, + id="nrmse_singleoutput_range", + ), + pytest.param( + NormalizedRootMeanSquaredError, + normalized_root_mean_squared_error, + _reference_normalized_root_mean_squared_error, + {"normalization": "std", "num_outputs": 1}, + id="nrmse_singleoutput_std", + ), + pytest.param( + NormalizedRootMeanSquaredError, + normalized_root_mean_squared_error, + _reference_normalized_root_mean_squared_error, + {"normalization": "l2", "num_outputs": 1}, + 
id="nrmse_singleoutput_l2", + ), + pytest.param( + NormalizedRootMeanSquaredError, + normalized_root_mean_squared_error, + _reference_normalized_root_mean_squared_error, + {"normalization": "mean", "num_outputs": NUM_TARGETS}, + id="nrmse_multioutput_mean", + ), + pytest.param( + NormalizedRootMeanSquaredError, + normalized_root_mean_squared_error, + _reference_normalized_root_mean_squared_error, + {"normalization": "range", "num_outputs": NUM_TARGETS}, + id="nrmse_multioutput_range", + ), + pytest.param( + NormalizedRootMeanSquaredError, + normalized_root_mean_squared_error, + _reference_normalized_root_mean_squared_error, + {"normalization": "std", "num_outputs": NUM_TARGETS}, + id="nrmse_multioutput_std", + ), + pytest.param( + NormalizedRootMeanSquaredError, + normalized_root_mean_squared_error, + _reference_normalized_root_mean_squared_error, + {"normalization": "l2", "num_outputs": NUM_TARGETS}, + id="nrmse_multioutput_l2", ), ], ) class TestMeanError(MetricTester): """Test class for `MeanError` metric.""" + atol = 1e-5 + @pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False]) def test_mean_error_class( self, preds, target, ref_metric, metric_class, metric_functional, sk_fn, metric_args, ddp ): """Test class implementation of metric.""" + if metric_args and "num_outputs" in metric_args and preds.ndim < 3: + pytest.skip("Test only runs for multi-output setting") self.run_class_metric_test( ddp=ddp, preds=preds, @@ -187,6 +303,8 @@ def test_mean_error_functional( self, preds, target, ref_metric, metric_class, metric_functional, sk_fn, metric_args ): """Test functional implementation of metric.""" + if metric_args and "num_outputs" in metric_args and preds.ndim < 3: + pytest.skip("Test only runs for multi-output setting") self.run_functional_metric_test( preds=preds, target=target, @@ -199,6 +317,8 @@ def test_mean_error_differentiability( self, preds, target, ref_metric, metric_class, metric_functional, sk_fn, metric_args ): """Test
the differentiability of the metric, according to its `is_differentiable` attribute.""" + if metric_args and "num_outputs" in metric_args and preds.ndim < 3: + pytest.skip("Test only runs for multi-output setting") self.run_differentiability_test( preds=preds, target=target, @@ -225,6 +345,10 @@ def test_mean_error_half_cpu(self, preds, target, ref_metric, metric_class, metr # WeightedMeanAbsolutePercentageError half + cpu does not work due to missing support in torch.clamp pytest.xfail("WeightedMeanAbsolutePercentageError metric does not support cpu + half precision") + if metric_class == NormalizedRootMeanSquaredError: + # NormalizedRootMeanSquaredError half + cpu does not work due to missing support in torch.sqrt + pytest.xfail("NormalizedRootMeanSquaredError metric does not support cpu + half precision") + self.run_precision_test_cpu(preds, target, metric_class, metric_functional) @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires cuda") @@ -234,10 +358,30 @@ def test_mean_error_half_gpu(self, preds, target, ref_metric, metric_class, metr @pytest.mark.parametrize( - "metric_class", [MeanSquaredError, MeanAbsoluteError, MeanSquaredLogError, MeanAbsolutePercentageError] + "metric_class", + [ + MeanSquaredError, + MeanAbsoluteError, + MeanSquaredLogError, + MeanAbsolutePercentageError, + NormalizedRootMeanSquaredError, + ], ) def test_error_on_different_shape(metric_class): """Test that error is raised on different shapes of input.""" metric = metric_class() with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape"): metric(torch.randn(100), torch.randn(50)) + + +@pytest.mark.parametrize( + ("metric_class", "arguments", "error_msg"), + [ + (MeanSquaredError, {"squared": "something"}, "Expected argument `squared` to be a boolean.*"), + (NormalizedRootMeanSquaredError, {"normalization": "something"}, "Argument `normalization` should be either.*"), + ], +) +def 
test_error_on_wrong_extra_args(metric_class, arguments, error_msg): + """Test that error is raised on wrong extra arguments.""" + with pytest.raises(ValueError, match=error_msg): + metric_class(**arguments) diff --git a/tests/unittests/utilities/test_plot.py b/tests/unittests/utilities/test_plot.py index 5b85a01af5a..efb7077682e 100644 --- a/tests/unittests/utilities/test_plot.py +++ b/tests/unittests/utilities/test_plot.py @@ -130,6 +130,7 @@ MeanSquaredError, MeanSquaredLogError, MinkowskiDistance, + NormalizedRootMeanSquaredError, PearsonCorrCoef, R2Score, RelativeSquaredError, @@ -469,6 +470,7 @@ pytest.param(MeanAbsoluteError, _rand_input, _rand_input, id="mean absolute error"), pytest.param(MeanAbsolutePercentageError, _rand_input, _rand_input, id="mean absolute percentage error"), pytest.param(partial(MinkowskiDistance, p=3), _rand_input, _rand_input, id="minkowski distance"), + pytest.param(NormalizedRootMeanSquaredError, _rand_input, _rand_input, id="normalized root mean squared error"), pytest.param(PearsonCorrCoef, _rand_input, _rand_input, id="pearson corr coef"), pytest.param(R2Score, _rand_input, _rand_input, id="r2 score"), pytest.param(RelativeSquaredError, _rand_input, _rand_input, id="relative squared error"),