From 2cd6f6a169a87abf0ee801ceabdf8a9724fdfac2 Mon Sep 17 00:00:00 2001 From: Jirka B Date: Mon, 9 Sep 2024 10:35:08 +0200 Subject: [PATCH 01/11] docs: fix link to WIP --- docs/source/links.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/links.rst b/docs/source/links.rst index 1fc3ab5755d..14597ae3f37 100644 --- a/docs/source/links.rst +++ b/docs/source/links.rst @@ -91,7 +91,7 @@ .. _CER: https://rechtsprechung-im-ostseeraum.archiv.uni-greifswald.de/word-error-rate-character-error-rate-how-to-evaluate-a-model .. _MER: https://www.isca-speech.org/archive/interspeech_2004/morris04_interspeech.html .. _WIL: https://www.isca-speech.org/archive/interspeech_2004/morris04_interspeech.html -.. _WIP: https://infoscience.epfl.ch/entities/publication/9983d013-8239-422e-a3f7-a1500d309474 +.. _WIP: https://www.isca-archive.org/interspeech_2004/morris04_interspeech.pdf .. _TV: https://en.wikipedia.org/wiki/Total_variation_denoising .. _InfoLM: https://arxiv.org/abs/2112.01589 .. _alpha divergence: https://static.renyi.hu/renyi_cikkek/1961_on_measures_of_entropy_and_information.pdf From 144f6d6290a1e78f49c49991d34eebed2e0906e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 10 Sep 2024 10:03:09 +0200 Subject: [PATCH 02/11] build(deps): bump torch from 2.4.0 to 2.4.1 in /requirements (#2729) Bumps [torch](https://github.com/pytorch/pytorch) from 2.4.0 to 2.4.1. - [Release notes](https://github.com/pytorch/pytorch/releases) - [Changelog](https://github.com/pytorch/pytorch/blob/main/RELEASE.md) - [Commits](https://github.com/pytorch/pytorch/compare/v2.4.0...v2.4.1) --- updated-dependencies: - dependency-name: torch dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements/typing.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/typing.txt b/requirements/typing.txt index 9b14e1e9761..2b8cf0b5082 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,5 +1,5 @@ mypy ==1.11.2 -torch ==2.4.0 +torch ==2.4.1 types-PyYAML types-emoji From eecc55bc59395f66a4d50eb1e359ba38d59da30e Mon Sep 17 00:00:00 2001 From: Nicki Skafte Detlefsen Date: Tue, 10 Sep 2024 20:15:57 +0200 Subject: [PATCH 03/11] Fix how `prefix`/`posfix` works in `MultitaskWrapper` (#2722) * implementation * tests * changelog * fix mypy --- CHANGELOG.md | 3 + src/torchmetrics/wrappers/multitask.py | 102 ++++++++++++++------- tests/unittests/wrappers/test_multitask.py | 28 ++++-- 3 files changed, 93 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bd51af0290..f2f3def0013 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Correct the padding related calculation errors in SSIM ([#2721](https://github.com/Lightning-AI/torchmetrics/pull/2721)) +- Fixed how `prefix`/`postfix` works in `MultitaskWrapper` ([#2722](https://github.com/Lightning-AI/torchmetrics/pull/2722)) + + ## [1.4.1] - 2024-08-02 ### Changed diff --git a/src/torchmetrics/wrappers/multitask.py b/src/torchmetrics/wrappers/multitask.py index 556a7183638..fa2f04db97d 100644 --- a/src/torchmetrics/wrappers/multitask.py +++ b/src/torchmetrics/wrappers/multitask.py @@ -38,12 +38,27 @@ class MultitaskWrapper(WrapperMetric): task_metrics: Dictionary associating each task to a Metric or a MetricCollection. The keys of the dictionary represent the names of the tasks, and the values represent the metrics to use for each task. + prefix: + A string to append in front of the metric keys. If not provided, will default to an empty string. + postfix: + A string to append after the keys of the output dict. If not provided, will default to an empty string. + + .. note:: + The use pre prefix and postfix allows for easily creating task wrappers for training, validation and test. + The arguments are only changing the output keys of the computed metrics and not the input keys. This means + that a ``MultitaskWrapper`` initialized as ``MultitaskWrapper({"task": Metric()}, prefix="train_")`` will + still expect the input to be a dictionary with the key "task", but the output will be a dictionary with the key + "train_task". 
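For illustration, a minimal sketch of the behaviour described in the note above (the task name ``"task"``, the prefix ``"train_"`` and the tensors are arbitrary; the key handling follows the code added in this patch):

>>> import torch
>>> from torchmetrics.wrappers import MultitaskWrapper
>>> from torchmetrics.classification import BinaryAccuracy
>>> wrapper = MultitaskWrapper({"task": BinaryAccuracy()}, prefix="train_")
>>> # the *input* dictionaries still use the bare task name ...
>>> wrapper.update({"task": torch.tensor([0, 1, 1])}, {"task": torch.tensor([0, 1, 0])})
>>> wrapper.compute()  # ... while the *output* key carries the prefix
{'train_task': tensor(0.6667)}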
Raises: TypeError: If argument `task_metrics` is not an dictionary TypeError: If not all values in the `task_metrics` dictionary is instances of `Metric` or `MetricCollection` + ValueError: + If `prefix` is not a string + ValueError: + If `postfix` is not a string Example (with a single metric per class): >>> import torch @@ -91,18 +106,59 @@ class MultitaskWrapper(WrapperMetric): {'Classification': {'BinaryAccuracy': tensor(0.3333), 'BinaryF1Score': tensor(0.)}, 'Regression': {'MeanSquaredError': tensor(0.8333), 'MeanAbsoluteError': tensor(0.6667)}} + Example (with a prefix and postfix): + >>> import torch + >>> from torchmetrics.wrappers import MultitaskWrapper + >>> from torchmetrics.regression import MeanSquaredError + >>> from torchmetrics.classification import BinaryAccuracy + >>> + >>> classification_target = torch.tensor([0, 1, 0]) + >>> regression_target = torch.tensor([2.5, 5.0, 4.0]) + >>> targets = {"Classification": classification_target, "Regression": regression_target} + >>> classification_preds = torch.tensor([0, 0, 1]) + >>> regression_preds = torch.tensor([3.0, 5.0, 2.5]) + >>> preds = {"Classification": classification_preds, "Regression": regression_preds} + >>> + >>> metrics = MultitaskWrapper({ + ... "Classification": BinaryAccuracy(), + ... "Regression": MeanSquaredError() + ... }, prefix="train_") + >>> metrics.update(preds, targets) + >>> metrics.compute() + {'train_Classification': tensor(0.3333), 'train_Regression': tensor(0.8333)} + """ - is_differentiable = False + is_differentiable: bool = False def __init__( self, task_metrics: Dict[str, Union[Metric, MetricCollection]], + prefix: Optional[str] = None, + postfix: Optional[str] = None, ) -> None: - self._check_task_metrics_type(task_metrics) super().__init__() + + if not isinstance(task_metrics, dict): + raise TypeError(f"Expected argument `task_metrics` to be a dict. Found task_metrics = {task_metrics}") + + for metric in task_metrics.values(): + if not (isinstance(metric, (Metric, MetricCollection))): + raise TypeError( + "Expected each task's metric to be a Metric or a MetricCollection. " + f"Found a metric of type {type(metric)}" + ) + self.task_metrics = nn.ModuleDict(task_metrics) + if prefix is not None and not isinstance(prefix, str): + raise ValueError(f"Expected argument `prefix` to either be `None` or a string but got {prefix}") + self._prefix = prefix or "" + + if postfix is not None and not isinstance(postfix, str): + raise ValueError(f"Expected argument `postfix` to either be `None` or a string but got {postfix}") + self._postfix = postfix or "" + def items(self, flatten: bool = True) -> Iterable[Tuple[str, nn.Module]]: """Iterate over task and task metrics. @@ -114,9 +170,9 @@ def items(self, flatten: bool = True) -> Iterable[Tuple[str, nn.Module]]: for task_name, metric in self.task_metrics.items(): if flatten and isinstance(metric, MetricCollection): for sub_metric_name, sub_metric in metric.items(): - yield f"{task_name}_{sub_metric_name}", sub_metric + yield f"{self._prefix}{task_name}_{sub_metric_name}{self._postfix}", sub_metric else: - yield task_name, metric + yield f"{self._prefix}{task_name}{self._postfix}", metric def keys(self, flatten: bool = True) -> Iterable[str]: """Iterate over task names. 
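A rough sketch of how key iteration behaves once a task holds a ``MetricCollection`` (the metric names and their ordering are only illustrative; the key pattern follows the ``items``/``keys`` code in this hunk):

>>> from torchmetrics import MetricCollection
>>> from torchmetrics.classification import BinaryAccuracy, BinaryF1Score
>>> from torchmetrics.wrappers import MultitaskWrapper
>>> metrics = MultitaskWrapper(
...     {"Classification": MetricCollection([BinaryAccuracy(), BinaryF1Score()])}, prefix="val_"
... )
>>> list(metrics.keys(flatten=True))
['val_Classification_BinaryAccuracy', 'val_Classification_BinaryF1Score']
>>> list(metrics.keys(flatten=False))
['val_Classification']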
@@ -129,9 +185,9 @@ def keys(self, flatten: bool = True) -> Iterable[str]: for task_name, metric in self.task_metrics.items(): if flatten and isinstance(metric, MetricCollection): for sub_metric_name in metric: - yield f"{task_name}_{sub_metric_name}" + yield f"{self._prefix}{task_name}_{sub_metric_name}{self._postfix}" else: - yield task_name + yield f"{self._prefix}{task_name}{self._postfix}" def values(self, flatten: bool = True) -> Iterable[nn.Module]: """Iterate over task metrics. @@ -147,18 +203,6 @@ def values(self, flatten: bool = True) -> Iterable[nn.Module]: else: yield metric - @staticmethod - def _check_task_metrics_type(task_metrics: Dict[str, Union[Metric, MetricCollection]]) -> None: - if not isinstance(task_metrics, dict): - raise TypeError(f"Expected argument `task_metrics` to be a dict. Found task_metrics = {task_metrics}") - - for metric in task_metrics.values(): - if not (isinstance(metric, (Metric, MetricCollection))): - raise TypeError( - "Expected each task's metric to be a Metric or a MetricCollection. " - f"Found a metric of type {type(metric)}" - ) - def update(self, task_preds: Dict[str, Any], task_targets: Dict[str, Any]) -> None: """Update each task's metric with its corresponding pred and target. @@ -179,9 +223,13 @@ def update(self, task_preds: Dict[str, Any], task_targets: Dict[str, Any]) -> No target = task_targets[task_name] metric.update(pred, target) + def _convert_output(self, output: Dict[str, Any]) -> Dict[str, Any]: + """Convert the output of the underlying metrics to a dictionary with the task names as keys.""" + return {f"{self._prefix}{task_name}{self._postfix}": task_output for task_name, task_output in output.items()} + def compute(self) -> Dict[str, Any]: """Compute metrics for all tasks.""" - return {task_name: metric.compute() for task_name, metric in self.task_metrics.items()} + return self._convert_output({task_name: metric.compute() for task_name, metric in self.task_metrics.items()}) def forward(self, task_preds: Dict[str, Tensor], task_targets: Dict[str, Tensor]) -> Dict[str, Any]: """Call underlying forward methods for all tasks and return the result as a dictionary.""" @@ -189,10 +237,10 @@ def forward(self, task_preds: Dict[str, Tensor], task_targets: Dict[str, Tensor] # value of full_state_update, and that also accumulates the results. Here, all computations are handled by the # underlying metrics, which all have their own value of full_state_update, and which all accumulate the results # by themselves. 
- return { + return self._convert_output({ task_name: metric(task_preds[task_name], task_targets[task_name]) for task_name, metric in self.task_metrics.items() - } + }) def reset(self) -> None: """Reset all underlying metrics.""" @@ -215,16 +263,8 @@ def clone(self, prefix: Optional[str] = None, postfix: Optional[str] = None) -> """ multitask_copy = deepcopy(self) - if prefix is not None: - prefix = self._check_arg(prefix, "prefix") - multitask_copy.task_metrics = nn.ModuleDict({ - prefix + key: value for key, value in multitask_copy.task_metrics.items() - }) - if postfix is not None: - postfix = self._check_arg(postfix, "postfix") - multitask_copy.task_metrics = nn.ModuleDict({ - key + postfix: value for key, value in multitask_copy.task_metrics.items() - }) + multitask_copy._prefix = self._check_arg(prefix, "prefix") or "" + multitask_copy._postfix = self._check_arg(postfix, "prefix") or "" return multitask_copy def plot( diff --git a/tests/unittests/wrappers/test_multitask.py b/tests/unittests/wrappers/test_multitask.py index 63af6f31b35..069a4472d64 100644 --- a/tests/unittests/wrappers/test_multitask.py +++ b/tests/unittests/wrappers/test_multitask.py @@ -248,14 +248,24 @@ def test_key_value_items_method(method, flatten): def test_clone_with_prefix_and_postfix(): """Check that the clone method works with prefix and postfix arguments.""" - multitask_metrics = MultitaskWrapper({"Classification": BinaryAccuracy(), "Regression": MeanSquaredError()}) - cloned_metrics_with_prefix = multitask_metrics.clone(prefix="prefix_") - cloned_metrics_with_postfix = multitask_metrics.clone(postfix="_postfix") + multitask_metrics = MultitaskWrapper( + {"Classification": BinaryAccuracy(), "Regression": MeanSquaredError()}, + prefix="prefix_", + postfix="_postfix", + ) + assert set(multitask_metrics.keys()) == {"prefix_Classification_postfix", "prefix_Regression_postfix"} - # Check if the cloned metrics have the expected keys - assert set(cloned_metrics_with_prefix.task_metrics.keys()) == {"prefix_Classification", "prefix_Regression"} - assert set(cloned_metrics_with_postfix.task_metrics.keys()) == {"Classification_postfix", "Regression_postfix"} + output = multitask_metrics( + {"Classification": _classification_preds, "Regression": _regression_preds}, + {"Classification": _classification_target, "Regression": _regression_target}, + ) + assert set(output.keys()) == {"prefix_Classification_postfix", "prefix_Regression_postfix"} - # Check if the cloned metrics have the expected values - assert isinstance(cloned_metrics_with_prefix.task_metrics["prefix_Classification"], BinaryAccuracy) - assert isinstance(cloned_metrics_with_prefix.task_metrics["prefix_Regression"], MeanSquaredError) + cloned_metrics = multitask_metrics.clone(prefix="new_prefix_", postfix="_new_postfix") + assert set(cloned_metrics.keys()) == {"new_prefix_Classification_new_postfix", "new_prefix_Regression_new_postfix"} + + output = cloned_metrics( + {"Classification": _classification_preds, "Regression": _regression_preds}, + {"Classification": _classification_target, "Regression": _regression_target}, + ) + assert set(output.keys()) == {"new_prefix_Classification_new_postfix", "new_prefix_Regression_new_postfix"} From 80929b53db36cc045da4b8b017db82e46a5159bb Mon Sep 17 00:00:00 2001 From: Rittik Panda <99414608+rittik9@users.noreply.github.com> Date: Tue, 10 Sep 2024 23:49:16 +0530 Subject: [PATCH 04/11] Fix: handle zero division error in binary IoU calculation (#2726) * Fix: Handle zero division error in binary IoU (Jaccard index) 
calculation * chlog --------- Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> --- CHANGELOG.md | 3 +++ .../functional/classification/jaccard.py | 2 +- .../unittests/classification/test_jaccard.py | 21 +++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2f3def0013..4bac68f736f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed handling zero division error in binary IoU (Jaccard index) calculation ([#2726](https://github.com/Lightning-AI/torchmetrics/pull/2726)) + + - Correct the padding related calculation errors in SSIM ([#2721](https://github.com/Lightning-AI/torchmetrics/pull/2721)) diff --git a/src/torchmetrics/functional/classification/jaccard.py b/src/torchmetrics/functional/classification/jaccard.py index 1d240df68af..dfddd68255f 100644 --- a/src/torchmetrics/functional/classification/jaccard.py +++ b/src/torchmetrics/functional/classification/jaccard.py @@ -67,7 +67,7 @@ def _jaccard_index_reduce( raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.") confmat = confmat.float() if average == "binary": - return confmat[1, 1] / (confmat[0, 1] + confmat[1, 0] + confmat[1, 1]) + return _safe_divide(confmat[1, 1], (confmat[0, 1] + confmat[1, 0] + confmat[1, 1]), zero_division=zero_division) ignore_index_cond = ignore_index is not None and 0 <= ignore_index < confmat.shape[0] multilabel = confmat.ndim == 3 diff --git a/tests/unittests/classification/test_jaccard.py b/tests/unittests/classification/test_jaccard.py index 6901868eac9..e7afdb557a6 100644 --- a/tests/unittests/classification/test_jaccard.py +++ b/tests/unittests/classification/test_jaccard.py @@ -26,6 +26,7 @@ MultilabelJaccardIndex, ) from torchmetrics.functional.classification.jaccard import ( + _jaccard_index_reduce, binary_jaccard_index, multiclass_jaccard_index, multilabel_jaccard_index, @@ -403,6 +404,26 @@ def test_corner_case(): assert torch.allclose(res, out) +def test_jaccard_index_zero_division(): + """Issue: https://github.com/Lightning-AI/torchmetrics/issues/2658.""" + # Test case where all pixels are background (zeros) + confmat = torch.tensor([[4, 0], [0, 0]]) + + # Test with zero_division=0.0 + result = _jaccard_index_reduce(confmat, average="binary", zero_division=0.0) + assert result == 0.0, f"Expected 0.0, but got {result}" + + # Test with zero_division=1.0 + result = _jaccard_index_reduce(confmat, average="binary", zero_division=1.0) + assert result == 1.0, f"Expected 1.0, but got {result}" + + # Test case with some foreground pixels + confmat = torch.tensor([[2, 1], [1, 1]]) + result = _jaccard_index_reduce(confmat, average="binary", zero_division=0.0) + expected = 1 / 3 + assert torch.isclose(result, torch.tensor(expected)), f"Expected {expected}, but got {result}" + + @pytest.mark.parametrize( ("metric", "kwargs"), [ From f12e7af65ef14baec63c199af9a7e69a403b3c04 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Wed, 11 Sep 2024 14:30:49 +0200 Subject: [PATCH 05/11] fix: compatibility audio do with new `scipy` (#2733) * compatibility audio do with new `scipy` * smaller array to fix torch.unique case --------- Co-authored-by: Nicki Skafte Detlefsen --- CHANGELOG.md | 3 +++ src/torchmetrics/__init__.py | 7 +++++++ src/torchmetrics/functional/nominal/__init__.py | 1 + src/torchmetrics/nominal/__init__.py | 1 + 
src/torchmetrics/utilities/imports.py | 1 + tests/unittests/classification/test_stat_scores.py | 4 ++-- 6 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bac68f736f..0fc6c936492 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Correct the padding related calculation errors in SSIM ([#2721](https://github.com/Lightning-AI/torchmetrics/pull/2721)) +- Fixed compatibility of audio domain with new `scipy` ([#2733](https://github.com/Lightning-AI/torchmetrics/pull/2733)) + + - Fixed how `prefix`/`postfix` works in `MultitaskWrapper` ([#2722](https://github.com/Lightning-AI/torchmetrics/pull/2722)) diff --git a/src/torchmetrics/__init__.py b/src/torchmetrics/__init__.py index b1549dfaf8b..2fa370cb1c9 100644 --- a/src/torchmetrics/__init__.py +++ b/src/torchmetrics/__init__.py @@ -20,6 +20,13 @@ if not hasattr(PIL, "PILLOW_VERSION"): PIL.PILLOW_VERSION = PIL.__version__ +if package_available("scipy"): + import scipy.signal + + # back compatibility patch due to SMRMpy using scipy.signal.hamming + if not hasattr(scipy.signal, "hamming"): + scipy.signal.hamming = scipy.signal.windows.hamming + from torchmetrics import functional # noqa: E402 from torchmetrics.aggregation import ( # noqa: E402 CatMetric, diff --git a/src/torchmetrics/functional/nominal/__init__.py b/src/torchmetrics/functional/nominal/__init__.py index f29dd9302f0..772cb395895 100644 --- a/src/torchmetrics/functional/nominal/__init__.py +++ b/src/torchmetrics/functional/nominal/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from torchmetrics.functional.nominal.cramers import cramers_v, cramers_v_matrix from torchmetrics.functional.nominal.fleiss_kappa import fleiss_kappa from torchmetrics.functional.nominal.pearson import ( diff --git a/src/torchmetrics/nominal/__init__.py b/src/torchmetrics/nominal/__init__.py index f23a7eb8c6b..e36da870308 100644 --- a/src/torchmetrics/nominal/__init__.py +++ b/src/torchmetrics/nominal/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ from torchmetrics.nominal.cramers import CramersV from torchmetrics.nominal.fleiss_kappa import FleissKappa from torchmetrics.nominal.pearson import PearsonsContingencyCoefficient diff --git a/src/torchmetrics/utilities/imports.py b/src/torchmetrics/utilities/imports.py index b40a334558f..10affebf579 100644 --- a/src/torchmetrics/utilities/imports.py +++ b/src/torchmetrics/utilities/imports.py @@ -64,6 +64,7 @@ _MECAB_KO_DIC_AVAILABLE = RequirementCache("mecab_ko_dic") _IPADIC_AVAILABLE = RequirementCache("ipadic") _SENTENCEPIECE_AVAILABLE = RequirementCache("sentencepiece") +_SCIPI_AVAILABLE = RequirementCache("scipy") _SKLEARN_GREATER_EQUAL_1_3 = RequirementCache("scikit-learn>=1.3.0") _LATEX_AVAILABLE: bool = shutil.which("latex") is not None diff --git a/tests/unittests/classification/test_stat_scores.py b/tests/unittests/classification/test_stat_scores.py index 53fa78d0368..5ea4c206bc0 100644 --- a/tests/unittests/classification/test_stat_scores.py +++ b/tests/unittests/classification/test_stat_scores.py @@ -582,8 +582,8 @@ def test_support_for_int(): """See issue: https://github.com/Lightning-AI/torchmetrics/issues/1970.""" seed_all(42) metric = MulticlassStatScores(num_classes=4, average="none", multidim_average="samplewise", ignore_index=0) - prediction = torch.randint(low=0, high=4, size=(1, 224, 224)).to(torch.uint8) - label = torch.randint(low=0, high=4, size=(1, 224, 224)).to(torch.uint8) + prediction = torch.randint(low=0, high=4, size=(1, 50, 50)).to(torch.uint8) + label = torch.randint(low=0, high=4, size=(1, 50, 50)).to(torch.uint8) score = metric(preds=prediction, target=label) assert score.shape == (1, 4, 5) From 708f11d0004cc7a6ea493980a733e6066a9bd6b2 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Wed, 11 Sep 2024 22:03:31 +0200 Subject: [PATCH 06/11] bump some testing requirements (#2736) * bump some testing requirements + matplotlib >=3.6.0 + dython ~=0.7.6 --- requirements/classification_test.txt | 2 +- requirements/nominal_test.txt | 4 ++-- requirements/visual.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/classification_test.txt b/requirements/classification_test.txt index 688067c482a..8cadb2b3e3e 100644 --- a/requirements/classification_test.txt +++ b/requirements/classification_test.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -pandas >=1.4.0, <=2.2.2 +pandas >1.4.0, <=2.2.2 netcal >1.0.0, <1.4.0 # calibration_error numpy <2.2.0 fairlearn # group_fairness diff --git a/requirements/nominal_test.txt b/requirements/nominal_test.txt index 09f77b20475..7ee5809af6f 100644 --- a/requirements/nominal_test.txt +++ b/requirements/nominal_test.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -pandas >1.0.0, <=2.2.2 # cannot pin version due to numpy version incompatibility -dython <=0.7.7 +pandas >1.4.0, <=2.2.2 # cannot pin version due to numpy version incompatibility +dython ~=0.7.6 scipy >1.0.0, <1.15.0 # cannot pin version due to some version conflicts with `oldest` CI configuration statsmodels >0.13.5, <0.15.0 diff --git a/requirements/visual.txt 
b/requirements/visual.txt index 269a45fc7bb..1cdc4060a8b 100644 --- a/requirements/visual.txt +++ b/requirements/visual.txt @@ -1,5 +1,5 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -matplotlib >=3.3.0, <3.10.0 +matplotlib >=3.6.0, <3.10.0 SciencePlots >= 2.0.0, <2.2.0 From 4ce2278166d453e33ae197f69e1a3bcb5409b7c2 Mon Sep 17 00:00:00 2001 From: Nicki Skafte Detlefsen Date: Thu, 12 Sep 2024 00:07:57 +0200 Subject: [PATCH 07/11] Deprecate `num_outputs` in R2 because it is no longer needed (#2705) * trying to see what happens * fix doctests * add deprecation test * Apply suggestions from code review --------- Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> --- CHANGELOG.md | 5 ++++ src/torchmetrics/regression/r2.py | 47 ++++++++++++++++++++---------- tests/unittests/test_deprecated.py | 8 ++++- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fc6c936492..d3eafaf8be5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - update `InfoLM` class to dynamically set `higher_is_better` ([#2674](https://github.com/Lightning-AI/torchmetrics/pull/2674)) +### Deprecated + +- Deprecated `num_outputs` in `R2Score` ([#2705](https://github.com/Lightning-AI/torchmetrics/pull/2705)) + + ### Removed - diff --git a/src/torchmetrics/regression/r2.py b/src/torchmetrics/regression/r2.py index 611d62a745c..be6b2af33d8 100644 --- a/src/torchmetrics/regression/r2.py +++ b/src/torchmetrics/regression/r2.py @@ -13,11 +13,11 @@ # limitations under the License. from typing import Any, Optional, Sequence, Union -import torch from torch import Tensor, tensor from torchmetrics.functional.regression.r2 import _r2_score_compute, _r2_score_update from torchmetrics.metric import Metric +from torchmetrics.utilities import rank_zero_warn from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE @@ -65,23 +65,32 @@ class R2Score(Metric): * ``'variance_weighted'`` scores are weighted by their individual variances kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. + .. warning:: + Argument ``num_outputs`` in ``R2Score`` has been deprecated because it is no longer necessary and will be + removed in v1.6.0 of TorchMetrics. The number of outputs is now automatically inferred from the shape + of the input tensors. + Raises: ValueError: If ``adjusted`` parameter is not an integer larger or equal to 0. ValueError: If ``multioutput`` is not one of ``"raw_values"``, ``"uniform_average"`` or ``"variance_weighted"``. 
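A small sketch of the deprecation path; the printed score is simply the uniform average of the two per-output values (0.9654 and 0.9082) shown in the multioutput example below:

>>> import torch
>>> from torchmetrics.regression import R2Score
>>> r2score = R2Score(num_outputs=2)  # still accepted, but emits a DeprecationWarning and is otherwise ignored
>>> target = torch.tensor([[0.5, 1], [-1, 1], [7, -6]])
>>> preds = torch.tensor([[0, 2], [-1, 2], [8, -5]])
>>> r2score(preds, target)  # the number of outputs is now inferred from the input shape
tensor(0.9368)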
- Example: + Example (single output): + >>> from torch import tensor >>> from torchmetrics.regression import R2Score - >>> target = torch.tensor([3, -0.5, 2, 7]) - >>> preds = torch.tensor([2.5, 0.0, 2, 8]) + >>> target = tensor([3, -0.5, 2, 7]) + >>> preds = tensor([2.5, 0.0, 2, 8]) >>> r2score = R2Score() >>> r2score(preds, target) tensor(0.9486) - >>> target = torch.tensor([[0.5, 1], [-1, 1], [7, -6]]) - >>> preds = torch.tensor([[0, 2], [-1, 2], [8, -5]]) - >>> r2score = R2Score(num_outputs=2, multioutput='raw_values') + Example (multioutput): + >>> from torch import tensor + >>> from torchmetrics.regression import R2Score + >>> target = tensor([[0.5, 1], [-1, 1], [7, -6]]) + >>> preds = tensor([[0, 2], [-1, 2], [8, -5]]) + >>> r2score = R2Score(multioutput='raw_values') >>> r2score(preds, target) tensor([0.9654, 0.9082]) @@ -100,14 +109,20 @@ class R2Score(Metric): def __init__( self, - num_outputs: int = 1, + num_outputs: Optional[int] = None, adjusted: int = 0, multioutput: str = "uniform_average", **kwargs: Any, ) -> None: super().__init__(**kwargs) - self.num_outputs = num_outputs + if num_outputs is not None: + rank_zero_warn( + "Argument `num_outputs` in `R2Score` has been deprecated because it is no longer necessary and will be" + "removed in v1.6.0 of TorchMetrics. The number of outputs is now automatically inferred from the shape" + "of the input tensors.", + DeprecationWarning, + ) if adjusted < 0 or not isinstance(adjusted, int): raise ValueError("`adjusted` parameter should be an integer larger or equal to 0.") @@ -120,19 +135,19 @@ def __init__( ) self.multioutput = multioutput - self.add_state("sum_squared_error", default=torch.zeros(self.num_outputs), dist_reduce_fx="sum") - self.add_state("sum_error", default=torch.zeros(self.num_outputs), dist_reduce_fx="sum") - self.add_state("residual", default=torch.zeros(self.num_outputs), dist_reduce_fx="sum") + self.add_state("sum_squared_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("sum_error", default=tensor(0.0), dist_reduce_fx="sum") + self.add_state("residual", default=tensor(0.0), dist_reduce_fx="sum") self.add_state("total", default=tensor(0), dist_reduce_fx="sum") def update(self, preds: Tensor, target: Tensor) -> None: """Update state with predictions and targets.""" sum_squared_error, sum_error, residual, total = _r2_score_update(preds, target) - self.sum_squared_error += sum_squared_error - self.sum_error += sum_error - self.residual += residual - self.total += total + self.sum_squared_error = self.sum_squared_error + sum_squared_error + self.sum_error = self.sum_error + sum_error + self.residual = self.residual + residual + self.total = self.total + total def compute(self) -> Tensor: """Compute r2 score over the metric states.""" diff --git a/tests/unittests/test_deprecated.py b/tests/unittests/test_deprecated.py index f126fa06561..49c153c4a52 100644 --- a/tests/unittests/test_deprecated.py +++ b/tests/unittests/test_deprecated.py @@ -1,7 +1,7 @@ import pytest import torch from torchmetrics.functional.regression import kl_divergence -from torchmetrics.regression import KLDivergence +from torchmetrics.regression import KLDivergence, R2Score def test_deprecated_kl_divergence_input_order(): @@ -14,3 +14,9 @@ def test_deprecated_kl_divergence_input_order(): with pytest.deprecated_call(match="The input order and naming in metric `KLDivergence` is set to be deprecated.*"): KLDivergence() + + +def test_deprecated_r2_score_num_outputs(): + """Ensure that the deprecated num_outputs argument in R2Score 
raises a warning.""" + with pytest.deprecated_call(match="Argument `num_outputs` in `R2Score` has been deprecated"): + R2Score(num_outputs=2) From cb1ab3798000cdfaaaf7b55905895c2445cdc0eb Mon Sep 17 00:00:00 2001 From: Vitaliy Kinakh Date: Thu, 12 Sep 2024 00:12:55 +0200 Subject: [PATCH 08/11] Fix `segmentation.MeanIoU` (#2698) - use sum reduce function for score - add state `num_batches` to keep number of processed batches - add increment of `num_batches` in every `update` call - in `compute` return sum of scores divided by number of processed batches --------- Co-authored-by: Nicki Skafte --- CHANGELOG.md | 3 +++ src/torchmetrics/metric.py | 12 +++++------ src/torchmetrics/segmentation/mean_iou.py | 8 +++++--- tests/unittests/_helpers/testers.py | 25 +++++++++++++++++++++-- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3eafaf8be5..ef9ed3c896f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed wrong aggregation in `segmentation.MeanIoU` ([#2698](https://github.com/Lightning-AI/torchmetrics/pull/2698)) + + - Fixed handling zero division error in binary IoU (Jaccard index) calculation ([#2726](https://github.com/Lightning-AI/torchmetrics/pull/2726)) diff --git a/src/torchmetrics/metric.py b/src/torchmetrics/metric.py index ef9055ed560..940e393c6d1 100644 --- a/src/torchmetrics/metric.py +++ b/src/torchmetrics/metric.py @@ -284,7 +284,7 @@ def forward(self, *args: Any, **kwargs: Any) -> Any: """Aggregate and evaluate batch input directly. Serves the dual purpose of both computing the metric on the current batch of inputs but also add the batch - statistics to the overall accumululating metric state. Input arguments are the exact same as corresponding + statistics to the overall accumulating metric state. Input arguments are the exact same as corresponding ``update`` method. The returned output is the exact same as the output of ``compute``. Args: @@ -361,7 +361,7 @@ def _forward_full_state_update(self, *args: Any, **kwargs: Any) -> Any: def _forward_reduce_state_update(self, *args: Any, **kwargs: Any) -> Any: """Forward computation using single call to `update`. - This can be done when the global metric state is a sinple reduction of batch states. This can be unsafe for + This can be done when the global metric state is a simple reduction of batch states. This can be unsafe for certain metric cases but is also the fastest way to both accumulate globally and compute locally. """ @@ -802,7 +802,7 @@ def _apply(self, fn: Callable, exclude_state: Sequence[str] = "") -> Module: """Overwrite `_apply` function such that we can also move metric states to the correct device. This method is called by the base ``nn.Module`` class whenever `.to`, `.cuda`, `.float`, `.half` etc. methods - are called. Dtype conversion is garded and will only happen through the special `set_dtype` method. + are called. Dtype conversion is guarded and will only happen through the special `set_dtype` method. 
Args: fn: the function to apply @@ -1166,7 +1166,7 @@ def _sync_dist(self, dist_sync_fn: Optional[Callable] = None, process_group: Opt """ def update(self, *args: Any, **kwargs: Any) -> None: - """Redirect the call to the input which the conposition was formed from.""" + """Redirect the call to the input which the composition was formed from.""" if isinstance(self.metric_a, Metric): self.metric_a.update(*args, **self.metric_a._filter_kwargs(**kwargs)) @@ -1174,7 +1174,7 @@ def update(self, *args: Any, **kwargs: Any) -> None: self.metric_b.update(*args, **self.metric_b._filter_kwargs(**kwargs)) def compute(self) -> Any: - """Redirect the call to the input which the conposition was formed from.""" + """Redirect the call to the input which the composition was formed from.""" # also some parsing for kwargs? val_a = self.metric_a.compute() if isinstance(self.metric_a, Metric) else self.metric_a val_b = self.metric_b.compute() if isinstance(self.metric_b, Metric) else self.metric_b @@ -1216,7 +1216,7 @@ def forward(self, *args: Any, **kwargs: Any) -> Any: return self._forward_cache def reset(self) -> None: - """Redirect the call to the input which the conposition was formed from.""" + """Redirect the call to the input which the composition was formed from.""" if isinstance(self.metric_a, Metric): self.metric_a.reset() diff --git a/src/torchmetrics/segmentation/mean_iou.py b/src/torchmetrics/segmentation/mean_iou.py index c298254585a..0fe831f5231 100644 --- a/src/torchmetrics/segmentation/mean_iou.py +++ b/src/torchmetrics/segmentation/mean_iou.py @@ -110,7 +110,8 @@ def __init__( self.input_format = input_format num_classes = num_classes - 1 if not include_background else num_classes - self.add_state("score", default=torch.zeros(num_classes if per_class else 1), dist_reduce_fx="mean") + self.add_state("score", default=torch.zeros(num_classes if per_class else 1), dist_reduce_fx="sum") + self.add_state("num_batches", default=torch.tensor(0), dist_reduce_fx="sum") def update(self, preds: Tensor, target: Tensor) -> None: """Update the state with the new data.""" @@ -119,10 +120,11 @@ def update(self, preds: Tensor, target: Tensor) -> None: ) score = _mean_iou_compute(intersection, union, per_class=self.per_class) self.score += score.mean(0) if self.per_class else score.mean() + self.num_batches += 1 def compute(self) -> Tensor: - """Update the state with the new data.""" - return self.score # / self.num_batches + """Compute the final Mean Intersection over Union (mIoU).""" + return self.score / self.num_batches def plot(self, val: Union[Tensor, Sequence[Tensor], None] = None, ax: Optional[_AX_TYPE] = None) -> _PLOT_OUT_TYPE: """Plot a single or multiple values from the metric. 
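To make the corrected aggregation concrete, a short worked sketch with hypothetical per-batch scores (the numbers are illustrative, not taken from any test in this patch):

# two update() calls with per-batch mean IoU of 0.50 and 0.70
# state after the updates:  score = 0.50 + 0.70 = 1.20,  num_batches = 2
# compute():                score / num_batches = 1.20 / 2 = 0.60
# before this fix, compute() returned the accumulated ``score`` (here 1.20) directly,
# so the reported value grew with the number of processed batches instead of averaging over them.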
diff --git a/tests/unittests/_helpers/testers.py b/tests/unittests/_helpers/testers.py index deb4c12324e..c5a69077f3c 100644 --- a/tests/unittests/_helpers/testers.py +++ b/tests/unittests/_helpers/testers.py @@ -32,7 +32,12 @@ def _assert_allclose(tm_result: Any, ref_result: Any, atol: float = 1e-8, key: O """Recursively assert that two results are within a certain tolerance.""" # single output compare if isinstance(tm_result, Tensor): - assert np.allclose(tm_result.detach().cpu().numpy(), ref_result, atol=atol, equal_nan=True) + assert np.allclose( + tm_result.detach().cpu().numpy() if isinstance(tm_result, Tensor) else tm_result, + ref_result.detach().cpu().numpy() if isinstance(ref_result, Tensor) else ref_result, + atol=atol, + equal_nan=True, + ) # multi output compare elif isinstance(tm_result, Sequence): for pl_res, ref_res in zip(tm_result, ref_result): @@ -40,7 +45,12 @@ def _assert_allclose(tm_result: Any, ref_result: Any, atol: float = 1e-8, key: O elif isinstance(tm_result, Dict): if key is None: raise KeyError("Provide Key for Dict based metric results.") - assert np.allclose(tm_result[key].detach().cpu().numpy(), ref_result, atol=atol, equal_nan=True) + assert np.allclose( + tm_result[key].detach().cpu().numpy() if isinstance(tm_result[key], Tensor) else tm_result[key], + ref_result.detach().cpu().numpy() if isinstance(ref_result, Tensor) else ref_result, + atol=atol, + equal_nan=True, + ) else: raise ValueError("Unknown format for comparison") @@ -147,6 +157,7 @@ def _class_test( # verify metrics work after being loaded from pickled state pickled_metric = pickle.dumps(metric) metric = pickle.loads(pickled_metric) + metric_clone = deepcopy(metric) for i in range(rank, num_batches, world_size): batch_kwargs_update = {k: v[i] if isinstance(v, Tensor) else v for k, v in kwargs_update.items()} @@ -154,6 +165,16 @@ def _class_test( # compute batch stats and aggregate for global stats batch_result = metric(preds[i], target[i], **batch_kwargs_update) + if rank == 0 and world_size == 1 and i == 0: # check only in non-ddp mode and first batch + # dummy check to make sure that forward/update works as expected + metric_clone.update(preds[i], target[i], **batch_kwargs_update) + update_result = metric_clone.compute() + if isinstance(batch_result, dict): + for key in batch_result: + _assert_allclose(batch_result, update_result[key], key=key) + else: + _assert_allclose(batch_result, update_result) + if metric.dist_sync_on_step and check_dist_sync_on_step and rank == 0: if isinstance(preds, Tensor): ddp_preds = torch.cat([preds[i + r] for r in range(world_size)]).cpu() From fa351e8bcc177132964d71d8718ef911587fc674 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Fri, 13 Sep 2024 17:23:25 +0200 Subject: [PATCH 09/11] ci/doc: install with `-e` to resolve source links (#2740) --- .github/workflows/docs-build.yml | 16 ++++++---------- docs/source/conf.py | 8 ++++++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/docs-build.yml b/.github/workflows/docs-build.yml index 5aeef255bfe..16eb7e72e4a 100644 --- a/.github/workflows/docs-build.yml +++ b/.github/workflows/docs-build.yml @@ -48,18 +48,11 @@ jobs: pytorch-version: ${{ matrix.pytorch-version }} pypi-dir: ${{ env.PYPI_CACHE }} - - name: Install Latex - if: ${{ matrix.target == 'html' }} - # install Texlive, see https://linuxconfig.org/how-to-install-latex-on-ubuntu-20-04-focal-fossa-linux - run: | - sudo apt-get update --fix-missing - sudo apt-get install -y \ - 
texlive-latex-extra texlive-pictures texlive-fonts-recommended dvipng cm-super - - name: Install package & dependencies run: | make get-sphinx-template - pip install . -U -r requirements/_docs.txt \ + # install with -e so the path to source link comes from this project not from the installed package + pip install -e . -U -r requirements/_docs.txt \ --find-links="${PYPI_CACHE}" --find-links="${TORCH_URL}" - run: pip list - name: Full build for deployment @@ -70,7 +63,10 @@ jobs: run: echo "SPHINX_ENABLE_GALLERY=0" >> $GITHUB_ENV - name: make ${{ matrix.target }} working-directory: ./docs - run: make ${{ matrix.target }} --debug --jobs $(nproc) SPHINXOPTS="-W --keep-going" + run: | + pwd + ls -la + make ${{ matrix.target }} --debug --jobs $(nproc) SPHINXOPTS="-W --keep-going" - name: Upload built docs if: ${{ matrix.target == 'html' && github.event_name != 'pull_request' }} diff --git a/docs/source/conf.py b/docs/source/conf.py index 9484761ba7a..d7a68156a9e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -269,6 +269,11 @@ def _set_root_image_path(page_path: str) -> None: ), ] +# MathJax configuration +mathjax3_config = { + "tex": {"packages": {"[+]": ["ams", "newcommand", "configMacros"]}}, +} + # -- Options for Epub output ------------------------------------------------- # Bibliographic Dublin Core info. @@ -358,8 +363,7 @@ def package_list_from_file(file: str) -> list[str]: autodoc_mock_imports = MOCK_PACKAGES -# Resolve function -# This function is used to populate the (source) links in the API +# Resolve function - this function is used to populate the (source) links in the API def linkcode_resolve(domain, info) -> Optional[str]: # noqa: ANN001 return _linkcode_resolve(domain, info=info, github_user="Lightning-AI", github_repo="torchmetrics") From ead5cbb64bd7622b1750fe559ad65cc6966ce10c Mon Sep 17 00:00:00 2001 From: Jirka B Date: Fri, 13 Sep 2024 19:58:57 +0200 Subject: [PATCH 10/11] test: freeze `faster-coco-eval==1.5.*` (cherry picked from commit c4b32aad3af797a922239d3b1fbbee0becf8d48c) --- requirements/detection_test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/detection_test.txt b/requirements/detection_test.txt index 6515620c715..edfc1b97fff 100644 --- a/requirements/detection_test.txt +++ b/requirements/detection_test.txt @@ -1,4 +1,4 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -faster-coco-eval >=1.3.3 +faster-coco-eval ==1.5.* From 9baf6e6c68ba26e173d4945dcc4a712ae5465738 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Fri, 13 Sep 2024 20:15:20 +0200 Subject: [PATCH 11/11] Apply suggestions from code review --- src/torchmetrics/classification/dice.py | 6 +++--- src/torchmetrics/functional/classification/dice.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/torchmetrics/classification/dice.py b/src/torchmetrics/classification/dice.py index eb6b228778a..cbc8a84987e 100644 --- a/src/torchmetrics/classification/dice.py +++ b/src/torchmetrics/classification/dice.py @@ -116,9 +116,9 @@ class Dice(Metric): kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. .. warning:: - The `dice` metrics is being deprecated from the classification subpackage in v1.6.0 of torchmetrics and will be - removed in v1.7.0. 
Please instead consider using `f1score` metric from the classification subpackage as it - provides the same functionality. Additionally, we are going to re-add the `dice` metric in the segmentation + The ``dice`` metrics is being deprecated from the classification subpackage in v1.6.0 of torchmetrics and will be + removed in v1.7.0. Please instead consider using ``f1score`` metric from the classification subpackage as it + provides the same functionality. Additionally, we are going to re-add the ``dice`` metric in the segmentation domain in v1.6.0 with slight modifications to functionality. Raises: diff --git a/src/torchmetrics/functional/classification/dice.py b/src/torchmetrics/functional/classification/dice.py index 3aa26212fa5..5c08a028572 100644 --- a/src/torchmetrics/functional/classification/dice.py +++ b/src/torchmetrics/functional/classification/dice.py @@ -152,9 +152,9 @@ def dice( than what they appear to be. .. warning:: - The `dice` metrics is being deprecated from the classification subpackage in v1.6.0 of torchmetrics and will be - removed in v1.7.0. Please instead consider using `f1score` metric from the classification subpackage as it - provides the same functionality. Additionally, we are going to re-add the `dice` metric in the segmentation + The ``dice`` metrics is being deprecated from the classification subpackage in v1.6.0 of torchmetrics and will be + removed in v1.7.0. Please instead consider using ``f1score`` metric from the classification subpackage as it + provides the same functionality. Additionally, we are going to re-add the ``dice`` metric in the segmentation domain in v1.6.0 with slight modifications to functionality. Return: