Commit

Merge branch 'master' into master
SkafteNicki authored Mar 5, 2024
2 parents a3d4166 + 2c2316e commit b17dbce
Showing 39 changed files with 130 additions and 134 deletions.
2 changes: 1 addition & 1 deletion .github/actions/pull-caches/action.yml
@@ -90,5 +90,5 @@ runs:

- name: Restored References
continue-on-error: true
run: ls -lh tests/_cache-references/
run: py-tree tests/_cache-references/ --show_hidden
shell: bash
2 changes: 1 addition & 1 deletion .github/actions/push-caches/action.yml
@@ -99,5 +99,5 @@ runs:
key: cache-references

- name: Post References
run: ls -lh tests/_cache-references/
run: py-tree tests/_cache-references/ --show_hidden
shell: bash
1 change: 1 addition & 0 deletions .github/workflows/ci-integrate.yml
@@ -34,6 +34,7 @@ jobs:
- { python-version: "3.10", os: "windows" } # todo: https://discuss.pytorch.org/t/numpy-is-not-available-error/146192
include:
- { python-version: "3.10", requires: "latest", os: "ubuntu-22.04" }
- { python-version: "3.10", requires: "latest", os: "macOS-14" } # M1 machine
env:
PYTORCH_URL: "https://download.pytorch.org/whl/cpu/torch_stable.html"
FREEZE_REQUIREMENTS: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }}
7 changes: 7 additions & 0 deletions .github/workflows/ci-tests.yml
@@ -42,13 +42,19 @@ jobs:
- "2.1.2"
- "2.2.1"
include:
# cover additional python and PR combinations
- { os: "ubuntu-22.04", python-version: "3.8", pytorch-version: "1.13.1" }
- { os: "ubuntu-22.04", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "ubuntu-22.04", python-version: "3.10", pytorch-version: "2.2.1" }
- { os: "ubuntu-22.04", python-version: "3.11", pytorch-version: "2.2.1" }
# standard mac machine, not the M1
- { os: "macOS-12", python-version: "3.8", pytorch-version: "1.13.1" }
- { os: "macOS-12", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "macOS-12", python-version: "3.11", pytorch-version: "2.2.1" }
# using the ARM based M1 machine
- { os: "macOS-14", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "macOS-14", python-version: "3.11", pytorch-version: "2.2.1" }
# some windows
- { os: "windows-2022", python-version: "3.8", pytorch-version: "1.13.1" }
- { os: "windows-2022", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "windows-2022", python-version: "3.11", pytorch-version: "2.2.1" }
@@ -75,6 +81,7 @@ jobs:
if: ${{ runner.os == 'macOS' }}
run: |
echo 'UNITTEST_TIMEOUT=--timeout=75' >> $GITHUB_ENV
brew install mecab # https://github.com/coqui-ai/TTS/issues/1533#issuecomment-1338662303
brew install gcc libomp ffmpeg # https://github.com/pytorch/pytorch/issues/20030
- name: Setup Linux
if: ${{ runner.os == 'Linux' }}
4 changes: 2 additions & 2 deletions .github/workflows/publish-pkg.yml
@@ -67,7 +67,7 @@ jobs:
- run: ls -lh dist/
# We do this, since failures on test.pypi aren't that bad
- name: Publish to Test PyPI
uses: pypa/[email protected].11
uses: pypa/[email protected].12
with:
user: __token__
password: ${{ secrets.test_pypi_password }}
@@ -94,7 +94,7 @@ jobs:
path: dist
- run: ls -lh dist/
- name: Publish distribution 📦 to PyPI
uses: pypa/[email protected].11
uses: pypa/[email protected].12
with:
user: __token__
password: ${{ secrets.pypi_password }}
2 changes: 1 addition & 1 deletion .gitignore
@@ -40,7 +40,7 @@ pip-delete-this-directory.txt
# Unit test / coverage reports
tests/_data/
data.zip
tests/_reference-cache/
tests/_cache-references/
htmlcov/
.coverage
.coverage.*
3 changes: 2 additions & 1 deletion Makefile
@@ -36,5 +36,6 @@ env:
pip install -e . -U -r requirements/_devel.txt

data:
python -c "from urllib.request import urlretrieve ; urlretrieve('https://pl-public-data.s3.amazonaws.com/metrics/data.zip', 'data.zip')"
pip install -q wget
python -m wget https://pl-public-data.s3.amazonaws.com/metrics/data.zip
unzip -o data.zip -d ./tests
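The updated `data` target downloads the archive with the `wget` PyPI package (via `python -m wget <url>`) instead of `urllib.request.urlretrieve`, then unpacks it into `./tests`. For reference, a minimal Python sketch of the same download-and-extract step, assuming the `wget` package is installed; it only uses the `wget.download` helper and the standard-library `zipfile` module:

```python
import zipfile

import wget  # lightweight PyPI package exposing wget.download(url, out=None)

# Fetch the test-data archive and unpack it into ./tests, mirroring `make data`.
archive = wget.download("https://pl-public-data.s3.amazonaws.com/metrics/data.zip", out="data.zip")
with zipfile.ZipFile(archive) as zf:
    zf.extractall("./tests")
```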
2 changes: 1 addition & 1 deletion requirements/_doctest.txt
@@ -2,5 +2,5 @@
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

pytest >=6.0.0, <7.5.0
pytest-doctestplus >=0.9.0, <=1.1.0
pytest-doctestplus >=0.9.0, <=1.2.0
pytest-rerunfailures >=10.0, <14.0
2 changes: 1 addition & 1 deletion requirements/_tests.txt
@@ -4,7 +4,7 @@
coverage ==7.4.3
pytest ==7.4.4
pytest-cov ==4.1.0
pytest-doctestplus ==1.1.0
pytest-doctestplus ==1.2.0
pytest-rerunfailures ==13.0
pytest-timeout ==2.2.0
pytest-xdist ==3.5.0
2 changes: 1 addition & 1 deletion requirements/text_test.txt
@@ -4,5 +4,5 @@
jiwer >=2.3.0, <3.1.0
rouge-score >0.1.0, <=0.1.2
bert_score ==0.3.13
huggingface-hub <0.21 # hotfix, failing SDR for latest PT 1.11
huggingface-hub <0.22
sacrebleu >=2.3.0, <2.5.0
2 changes: 1 addition & 1 deletion src/torchmetrics/functional/image/_deprecated.py
@@ -110,7 +110,7 @@ def _relative_average_spectral_error(preds: Tensor, target: Tensor, window_size:
>>> preds = torch.rand(4, 3, 16, 16, generator=gen)
>>> target = torch.rand(4, 3, 16, 16, generator=gen)
>>> _relative_average_spectral_error(preds, target)
tensor(5114.6641)
tensor(5114.66...)
"""
_deprecated_root_import_func("relative_average_spectral_error", "image")
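The expected doctest output here (and in the three files that follow) is loosened from the exact value `tensor(5114.6641)` to `tensor(5114.66...)`; together with the `pytest-doctestplus` bump above, the trailing `...` lets the example tolerate small platform- or backend-dependent rounding in the last digits. A minimal, self-contained sketch of the same ellipsis-tolerant pattern (not taken from the repository):

```python
import doctest

import torch


def rase_like_value() -> torch.Tensor:
    """Return a fixed tensor, checked with an ellipsis-tolerant doctest.

    >>> rase_like_value()  # doctest: +ELLIPSIS
    tensor(5114.66...)
    """
    # The exact printed value is tensor(5114.6641); the ``...`` in the expected
    # output absorbs any difference in the trailing digits.
    return torch.tensor(5114.6641)


if __name__ == "__main__":
    doctest.testmod()
```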
2 changes: 1 addition & 1 deletion src/torchmetrics/functional/image/rase.py
@@ -85,7 +85,7 @@ def relative_average_spectral_error(preds: Tensor, target: Tensor, window_size:
>>> preds = torch.rand(4, 3, 16, 16)
>>> target = torch.rand(4, 3, 16, 16)
>>> relative_average_spectral_error(preds, target)
tensor(5114.6641)
tensor(5114.66...)
Raises:
ValueError: If ``window_size`` is not a positive integer.
2 changes: 1 addition & 1 deletion src/torchmetrics/image/_deprecated.py
@@ -109,7 +109,7 @@ class _RelativeAverageSpectralError(RelativeAverageSpectralError):
>>> target = torch.rand(4, 3, 16, 16)
>>> rase = _RelativeAverageSpectralError()
>>> rase(preds, target)
tensor(5114.6641)
tensor(5114.66...)
"""

2 changes: 1 addition & 1 deletion src/torchmetrics/image/rase.py
@@ -53,7 +53,7 @@ class RelativeAverageSpectralError(Metric):
>>> target = torch.rand(4, 3, 16, 16)
>>> rase = RelativeAverageSpectralError()
>>> rase(preds, target)
tensor(5114.6641)
tensor(5114.66...)
Raises:
ValueError: If ``window_size`` is not a positive integer.
4 changes: 0 additions & 4 deletions src/torchmetrics/utilities/imports.py
@@ -17,10 +17,8 @@
import sys

from lightning_utilities.core.imports import RequirementCache
from packaging.version import Version, parse

_PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
_PYTHON_LOWER_3_8 = parse(_PYTHON_VERSION) < Version("3.8")
_TORCH_LOWER_2_0 = RequirementCache("torch<2.0.0")
_TORCH_GREATER_EQUAL_1_11 = RequirementCache("torch>=1.11.0")
_TORCH_GREATER_EQUAL_1_12 = RequirementCache("torch>=1.12.0")
@@ -29,7 +27,6 @@
_TORCH_GREATER_EQUAL_2_1 = RequirementCache("torch>=2.1.0")
_TORCH_GREATER_EQUAL_2_2 = RequirementCache("torch>=2.2.0")

_JIWER_AVAILABLE = RequirementCache("jiwer")
_NLTK_AVAILABLE = RequirementCache("nltk")
_ROUGE_SCORE_AVAILABLE = RequirementCache("rouge_score")
_BERTSCORE_AVAILABLE = RequirementCache("bert_score")
@@ -49,7 +46,6 @@
_GAMMATONE_AVAILABLE = RequirementCache("gammatone")
_TORCHAUDIO_AVAILABLE = RequirementCache("torchaudio")
_TORCHAUDIO_GREATER_EQUAL_0_10 = RequirementCache("torchaudio>=0.10.0")
_SACREBLEU_AVAILABLE = RequirementCache("sacrebleu")
_REGEX_AVAILABLE = RequirementCache("regex")
_PYSTOI_AVAILABLE = RequirementCache("pystoi")
_FAST_BSS_EVAL_AVAILABLE = RequirementCache("fast_bss_eval")
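For context on the names that remain after this cleanup: each flag in this module is a `RequirementCache` from `lightning_utilities`, which evaluates to `True` or `False` depending on whether the named requirement is satisfied in the current environment; the `skipif` markers in the tests below rely on exactly that boolean behaviour. A small usage sketch (the `scipy` requirement is only an illustrative stand-in):

```python
from lightning_utilities.core.imports import RequirementCache

# Truthiness reflects whether the requirement is importable/satisfied here.
_SCIPY_AVAILABLE = RequirementCache("scipy")

if _SCIPY_AVAILABLE:
    import scipy  # noqa: F401  # safe: the requirement check passed
else:
    print("scipy is not available; related functionality would be skipped")
```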
8 changes: 4 additions & 4 deletions tests/unittests/audio/test_pesq.py
@@ -130,7 +130,7 @@ def test_on_real_audio():
"""Test that metric works as expected on real audio signals."""
rate, ref = wavfile.read(_SAMPLE_AUDIO_SPEECH)
rate, deg = wavfile.read(_SAMPLE_AUDIO_SPEECH_BAB_DB)
pesq = perceptual_evaluation_speech_quality(torch.from_numpy(deg), torch.from_numpy(ref), rate, "wb")
assert pesq == 1.0832337141036987
pesq = perceptual_evaluation_speech_quality(torch.from_numpy(deg), torch.from_numpy(ref), rate, "nb")
assert pesq == 1.6072081327438354
pesq_score = perceptual_evaluation_speech_quality(torch.from_numpy(deg), torch.from_numpy(ref), rate, "wb")
assert torch.allclose(pesq_score, torch.tensor(1.0832337141036987), atol=1e-4)
pesq_score = perceptual_evaluation_speech_quality(torch.from_numpy(deg), torch.from_numpy(ref), rate, "nb")
assert torch.allclose(pesq_score, torch.tensor(1.6072081327438354), atol=1e-4)
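The rewritten PESQ assertions compare against the recorded scores with `torch.allclose` and an absolute tolerance rather than exact float equality, so tiny numerical differences between platforms no longer break the test. A small illustration of the difference (the values here are made up):

```python
import torch

score = torch.tensor(1.0832500)      # value computed on one platform
reference = torch.tensor(1.0832337)  # value recorded in the test

assert score != reference                            # bit-exact comparison fails
assert torch.allclose(score, reference, atol=1e-4)   # tolerance-based check passes
```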
2 changes: 0 additions & 2 deletions tests/unittests/classification/test_group_fairness.py
@@ -26,7 +26,6 @@
from torchmetrics import Metric
from torchmetrics.classification.group_fairness import BinaryFairness
from torchmetrics.functional.classification.group_fairness import binary_fairness
from torchmetrics.utilities.imports import _PYTHON_LOWER_3_8

from unittests import THRESHOLD
from unittests.classification._inputs import _group_cases
@@ -222,7 +221,6 @@ def run_precision_test_gpu(

@mock.patch("unittests.helpers.testers._assert_tensor", _assert_tensor)
@mock.patch("unittests.helpers.testers._assert_allclose", _assert_allclose)
@pytest.mark.skipif(_PYTHON_LOWER_3_8, reason="`TestBinaryFairness` requires `python>=3.8`.")
@pytest.mark.parametrize("inputs", _group_cases)
class TestBinaryFairness(BinaryFairnessTester):
"""Test class for `BinaryFairness` metric."""
8 changes: 4 additions & 4 deletions tests/unittests/image/test_fid.py
@@ -34,7 +34,7 @@ def test_no_train_network_missing_torch_fidelity():
NoTrainInceptionV3(name="inception-v3-compat", features_list=["2048"])


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_no_train():
"""Assert that metric never leaves evaluation mode."""

@@ -52,7 +52,7 @@ def forward(self, x):
assert not model.metric.inception.training, "FID metric was changed to training mode which should not happen"


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_fid_pickle():
"""Assert that we can initialize the metric and pickle it."""
metric = FrechetInceptionDistance()
@@ -80,7 +80,7 @@ def test_fid_raises_errors_and_warnings():
_ = FrechetInceptionDistance(feature=[1, 2])


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("feature", [64, 192, 768, 2048])
def test_fid_same_input(feature):
"""If real and fake are update on the same data the fid score should be 0."""
@@ -111,7 +111,7 @@ def __len__(self) -> int:


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test is too slow without gpu")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("equal_size", [False, True])
def test_compare_fid(tmpdir, equal_size, feature=768):
"""Check that the hole pipeline give the same result as torch-fidelity."""
8 changes: 4 additions & 4 deletions tests/unittests/image/test_inception.py
@@ -24,7 +24,7 @@
torch.manual_seed(42)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_no_train():
"""Assert that metric never leaves evaluation mode."""

@@ -44,7 +44,7 @@ def forward(self, x):
), "InceptionScore metric was changed to training mode which should not happen"


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_is_pickle():
"""Assert that we can initialize the metric and pickle it."""
metric = InceptionScore()
@@ -79,7 +79,7 @@ def test_is_raises_errors_and_warnings():
InceptionScore(feature=[1, 2])


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_is_update_compute():
"""Test that inception score works as expected."""
metric = InceptionScore()
@@ -105,7 +105,7 @@ def __len__(self) -> int:


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test is too slow without gpu")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("compute_on_cpu", [True, False])
def test_compare_is(tmpdir, compute_on_cpu):
"""Check that the hole pipeline give the same result as torch-fidelity."""
10 changes: 5 additions & 5 deletions tests/unittests/image/test_kid.py
@@ -24,7 +24,7 @@
torch.manual_seed(42)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_no_train():
"""Assert that metric never leaves evaluation mode."""

@@ -42,7 +42,7 @@ def forward(self, x):
assert not model.metric.inception.training, "FID metric was changed to training mode which should not happen"


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_kid_pickle():
"""Assert that we can initialize the metric and pickle it."""
metric = KernelInceptionDistance()
@@ -83,7 +83,7 @@ def test_kid_raises_errors_and_warnings():
m.compute()


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_kid_extra_parameters():
"""Test that the different input arguments raises expected errors if wrong."""
with pytest.raises(ValueError, match="Argument `subsets` expected to be integer larger than 0"):
@@ -102,7 +102,7 @@ def test_kid_extra_parameters():
KernelInceptionDistance(coef=-1)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("feature", [64, 192, 768, 2048])
def test_kid_same_input(feature):
"""Test that the metric works."""
@@ -132,7 +132,7 @@ def __len__(self) -> int:


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test is too slow without gpu")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_compare_kid(tmpdir, feature=2048):
"""Check that the hole pipeline give the same result as torch-fidelity."""
from torch_fidelity import calculate_metrics
13 changes: 7 additions & 6 deletions tests/unittests/image/test_lpips.py
@@ -16,11 +16,10 @@

import pytest
import torch
from lpips import LPIPS as LPIPS_reference # noqa: N811
from torch import Tensor
from torchmetrics.functional.image.lpips import learned_perceptual_image_patch_similarity
from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
from torchmetrics.utilities.imports import _LPIPS_AVAILABLE, _TORCHVISION_AVAILABLE
from torchmetrics.utilities.imports import _TORCHVISION_AVAILABLE

from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester
@@ -43,15 +42,19 @@ def _reference_lpips(
img1: Tensor, img2: Tensor, net_type: str, normalize: bool = False, reduction: str = "mean"
) -> Tensor:
"""Comparison function for tm implementation."""
ref = LPIPS_reference(net=net_type)
try:
from lpips import LPIPS
except ImportError:
pytest.skip("test requires lpips package to be installed")

ref = LPIPS(net=net_type)
res = ref(img1, img2, normalize=normalize).detach().cpu().numpy()
if reduction == "mean":
return res.mean()
return res.sum()


@pytest.mark.skipif(not _TORCHVISION_AVAILABLE, reason="test requires that torchvision is installed")
@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed")
class TestLPIPS(MetricTester):
"""Test class for `LearnedPerceptualImagePatchSimilarity` metric."""

@@ -109,7 +112,6 @@ def test_normalize_arg(normalize):


@pytest.mark.skipif(not _TORCHVISION_AVAILABLE, reason="test requires that torchvision is installed")
@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed")
def test_error_on_wrong_init():
"""Test class raises the expected errors."""
with pytest.raises(ValueError, match="Argument `net_type` must be one .*"):
@@ -120,7 +122,6 @@ def test_error_on_wrong_init():


@pytest.mark.skipif(not _TORCHVISION_AVAILABLE, reason="test requires that torchvision is installed")
@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed")
@pytest.mark.parametrize(
("inp1", "inp2"),
[
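The change above replaces the module-level `from lpips import LPIPS ...` import with a lazy import inside the `_reference_lpips` helper, which calls `pytest.skip` when the package is missing. Because the test module now imports cleanly without `lpips`, the `_LPIPS_AVAILABLE` import and the corresponding `skipif` markers in this file are no longer needed and are removed.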
6 changes: 3 additions & 3 deletions tests/unittests/image/test_mifid.py
@@ -98,7 +98,7 @@ def calculate_mifid(m1, s1, features1, m2, s2, features2):
return fid_private / (distance_private_thresholded + 1e-15)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_no_train():
"""Assert that metric never leaves evaluation mode."""

@@ -139,7 +139,7 @@ def test_mifid_raises_errors_and_warnings():
_ = MemorizationInformedFrechetInceptionDistance(cosine_distance_eps=1.1)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("feature", [64, 192, 768, 2048])
def test_fid_same_input(feature):
"""If real and fake are update on the same data the fid score should be 0."""
@@ -157,7 +157,7 @@ def test_fid_same_input(feature):


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test is too slow without gpu")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("equal_size", [False, True])
def test_compare_mifid(equal_size):
"""Check that our implementation of MIFID is correct by comparing it to the original implementation."""