From 3e890d700601d0d96ac842e137190aefc7514119 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Tue, 3 Sep 2024 08:59:01 +0200 Subject: [PATCH] ci/gpu: debug skipped cache (#2709) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .azure/gpu-nuke-cache.yml | 56 +++++++++++++++++++ .azure/gpu-unittests.yml | 47 ++++++++++------ requirements/_docs.txt | 2 +- requirements/_tests.txt | 11 ++-- requirements/text.txt | 4 +- src/torchmetrics/audio/dnsmos.py | 2 +- src/torchmetrics/audio/stoi.py | 2 +- src/torchmetrics/functional/audio/dnsmos.py | 2 +- src/torchmetrics/functional/audio/stoi.py | 2 +- tests/unittests/audio/test_stoi.py | 2 + .../test_generalized_dice_score.py | 6 +- 11 files changed, 105 insertions(+), 31 deletions(-) create mode 100644 .azure/gpu-nuke-cache.yml diff --git a/.azure/gpu-nuke-cache.yml b/.azure/gpu-nuke-cache.yml new file mode 100644 index 00000000000..f8f758ce8c0 --- /dev/null +++ b/.azure/gpu-nuke-cache.yml @@ -0,0 +1,56 @@ +trigger: + tags: + include: + - "*" +# run every month to sanitize dev environment +schedules: + - cron: "0 0 1 * *" + displayName: Monthly nuke caches + branches: + include: + - master +# run on PR changing only this file +pr: + branches: + include: + - master + paths: + include: + - .azure/gpu-nuke-cache.yml + +jobs: + - job: nuke_caches + # how long to run the job before automatically cancelling + timeoutInMinutes: "10" + # how much time to give 'run always even if cancelled tasks' before stopping them + cancelTimeoutInMinutes: "2" + + pool: "lit-rtx-3090" + + variables: + # these two caches assume to run repetitively on the same set of machines + # see: https://github.com/microsoft/azure-pipelines-agent/issues/4113#issuecomment-1439241481 + TORCH_HOME: "/var/tmp/torch" + TRANSFORMERS_CACHE: "/var/tmp/hf/transformers" + HF_HOME: "/var/tmp/hf/home" + HF_HUB_CACHE: "/var/tmp/hf/hub" + PIP_CACHE_DIR: "/var/tmp/pip" + 
CACHED_REFERENCES: "/var/tmp/cached-references.zip" + + container: + image: "ubuntu:22.04" + options: "-v /var/tmp:/var/tmp" + + steps: + - bash: | + set -ex + rm -rf $(TORCH_HOME) + rm -rf $(TRANSFORMERS_CACHE) + rm -rf $(HF_HOME) + rm -rf $(HF_HUB_CACHE) + rm -rf $(PIP_CACHE_DIR) + rm -rf $(CACHED_REFERENCES) + displayName: "delete all caches" + - bash: | + ls -lh /var/tmp + displayName: "show tmp/ folder" diff --git a/.azure/gpu-unittests.yml b/.azure/gpu-unittests.yml index 25dc593558d..78dc6beee86 100644 --- a/.azure/gpu-unittests.yml +++ b/.azure/gpu-unittests.yml @@ -9,6 +9,13 @@ trigger: - master - release/* - refs/tags/* +# run every month to populate caches +schedules: + - cron: "0 1 1 * *" + displayName: Monthly re-build caches + branches: + include: + - master pr: - master - release/* @@ -67,6 +74,11 @@ jobs: CUDA_version_mm="${CUDA_version//'.'/''}" echo "##vso[task.setvariable variable=CUDA_VERSION_MM]$CUDA_version_mm" echo "##vso[task.setvariable variable=TORCH_URL]https://download.pytorch.org/whl/cu${CUDA_version_mm}/torch_stable.html" + mkdir -p $(TORCH_HOME) + mkdir -p $(TRANSFORMERS_CACHE) + mkdir -p $(HF_HOME) + mkdir -p $(HF_HUB_CACHE) + mkdir -p $(PIP_CACHE_DIR) displayName: "set Env. vars" - bash: | echo "##vso[task.setvariable variable=ALLOW_SKIP_IF_OUT_OF_MEMORY]1" @@ -111,7 +123,7 @@ jobs: - bash: | python .github/assistant.py set-oldest-versions - condition: eq(variables['torch-ver'], '1.10.2') + condition: eq(variables['torch-ver'], '1.10') displayName: "Setting oldest versions" - bash: | @@ -132,6 +144,21 @@ jobs: displayName: "Show caches" - bash: | + python -m pytest torchmetrics --cov=torchmetrics \ + --timeout=240 --durations=50 \ + --reruns 2 --reruns-delay 1 + # --numprocesses=5 --dist=loadfile + env: + DOCTEST_DOWNLOAD_TIMEOUT: "180" + SKIP_SLOW_DOCTEST: "1" + workingDirectory: "src/" + timeoutInMinutes: "40" + displayName: "DocTesting" + + - bash: | + df -h . 
+ ls -lh $(CACHED_REFERENCES) + ls -lh tests/ # Check if the file references exists if [ -f $(CACHED_REFERENCES) ]; then # Create a directory if it doesn't already exist @@ -142,25 +169,12 @@ jobs: else echo "The file '$(CACHED_REFERENCES)' does not exist." fi - du -h --max-depth=1 tests/ timeoutInMinutes: "5" # if pull request, copy the cache to the tests folder to be used in the next steps condition: eq(variables['Build.Reason'], 'PullRequest') continueOnError: "true" displayName: "Copy/Unzip cached refs" - - bash: | - python -m pytest torchmetrics --cov=torchmetrics \ - --timeout=240 --durations=50 \ - --reruns 2 --reruns-delay 1 - # --numprocesses=5 --dist=loadfile - env: - DOCTEST_DOWNLOAD_TIMEOUT: "180" - SKIP_SLOW_DOCTEST: "1" - workingDirectory: "src/" - timeoutInMinutes: "40" - displayName: "DocTesting" - - bash: | wget https://pl-public-data.s3.amazonaws.com/metrics/data.zip unzip -o data.zip @@ -169,6 +183,7 @@ jobs: displayName: "Pull testing data from S3" - bash: | + du -h --max-depth=1 . 
python -m pytest $(TEST_DIRS) \ -m "not DDP" --numprocesses=5 --dist=loadfile \ --cov=torchmetrics --timeout=240 --durations=100 \ @@ -192,9 +207,10 @@ jobs: displayName: "UnitTesting DDP" - bash: | + du -h --max-depth=1 tests/ # archive potentially updated cache to the machine filesystem to be reused with next jobs zip -q -r $(CACHED_REFERENCES) tests/_cache-references - du -h --max-depth=1 tests/ + ls -lh $(CACHED_REFERENCES) # set as extra step to not pollute general cache when jobs fails or crashes # so do this update only with successful jobs on master condition: and(succeeded(), ne(variables['Build.Reason'], 'PullRequest')) @@ -209,7 +225,6 @@ jobs: python -m coverage xml python -m codecov --token=$(CODECOV_TOKEN) --name="GPU-coverage" \ --commit=$(Build.SourceVersion) --flags=gpu,unittest --env=linux,azure - ls -l workingDirectory: "tests/" # skip for PR if there is nothing to test, note that outside PR there is default 'unittests' condition: and(succeeded(), ne(variables['TEST_DIRS'], '')) diff --git a/requirements/_docs.txt b/requirements/_docs.txt index 1752cd11dc2..d2e17eb64ce 100644 --- a/requirements/_docs.txt +++ b/requirements/_docs.txt @@ -29,4 +29,4 @@ pydantic > 1.0.0, < 3.0.0 # todo: until this has resolution - https://github.com/sphinx-gallery/sphinx-gallery/issues/1290 # Image scikit-image ~=0.22; python_version < "3.10" -scikit-image ~=0.24; python_version >= "3.10" +scikit-image ~=0.24; python_version > "3.9" # we do not use `> =` because of oldest replacement diff --git a/requirements/_tests.txt b/requirements/_tests.txt index 1aeb982ee9e..7708dfa0f3e 100644 --- a/requirements/_tests.txt +++ b/requirements/_tests.txt @@ -2,6 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment coverage ==7.6.* +codecov ==2.1.13 pytest ==8.3.* pytest-cov ==5.0.0 pytest-doctestplus ==1.2.1 @@ -10,11 +11,11 @@ pytest-timeout ==2.3.1 pytest-xdist ==3.6.1 phmdoctest ==1.4.0 -psutil <6.1.0 
-pyGithub ==2.4.0 -fire <=0.6.0 +psutil ==6.* +pyGithub >2.0.0, <2.5.0 +fire ==0.6.* cloudpickle >1.3, <=3.0.0 -scikit-learn >=1.1.1, <1.3.0; python_version < "3.9" -scikit-learn >=1.4.0, <1.6.0; python_version >= "3.9" +scikit-learn ==1.2.*; python_version < "3.9" +scikit-learn ==1.5.*; python_version > "3.8" # we do not use `> =` because of oldest replacement cachier ==3.0.1 diff --git a/requirements/text.txt b/requirements/text.txt index 65396b65451..abdfe6808a7 100644 --- a/requirements/text.txt +++ b/requirements/text.txt @@ -1,8 +1,8 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -nltk >=3.8.2, <=3.9.1 -tqdm >=4.41.0, <4.67.0 +nltk >3.8.1, <=3.9.1 +tqdm <4.67.0 regex >=2021.9.24, <=2024.7.24 transformers >4.4.0, <4.45.0 mecab-python3 >=1.0.6, <1.1.0 diff --git a/src/torchmetrics/audio/dnsmos.py b/src/torchmetrics/audio/dnsmos.py index d164721f167..74d035a7fd4 100644 --- a/src/torchmetrics/audio/dnsmos.py +++ b/src/torchmetrics/audio/dnsmos.py @@ -79,7 +79,7 @@ class DeepNoiseSuppressionMeanOpinionScore(Metric): >>> preds = randn(8000) >>> dnsmos = DeepNoiseSuppressionMeanOpinionScore(8000, False) >>> dnsmos(preds) - tensor([2.2687, 2.0766, 1.1375, 1.2722], dtype=torch.float64) + tensor([2.2..., 2.0..., 1.1..., 1.2...], dtype=torch.float64) """ diff --git a/src/torchmetrics/audio/stoi.py b/src/torchmetrics/audio/stoi.py index cf5d5204f77..253dab3ea38 100644 --- a/src/torchmetrics/audio/stoi.py +++ b/src/torchmetrics/audio/stoi.py @@ -69,7 +69,7 @@ class ShortTimeObjectiveIntelligibility(Metric): >>> target = randn(8000) >>> stoi = ShortTimeObjectiveIntelligibility(8000, False) >>> stoi(preds, target) - tensor(-0.0842) + tensor(-0.084...) 
""" diff --git a/src/torchmetrics/functional/audio/dnsmos.py b/src/torchmetrics/functional/audio/dnsmos.py index 91c69de7a2b..9b0dca883db 100644 --- a/src/torchmetrics/functional/audio/dnsmos.py +++ b/src/torchmetrics/functional/audio/dnsmos.py @@ -218,7 +218,7 @@ def deep_noise_suppression_mean_opinion_score( >>> from torchmetrics.functional.audio.dnsmos import deep_noise_suppression_mean_opinion_score >>> preds = randn(8000) >>> deep_noise_suppression_mean_opinion_score(preds, 8000, False) - tensor([2.2687, 2.0766, 1.1375, 1.2722], dtype=torch.float64) + tensor([2.2..., 2.0..., 1.1..., 1.2...], dtype=torch.float64) """ if not _LIBROSA_AVAILABLE or not _ONNXRUNTIME_AVAILABLE or not _REQUESTS_AVAILABLE: diff --git a/src/torchmetrics/functional/audio/stoi.py b/src/torchmetrics/functional/audio/stoi.py index 91d09cc64c3..48e9e78510b 100644 --- a/src/torchmetrics/functional/audio/stoi.py +++ b/src/torchmetrics/functional/audio/stoi.py @@ -64,7 +64,7 @@ def short_time_objective_intelligibility( >>> preds = randn(8000) >>> target = randn(8000) >>> short_time_objective_intelligibility(preds, target, 8000).float() - tensor(-0.0842) + tensor(-0.084...) 
""" if not _PYSTOI_AVAILABLE: diff --git a/tests/unittests/audio/test_stoi.py b/tests/unittests/audio/test_stoi.py index 54374098779..2d872507401 100644 --- a/tests/unittests/audio/test_stoi.py +++ b/tests/unittests/audio/test_stoi.py @@ -20,6 +20,7 @@ from torch import Tensor from torchmetrics.audio import ShortTimeObjectiveIntelligibility from torchmetrics.functional.audio import short_time_objective_intelligibility +from torchmetrics.utilities.imports import _TORCH_GREATER_EQUAL_2_0 from unittests import _Input from unittests._helpers import seed_all @@ -120,6 +121,7 @@ def test_error_on_different_shape(metric_class=ShortTimeObjectiveIntelligibility metric(torch.randn(100), torch.randn(50)) +@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_2_0, reason="precision issue with older torch") def test_on_real_audio(): """Test that metric works on real audio signal.""" rate, ref = wavfile.read(_SAMPLE_AUDIO_SPEECH) diff --git a/tests/unittests/segmentation/test_generalized_dice_score.py b/tests/unittests/segmentation/test_generalized_dice_score.py index f7e43ef2b56..3f8acec842a 100644 --- a/tests/unittests/segmentation/test_generalized_dice_score.py +++ b/tests/unittests/segmentation/test_generalized_dice_score.py @@ -66,11 +66,11 @@ def _reference_generalized_dice( ], ) @pytest.mark.parametrize("include_background", [True, False]) -class TestMeanDiceScore(MetricTester): +class TestGeneralizedDiceScore(MetricTester): """Test class for `MeanIoU` metric.""" @pytest.mark.parametrize("ddp", [pytest.param(True, marks=pytest.mark.DDP), False]) - def test_mean_iou_class(self, preds, target, input_format, include_background, ddp): + def test_generalized_dice_class(self, preds, target, input_format, include_background, ddp): """Test class implementation of metric.""" self.run_class_metric_test( ddp=ddp, @@ -90,7 +90,7 @@ def test_mean_iou_class(self, preds, target, input_format, include_background, d }, ) - def test_mean_iou_functional(self, preds, target, input_format, 
include_background): + def test_generalized_dice_functional(self, preds, target, input_format, include_background): """Test functional implementation of metric.""" self.run_functional_metric_test( preds=preds,