From 2a8789e1c680833d550cf4569e6b77bf40261ad4 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Wed, 13 Dec 2023 20:27:49 +0100
Subject: [PATCH] ci/tests: cleaning standalone script (#19141)

* tests: cleaning standalone script

* switch

* from tests

* -m

* collect

* array

* tests_fabric/

* ..

* path prefix

* pl

* cleaning

* test_pytorch_profiler_nested_emit_nvtx

* Apply suggestions from code review

* Apply suggestions from code review

* todo

---
 .azure/gpu-tests-fabric.yml                   | 18 +++---
 .azure/gpu-tests-pytorch.yml                  |  7 +--
 pyproject.toml                                |  2 +-
 .../pytorch/utilities/testing/_runif.py       |  4 +-
 .../run_standalone_tests.sh                   | 59 ++++++++-----------
 tests/tests_fabric/conftest.py                | 33 ++++++-----
 tests/tests_fabric/run_standalone_tests.sh    |  1 -
 tests/tests_fabric/run_tpu_tests.sh           |  2 +-
 tests/tests_pytorch/conftest.py               | 33 ++++++-----
 .../tests_pytorch/profilers/test_profiler.py  | 30 +++++-----
 tests/tests_pytorch/run_standalone_tasks.sh   | 15 +++--
 tests/tests_pytorch/run_tpu_tests.sh          |  2 +-
 12 files changed, 99 insertions(+), 107 deletions(-)
 rename tests/{tests_pytorch => }/run_standalone_tests.sh (55%)
 delete mode 120000 tests/tests_fabric/run_standalone_tests.sh

diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml
index da477107e8467..a84a426e6d296 100644
--- a/.azure/gpu-tests-fabric.yml
+++ b/.azure/gpu-tests-fabric.yml
@@ -49,6 +49,7 @@ jobs:
      DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
      FREEZE_REQUIREMENTS: "1"
      PIP_CACHE_DIR: "/var/tmp/pip"
+     PL_RUN_CUDA_TESTS: "1"
    container:
      image: $(image)
      # default shm size is 64m. Increase it to avoid:
@@ -126,19 +127,16 @@ jobs:
         condition: and(succeeded(), eq(variables['PACKAGE_NAME'], 'fabric'))
         displayName: "Adjust tests & examples"
 
-      - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest -v --durations=50
-        workingDirectory: tests/tests_fabric
-        env:
-          PL_RUN_CUDA_TESTS: "1"
+      - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest . -v --durations=50
+        workingDirectory: tests/tests_fabric/
         displayName: "Testing: fabric standard"
         timeoutInMinutes: "10"
 
-      - bash: bash run_standalone_tests.sh
-        workingDirectory: tests/tests_fabric
+      - bash: bash ../run_standalone_tests.sh "."
+        workingDirectory: tests/tests_fabric/
         env:
-          PL_RUN_CUDA_TESTS: "1"
           PL_STANDALONE_TESTS_SOURCE: $(COVERAGE_SOURCE)
-        displayName: "Testing: fabric standalone tests"
+        displayName: "Testing: fabric standalone"
         timeoutInMinutes: "10"
 
       - bash: |
@@ -152,12 +150,12 @@ jobs:
           ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
             --flags=gpu,pytest,${COVERAGE_SOURCE} --name="GPU-coverage" --env=linux,azure
           ls -l
-        workingDirectory: tests/tests_fabric
+        workingDirectory: tests/tests_fabric/
         displayName: "Statistics"
 
       - script: |
           set -e
          bash run_fabric_examples.sh --accelerator=cuda --devices=1
           bash run_fabric_examples.sh --accelerator=cuda --devices=2 --strategy ddp
-        workingDirectory: examples
+        workingDirectory: examples/
         displayName: "Testing: fabric examples"
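With PL_RUN_CUDA_TESTS moved up to the job-level variables, the "fabric standard" step above becomes a plain pytest run and the GPU-only filtering is left to the conftest hooks shown further down. A local equivalent is roughly the following sketch (assumes a CUDA machine; `lightning` stands in for ${COVERAGE_SOURCE} and is also the standalone script's default source):

    cd tests/tests_fabric
    export PL_RUN_CUDA_TESTS=1
    python -m coverage run --source lightning -m pytest . -v --durations=50
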
diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml
index f154eb6632feb..19390490f0091 100644
--- a/.azure/gpu-tests-pytorch.yml
+++ b/.azure/gpu-tests-pytorch.yml
@@ -59,6 +59,7 @@ jobs:
      DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
      FREEZE_REQUIREMENTS: "1"
      PIP_CACHE_DIR: "/var/tmp/pip"
+     PL_RUN_CUDA_TESTS: "1"
    container:
      image: $(image)
      # default shm size is 64m. Increase it to avoid:
@@ -154,16 +155,13 @@ jobs:
 
       - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest -v --durations=50
         workingDirectory: tests/tests_pytorch
-        env:
-          PL_RUN_CUDA_TESTS: "1"
         displayName: "Testing: PyTorch standard"
         timeoutInMinutes: "35"
 
-      - bash: bash run_standalone_tests.sh
+      - bash: bash ../run_standalone_tests.sh "."
         workingDirectory: tests/tests_pytorch
         env:
           PL_USE_MOCKED_MNIST: "1"
-          PL_RUN_CUDA_TESTS: "1"
           PL_STANDALONE_TESTS_SOURCE: $(COVERAGE_SOURCE)
         displayName: "Testing: PyTorch standalone tests"
         timeoutInMinutes: "35"
@@ -172,7 +170,6 @@ jobs:
         workingDirectory: tests/tests_pytorch
         env:
           PL_USE_MOCKED_MNIST: "1"
-          PL_RUN_CUDA_TESTS: "1"
         displayName: "Testing: PyTorch standalone tasks"
         timeoutInMinutes: "10"
diff --git a/pyproject.toml b/pyproject.toml
index b1a0dbd9f83c1..b78e03aed5c5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -269,7 +269,7 @@ addopts = [
     "--ignore=legacy/checkpoints",
 ]
 markers = [
-    "cloud:Run the cloud tests for example",
+    "cloud: Run the cloud tests for example",
 ]
 filterwarnings = [
     "error::FutureWarning",
diff --git a/src/lightning/pytorch/utilities/testing/_runif.py b/src/lightning/pytorch/utilities/testing/_runif.py
index 3c67260a88bed..c3e0262d9906f 100644
--- a/src/lightning/pytorch/utilities/testing/_runif.py
+++ b/src/lightning/pytorch/utilities/testing/_runif.py
@@ -16,7 +16,7 @@
 from lightning_utilities.core.imports import RequirementCache
 
 from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_2_0
-from lightning.fabric.utilities.testing import _runif_reasons as FabricRunIf
+from lightning.fabric.utilities.testing import _runif_reasons as fabric_run_if
 from lightning.pytorch.accelerators.cpu import _PSUTIL_AVAILABLE
 from lightning.pytorch.callbacks.progress.rich_progress import _RICH_AVAILABLE
 from lightning.pytorch.core.module import _ONNX_AVAILABLE
@@ -68,7 +68,7 @@ def _runif_reasons(
 
     """
-    reasons, kwargs = FabricRunIf(
+    reasons, kwargs = fabric_run_if(
         min_cuda_gpus=min_cuda_gpus,
         min_torch=min_torch,
         max_torch=max_torch,
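The pyproject.toml tweak only inserts the space in the `name: description` form that pytest documents for registered markers; marker selection itself is unchanged, e.g. (illustrative):

    python -m pytest -m cloud -v

The _runif.py change is likewise cosmetic: the `_runif_reasons` helper keeps its behaviour and is merely re-aliased from the class-like `FabricRunIf` to the snake_case `fabric_run_if`.
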
diff --git a/tests/tests_pytorch/run_standalone_tests.sh b/tests/run_standalone_tests.sh
similarity index 55%
rename from tests/tests_pytorch/run_standalone_tests.sh
rename to tests/run_standalone_tests.sh
index a8f7432eee3fb..19d233794719f 100644
--- a/tests/tests_pytorch/run_standalone_tests.sh
+++ b/tests/run_standalone_tests.sh
@@ -23,25 +23,19 @@ source="${PL_STANDALONE_TESTS_SOURCE:-"lightning"}"
 # this environment variable allows special tests to run
 export PL_RUN_STANDALONE_TESTS=1
 # python arguments
-defaults="-m coverage run --source ${source} --append -m pytest --no-header -v -s --timeout 120"
+defaults=" -m coverage run --source ${source} --append -m pytest --no-header -v -s --timeout 120 "
 echo "Using defaults: ${defaults}"
-# find tests marked as `@RunIf(standalone=True)`. done manually instead of with pytest because it is faster
-grep_output=$(grep --recursive --word-regexp . --regexp 'standalone=True' --include '*.py')
+
+# get the testing location as the first argument
+test_path=$1
+printf "source path: $test_path\n"
 
-# file paths, remove duplicates
-files=$(echo "$grep_output" | cut -f1 -d: | sort | uniq)
-
-# get the list of parametrizations. we need to call them separately. the last two lines are removed.
-# note: if there's a syntax error, this will fail with some garbled output
-if [[ "$OSTYPE" == "darwin"* ]]; then
-  parametrizations=$(python3 -m pytest $files --collect-only --quiet "$@" | tail -r | sed -e '1,3d' | tail -r)
-else
-  parametrizations=$(python3 -m pytest $files --collect-only --quiet "$@" | head -n -2)
-fi
-# remove the "tests/tests_pytorch/" path suffixes
-path_suffix=$(basename "$(dirname "$(pwd)")")/$(basename "$(pwd)")"/" # https://stackoverflow.com/a/8223345
-parametrizations=${parametrizations//$path_suffix/}
+# collect all tests with parametrization based filtering with PL_RUN_STANDALONE_TESTS
+standalone_tests=$(python -m pytest $test_path -q --collect-only --pythonwarnings ignore)
+printf "Collected tests: \n $standalone_tests"
+# match only lines with tests
+parametrizations=$(grep -oP '\S+::test_\S+' <<< "$standalone_tests")
 # convert the list to be array
 parametrizations_arr=($parametrizations)
 
 report=''
@@ -61,30 +55,25 @@ function show_batched_output {
 }
 trap show_batched_output EXIT  # show the output on exit
 
+# remove the "tests/tests_pytorch/" path suffixes
+path_prefix=$(basename "$(dirname "$(pwd)")")/$(basename "$(pwd)")"/" # https://stackoverflow.com/a/8223345
 for i in "${!parametrizations_arr[@]}"; do
-  parametrization=${parametrizations_arr[$i]}
+  parametrization=${parametrizations_arr[$i]//$path_prefix/}
   prefix="$((i+1))/${#parametrizations_arr[@]}"
 
-  # check blocklist
-  if [[ "${parametrization}" == *"test_pytorch_profiler_nested_emit_nvtx"* ]]; then
-    echo "$prefix: Skipping $parametrization"
-    report+="Skipped\t$parametrization\n"
-    # do not continue the loop because we might need to wait for batched jobs
-  else
-    echo "$prefix: Running $parametrization"
+  echo "$prefix: Running $parametrization"
 
-    # fix the port to avoid race condition when batched distributed tests select the port randomly
-    export MASTER_PORT=$((29500 + $i % $test_batch_size))
+  # fix the port to avoid race condition when batched distributed tests select the port randomly
+  export MASTER_PORT=$((29500 + $i % $test_batch_size))
 
-    # execute the test in the background
-    # redirect to a log file that buffers test output. since the tests will run in the background, we cannot let them
-    # output to std{out,err} because the outputs would be garbled together
-    python3 ${defaults} "$parametrization" &>> standalone_test_output.txt &
-    # save the PID in an array
-    pids[${i}]=$!
-    # add row to the final report
-    report+="Ran\t$parametrization\n"
-  fi
+  # execute the test in the background
+  # redirect to a log file that buffers test output. since the tests will run in the background, we cannot let them
+  # output to std{out,err} because the outputs would be garbled together
+  python ${defaults} "$parametrization" &>> standalone_test_output.txt &
+  # save the PID in an array
+  pids[${i}]=$!
+  # add row to the final report
+  report+="Ran\t$parametrization\n"
 
   if ((($i + 1) % $test_batch_size == 0)); then
     # wait for running tests
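The selection above now leans on pytest's own collection instead of grepping for `@RunIf(standalone=True)`: with PL_RUN_STANDALONE_TESTS=1 exported, the conftest hooks below drop everything that is not standalone, and the script only scrapes node IDs from the collector output. A rough sketch of that step (the printed ID is a placeholder):

    cd tests/tests_pytorch
    export PL_RUN_STANDALONE_TESTS=1
    python -m pytest . -q --collect-only --pythonwarnings ignore \
      | grep -oP '\S+::test_\S+'
    # one node ID per line, e.g. some_dir/test_file.py::test_name[param0]

Since pytest's summary lines never match `\S+::test_\S+`, the old OS-specific `head`/`tail` trimming is no longer needed; note that `-P` (PCRE) does require GNU grep.
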
diff --git a/tests/tests_fabric/conftest.py b/tests/tests_fabric/conftest.py
index 6edce6c85b8e0..221efe2d4155f 100644
--- a/tests/tests_fabric/conftest.py
+++ b/tests/tests_fabric/conftest.py
@@ -192,22 +192,23 @@ def pytest_collection_modifyitems(items: List[pytest.Function], config: pytest.C
 
     for kwarg, env_var in options.items():
         # this will compute the intersection of all tests selected per environment variable
-        if os.getenv(env_var, "0") == "1":
-            conditions.append(env_var)
-            for i, test in reversed(list(enumerate(items))):  # loop in reverse, since we are going to pop items
-                already_skipped = any(marker.name == "skip" for marker in test.own_markers)
-                if already_skipped:
-                    # the test was going to be skipped anyway, filter it out
-                    items.pop(i)
-                    skipped += 1
-                    continue
-                has_runif_with_kwarg = any(
-                    marker.name == "skipif" and marker.kwargs.get(kwarg) for marker in test.own_markers
-                )
-                if not has_runif_with_kwarg:
-                    # the test has `@RunIf(kwarg=True)`, filter it out
-                    items.pop(i)
-                    filtered += 1
+        if os.getenv(env_var, "0") != "1":
+            continue
+        conditions.append(env_var)
+        for i, test in reversed(list(enumerate(items))):  # loop in reverse, since we are going to pop items
+            already_skipped = any(marker.name == "skip" for marker in test.own_markers)
+            if already_skipped:
+                # the test was going to be skipped anyway, filter it out
+                items.pop(i)
+                skipped += 1
+                continue
+            has_runif_with_kwarg = any(
+                marker.name == "skipif" and marker.kwargs.get(kwarg) for marker in test.own_markers
+            )
+            if not has_runif_with_kwarg:
+                # the test has `@RunIf(kwarg=True)`, filter it out
+                items.pop(i)
+                filtered += 1
 
     if config.option.verbose >= 0 and (filtered or skipped):
         writer = config.get_terminal_writer()
diff --git a/tests/tests_fabric/run_standalone_tests.sh b/tests/tests_fabric/run_standalone_tests.sh
deleted file mode 120000
index 23049489b7160..0000000000000
--- a/tests/tests_fabric/run_standalone_tests.sh
+++ /dev/null
@@ -1 +0,0 @@
-../tests_pytorch/run_standalone_tests.sh
\ No newline at end of file
diff --git a/tests/tests_fabric/run_tpu_tests.sh b/tests/tests_fabric/run_tpu_tests.sh
index 2f98b2a258e48..ca59a001927c9 100644
--- a/tests/tests_fabric/run_tpu_tests.sh
+++ b/tests/tests_fabric/run_tpu_tests.sh
@@ -35,7 +35,7 @@ cd tests/tests_fabric
 PL_RUN_TPU_TESTS=1 python3 -m coverage run --source=lightning -m pytest -vv --durations=0 --timeout 60 ./
 
 echo "--- Running standalone Fabric tests ---"
-PL_RUN_TPU_TESTS=1 PL_STANDALONE_TESTS_BATCH_SIZE=1 bash run_standalone_tests.sh
+PL_RUN_TPU_TESTS=1 PL_STANDALONE_TESTS_BATCH_SIZE=1 bash ../run_standalone_tests.sh "."
 
 echo "--- Generating coverage ---"
 python3 -m coverage xml
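Both suites now share the single script one directory up and pass the test location as its first argument, exactly as the CI jobs and TPU scripts do. A local GPU run would look roughly like this (sketch, assumes a CUDA machine):

    cd tests/tests_fabric   # or tests/tests_pytorch
    PL_RUN_CUDA_TESTS=1 PL_STANDALONE_TESTS_SOURCE=lightning bash ../run_standalone_tests.sh "."

The conftest edit itself is a pure guard-clause refactor: `if os.getenv(env_var, "0") == "1":` becomes an early `continue` on the negated condition, removing one indentation level without changing which tests get filtered out.
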
diff --git a/tests/tests_pytorch/conftest.py b/tests/tests_pytorch/conftest.py
index aabea254b1365..7194fdc862738 100644
--- a/tests/tests_pytorch/conftest.py
+++ b/tests/tests_pytorch/conftest.py
@@ -318,22 +318,23 @@ def pytest_collection_modifyitems(items: List[pytest.Function], config: pytest.C
 
     for kwarg, env_var in options.items():
         # this will compute the intersection of all tests selected per environment variable
-        if os.getenv(env_var, "0") == "1":
-            conditions.append(env_var)
-            for i, test in reversed(list(enumerate(items))):  # loop in reverse, since we are going to pop items
-                already_skipped = any(marker.name == "skip" for marker in test.own_markers)
-                if already_skipped:
-                    # the test was going to be skipped anyway, filter it out
-                    items.pop(i)
-                    skipped += 1
-                    continue
-                has_runif_with_kwarg = any(
-                    marker.name == "skipif" and marker.kwargs.get(kwarg) for marker in test.own_markers
-                )
-                if not has_runif_with_kwarg:
-                    # the test has `@RunIf(kwarg=True)`, filter it out
-                    items.pop(i)
-                    filtered += 1
+        if os.getenv(env_var, "0") != "1":
+            continue
+        conditions.append(env_var)
+        for i, test in reversed(list(enumerate(items))):  # loop in reverse, since we are going to pop items
+            already_skipped = any(marker.name == "skip" for marker in test.own_markers)
+            if already_skipped:
+                # the test was going to be skipped anyway, filter it out
+                items.pop(i)
+                skipped += 1
+                continue
+            has_runif_with_kwarg = any(
+                marker.name == "skipif" and marker.kwargs.get(kwarg) for marker in test.own_markers
+            )
+            if not has_runif_with_kwarg:
+                # the test has `@RunIf(kwarg=True)`, filter it out
+                items.pop(i)
+                filtered += 1
 
     if config.option.verbose >= 0 and (filtered or skipped):
         writer = config.get_terminal_writer()
diff --git a/tests/tests_pytorch/profilers/test_profiler.py b/tests/tests_pytorch/profilers/test_profiler.py
index 56d82734dcf14..0838d5b1b8c5a 100644
--- a/tests/tests_pytorch/profilers/test_profiler.py
+++ b/tests/tests_pytorch/profilers/test_profiler.py
@@ -471,20 +471,22 @@ def look_for_trace(trace_dir):
     assert look_for_trace(tmpdir / "lightning_logs" / "version_0")
 
 
-@RunIf(min_cuda_gpus=1, standalone=True)
-def test_pytorch_profiler_nested_emit_nvtx():
-    """This test check emit_nvtx is correctly supported."""
-    profiler = PyTorchProfiler(use_cuda=True, emit_nvtx=True)
-    model = BoringModel()
-    trainer = Trainer(
-        fast_dev_run=True,
-        profiler=profiler,
-        accelerator="gpu",
-        devices=1,
-        enable_progress_bar=False,
-        enable_model_summary=False,
-    )
-    trainer.fit(model)
+# Todo: this test has not been running as all our CI GPU runners have a higher CUDA compute capability
+# @RunIf(min_cuda_gpus=1, standalone=True)
+# @pytest.mark.skipif(torch.cuda.get_device_capability()[0] >= 8)
+# def test_pytorch_profiler_nested_emit_nvtx():
+#     """This test check emit_nvtx is correctly supported."""
+#     profiler = PyTorchProfiler(use_cuda=True, emit_nvtx=True)
+#     model = BoringModel()
+#     trainer = Trainer(
+#         fast_dev_run=True,
+#         profiler=profiler,
+#         accelerator="gpu",
+#         devices=1,
+#         enable_progress_bar=False,
+#         enable_model_summary=False,
+#     )
+#     trainer.fit(model)
 
 
 def test_register_record_function(tmpdir):
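The nvtx test is parked rather than deleted because `emit_nvtx` needs the legacy `nvprof` profiler, and `nvprof` does not support GPUs with CUDA compute capability 8.0 or newer, which is what the CI runners now have. The (now commented) gate in run_standalone_tasks.sh below is the authoritative check; to see whether a given machine could still run it:

    python -c "import torch; print(torch.cuda.is_available() and torch.cuda.get_device_capability()[0] < 8)"
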
diff --git a/tests/tests_pytorch/run_standalone_tasks.sh b/tests/tests_pytorch/run_standalone_tasks.sh
index 7648adfb45b27..6f69046dff72d 100644
--- a/tests/tests_pytorch/run_standalone_tasks.sh
+++ b/tests/tests_pytorch/run_standalone_tasks.sh
@@ -18,11 +18,16 @@ set -e
 # this environment variable allows special tests to run
 export PL_RUN_STANDALONE_TESTS=1
 
-can_run_nvprof=$(python -c "import torch; print(torch.cuda.is_available() and torch.cuda.get_device_capability()[0] < 8)")
-if [[ $can_run_nvprof == "True" ]]; then
-  echo "Running profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx"
-  nvprof --profile-from-start off -o trace_name.prof -- python -m coverage run --source lightning.pytorch --append -m pytest --no-header profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx
-fi
+#can_run_nvprof=$(python -c "import torch; print(torch.cuda.is_available() and torch.cuda.get_device_capability()[0] < 8)")
+#if [[ $can_run_nvprof == "True" ]]; then
+#  echo "Running profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx"
+#  nvprof --profile-from-start off \
+#      -o trace_name.prof \
+#      -- python -m coverage run \
+#      --source lightning.pytorch \
+#      --append -m pytest \
+#      --no-header profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx
+#fi
 
 # test that a user can manually launch individual processes
 echo "Running manual ddp launch test"
diff --git a/tests/tests_pytorch/run_tpu_tests.sh b/tests/tests_pytorch/run_tpu_tests.sh
index ec5f8d44c9ef6..bdb21ef0531a8 100644
--- a/tests/tests_pytorch/run_tpu_tests.sh
+++ b/tests/tests_pytorch/run_tpu_tests.sh
@@ -34,7 +34,7 @@ cd tests/tests_pytorch
 PL_RUN_TPU_TESTS=1 python3 -m coverage run --source=lightning -m pytest -vv --durations=0 --timeout 60 ./
 
 echo "--- Running standalone PL tests ---"
-PL_RUN_TPU_TESTS=1 PL_STANDALONE_TESTS_BATCH_SIZE=1 bash run_standalone_tests.sh
+PL_RUN_TPU_TESTS=1 PL_STANDALONE_TESTS_BATCH_SIZE=1 bash ../run_standalone_tests.sh "."
 
 echo "--- Generating coverage ---"
 python3 -m coverage xml
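The batching behaviour of tests/run_standalone_tests.sh is unchanged: collected tests run in background batches sized by PL_STANDALONE_TESTS_BATCH_SIZE (the TPU scripts above pin it to 1), each with MASTER_PORT fixed to 29500 + i % batch_size so concurrently launched distributed tests do not race for a port. Throttling the suite on a small GPU box is therefore just (sketch, assumes a CUDA machine):

    cd tests/tests_pytorch
    export PL_RUN_CUDA_TESTS=1
    PL_STANDALONE_TESTS_BATCH_SIZE=2 bash ../run_standalone_tests.sh "."
    # with a batch size of 2, jobs alternate between ports 29500 and 29501
    cat standalone_test_output.txt   # the buffered per-test output collects here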