[CI] Run doc notebooks in CI (#24816)

Currently, we are not running doc notebooks in CI due to a bazel misconfiguration - we are using `glob` in a top level package in order to get the paths for the notebooks, but those are contained inside subpackages, which glob purposefully ignores. Therefore, the lists of notebooks to run are empty. This PR fixes that by: * Running the `py_test_run_all_notebooks` macro inside the relevant subpackages * Editing the `test_myst_doc.py` script to allow for recursive search for the target file, allowing to deal with mismatches between `name` and `data` arguments in `py_test_run_all_notebooks` * Setting the `allow_empty=False` flag inside `glob` calls in our macros to ensure that this oversight is caught early * Enabling detection of changes in doc folder for `*.ipynb` and `BUILD` files This PR also adds a GPU runner for doc tests, allowing one of our examples to pass - and setting the infra for more to come. Finally, a misconfigured path for one set of doc tests is also fixed.
ray-project · May 17, 2022 · c74886a · c74886a
1 parent 25001f6
commit c74886a
Show file tree

Hide file tree

Showing 8 changed files with 101 additions and 41 deletions.
diff --git a/.buildkite/pipeline.gpu.large.yml b/.buildkite/pipeline.gpu.large.yml
@@ -39,3 +39,16 @@
     - DATA_PROCESSING_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
     - pip install -Ur ./python/requirements_ml_docker.txt
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu python/ray/ml/...
+
+
+- label: ":tv: :book: Doc GPU tests and examples"
+  conditions:
+    ["RAY_CI_PYTHON_AFFECTED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_DOC_AFFECTED"]
+  commands:
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+    - DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
+    - pip install -Ur ./python/requirements_ml_docker.txt
+    - ./ci/env/env_info.sh
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,-tf,-pytorch,-py37,-post_wheel_build doc/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,tf,-pytorch,-py37,-post_wheel_build doc/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,-tf,pytorch,-py37,-post_wheel_build doc/...
diff --git a/.buildkite/pipeline.ml.yml b/.buildkite/pipeline.ml.yml
@@ -377,6 +377,6 @@
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
     - DOC_TESTING=1 PYTHON=3.7 ./ci/env/install-dependencies.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-tf,-pytorch,-py37,-post_wheel_build doc/...
-    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=tf,-pytorch,-py37,-post_wheel_build doc/...
-    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-tf,pytorch,-py37,-post_wheel_build doc/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu,-tf,-pytorch,-py37,-post_wheel_build doc/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu,tf,-pytorch,-py37,-post_wheel_build doc/...
+    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu,-tf,pytorch,-py37,-post_wheel_build doc/...
diff --git a/bazel/python.bzl b/bazel/python.bzl
@@ -1,10 +1,13 @@
+load("@bazel_skylib//lib:paths.bzl", "paths")
+
 # py_test_module_list creates a py_test target for each
 # Python file in `files`
 def py_test_module_list(files, size, deps, extra_srcs, name_suffix="", **kwargs):
     for file in files:
         # remove .py
-        name = file[:-3] + name_suffix
-        main = file
+        name = paths.split_extension(file)[0] + name_suffix
+        if name == file:
+            basename = basename + "_test"
         native.py_test(
             name = name,
             size = size,
@@ -14,24 +17,32 @@ def py_test_module_list(files, size, deps, extra_srcs, name_suffix="", **kwargs)
         )
 
 def py_test_run_all_subdirectory(include, exclude, extra_srcs, **kwargs):
-    for file in native.glob(include = include, exclude = exclude):
+    for file in native.glob(include = include, exclude = exclude, allow_empty=False):
         print(file)
-        basename = file.rpartition("/")[-1]
+        basename = paths.split_extension(file)[0]
+        if basename == file:
+            basename = basename + "_test"
         native.py_test(
-            name = basename[:-3],
+            name = basename,
             srcs = extra_srcs + [file],
             **kwargs
         )
 
 # Runs all included notebooks as py_test targets, by first converting them to .py files with "test_myst_doc.py".
 def py_test_run_all_notebooks(include, exclude, **kwargs):
-    for file in native.glob(include = include, exclude = exclude):
+    for file in native.glob(include = include, exclude = exclude, allow_empty=False):
         print(file)
-        basename = file.rpartition("/")[-1]
+        basename = paths.split_extension(file)[0]
+        if basename == file:
+            basename = basename + "_test"
         native.py_test(
-            name = basename[:-3],
+            name = basename,
             main = "test_myst_doc.py",
-            srcs = ["test_myst_doc.py"],
-            args = ["--path", file],
+            srcs = ["//doc:test_myst_doc.py"],
+            # --find-recursively will look for file in all
+            # directories inside cwd recursively if it cannot
+            # find it right away. This allows to deal with
+            # mismatches between `name` and `data` args.
+            args = ["--find-recursively", "--path", file],
             **kwargs
         )
diff --git a/ci/pipeline/determine_tests_to_run.py b/ci/pipeline/determine_tests_to_run.py
@@ -192,7 +192,11 @@ def get_commit_range():
             elif changed_file.startswith("docker/"):
                 RAY_CI_DOCKER_AFFECTED = 1
                 RAY_CI_LINUX_WHEELS_AFFECTED = 1
-            elif changed_file.startswith("doc/") and changed_file.endswith(".py"):
+            elif changed_file.startswith("doc/") and (
+                changed_file.endswith(".py")
+                or changed_file.endswith(".ipynb")
+                or changed_file.endswith("BUILD")
+            ):
                 RAY_CI_DOC_AFFECTED = 1
             elif any(changed_file.startswith(prefix) for prefix in skip_prefix_list):
                 # nothing is run but linting in these cases

diff --git a/doc/BUILD b/doc/BUILD
@@ -1,6 +1,8 @@
 load("//bazel:python.bzl", "py_test_run_all_subdirectory")
 load("//bazel:python.bzl", "py_test_run_all_notebooks")
 
+exports_files(["test_myst_doc.py"])
+
 # --------------------------------------------------------------------
 # Tests from the doc directory.
 # Please keep these sorted alphabetically, but start with the
@@ -138,17 +140,7 @@ py_test_run_all_subdirectory(
     tags = ["exclusive", "team:serve"],
 )
 
-# --------------------------------------------------------------------
-# Test all doc/source/tune/examples notebooks.
-# --------------------------------------------------------------------
 
-py_test_run_all_notebooks(
-    size = "medium",
-    include = ["doc/source/tune/examples/*.ipynb"],
-    exclude = [],
-    data = ["//doc/source/tune/examples:tune_examples"],
-    tags = ["exclusive", "team:ml"],
-)
 
 # --------------------------------------------------------------------
 # Test all doc/source/tune/doc_code code included in rst/md files.
@@ -162,18 +154,6 @@ py_test_run_all_subdirectory(
     tags = ["exclusive", "team:ml"],
 )
 
-# --------------------------------------------------------------------
-# Test all doc/source/ray-air/examples notebooks.
-# --------------------------------------------------------------------
-
-py_test_run_all_notebooks(
-    size = "medium",
-    include = ["doc/source/ray-air/examples/*.ipynb"],
-    exclude = [],
-    data = ["//doc/source/ray-air/examples:air_examples"],
-    tags = ["exclusive", "team:ml"],
-)
-
 # --------------------------------------------------------------------
 # Test all doc/source/ray-air/doc_code code included in rst/md files.
 # --------------------------------------------------------------------
@@ -188,13 +168,13 @@ py_test_run_all_subdirectory(
 
 
 # --------------------------------------------------------------------
-# Test all doc/source/ray-overview/doc_code snippets, used on ray.io
+# Test all doc/source/ray-overview/doc_test snippets, used on ray.io
 # --------------------------------------------------------------------
 
 py_test_run_all_subdirectory(
     size = "small",
-    include = ["source/ray-overview/doc_code/*.py"],
-    exclude = ["source/ray-overview/doc_code/ray_data.py"],
+    include = ["source/ray-overview/doc_test/*.py"],
+    exclude = ["source/ray-overview/doc_test/ray_data.py"],
     extra_srcs = [],
     tags = ["exclusive", "team:ml"],
 )
diff --git a/doc/source/ray-air/examples/BUILD b/doc/source/ray-air/examples/BUILD
@@ -1,5 +1,19 @@
+load("//bazel:python.bzl", "py_test_run_all_notebooks")
+
 filegroup(
     name = "air_examples",
     srcs = glob(["*.ipynb"]),
     visibility = ["//doc:__subpackages__"]
-)
+)
+
+# --------------------------------------------------------------------
+# Test all doc/source/ray-air/examples notebooks.
+# --------------------------------------------------------------------
+
+py_test_run_all_notebooks(
+    size = "large",
+    include = ["*.ipynb"],
+    exclude = [],
+    data = ["//doc/source/ray-air/examples:air_examples"],
+    tags = ["exclusive", "team:ml"],
+)
diff --git a/doc/source/tune/examples/BUILD b/doc/source/tune/examples/BUILD
@@ -1,5 +1,31 @@
+load("//bazel:python.bzl", "py_test_run_all_notebooks")
+
 filegroup(
     name = "tune_examples",
     srcs = glob(["*.ipynb"]),
     visibility = ["//doc:__subpackages__"]
+)
+
+# --------------------------------------------------------------------
+# Test all doc/source/tune/examples notebooks.
+# --------------------------------------------------------------------
+
+# pbt_ppo_example.ipynb is not tested right now due to large resource
+# requirements
+
+py_test_run_all_notebooks(
+    size = "medium",
+    include = ["*.ipynb"],
+    exclude = ["pbt_ppo_example.ipynb", "tune-xgboost.ipynb"],
+    data = ["//doc/source/tune/examples:tune_examples"],
+    tags = ["exclusive", "team:ml"],
+)
+
+# GPU tests
+py_test_run_all_notebooks(
+    size = "large",
+    include = ["tune-xgboost.ipynb"],
+    exclude = [],
+    data = ["//doc/source/tune/examples:tune_examples"],
+    tags = ["exclusive", "team:ml", "gpu"],
 )
diff --git a/doc/test_myst_doc.py b/doc/test_myst_doc.py
@@ -5,6 +5,7 @@
 import argparse
 import tempfile
 import sys
+from pathlib import Path
 
 import jupytext
 
@@ -13,12 +14,23 @@
     "--path",
     help="path to the jupytext-compatible file",
 )
+parser.add_argument(
+    "--find-recursively",
+    action="store_true",
+    help="if true, will attempt to find path recursively in cwd",
+)
 
 if __name__ == "__main__":
 
     args, remainder = parser.parse_known_args()
 
-    with open(args.path, "r") as f:
+    path = Path(args.path)
+    cwd = Path.cwd()
+    if args.find_recursively and not path.exists():
+        path = next((p for p in cwd.rglob("*") if str(p).endswith(args.path)), None)
+    assert path and path.exists()
+
+    with open(path, "r") as f:
         notebook = jupytext.read(f)
 
     name = ""