From 8057153823711d8f486b1c52469090ce404771cb Mon Sep 17 00:00:00 2001
From: Jeremy Goh <jeremy_gsk@outlook.com>
Date: Sun, 20 Feb 2022 18:03:05 +0800
Subject: [PATCH] feat: Implement trim_doctest_flags for Google and Numpy

---
 src/griffe/docstrings/google.py      | 81 +++++++++++++++++++++++-----
 src/griffe/docstrings/numpy.py       | 78 +++++++++++++++++++++++----
 tests/test_docstrings/test_google.py | 76 +++++++++++++++++++++++++-
 tests/test_docstrings/test_numpy.py  | 81 +++++++++++++++++++++++++++-
 4 files changed, 289 insertions(+), 27 deletions(-)
diff --git a/src/griffe/docstrings/google.py b/src/griffe/docstrings/google.py
index 1146e480..308b1ba3 100644
--- a/src/griffe/docstrings/google.py
+++ b/src/griffe/docstrings/google.py
@@ -49,6 +49,8 @@
 
 _RE_ADMONITION: Pattern = re.compile(r"^(?P<type>[\w][\s\w-]*):(\s+(?P<title>[^\s].*))?$", re.I)
 _RE_NAME_ANNOTATION_DESCRIPTION: Pattern = re.compile(r"^(?:(?P<name>\w+)?\s*(?:\((?P<type>.+)\))?:\s*)?(?P<desc>.*)$")
+_RE_DOCTEST_BLANKLINE: Pattern = re.compile(r"^\s*<BLANKLINE>\s*$")
+_RE_DOCTEST_FLAGS: Pattern = re.compile(r"(\s*#\s*doctest:.+)$")
 
 
 def _read_block_items(docstring: Docstring, offset: int) -> ItemsBlock:  # noqa: WPS231
@@ -191,7 +193,11 @@ def _read_parameters(docstring: Docstring, offset: int) -> tuple[list[DocstringP
     return parameters, new_offset
 
 
-def _read_parameters_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_parameters_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     parameters, new_offset = _read_parameters(docstring, offset)
 
     if parameters:
@@ -201,7 +207,11 @@ def _read_parameters_section(docstring: Docstring, offset: int) -> tuple[Docstri
     return None, new_offset
 
 
-def _read_other_parameters_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_other_parameters_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     parameters, new_offset = _read_parameters(docstring, offset)
 
     if parameters:
@@ -211,7 +221,11 @@ def _read_other_parameters_section(docstring: Docstring, offset: int) -> tuple[D
     return None, new_offset
 
 
-def _read_attributes_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:  # noqa: WPS231
+def _read_attributes_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     attributes = []
     block, new_offset = _read_block_items(docstring, offset)
 
@@ -246,7 +260,11 @@ def _read_attributes_section(docstring: Docstring, offset: int) -> tuple[Docstri
     return None, new_offset
 
 
-def _read_raises_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_raises_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     exceptions = []
     block, new_offset = _read_block_items(docstring, offset)
 
@@ -269,7 +287,11 @@ def _read_raises_section(docstring: Docstring, offset: int) -> tuple[DocstringSe
     return None, new_offset
 
 
-def _read_warns_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_warns_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     warns = []
     block, new_offset = _read_block_items(docstring, offset)
 
@@ -289,7 +311,11 @@ def _read_warns_section(docstring: Docstring, offset: int) -> tuple[DocstringSec
     return None, new_offset
 
 
-def _read_returns_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:  # noqa: WPS231
+def _read_returns_section(  # noqa: WPS231
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     returns = []
     block, new_offset = _read_block_items(docstring, offset)
 
@@ -326,7 +352,11 @@ def _read_returns_section(docstring: Docstring, offset: int) -> tuple[DocstringS
     return None, new_offset
 
 
-def _read_yields_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:  # noqa: WPS231
+def _read_yields_section(  # noqa: WPS231
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     yields = []
     block, new_offset = _read_block_items(docstring, offset)
 
@@ -364,7 +394,11 @@ def _read_yields_section(docstring: Docstring, offset: int) -> tuple[DocstringSe
     return None, new_offset
 
 
-def _read_receives_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:  # noqa: WPS231
+def _read_receives_section(  # noqa: WPS231
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     receives = []
     block, new_offset = _read_block_items(docstring, offset)
 
@@ -402,7 +436,12 @@ def _read_receives_section(docstring: Docstring, offset: int) -> tuple[Docstring
     return None, new_offset
 
 
-def _read_examples_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:  # noqa: WPS231
+def _read_examples_section(  # noqa: WPS231
+    docstring: Docstring,
+    offset: int,
+    trim_doctest_flags: bool = True,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     text, new_offset = _read_block(docstring, offset)
 
     sub_sections = []
@@ -422,6 +461,9 @@ def _read_examples_section(docstring: Docstring, offset: int) -> tuple[Docstring
                 current_text.append(line)
 
         elif in_code_example:
+            if trim_doctest_flags:
+                line = _RE_DOCTEST_FLAGS.sub("", line)
+                line = _RE_DOCTEST_BLANKLINE.sub("", line)
             current_example.append(line)
 
         elif line.startswith("```"):
@@ -436,6 +478,9 @@ def _read_examples_section(docstring: Docstring, offset: int) -> tuple[Docstring
                 sub_sections.append((DocstringSectionKind.text, "\n".join(current_text).rstrip("\n")))
                 current_text = []
             in_code_example = True
+
+            if trim_doctest_flags:
+                line = _RE_DOCTEST_FLAGS.sub("", line)
             current_example.append(line)
 
         else:
@@ -453,7 +498,11 @@ def _read_examples_section(docstring: Docstring, offset: int) -> tuple[Docstring
     return None, new_offset
 
 
-def _read_deprecated_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_deprecated_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     text, new_offset = _read_block(docstring, offset)
 
     # early exit if there is no text in the yield section
@@ -498,6 +547,7 @@ def _is_empty_line(line) -> bool:
 def parse(  # noqa: WPS231
     docstring: Docstring,
     ignore_init_summary: bool = False,
+    trim_doctest_flags: bool = True,
     **options: Any,
 ) -> list[DocstringSection]:
     """Parse a docstring.
@@ -508,6 +558,7 @@ def parse(  # noqa: WPS231
     Parameters:
         docstring: The docstring to parse.
         ignore_init_summary: Whether to ignore the summary in `__init__` methods' docstrings.
+        trim_doctest_flags: Whether to remove doctest flags and `<BLANKLINE>` markers from Python example blocks.
         **options: Additional parsing options.
 
     Returns:
@@ -519,8 +570,14 @@ def parse(  # noqa: WPS231
     in_code_block = False
     lines = docstring.lines
 
+    options = {
+        "ignore_init_summary": ignore_init_summary,
+        "trim_doctest_flags": trim_doctest_flags,
+        **options,
+    }
+
     ignore_summary = (
-        ignore_init_summary  # noqa: WPS222
+        options["ignore_init_summary"]  # noqa: WPS222
         and docstring.parent is not None
         and docstring.parent.name == "__init__"
         and docstring.parent.is_function
@@ -563,7 +620,7 @@ def parse(  # noqa: WPS231
                             )
                         current_section = []
                     reader = _section_reader[_section_kind[admonition_type.lower()]]
-                    section, offset = reader(docstring, offset + 1)
+                    section, offset = reader(docstring, offset + 1, **options)
                     if section:
                         section.title = title
                         sections.append(section)
diff --git a/src/griffe/docstrings/numpy.py b/src/griffe/docstrings/numpy.py
index 2bcfa3e5..3d9c8455 100644
--- a/src/griffe/docstrings/numpy.py
+++ b/src/griffe/docstrings/numpy.py
@@ -190,6 +190,8 @@ def _read_block(docstring: Docstring, offset: int) -> tuple[str, int]:
     """,
     re.IGNORECASE | re.VERBOSE,
 )
+_RE_DOCTEST_BLANKLINE: Pattern = re.compile(r"^\s*<BLANKLINE>\s*$")
+_RE_DOCTEST_FLAGS: Pattern = re.compile(r"(\s*#\s*doctest:.+)$")
 
 
 def _read_parameters(docstring: Docstring, offset: int) -> tuple[list[DocstringParameter], int]:  # noqa: WPS231
@@ -239,7 +241,11 @@ def _read_parameters(docstring: Docstring, offset: int) -> tuple[list[DocstringP
     return parameters, index
 
 
-def _read_parameters_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_parameters_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     parameters, index = _read_parameters(docstring, offset)
 
     if parameters:
@@ -249,7 +255,11 @@ def _read_parameters_section(docstring: Docstring, offset: int) -> tuple[Docstri
     return None, index
 
 
-def _read_other_parameters_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_other_parameters_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     parameters, index = _read_parameters(docstring, offset)
 
     if parameters:
@@ -259,7 +269,11 @@ def _read_other_parameters_section(docstring: Docstring, offset: int) -> tuple[D
     return None, index
 
 
-def _read_deprecated_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_deprecated_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     # deprecated
     # SINCE_VERSION
     #     TEXT?
@@ -278,7 +292,11 @@ def _read_deprecated_section(docstring: Docstring, offset: int) -> tuple[Docstri
     return DocstringSection(DocstringSectionKind.deprecated, (version, text)), index
 
 
-def _read_returns_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_returns_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     # returns
     # (NAME : )?TYPE
     #     TEXT?
@@ -301,7 +319,11 @@ def _read_returns_section(docstring: Docstring, offset: int) -> tuple[DocstringS
     return DocstringSection(DocstringSectionKind.returns, returns), index
 
 
-def _read_yields_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_yields_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     # yields
     # (NAME : )?TYPE
     #     TEXT?
@@ -324,7 +346,11 @@ def _read_yields_section(docstring: Docstring, offset: int) -> tuple[DocstringSe
     return DocstringSection(DocstringSectionKind.yields, yields), index
 
 
-def _read_receives_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_receives_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     # receives
     # (NAME : )?TYPE
     #     TEXT?
@@ -347,7 +373,11 @@ def _read_receives_section(docstring: Docstring, offset: int) -> tuple[Docstring
     return DocstringSection(DocstringSectionKind.receives, receives), index
 
 
-def _read_raises_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_raises_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     # raises
     # EXCEPTION
     #     TEXT?
@@ -365,7 +395,11 @@ def _read_raises_section(docstring: Docstring, offset: int) -> tuple[DocstringSe
     return DocstringSection(DocstringSectionKind.raises, raises), index
 
 
-def _read_warns_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_warns_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     # warns
     # WARNING
     #     TEXT?
@@ -383,7 +417,11 @@ def _read_warns_section(docstring: Docstring, offset: int) -> tuple[DocstringSec
     return DocstringSection(DocstringSectionKind.warns, warns), index
 
 
-def _read_attributes_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:
+def _read_attributes_section(
+    docstring: Docstring,
+    offset: int,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     # attributes (for classes)
     # NAME( : TYPE)?
     #    TEXT?
@@ -407,7 +445,12 @@ def _read_attributes_section(docstring: Docstring, offset: int) -> tuple[Docstri
     return DocstringSection(DocstringSectionKind.attributes, attributes), index
 
 
-def _read_examples_section(docstring: Docstring, offset: int) -> tuple[DocstringSection | None, int]:  # noqa: WPS231
+def _read_examples_section(  # noqa: WPS231
+    docstring: Docstring,
+    offset: int,
+    trim_doctest_flags: bool = True,
+    **options: Any,
+) -> tuple[DocstringSection | None, int]:
     text, index = _read_block(docstring, offset)
 
     sub_sections = []
@@ -427,6 +470,9 @@ def _read_examples_section(docstring: Docstring, offset: int) -> tuple[Docstring
                 current_text.append(line)
 
         elif in_code_example:
+            if trim_doctest_flags:
+                line = _RE_DOCTEST_FLAGS.sub("", line)
+                line = _RE_DOCTEST_BLANKLINE.sub("", line)
             current_example.append(line)
 
         elif line.startswith("```"):
@@ -441,6 +487,9 @@ def _read_examples_section(docstring: Docstring, offset: int) -> tuple[Docstring
                 sub_sections.append((DocstringSectionKind.text, "\n".join(current_text).rstrip("\n")))
                 current_text = []
             in_code_example = True
+
+            if trim_doctest_flags:
+                line = _RE_DOCTEST_FLAGS.sub("", line)
             current_example.append(line)
 
         else:
@@ -474,6 +523,7 @@ def _read_examples_section(docstring: Docstring, offset: int) -> tuple[Docstring
 
 def parse(  # noqa: WPS231
     docstring: Docstring,
+    trim_doctest_flags: bool = True,
     **options: Any,
 ) -> list[DocstringSection]:
     """Parse a docstring.
@@ -483,6 +533,7 @@ def parse(  # noqa: WPS231
 
     Parameters:
         docstring: The docstring to parse.
+        trim_doctest_flags: Whether to remove doctest flags and `<BLANKLINE>` markers from Python example blocks.
         **options: Additional parsing options.
 
     Returns:
@@ -493,6 +544,11 @@ def parse(  # noqa: WPS231
 
     in_code_block = False
 
+    options = {
+        "trim_doctest_flags": trim_doctest_flags,
+        **options,
+    }
+
     lines = docstring.lines
     index = 0
 
@@ -512,7 +568,7 @@ def parse(  # noqa: WPS231
                     )
                 current_section = []
             reader = _section_reader[_section_kind[line_lower]]
-            section, index = reader(docstring, index + 2)
+            section, index = reader(docstring, index + 2, **options)
             if section:
                 sections.append(section)
 
diff --git a/tests/test_docstrings/test_google.py b/tests/test_docstrings/test_google.py
index f2566c69..b0a3043c 100644
--- a/tests/test_docstrings/test_google.py
+++ b/tests/test_docstrings/test_google.py
@@ -267,7 +267,7 @@ def test_parse_examples_sections():
 
             We also can write multiline examples:
 
-            >>> x = 3 + 2
+            >>> x = 3 + 2  # doctest: +SKIP
             >>> y = x + 10
             >>> y
             15
@@ -309,9 +309,12 @@ def test_parse_examples_sections():
             ),
             returns="int",
         ),
+        trim_doctest_flags=False,
     )
     assert len(sections) == 1
-    assert len(sections[0].value) == 9
+    examples = sections[0]
+    assert len(examples.value) == 9
+    assert examples.value[6][1].startswith(">>> x = 3 + 2  # doctest: +SKIP")
     assert not warnings
 
 
@@ -752,3 +755,72 @@ def test_ignore_init_summary(docstring):
         assert "Summary" in sections[0].value
         sections, _ = parse(docstring, ignore_init_summary=True)
         assert "Summary" in sections[0].value
+
+
+@pytest.mark.parametrize(
+    "docstring",
+    [
+        """
+        Examples:
+            Base case 1. We want to skip the following test.
+            >>> 1 + 1 == 3  # doctest: +SKIP
+            True
+        """,
+        r"""
+        Examples:
+
+            Base case 2. We have a blankline test.
+            >>> print("a\n\nb")
+            a
+            <BLANKLINE>
+            b
+        """,
+    ],
+)
+def test_trim_doctest_flags_basic_example(docstring):
+    """Correctly parse simple example docstrings when `trim_doctest_flags` option is turned on.
+
+    Parameters:
+        docstring: The docstring to parse (parametrized).
+    """
+    sections, warnings = parse(docstring, trim_doctest_flags=True)
+    assert len(sections) == 1
+    assert len(sections[0].value) == 2
+    assert not warnings
+
+    # verify that doctest flags have indeed been trimmed
+    example_str = sections[0].value[1][1]
+    assert "# doctest: +SKIP" not in example_str
+    assert "<BLANKLINE>" not in example_str
+
+
+def test_trim_doctest_flags_multi_example():
+    """Correctly parse multiline example docstrings when `trim_doctest_flags` option is turned on."""
+    docstring = r"""
+    Examples:
+
+        Test multiline example blocks.
+        We want to skip the following test.
+        >>> 1 + 1 == 3  # doctest: +SKIP
+        True
+
+        And then a few more examples here:
+        >>> print("a\n\nb")
+        a
+        <BLANKLINE>
+        b
+        >>> 1 + 1 == 2  # doctest: +SKIP
+        >>> print(list(range(1, 100)))    # doctest: +ELLIPSIS
+        [1, 2, ..., 98, 99]
+    """
+    sections, warnings = parse(docstring, trim_doctest_flags=True)
+    assert len(sections) == 1
+    assert len(sections[0].value) == 4
+    assert not warnings
+
+    # verify that doctest flags have indeed been trimmed
+    example_str = sections[0].value[1][1]
+    assert "# doctest: +SKIP" not in example_str
+    example_str = sections[0].value[3][1]
+    assert "<BLANKLINE>" not in example_str
+    assert "\n>>> print(list(range(1, 100)))\n" in example_str
diff --git a/tests/test_docstrings/test_numpy.py b/tests/test_docstrings/test_numpy.py
index 58887c53..3b43ede0 100644
--- a/tests/test_docstrings/test_numpy.py
+++ b/tests/test_docstrings/test_numpy.py
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+import pytest
+
 from griffe.dataclasses import Function, Parameter, Parameters
 from griffe.docstrings import numpy
 from griffe.docstrings.dataclasses import (
@@ -296,7 +298,7 @@ def test_examples_section():
         >>> print("Hello again.")
         ```
 
-        >>> a = 0
+        >>> a = 0  # doctest: +SKIP
         >>> b = a + 1
         >>> print(b)
         1
@@ -308,12 +310,13 @@ def test_examples_section():
         Not in the section.
     """
 
-    sections, _ = parse(docstring)
+    sections, _ = parse(docstring, trim_doctest_flags=False)
     assert len(sections) == 2
     examples = sections[0]
     assert len(examples.value) == 5
     assert examples.value[0] == (DocstringSectionKind.text, "Hello.")
     assert examples.value[1] == (DocstringSectionKind.examples, ">>> 1 + 2\n3")
+    assert examples.value[3][1].startswith(">>> a = 0  # doctest: +SKIP")
 
 
 # =============================================================================================
@@ -329,3 +332,77 @@ def test_prefer_docstring_type_over_annotation():
     sections, _ = parse(docstring, parent=Function("func", parameters=Parameters(Parameter("a", annotation="str"))))
     assert len(sections) == 1
     assert_parameter_equal(sections[0].value[0], DocstringParameter("a", description="", annotation="int"))
+
+
+# =============================================================================================
+# Parser special features
+@pytest.mark.parametrize(
+    "docstring",
+    [
+        """
+        Examples
+        --------
+        Base case 1. We want to skip the following test.
+        >>> 1 + 1 == 3  # doctest: +SKIP
+        True
+        """,
+        r"""
+        Examples
+        --------
+
+        Base case 2. We have a blankline test.
+        >>> print("a\n\nb")
+        a
+        <BLANKLINE>
+        b
+        """,
+    ],
+)
+def test_trim_doctest_flags_basic_example(docstring):
+    """Correctly parse simple example docstrings when `trim_doctest_flags` option is turned on.
+
+    Parameters:
+        docstring: The docstring to parse (parametrized).
+    """
+    sections, warnings = parse(docstring, trim_doctest_flags=True)
+    assert len(sections) == 1
+    assert len(sections[0].value) == 2
+    assert not warnings
+
+    # verify that doctest flags have indeed been trimmed
+    example_str = sections[0].value[1][1]
+    assert "# doctest: +SKIP" not in example_str
+    assert "<BLANKLINE>" not in example_str
+
+
+def test_trim_doctest_flags_multi_example():
+    """Correctly parse multiline example docstrings when `trim_doctest_flags` option is turned on."""
+    docstring = r"""
+    Examples
+    --------
+
+    Test multiline example blocks.
+    We want to skip the following test.
+    >>> 1 + 1 == 3  # doctest: +SKIP
+    True
+
+    And then a few more examples here:
+    >>> print("a\n\nb")
+    a
+    <BLANKLINE>
+    b
+    >>> 1 + 1 == 2  # doctest: +SKIP
+    >>> print(list(range(1, 100)))    # doctest: +ELLIPSIS
+    [1, 2, ..., 98, 99]
+    """
+    sections, warnings = parse(docstring, trim_doctest_flags=True)
+    assert len(sections) == 1
+    assert len(sections[0].value) == 4
+    assert not warnings
+
+    # verify that doctest flags have indeed been trimmed
+    example_str = sections[0].value[1][1]
+    assert "# doctest: +SKIP" not in example_str
+    example_str = sections[0].value[3][1]
+    assert "<BLANKLINE>" not in example_str
+    assert "\n>>> print(list(range(1, 100)))\n" in example_str