From bd1c0a42eab54fa295d4817cad74c89078425116 Mon Sep 17 00:00:00 2001
From: Vincent Fazio <vfazio@xes-inc.com>
Date: Fri, 27 Dec 2024 09:54:16 -0600
Subject: [PATCH 01/11] [buffering] drop forced multiline match for string
 patterns

Previously, when scanning for matches to a regex, if the type of the
pattern was `str`, the pattern was always compiled with `re.MULTILINE`.

Recent changes to `ParserConfig` [0] changed the type used for regex
matches in generated code from `str` to `re.Pattern`. This could lead to
a difference in behavior from previous versions, where a defined
`comments` or `eol_comments` pattern may have been implicitly relying on
the `re.MULTILINE` flag.

After discussion [1], it has been determined that usage of `re` flags
within TatSu should be deprecated in favor of users specifying the
necessary flags within patterns.

As such, drop the `re.MULTILINE` flag for strings compiled on the fly.

[0]: https://github.com/neogeny/TatSu/pull/338
[1]: https://github.com/neogeny/TatSu/issues/351#issuecomment-2563635784
---
 tatsu/buffering.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tatsu/buffering.py b/tatsu/buffering.py
index 87358d99..5a2a91fd 100644
--- a/tatsu/buffering.py
+++ b/tatsu/buffering.py
@@ -357,7 +357,7 @@ def _scanre(self, pattern):
         if isinstance(pattern, RETYPE):
             cre = pattern
         else:
-            cre = re.compile(pattern, re.MULTILINE)
+            cre = re.compile(pattern)
         return cre.match(self.text, self.pos)
 
     @property

From 9ba28d5fd1977ab00beb004f9133f805492261f7 Mon Sep 17 00:00:00 2001
From: Vincent Fazio <vfazio@xes-inc.com>
Date: Fri, 27 Dec 2024 09:53:02 -0600
Subject: [PATCH 02/11] [grammar] make eol_comments multiline match

Make the default eol_comments regex use multiline matching.

Recent changes to `ParserConfig` [0] now use a precompiled regex (an
`re.Pattern`) instead of compiling the `str` regex on the fly.

When compiling a pattern, the `Tokenizer` previously assumed that all
`str` type regexes should be `re.MULTILINE`, regardless of the options
defined in the regex itself. The flag is no longer applied
automatically, so configurations must now specify the option in the
pattern.

[0]: https://github.com/neogeny/TatSu/pull/338
---
 grammar/tatsu.ebnf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/grammar/tatsu.ebnf b/grammar/tatsu.ebnf
index 870caae7..b955d6a2 100644
--- a/grammar/tatsu.ebnf
+++ b/grammar/tatsu.ebnf
@@ -1,7 +1,7 @@
 @@grammar :: TatSu
 @@whitespace :: /\s+/
 @@comments :: ?"(?sm)[(][*](?:.|\n)*?[*][)]"
-@@eol_comments :: ?"#[^\n]*$"
+@@eol_comments :: ?"(?m)#[^\n]*$"
 @@parseinfo :: True
 @@left_recursion :: False
 

From adbc2f269c8eeae7bcd8291a633ff6a16e59543b Mon Sep 17 00:00:00 2001
From: Vincent Fazio <vfazio@xes-inc.com>
Date: Fri, 27 Dec 2024 13:49:30 -0600
Subject: [PATCH 03/11] [infos] make {eol_}comments_re read-only attributes

Previously, the `eol_comments_re` and `comments_re` attributes were
public init arguments, were modifiable, and could thus become out of
sync with the `eol_comments` and `comments` attributes.

Also, with recent changes to `ParserConfig` [0], there were two ways to
initialize the regex values for the comments and eol_comments
directives: either via the constructor using the *_re arguments, or by
using the sister string arguments and relying on `__post_init__` to
compile the values, which trumped any explicit *_re argument values.

Now, the constructor interface has been simplified to not take either
`eol_comments_re` or `comments_re` as arguments. Callers may only use
`eol_comments` and `comments`.

The `eol_comments_re` and `comments_re` attributes are still
public, but are read-only so they are always a reflection of their
sister string values passed into the constructor.

[0]: https://github.com/neogeny/TatSu/pull/200
---
 tatsu/infos.py | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/tatsu/infos.py b/tatsu/infos.py
index 6ec898ad..3bba14f2 100644
--- a/tatsu/infos.py
+++ b/tatsu/infos.py
@@ -3,7 +3,7 @@
 import copy
 import dataclasses
 import re
-from collections.abc import Callable, Mapping
+from collections.abc import Callable, MutableMapping
 from itertools import starmap
 from typing import Any, NamedTuple
 
@@ -30,8 +30,8 @@ class ParserConfig:
     start_rule: str | None = None  # FIXME
     rule_name: str | None = None  # Backward compatibility
 
-    comments_re: re.Pattern | None = None
-    eol_comments_re: re.Pattern | None = None
+    _comments_re: re.Pattern | None = dataclasses.field(default=None, init=False, repr=False)
+    _eol_comments_re: re.Pattern | None = dataclasses.field(default=None, init=False, repr=False)
 
     tokenizercls: type[Tokenizer] | None = None  # FIXME
     semantics: type | None = None
@@ -64,9 +64,17 @@ def __post_init__(self):  # pylint: disable=W0235
         if self.ignorecase:
             self.keywords = [k.upper() for k in self.keywords]
         if self.comments:
-            self.comments_re = re.compile(self.comments)
+            self._comments_re = re.compile(self.comments)
         if self.eol_comments:
-            self.eol_comments_re = re.compile(self.eol_comments)
+            self._eol_comments_re = re.compile(self.eol_comments)
+
+    @property
+    def comments_re(self) -> re.Pattern | None:
+        return self._comments_re
+
+    @property
+    def eol_comments_re(self) -> re.Pattern | None:
+        return self._eol_comments_re
 
     @classmethod
     def new(
@@ -84,7 +92,7 @@ def effective_rule_name(self):
         # note: there are legacy reasons for this mess
         return self.start_rule or self.rule_name or self.start
 
-    def _find_common(self, **settings: Any) -> Mapping[str, Any]:
+    def _find_common(self, **settings: Any) -> MutableMapping[str, Any]:
         return {
             name: value
             for name, value in settings.items()
@@ -101,8 +109,20 @@ def replace_config(
         else:
             return self.replace(**vars(other))
 
+    # non-init fields cannot be used as arguments in `replace`, however
+    # they are keys in the mappings returned by `vars` and
+    # `dataclasses.asdict`, so they must be filtered out.
+    # If the `ParserConfig` dataclass drops these fields, then this filter can be removed
+    def _filter_non_init_fields(self, settings: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
+        for field in [
+            field.name for field in dataclasses.fields(self) if not field.init
+        ]:
+            if field in settings:
+                del settings[field]
+        return settings
+
     def replace(self, **settings: Any) -> ParserConfig:
-        overrides = self._find_common(**settings)
+        overrides = self._filter_non_init_fields(self._find_common(**settings))
         result = dataclasses.replace(self, **overrides)
         if 'grammar' in overrides:
             result.name = result.grammar

From 4bdd4a54124de6164ad4164658856180473b796d Mon Sep 17 00:00:00 2001
From: Vincent Fazio <vfazio@xes-inc.com>
Date: Fri, 27 Dec 2024 14:39:14 -0600
Subject: [PATCH 04/11] [codegen] migrate to {eol_}comments

---
 tatsu/codegen/python.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tatsu/codegen/python.py b/tatsu/codegen/python.py
index 31e0dea9..f25e1d8c 100755
--- a/tatsu/codegen/python.py
+++ b/tatsu/codegen/python.py
@@ -462,8 +462,8 @@ def render_fields(self, fields):
         left_recursion = self.node.config.left_recursion
         parseinfo = self.node.config.parseinfo
         namechars = repr(self.node.config.namechars or '')
-        comments_re = repr(self.node.config.comments_re)
-        eol_comments_re = repr(self.node.config.eol_comments_re)
+        comments = repr(self.node.config.comments)
+        eol_comments = repr(self.node.config.eol_comments)
 
         rules = '\n'.join(
             [self.get_renderer(rule).render() for rule in self.node.rules],
@@ -488,8 +488,8 @@ def render_fields(self, fields):
             parseinfo=parseinfo,
             keywords=keywords,
             namechars=namechars,
-            comments_re=comments_re,
-            eol_comments_re=eol_comments_re,
+            comments=comments,
+            eol_comments=eol_comments,
         )
 
     abstract_rule_template = """
@@ -535,8 +535,8 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings):
                             ignorecase={ignorecase},
                             namechars={namechars},
                             parseinfo={parseinfo},
-                            comments_re={comments_re},
-                            eol_comments_re={eol_comments_re},
+                            comments={comments},
+                            eol_comments={eol_comments},
                             keywords=KEYWORDS,
                             start={start!r},
                         )
@@ -554,8 +554,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings):
                             ignorecase={ignorecase},
                             namechars={namechars},
                             parseinfo={parseinfo},
-                            comments_re={comments_re},
-                            eol_comments_re={eol_comments_re},
+                            comments={comments},
+                            eol_comments={eol_comments},
                             left_recursion={left_recursion},
                             keywords=KEYWORDS,
                             start={start!r},

From 42cb810440e7f1fe062d942d50f9871cb48cb4bf Mon Sep 17 00:00:00 2001
From: Vincent Fazio <vfazio@gmail.com>
Date: Sat, 28 Dec 2024 18:04:39 -0600
Subject: [PATCH 05/11] [ngcodegen] migrate to {eol_}comments

---
 tatsu/ngcodegen/python.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py
index 6a83e5c5..76583377 100644
--- a/tatsu/ngcodegen/python.py
+++ b/tatsu/ngcodegen/python.py
@@ -323,8 +323,8 @@ def _gen_init(self, grammar: grammars.Grammar):
                     ignorecase={grammar.config.ignorecase},
                     namechars={grammar.config.namechars!r},
                     parseinfo={grammar.config.parseinfo},
-                    comments_re={grammar.config.comments_re!r},
-                    eol_comments_re={grammar.config.eol_comments_re!r},
+                    comments={grammar.config.comments!r},
+                    eol_comments={grammar.config.eol_comments!r},
                     keywords=KEYWORDS,
                     start={start!r},
                 )

From 9160c08638dea336783106fcdca6b232459afcf2 Mon Sep 17 00:00:00 2001
From: Vincent Fazio <vfazio@xes-inc.com>
Date: Fri, 27 Dec 2024 09:53:19 -0600
Subject: [PATCH 06/11] [bootstrap] migrate to {eol_}comments

---
 tatsu/bootstrap.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tatsu/bootstrap.py b/tatsu/bootstrap.py
index 4f656b2a..87c925c7 100644
--- a/tatsu/bootstrap.py
+++ b/tatsu/bootstrap.py
@@ -35,8 +35,8 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings):
             ignorecase=False,
             namechars='',
             parseinfo=True,
-            comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]',
-            eol_comments_re='#[^\\n]*$',
+            comments='(?sm)[(][*](?:.|\\n)*?[*][)]',
+            eol_comments='(?m)#[^\\n]*$',
             keywords=KEYWORDS,
             start='start',
         )
@@ -55,8 +55,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings):
             ignorecase=False,
             namechars='',
             parseinfo=True,
-            comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]',
-            eol_comments_re='#[^\\n]*$',
+            comments='(?sm)[(][*](?:.|\\n)*?[*][)]',
+            eol_comments='(?m)#[^\\n]*$',
             keywords=KEYWORDS,
             start='start',
         )

From 03d4b7ff778272000b869d3fdef31b17d3903796 Mon Sep 17 00:00:00 2001
From: Vincent Fazio <vfazio@gmail.com>
Date: Sat, 28 Dec 2024 15:16:10 -0600
Subject: [PATCH 07/11] [lint] resolve errors

---
 tatsu/codegen/objectmodel.py    | 6 +++---
 tatsu/g2e/semantics.py          | 2 +-
 tatsu/grammars.py               | 2 +-
 tatsu/util/_common.py           | 2 +-
 tatsu/walkers.py                | 2 +-
 test/parser_equivalence_test.py | 7 +++----
 6 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/tatsu/codegen/objectmodel.py b/tatsu/codegen/objectmodel.py
index d52ea9df..bc787f59 100644
--- a/tatsu/codegen/objectmodel.py
+++ b/tatsu/codegen/objectmodel.py
@@ -67,11 +67,11 @@ def _get_full_name(cls):
     # Try to reference the class
     try:
         idents = name.split('.')
-        _cls = getattr(module, idents[0])
+        cls_ = getattr(module, idents[0])
         for ident in idents[1:]:
-            _cls = getattr(_cls, ident)
+            cls_ = getattr(cls_, ident)
 
-        assert _cls == cls
+        assert cls_ == cls
     except AttributeError as e:
         raise CodegenError(
             "Couldn't find base type, it has to be importable",
diff --git a/tatsu/g2e/semantics.py b/tatsu/g2e/semantics.py
index 982ed777..ccf0b497 100644
--- a/tatsu/g2e/semantics.py
+++ b/tatsu/g2e/semantics.py
@@ -9,7 +9,7 @@
 
 def camel2py(name):
     return re.sub(
-        '([a-z0-9])([A-Z])',
+        r'([a-z0-9])([A-Z])',
         lambda m: m.group(1) + '_' + m.group(2).lower(),
         name,
     )
diff --git a/tatsu/grammars.py b/tatsu/grammars.py
index 65def8b9..66f2173b 100644
--- a/tatsu/grammars.py
+++ b/tatsu/grammars.py
@@ -519,7 +519,7 @@ def _to_str(self, lean=False):
 
         if multi:
             return '\n|\n'.join(indent(o) for o in options)
-        elif len(options) and len(single) > PEP8_LLEN:
+        elif options and len(single) > PEP8_LLEN:
             return '| ' + '\n| '.join(o for o in options)
         else:
             return single
diff --git a/tatsu/util/_common.py b/tatsu/util/_common.py
index c0819064..1123e9fb 100644
--- a/tatsu/util/_common.py
+++ b/tatsu/util/_common.py
@@ -27,7 +27,7 @@
 logger.addHandler(ch)
 
 
-RETYPE = type(re.compile('.'))
+RETYPE = re.Pattern
 
 
 ESCAPE_SEQUENCE_RE = re.compile(
diff --git a/tatsu/walkers.py b/tatsu/walkers.py
index 3de070ea..7762a4d7 100644
--- a/tatsu/walkers.py
+++ b/tatsu/walkers.py
@@ -74,7 +74,7 @@ def pythonize_match(m):
 
             # walk__pythonic_name with double underscore after walk
             pythonic_name = re.sub(
-                '[A-Z]+', pythonize_match, node_cls.__name__,
+                r'[A-Z]+', pythonize_match, node_cls.__name__,
             )
             if pythonic_name != cammelcase_name:
                 walker = getattr(cls, prefix + pythonic_name, None)
diff --git a/test/parser_equivalence_test.py b/test/parser_equivalence_test.py
index 02b4367f..62c6eaa1 100644
--- a/test/parser_equivalence_test.py
+++ b/test/parser_equivalence_test.py
@@ -171,6 +171,7 @@ def test_none_whitespace():
     output = parser.parse(input, parseinfo=False)
     assert output == ('This is a', ' test')
 
+
 def test_sep_join():
     grammar = """
     @@grammar::numbers
@@ -183,9 +184,7 @@ def test_sep_join():
         = ~ ( "," )%{ digit }+
         ;
 
-    digit = /\d+/ ;
+    digit = /\\d+/ ;
     """
     parser = generate_and_load_parser('W', grammar)
-    ast = parser.parse('1,2,3,4', nameguard=False)
-
-
+    parser.parse('1,2,3,4', nameguard=False)

From 923a67833b546a71fc967877515b134b69a8dd1e Mon Sep 17 00:00:00 2001
From: Vincent Fazio <vfazio@gmail.com>
Date: Sat, 28 Dec 2024 17:54:23 -0600
Subject: [PATCH 08/11] [docs] note {eol_}comments directive behavior changes

---
 docs/directives.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/directives.rst b/docs/directives.rst
index 82852984..4a765dfe 100644
--- a/docs/directives.rst
+++ b/docs/directives.rst
@@ -29,6 +29,8 @@ Specifies a regular expression to identify and exclude inline (bracketed) commen
 
     @@comments :: /\(\*((?:.|\n)*?)\*\)/
 
+.. note::
+   Prior to 5.12.1, comments implicitly had the `(?m) <https://docs.python.org/3/library/re.html#re.MULTILINE>`_ option defined. This is no longer the case.
 
 ``@@eol_comments :: <regexp>``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -39,6 +41,8 @@ Specifies a regular expression to identify and exclude end-of-line comments befo
 
     @@eol_comments :: /#([^\n]*?)$/
 
+.. note::
+   Prior to 5.12.1, eol_comments implicitly had the `(?m) <https://docs.python.org/3/library/re.html#re.MULTILINE>`_ option defined. This is no longer the case.
 
 ``@@ignorecase :: <bool>``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~

From ddbe27f46c9364f2ebe363ad06ada964cfdf6ea3 Mon Sep 17 00:00:00 2001
From: Vincent Fazio <vfazio@gmail.com>
Date: Sat, 28 Dec 2024 17:54:57 -0600
Subject: [PATCH 09/11] [docs] update syntax to reflect {eol_}comments
 arguments

---
 docs/syntax.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/syntax.rst b/docs/syntax.rst
index 1a5ad10c..b1dcc603 100644
--- a/docs/syntax.rst
+++ b/docs/syntax.rst
@@ -735,11 +735,11 @@ Comments
 ~~~~~~~~
 
 Parsers will skip over comments specified as a regular expression using
-the ``comments_re`` parameter:
+the ``comments`` parameter:
 
 .. code:: python
 
-   parser = MyParser(text, comments_re="\(\*.*?\*\)")
+   parser = MyParser(text, comments="\(\*.*?\*\)")
 
 For more complex comment handling, you can override the
 ``Buffer.eat_comments()`` method.
@@ -751,8 +751,8 @@ comments separately:
 
    parser = MyParser(
        text,
-       comments_re="\(\*.*?\*\)",
-       eol_comments_re="#.*?$"
+       comments="\(\*.*?\*\)",
+       eol_comments="#.*?$"
    )
 
 Both patterns may also be specified within a grammar using the

From dacb978a25d2563a763dc0dcd10e591823fbe191 Mon Sep 17 00:00:00 2001
From: Vincent Fazio <vfazio@gmail.com>
Date: Sat, 28 Dec 2024 18:12:56 -0600
Subject: [PATCH 10/11] [test] fix test_parse_hash to use eol_comments

---
 test/grammar/syntax_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/grammar/syntax_test.py b/test/grammar/syntax_test.py
index b59b7bdf..f111a92b 100644
--- a/test/grammar/syntax_test.py
+++ b/test/grammar/syntax_test.py
@@ -352,7 +352,7 @@ def test_parse_hash():
         start = '#' ;
     """
 
-    parser = compile(grammar, eol_comments_re='')
+    parser = compile(grammar, eol_comments='')
     parser.parse('#', trace=True)
 
 

From fdad7932744dd91502d871870ac390ba74272026 Mon Sep 17 00:00:00 2001
From: Vincent Fazio <vfazio@gmail.com>
Date: Sat, 28 Dec 2024 18:16:12 -0600
Subject: [PATCH 11/11] [test] explicitly use multiline match in
 test_patterns_with_newlines

---
 test/grammar/pattern_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/grammar/pattern_test.py b/test/grammar/pattern_test.py
index 91094fad..c651baf3 100644
--- a/test/grammar/pattern_test.py
+++ b/test/grammar/pattern_test.py
@@ -22,7 +22,7 @@ def test_patterns_with_newlines(self):
 
             blankline
                 =
-                /^[^\\n]*\\n$/
+                /(?m)^[^\\n]*\\n$/
                 ;
         """