Make {eol_}comments_re read-only and non-init arguments in ParserConfig #352

Merged
11 commits merged on Dec 29, 2024
4 changes: 4 additions & 0 deletions docs/directives.rst
@@ -29,6 +29,8 @@ Specifies a regular expression to identify and exclude inline (bracketed) comments

@@comments :: /\(\*((?:.|\n)*?)\*\)/

.. note::
Prior to 5.12.1, comments implicitly had the `(?m) <https://docs.python.org/3/library/re.html#re.MULTILINE>`_ option defined. This is no longer the case.

``@@eol_comments :: <regexp>``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -39,6 +41,8 @@ Specifies a regular expression to identify and exclude end-of-line comments befo

@@eol_comments :: /#([^\n]*?)$/

.. note::
Prior to 5.12.1, eol_comments implicitly had the `(?m) <https://docs.python.org/3/library/re.html#re.MULTILINE>`_ option defined. This is no longer the case.
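
As a minimal illustration of the migration (the grammar and input below are invented for this note and are not part of TatSu's sources), a grammar that relied on the implicit multiline behavior now has to spell the flag out in the directive itself:

.. code:: python

    import tatsu

    GRAMMAR = r'''
        @@grammar :: Example
        @@eol_comments :: /(?m)#[^\n]*$/

        start = 'a' 'b' $ ;
    '''

    parser = tatsu.compile(GRAMMAR)
    # The trailing comment is skipped only because (?m) lets ``$`` match
    # before the newline; without the flag this parse would fail.
    parser.parse('a  # an end-of-line comment\nb')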

``@@ignorecase :: <bool>``
~~~~~~~~~~~~~~~~~~~~~~~~~~
8 changes: 4 additions & 4 deletions docs/syntax.rst
@@ -735,11 +735,11 @@ Comments
~~~~~~~~

Parsers will skip over comments specified as a regular expression using
the ``comments_re`` parameter:
the ``comments`` parameter:

.. code:: python

parser = MyParser(text, comments_re="\(\*.*?\*\)")
parser = MyParser(text, comments="\(\*.*?\*\)")

For more complex comment handling, you can override the
``Buffer.eat_comments()`` method.
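
A hedged sketch of such an override, assuming the public ``Buffer`` methods ``match()``, ``atend()`` and ``next()``; the ``NestedCommentBuffer`` class and its nested-comment rule are invented for illustration:

.. code:: python

    from tatsu.buffering import Buffer

    class NestedCommentBuffer(Buffer):
        def eat_comments(self):
            # Consume balanced, possibly nested (* ... *) comments by hand,
            # something a single regular expression cannot express.
            while self.match('(*') is not None:
                depth = 1
                while depth and not self.atend():
                    if self.match('(*') is not None:
                        depth += 1
                    elif self.match('*)') is not None:
                        depth -= 1
                    else:
                        self.next()
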
@@ -751,8 +751,8 @@ comments separately:

parser = MyParser(
text,
comments_re="\(\*.*?\*\)",
eol_comments_re="#.*?$"
comments="\(\*.*?\*\)",
eol_comments="#.*?$"
)

Both patterns may also be specified within a grammar using the
2 changes: 1 addition & 1 deletion grammar/tatsu.ebnf
@@ -1,7 +1,7 @@
@@grammar :: TatSu
@@whitespace :: /\s+/
@@comments :: ?"(?sm)[(][*](?:.|\n)*?[*][)]"
@@eol_comments :: ?"#[^\n]*$"
@@eol_comments :: ?"(?m)#[^\n]*$"
@@parseinfo :: True
@@left_recursion :: False

8 changes: 4 additions & 4 deletions tatsu/bootstrap.py
@@ -35,8 +35,8 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings):
ignorecase=False,
namechars='',
parseinfo=True,
comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments_re='#[^\\n]*$',
comments='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments='(?m)#[^\\n]*$',
keywords=KEYWORDS,
start='start',
)
@@ -55,8 +55,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings):
ignorecase=False,
namechars='',
parseinfo=True,
comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments_re='#[^\\n]*$',
comments='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments='(?m)#[^\\n]*$',
keywords=KEYWORDS,
start='start',
)
2 changes: 1 addition & 1 deletion tatsu/buffering.py
@@ -357,7 +357,7 @@ def _scanre(self, pattern):
if isinstance(pattern, RETYPE):
cre = pattern
else:
cre = re.compile(pattern, re.MULTILINE)
cre = re.compile(pattern)
return cre.match(self.text, self.pos)
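
The effect of dropping the implicit flag can be reproduced with the standard library alone (a standalone illustration; the pattern and text are not TatSu code):

    import re

    pattern = r'#[^\n]*$'
    text = '# a comment\nmore text'

    # Without re.MULTILINE, '$' only matches at the end of the string,
    # so the end-of-line comment pattern no longer matches here:
    print(re.compile(pattern).match(text))             # None

    # The flag now has to be embedded in the pattern itself:
    print(re.compile(r'(?m)' + pattern).match(text))   # matches '# a comment'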

@property
6 changes: 3 additions & 3 deletions tatsu/codegen/objectmodel.py
@@ -67,11 +67,11 @@ def _get_full_name(cls):
# Try to reference the class
try:
idents = name.split('.')
_cls = getattr(module, idents[0])
cls_ = getattr(module, idents[0])
for ident in idents[1:]:
_cls = getattr(_cls, ident)
cls_ = getattr(cls_, ident)

assert _cls == cls
assert cls_ == cls
except AttributeError as e:
raise CodegenError(
"Couldn't find base type, it has to be importable",
16 changes: 8 additions & 8 deletions tatsu/codegen/python.py
@@ -462,8 +462,8 @@ def render_fields(self, fields):
left_recursion = self.node.config.left_recursion
parseinfo = self.node.config.parseinfo
namechars = repr(self.node.config.namechars or '')
comments_re = repr(self.node.config.comments_re)
eol_comments_re = repr(self.node.config.eol_comments_re)
comments = repr(self.node.config.comments)
eol_comments = repr(self.node.config.eol_comments)

rules = '\n'.join(
[self.get_renderer(rule).render() for rule in self.node.rules],
@@ -488,8 +488,8 @@ def render_fields(self, fields):
parseinfo=parseinfo,
keywords=keywords,
namechars=namechars,
comments_re=comments_re,
eol_comments_re=eol_comments_re,
comments=comments,
eol_comments=eol_comments,
)

abstract_rule_template = """
@@ -535,8 +535,8 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings):
ignorecase={ignorecase},
namechars={namechars},
parseinfo={parseinfo},
comments_re={comments_re},
eol_comments_re={eol_comments_re},
comments={comments},
eol_comments={eol_comments},
keywords=KEYWORDS,
start={start!r},
)
@@ -554,8 +554,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings):
ignorecase={ignorecase},
namechars={namechars},
parseinfo={parseinfo},
comments_re={comments_re},
eol_comments_re={eol_comments_re},
comments={comments},
eol_comments={eol_comments},
left_recursion={left_recursion},
keywords=KEYWORDS,
start={start!r},
2 changes: 1 addition & 1 deletion tatsu/g2e/semantics.py
@@ -9,7 +9,7 @@

def camel2py(name):
return re.sub(
'([a-z0-9])([A-Z])',
r'([a-z0-9])([A-Z])',
lambda m: m.group(1) + '_' + m.group(2).lower(),
name,
)
2 changes: 1 addition & 1 deletion tatsu/grammars.py
@@ -519,7 +519,7 @@ def _to_str(self, lean=False):

if multi:
return '\n|\n'.join(indent(o) for o in options)
elif len(options) and len(single) > PEP8_LLEN:
elif options and len(single) > PEP8_LLEN:
return '| ' + '\n| '.join(o for o in options)
else:
return single
34 changes: 27 additions & 7 deletions tatsu/infos.py
@@ -3,7 +3,7 @@
import copy
import dataclasses
import re
from collections.abc import Callable, Mapping
from collections.abc import Callable, MutableMapping
from itertools import starmap
from typing import Any, NamedTuple

@@ -30,8 +30,8 @@ class ParserConfig:
start_rule: str | None = None # FIXME
rule_name: str | None = None # Backward compatibility

comments_re: re.Pattern | None = None
eol_comments_re: re.Pattern | None = None
_comments_re: re.Pattern | None = dataclasses.field(default=None, init=False, repr=False)
_eol_comments_re: re.Pattern | None = dataclasses.field(default=None, init=False, repr=False)

tokenizercls: type[Tokenizer] | None = None # FIXME
semantics: type | None = None
@@ -64,9 +64,17 @@ def __post_init__(self): # pylint: disable=W0235
if self.ignorecase:
self.keywords = [k.upper() for k in self.keywords]
if self.comments:
self.comments_re = re.compile(self.comments)
self._comments_re = re.compile(self.comments)
if self.eol_comments:
self.eol_comments_re = re.compile(self.eol_comments)
self._eol_comments_re = re.compile(self.eol_comments)

@property
def comments_re(self) -> re.Pattern | None:
return self._comments_re

@property
def eol_comments_re(self) -> re.Pattern | None:
return self._eol_comments_re
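
A hedged usage sketch of the resulting API (the patterns are illustrative, and ``ParserConfig`` is constructed directly here):

    from tatsu.infos import ParserConfig

    config = ParserConfig(comments=r'\(\*.*?\*\)', eol_comments=r'(?m)#[^\n]*$')

    # The compiled patterns are derived, read-only views over the strings:
    print(config.comments_re.pattern)
    print(config.eol_comments_re.pattern)

    # ``comments_re``/``eol_comments_re`` are no longer accepted by __init__,
    # and assigning to either property raises AttributeError.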

@classmethod
def new(
@@ -84,7 +92,7 @@ def effective_rule_name(self):
# note: there are legacy reasons for this mess
return self.start_rule or self.rule_name or self.start

def _find_common(self, **settings: Any) -> Mapping[str, Any]:
def _find_common(self, **settings: Any) -> MutableMapping[str, Any]:
return {
name: value
for name, value in settings.items()
@@ -101,8 +109,20 @@ def replace_config(
else:
return self.replace(**vars(other))

# non-init fields cannot be used as arguments in `replace`, however
# they are values returned by `vars` and `dataclasses.asdict`, so they
# must be filtered out.
# If the `ParserConfig` dataclass drops these fields, then this filter can be removed
def _filter_non_init_fields(self, settings: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
for field in [
field.name for field in dataclasses.fields(self) if not field.init
]:
if field in settings:
del settings[field]
return settings
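
The need for this filter can be reproduced with a standalone dataclass (an illustration only; ``Config`` below is not TatSu code): ``dataclasses.replace()`` rejects keyword arguments for ``init=False`` fields, while ``vars()`` still reports them once ``__post_init__`` has assigned them.

    import dataclasses
    import re

    @dataclasses.dataclass
    class Config:
        comments: str | None = None
        _comments_re: re.Pattern | None = dataclasses.field(
            default=None, init=False, repr=False,
        )

        def __post_init__(self):
            if self.comments:
                self._comments_re = re.compile(self.comments)

    cfg = Config(comments=r'\(\*.*?\*\)')
    print('_comments_re' in vars(cfg))         # True: assigned in __post_init__

    try:
        dataclasses.replace(cfg, **vars(cfg))  # rejected: _comments_re is init=False
    except (TypeError, ValueError) as err:
        print(err)

    non_init = {f.name for f in dataclasses.fields(cfg) if not f.init}
    clean = {k: v for k, v in vars(cfg).items() if k not in non_init}
    print(dataclasses.replace(cfg, **clean))   # succeeds once the field is dropped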

def replace(self, **settings: Any) -> ParserConfig:
overrides = self._find_common(**settings)
overrides = self._filter_non_init_fields(self._find_common(**settings))
result = dataclasses.replace(self, **overrides)
if 'grammar' in overrides:
result.name = result.grammar
4 changes: 2 additions & 2 deletions tatsu/ngcodegen/python.py
@@ -323,8 +323,8 @@ def _gen_init(self, grammar: grammars.Grammar):
ignorecase={grammar.config.ignorecase},
namechars={grammar.config.namechars!r},
parseinfo={grammar.config.parseinfo},
comments_re={grammar.config.comments_re!r},
eol_comments_re={grammar.config.eol_comments_re!r},
comments={grammar.config.comments!r},
eol_comments={grammar.config.eol_comments!r},
keywords=KEYWORDS,
start={start!r},
)
2 changes: 1 addition & 1 deletion tatsu/util/_common.py
@@ -27,7 +27,7 @@
logger.addHandler(ch)


RETYPE = type(re.compile('.'))
RETYPE = re.Pattern


ESCAPE_SEQUENCE_RE = re.compile(
2 changes: 1 addition & 1 deletion tatsu/walkers.py
@@ -74,7 +74,7 @@ def pythonize_match(m):

# walk__pythonic_name with double underscore after walk
pythonic_name = re.sub(
'[A-Z]+', pythonize_match, node_cls.__name__,
r'[A-Z]+', pythonize_match, node_cls.__name__,
)
if pythonic_name != cammelcase_name:
walker = getattr(cls, prefix + pythonic_name, None)
2 changes: 1 addition & 1 deletion test/grammar/pattern_test.py
@@ -22,7 +22,7 @@ def test_patterns_with_newlines(self):
blankline
=
/^[^\\n]*\\n$/
/(?m)^[^\\n]*\\n$/
;
"""

2 changes: 1 addition & 1 deletion test/grammar/syntax_test.py
@@ -352,7 +352,7 @@ def test_parse_hash():
start = '#' ;
"""

parser = compile(grammar, eol_comments_re='')
parser = compile(grammar, eol_comments='')
parser.parse('#', trace=True)


7 changes: 3 additions & 4 deletions test/parser_equivalence_test.py
@@ -171,6 +171,7 @@ def test_none_whitespace():
output = parser.parse(input, parseinfo=False)
assert output == ('This is a', ' test')


def test_sep_join():
grammar = """
@@grammar::numbers
@@ -183,9 +184,7 @@ def test_sep_join():
= ~ ( "," )%{ digit }+
;

digit = /\d+/ ;
digit = /\\d+/ ;
"""
parser = generate_and_load_parser('W', grammar)
ast = parser.parse('1,2,3,4', nameguard=False)


parser.parse('1,2,3,4', nameguard=False)