diff --git a/docs/directives.rst b/docs/directives.rst index 82852984..4a765dfe 100644 --- a/docs/directives.rst +++ b/docs/directives.rst @@ -29,6 +29,8 @@ Specifies a regular expression to identify and exclude inline (bracketed) commen @@comments :: /\(\*((?:.|\n)*?)\*\)/ +.. note:: + Prior to 5.12.1, comments implicitly had the `(?m) <https://docs.python.org/3/library/re.html#re.MULTILINE>`_ option defined. This is no longer the case. ``@@eol_comments :: `` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -39,6 +41,8 @@ Specifies a regular expression to identify and exclude end-of-line comments befo @@eol_comments :: /#([^\n]*?)$/ +.. note:: + Prior to 5.12.1, eol_comments implicitly had the `(?m) <https://docs.python.org/3/library/re.html#re.MULTILINE>`_ option defined. This is no longer the case. ``@@ignorecase :: `` ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/syntax.rst b/docs/syntax.rst index 1a5ad10c..b1dcc603 100644 --- a/docs/syntax.rst +++ b/docs/syntax.rst @@ -735,11 +735,11 @@ Comments ~~~~~~~~ Parsers will skip over comments specified as a regular expression using -the ``comments_re`` parameter: +the ``comments`` parameter: .. code:: python - parser = MyParser(text, comments_re="\(\*.*?\*\)") + parser = MyParser(text, comments="\(\*.*?\*\)") For more complex comment handling, you can override the ``Buffer.eat_comments()`` method. 
@@ -751,8 +751,8 @@ comments separately: parser = MyParser( text, - comments_re="\(\*.*?\*\)", - eol_comments_re="#.*?$" + comments="\(\*.*?\*\)", + eol_comments="#.*?$" ) Both patterns may also be specified within a grammar using the diff --git a/grammar/tatsu.ebnf b/grammar/tatsu.ebnf index 870caae7..b955d6a2 100644 --- a/grammar/tatsu.ebnf +++ b/grammar/tatsu.ebnf @@ -1,7 +1,7 @@ @@grammar :: TatSu @@whitespace :: /\s+/ @@comments :: ?"(?sm)[(][*](?:.|\n)*?[*][)]" -@@eol_comments :: ?"#[^\n]*$" +@@eol_comments :: ?"(?m)#[^\n]*$" @@parseinfo :: True @@left_recursion :: False diff --git a/tatsu/bootstrap.py b/tatsu/bootstrap.py index 4f656b2a..87c925c7 100644 --- a/tatsu/bootstrap.py +++ b/tatsu/bootstrap.py @@ -35,8 +35,8 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings): ignorecase=False, namechars='', parseinfo=True, - comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]', - eol_comments_re='#[^\\n]*$', + comments='(?sm)[(][*](?:.|\\n)*?[*][)]', + eol_comments='(?m)#[^\\n]*$', keywords=KEYWORDS, start='start', ) @@ -55,8 +55,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings): ignorecase=False, namechars='', parseinfo=True, - comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]', - eol_comments_re='#[^\\n]*$', + comments='(?sm)[(][*](?:.|\\n)*?[*][)]', + eol_comments='(?m)#[^\\n]*$', keywords=KEYWORDS, start='start', ) diff --git a/tatsu/buffering.py b/tatsu/buffering.py index 87358d99..5a2a91fd 100644 --- a/tatsu/buffering.py +++ b/tatsu/buffering.py @@ -357,7 +357,7 @@ def _scanre(self, pattern): if isinstance(pattern, RETYPE): cre = pattern else: - cre = re.compile(pattern, re.MULTILINE) + cre = re.compile(pattern) return cre.match(self.text, self.pos) @property diff --git a/tatsu/codegen/objectmodel.py b/tatsu/codegen/objectmodel.py index d52ea9df..bc787f59 100644 --- a/tatsu/codegen/objectmodel.py +++ b/tatsu/codegen/objectmodel.py @@ -67,11 +67,11 @@ def _get_full_name(cls): # Try to reference the class try: idents = 
name.split('.') - _cls = getattr(module, idents[0]) + cls_ = getattr(module, idents[0]) for ident in idents[1:]: - _cls = getattr(_cls, ident) + cls_ = getattr(cls_, ident) - assert _cls == cls + assert cls_ == cls except AttributeError as e: raise CodegenError( "Couldn't find base type, it has to be importable", diff --git a/tatsu/codegen/python.py b/tatsu/codegen/python.py index 31e0dea9..f25e1d8c 100755 --- a/tatsu/codegen/python.py +++ b/tatsu/codegen/python.py @@ -462,8 +462,8 @@ def render_fields(self, fields): left_recursion = self.node.config.left_recursion parseinfo = self.node.config.parseinfo namechars = repr(self.node.config.namechars or '') - comments_re = repr(self.node.config.comments_re) - eol_comments_re = repr(self.node.config.eol_comments_re) + comments = repr(self.node.config.comments) + eol_comments = repr(self.node.config.eol_comments) rules = '\n'.join( [self.get_renderer(rule).render() for rule in self.node.rules], @@ -488,8 +488,8 @@ def render_fields(self, fields): parseinfo=parseinfo, keywords=keywords, namechars=namechars, - comments_re=comments_re, - eol_comments_re=eol_comments_re, + comments=comments, + eol_comments=eol_comments, ) abstract_rule_template = """ @@ -535,8 +535,8 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings): ignorecase={ignorecase}, namechars={namechars}, parseinfo={parseinfo}, - comments_re={comments_re}, - eol_comments_re={eol_comments_re}, + comments={comments}, + eol_comments={eol_comments}, keywords=KEYWORDS, start={start!r}, ) @@ -554,8 +554,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings): ignorecase={ignorecase}, namechars={namechars}, parseinfo={parseinfo}, - comments_re={comments_re}, - eol_comments_re={eol_comments_re}, + comments={comments}, + eol_comments={eol_comments}, left_recursion={left_recursion}, keywords=KEYWORDS, start={start!r}, diff --git a/tatsu/g2e/semantics.py b/tatsu/g2e/semantics.py index 982ed777..ccf0b497 100644 --- 
a/tatsu/g2e/semantics.py +++ b/tatsu/g2e/semantics.py @@ -9,7 +9,7 @@ def camel2py(name): return re.sub( - '([a-z0-9])([A-Z])', + r'([a-z0-9])([A-Z])', lambda m: m.group(1) + '_' + m.group(2).lower(), name, ) diff --git a/tatsu/grammars.py b/tatsu/grammars.py index 65def8b9..66f2173b 100644 --- a/tatsu/grammars.py +++ b/tatsu/grammars.py @@ -519,7 +519,7 @@ def _to_str(self, lean=False): if multi: return '\n|\n'.join(indent(o) for o in options) - elif len(options) and len(single) > PEP8_LLEN: + elif options and len(single) > PEP8_LLEN: return '| ' + '\n| '.join(o for o in options) else: return single diff --git a/tatsu/infos.py b/tatsu/infos.py index 6ec898ad..3bba14f2 100644 --- a/tatsu/infos.py +++ b/tatsu/infos.py @@ -3,7 +3,7 @@ import copy import dataclasses import re -from collections.abc import Callable, Mapping +from collections.abc import Callable, MutableMapping from itertools import starmap from typing import Any, NamedTuple @@ -30,8 +30,8 @@ class ParserConfig: start_rule: str | None = None # FIXME rule_name: str | None = None # Backward compatibility - comments_re: re.Pattern | None = None - eol_comments_re: re.Pattern | None = None + _comments_re: re.Pattern | None = dataclasses.field(default=None, init=False, repr=False) + _eol_comments_re: re.Pattern | None = dataclasses.field(default=None, init=False, repr=False) tokenizercls: type[Tokenizer] | None = None # FIXME semantics: type | None = None @@ -64,9 +64,17 @@ def __post_init__(self): # pylint: disable=W0235 if self.ignorecase: self.keywords = [k.upper() for k in self.keywords] if self.comments: - self.comments_re = re.compile(self.comments) + self._comments_re = re.compile(self.comments) if self.eol_comments: - self.eol_comments_re = re.compile(self.eol_comments) + self._eol_comments_re = re.compile(self.eol_comments) + + @property + def comments_re(self) -> re.Pattern | None: + return self._comments_re + + @property + def eol_comments_re(self) -> re.Pattern | None: + return 
self._eol_comments_re @classmethod def new( @@ -84,7 +92,7 @@ def effective_rule_name(self): # note: there are legacy reasons for this mess return self.start_rule or self.rule_name or self.start - def _find_common(self, **settings: Any) -> Mapping[str, Any]: + def _find_common(self, **settings: Any) -> MutableMapping[str, Any]: return { name: value for name, value in settings.items() @@ -101,8 +109,20 @@ def replace_config( else: return self.replace(**vars(other)) + # non-init fields cannot be used as arguments in `replace`, however + # they are values returned by `vars` and `dataclasses.asdict` so they + # must be filtered out. + # If the `ParserConfig` dataclass drops these fields, then this filter can be removed + def _filter_non_init_fields(self, settings: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + for field in [ + field.name for field in dataclasses.fields(self) if not field.init + ]: + if field in settings: + del settings[field] + return settings + def replace(self, **settings: Any) -> ParserConfig: - overrides = self._find_common(**settings) + overrides = self._filter_non_init_fields(self._find_common(**settings)) result = dataclasses.replace(self, **overrides) if 'grammar' in overrides: result.name = result.grammar diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 6a83e5c5..76583377 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -323,8 +323,8 @@ def _gen_init(self, grammar: grammars.Grammar): ignorecase={grammar.config.ignorecase}, namechars={grammar.config.namechars!r}, parseinfo={grammar.config.parseinfo}, - comments_re={grammar.config.comments_re!r}, - eol_comments_re={grammar.config.eol_comments_re!r}, + comments={grammar.config.comments!r}, + eol_comments={grammar.config.eol_comments!r}, keywords=KEYWORDS, start={start!r}, ) diff --git a/tatsu/util/_common.py b/tatsu/util/_common.py index c0819064..1123e9fb 100644 --- a/tatsu/util/_common.py +++ b/tatsu/util/_common.py @@ -27,7 +27,7 @@ 
logger.addHandler(ch) -RETYPE = type(re.compile('.')) +RETYPE = re.Pattern ESCAPE_SEQUENCE_RE = re.compile( diff --git a/tatsu/walkers.py b/tatsu/walkers.py index 3de070ea..7762a4d7 100644 --- a/tatsu/walkers.py +++ b/tatsu/walkers.py @@ -74,7 +74,7 @@ def pythonize_match(m): # walk__pythonic_name with double underscore after walk pythonic_name = re.sub( - '[A-Z]+', pythonize_match, node_cls.__name__, + r'[A-Z]+', pythonize_match, node_cls.__name__, ) if pythonic_name != cammelcase_name: walker = getattr(cls, prefix + pythonic_name, None) diff --git a/test/grammar/pattern_test.py b/test/grammar/pattern_test.py index 91094fad..c651baf3 100644 --- a/test/grammar/pattern_test.py +++ b/test/grammar/pattern_test.py @@ -22,7 +22,7 @@ def test_patterns_with_newlines(self): blankline = - /^[^\\n]*\\n$/ + /(?m)^[^\\n]*\\n$/ ; """ diff --git a/test/grammar/syntax_test.py b/test/grammar/syntax_test.py index b59b7bdf..f111a92b 100644 --- a/test/grammar/syntax_test.py +++ b/test/grammar/syntax_test.py @@ -352,7 +352,7 @@ def test_parse_hash(): start = '#' ; """ - parser = compile(grammar, eol_comments_re='') + parser = compile(grammar, eol_comments='') parser.parse('#', trace=True) diff --git a/test/parser_equivalence_test.py b/test/parser_equivalence_test.py index 02b4367f..62c6eaa1 100644 --- a/test/parser_equivalence_test.py +++ b/test/parser_equivalence_test.py @@ -171,6 +171,7 @@ def test_none_whitespace(): output = parser.parse(input, parseinfo=False) assert output == ('This is a', ' test') + def test_sep_join(): grammar = """ @@grammar::numbers @@ -183,9 +184,7 @@ def test_sep_join(): = ~ ( "," )%{ digit }+ ; - digit = /\d+/ ; + digit = /\\d+/ ; """ parser = generate_and_load_parser('W', grammar) - ast = parser.parse('1,2,3,4', nameguard=False) - - + parser.parse('1,2,3,4', nameguard=False)