Make {eol_}comments_re read-only and non-init arguments in ParserConfig #352

Merged
11 commits merged on Dec 29, 2024
4 changes: 4 additions & 0 deletions docs/directives.rst
@@ -29,6 +29,8 @@ Specifies a regular expression to identify and exclude inline (bracketed) comments

@@comments :: /\(\*((?:.|\n)*?)\*\)/

.. note::
Prior to 5.12.1, comments implicitly had the `(?m) <https://docs.python.org/3/library/re.html#re.MULTILINE>`_ option defined. This is no longer the case.

``@@eol_comments :: <regexp>``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -39,6 +41,8 @@ Specifies a regular expression to identify and exclude end-of-line comments befo

@@eol_comments :: /#([^\n]*?)$/

.. note::
Prior to 5.12.1, eol_comments implicitly had the `(?m) <https://docs.python.org/3/library/re.html#re.MULTILINE>`_ option defined. This is no longer the case.
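
As a minimal illustration of the migration (the grammar and input below are invented for this note and are not part of TatSu's sources), a grammar that relied on the implicit multiline behavior now has to spell the flag out in the directive itself:

.. code:: python

    import tatsu

    GRAMMAR = r'''
        @@grammar :: Example
        @@eol_comments :: /(?m)#[^\n]*$/

        start = 'a' 'b' $ ;
    '''

    parser = tatsu.compile(GRAMMAR)
    # The trailing comment is skipped only because (?m) lets ``$`` match
    # before the newline; without the flag this parse would fail.
    parser.parse('a  # an end-of-line comment\nb')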

``@@ignorecase :: <bool>``
~~~~~~~~~~~~~~~~~~~~~~~~~~
8 changes: 4 additions & 4 deletions docs/syntax.rst
@@ -735,11 +735,11 @@ Comments
~~~~~~~~

Parsers will skip over comments specified as a regular expression using
the ``comments_re`` parameter:
the ``comments`` parameter:

.. code:: python

parser = MyParser(text, comments_re="\(\*.*?\*\)")
parser = MyParser(text, comments="\(\*.*?\*\)")

For more complex comment handling, you can override the
``Buffer.eat_comments()`` method.
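
A hedged sketch of such an override, assuming the public ``Buffer`` methods ``match()``, ``atend()`` and ``next()``; the ``NestedCommentBuffer`` class and its nested-comment rule are invented for illustration:

.. code:: python

    from tatsu.buffering import Buffer

    class NestedCommentBuffer(Buffer):
        def eat_comments(self):
            # Consume balanced, possibly nested (* ... *) comments by hand,
            # something a single regular expression cannot express.
            while self.match('(*') is not None:
                depth = 1
                while depth and not self.atend():
                    if self.match('(*') is not None:
                        depth += 1
                    elif self.match('*)') is not None:
                        depth -= 1
                    else:
                        self.next()
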
@@ -751,8 +751,8 @@ comments separately:

parser = MyParser(
text,
comments_re="\(\*.*?\*\)",
eol_comments_re="#.*?$"
comments="\(\*.*?\*\)",
eol_comments="#.*?$"
)

Both patterns may also be specified within a grammar using the
2 changes: 1 addition & 1 deletion grammar/tatsu.ebnf
@@ -1,7 +1,7 @@
@@grammar :: TatSu
@@whitespace :: /\s+/
@@comments :: ?"(?sm)[(][*](?:.|\n)*?[*][)]"
@@eol_comments :: ?"#[^\n]*$"
@@eol_comments :: ?"(?m)#[^\n]*$"
@@parseinfo :: True
@@left_recursion :: False

8 changes: 4 additions & 4 deletions tatsu/bootstrap.py
@@ -35,8 +35,8 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings):
ignorecase=False,
namechars='',
parseinfo=True,
comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments_re='#[^\\n]*$',
comments='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments='(?m)#[^\\n]*$',
keywords=KEYWORDS,
start='start',
)
@@ -55,8 +55,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings):
ignorecase=False,
namechars='',
parseinfo=True,
comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments_re='#[^\\n]*$',
comments='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments='(?m)#[^\\n]*$',
keywords=KEYWORDS,
start='start',
)
2 changes: 1 addition & 1 deletion tatsu/buffering.py
@@ -357,7 +357,7 @@ def _scanre(self, pattern):
if isinstance(pattern, RETYPE):
cre = pattern
else:
cre = re.compile(pattern, re.MULTILINE)
cre = re.compile(pattern)
return cre.match(self.text, self.pos)
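
The effect of dropping the implicit flag can be reproduced with the standard library alone (a standalone illustration; the pattern and text are not TatSu code):

    import re

    pattern = r'#[^\n]*$'
    text = '# a comment\nmore text'

    # Without re.MULTILINE, '$' only matches at the end of the string,
    # so the end-of-line comment pattern no longer matches here:
    print(re.compile(pattern).match(text))             # None

    # The flag now has to be embedded in the pattern itself:
    print(re.compile(r'(?m)' + pattern).match(text))   # matches '# a comment'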

@property
6 changes: 3 additions & 3 deletions tatsu/codegen/objectmodel.py
@@ -67,11 +67,11 @@ def _get_full_name(cls):
# Try to reference the class
try:
idents = name.split('.')
_cls = getattr(module, idents[0])
cls_ = getattr(module, idents[0])
for ident in idents[1:]:
_cls = getattr(_cls, ident)
cls_ = getattr(cls_, ident)

assert _cls == cls
assert cls_ == cls
except AttributeError as e:
raise CodegenError(
"Couldn't find base type, it has to be importable",
16 changes: 8 additions & 8 deletions tatsu/codegen/python.py
@@ -462,8 +462,8 @@ def render_fields(self, fields):
left_recursion = self.node.config.left_recursion
parseinfo = self.node.config.parseinfo
namechars = repr(self.node.config.namechars or '')
comments_re = repr(self.node.config.comments_re)
eol_comments_re = repr(self.node.config.eol_comments_re)
comments = repr(self.node.config.comments)
eol_comments = repr(self.node.config.eol_comments)

rules = '\n'.join(
[self.get_renderer(rule).render() for rule in self.node.rules],
@@ -488,8 +488,8 @@ def render_fields(self, fields):
parseinfo=parseinfo,
keywords=keywords,
namechars=namechars,
comments_re=comments_re,
eol_comments_re=eol_comments_re,
comments=comments,
eol_comments=eol_comments,
)

abstract_rule_template = """
@@ -535,8 +535,8 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings):
ignorecase={ignorecase},
namechars={namechars},
parseinfo={parseinfo},
comments_re={comments_re},
eol_comments_re={eol_comments_re},
comments={comments},
eol_comments={eol_comments},
keywords=KEYWORDS,
start={start!r},
)
@@ -554,8 +554,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings):
ignorecase={ignorecase},
namechars={namechars},
parseinfo={parseinfo},
comments_re={comments_re},
eol_comments_re={eol_comments_re},
comments={comments},
eol_comments={eol_comments},
left_recursion={left_recursion},
keywords=KEYWORDS,
start={start!r},
2 changes: 1 addition & 1 deletion tatsu/g2e/semantics.py
@@ -9,7 +9,7 @@

def camel2py(name):
return re.sub(
'([a-z0-9])([A-Z])',
r'([a-z0-9])([A-Z])',
lambda m: m.group(1) + '_' + m.group(2).lower(),
name,
)
2 changes: 1 addition & 1 deletion tatsu/grammars.py
@@ -519,7 +519,7 @@ def _to_str(self, lean=False):

if multi:
return '\n|\n'.join(indent(o) for o in options)
elif len(options) and len(single) > PEP8_LLEN:
elif options and len(single) > PEP8_LLEN:
return '| ' + '\n| '.join(o for o in options)
else:
return single
34 changes: 27 additions & 7 deletions tatsu/infos.py
@@ -3,7 +3,7 @@
import copy
import dataclasses
import re
from collections.abc import Callable, Mapping
from collections.abc import Callable, MutableMapping
from itertools import starmap
from typing import Any, NamedTuple

@@ -30,8 +30,8 @@ class ParserConfig:
start_rule: str | None = None # FIXME
rule_name: str | None = None # Backward compatibility

comments_re: re.Pattern | None = None
eol_comments_re: re.Pattern | None = None
_comments_re: re.Pattern | None = dataclasses.field(default=None, init=False, repr=False)
_eol_comments_re: re.Pattern | None = dataclasses.field(default=None, init=False, repr=False)

tokenizercls: type[Tokenizer] | None = None # FIXME
semantics: type | None = None
@@ -64,9 +64,17 @@ def __post_init__(self): # pylint: disable=W0235
if self.ignorecase:
self.keywords = [k.upper() for k in self.keywords]
if self.comments:
self.comments_re = re.compile(self.comments)
self._comments_re = re.compile(self.comments)
if self.eol_comments:
self.eol_comments_re = re.compile(self.eol_comments)
self._eol_comments_re = re.compile(self.eol_comments)

@property
def comments_re(self) -> re.Pattern | None:
return self._comments_re

@property
def eol_comments_re(self) -> re.Pattern | None:
return self._eol_comments_re
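
A hedged usage sketch of the resulting API (the patterns are illustrative, and ``ParserConfig`` is constructed directly here):

    from tatsu.infos import ParserConfig

    config = ParserConfig(comments=r'\(\*.*?\*\)', eol_comments=r'(?m)#[^\n]*$')

    # The compiled patterns are derived, read-only views over the strings:
    print(config.comments_re.pattern)
    print(config.eol_comments_re.pattern)

    # ``comments_re``/``eol_comments_re`` are no longer accepted by __init__,
    # and assigning to either property raises AttributeError.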

@classmethod
def new(
@@ -84,7 +92,7 @@ def effective_rule_name(self):
# note: there are legacy reasons for this mess
return self.start_rule or self.rule_name or self.start

def _find_common(self, **settings: Any) -> Mapping[str, Any]:
def _find_common(self, **settings: Any) -> MutableMapping[str, Any]:
return {
name: value
for name, value in settings.items()
@@ -101,8 +109,20 @@ def replace_config(
else:
return self.replace(**vars(other))

# non-init fields cannot be used as arguments in `replace`, however
# they are values returned by `vars` and `dataclasses.asdict`, so they
# must be filtered out.
# If the `ParserConfig` dataclass drops these fields, then this filter can be removed
def _filter_non_init_fields(self, settings: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
for field in [
field.name for field in dataclasses.fields(self) if not field.init
]:
if field in settings:
del settings[field]
return settings
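
The need for this filter can be reproduced with a standalone dataclass (an illustration only; ``Config`` below is not TatSu code): ``dataclasses.replace()`` rejects keyword arguments for ``init=False`` fields, while ``vars()`` still reports them once ``__post_init__`` has assigned them.

    import dataclasses
    import re

    @dataclasses.dataclass
    class Config:
        comments: str | None = None
        _comments_re: re.Pattern | None = dataclasses.field(
            default=None, init=False, repr=False,
        )

        def __post_init__(self):
            if self.comments:
                self._comments_re = re.compile(self.comments)

    cfg = Config(comments=r'\(\*.*?\*\)')
    print('_comments_re' in vars(cfg))         # True: assigned in __post_init__

    try:
        dataclasses.replace(cfg, **vars(cfg))  # rejected: _comments_re is init=False
    except (TypeError, ValueError) as err:
        print(err)

    non_init = {f.name for f in dataclasses.fields(cfg) if not f.init}
    clean = {k: v for k, v in vars(cfg).items() if k not in non_init}
    print(dataclasses.replace(cfg, **clean))   # succeeds once the field is dropped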

def replace(self, **settings: Any) -> ParserConfig:
overrides = self._find_common(**settings)
overrides = self._filter_non_init_fields(self._find_common(**settings))
result = dataclasses.replace(self, **overrides)
if 'grammar' in overrides:
result.name = result.grammar
4 changes: 2 additions & 2 deletions tatsu/ngcodegen/python.py
@@ -323,8 +323,8 @@ def _gen_init(self, grammar: grammars.Grammar):
ignorecase={grammar.config.ignorecase},
namechars={grammar.config.namechars!r},
parseinfo={grammar.config.parseinfo},
comments_re={grammar.config.comments_re!r},
eol_comments_re={grammar.config.eol_comments_re!r},
comments={grammar.config.comments!r},
eol_comments={grammar.config.eol_comments!r},
keywords=KEYWORDS,
start={start!r},
)
2 changes: 1 addition & 1 deletion tatsu/util/_common.py
@@ -27,7 +27,7 @@
logger.addHandler(ch)


RETYPE = type(re.compile('.'))
RETYPE = re.Pattern


ESCAPE_SEQUENCE_RE = re.compile(
2 changes: 1 addition & 1 deletion tatsu/walkers.py
@@ -74,7 +74,7 @@ def pythonize_match(m):

# walk__pythonic_name with double underscore after walk
pythonic_name = re.sub(
'[A-Z]+', pythonize_match, node_cls.__name__,
r'[A-Z]+', pythonize_match, node_cls.__name__,
)
if pythonic_name != cammelcase_name:
walker = getattr(cls, prefix + pythonic_name, None)
2 changes: 1 addition & 1 deletion test/grammar/pattern_test.py
@@ -22,7 +22,7 @@ def test_patterns_with_newlines(self):
blankline
=
/^[^\\n]*\\n$/
/(?m)^[^\\n]*\\n$/
;
"""

2 changes: 1 addition & 1 deletion test/grammar/syntax_test.py
@@ -352,7 +352,7 @@ def test_parse_hash():
start = '#' ;
"""

parser = compile(grammar, eol_comments_re='')
parser = compile(grammar, eol_comments='')
parser.parse('#', trace=True)


7 changes: 3 additions & 4 deletions test/parser_equivalence_test.py
@@ -171,6 +171,7 @@ def test_none_whitespace():
output = parser.parse(input, parseinfo=False)
assert output == ('This is a', ' test')


def test_sep_join():
grammar = """
@@grammar::numbers
@@ -183,9 +184,7 @@ def test_sep_join():
= ~ ( "," )%{ digit }+
;

digit = /\d+/ ;
digit = /\\d+/ ;
"""
parser = generate_and_load_parser('W', grammar)
ast = parser.parse('1,2,3,4', nameguard=False)


parser.parse('1,2,3,4', nameguard=False)