From 22e69d51b82653c0419eebfca3eb01e8f96faa5d Mon Sep 17 00:00:00 2001 From: David Hotham Date: Mon, 31 Jul 2023 09:50:33 +0100 Subject: [PATCH] update vendored packages (#617) --- src/poetry/core/_vendor/lark/__init__.py | 2 +- src/poetry/core/_vendor/lark/common.py | 5 +- src/poetry/core/_vendor/lark/exceptions.py | 2 +- .../core/_vendor/lark/grammars/lark.lark | 5 +- .../core/_vendor/lark/grammars/python.lark | 8 +- src/poetry/core/_vendor/lark/lark.py | 27 ++-- src/poetry/core/_vendor/lark/lexer.py | 134 +++++++++++++----- src/poetry/core/_vendor/lark/load_grammar.py | 16 +-- .../core/_vendor/lark/parse_tree_builder.py | 2 + .../core/_vendor/lark/parser_frontends.py | 52 ++++--- src/poetry/core/_vendor/lark/parsers/cyk.py | 15 +- .../core/_vendor/lark/parsers/earley.py | 14 +- .../_vendor/lark/parsers/earley_forest.py | 4 +- .../_vendor/lark/parsers/grammar_analysis.py | 3 +- .../_vendor/lark/parsers/lalr_analysis.py | 26 ++-- .../lark/parsers/lalr_interactive_parser.py | 2 +- .../core/_vendor/lark/parsers/lalr_parser.py | 6 +- .../_vendor/lark/parsers/resolve_ambig.py | 109 -------------- src/poetry/core/_vendor/lark/reconstruct.py | 4 +- .../core/_vendor/lark/tools/__init__.py | 7 + .../core/_vendor/lark/tools/serialize.py | 4 +- src/poetry/core/_vendor/lark/tree.py | 16 ++- src/poetry/core/_vendor/lark/tree_matcher.py | 2 +- .../core/_vendor/lark/tree_templates.py | 4 +- src/poetry/core/_vendor/lark/utils.py | 27 ++-- src/poetry/core/_vendor/lark/visitors.py | 8 +- src/poetry/core/_vendor/vendor.txt | 2 +- vendors/poetry.lock | 37 ++--- vendors/pyproject.toml | 3 +- 29 files changed, 269 insertions(+), 277 deletions(-) delete mode 100644 src/poetry/core/_vendor/lark/parsers/resolve_ambig.py diff --git a/src/poetry/core/_vendor/lark/__init__.py b/src/poetry/core/_vendor/lark/__init__.py index 881f37a05..632230d62 100644 --- a/src/poetry/core/_vendor/lark/__init__.py +++ b/src/poetry/core/_vendor/lark/__init__.py @@ -14,7 +14,7 @@ from .utils import logger from .visitors import Discard, Transformer, Transformer_NonRecursive, Visitor, v_args -__version__: str = "1.1.5" +__version__: str = "1.1.7" __all__ = ( "GrammarError", diff --git a/src/poetry/core/_vendor/lark/common.py b/src/poetry/core/_vendor/lark/common.py index d716add7e..870b51547 100644 --- a/src/poetry/core/_vendor/lark/common.py +++ b/src/poetry/core/_vendor/lark/common.py @@ -38,8 +38,10 @@ class LexerConf(Serialize): skip_validation: bool use_bytes: bool lexer_type: Optional[_LexerArgType] + strict: bool - def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False): + def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, + callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False, strict: bool=False): self.terminals = terminals self.terminals_by_name = {t.name: t for t in self.terminals} assert len(self.terminals) == len(self.terminals_by_name) @@ -50,6 +52,7 @@ def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ig self.re_module = re_module self.skip_validation = skip_validation self.use_bytes = use_bytes + self.strict = strict self.lexer_type = None def _deserialize(self): diff --git a/src/poetry/core/_vendor/lark/exceptions.py 
b/src/poetry/core/_vendor/lark/exceptions.py index 35b986af4..32f0930a7 100644 --- a/src/poetry/core/_vendor/lark/exceptions.py +++ b/src/poetry/core/_vendor/lark/exceptions.py @@ -217,7 +217,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): expected: The set of expected tokens considered_rules: Which rules were considered, to deduce the expected tokens state: A value representing the parser state. Do not rely on its value or type. - interactive_parser: An instance of ``InteractiveParser``, that is initialized to the point of failture, + interactive_parser: An instance of ``InteractiveParser``, that is initialized to the point of failure, and can be used for debugging and error handling. Note: These parameters are available as attributes of the instance. diff --git a/src/poetry/core/_vendor/lark/grammars/lark.lark b/src/poetry/core/_vendor/lark/grammars/lark.lark index fcac71ec0..fbcf88a06 100644 --- a/src/poetry/core/_vendor/lark/grammars/lark.lark +++ b/src/poetry/core/_vendor/lark/grammars/lark.lark @@ -1,3 +1,6 @@ +# Lark grammar of Lark's syntax +# Note: Lark is not bootstrapped, its parser is implemented in load_grammar.py + start: (_item? _NL)* _item? _item: rule @@ -53,7 +56,7 @@ _NL: /(\r?\n)+\s*/ %import common.SIGNED_INT -> NUMBER %import common.WS_INLINE -COMMENT: /\s*/ "//" /[^\n]/* +COMMENT: /\s*/ "//" /[^\n]/* | /\s*/ "#" /[^\n]/* %ignore WS_INLINE %ignore COMMENT diff --git a/src/poetry/core/_vendor/lark/grammars/python.lark b/src/poetry/core/_vendor/lark/grammars/python.lark index 5c131a2a3..70ffad7e3 100644 --- a/src/poetry/core/_vendor/lark/grammars/python.lark +++ b/src/poetry/core/_vendor/lark/grammars/python.lark @@ -197,11 +197,11 @@ AWAIT: "await" ?atom: "(" yield_expr ")" | "(" _tuple_inner? ")" -> tuple | "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension - | "[" _testlist_comp? "]" -> list + | "[" _exprlist? "]" -> list | "[" comprehension{test_or_star_expr} "]" -> list_comprehension | "{" _dict_exprlist? 
"}" -> dict | "{" comprehension{key_value} "}" -> dict_comprehension - | "{" _set_exprlist "}" -> set + | "{" _exprlist "}" -> set | "{" comprehension{test} "}" -> set_comprehension | name -> var | number @@ -215,10 +215,8 @@ AWAIT: "await" ?string_concat: string+ -_testlist_comp: test | _tuple_inner _tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",") - ?test_or_star_expr: test | star_expr @@ -234,7 +232,7 @@ _dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","] key_value: test ":" test -_set_exprlist: test_or_star_expr ("," test_or_star_expr)* [","] +_exprlist: test_or_star_expr ("," test_or_star_expr)* [","] classdef: "class" name ["(" [arguments] ")"] ":" suite diff --git a/src/poetry/core/_vendor/lark/lark.py b/src/poetry/core/_vendor/lark/lark.py index c93e9e19c..d60c2040a 100644 --- a/src/poetry/core/_vendor/lark/lark.py +++ b/src/poetry/core/_vendor/lark/lark.py @@ -20,7 +20,7 @@ from .exceptions import ConfigurationError, assert_config, UnexpectedInput from .utils import Serialize, SerializeMemoizer, FS, isascii, logger -from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, md5_digest +from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, sha256_digest from .tree import Tree from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType @@ -54,6 +54,7 @@ class LarkOptions(Serialize): start: List[str] debug: bool + strict: bool transformer: 'Optional[Transformer]' propagate_positions: Union[bool, str] maybe_placeholders: bool @@ -81,10 +82,14 @@ class LarkOptions(Serialize): debug Display debug information and extra warnings. Use only when debugging (Default: ``False``) When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed. + strict + Throw an exception on any potential ambiguity, including shift/reduce conflicts, and regex collisions. transformer Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) propagate_positions - Propagates (line, column, end_line, end_column) attributes into all tree branches. + Propagates positional attributes into the 'meta' attribute of all tree branches. + Sets attributes: (line, column, end_line, end_column, start_pos, end_pos, + container_line, container_column, container_end_line, container_end_column) Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating. maybe_placeholders When ``True``, the ``[]`` operator returns ``None`` when not matched. 
@@ -156,6 +161,7 @@ class LarkOptions(Serialize): # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument _defaults: Dict[str, Any] = { 'debug': False, + 'strict': False, 'keep_all_tokens': False, 'tree_class': None, 'cache': False, @@ -254,6 +260,7 @@ class Lark(Serialize): grammar: 'Grammar' options: LarkOptions lexer: Lexer + parser: 'ParsingFrontend' terminals: Collection[TerminalDef] def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: @@ -288,7 +295,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: grammar = read() cache_fn = None - cache_md5 = None + cache_sha256 = None if isinstance(grammar, str): self.source_grammar = grammar if self.options.use_bytes: @@ -303,7 +310,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) from . import __version__ s = grammar + options_str + __version__ + str(sys.version_info[:2]) - cache_md5 = md5_digest(s) + cache_sha256 = sha256_digest(s) if isinstance(self.options.cache, str): cache_fn = self.options.cache @@ -319,7 +326,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: # specific reason - we just want a username. username = "unknown" - cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_md5, *sys.version_info[:2]) + cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_sha256, *sys.version_info[:2]) old_options = self.options try: @@ -328,9 +335,9 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: # Remove options that aren't relevant for loading from cache for name in (set(options) - _LOAD_ALLOWED_OPTIONS): del options[name] - file_md5 = f.readline().rstrip(b'\n') + file_sha256 = f.readline().rstrip(b'\n') cached_used_files = pickle.load(f) - if file_md5 == cache_md5.encode('utf8') and verify_used_files(cached_used_files): + if file_sha256 == cache_sha256.encode('utf8') and verify_used_files(cached_used_files): cached_parser_data = pickle.load(f) self._load(cached_parser_data, **options) return @@ -424,7 +431,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: # TODO Deprecate lexer_callbacks? 
self.lexer_conf = LexerConf( self.terminals, re_module, self.ignore_tokens, self.options.postlex, - self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes + self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes, strict=self.options.strict ) if self.options.parser: @@ -436,8 +443,8 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: logger.debug('Saving grammar to cache: %s', cache_fn) try: with FS.open(cache_fn, 'wb') as f: - assert cache_md5 is not None - f.write(cache_md5.encode('utf8') + b'\n') + assert cache_sha256 is not None + f.write(cache_sha256.encode('utf8') + b'\n') pickle.dump(used_files, f) self.save(f, _LOAD_ALLOWED_OPTIONS) except IOError as e: diff --git a/src/poetry/core/_vendor/lark/lexer.py b/src/poetry/core/_vendor/lark/lexer.py index 5e6d6d406..b75493342 100644 --- a/src/poetry/core/_vendor/lark/lexer.py +++ b/src/poetry/core/_vendor/lark/lexer.py @@ -4,21 +4,30 @@ import re from contextlib import suppress from typing import ( - TypeVar, Type, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, - Pattern as REPattern, ClassVar, TYPE_CHECKING, overload + TypeVar, Type, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, + ClassVar, TYPE_CHECKING, overload ) from types import ModuleType import warnings +try: + import interegular +except ImportError: + pass if TYPE_CHECKING: from .common import LexerConf -from .utils import classify, get_regexp_width, Serialize +from .utils import classify, get_regexp_width, Serialize, logger from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken from .grammar import TOKEN_DEFAULT_PRIORITY + ###{standalone from copy import copy +try: # For the standalone parser, we need to make sure that has_interegular is False to avoid NameErrors later on + has_interegular = bool(interegular) +except NameError: + has_interegular = False class Pattern(Serialize, ABC): @@ -27,7 +36,7 @@ class Pattern(Serialize, ABC): raw: Optional[str] type: ClassVar[str] - def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None: + def __init__(self, value: str, flags: Collection[str] = (), raw: Optional[str] = None) -> None: self.value = value self.flags = frozenset(flags) self.raw = raw @@ -63,7 +72,7 @@ def _get_flags(self, value): class PatternStr(Pattern): - __serialize_fields__ = 'value', 'flags' + __serialize_fields__ = 'value', 'flags', 'raw' type: ClassVar[str] = "str" @@ -80,7 +89,7 @@ def max_width(self) -> int: class PatternRE(Pattern): - __serialize_fields__ = 'value', 'flags', '_width' + __serialize_fields__ = 'value', 'flags', 'raw', '_width' type: ClassVar[str] = "re" @@ -110,7 +119,7 @@ class TerminalDef(Serialize): pattern: Pattern priority: int - def __init__(self, name: str, pattern: Pattern, priority: int=TOKEN_DEFAULT_PRIORITY) -> None: + def __init__(self, name: str, pattern: Pattern, priority: int = TOKEN_DEFAULT_PRIORITY) -> None: assert isinstance(pattern, Pattern), pattern self.name = name self.pattern = pattern @@ -120,7 +129,7 @@ def __repr__(self): return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) def user_repr(self) -> str: - if self.name.startswith('__'): # We represent a generated terminal + if self.name.startswith('__'): # We represent a generated terminal return self.pattern.raw or self.name else: return self.name @@ -162,29 +171,29 @@ class Token(str): @overload def __new__( - cls, - type: str, - value: Any, - start_pos: 
Optional[int]=None, - line: Optional[int]=None, - column: Optional[int]=None, - end_line: Optional[int]=None, - end_column: Optional[int]=None, - end_pos: Optional[int]=None + cls, + type: str, + value: Any, + start_pos: Optional[int] = None, + line: Optional[int] = None, + column: Optional[int] = None, + end_line: Optional[int] = None, + end_column: Optional[int] = None, + end_pos: Optional[int] = None ) -> 'Token': ... @overload def __new__( - cls, - type_: str, - value: Any, - start_pos: Optional[int]=None, - line: Optional[int]=None, - column: Optional[int]=None, - end_line: Optional[int]=None, - end_column: Optional[int]=None, - end_pos: Optional[int]=None + cls, + type_: str, + value: Any, + start_pos: Optional[int] = None, + line: Optional[int] = None, + column: Optional[int] = None, + end_line: Optional[int] = None, + end_column: Optional[int] = None, + end_pos: Optional[int] = None ) -> 'Token': ... def __new__(cls, *args, **kwargs): @@ -213,11 +222,11 @@ def _future_new(cls, type, value, start_pos=None, line=None, column=None, end_li return inst @overload - def update(self, type: Optional[str]=None, value: Optional[Any]=None) -> 'Token': + def update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token': ... @overload - def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token': + def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> 'Token': ... def update(self, *args, **kwargs): @@ -230,7 +239,7 @@ def update(self, *args, **kwargs): return self._future_update(*args, **kwargs) - def _future_update(self, type: Optional[str]=None, value: Optional[Any]=None) -> 'Token': + def _future_update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token': return Token.new_borrow_pos( type if type is not None else self.type, value if value is not None else self.value, @@ -364,7 +373,7 @@ def _build_mres(self, terminals, max_size): try: mre = self.re_.compile(pattern, self.g_regex_flags) except AssertionError: # Yes, this is what Python provides us.. :/ - return self._build_mres(terminals, max_size//2) + return self._build_mres(terminals, max_size // 2) mres.append(mre) terminals = terminals[max_size:] @@ -390,12 +399,16 @@ def _regexp_has_newline(r: str): class LexerState: """Represents the current state of the lexer as it scans the text - (Lexer objects are only instanciated per grammar, not per text) + (Lexer objects are only instantiated per grammar, not per text) """ __slots__ = 'text', 'line_ctr', 'last_token' - def __init__(self, text, line_ctr=None, last_token=None): + text: str + line_ctr: LineCounter + last_token: Optional[Token] + + def __init__(self, text: str, line_ctr: Optional[LineCounter]=None, last_token: Optional[Token]=None): self.text = text self.line_ctr = line_ctr or LineCounter(b'\n' if isinstance(text, bytes) else '\n') self.last_token = last_token @@ -448,8 +461,39 @@ def make_lexer_state(self, text): return LexerState(text) -class BasicLexer(Lexer): +def _check_regex_collisions(terminal_to_regexp: Dict[TerminalDef, str], comparator, strict_mode, max_collisions_to_show=8): + if not comparator: + comparator = interegular.Comparator.from_regexes(terminal_to_regexp) + # When in strict mode, we only ever try to provide one example, so taking + # a long time for that should be fine + max_time = 2 if strict_mode else 0.2 + + # We don't want to show too many collisions. 
+ if comparator.count_marked_pairs() >= max_collisions_to_show: + return + for group in classify(terminal_to_regexp, lambda t: t.priority).values(): + for a, b in comparator.check(group, skip_marked=True): + assert a.priority == b.priority + # Mark this pair to not repeat warnings when multiple different BasicLexers see the same collision + comparator.mark(a, b) + + # Notify the user + message = f"Collision between Terminals {a.name} and {b.name}. " + try: + example = comparator.get_example_overlap(a, b, max_time).format_multiline() + except ValueError: + # Couldn't find an example within max_time steps. + example = "No example could be found fast enough. However, the collision does still exist" + if strict_mode: + raise LexError(f"{message}\n{example}") + logger.warning("%s The lexer will choose between them arbitrarily.\n%s", message, example) + if comparator.count_marked_pairs() >= max_collisions_to_show: + logger.warning("Found 8 regex collisions, will not check for more.") + return + + +class BasicLexer(Lexer): terminals: Collection[TerminalDef] ignore_types: FrozenSet[str] newline_types: FrozenSet[str] user_callbacks: Dict[str, _Callback] callback: Dict[str, _Callback] re: ModuleType - def __init__(self, conf: 'LexerConf') -> None: + def __init__(self, conf: 'LexerConf', comparator=None) -> None: terminals = list(conf.terminals) assert all(isinstance(t, TerminalDef) for t in terminals), terminals @@ -465,18 +509,27 @@ def __init__(self, conf: 'LexerConf') -> None: if not conf.skip_validation: # Sanitization + terminal_to_regexp = {} for t in terminals: + regexp = t.pattern.to_regexp() try: - self.re.compile(t.pattern.to_regexp(), conf.g_regex_flags) + self.re.compile(regexp, conf.g_regex_flags) except self.re.error: raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern)) if t.pattern.min_width == 0: raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern)) + if t.pattern.type == "re": + terminal_to_regexp[t] = regexp if not (set(conf.ignore) <= {t.name for t in terminals}): raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals})) + if has_interegular: + _check_regex_collisions(terminal_to_regexp, comparator, conf.strict) + elif conf.strict: + raise LexError("interegular must be installed for strict mode.
Use `pip install 'lark[interegular]'`.") + # Init self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) self.ignore_types = frozenset(conf.ignore) @@ -517,7 +570,7 @@ def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]: while True: yield self.next_token(state, parser_state) - def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token: + def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token: line_ctr = lex_state.line_ctr while line_ctr.char_pos < len(lex_state.text): res = self.match(lex_state.text, line_ctr.char_pos) @@ -565,6 +618,10 @@ def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always trad_conf = copy(conf) trad_conf.terminals = terminals + if has_interegular and not conf.skip_validation: + comparator = interegular.Comparator.from_regexes({t: t.pattern.to_regexp() for t in terminals}) + else: + comparator = None lexer_by_tokens: Dict[FrozenSet[str], BasicLexer] = {} self.lexers = {} for state, accepts in states.items(): @@ -575,13 +632,14 @@ def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always accepts = set(accepts) | set(conf.ignore) | set(always_accept) lexer_conf = copy(trad_conf) lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name] - lexer = BasicLexer(lexer_conf) + lexer = BasicLexer(lexer_conf, comparator) lexer_by_tokens[key] = lexer self.lexers[state] = lexer assert trad_conf.terminals is terminals - self.root_lexer = BasicLexer(trad_conf) + trad_conf.skip_validation = True # We don't need to verify all terminals again + self.root_lexer = BasicLexer(trad_conf, comparator) def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: try: diff --git a/src/poetry/core/_vendor/lark/load_grammar.py b/src/poetry/core/_vendor/lark/load_grammar.py index d4f553c50..90401ad6c 100644 --- a/src/poetry/core/_vendor/lark/load_grammar.py +++ b/src/poetry/core/_vendor/lark/load_grammar.py @@ -79,7 +79,7 @@ '_RBRA': r'\]', '_LBRACE': r'\{', '_RBRACE': r'\}', - 'OP': '[+*]|[?](?![a-z])', + 'OP': '[+*]|[?](?![a-z_])', '_COLON': ':', '_COMMA': ',', '_OR': r'\|', @@ -94,7 +94,7 @@ '_NL': r'(\r?\n)+\s*', '_NL_OR': r'(\r?\n)+\s*\|', 'WS': r'[ \t]+', - 'COMMENT': r'\s*//[^\n]*', + 'COMMENT': r'\s*//[^\n]*|\s*#[^\n]*', 'BACKSLASH': r'\\[ ]*\n', '_TO': '->', '_IGNORE': r'%ignore', @@ -1314,7 +1314,7 @@ def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], alia except IOError: continue else: - h = md5_digest(text) + h = sha256_digest(text) if self.used_files.get(joined_path, h) != h: raise RuntimeError("Grammar file was changed during importing") self.used_files[joined_path] = h @@ -1393,7 +1393,7 @@ def verify_used_files(file_hashes): if text is None: # We don't know how to load the path. ignore it. continue - current = md5_digest(text) + current = sha256_digest(text) if old != current: logger.info("File %r changed, rebuilding Parser" % path) return False @@ -1411,13 +1411,13 @@ def load_grammar(grammar, source, import_paths, global_keep_all_tokens): return builder.build(), builder.used_files -def md5_digest(s: str) -> str: - """Get the md5 digest of a string +def sha256_digest(s: str) -> str: + """Get the sha256 digest of a string Supports the `usedforsecurity` argument for Python 3.9+ to allow running on a FIPS-enabled system. 
""" if sys.version_info >= (3, 9): - return hashlib.md5(s.encode('utf8'), usedforsecurity=False).hexdigest() + return hashlib.sha256(s.encode('utf8'), usedforsecurity=False).hexdigest() else: - return hashlib.md5(s.encode('utf8')).hexdigest() + return hashlib.sha256(s.encode('utf8')).hexdigest() diff --git a/src/poetry/core/_vendor/lark/parse_tree_builder.py b/src/poetry/core/_vendor/lark/parse_tree_builder.py index bb907a24b..1ce3373d0 100644 --- a/src/poetry/core/_vendor/lark/parse_tree_builder.py +++ b/src/poetry/core/_vendor/lark/parse_tree_builder.py @@ -50,6 +50,7 @@ def __call__(self, children): res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line) res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column) + res_meta.container_start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos) last_meta = self._pp_get_meta(reversed(children)) if last_meta is not None: @@ -61,6 +62,7 @@ def __call__(self, children): res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line) res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column) + res_meta.container_end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos) return res diff --git a/src/poetry/core/_vendor/lark/parser_frontends.py b/src/poetry/core/_vendor/lark/parser_frontends.py index 4e28e3613..19d7696d1 100644 --- a/src/poetry/core/_vendor/lark/parser_frontends.py +++ b/src/poetry/core/_vendor/lark/parser_frontends.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Dict, Tuple +from typing import Any, Callable, Dict, Optional, Collection from .exceptions import ConfigurationError, GrammarError, assert_config from .utils import get_regexp_width, Serialize @@ -38,7 +38,11 @@ def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options): class ParsingFrontend(Serialize): __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser' - def __init__(self, lexer_conf, parser_conf, options, parser=None): + lexer_conf: LexerConf + parser_conf: ParserConf + options: Any + + def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None): self.parser_conf = parser_conf self.lexer_conf = lexer_conf self.options = options @@ -61,16 +65,17 @@ def __init__(self, lexer_conf, parser_conf, options, parser=None): self.skip_lexer = True return - try: + if isinstance(lexer_type, type): + assert issubclass(lexer_type, Lexer) + self.lexer = _wrap_lexer(lexer_type)(lexer_conf) + elif isinstance(lexer_type, str): create_lexer = { 'basic': create_basic_lexer, 'contextual': create_contextual_lexer, }[lexer_type] - except KeyError: - assert issubclass(lexer_type, Lexer), lexer_type - self.lexer = _wrap_lexer(lexer_type)(lexer_conf) - else: self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options) + else: + raise TypeError("Bad value for lexer_type: {lexer_type}") if lexer_conf.postlex: self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) @@ -85,21 +90,23 @@ def _verify_start(self, start=None): raise ConfigurationError("Unknown start rule %s. 
Must be one of %r" % (start, self.parser_conf.start)) return start - def _make_lexer_thread(self, text): + def _make_lexer_thread(self, text: str): cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread return text if self.skip_lexer else cls.from_text(self.lexer, text) - def parse(self, text, start=None, on_error=None): + def parse(self, text: str, start=None, on_error=None): chosen_start = self._verify_start(start) kw = {} if on_error is None else {'on_error': on_error} stream = self._make_lexer_thread(text) return self.parser.parse(stream, chosen_start, **kw) - def parse_interactive(self, text=None, start=None): + def parse_interactive(self, text: Optional[str]=None, start=None): + # TODO BREAK - Change text from Optional[str] to text: str = ''. + # Would break behavior of exhaust_lexer(), which currently raises TypeError, and after the change would just return [] chosen_start = self._verify_start(start) if self.parser_conf.parser_type != 'lalr': raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") - stream = self._make_lexer_thread(text) + stream = self._make_lexer_thread(text) # type: ignore[arg-type] return self.parser.parse_interactive(stream, chosen_start) @@ -133,20 +140,21 @@ def lex(self, lexer_state, parser_state): -def create_basic_lexer(lexer_conf, parser, postlex, options): +def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer: cls = (options and options._plugins.get('BasicLexer')) or BasicLexer return cls(lexer_conf) -def create_contextual_lexer(lexer_conf, parser, postlex, options): +def create_contextual_lexer(lexer_conf: LexerConf, parser, postlex, options) -> ContextualLexer: cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer - states = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()} - always_accept = postlex.always_accept if postlex else () + states: Dict[str, Collection[str]] = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()} + always_accept: Collection[str] = postlex.always_accept if postlex else () return cls(lexer_conf, states, always_accept=always_accept) -def create_lalr_parser(lexer_conf, parser_conf, options=None): +def create_lalr_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options=None) -> LALR_Parser: debug = options.debug if options else False + strict = options.strict if options else False cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser - return cls(parser_conf, debug=debug) + return cls(parser_conf, debug=debug, strict=strict) _parser_creators['lalr'] = create_lalr_parser @@ -173,7 +181,7 @@ def match(self, term, text, index=0): return self.regexps[term.name].match(text, index) -def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw): +def create_earley_parser__dynamic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw): if lexer_conf.callbacks: raise GrammarError("Earley's dynamic lexer doesn't support lexer_callbacks.") @@ -183,10 +191,10 @@ def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw): def _match_earley_basic(term, token): return term.name == token.type -def create_earley_parser__basic(lexer_conf, parser_conf, options, **kw): +def create_earley_parser__basic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw): return earley.Parser(lexer_conf, parser_conf, _match_earley_basic, **kw) -def create_earley_parser(lexer_conf, parser_conf, options): +def create_earley_parser(lexer_conf: LexerConf, parser_conf: 
ParserConf, options) -> earley.Parser: resolve_ambiguity = options.ambiguity == 'resolve' debug = options.debug if options else False tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None @@ -195,12 +203,12 @@ def create_earley_parser(lexer_conf, parser_conf, options): if lexer_conf.lexer_type == 'dynamic': f = create_earley_parser__dynamic elif lexer_conf.lexer_type == 'dynamic_complete': - extra['complete_lex'] =True + extra['complete_lex'] = True f = create_earley_parser__dynamic else: f = create_earley_parser__basic - return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra) + return f(lexer_conf, parser_conf, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra) diff --git a/src/poetry/core/_vendor/lark/parsers/cyk.py b/src/poetry/core/_vendor/lark/parsers/cyk.py index 82818ccf9..b5334f907 100644 --- a/src/poetry/core/_vendor/lark/parsers/cyk.py +++ b/src/poetry/core/_vendor/lark/parsers/cyk.py @@ -13,11 +13,6 @@ from ..tree import Tree from ..grammar import Terminal as T, NonTerminal as NT, Symbol -try: - xrange -except NameError: - xrange = range - def match(t, s): assert isinstance(t, T) return t.name == s.type @@ -153,11 +148,11 @@ def _parse(s, g): trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight) # Iterate over lengths of sub-sentences - for l in xrange(2, len(s) + 1): + for l in range(2, len(s) + 1): # Iterate over sub-sentences with the given length - for i in xrange(len(s) - l + 1): + for i in range(len(s) - l + 1): # Choose partition of the sub-sentence in [1, l) - for p in xrange(i + 1, i + l): + for p in range(i + 1, i + l): span1 = (i, p - 1) span2 = (p, i + l - 1) for r1, r2 in itertools.product(table[span1], table[span2]): @@ -250,7 +245,7 @@ def get_any_nt_unit_rule(g): def _remove_unit_rule(g, rule): - """Removes 'rule' from 'g' without changing the langugage produced by 'g'.""" + """Removes 'rule' from 'g' without changing the language produced by 'g'.""" new_rules = [x for x in g.rules if x != rule] refs = [x for x in g.rules if x.lhs == rule.rhs[0]] new_rules += [build_unit_skiprule(rule, ref) for ref in refs] @@ -262,7 +257,7 @@ def _split(rule): rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs) rule_name = '__SP_%s' % (rule_str) + '_%d' yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias) - for i in xrange(1, len(rule.rhs) - 2): + for i in range(1, len(rule.rhs) - 2): yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split') yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split') diff --git a/src/poetry/core/_vendor/lark/parsers/earley.py b/src/poetry/core/_vendor/lark/parsers/earley.py index 2a047b032..a6229f888 100644 --- a/src/poetry/core/_vendor/lark/parsers/earley.py +++ b/src/poetry/core/_vendor/lark/parsers/earley.py @@ -9,6 +9,8 @@ is explained here: https://lark-parser.readthedocs.io/en/latest/_static/sppf/sppf.html """ +import typing + from collections import deque from ..lexer import Token @@ -20,8 +22,15 @@ from .earley_common import Item from .earley_forest import ForestSumVisitor, SymbolNode, TokenNode, ForestToParseTree +if typing.TYPE_CHECKING: + from ..common import LexerConf, ParserConf + class Parser: - def __init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity=True, debug=False, tree_class=Tree): + lexer_conf: 'LexerConf' + parser_conf: 'ParserConf' + debug: 
bool + + def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher, resolve_ambiguity=True, debug=False, tree_class=Tree): analysis = GrammarAnalyzer(parser_conf) self.lexer_conf = lexer_conf self.parser_conf = parser_conf @@ -32,7 +41,8 @@ def __init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity=True self.FIRST = analysis.FIRST self.NULLABLE = analysis.NULLABLE self.callbacks = parser_conf.callbacks - self.predictions = {} + # TODO add typing info + self.predictions = {} # type: ignore[var-annotated] ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than # the slow 'isupper' in is_terminal. diff --git a/src/poetry/core/_vendor/lark/parsers/earley_forest.py b/src/poetry/core/_vendor/lark/parsers/earley_forest.py index 5892c782c..6763af619 100644 --- a/src/poetry/core/_vendor/lark/parsers/earley_forest.py +++ b/src/poetry/core/_vendor/lark/parsers/earley_forest.py @@ -15,7 +15,6 @@ from ..parse_tree_builder import AmbiguousIntermediateExpander from ..visitors import Discard -from ..lexer import Token from ..utils import logger from ..tree import Tree @@ -85,7 +84,8 @@ def is_ambiguous(self): def children(self): """Returns a list of this node's children sorted from greatest to least priority.""" - if not self.paths_loaded: self.load_paths() + if not self.paths_loaded: + self.load_paths() return sorted(self._children, key=attrgetter('sort_key')) def __iter__(self): diff --git a/src/poetry/core/_vendor/lark/parsers/grammar_analysis.py b/src/poetry/core/_vendor/lark/parsers/grammar_analysis.py index b526e470a..bdbadf37c 100644 --- a/src/poetry/core/_vendor/lark/parsers/grammar_analysis.py +++ b/src/poetry/core/_vendor/lark/parsers/grammar_analysis.py @@ -122,8 +122,9 @@ def calculate_sets(rules): class GrammarAnalyzer: - def __init__(self, parser_conf, debug=False): + def __init__(self, parser_conf, debug=False, strict=False): self.debug = debug + self.strict = strict root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')]) for start in parser_conf.start} diff --git a/src/poetry/core/_vendor/lark/parsers/lalr_analysis.py b/src/poetry/core/_vendor/lark/parsers/lalr_analysis.py index 216371e5d..7373ebd36 100644 --- a/src/poetry/core/_vendor/lark/parsers/lalr_analysis.py +++ b/src/poetry/core/_vendor/lark/parsers/lalr_analysis.py @@ -91,9 +91,7 @@ def from_ParseTable(cls, parse_table): def digraph(X, R, G): F = {} S = [] - N = {} - for x in X: - N[x] = 0 + N = dict.fromkeys(X, 0) for x in X: # this is always true for the first iteration, but N[x] may be updated in traverse below if N[x] == 0: @@ -133,8 +131,8 @@ def traverse(x, S, N, X, R, G, F): class LALR_Analyzer(GrammarAnalyzer): - def __init__(self, parser_conf, debug=False): - GrammarAnalyzer.__init__(self, parser_conf, debug) + def __init__(self, parser_conf, debug=False, strict=False): + GrammarAnalyzer.__init__(self, parser_conf, debug, strict) self.nonterminal_transitions = [] self.directly_reads = defaultdict(set) self.reads = defaultdict(set) @@ -222,7 +220,7 @@ def compute_includes_lookback(self): if nt2 not in self.reads: continue for j in range(i + 1, len(rp.rule.expansion)): - if not rp.rule.expansion[j] in self.NULLABLE: + if rp.rule.expansion[j] not in self.NULLABLE: break else: includes.append(nt2) @@ -247,9 +245,7 @@ def compute_lalr1_states(self): m = {} reduce_reduce = [] for state in self.lr0_states: - actions = {} - for la, next_state in state.transitions.items(): - actions[la] = (Shift, 
next_state.closure) + actions = {la: (Shift, next_state.closure) for la, next_state in state.transitions.items()} for la, rules in state.lookaheads.items(): if len(rules) > 1: # Try to resolve conflict based on priority @@ -260,10 +256,18 @@ def compute_lalr1_states(self): rules = [best[1]] else: reduce_reduce.append((state, la, rules)) + continue + + rule ,= rules if la in actions: - if self.debug: + if self.strict: + raise GrammarError(f"Shift/Reduce conflict for terminal {la.name}. [strict-mode]\n ") + elif self.debug: logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name) - logger.warning(' * %s', list(rules)[0]) + logger.warning(' * %s', rule) + else: + logger.debug('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name) + logger.debug(' * %s', rule) else: actions[la] = (Reduce, list(rules)[0]) m[state] = { k.name: v for k, v in actions.items() } diff --git a/src/poetry/core/_vendor/lark/parsers/lalr_interactive_parser.py b/src/poetry/core/_vendor/lark/parsers/lalr_interactive_parser.py index f8fd93aae..7ee400170 100644 --- a/src/poetry/core/_vendor/lark/parsers/lalr_interactive_parser.py +++ b/src/poetry/core/_vendor/lark/parsers/lalr_interactive_parser.py @@ -116,7 +116,7 @@ def accepts(self): def resume_parse(self): """Resume automated parsing from the current state. """ - return self.parser.parse_from_state(self.parser_state, last_token=self.lexer_state.state.last_token) + return self.parser.parse_from_state(self.parser_state, last_token=self.lexer_thread.state.last_token) diff --git a/src/poetry/core/_vendor/lark/parsers/lalr_parser.py b/src/poetry/core/_vendor/lark/parsers/lalr_parser.py index 5cf6ca451..c330a4d2b 100644 --- a/src/poetry/core/_vendor/lark/parsers/lalr_parser.py +++ b/src/poetry/core/_vendor/lark/parsers/lalr_parser.py @@ -7,15 +7,15 @@ from ..lexer import Token from ..utils import Serialize -from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable +from .lalr_analysis import LALR_Analyzer, Shift, IntParseTable from .lalr_interactive_parser import InteractiveParser from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken ###{standalone class LALR_Parser(Serialize): - def __init__(self, parser_conf, debug=False): - analysis = LALR_Analyzer(parser_conf, debug=debug) + def __init__(self, parser_conf, debug=False, strict=False): + analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict) analysis.compute_lalr() callbacks = parser_conf.callbacks diff --git a/src/poetry/core/_vendor/lark/parsers/resolve_ambig.py b/src/poetry/core/_vendor/lark/parsers/resolve_ambig.py deleted file mode 100644 index 2470eb978..000000000 --- a/src/poetry/core/_vendor/lark/parsers/resolve_ambig.py +++ /dev/null @@ -1,109 +0,0 @@ -from ..utils import compare -from functools import cmp_to_key - -from ..tree import Tree - - -# Standard ambiguity resolver (uses comparison) -# -# Author: Erez Sh - -def _compare_rules(rule1, rule2): - return -compare( len(rule1.expansion), len(rule2.expansion)) - -def _sum_priority(tree): - p = 0 - - for n in tree.iter_subtrees(): - try: - p += n.meta.rule.options.priority or 0 - except AttributeError: - pass - - return p - -def _compare_priority(tree1, tree2): - tree1.iter_subtrees() - -def _compare_drv(tree1, tree2): - try: - rule1 = tree1.meta.rule - except AttributeError: - rule1 = None - - try: - rule2 = tree2.meta.rule - except AttributeError: - rule2 = None - - if None == rule1 == rule2: - return compare(tree1, tree2) - elif rule1 is None: - return -1 - elif 
rule2 is None: - return 1 - - assert tree1.data != '_ambig' - assert tree2.data != '_ambig' - - p1 = _sum_priority(tree1) - p2 = _sum_priority(tree2) - c = (p1 or p2) and compare(p1, p2) - if c: - return c - - c = _compare_rules(tree1.meta.rule, tree2.meta.rule) - if c: - return c - - # rules are "equal", so compare trees - if len(tree1.children) == len(tree2.children): - for t1, t2 in zip(tree1.children, tree2.children): - c = _compare_drv(t1, t2) - if c: - return c - - return compare(len(tree1.children), len(tree2.children)) - - -def _standard_resolve_ambig(tree): - assert tree.data == '_ambig' - key_f = cmp_to_key(_compare_drv) - best = max(tree.children, key=key_f) - assert best.data == 'drv' - tree.set('drv', best.children) - tree.meta.rule = best.meta.rule # needed for applying callbacks - -def standard_resolve_ambig(tree): - for ambig in tree.find_data('_ambig'): - _standard_resolve_ambig(ambig) - - return tree - - - - -# Anti-score Sum -# -# Author: Uriva (https://github.com/uriva) - -def _antiscore_sum_drv(tree): - if not isinstance(tree, Tree): - return 0 - - assert tree.data != '_ambig' - - return _sum_priority(tree) - -def _antiscore_sum_resolve_ambig(tree): - assert tree.data == '_ambig' - best = min(tree.children, key=_antiscore_sum_drv) - assert best.data == 'drv' - tree.set('drv', best.children) - tree.meta.rule = best.meta.rule # needed for applying callbacks - -def antiscore_sum_resolve_ambig(tree): - for ambig in tree.find_data('_ambig'): - _antiscore_sum_resolve_ambig(ambig) - - return tree diff --git a/src/poetry/core/_vendor/lark/reconstruct.py b/src/poetry/core/_vendor/lark/reconstruct.py index 906ca8122..79a5eee86 100644 --- a/src/poetry/core/_vendor/lark/reconstruct.py +++ b/src/poetry/core/_vendor/lark/reconstruct.py @@ -1,6 +1,6 @@ """Reconstruct text from a tree, based on Lark grammar""" -from typing import List, Dict, Union, Callable, Iterable, Optional +from typing import Dict, Callable, Iterable, Optional from .lark import Lark from .tree import Tree, ParseTree @@ -69,7 +69,7 @@ class Reconstructor(TreeMatcher): The reconstructor cannot generate values from regexps. If you need to produce discarded regexes, such as newlines, use `term_subs` and provide default values for them. 
- Paramters: + Parameters: parser: a Lark instance term_subs: a dictionary of [Terminal name as str] to [output text as str] """ diff --git a/src/poetry/core/_vendor/lark/tools/__init__.py b/src/poetry/core/_vendor/lark/tools/__init__.py index 391f991f1..c6995c690 100644 --- a/src/poetry/core/_vendor/lark/tools/__init__.py +++ b/src/poetry/core/_vendor/lark/tools/__init__.py @@ -6,6 +6,11 @@ import warnings from lark import Lark, logger +try: + from interegular import logger as interegular_logger + has_interegular = True +except ImportError: + has_interegular = False lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options') @@ -40,6 +45,8 @@ def build_lalr(namespace): logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)]) + if has_interegular: + interegular_logger.setLevel(logger.getEffectiveLevel()) if len(namespace.start) == 0: namespace.start.append('start') kwargs = {n: getattr(namespace, n) for n in options} diff --git a/src/poetry/core/_vendor/lark/tools/serialize.py b/src/poetry/core/_vendor/lark/tools/serialize.py index 61540242a..eb28824b7 100644 --- a/src/poetry/core/_vendor/lark/tools/serialize.py +++ b/src/poetry/core/_vendor/lark/tools/serialize.py @@ -1,9 +1,7 @@ -import codecs import sys import json -from lark import Lark -from lark.grammar import RuleOptions, Rule +from lark.grammar import Rule from lark.lexer import TerminalDef from lark.tools import lalr_argparser, build_lalr diff --git a/src/poetry/core/_vendor/lark/tree.py b/src/poetry/core/_vendor/lark/tree.py index 8d83a0443..438837ebe 100644 --- a/src/poetry/core/_vendor/lark/tree.py +++ b/src/poetry/core/_vendor/lark/tree.py @@ -1,11 +1,14 @@ import sys from copy import deepcopy -from typing import List, Callable, Iterator, Union, Optional, Generic, TypeVar, Any, TYPE_CHECKING +from typing import List, Callable, Iterator, Union, Optional, Generic, TypeVar, TYPE_CHECKING if TYPE_CHECKING: from .lexer import TerminalDef, Token - import rich + try: + import rich + except ImportError: + pass if sys.version_info >= (3, 8): from typing import Literal else: @@ -44,7 +47,12 @@ class Tree(Generic[_Leaf_T]): data: The name of the rule or alias children: List of matched sub-rules and terminals meta: Line & Column numbers (if ``propagate_positions`` is enabled). - meta attributes: line, column, start_pos, end_line, end_column, end_pos + meta attributes: (line, column, end_line, end_column, start_pos, end_pos, + container_line, container_column, container_end_line, container_end_column) + container_* attributes consider all symbols, including those that have been inlined in the tree. + For example, in the rule 'a: _A B _C', the regular attributes will mark the start and end of B, + but the container_* attributes will also include _A and _C in the range. However, rules that + contain 'a' will consider it in full, including _A and _C for all attributes. """ data: str @@ -86,7 +94,7 @@ def pretty(self, indent_str: str=' ') -> str: """ return ''.join(self._pretty(0, indent_str)) - def __rich__(self, parent:'rich.tree.Tree'=None) -> 'rich.tree.Tree': + def __rich__(self, parent:Optional['rich.tree.Tree']=None) -> 'rich.tree.Tree': """Returns a tree widget for the 'rich' library. 
Example: diff --git a/src/poetry/core/_vendor/lark/tree_matcher.py b/src/poetry/core/_vendor/lark/tree_matcher.py index fdcd2bfc2..0f42652e5 100644 --- a/src/poetry/core/_vendor/lark/tree_matcher.py +++ b/src/poetry/core/_vendor/lark/tree_matcher.py @@ -83,7 +83,7 @@ class TreeMatcher: Supports templates and inlined rules (`rule{a, b,..}` and `_rule`) - Initiialize with an instance of Lark. + Initialize with an instance of Lark. """ def __init__(self, parser): diff --git a/src/poetry/core/_vendor/lark/tree_templates.py b/src/poetry/core/_vendor/lark/tree_templates.py index 03eaa27b8..6ec7323f4 100644 --- a/src/poetry/core/_vendor/lark/tree_templates.py +++ b/src/poetry/core/_vendor/lark/tree_templates.py @@ -138,7 +138,7 @@ def match(self, tree: TreeOrCode) -> Optional[MatchResult]: return self.conf._match_tree_template(self.tree, tree) def search(self, tree: TreeOrCode) -> Iterator[Tuple[Tree[str], MatchResult]]: - """Search for all occurances of the tree template inside ``tree``. + """Search for all occurrences of the tree template inside ``tree``. """ tree = self.conf._get_tree(tree) for subtree in tree.iter_subtrees(): @@ -153,7 +153,7 @@ def apply_vars(self, vars: Mapping[str, Tree[str]]) -> Tree[str]: def translate(t1: Template, t2: Template, tree: TreeOrCode): - """Search tree and translate each occurrance of t1 into t2. + """Search tree and translate each occurrence of t1 into t2. """ tree = t1.conf._get_tree(tree) # ensure it's a tree, parse if necessary and possible for subtree, vars in t1.search(tree): diff --git a/src/poetry/core/_vendor/lark/utils.py b/src/poetry/core/_vendor/lark/utils.py index 6781e6fb1..b47096f2c 100644 --- a/src/poetry/core/_vendor/lark/utils.py +++ b/src/poetry/core/_vendor/lark/utils.py @@ -1,8 +1,8 @@ import unicodedata import os -from functools import reduce +from itertools import product from collections import deque -from typing import Callable, Iterator, List, Optional, Tuple, Type, TypeVar, Union, Dict, Any, Sequence +from typing import Callable, Iterator, List, Optional, Tuple, Type, TypeVar, Union, Dict, Any, Sequence, Iterable ###{standalone import sys, re @@ -20,14 +20,14 @@ T = TypeVar("T") -def classify(seq: Sequence, key: Optional[Callable] = None, value: Optional[Callable] = None) -> Dict: +def classify(seq: Iterable, key: Optional[Callable] = None, value: Optional[Callable] = None) -> Dict: d: Dict[Any, Any] = {} for item in seq: k = key(item) if (key is not None) else item v = value(item) if (value is not None) else item - if k in d: + try: d[k].append(v) - else: + except KeyError: d[k] = [v] return d @@ -188,7 +188,7 @@ def dedup_list(l: List[T]) -> List[T]: dedup = set() # This returns None, but that's expected return [x for x in l if not (x in dedup or dedup.add(x))] # type: ignore[func-returns-value] - # 2x faster (ordered in PyPy and CPython 3.6+, gaurenteed to be ordered in Python 3.7+) + # 2x faster (ordered in PyPy and CPython 3.6+, guaranteed to be ordered in Python 3.7+) # return list(dict.fromkeys(l)) @@ -213,7 +213,7 @@ def reversed(self) -> Dict[int, Any]: def combine_alternatives(lists): """ - Accepts a list of alternatives, and enumerates all their possible concatinations. + Accepts a list of alternatives, and enumerates all their possible concatenations. 
Examples: >>> combine_alternatives([range(2), [4,5]]) @@ -228,9 +228,7 @@ def combine_alternatives(lists): if not lists: return [[]] assert all(l for l in lists), lists - init = [[x] for x in lists[0]] - return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init) - + return list(product(*lists)) try: import atomicwrites @@ -268,15 +266,8 @@ def __repr__(self): def classify_bool(seq: Sequence, pred: Callable) -> Any: - true_elems = [] false_elems = [] - - for elem in seq: - if pred(elem): - true_elems.append(elem) - else: - false_elems.append(elem) - + true_elems = [elem for elem in seq if pred(elem) or false_elems.append(elem)] # type: ignore[func-returns-value] return true_elems, false_elems diff --git a/src/poetry/core/_vendor/lark/visitors.py b/src/poetry/core/_vendor/lark/visitors.py index 932fbee19..f73241803 100644 --- a/src/poetry/core/_vendor/lark/visitors.py +++ b/src/poetry/core/_vendor/lark/visitors.py @@ -520,7 +520,7 @@ def v_args(inline: bool = False, meta: bool = False, tree: bool = False, wrapper Parameters: inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists). - meta (bool, optional): Provides two arguments: ``children`` and ``meta`` (instead of just the first) + meta (bool, optional): Provides two arguments: ``meta`` and ``children`` (instead of just the latter) tree (bool, optional): Provides the entire tree as the argument, instead of the children. wrapper (function, optional): Provide a function to decorate all methods. @@ -532,6 +532,12 @@ class SolveArith(Transformer): def add(self, left, right): return left + right + @v_args(meta=True) + def mul(self, meta, children): + logger.info(f'mul at line {meta.line}') + left, right = children + return left * right + class ReverseNotation(Transformer_InPlace): @v_args(tree=True) diff --git a/src/poetry/core/_vendor/vendor.txt b/src/poetry/core/_vendor/vendor.txt index 9a4b488e1..1cf68fcf1 100644 --- a/src/poetry/core/_vendor/vendor.txt +++ b/src/poetry/core/_vendor/vendor.txt @@ -1,6 +1,6 @@ attrs==23.1.0 ; python_version >= "3.8" and python_version < "4.0" jsonschema==4.17.3 ; python_version >= "3.8" and python_version < "4.0" -lark==1.1.5 ; python_version >= "3.8" and python_version < "4.0" +lark==1.1.7 ; python_version >= "3.8" and python_version < "4.0" packaging==23.1 ; python_version >= "3.8" and python_version < "4.0" pyrsistent==0.19.3 ; python_version >= "3.8" and python_version < "4.0" tomli==2.0.1 ; python_version >= "3.8" and python_version < "4.0" diff --git a/vendors/poetry.lock b/vendors/poetry.lock index 69d9420f1..b84fe98f4 100644 --- a/vendors/poetry.lock +++ b/vendors/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. 
[[package]] name = "attrs" @@ -20,21 +20,21 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte [[package]] name = "importlib-resources" -version = "5.12.0" +version = "6.0.0" description = "Read resources from Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"}, - {file = "importlib_resources-5.12.0.tar.gz", hash = "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6"}, + {file = "importlib_resources-6.0.0-py3-none-any.whl", hash = "sha256:d952faee11004c045f785bb5636e8f885bed30dc3c940d5d42798a2a4541c185"}, + {file = "importlib_resources-6.0.0.tar.gz", hash = "sha256:4cf94875a8368bd89531a756df9a9ebe1f150e0f885030b461237bc7f2d905f2"}, ] [package.dependencies] zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} [package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] [[package]] name = "jsonschema" @@ -59,17 +59,18 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- [[package]] name = "lark" -version = "1.1.5" +version = "1.1.7" description = "a modern parsing library" optional = false -python-versions = "*" +python-versions = ">=3.6" files = [ - {file = "lark-1.1.5-py3-none-any.whl", hash = "sha256:8476f9903e93fbde4f6c327f74d79e9b4bd0ed9294c5dfa3164ab8c581b5de2a"}, - {file = "lark-1.1.5.tar.gz", hash = "sha256:4b534eae1f9af5b4ea000bea95776350befe1981658eea3820a01c37e504bb4d"}, + {file = "lark-1.1.7-py3-none-any.whl", hash = "sha256:9e5dc5bbf93fa1840083707285262514a0ef8a6613874af7ea1cec60468d6e92"}, + {file = "lark-1.1.7.tar.gz", hash = "sha256:be7437bf1f37ab08b355f29ff2571d77d777113d0a8c4352b0c513dced6c5a1e"}, ] [package.extras] atomic-cache = ["atomicwrites"] +interegular = ["interegular (>=0.3.1,<0.4.0)"] nearley = ["js2py"] regex = ["regex"] @@ -144,20 +145,20 @@ files = [ [[package]] name = "zipp" -version = "3.15.0" +version = "3.16.2" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, - {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, + {file = "zipp-3.16.2-py3-none-any.whl", hash = "sha256:679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0"}, + {file = "zipp-3.16.2.tar.gz", hash = "sha256:ebc15946aa78bd63458992fc81ec3b6f7b1e92d51c35e6de1c3804e73b799147"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", 
"pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "26f259e62d069e29592917492deae2255dd5090bb72e7a8a87f113c2db8768d9" +content-hash = "8d7512f287afdae58acb428e12215fb1c3eb3099994796c9d4d73d72094f69dd" diff --git a/vendors/pyproject.toml b/vendors/pyproject.toml index 5fff4a40c..e6d172bc3 100644 --- a/vendors/pyproject.toml +++ b/vendors/pyproject.toml @@ -21,7 +21,8 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.8" -jsonschema = "^4.17.3" +# jsonschema 4.18 has binary dependencies, making it unsuitable for vendoring. +jsonschema = "^4.17.3,<4.18.0" lark = "^1.1.3" packaging = ">=22.0" tomli = "^2.0.1"