From 76d2540e55190fead24e02ee75969470e67ada5b Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 04:22:51 -0400 Subject: [PATCH 01/27] feat: implemented Text.Parsec.Combinators.between, and implemented integer part of Text.Parsec.Number --- .coveragerc | 15 ++++ .github/workflows/ci.yaml | 7 +- .gitignore | 3 + setup.py | 9 ++- src/parsec/__init__.py | 128 +++++++++++++++++--------------- src/parsec/__init__.pyi | 27 +++++++ src/parsec/tests/test_parsec.py | 113 +++++++++++++++++++++++++++- 7 files changed, 239 insertions(+), 63 deletions(-) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..684a473 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,15 @@ +[run] +relative_files = True +branch = True +omit = + test_*.py + src/parsec/tests/* + +# coverage.py does not currently handle @overload decorated methods gracefully. +# overloaded methods should be ignored because they are not allowed to contain code +[report] +exclude_lines = + pragma: not covered + @overload + \.\.\. 
+ if TYPE_CHECKING: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4b30583..6f98f3d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: ['3.5', '3.6', '3.7', '3.8', '3.9', '3.10'] + python-version: ['2.7', '3.5', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v2 @@ -19,4 +19,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - - run: python setup.py test + - run: pip install -e .[dev] + + - run: coverage run setup.py test + - run: coverage report diff --git a/.gitignore b/.gitignore index db4561e..9332007 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,6 @@ docs/_build/ # PyBuilder target/ + +# venv +.venv/ diff --git a/setup.py b/setup.py index 97c2b94..2926c6b 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name = 'parsec', - version = '3.15', + version = '3.16', description = 'parser combinator.', long_description = 'A universal Python parser combinator library inspired by Parsec library of Haskell.', author = 'He Tao', @@ -33,6 +33,7 @@ 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'License :: OSI Approved :: MIT License', ], platforms = 'any', @@ -40,7 +41,13 @@ install_requires = [ 'enum34; python_version < "3.5"', + 'setuptools', ], + extras_require={ + 'dev': [ + 'coverage', + ], + }, package_dir = {'': 'src'}, packages = find_packages('src'), package_data = {'': ('py.typed', '*.pyi')}, diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 698a4f8..a45e53d 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -1,5 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from __future__ import annotations ''' A universal Python parser combinator library inspired by Parsec library of Haskell. 
@@ -7,9 +8,10 @@ __author__ = 'He Tao, sighingnow@gmail.com' +import operator import re import warnings -from functools import wraps +from functools import reduce, wraps from collections import namedtuple ########################################################################## @@ -246,7 +248,7 @@ def excepts_parser(text, index): def parsecmap(self, fn): '''Returns a parser that transforms the produced value of parser with `fn`.''' - return self.bind(lambda res: Parser(lambda _, index: Value.success(index, fn(res)))) + return self.bind(lambda res: success_with(fn(res), advance=False)) def parsecapp(self, other): '''Returns a parser that applies the produced value of this parser to the produced value of `other`.''' @@ -255,7 +257,7 @@ def parsecapp(self, other): def result(self, res): '''Return a value according to the parameter `res` when parse successfully.''' - return self >> Parser(lambda _, index: Value.success(index, res)) + return self >> success_with(res, advance=False) def mark(self): '''Mark the line and column information of the result of this parser.''' @@ -273,7 +275,7 @@ def mark_parser(text, index): def desc(self, description): '''Describe a parser, when it failed, print out the description text.''' - return self | Parser(lambda _, index: Value.failure(index, description)) + return self | fail_with(description) def __or__(self, other): '''Implements the `(|)` operator, means `choice`.''' @@ -628,77 +630,42 @@ def sepEndBy1(p, sep): # Text.Parsec.Char ########################################################################## - -def any(): - '''Parses a arbitrary character.''' +def satisfy(predicate, failure=None): @Parser - def any_parser(text, index=0): - if index < len(text): + def satisfy_parser(text, index=0): + if index < len(text) and predicate(text[index]): return Value.success(index + 1, text[index]) else: - return Value.failure(index, 'a random char') - return any_parser + return Value.failure(index, failure or "does not satisfy predicate") + 
return satisfy_parser +def any(): + '''Parses a arbitrary character.''' + return satisfy(lambda _: True, 'a random char') def one_of(s): '''Parses a char from specified string.''' - @Parser - def one_of_parser(text, index=0): - if index < len(text) and text[index] in s: - return Value.success(index + 1, text[index]) - else: - return Value.failure(index, 'one of {}'.format(s)) - return one_of_parser - + return satisfy(lambda c: c in s, 'one of {}'.format(s)) def none_of(s): '''Parses a char NOT from specified string.''' - @Parser - def none_of_parser(text, index=0): - if index < len(text) and text[index] not in s: - return Value.success(index + 1, text[index]) - else: - return Value.failure(index, 'none of {}'.format(s)) - return none_of_parser - + return satisfy(lambda c: c not in s, 'none of {}'.format(s)) def space(): '''Parses a whitespace character.''' - @Parser - def space_parser(text, index=0): - if index < len(text) and text[index].isspace(): - return Value.success(index + 1, text[index]) - else: - return Value.failure(index, 'one space') - return space_parser - + return satisfy(str.isspace, 'one space') def spaces(): '''Parses zero or more whitespace characters.''' return many(space()) - def letter(): '''Parse a letter in alphabet.''' - @Parser - def letter_parser(text, index=0): - if index < len(text) and text[index].isalpha(): - return Value.success(index + 1, text[index]) - else: - return Value.failure(index, 'a letter') - return letter_parser - + return satisfy(str.isalpha, 'a letter') def digit(): '''Parse a digit.''' - @Parser - def digit_parser(text, index=0): - if index < len(text) and text[index].isdigit(): - return Value.success(index + 1, text[index]) - else: - return Value.failure(index, 'a digit') - return digit_parser - + return satisfy(str.isdigit, 'a digit') def eof(): '''Parses EOF flag of a string.''' @@ -710,7 +677,6 @@ def eof_parser(text, index=0): return Value.failure(index, 'EOF') return eof_parser - def string(s): '''Parses a 
string.''' @Parser @@ -744,16 +710,16 @@ def regex_parser(text, index): return Value.failure(index, exp.pattern) return regex_parser - ########################################################################## # Useful utility parsers ########################################################################## +def success_with(value, advance=False): + return Parser(lambda _, index: Value.success(index + int(advance), value)) def fail_with(message): return Parser(lambda _, index: Value.failure(index, message)) - def exclude(p: Parser, exclude: Parser): '''Fails parser p if parser `exclude` matches''' @Parser @@ -765,8 +731,7 @@ def exclude_parser(text, index): return p(text, index) return exclude_parser - -def lookahead(p: Parser): +def lookahead(p): '''Parses without consuming''' @Parser def lookahead_parser(text, index): @@ -778,7 +743,7 @@ def lookahead_parser(text, index): return lookahead_parser -def unit(p: Parser): +def unit(p): '''Converts a parser into a single unit. Only consumes input if the parser succeeds''' @Parser def unit_parser(text, index): @@ -789,7 +754,14 @@ def unit_parser(text, index): return Value.failure(index, res.expected) return unit_parser - +def between(open, close, parser): + @generate + def between_parser(): + yield open + results = yield parser + yield close + return results + return between_parser def fix(fn): '''Allow recursive parser using the Y combinator trick. @@ -800,3 +772,41 @@ def fix(fn): See also: https://github.com/sighingnow/parsec.py/issues/39. 
''' return (lambda x: x(x))(lambda y: fn(lambda *args: y(y)(*args))) + +def validate(predicate): + def validator(value): + if predicate(value): + return success_with(value, advance=False) + else: + return fail_with(f"{value} does not satisfy the given predicate {predicate}") + return validator + +########################################################################## +# Text.Parsec.Number +########################################################################## + +sign = string("-").result(operator.neg).desc("'-'") | optional(string("+").result(lambda x: x).desc("'+'"), lambda x: x) + +def number(base: int, digit: Parser[str]) -> Parser[int]: + return many1(digit).parsecmap( + lambda digits: reduce(lambda accumulation, digit: accumulation * base + int(digit, base), digits, 0), + ) + +binary_digit = one_of("01").desc("binary_digit") +binary_number = number(2, binary_digit).desc("binary_number") +binary = (one_of("bB") >> binary_number).desc("binary") + +octal_digit = one_of("01234567").desc("octal_digit") +octal_number = number(8, octal_digit).desc("octal_number") +octal = (one_of("oO") >> octal_number).desc("octal") + +hexadecimal_digit = one_of("0123456789ABCDEFabcdef").desc("hexadecimal_digit") +hexadecimal_number = number(16, hexadecimal_digit).desc("hexadecimal_number") +hexadecimal = (one_of("xX") >> hexadecimal_number).desc("hexadecimal") + +decimal_number = number(10, digit()).desc("decimal_number") +decimal = decimal_number + +zero_number = string("0") >> (hexadecimal | octal | binary | decimal | success_with(0)) +natural = zero_number | decimal +integer = sign.parsecapp(natural) diff --git a/src/parsec/__init__.pyi b/src/parsec/__init__.pyi index 0c4a74f..1ca41cc 100644 --- a/src/parsec/__init__.pyi +++ b/src/parsec/__init__.pyi @@ -116,6 +116,7 @@ def endBy(p: Parser[_U], sep: Parser) -> Parser[list[_U]]: ... def endBy1(p: Parser[_U], sep: Parser) -> Parser[list[_U]]: ... def sepEndBy(p: Parser[_U], sep: Parser) -> Parser[list[_U]]: ... 
def sepEndBy1(p: Parser[_U], sep: Parser) -> Parser[list[_U]]: ... +def satisfy(predicate: CA.Callable[[_U], bool]) -> Parser[_U]: ... def any() -> Parser: ... def one_of(s: CA.Container[_U]) -> Parser[_U]: ... def none_of(s: CA.Container[_U]) -> Parser[_U]: ... @@ -126,7 +127,33 @@ def digit() -> Parser[str]: ... def eof() -> Parser[None]: ... def string(s: _VS) -> Parser[_VS]: ... def regex(exp: str | re.Pattern, flags: re.RegexFlag = ...) -> Parser[str]: ... +def success_with(value: _U, advance: bool = False) -> Parser[_U]: ... def fail_with(message: str) -> Parser: ... def exclude(p: Parser[_U], exclude: Parser) -> Parser[_U]: ... def lookahead(p: Parser[_U]) -> Parser[_U]: ... def unit(p: Parser[_U]) -> Parser[_U]: ... +def between(open: Parser[_U], close: Parser[_U], parser: Parser[_U]) -> Parser[_U]: ... +def validate(predicate: CA.Callable[[_U], bool]) -> Parser[_U]: ... + +sign: Parser[CA.Callable[[_U], _U]] + +def number(base: int, digit: Parser[str]) -> Parser[int]: ... + +binary_digit: Parser[str] +binary_number: Parser[int] +binary: Parser[int] + +octal_digit: Parser[str] +octal_number: Parser[int] +octal: Parser[int] + +hexadecimal_digit: Parser[str] +hexadecimal_number: Parser[int] +hexadecimal: Parser[int] + +decimal_number: Parser[int] +decimal: Parser[int] + +zero_number: Parser[int] +natural: Parser[int] +integer: Parser[int] diff --git a/src/parsec/tests/test_parsec.py b/src/parsec/tests/test_parsec.py index b6b00e0..2547278 100644 --- a/src/parsec/tests/test_parsec.py +++ b/src/parsec/tests/test_parsec.py @@ -14,6 +14,18 @@ from parsec import * +class ParseErrorTest(unittest.TestCase): + def test_loc_info_should_throw_on_invalid_index(self): + self.assertRaises(ValueError, ParseError.loc_info, "", 1) + + def test_loc_info_should_use_default_values_when_text_is_not_str(self): + self.assertEqual(ParseError.loc_info([0], 0), (0, -1)) + + def test_str(self): + self.assertTrue(str(ParseError("foo bar", "test", 0))) + # trigger ValueError + 
self.assertTrue(str(ParseError("foo bar", "", 1))) + class ParsecTest(unittest.TestCase): '''Test the implementation of Text.Parsec. (The final test for all apis)''' def test_times_with_then(self): @@ -57,7 +69,7 @@ def binder(x): nonlocals['piped'] = x return string('y') - parser = string('x').bind(binder) + parser = string('x') >= binder self.assertEqual(parser.parse('xy'), 'y') self.assertEqual(nonlocals['piped'], 'x') self.assertRaises(ParseError, parser.parse, 'x') @@ -322,6 +334,15 @@ def test_excepts(self): self.assertEqual(parser.parse('<'), "<") self.assertEqual(parser.parse('<='), "<") + def test_between(self): + parser = between(string("("), string(")"), many(none_of(")"))) + self.assertEqual(parser.parse("()"), []) + self.assertEqual(parser.parse("(abc)"), ["a", "b", "c"]) + self.assertRaises(ParseError, parser.parse, "") + self.assertRaises(ParseError, parser.parse, "(") + self.assertRaises(ParseError, parser.parse, ")") + self.assertRaises(ParseError, parser.parse, ")(") + def test_fix(self): @Parser @fix @@ -330,6 +351,10 @@ def bracketed_expr(recur): self.assertEqual(bracketed_expr.parse("((x))"), 'x') + def test_validate(self): + parser = any() >= validate(str.isalpha) + self.assertEqual(parser.parse("a"), "a") + self.assertRaises(ParseError, parser.parse, "1") class ParsecCharTest(unittest.TestCase): '''Test the implementation of Text.Parsec.Char.''' @@ -344,6 +369,92 @@ def test_regex(self): self.assertEqual(parser.parse('1'), '1') self.assertEqual(parser.parse('4'), '4') self.assertRaises(ParseError, parser.parse, 'x') + # combinator only accepts string as input + self.assertRaises(ParseError, parser.parse, [1]) + + def test_one_of(self): + parser = one_of('abc') + self.assertEqual(parser.parse('a'), 'a') + self.assertEqual(parser.parse('b'), 'b') + self.assertEqual(parser.parse('c'), 'c') + self.assertRaises(ParseError, parser.parse, 'd') + + def test_none_of(self): + parser = none_of('abc') + self.assertRaises(ParseError, parser.parse, 'a') + 
self.assertRaises(ParseError, parser.parse, 'b') + self.assertRaises(ParseError, parser.parse, 'c') + self.assertEqual(parser.parse('d'), 'd') + + def test_exclude(self): + parser = exclude(string("test"), string("should-be-excluded")) + self.assertEqual(parser.parse("test"), "test") + self.assertRaises(ParseError, parser.parse, "should-be-excluded") + + def test_lookahead(self): + parser = lookahead(string("test")) + string("test") + self.assertEqual(parser.parse("test"), ("test", "test")) + self.assertRaises(ParseError, parser.parse, "tes") + + def test_unit(self): + parser = unit(string("abc")) | one_of("a") + self.assertEqual(parser.parse("abc"), "abc") + self.assertEqual(parser.parse("a"), "a") + +class ParsecNumberTest(unittest.TestCase): + '''Test the implementation of Text.Parsec.Number.''' + + def test_decimal(self): + parser = decimal + self.assertEqual(parser.parse('0'), 0) + self.assertEqual(parser.parse('1'), 1) + self.assertEqual(parser.parse('10'), 10) + self.assertEqual(parser.parse('9999'), 9999) + + def test_binary(self): + parser = binary + self.assertEqual(parser.parse('b0'), 0b0) + self.assertEqual(parser.parse('b1'), 0b1) + self.assertEqual(parser.parse('B1'), 0b1) + self.assertEqual(parser.parse('b10'), 0b10) + self.assertEqual(parser.parse('B10'), 0b10) + self.assertEqual(parser.parse('b1111'), 0b1111) + self.assertEqual(parser.parse('B1111'), 0b1111) + + def test_octal(self): + parser = octal + self.assertEqual(parser.parse('o0'), 0o0) + self.assertEqual(parser.parse('o1'), 0o1) + self.assertEqual(parser.parse('O1'), 0o1) + self.assertEqual(parser.parse('o10'), 0o10) + self.assertEqual(parser.parse('O10'), 0o10) + self.assertEqual(parser.parse('o7777'), 0o7777) + self.assertEqual(parser.parse('O7777'), 0o7777) + + def test_hexadecimal(self): + parser = hexadecimal + self.assertEqual(parser.parse('x0'), 0x0) + self.assertEqual(parser.parse('x1'), 0x1) + self.assertEqual(parser.parse('X1'), 0x1) + self.assertEqual(parser.parse('x10'), 0x10) + 
self.assertEqual(parser.parse('X10'), 0x10) + self.assertEqual(parser.parse('xffff'), 0xffff) + self.assertEqual(parser.parse('Xffff'), 0xffff) + + def test_integer(self): + parser = integer + self.assertEqual(parser.parse('0'), 0) + self.assertEqual(parser.parse('-1'), -1) + self.assertEqual(parser.parse('+1'), 1) + self.assertEqual(parser.parse('0b10'), 0b10) + self.assertEqual(parser.parse('-0b10'), -0b10) + self.assertEqual(parser.parse('+0b10'), 0b10) + self.assertEqual(parser.parse('0o10'), 0o10) + self.assertEqual(parser.parse('+0o10'), 0o10) + self.assertEqual(parser.parse('-0o10'), -0o10) + self.assertEqual(parser.parse('0x10'), 0x10) + self.assertEqual(parser.parse('+0x10'), 0x10) + self.assertEqual(parser.parse('-0x10'), -0x10) class ParserGeneratorTest(unittest.TestCase): '''Test the implementation of Parser Generator.(generate)''' From 0214ab07a0ce170c71d9d4cc583259fd4b04662f Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 04:24:03 -0400 Subject: [PATCH 02/27] ci: trigger pipeline From d0f947070a06522d2803c1da00626eddd1c4f3f1 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 04:26:02 -0400 Subject: [PATCH 03/27] ci: remove python 2.7 --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6f98f3d..4e13bbc 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: ['2.7', '3.5', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.5', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v2 From 761758e4ed7fb51c8e5b0315d4bdc0f852db1377 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 04:27:14 -0400 Subject: [PATCH 04/27] ci: remove python < 3.6 --- .github/workflows/ci.yaml | 2 +- setup.py | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git 
a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4e13bbc..db77b59 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: ['3.5', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v2 diff --git a/setup.py b/setup.py index 2926c6b..f9fdf9c 100644 --- a/setup.py +++ b/setup.py @@ -20,13 +20,7 @@ 'Operating System :: Microsoft :: Windows', 'Operating System :: POSIX', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.2', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', From ca6ff27b2f270cbea0370c068e72bfc979ab3422 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 04:28:36 -0400 Subject: [PATCH 05/27] ci: remove from __future__ import annotations --- src/parsec/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index a45e53d..84a5c60 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -1,6 +1,5 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import annotations ''' A universal Python parser combinator library inspired by Parsec library of Haskell. 
From 631c602636fb59df437af241af590e64602fbbf0 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 04:29:37 -0400 Subject: [PATCH 06/27] ci: remove typing from src/parsec/__init__.py --- src/parsec/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 84a5c60..cf06fd6 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -786,7 +786,7 @@ def validator(value): sign = string("-").result(operator.neg).desc("'-'") | optional(string("+").result(lambda x: x).desc("'+'"), lambda x: x) -def number(base: int, digit: Parser[str]) -> Parser[int]: +def number(base, digit): return many1(digit).parsecmap( lambda digits: reduce(lambda accumulation, digit: accumulation * base + int(digit, base), digits, 0), ) From 2aad4acd66fb6d375a592491dfc7df025d5f793c Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 04:31:05 -0400 Subject: [PATCH 07/27] chore: continue to mark supporting old python version --- setup.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/setup.py b/setup.py index f9fdf9c..2926c6b 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,13 @@ 'Operating System :: Microsoft :: Windows', 'Operating System :: POSIX', 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.2', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', From a33f561bf74dafb06371125c538b804b6c4a5569 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 04:33:35 -0400 Subject: [PATCH 08/27] refactor: remove typing from src/parsec/__init__.py --- src/parsec/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/parsec/__init__.py b/src/parsec/__init__.py index cf06fd6..7f21a6f 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -719,7 +719,7 @@ def success_with(value, advance=False): def fail_with(message): return Parser(lambda _, index: Value.failure(index, message)) -def exclude(p: Parser, exclude: Parser): +def exclude(p, exclude): '''Fails parser p if parser `exclude` matches''' @Parser def exclude_parser(text, index): From 508867cf9a7657ea0b304d0b41d660ad2f682ad6 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 04:37:07 -0400 Subject: [PATCH 09/27] ci: show missing lines on coverage --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index db77b59..0aef995 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -22,4 +22,4 @@ jobs: - run: pip install -e .[dev] - run: coverage run setup.py test - - run: coverage report + - run: coverage report -m From 9dda772b1167e903f4bed971122d78b67c255053 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 04:59:02 -0400 Subject: [PATCH 10/27] refactor: remove edge case --- src/parsec/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 7f21a6f..ada2ea3 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -784,7 +784,7 @@ def validator(value): # Text.Parsec.Number ########################################################################## -sign = string("-").result(operator.neg).desc("'-'") | optional(string("+").result(lambda x: x).desc("'+'"), lambda x: x) +sign = string("-").result(operator.neg).desc("'-'") | optional(string("+").desc("'+'")).result(lambda x: x) def number(base, digit): return many1(digit).parsecmap( From 32e435dbe4ec96cbe3f29600cc6b6dc9564edab8 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 14:15:07 -0400 Subject: [PATCH 11/27] ci: added mypy for 
type checking --- .github/workflows/ci.yaml | 2 + mypy.ini | 15 +++++ setup.py | 3 +- src/parsec/__init__.py | 56 ++++++++++------- src/parsec/__init__.pyi | 9 ++- src/parsec/tests/test_parsec.py | 104 +++++++++++++++++++++++++++++++- 6 files changed, 163 insertions(+), 26 deletions(-) create mode 100644 mypy.ini diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0aef995..5d989a4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -23,3 +23,5 @@ jobs: - run: coverage run setup.py test - run: coverage report -m + + - run: mypy . diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..d794d83 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,15 @@ +[mypy] +pretty = True +mypy_path = $MYPY_CONFIG_FILE_DIR/src +packages = parsec +exclude = docs/|examples/|build/lib|src/parsec/tests + +explicit_package_bases = True +check_untyped_defs = True +implicit_reexport = True +show_error_codes = True +show_column_numbers = True +follow_imports = silent + +warn_redundant_casts = True +warn_unused_ignores = True diff --git a/setup.py b/setup.py index 2926c6b..cb9a35d 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from setuptools import setup, find_packages +from setuptools import setup, find_packages # type: ignore[import-untyped] setup( name = 'parsec', @@ -45,6 +45,7 @@ ], extras_require={ 'dev': [ + 'mypy', 'coverage', ], }, diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index ada2ea3..6f2e8ee 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -75,7 +75,7 @@ def aggregate(self, other=None): return self if not other.status: return other - return Value(True, other.index, self.value + other.value, None) + return Value.success(other.index, self.value + other.value) def update_index(self, index=None): if index is None: @@ -86,15 +86,14 @@ def update_index(self, index=None): @staticmethod def combinate(values): '''Aggregate multiple values into tuple''' - 
prev_v = None + if not values: + raise TypeError("cannot call combinate without any value") + for v in values: - if prev_v: - if not v: - return prev_v if not v.status: return v out_values = tuple([v.value for v in values]) - return Value(True, values[-1].index, out_values, None) + return Value.success(values[-1].index, out_values) def __str__(self): return 'Value: state: {}, @index: {}, values: {}, expected: {}'.format( @@ -159,7 +158,13 @@ def bind(self, fn): @Parser def bind_parser(text, index): res = self(text, index) - return res if not res.status else fn(res.value)(text, res.index) + if not res.status: + return res + + try: + return fn(res.value, index)(text, res.index) + except TypeError: + return fn(res.value)(text, res.index) return bind_parser def compose(self, other): @@ -263,14 +268,14 @@ def mark(self): def pos(text, index): return ParseError.loc_info(text, index) - @Parser - def mark_parser(text, index): - res = self(text, index) - if res.status: - return Value.success(res.index, (pos(text, index), res.value, pos(text, res.index))) - else: - return res # failed. - return mark_parser + return self >= ( + lambda value, index: Parser( + lambda text, resultant_index: Value.success( + resultant_index, + (pos(text, index), value, pos(text, resultant_index)), + ) + ) + ) def desc(self, description): '''Describe a parser, when it failed, print out the description text.''' @@ -358,9 +363,12 @@ def choice(pa, pb): def try_choice(pa, pb): - '''Choice one from two parsers with backtrack, implements the operator of `(^)`.''' + '''Choose one from two parsers with backtrack, implements the operator of `(^)`.''' return pa.try_choice(pb) +def try_choices(*choices): + '''Choose one from the choices''' + return reduce(try_choice, choices) def skip(pa, pb): '''Ends with a specified parser, and at the end parser consumed the end flag. 
@@ -432,8 +440,8 @@ def generate(fn): @wraps(fn) @Parser def generated(text, index): - iterator, value = fn(), None try: + iterator, value = fn(), None while True: parser = iterator.send(value) res = parser(text, index) @@ -448,11 +456,14 @@ def generated(text, index): return Value.success(index, endval) except RuntimeError as error: stop = error.__cause__ - endval = stop.value - if isinstance(endval, Parser): - return endval(text, index) - else: - return Value.success(index, endval) + if isinstance(stop, StopIteration) and hasattr(stop, "value"): + endval = stop.value + if isinstance(endval, Parser): + return endval(text, index) + else: + return Value.success(index, endval) + # not what we want + raise error from None return generated.desc(fn.__name__) @@ -473,6 +484,7 @@ def times_parser(text, index): res = p(text, index) if res.status: if maxt == float('inf') and res.index == index: + # TODO: check whether it reaches mint # prevent infinite loop, see GH-43 break values.append(res.value) diff --git a/src/parsec/__init__.pyi b/src/parsec/__init__.pyi index 1ca41cc..3e8c12d 100644 --- a/src/parsec/__init__.pyi +++ b/src/parsec/__init__.pyi @@ -44,7 +44,10 @@ class Parser(T.Generic[_U]): def parse(self, text: Text) -> _U: ... def parse_partial(self, text: Text) -> tuple[_U, Text]: ... def parse_strict(self, text: Text) -> _U: ... + @T.overload def bind(self, fn: CA.Callable[[_U], Parser[_V]]) -> Parser[_V]: ... + @T.overload + def bind(self, fn: CA.Callable[[_U, int], Parser[_V]]) -> Parser[_V]: ... def compose(self, other: Parser[_V]) -> Parser[_V]: ... def joint(self, *parsers: Parser[_U]) -> Parser[tuple[_U, ...]]: ... def choice(self, other: Parser[_V]) -> Parser[_U | _V]: ... @@ -64,18 +67,22 @@ class Parser(T.Generic[_U]): def __add__(self, other: Parser[_V]) -> Parser[tuple[_U, _V]]: ... def __rshift__(self, other: Parser[_V]) -> Parser[_V]: ... def __gt__(self, other: Parser[_V]) -> Parser[_V]: ... 
- def __irshift__(self, other: CA.Callable[[_U], Parser[_V]]) -> Parser[_V]: ... + def __irshift__(self, other: CA.Callable[[_U], Parser[_V]]) -> Parser[_V]: ... # type: ignore[misc] def __ge__(self, other: Parser[_V]) -> Parser[_V]: ... def __lshift__(self, other: Parser[_V]) -> Parser[_U]: ... def __lt__(self, other: Parser[_V]) -> Parser[_U]: ... def __truediv__(self, other: Parser[_V]) -> Parser[_U]: ... def parse(p: Parser[_V], text: Text, index: int) -> _V: ... +@T.overload def bind(p: Parser[_U], fn: CA.Callable[[_U], Parser[_V]]) -> Parser[_V]: ... +@T.overload +def bind(p: Parser[_U], fn: CA.Callable[[_U, int], Parser[_V]]) -> Parser[_V]: ... def compose(pa: Parser, pb: Parser[_V]) -> Parser[_V]: ... def joint(*parsers: Parser[_U]) -> Parser[tuple[_U, ...]]: ... def choice(pa: Parser[_U], pb: Parser[_V]) -> Parser[_U | _V]: ... def try_choice(pa: Parser[_U], pb: Parser[_V]) -> Parser[_U | _V]: ... +def try_choices(*parsers: Parser[_U]) -> Parser[_U]: ... def skip(pa: Parser[_U], pb: Parser) -> Parser[_U]: ... def ends_with(pa: Parser[_U], pb: Parser) -> Parser[_U]: ... def excepts(pa: Parser[_U], pb: Parser) -> Parser[_U]: ... 
diff --git a/src/parsec/tests/test_parsec.py b/src/parsec/tests/test_parsec.py index 2547278..0a91327 100644 --- a/src/parsec/tests/test_parsec.py +++ b/src/parsec/tests/test_parsec.py @@ -9,6 +9,7 @@ __author__ = 'He Tao, sighingnow@gmail.com' +import re import random import unittest @@ -16,7 +17,8 @@ class ParseErrorTest(unittest.TestCase): def test_loc_info_should_throw_on_invalid_index(self): - self.assertRaises(ValueError, ParseError.loc_info, "", 1) + with self.assertRaises(ValueError): + ParseError.loc_info("", 1) def test_loc_info_should_use_default_values_when_text_is_not_str(self): self.assertEqual(ParseError.loc_info([0], 0), (0, -1)) @@ -26,6 +28,33 @@ def test_str(self): # trigger ValueError self.assertTrue(str(ParseError("foo bar", "", 1))) +class ValueTest(unittest.TestCase): + def test_aggregate(self): + value = Value.failure(-1, "this") + self.assertEqual(value.aggregate(), value) + + value = Value.success(-1, ["foo"]) + self.assertEqual(value.aggregate(), value) + + other = Value.failure(-1, "that") + self.assertEqual(value.aggregate(other), other) + + other = Value.success(0, ["bar"]) + self.assertEqual(value.aggregate(other), Value.success(0, ["foo", "bar"])) + + def test_update_index(self): + value = Value.success(0, None) + self.assertEqual(value.update_index(), value) + self.assertEqual(value.update_index(1), Value.success(1, None)) + + def test_combinate(self): + with self.assertRaisesRegex(TypeError, "cannot call combinate without any value"): + Value.combinate([]) + + self.assertEqual(Value.combinate([Value.success(0, None)]), Value.success(0, (None,))) + self.assertEqual(Value.combinate([Value.failure(0, "expect to fail")]), Value.failure(0, "expect to fail")) + self.assertEqual(Value.combinate([Value.success(0, None), Value.failure(0, "expect to fail")]), Value.failure(0, "expect to fail")) + class ParsecTest(unittest.TestCase): '''Test the implementation of Text.Parsec. 
(The final test for all apis)''' def test_times_with_then(self): @@ -35,6 +64,11 @@ def test_times_with_then(self): self.assertRaises(ParseError, parser.parse, 'xyz') self.assertRaises(ParseError, parser.parse, 'xyzw') + def test_times_inf_maxt(self): + parser = times(eof(), 1, float('inf')) + self.assertEqual(parser.parse(''), []) + # self.assertEqual(parser.parse('abc'), ['a', 'b', 'c']) + def test_many_with_then(self): parser = many(string('x')) >> string('y') self.assertEqual(parser.parse('y'), 'y') @@ -117,11 +151,31 @@ def test_try_choice(self): self.assertEqual(parser.parse('xy'), 'xy') self.assertEqual(parser.parse('xz'), 'xz') + def test_try_choices(self): + # cannot try_choices without choices + with self.assertRaisesRegex(TypeError, r"reduce\(\) of empty iterable with no initial value"): + try_choices() + + parser = try_choices(string('x')) + self.assertEqual(parser.parse('x'), 'x') + + parser = try_choices(string('yz'), string('y')) + self.assertEqual(parser.parse('yz'), 'yz') + self.assertEqual(parser.parse('y'), 'y') + + parser = try_choices(string('x'), string('yz'), string('y')) + self.assertEqual(parser.parse('x'), 'x') + self.assertEqual(parser.parse('yz'), 'yz') + self.assertEqual(parser.parse('y'), 'y') + def test_ends_with(self): parser = string('x') < string('y') self.assertEqual(parser.parse('xy'), 'x') self.assertRaises(ParseError, parser.parse, 'xx') + with self.assertRaises(ParseError): + parser.parse('y') + def test_parsecmap(self): def mapfn(p): @@ -144,7 +198,7 @@ def test_desc(self): self.assertRaises(ParseError, parser.parse, 'y') def test_mark(self): - parser = many(mark(many(letter())) << string("\n")) + parser = many1(mark(many(letter())) << string("\n")) lines = parser.parse("asdf\nqwer\n") @@ -160,6 +214,9 @@ def test_mark(self): self.assertEqual(letters, ['q', 'w', 'e', 'r']) self.assertEqual(end, (1, 4)) + with self.assertRaises(ParseError): + parser.parse("1") + def test_choice_with_compose(self): parser = (string('\\') >> 
string('y')) | string('z') self.assertEqual(parser.parse('\\y'), 'y') @@ -330,6 +387,9 @@ def test_excepts(self): self.assertEqual(parser.parse('<'), "<") self.assertEqual(parser.parse('<='), "<=") + with self.assertRaises(ParseError): + parser.parse('>') + parser = string('<') ^ string('<=') self.assertEqual(parser.parse('<'), "<") self.assertEqual(parser.parse('<='), "<") @@ -372,6 +432,9 @@ def test_regex(self): # combinator only accepts string as input self.assertRaises(ParseError, parser.parse, [1]) + parser = regex(re.compile(r'[0-9]')) + self.assertEqual(parser.parse('1'), '1') + def test_one_of(self): parser = one_of('abc') self.assertEqual(parser.parse('a'), 'a') @@ -498,6 +561,8 @@ def test_generate_raise(self): def xy(): yield string('x') yield string('y') + + # NOTE: this will appear in the form of a RuntimeError caused by StopIteration r = StopIteration('success') r.value = 'success' # for pre-3.3 Python raise r @@ -505,5 +570,40 @@ def xy(): parser = xy self.assertEqual(parser.parse('xy'), 'success') + @generate + def yz(): + r = StopIteration() + r.value = string("yz") + raise r + + parser = yz + self.assertEqual(parser.parse('yz'), 'yz') + + @generate + def stop_iteration_without_value(): + # simulate python 2 + r = StopIteration() + delattr(r, "value") + raise RuntimeError from r + + parser = stop_iteration_without_value + self.assertEqual(parser.parse("whatever"), None) + + @generate + def stop_iteration_with_parser_as_value(): + raise RuntimeError from StopIteration(string("yz")) + + parser = stop_iteration_with_parser_as_value + self.assertEqual(parser.parse("yz"), "yz") + + @generate + def runtime_error(): + r = RuntimeError + raise r + + parser = runtime_error + with self.assertRaises(RuntimeError): + parser.parse("whatever") + if __name__ == '__main__': unittest.main() From 847110a4bffa94488a17484229c39b4fd4483788 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 14:18:19 -0400 Subject: [PATCH 12/27] test: backward compatibility 
--- src/parsec/tests/test_parsec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parsec/tests/test_parsec.py b/src/parsec/tests/test_parsec.py index 0a91327..c0d87ff 100644 --- a/src/parsec/tests/test_parsec.py +++ b/src/parsec/tests/test_parsec.py @@ -153,7 +153,7 @@ def test_try_choice(self): def test_try_choices(self): # cannot try_choices without choices - with self.assertRaisesRegex(TypeError, r"reduce\(\) of empty iterable with no initial value"): + with self.assertRaisesRegex(TypeError, r"reduce\(\) of empty \w+ with no initial value"): try_choices() parser = try_choices(string('x')) From a07049f0d019983054b3de63e863d1b04050ce8e Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 14:22:00 -0400 Subject: [PATCH 13/27] ci: only run mypy on python >= 3.8 --- .github/workflows/ci.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5d989a4..311e62d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -24,4 +24,6 @@ jobs: - run: coverage run setup.py test - run: coverage report -m - - run: mypy . + - if: ${{ matrix.python-version != '3.6' && matrix.python-version != '3.7' }} + run: mypy . 
+ From 78007fbb3576d615144815ea4df62a6e71021485 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 15:40:05 -0400 Subject: [PATCH 14/27] feat: added try_choices_longest --- src/parsec/__init__.py | 17 +++++++++++++++++ src/parsec/__init__.pyi | 1 + src/parsec/tests/test_parsec.py | 14 ++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 6f2e8ee..9be23c4 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -370,6 +370,23 @@ def try_choices(*choices): '''Choose one from the choices''' return reduce(try_choice, choices) +def try_choices_longest(*choices): + if not choices: + raise TypeError("choices cannot be empty") + + if not all(isinstance(choice, Parser) for choice in choices): + raise TypeError("choices can only be Parsers") + + @Parser + def longest(text, index): + results = list(map(lambda choice: choice(text, index), choices)) + if all(not result.status for result in results): + return Value.failure(index, 'does not match with any choices {}'.format(choices)) + + successful_results = list(filter(lambda result: result.status, results)) + return max(successful_results, key=lambda result: result.index) + return longest + def skip(pa, pb): '''Ends with a specified parser, and at the end parser consumed the end flag. Implements the operator of `(<<)`.''' diff --git a/src/parsec/__init__.pyi b/src/parsec/__init__.pyi index 3e8c12d..094a0df 100644 --- a/src/parsec/__init__.pyi +++ b/src/parsec/__init__.pyi @@ -83,6 +83,7 @@ def joint(*parsers: Parser[_U]) -> Parser[tuple[_U, ...]]: ... def choice(pa: Parser[_U], pb: Parser[_V]) -> Parser[_U | _V]: ... def try_choice(pa: Parser[_U], pb: Parser[_V]) -> Parser[_U | _V]: ... def try_choices(*parsers: Parser[_U]) -> Parser[_U]: ... +def try_choices_longest(*parsers: Parser[_U]) -> Parser[_U]: ... def skip(pa: Parser[_U], pb: Parser) -> Parser[_U]: ... def ends_with(pa: Parser[_U], pb: Parser) -> Parser[_U]: ... 
def excepts(pa: Parser[_U], pb: Parser) -> Parser[_U]: ... diff --git a/src/parsec/tests/test_parsec.py b/src/parsec/tests/test_parsec.py index c0d87ff..7a69612 100644 --- a/src/parsec/tests/test_parsec.py +++ b/src/parsec/tests/test_parsec.py @@ -168,6 +168,20 @@ def test_try_choices(self): self.assertEqual(parser.parse('yz'), 'yz') self.assertEqual(parser.parse('y'), 'y') + def test_try_choices_longest(self): + with self.assertRaisesRegex(TypeError, "choices cannot be empty"): + try_choices_longest() + + with self.assertRaisesRegex(TypeError, "choices can only be Parsers"): + try_choices_longest(None) + + parser = try_choices_longest(string("x"), string("xyz")) + self.assertEqual(parser.parse("x"), "x") + self.assertEqual(parser.parse("xyz"), "xyz") + + with self.assertRaisesRegex(ParseError, r"does not match with any choices .*"): + parser.parse("y") + def test_ends_with(self): parser = string('x') < string('y') self.assertEqual(parser.parse('xy'), 'x') From 3aa2c39d6434158fac9c2d6a27d64f5102bf7dee Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 15:42:13 -0400 Subject: [PATCH 15/27] refactor: show results in failure message --- src/parsec/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 9be23c4..0821d6b 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -381,7 +381,7 @@ def try_choices_longest(*choices): def longest(text, index): results = list(map(lambda choice: choice(text, index), choices)) if all(not result.status for result in results): - return Value.failure(index, 'does not match with any choices {}'.format(choices)) + return Value.failure(index, 'does not match with any choices {}'.format(results)) successful_results = list(filter(lambda result: result.status, results)) return max(successful_results, key=lambda result: result.index) From d9bbd435c94a8f133947a0b5a2274ee4e7d4f432 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 
15:56:30 -0400 Subject: [PATCH 16/27] refactor: use inspect instead of catching TypeError --- src/parsec/__init__.py | 9 +++++++++ src/parsec/tests/test_parsec.py | 3 +++ 2 files changed, 12 insertions(+) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 0821d6b..aabfa22 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -7,6 +7,11 @@ __author__ = 'He Tao, sighingnow@gmail.com' +try: + from inspect import getfullargspec as getargspec +except ImportError: + from inspect import getargspec as getargspec + import operator import re import warnings @@ -155,6 +160,10 @@ def bind(self, fn): parser is successful, passes the result to fn, and continues with the parser returned from fn. ''' + argspec = getargspec(fn) + if not 1 <= len(argspec.args) <= 2: + raise TypeError("can only bind on a function with one or two arguments, fn: {}".format(argspec)) + @Parser def bind_parser(text, index): res = self(text, index) diff --git a/src/parsec/tests/test_parsec.py b/src/parsec/tests/test_parsec.py index 7a69612..bbff8a9 100644 --- a/src/parsec/tests/test_parsec.py +++ b/src/parsec/tests/test_parsec.py @@ -108,6 +108,9 @@ def binder(x): self.assertEqual(nonlocals['piped'], 'x') self.assertRaises(ParseError, parser.parse, 'x') + with self.assertRaises(TypeError): + parser >= (lambda x, y, z: any()) + def test_compose(self): parser = string('x') >> string('y') self.assertEqual(parser.parse('xy'), 'y') From 7bba029d5e3d3afc65d640da475ae128a1b833ff Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 15:58:22 -0400 Subject: [PATCH 17/27] refactor: use inspect instead of catching TypeError --- src/parsec/__init__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index aabfa22..e9b42ff 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -170,10 +170,7 @@ def bind_parser(text, index): if not res.status: return res - try: - return fn(res.value, index)(text, 
res.index) - except TypeError: - return fn(res.value)(text, res.index) + return (fn(res.value, index) if len(argspec.args) == 2 else fn(res.value))(text, res.index) return bind_parser def compose(self, other): From ecf203e9a7c5c6affe4a8d38a69e5e845b9d84a0 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 16:28:17 -0400 Subject: [PATCH 18/27] feat: implemented __repr__ --- src/parsec/__init__.py | 5 +++++ src/parsec/tests/test_parsec.py | 3 +++ 2 files changed, 8 insertions(+) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index e9b42ff..77e868a 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -127,6 +127,11 @@ def __call__(self, text, index): '''call wrapped function.''' return self.fn(text, index) + def __repr__(self): + if hasattr(self.fn, "__name__"): + return self.fn.__name__ + return super().__repr__() + def parse(self, text): '''Parses a given string `text`.''' return self.parse_partial(text)[0] diff --git a/src/parsec/tests/test_parsec.py b/src/parsec/tests/test_parsec.py index bbff8a9..d5ed1d2 100644 --- a/src/parsec/tests/test_parsec.py +++ b/src/parsec/tests/test_parsec.py @@ -57,6 +57,9 @@ def test_combinate(self): class ParsecTest(unittest.TestCase): '''Test the implementation of Text.Parsec. 
(The final test for all apis)''' + def test_repr(self): + self.assertIsNotNone(repr(any())) + def test_times_with_then(self): parser = times(letter(), 3) >> digit() self.assertEqual(parser.parse('xyz1'), '1') From 66fb27f9fbc81002143a4fdf9f359b7336d233d4 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 16:59:34 -0400 Subject: [PATCH 19/27] refactor: use named function for mark --- src/parsec/__init__.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 77e868a..8594196 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -279,14 +279,12 @@ def mark(self): def pos(text, index): return ParseError.loc_info(text, index) - return self >= ( - lambda value, index: Parser( - lambda text, resultant_index: Value.success( - resultant_index, - (pos(text, index), value, pos(text, resultant_index)), - ) - ) - ) + def mark(value, index): + def mark(text, resultant_index): + return Value.success(resultant_index, (pos(text, index), value, pos(text, resultant_index))) + return mark + + return self >= mark def desc(self, description): '''Describe a parser, when it failed, print out the description text.''' From 2993a2d51c9beeff9ccb10390abb149ef3d07233 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 17:01:01 -0400 Subject: [PATCH 20/27] refactor: use named function for mark --- src/parsec/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 8594196..d27ae87 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -280,6 +280,7 @@ def pos(text, index): return ParseError.loc_info(text, index) def mark(value, index): + @Parser def mark(text, resultant_index): return Value.success(resultant_index, (pos(text, index), value, pos(text, resultant_index))) return mark From a6d71183ee1c0b610f640f6d94c7b73f9f3945c5 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 17:04:18 -0400 Subject: 
[PATCH 21/27] chore: also show the parser that failed in try_choices_longest --- src/parsec/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index d27ae87..6b4a6af 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -391,7 +391,7 @@ def try_choices_longest(*choices): def longest(text, index): results = list(map(lambda choice: choice(text, index), choices)) if all(not result.status for result in results): - return Value.failure(index, 'does not match with any choices {}'.format(results)) + return Value.failure(index, 'does not match with any choices {}'.format(zip(choices, results))) successful_results = list(filter(lambda result: result.status, results)) return max(successful_results, key=lambda result: result.index) From 50e56004dbec44281f81bd8813d1f9ad3a016297 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 17:05:32 -0400 Subject: [PATCH 22/27] chore: also show the parser that failed in try_choices_longest and cast to list --- src/parsec/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 6b4a6af..1b57122 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -391,7 +391,7 @@ def try_choices_longest(*choices): def longest(text, index): results = list(map(lambda choice: choice(text, index), choices)) if all(not result.status for result in results): - return Value.failure(index, 'does not match with any choices {}'.format(zip(choices, results))) + return Value.failure(index, 'does not match with any choices {}'.format(list(zip(choices, results)))) successful_results = list(filter(lambda result: result.status, results)) return max(successful_results, key=lambda result: result.index) From 8ecd7f990211247f9e735dcc49c531fe227e14e6 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 18:31:12 -0400 Subject: [PATCH 23/27] feat: infer if we do starmap --- 
src/parsec/__init__.py | 36 ++++++++++++++++++++++++++------- src/parsec/__init__.pyi | 6 +++++- src/parsec/tests/test_parsec.py | 8 ++++---- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 1b57122..dd007cc 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -14,6 +14,7 @@ import operator import re +import inspect import warnings from functools import reduce, wraps from collections import namedtuple @@ -22,6 +23,11 @@ # Text.Parsec.Error ########################################################################## +def _arguments(callable): + if inspect.isbuiltin(callable): + # NOTE: we cannot perform introspection on builtins + return 1 + return len(getargspec(callable).args) class ParseError(RuntimeError): '''Type for parse error.''' @@ -165,9 +171,9 @@ def bind(self, fn): parser is successful, passes the result to fn, and continues with the parser returned from fn. ''' - argspec = getargspec(fn) - if not 1 <= len(argspec.args) <= 2: - raise TypeError("can only bind on a function with one or two arguments, fn: {}".format(argspec)) + args_count = _arguments(fn) + if not 1 <= args_count <= 2: + raise TypeError("can only bind on a function with one or two arguments, fn/{}".format(args_count)) @Parser def bind_parser(text, index): @@ -175,7 +181,7 @@ def bind_parser(text, index): if not res.status: return res - return (fn(res.value, index) if len(argspec.args) == 2 else fn(res.value))(text, res.index) + return (fn(res.value, index) if args_count == 2 else fn(res.value))(text, res.index) return bind_parser def compose(self, other): @@ -261,15 +267,31 @@ def excepts_parser(text, index): return res return excepts_parser - def parsecmap(self, fn): + def parsecmap(self, fn, star=None): '''Returns a parser that transforms the produced value of parser with `fn`.''' - return self.bind(lambda res: success_with(fn(res), advance=False)) + def mapper(res): + # unpack tuple + result = fn(*res) if 
star is True or star is None and isinstance(res, tuple) and len(res) == _arguments(fn) else fn(res) + return success_with(result, advance=False) + return self.bind(mapper) + + def map(self, fn, star=None): + '''Functor map on the parsed value with `fn`. + Alias to parsecmap + ''' + return self.parsecmap(fn, star=star) def parsecapp(self, other): '''Returns a parser that applies the produced value of this parser to the produced value of `other`.''' # pylint: disable=unnecessary-lambda return self.bind(lambda res: other.parsecmap(lambda x: res(x))) + def apply(self, other): + '''Apply the function produced by self on the result of other. + Alias to parsecapp + ''' + return self.parsecapp(other) + def result(self, res): '''Return a value according to the parameter `res` when parse successfully.''' return self >> success_with(res, advance=False) @@ -847,4 +869,4 @@ def number(base, digit): zero_number = string("0") >> (hexadecimal | octal | binary | decimal | success_with(0)) natural = zero_number | decimal -integer = sign.parsecapp(natural) +integer = sign.apply(natural) diff --git a/src/parsec/__init__.pyi b/src/parsec/__init__.pyi index 094a0df..8c36b51 100644 --- a/src/parsec/__init__.pyi +++ b/src/parsec/__init__.pyi @@ -55,10 +55,14 @@ class Parser(T.Generic[_U]): def skip(self, other: Parser[_V]) -> Parser[_U]: ... def ends_with(self, other: Parser[_V]) -> Parser[_U]: ... def excepts(self, ohter: Parser[_V]) -> Parser[_U]: ... - def parsecmap(self, fn: CA.Callable[[_U], _V]) -> Parser[_V]: ... + def parsecmap(self, fn: CA.Callable[[_U], _V], star: T.Optional[bool] = None) -> Parser[_V]: ... + def map(self, fn: CA.Callable[[_U], _V], star: T.Optional[bool] = None) -> Parser[_V]: ... def parsecapp( self: Parser[CA.Callable[[_V], _W]], other: Parser[_V] ) -> Parser[_W]: ... + def apply( + self: Parser[CA.Callable[[_V], _W]], other: Parser[_V] + ) -> Parser[_W]: ... def result(self, res: _V) -> Parser[_V]: ... 
def mark(self) -> Parser[tuple[_LocInfo, _U, _LocInfo]]: ... def desc(self, description: str) -> Parser[_U]: ... diff --git a/src/parsec/tests/test_parsec.py b/src/parsec/tests/test_parsec.py index d5ed1d2..dee5515 100644 --- a/src/parsec/tests/test_parsec.py +++ b/src/parsec/tests/test_parsec.py @@ -196,20 +196,20 @@ def test_ends_with(self): with self.assertRaises(ParseError): parser.parse('y') - def test_parsecmap(self): + def test_map(self): def mapfn(p): return p + p - parser = string('x').parsecmap(mapfn) + parser = string('x').map(mapfn) self.assertEqual(parser.parse('x'), 'xx') - def test_parsecapp(self): + def test_apply(self): def genfn(p): return lambda c: 'fn:' + p + c + c - parser = string('x').parsecmap(genfn).parsecapp(string('y')) + parser = string('x').map(genfn).apply(string('y')) self.assertEqual(parser.parse('xy'), 'fn:xyy') def test_desc(self): From 50b2f99adaca821d1819bbf54db267ae8184f590 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 18:45:22 -0400 Subject: [PATCH 24/27] feat: implemented newline, crlf and end_of_line --- src/parsec/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index dd007cc..f198628 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -770,6 +770,10 @@ def regex_parser(text, index): return Value.failure(index, exp.pattern) return regex_parser +newline = string("\n").desc("LF") +crlf = (string("\r") >> newline).desc("CRLF") +end_of_line = (newline | crlf).desc("EOL") + ########################################################################## # Useful utility parsers ########################################################################## From 3c0a0ce53dc1118426e24459d8231785b0dd67fc Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 18:49:05 -0400 Subject: [PATCH 25/27] refactor: fixed declaration --- src/parsec/__init__.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git 
a/src/parsec/__init__.py b/src/parsec/__init__.py index f198628..e9b69b7 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -770,9 +770,14 @@ def regex_parser(text, index): return Value.failure(index, exp.pattern) return regex_parser -newline = string("\n").desc("LF") -crlf = (string("\r") >> newline).desc("CRLF") -end_of_line = (newline | crlf).desc("EOL") +def newline(): + return string("\n").desc("LF") + +def crlf(): + return (string("\r") >> newline).desc("CRLF") + +def end_of_line(): + return (newline | crlf).desc("EOL") ########################################################################## # Useful utility parsers From 9e619340f65aaac21bf46448e7c1cfe3c4acb638 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 18:51:52 -0400 Subject: [PATCH 26/27] refactor: fixed declaration --- src/parsec/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index e9b69b7..116b894 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -774,10 +774,10 @@ def newline(): return string("\n").desc("LF") def crlf(): - return (string("\r") >> newline).desc("CRLF") + return (string("\r") >> newline()).desc("CRLF") def end_of_line(): - return (newline | crlf).desc("EOL") + return (newline() | crlf()).desc("EOL") ########################################################################## # Useful utility parsers From 3ab3d508d094905b41b6c781c274ee62c5cc94a6 Mon Sep 17 00:00:00 2001 From: Leo Sin Date: Wed, 5 Jun 2024 19:04:21 -0400 Subject: [PATCH 27/27] refactor: remove auto starmap --- src/parsec/__init__.py | 10 +++++----- src/parsec/__init__.pyi | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/parsec/__init__.py b/src/parsec/__init__.py index 116b894..69ef13d 100644 --- a/src/parsec/__init__.py +++ b/src/parsec/__init__.py @@ -23,7 +23,7 @@ # Text.Parsec.Error ########################################################################## -def 
_arguments(callable): +def expected_arguments(callable): if inspect.isbuiltin(callable): # NOTE: we cannot perform introspection on builtins return 1 @@ -171,7 +171,7 @@ def bind(self, fn): parser is successful, passes the result to fn, and continues with the parser returned from fn. ''' - args_count = _arguments(fn) + args_count = expected_arguments(fn) if not 1 <= args_count <= 2: raise TypeError("can only bind on a function with one or two arguments, fn/{}".format(args_count)) @@ -267,15 +267,15 @@ def excepts_parser(text, index): return res return excepts_parser - def parsecmap(self, fn, star=None): + def parsecmap(self, fn, star=False): '''Returns a parser that transforms the produced value of parser with `fn`.''' def mapper(res): # unpack tuple - result = fn(*res) if star is True or star is None and isinstance(res, tuple) and len(res) == _arguments(fn) else fn(res) + result = fn(*res) if star else fn(res) return success_with(result, advance=False) return self.bind(mapper) - def map(self, fn, star=None): + def map(self, fn, star=False): '''Functor map on the parsed value with `fn`. Alias to parsecmap ''' diff --git a/src/parsec/__init__.pyi b/src/parsec/__init__.pyi index 8c36b51..827022b 100644 --- a/src/parsec/__init__.pyi +++ b/src/parsec/__init__.pyi @@ -55,8 +55,8 @@ class Parser(T.Generic[_U]): def skip(self, other: Parser[_V]) -> Parser[_U]: ... def ends_with(self, other: Parser[_V]) -> Parser[_U]: ... def excepts(self, ohter: Parser[_V]) -> Parser[_U]: ... - def parsecmap(self, fn: CA.Callable[[_U], _V], star: T.Optional[bool] = None) -> Parser[_V]: ... - def map(self, fn: CA.Callable[[_U], _V], star: T.Optional[bool] = None) -> Parser[_V]: ... + def parsecmap(self, fn: CA.Callable[[_U], _V], star: bool = False) -> Parser[_V]: ... + def map(self, fn: CA.Callable[[_U], _V], star: bool = False) -> Parser[_V]: ... def parsecapp( self: Parser[CA.Callable[[_V], _W]], other: Parser[_V] ) -> Parser[_W]: ...