From 6587a8dda25524eaf282323a35e4957646d23fe5 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Tue, 18 Apr 2017 09:46:44 +0200 Subject: [PATCH] Handle keys that contain keywords such as "or", "and" or "with" * tokenize() now handles two cases: the Licensing was created with or without symbols. In the first case, the automaton-based tokenizer is used; otherwise a plain regex-based splitter is used and more constraints are enforced on license symbols: they cannot contain spaces and only the "+" is recognized as or later. Link: https://github.com/nexB/license-expression/issues/9 Signed-off-by: Philippe Ombredanne --- README.rst | 6 +- src/license_expression/__init__.py | 126 ++++- tests/test_license_expression.py | 714 +++++++++++++++++------------ 3 files changed, 523 insertions(+), 323 deletions(-) diff --git a/README.rst b/README.rst index f015ce4..ac821dc 100644 --- a/README.rst +++ b/README.rst @@ -61,14 +61,14 @@ For example:: >>> from license_expression import Licensing, LicenseSymbol >>> licensing = Licensing() - >>> expression = ' GPL-2.0 or LGPL 2.1 and mit ' + >>> expression = ' GPL-2.0 or LGPL-2.1 and mit ' >>> parsed = licensing.parse(expression) - >>> expected = 'GPL-2.0 OR (LGPL 2.1 AND mit)' + >>> expected = 'GPL-2.0 OR (LGPL-2.1 AND mit)' >>> assert expected == parsed.render('{symbol.key}') >>> expected = [ ... LicenseSymbol('GPL-2.0'), - ... LicenseSymbol('LGPL 2.1'), + ... LicenseSymbol('LGPL-2.1'), ... LicenseSymbol('mit') ... ] >>> assert expected == licensing.license_symbols(expression) diff --git a/src/license_expression/__init__.py b/src/license_expression/__init__.py index c7f2b17..f72e256 100644 --- a/src/license_expression/__init__.py +++ b/src/license_expression/__init__.py @@ -62,6 +62,10 @@ from boolean.boolean import PARSE_UNKNOWN_TOKEN from boolean.boolean import ParseError from boolean.boolean import TOKEN_SYMBOL +from boolean.boolean import TOKEN_AND +from boolean.boolean import TOKEN_OR +from boolean.boolean import TOKEN_LPAR +from boolean.boolean import TOKEN_RPAR from license_expression._pyahocorasick import Trie as Scanner from license_expression._pyahocorasick import Output @@ -103,14 +107,16 @@ class ExpressionError(Exception): TOKEN_OR_LATER = 11 # actual keyword types +KW_LPAR = Keyword('(', TOKEN_LPAR) +KW_RPAR = Keyword(')', TOKEN_RPAR) _KEYWORDS = [ - Keyword('and', boolean.TOKEN_AND), - Keyword('or', boolean.TOKEN_OR), - Keyword('(', boolean.TOKEN_LPAR), - Keyword(')', boolean.TOKEN_RPAR), - Keyword('with', TOKEN_WITH), + Keyword(' and ', TOKEN_AND), + Keyword(' or ', TOKEN_OR), + KW_LPAR, + KW_RPAR, + Keyword(' with ', TOKEN_WITH), Keyword('+', TOKEN_OR_LATER), - Keyword('or later', TOKEN_OR_LATER), + Keyword(' or later', TOKEN_OR_LATER), ] KEYWORDS = tuple(kw.value for kw in _KEYWORDS) @@ -123,13 +129,13 @@ class Licensing(boolean.BooleanAlgebra): For example: >>> l = Licensing() - >>> expr = l.parse(" GPL-2.0 or LGPL 2.1 and mit ") - >>> expected = 'GPL-2.0 OR (LGPL 2.1 AND mit)' + >>> expr = l.parse(" GPL-2.0 or LGPL-2.1 and mit ") + >>> expected = 'GPL-2.0 OR (LGPL-2.1 AND mit)' >>> assert expected == expr.render('{symbol.key}') >>> expected = [ ... LicenseSymbol('GPL-2.0'), - ... LicenseSymbol('LGPL 2.1'), + ... LicenseSymbol('LGPL-2.1'), ... LicenseSymbol('mit') ... ] >>> assert expected == l.license_symbols(expr) @@ -174,6 +180,8 @@ def __init__(self, symbols=tuple(), quiet=True): raise ValueError('\n'.join(warns + errors)) # mapping of known symbol used for parsing and resolution as (key, symbol) + # TODO: inject lpar, rpar and spaces sourround, before and after + # e.g "(sym)" "(sym " "sym)" " sym " self.known_symbols = {symbol.key: symbol for symbol in symbols} # Aho-Corasick automaton-based Scanner used for expression tokenizing @@ -283,8 +291,8 @@ def license_keys(self, expression, unique=True, **kwargs): For example: >>> l = Licensing() - >>> expr = ' GPL-2.0 and mit or later with blabla and mit or LGPL 2.1 and mit and mit or later with GPL-2.0' - >>> expected = ['GPL-2.0', 'mit', 'blabla', 'LGPL 2.1'] + >>> expr = ' GPL-2.0 and mit+ with blabla and mit or LGPL-2.1 and mit and mit+ with GPL-2.0' + >>> expected = ['GPL-2.0', 'mit', 'blabla', 'LGPL-2.1'] >>> assert expected == l.license_keys(l.parse(expr)) """ symbols = self.license_symbols(expression, unique=False, decompose=True, **kwargs) @@ -349,9 +357,9 @@ def parse(self, expression, validate=False, strict=False, **kwargs): the YYY symbol has `is_exception` set to False. For example: - >>> expression = 'EPL 1.0 and Apache 1.1 OR GPL 2.0 with Classpath exception' + >>> expression = 'EPL-1.0 and Apache-1.1 OR GPL-2.0 with Classpath-exception' >>> parsed = Licensing().parse(expression) - >>> expected = '(EPL 1.0 AND Apache 1.1) OR GPL 2.0 WITH Classpath exception' + >>> expected = '(EPL-1.0 AND Apache-1.1) OR GPL-2.0 WITH Classpath-exception' >>> assert expected == parsed.render(template='{symbol.key}') """ if expression is None: @@ -406,12 +414,16 @@ def tokenize(self, expression, strict=False): If `strict` is True, additional exceptions will be raised in a expression such as "XXX with ZZZ" if the XXX symbol has is_exception` set to True or the - YYY symbol has `is_exception` set to False. + ZZZ symbol has `is_exception` set to False. """ - scanner = self.get_scanner() + if self.known_symbols: + # scan with an automaton, recognize whole symbols+keywords or only keywords + scanner = self.get_scanner() + results = scanner.scan(expression) + else: + # scan with a simple regex-based splitter + results = splitter(expression) - # scan with an automaton, recognize whole symbols+keywords or only keywords - results = scanner.scan(expression) results = strip_and_skip_spaces(results) results = merge_or_later_results(results) result_groups = group_results_for_with_subexpression(results) @@ -471,9 +483,10 @@ def tokenize(self, expression, strict=False): TOKEN_SYMBOL, string, start, PARSE_INVALID_EXPRESSION) # this is a A with B seq of three results - lic_res, _WITH , exc_res = group + lic_res, WITH , exc_res = group pos = lic_res.start - token_string = ' '.join([t.string for t in group]) + WITHs = ' ' + WITH.string.strip() + ' ' + token_string = ''.join([lic_res.string, WITHs, exc_res.string]) # licenses lic_out = lic_res.output @@ -511,7 +524,7 @@ def tokenize(self, expression, strict=False): raise ParseError(TOKEN_SYMBOL, exc_res.string, exc_res.start, PARSE_INVALID_SYMBOL) - if strict and not exc_sym.is_exception: + if strict and self.known_symbols and not exc_sym.is_exception: raise ParseError(TOKEN_SYMBOL, exc_res.string, exc_res.start, PARSE_INVALID_SYMBOL_AS_EXCEPTION) @@ -1189,3 +1202,76 @@ def validate_symbols(symbols, validate_keys=False, _keywords=KEYWORDS): errors.append('Duplicated or empty aliases ignored for license key: %(dupeal)r.' % locals()) return warnings, errors + + +_splitter = re.compile(''' + (?P[^\s\(\)]+) + | + (?P\s+) + | + (?P\() + | + (?P\)) + ''', + re.VERBOSE | re.MULTILINE | re.UNICODE +).finditer + + +def splitter(expression): + """ + Return an iterable of Result describing each token given an + expression unicode string. + + This is a simpler tokenizer used when the Licensing does not have + known symbols. The split is done on spaces and parens. Anything else + is either a token or a symbol. + """ + if not expression: + return + + if not isinstance(expression, str): + raise ParseError(error_code=PARSE_EXPRESSION_NOT_UNICODE) + + # mapping of lowercase token strings to a token type id + TOKENS = { + 'and': Keyword(value='and', type=TOKEN_AND), + 'or': Keyword(value='or', type=TOKEN_OR), + 'with': Keyword(value='with', type=TOKEN_WITH), + } + KW_PLUS = Keyword(value='+', type=TOKEN_OR_LATER) + + for match in _splitter(expression): + if not match: + continue + + start, end = match.span() + end = end - 1 + mgd = match.groupdict() + + space = mgd.get('space') + if space: + yield Result(start, end, space, None) + + lpar = mgd.get('lpar') + if lpar: + yield Result(start, end, lpar, Output(lpar, KW_LPAR)) + + rpar = mgd.get('rpar') + if rpar: + yield Result(start, end, rpar, Output(rpar, KW_RPAR)) + + token_or_sym = mgd.get('symbol') + if not token_or_sym: + continue + + token = TOKENS.get(token_or_sym.lower()) + if token: + yield Result(start, end, token_or_sym, Output(token_or_sym, token)) + elif token_or_sym.endswith('+') and token_or_sym != '+': + val = token_or_sym[:-1] + sym = LicenseSymbol(key=val) + yield Result(start, end - 1, val, Output(val, sym)) + yield Result(end, end, '+', Output('+', KW_PLUS)) + else: + sym = LicenseSymbol(key=token_or_sym) + yield Result(start, end, token_or_sym, Output(token_or_sym, sym)) diff --git a/tests/test_license_expression.py b/tests/test_license_expression.py index 0ff4e71..e337220 100644 --- a/tests/test_license_expression.py +++ b/tests/test_license_expression.py @@ -25,10 +25,11 @@ from boolean.boolean import TOKEN_LPAR from boolean.boolean import TOKEN_OR from boolean.boolean import TOKEN_RPAR +from boolean.boolean import TOKEN_SYMBOL + from boolean.boolean import PARSE_UNBALANCED_CLOSING_PARENS from boolean.boolean import PARSE_INVALID_SYMBOL_SEQUENCE -from license_expression import TOKEN_WITH from license_expression import PARSE_INVALID_EXPRESSION from license_expression import PARSE_INVALID_NESTING from license_expression import PARSE_INVALID_EXCEPTION @@ -41,12 +42,17 @@ from license_expression import LicenseSymbol from license_expression import LicenseWithExceptionSymbol from license_expression import ParseError -from license_expression import strip_and_skip_spaces -from license_expression import validate_symbols +from license_expression import Result +from license_expression import Output + from license_expression import group_results_for_with_subexpression -from boolean.boolean import TOKEN_SYMBOL from license_expression import merge_or_later_results +from license_expression import splitter +from license_expression import strip_and_skip_spaces +from license_expression import validate_symbols + from license_expression import TOKEN_OR_LATER +from license_expression import TOKEN_WITH class LicenseSymbolTest(TestCase): @@ -92,10 +98,10 @@ def test_tokenize_plain1(self): licensing = Licensing() expected = [ (TOKEN_LPAR, '(', 1), - (LicenseSymbol(key='mit'), u' mit ', 2), + (LicenseSymbol(key='mit'), 'mit', 3), (TOKEN_RPAR, ')', 7), (TOKEN_AND, 'and', 9), - (LicenseSymbol(key='gpl'), u' gpl', 12) + (LicenseSymbol(key='gpl'), 'gpl', 13) ] assert expected == list(licensing.tokenize(' ( mit ) and gpl')) @@ -103,9 +109,9 @@ def test_tokenize_plain2(self): licensing = Licensing() expected = [ (TOKEN_LPAR, '(', 0), - (LicenseSymbol(key='mit'), 'mit ', 1), + (LicenseSymbol(key='mit'), 'mit', 1), (TOKEN_AND, 'and', 5), - (LicenseSymbol(key='gpl'), ' gpl', 8), + (LicenseSymbol(key='gpl'), 'gpl', 9), (TOKEN_RPAR, ')', 12) ] assert expected == list(licensing.tokenize('(mit and gpl)')) @@ -113,11 +119,11 @@ def test_tokenize_plain2(self): def test_tokenize_plain3(self): licensing = Licensing() expected = [ - (LicenseSymbol(key='mit'), 'mit ', 0), + (LicenseSymbol(key='mit'), 'mit', 0), (TOKEN_AND, 'AND', 4), - (LicenseSymbol(key='gpl'), ' gpl ', 7), + (LicenseSymbol(key='gpl'), 'gpl', 8), (TOKEN_OR, 'or', 12), - (LicenseSymbol(key='gpl'), ' gpl', 14) + (LicenseSymbol(key='gpl'), 'gpl', 15) ] assert expected == list(licensing.tokenize('mit AND gpl or gpl')) @@ -126,42 +132,40 @@ def test_tokenize_plain4(self): expected = [ (TOKEN_LPAR, '(', 0), (TOKEN_LPAR, '(', 1), - (LicenseSymbol(key=u'l-a', is_exception=False, or_later=True), u'l-a +', 2), - (TOKEN_AND, 'AND', 8), - (LicenseSymbol(key=u'l-b', is_exception=False, or_later=False), u' l-b', 11), - (TOKEN_RPAR, ')', 15), - (TOKEN_OR, 'OR', 17), - (TOKEN_LPAR, '(', 20), - (LicenseSymbol(key=u'l -c', is_exception=False, or_later=True), u'l -c+', 21), - (TOKEN_RPAR, ')', 26), - (TOKEN_RPAR, ')', 27) + (LicenseSymbol(key=u'l-a', is_exception=False, or_later=True), u'l-a+', 2), + (TOKEN_AND, 'AND', 7), + (LicenseSymbol(key=u'l-b', is_exception=False, or_later=False), u'l-b', 11), + (TOKEN_RPAR, ')', 14), + (TOKEN_OR, 'OR', 16), + (TOKEN_LPAR, '(', 19), + (LicenseSymbol(key='l-c', is_exception=False, or_later=True), 'l-c+', 20), + (TOKEN_RPAR, ')', 24), + (TOKEN_RPAR, ')', 25) ] - assert expected == list(licensing.tokenize('((l-a + AND l-b) OR (l -c+))')) + assert expected == list(licensing.tokenize('((l-a+ AND l-b) OR (l-c+))')) def test_tokenize_plain5(self): licensing = Licensing() expected = [ (TOKEN_LPAR, '(', 0), (TOKEN_LPAR, '(', 1), - (LicenseSymbol(key=u'l-a', is_exception=False, or_later=True), 'l-a +', 2), - (TOKEN_AND, 'AND', 8), - (LicenseSymbol(key=u'l-b', is_exception=False, or_later=False), ' l-b', 11), - (TOKEN_RPAR, ')', 15), - (TOKEN_OR, 'OR', 17), - (TOKEN_LPAR, '(', 20), - (LicenseSymbol(key=u'l -c', is_exception=False, or_later=True), 'l -c+', 21), - (TOKEN_RPAR, ')', 26), - (TOKEN_RPAR, ')', 27), - (TOKEN_AND, 'and', 29), + (LicenseSymbol(key='l-a', or_later=True), 'l-a+', 2), + (TOKEN_AND, 'AND', 7), + (LicenseSymbol(key='l-b'), 'l-b', 11), + (TOKEN_RPAR, ')', 14), + (TOKEN_OR, 'OR', 16), + (TOKEN_LPAR, '(', 19), + (LicenseSymbol(key='l-c', or_later=True), 'l-c+', 20), + (TOKEN_RPAR, ')', 24), + (TOKEN_RPAR, ')', 25), + (TOKEN_AND, 'and', 27), (LicenseWithExceptionSymbol( - license_symbol=LicenseSymbol(key='gpl', - is_exception=False, or_later=False), - exception_symbol=LicenseSymbol(key='classpath exception', - is_exception=True, or_later=False)), - ' gpl with classpath exception', 32 + license_symbol=LicenseSymbol(key='gpl'), + exception_symbol=LicenseSymbol(key='classpath')), + 'gpl with classpath', 31 ) ] - assert expected == list(licensing.tokenize('((l-a + AND l-b) OR (l -c+)) and gpl with classpath exception')) + assert expected == list(licensing.tokenize('((l-a+ AND l-b) OR (l-c+)) and gpl with classpath')) def get_symbols(self): gpl_20 = LicenseSymbol('GPL-2.0', ['The GNU GPL 20']) @@ -177,9 +181,9 @@ def test_tokenize_1(self): result = licensing.tokenize('The GNU GPL 20 or LGPL-2.1 and mit') expected = [ (gpl_20, 'The GNU GPL 20', 0), - (TOKEN_OR, 'or', 15), + (TOKEN_OR, ' or ', 14), (lgpl_21, 'LGPL-2.1', 18), - (TOKEN_AND, 'and', 27), + (TOKEN_AND, ' and ', 26), (mit, 'mit', 31)] assert expected == list(result) @@ -188,11 +192,11 @@ def test_tokenize_with_trailing_unknown(self): result = licensing.tokenize('The GNU GPL 20 or LGPL-2.1 and mit2') expected = [ (gpl_20, 'The GNU GPL 20', 0), - (TOKEN_OR, 'or', 15), + (TOKEN_OR, ' or ', 14), (lgpl_21, 'LGPL-2.1', 18), - (TOKEN_AND, 'and', 27), + (TOKEN_AND, ' and ', 26), (mit, 'mit', 31), - (LicenseSymbol(key=u'2'), u'2', 34) + (LicenseSymbol(key='2'), '2', 34) ] assert expected == list(result) @@ -206,15 +210,15 @@ def test_tokenize_3(self): result = licensing.tokenize('The GNU GPL 20 or later or (LGPL-2.1 and mit) or The GNU GPL 20 or mit 123') expected = [ (gpl_20_or_later, 'The GNU GPL 20 or later', 0), - (TOKEN_OR, 'or', 24), + (TOKEN_OR, ' or ', 23), (TOKEN_LPAR, '(', 27), (lgpl_21, 'LGPL-2.1', 28), - (TOKEN_AND, 'and', 37), + (TOKEN_AND, ' and ', 36), (mit, 'mit', 41), (TOKEN_RPAR, ')', 44), - (TOKEN_OR, 'or', 46), + (TOKEN_OR, ' or ', 45), (gpl_20, 'The GNU GPL 20', 49), - (TOKEN_OR, 'or', 64), + (TOKEN_OR, ' or ', 63), (mit, 'mit', 67), (LicenseSymbol(key='123', aliases=()), ' 123', 70) ] @@ -267,7 +271,7 @@ def test_parse_raise_ExpressionError_when_validating_strict(self): try: licensing.parse(expression, validate=True, strict=True) except ExpressionError as ee: - assert 'Unknown license key(s): gpl, bsd, lgpl, exception' == str(ee) + assert str(ee).startswith('exception_symbol must be an exception with "is_exception" set to True:') def test_parse_raise_ParseError_when_validating_strict_with_non_exception_symbols(self): expression = 'gpl and bsd or lgpl with exception' @@ -332,7 +336,7 @@ def test_parse_license_expression_with_alias(self): express_string = 'l-a +' result = licensing.parse(express_string) assert 'l-a or later' == str(result) - expected = LicenseSymbol(key=u'l-a', is_exception=False, or_later=True) + expected = LicenseSymbol(key='l-a', is_exception=False, or_later=True) assert expected == result assert [] == licensing.unknown_license_keys(result) @@ -341,7 +345,7 @@ def test_parse_license_expression3(self): express_string = 'L-a+' result = licensing.parse(express_string) assert 'l-a or later' == str(result) - expected = LicenseSymbol(key=u'l-a', is_exception=False, or_later=True) + expected = LicenseSymbol(key='l-a', is_exception=False, or_later=True) assert expected == result assert [] == licensing.unknown_license_keys(result) @@ -350,7 +354,7 @@ def test_parse_license_expression4(self): express_string = '(l-a)' result = licensing.parse(express_string) assert 'l-a' == str(result) - expected = LicenseSymbol(key=u'l-a', aliases=()) + expected = LicenseSymbol(key='l-a', aliases=()) assert expected == result assert [] == licensing.unknown_license_keys(result) @@ -360,7 +364,7 @@ def test_parse_license_expression5(self): result = licensing.parse(express_string) assert '(l-a or later AND l-b) OR l-c' == str(result) expected = licensing.OR(licensing.AND( - LicenseSymbol(key=u'l-a', is_exception=False, or_later=True), b), c) + LicenseSymbol(key='l-a', is_exception=False, or_later=True), b), c) assert expected == result assert [] == licensing.unknown_license_keys(result) @@ -481,30 +485,25 @@ def test_parse_invalid_expression_raise_expression(self): except (ExpressionError, ParseError): pass - expr = '+ l-a' + expr = '+l-a' try: licensing.parse(expr) self.fail("Exception not raised when validating '%s'" % expr) - except ParseError as pe: - expected = { - 'error_code': PARSE_INVALID_EXPRESSION, - 'position': 0, - 'token_string': '+', - 'token_type': TOKEN_OR_LATER - } - assert expected == _parse_error_as_dict(pe) + except ExpressionError as ee: + expected = 'Invalid license key: the valid characters are: letters and numbers, underscore, dot or hyphen signs and spaces: "+l-a"' + assert expected == str(ee) def test_parse_can_parse(self): licensing = Licensing() - expr = ' GPL-2.0 or LGPL 2.1 and mit ' + expr = ' GPL-2.0 or LGPL2.1 and mit ' parsed = licensing.parse(expr) gpl2 = LicenseSymbol('GPL-2.0') - lgpl = LicenseSymbol('LGPL 2.1') + lgpl = LicenseSymbol('LGPL2.1') mit = LicenseSymbol('mit') expected = [gpl2, lgpl, mit] self.assertEqual(expected, licensing.license_symbols(parsed)) self.assertEqual(expected, licensing.license_symbols(expr)) - self.assertEqual('GPL-2.0 OR (LGPL 2.1 AND mit)', str(parsed)) + self.assertEqual('GPL-2.0 OR (LGPL2.1 AND mit)', str(parsed)) expected = licensing.OR(gpl2, licensing.AND(lgpl, mit)) assert expected == parsed @@ -570,23 +569,23 @@ def test_parse_errors_catch_invalid_non_unicode_byte_strings_on_python3(self): def test_parse_errors_does_not_raise_error_on_plain_non_unicode_raw_string(self): # plain non-unicode string does not raise error licensing = Licensing() - x = licensing.parse(r'mit and (LGPL 2.1)') + x = licensing.parse(r'mit and (LGPL-2.1)') self.assertTrue(isinstance(x, LicenseExpression)) def test_parse_simplify_and_contain_and_equal(self): licensing = Licensing() - expr = licensing.parse(' GPL-2.0 or LGPL 2.1 and mit ') + expr = licensing.parse(' GPL-2.0 or LGPL2.1 and mit ') - expr2 = licensing.parse(' (mit and LGPL 2.1) or GPL-2.0 ') + expr2 = licensing.parse(' (mit and LGPL2.1) or GPL-2.0 ') self.assertEqual(expr2.simplify(), expr.simplify()) self.assertEqual(expr2, expr) - expr3 = licensing.parse('mit and LGPL 2.1') + expr3 = licensing.parse('mit and LGPL2.1') self.assertTrue(expr3 in expr2) def test_parse_trailing_char_raise_exception(self): - _gpl2, _lgpl, _mit, licensing = self.get_symbols_and_licensing() + _gpl2, _lgpl, _mit, _mitand2, licensing = self.get_symbols_and_licensing() try: licensing.parse('The GNU GPL 20 or LGPL-2.1 and mit2') except ParseError as pe: @@ -595,21 +594,21 @@ def test_parse_trailing_char_raise_exception(self): assert expected == _parse_error_as_dict(pe) def test_parse_expression_with_trailing_unknown_should_raise_exception(self): - gpl2, lgpl, mit, licensing = self.get_symbols_and_licensing() + gpl2, lgpl, mit, _mitand2, licensing = self.get_symbols_and_licensing() unknown = LicenseSymbol(key='123') tokens = list(licensing.tokenize('The GNU GPL 20 or later or (LGPL-2.1 and mit) or The GNU GPL 20 or mit 123')) expected = [ - (LicenseSymbol(key=u'gpl-2.0', is_exception=False, or_later=True), 'The GNU GPL 20 or later', 0), - (TOKEN_OR, 'or', 24), + (LicenseSymbol(key='gpl-2.0', is_exception=False, or_later=True), 'The GNU GPL 20 or later', 0), + (TOKEN_OR, ' or ', 23), (TOKEN_LPAR, '(', 27), (lgpl, 'LGPL-2.1', 28), - (TOKEN_AND, 'and', 37), + (TOKEN_AND, ' and ', 36), (mit, 'mit', 41), (TOKEN_RPAR, ')', 44), - (TOKEN_OR, 'or', 46), + (TOKEN_OR, ' or ', 45), (gpl2, 'The GNU GPL 20', 49), - (TOKEN_OR, 'or', 64), + (TOKEN_OR, ' or ', 63), (mit, 'mit', 67), (unknown, ' 123', 70) ] @@ -623,7 +622,7 @@ def test_parse_expression_with_trailing_unknown_should_raise_exception(self): assert expected == _parse_error_as_dict(pe) def test_parse_expression_with_trailing_unknown_should_raise_exception2(self): - _gpl2, _lgpl, _mit, licensing = self.get_symbols_and_licensing() + _gpl2, _lgpl, _mit, _mitand2, licensing = self.get_symbols_and_licensing() unknown = LicenseSymbol(key='123') try: licensing.parse('The GNU GPL 20 or mit 123') @@ -633,21 +632,21 @@ def test_parse_expression_with_trailing_unknown_should_raise_exception2(self): assert expected == _parse_error_as_dict(pe) def test_parse_expression_with_WITH(self): - gpl2, lgpl, mit, licensing = self.get_symbols_and_licensing() + gpl2, lgpl, mit, _mitand2, licensing = self.get_symbols_and_licensing() expr = 'The GNU GPL 20 or later or (LGPL-2.1 and mit) or The GNU GPL 20 or mit with 123' tokens = list(licensing.tokenize(expr)) expected = [ - (LicenseSymbol(key=u'gpl-2.0', is_exception=False, or_later=True), u'The GNU GPL 20 or later', 0), - (2, u'or', 24), - (4, u'(', 27), - (lgpl, u'LGPL-2.1', 28), - (1, u'and', 37), - (mit, u'mit', 41), - (5, u')', 44), - (2, u'or', 46), - (gpl2, u'The GNU GPL 20', 49), - (2, u'or', 64), - (LicenseWithExceptionSymbol(mit, LicenseSymbol(key=u'123', is_exception=True)), u'mit with 123', 67) + (LicenseSymbol(key='gpl-2.0', is_exception=False, or_later=True), 'The GNU GPL 20 or later', 0), + (TOKEN_OR, ' or ', 23), + (TOKEN_LPAR, '(', 27), + (lgpl, 'LGPL-2.1', 28), + (TOKEN_AND, ' and ', 36), + (mit, 'mit', 41), + (TOKEN_RPAR, ')', 44), + (TOKEN_OR, ' or ', 45), + (gpl2, 'The GNU GPL 20', 49), + (TOKEN_OR, ' or ', 63), + (LicenseWithExceptionSymbol(mit, LicenseSymbol(key='123', is_exception=True)), 'mit with 123', 67) ] assert expected == tokens @@ -727,21 +726,22 @@ def test_license_expression_license_keys(self): # these two are surprising for now: this is because the expression is a # logical expression so the order may be different on more complex expressions self.assertEqual(['mit', 'gpl'], licensing.license_keys(licensing.parse('mit AND gpl or gpl'))) - self.assertEqual(['l-a', 'l-b', 'l -c'], licensing.license_keys(licensing.parse('((l-a + AND l-b) OR (l -c+))'))) + self.assertEqual(['l-a', 'l-b', 'l-c'], licensing.license_keys(licensing.parse('((l-a+ AND l-b) OR (l-c+))'))) # same without parsing self.assertEqual(['mit', 'gpl'], licensing.license_keys('mit AND gpl or gpl')) - self.assertEqual(['l-a', 'l-b', 'l -c'], licensing.license_keys('((l-a + AND l-b) OR (l -c+))')) + self.assertEqual(['l-a', 'l-b', 'l-c'], licensing.license_keys('((l-a+ AND l-b) OR (l-c+))')) def get_symbols_and_licensing(self): gpl2 = LicenseSymbol('gpl-2.0', ['The GNU GPL 20', 'GPL-2.0', 'GPL v2.0']) lgpl = LicenseSymbol('LGPL-2.1', ['LGPL v2.1']) mit = LicenseSymbol('MIT', ['MIT license']) - symbols = [gpl2, lgpl, mit] + mitand2 = LicenseSymbol('mitand2', ['mitand2', 'mitand2 license']) + symbols = [gpl2, lgpl, mit, mitand2] licensing = Licensing(symbols) - return gpl2, lgpl, mit, licensing + return gpl2, lgpl, mit, mitand2, licensing def test_unknown_keys(self): - _gpl2, _lgpl, _mit, licensing = self.get_symbols_and_licensing() + _gpl2, _lgpl, _mit, _mitand2, licensing = self.get_symbols_and_licensing() expr = 'The GNU GPL 20 or LGPL-2.1 and mit' parsed = licensing.parse(expr) expected = 'gpl-2.0 OR (LGPL-2.1 AND MIT)' @@ -751,38 +751,50 @@ def test_unknown_keys(self): assert [] == licensing.unknown_license_keys(expr) def test_unknown_keys_with_trailing_char(self): - gpl2, lgpl, mit, licensing = self.get_symbols_and_licensing() + gpl2, lgpl, _mit, mitand2, licensing = self.get_symbols_and_licensing() expr = 'The GNU GPL 20 or LGPL-2.1 and mitand2' parsed = licensing.parse(expr) - expected = [gpl2, lgpl, mit, LicenseSymbol(key='2')] + expected = [gpl2, lgpl, mitand2] assert expected == licensing.license_symbols(parsed) assert expected == licensing.license_symbols(licensing.parse(parsed)) assert expected == licensing.license_symbols(expr) - assert ['2'] == licensing.unknown_license_keys(parsed) - assert ['2'] == licensing.unknown_license_keys(expr) + assert [] == licensing.unknown_license_keys(parsed) + assert [] == licensing.unknown_license_keys(expr) + + def test_unknown_keys_with_trailing_char_2(self): + _gpl2, _lgpl, _mit, _mitand2, licensing = self.get_symbols_and_licensing() + expr = 'The GNU GPL 20 or LGPL-2.1 and mitand3' + + try: + licensing.parse(expr) + self.fail('ParseError should be raised') + except ParseError as pe: + expected = {'error_code': 5, 'position': 34, 'token_string': u'and3', 'token_type': LicenseSymbol(key=u'and3')} + + assert expected == _parse_error_as_dict(pe) def test_end_to_end(self): # these were formerly doctest portedd to actual real code tests here l = Licensing() - expr = l.parse(' GPL-2.0 or LGPL 2.1 and mit ') - expected = 'GPL-2.0 OR (LGPL 2.1 AND mit)' + expr = l.parse(' GPL-2.0 or LGPL-2.1 and mit ') + expected = 'GPL-2.0 OR (LGPL-2.1 AND mit)' assert expected == str(expr) expected = [ LicenseSymbol('GPL-2.0'), - LicenseSymbol('LGPL 2.1'), + LicenseSymbol('LGPL-2.1'), LicenseSymbol('mit'), ] assert expected == l.license_symbols(expr) def test_pretty(self): l = Licensing() - expr = l.parse(' GPL-2.0 or LGPL 2.1 and mit ') + expr = l.parse(' GPL-2.0 or LGPL2.1 and mit ') expected = '''OR( LicenseSymbol('GPL-2.0'), AND( - LicenseSymbol('LGPL 2.1'), + LicenseSymbol('LGPL2.1'), LicenseSymbol('mit') ) )''' @@ -791,34 +803,34 @@ def test_pretty(self): def test_simplify_and_contains(self): l = Licensing() - expr = l.parse(' GPL-2.0 or LGPL 2.1 and mit ') - expr2 = l.parse(' GPL-2.0 or (mit and LGPL 2.1) ') + expr = l.parse(' GPL-2.0 or LGPL2.1 and mit ') + expr2 = l.parse(' GPL-2.0 or (mit and LGPL2.1) ') assert expr2.simplify() == expr.simplify() - expr3 = l.parse('mit and LGPL 2.1') + expr3 = l.parse('mit and LGPL2.1') assert expr3 in expr2 def test_simplify_and_equivalent_and_contains(self): l = Licensing() - expr2 = l.parse(' GPL-2.0 or (mit and LGPL 2.1) or bsd Or GPL-2.0 or (mit and LGPL 2.1)') + expr2 = l.parse(' GPL-2.0 or (mit and LGPL-2.1) or bsd Or GPL-2.0 or (mit and LGPL-2.1)') # note thats simplification does SORT the symbols such that they can # eventually be compared sequence-wise. This sorting is based on license key - expected = 'GPL-2.0 OR bsd OR (LGPL 2.1 AND mit)' + expected = 'GPL-2.0 OR bsd OR (LGPL-2.1 AND mit)' assert expected == str(expr2.simplify()) # Two expressions can be compared for equivalence: - expr1 = l.parse(' GPL-2.0 or (LGPL 2.1 and mit) ') - assert 'GPL-2.0 OR (LGPL 2.1 AND mit)' == str(expr1) - expr2 = l.parse(' (mit and LGPL 2.1) or GPL-2.0 ') - assert '(mit AND LGPL 2.1) OR GPL-2.0' == str(expr2) + expr1 = l.parse(' GPL-2.0 or (LGPL-2.1 and mit) ') + assert 'GPL-2.0 OR (LGPL-2.1 AND mit)' == str(expr1) + expr2 = l.parse(' (mit and LGPL-2.1) or GPL-2.0 ') + assert '(mit AND LGPL-2.1) OR GPL-2.0' == str(expr2) assert l.is_equivalent(expr1, expr2) - assert 'GPL-2.0 OR (LGPL 2.1 AND mit)' == str(expr1.simplify()) - assert 'GPL-2.0 OR (LGPL 2.1 AND mit)' == str(expr2.simplify()) + assert 'GPL-2.0 OR (LGPL-2.1 AND mit)' == str(expr1.simplify()) + assert 'GPL-2.0 OR (LGPL-2.1 AND mit)' == str(expr2.simplify()) assert expr1.simplify() == expr2.simplify() - expr3 = l.parse(' GPL-2.0 or mit or LGPL 2.1') + expr3 = l.parse(' GPL-2.0 or mit or LGPL-2.1') assert not l.is_equivalent(expr2, expr3) - expr4 = l.parse('mit and LGPL 2.1') + expr4 = l.parse('mit and LGPL-2.1') assert expr4.simplify() in expr2.simplify() assert l.contains(expr2, expr4) @@ -831,24 +843,24 @@ def test_create_from_python(self): licensing = Licensing() expr1 = (licensing.LicenseSymbol('GPL-2.0') | (licensing.LicenseSymbol('mit') - & licensing.LicenseSymbol('LGPL 2.1'))) - expr2 = licensing.parse(' GPL-2.0 or (mit and LGPL 2.1) ') + & licensing.LicenseSymbol('LGPL-2.1'))) + expr2 = licensing.parse(' GPL-2.0 or (mit and LGPL-2.1) ') - assert 'GPL-2.0 OR (LGPL 2.1 AND mit)' == str(expr1.simplify()) - assert 'GPL-2.0 OR (LGPL 2.1 AND mit)' == str(expr2.simplify()) + assert 'GPL-2.0 OR (LGPL-2.1 AND mit)' == str(expr1.simplify()) + assert 'GPL-2.0 OR (LGPL-2.1 AND mit)' == str(expr2.simplify()) assert licensing.is_equivalent(expr1, expr2) a = licensing.OR( - LicenseSymbol(key=u'gpl-2.0'), - licensing.AND(LicenseSymbol(key=u'mit'), - LicenseSymbol(key=u'lgpl 2.1') + LicenseSymbol(key='gpl-2.0'), + licensing.AND(LicenseSymbol(key='mit'), + LicenseSymbol(key='lgpl-2.1') ) ) b = licensing.OR( - LicenseSymbol(key=u'gpl-2.0'), - licensing.AND(LicenseSymbol(key=u'mit'), - LicenseSymbol(key=u'lgpl 2.1') + LicenseSymbol(key='gpl-2.0'), + licensing.AND(LicenseSymbol(key='mit'), + LicenseSymbol(key='lgpl-2.1') ) ) assert a == b @@ -858,119 +870,119 @@ def test_get_license_symbols(self): expected = [ LicenseSymbol('GPL-2.0'), LicenseSymbol('mit'), - LicenseSymbol('LGPL 2.1') + LicenseSymbol('LGPL-2.1') ] - assert expected == l.license_symbols(l.parse(' GPL-2.0 and mit or LGPL 2.1 and mit ')) + assert expected == l.license_symbols(l.parse(' GPL-2.0 and mit or LGPL-2.1 and mit ')) def test_get_license_symbols2(self): l = Licensing() expected = [ - LicenseSymbol('GPL-2.0', or_later=True), + LicenseSymbol('GPL-2.0'), + LicenseSymbol('LATER'), LicenseSymbol('mit'), - LicenseSymbol('LGPL 2.1', or_later=True), + LicenseSymbol('LGPL-2.1', or_later=True), LicenseSymbol('mit'), - LicenseSymbol('Foo exception', is_exception=True), + LicenseSymbol('Foo-exception', is_exception=True), ] - expr = ' GPL-2.0 or LATER and mit or LGPL 2.1+ and mit with Foo exception ' + expr = ' GPL-2.0 or LATER and mit or LGPL-2.1+ and mit with Foo-exception ' assert expected == l.license_symbols(l.parse(expr), unique=False) def test_get_license_symbols3(self): l = Licensing() expected = [ LicenseSymbol('mit'), - LicenseSymbol('LGPL 2.1', or_later=True), - LicenseSymbol('Foo exception', is_exception=True), - LicenseSymbol('GPL-2.0', or_later=True), + LicenseSymbol('LGPL-2.1', or_later=True), + LicenseSymbol('Foo-exception', is_exception=True), + LicenseSymbol('GPL-2.0'), + LicenseSymbol('LATER'), ] - expr = 'mit or LGPL 2.1+ and mit with Foo exception or GPL-2.0 or LATER ' + expr = 'mit or LGPL-2.1+ and mit with Foo-exception or GPL-2.0 or LATER ' assert expected == l.license_symbols(l.parse(expr)) def test_get_license_symbols4(self): l = Licensing() expected = [ - LicenseSymbol('GPL-2.0', or_later=True), - LicenseSymbol('big exception', is_exception=True), + LicenseSymbol('GPL-2.0'), + LicenseSymbol('LATER'), + LicenseSymbol('big-exception'), LicenseSymbol('mit'), - LicenseSymbol('LGPL 2.1', or_later=True), - LicenseSymbol('mit', or_later=True), - LicenseSymbol('Foo exception', is_exception=True), + LicenseSymbol('LGPL-2.1', or_later=True), + LicenseSymbol('mit'), + LicenseSymbol('later'), + LicenseSymbol('Foo-exception', is_exception=True), ] - expr = (' GPL-2.0 or LATER with big exception and mit or ' - 'LGPL 2.1+ and mit or later with Foo exception ') + expr = (' GPL-2.0 or LATER with big-exception and mit or ' + 'LGPL-2.1+ and mit or later with Foo-exception ') assert expected == l.license_symbols(l.parse(expr), unique=False) def test_parse_with_repeated_or_later_raise_parse_error(self): l = Licensing() - expr = 'LGPL 2.1+ or later and mit' + expr = 'LGPL2.1+ + and mit' try: l.parse(expr) self.fail('Exception not raised') - except ParseError as pe: - expected = { - 'error_code': 3, - 'position': 10, - 'token_string': 'or later', - 'token_type': TOKEN_OR_LATER} - assert expected == _parse_error_as_dict(pe) + except ExpressionError as ee: + expected = 'Invalid license key: the valid characters are: letters and numbers, underscore, dot or hyphen signs and spaces: "+"' + assert expected == str(ee) def test_render_complex(self): licensing = Licensing() expression = ''' - EPL 1.0 AND Apache 1.1 AND Apache 2.0 AND BSD-Modified AND CPL 1.0 AND - ICU Composite License AND JPEG License AND JDOM License AND LGPL 2.0 AND - MIT Open Group AND MPL 1.1 AND SAX-PD AND Unicode Inc License Agreement - AND W3C Software Notice and License AND W3C Documentation License''' + EPL-1.0 AND Apache-1.1 AND Apache-2.0 AND BSD-Modified AND CPL-1.0 AND + ICU-Composite-License AND JPEG-License AND JDOM-License AND LGPL-2.0 AND + MIT-Open-Group AND MPL-1.1 AND SAX-PD AND Unicode-Inc-License-Agreement + AND W3C-Software-Notice and License AND W3C-Documentation-License''' result = licensing.parse(expression) - expected = ('EPL 1.0 AND Apache 1.1 AND Apache 2.0 AND BSD-Modified ' - 'AND CPL 1.0 AND ICU Composite License AND JPEG License ' - 'AND JDOM License AND LGPL 2.0 AND MIT Open Group AND MPL 1.1 ' - 'AND SAX-PD AND Unicode Inc License Agreement ' - 'AND W3C Software Notice AND License AND W3C Documentation License') + expected = ('EPL-1.0 AND Apache-1.1 AND Apache-2.0 AND BSD-Modified ' + 'AND CPL-1.0 AND ICU-Composite-License AND JPEG-License ' + 'AND JDOM-License AND LGPL-2.0 AND MIT-Open-Group AND MPL-1.1 ' + 'AND SAX-PD AND Unicode-Inc-License-Agreement ' + 'AND W3C-Software-Notice AND License AND W3C-Documentation-License') assert expected == result.render('{symbol.key}') - expectedkey = ('EPL 1.0 AND Apache 1.1 AND Apache 2.0 AND BSD-Modified AND ' - 'CPL 1.0 AND ICU Composite License AND JPEG License AND JDOM License AND ' - 'LGPL 2.0 AND MIT Open Group AND MPL 1.1 AND SAX-PD AND ' - 'Unicode Inc License Agreement AND W3C Software Notice AND License AND' - ' W3C Documentation License') + expectedkey = ('EPL-1.0 AND Apache-1.1 AND Apache-2.0 AND BSD-Modified AND ' + 'CPL-1.0 AND ICU-Composite-License AND JPEG-License AND JDOM-License AND ' + 'LGPL-2.0 AND MIT-Open-Group AND MPL-1.1 AND SAX-PD AND ' + 'Unicode-Inc-License-Agreement AND W3C-Software-Notice AND License AND' + ' W3C-Documentation-License') assert expectedkey == result.render('{symbol.key}') def test_render_with(self): licensing = Licensing() - expression = 'GPL 2.0 with Classpath 2.0 OR BSD-new' + expression = 'GPL-2.0 with Classpath-2.0 OR BSD-new' result = licensing.parse(expression) - expected = 'GPL 2.0 WITH Classpath 2.0 OR BSD-new' + expected = 'GPL-2.0 WITH Classpath-2.0 OR BSD-new' assert expected == result.render('{symbol.key}') expected_html = ( - 'GPL 2.0 WITH ' - 'Classpath 2.0 ' + 'GPL-2.0 WITH ' + 'Classpath-2.0 ' 'OR BSD-new') assert expected_html == result.render('{symbol.key}') - expected = 'GPL 2.0 WITH Classpath 2.0 OR BSD-new' + expected = 'GPL-2.0 WITH Classpath-2.0 OR BSD-new' assert expected == result.render('{symbol.key}') def test_parse_complex(self): licensing = Licensing() - expression = ' GPL-2.0 or later with classpath Exception and mit or LPL 2.1 and mit or later ' + expression = ' GPL-2.0+ with classpath and mit or LPL2.1 and mit+ ' result = licensing.parse(expression) # this may look weird, but we did not provide symbols hence in "or later", # "later" is treated as if it were a license - expected = '(GPL-2.0 or later WITH classpath Exception AND mit) OR (LPL 2.1 AND mit or later)' + expected = '(GPL-2.0 or later WITH classpath AND mit) OR (LPL2.1 AND mit or later)' assert expected == result.render('{symbol.key}') licensing = Licensing() - expr = licensing.parse(" GPL-2.0 or LGPL 2.1 and mit ") + expr = licensing.parse(" GPL-2.0 or LGPL2.1 and mit ") expected = [ LicenseSymbol('GPL-2.0'), - LicenseSymbol('LGPL 2.1'), + LicenseSymbol('LGPL2.1'), LicenseSymbol('mit') ] assert expected == sorted(licensing.license_symbols(expr)) - expected = 'GPL-2.0 OR (LGPL 2.1 AND mit)' + expected = 'GPL-2.0 OR (LGPL2.1 AND mit)' assert expected == expr.render('{symbol.key}') def test_Licensing_with_illegal_symbols_raise_Exception(self): @@ -1050,6 +1062,109 @@ def test_primary_license_symbol_and_primary_license_key(self): assert expected == licensing.primary_license_symbol( parsed, decompose=False).render('{symbol.key}') + def test_splitter(self): + expr = (' GPL-2.0 or later with classpath Exception and mit and ' + 'mit with SOMETHING with ELSE+ or LGPL 2.1 and ' + 'GPL-2.0 or LATER with (Classpath Exception and ' + 'mit or later) or LGPL 2.1 or mit or GPL-2.0 or LATER ' + 'with SOMETHING with ELSE+ and lgpl 2.1') + results = list(splitter(expr)) + expected = [ + Result(0, 0, ' ', None), + Result(1, 7, 'GPL-2.0', Output('GPL-2.0', LicenseSymbol(key='GPL-2.0',))), + Result(8, 8, ' ', None), + Result(9, 10, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), + Result(11, 11, ' ', None), + Result(12, 16, 'later', Output('later', LicenseSymbol(key='later',))), + Result(17, 17, ' ', None), + Result(18, 21, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), + Result(22, 22, ' ', None), + Result(23, 31, 'classpath', Output('classpath', LicenseSymbol(key='classpath',))), + Result(32, 32, ' ', None), + Result(33, 41, 'Exception', Output('Exception', LicenseSymbol(key='Exception',))), + Result(42, 42, ' ', None), + Result(43, 45, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), + Result(46, 46, ' ', None), + Result(47, 49, 'mit', Output('mit', LicenseSymbol(key='mit',))), + Result(50, 50, ' ', None), + Result(51, 53, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), + Result(54, 54, ' ', None), + Result(55, 57, 'mit', Output('mit', LicenseSymbol(key='mit',))), + Result(58, 58, ' ', None), + Result(59, 62, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), + Result(63, 63, ' ', None), + Result(64, 72, 'SOMETHING', Output('SOMETHING', LicenseSymbol(key='SOMETHING',))), + Result(73, 73, ' ', None), + Result(74, 77, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), + Result(78, 78, ' ', None), + Result(79, 82, 'ELSE', Output('ELSE', LicenseSymbol(key='ELSE',))), + Result(83, 83, '+', Output('+', Keyword(value='+', type=TOKEN_OR_LATER))), + Result(84, 84, ' ', None), + Result(85, 86, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), + Result(87, 87, ' ', None), + Result(88, 91, 'LGPL', Output('LGPL', LicenseSymbol(key='LGPL',))), + Result(92, 92, ' ', None), + Result(93, 95, '2.1', Output('2.1', LicenseSymbol(key='2.1',))), + Result(96, 96, ' ', None), + Result(97, 99, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), + Result(100, 100, ' ', None), + Result(101, 107, 'GPL-2.0', Output('GPL-2.0', LicenseSymbol(key='GPL-2.0',))), + Result(108, 108, ' ', None), + Result(109, 110, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), + Result(111, 111, ' ', None), + Result(112, 116, 'LATER', Output('LATER', LicenseSymbol(key='LATER',))), + Result(117, 117, ' ', None), + Result(118, 121, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), + Result(122, 122, ' ', None), + Result(123, 123, '(', Output('(', Keyword(value='(', type=TOKEN_LPAR))), + Result(124, 132, 'Classpath', Output('Classpath', LicenseSymbol(key='Classpath',))), + Result(133, 133, ' ', None), + Result(134, 142, 'Exception', Output('Exception', LicenseSymbol(key='Exception',))), + Result(143, 143, ' ', None), + Result(144, 146, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), + Result(147, 147, ' ', None), + Result(148, 150, 'mit', Output('mit', LicenseSymbol(key='mit',))), + Result(151, 151, ' ', None), + Result(152, 153, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), + Result(154, 154, ' ', None), + Result(155, 159, 'later', Output('later', LicenseSymbol(key='later',))), + Result(160, 160, ')', Output(')', Keyword(value=')', type=TOKEN_RPAR))), + Result(161, 161, ' ', None), + Result(162, 163, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), + Result(164, 164, ' ', None), + Result(165, 168, 'LGPL', Output('LGPL', LicenseSymbol(key='LGPL',))), + Result(169, 169, ' ', None), + Result(170, 172, '2.1', Output('2.1', LicenseSymbol(key='2.1',))), + Result(173, 173, ' ', None), + Result(174, 175, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), + Result(176, 176, ' ', None), + Result(177, 179, 'mit', Output('mit', LicenseSymbol(key='mit',))), + Result(180, 180, ' ', None), + Result(181, 182, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), + Result(183, 183, ' ', None), + Result(184, 190, 'GPL-2.0', Output('GPL-2.0', LicenseSymbol(key='GPL-2.0',))), + Result(191, 191, ' ', None), + Result(192, 193, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), + Result(194, 194, ' ', None), + Result(195, 199, 'LATER', Output('LATER', LicenseSymbol(key='LATER',))), + Result(200, 200, ' ', None), + Result(201, 204, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), + Result(205, 205, ' ', None), + Result(206, 214, 'SOMETHING', Output('SOMETHING', LicenseSymbol(key='SOMETHING',))), + Result(215, 215, ' ', None), + Result(216, 219, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), + Result(220, 220, ' ', None), + Result(221, 224, 'ELSE', Output('ELSE', LicenseSymbol(key='ELSE',))), + Result(225, 225, '+', Output('+', Keyword(value='+', type=TOKEN_OR_LATER))), + Result(226, 226, ' ', None), + Result(227, 229, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), + Result(230, 230, ' ', None), + Result(231, 234, 'lgpl', Output('lgpl', LicenseSymbol(key='lgpl',))), + Result(235, 235, ' ', None), + Result(236, 238, '2.1', Output('2.1', LicenseSymbol(key='2.1',))) + ] + assert expected == results + def test_tokenize_step_by_step_does_not_munge_trailing_symbols(self): licensing = Licensing([ 'GPL-2.0', @@ -1080,186 +1195,143 @@ def test_tokenize_step_by_step_does_not_munge_trailing_symbols(self): # fist scan scanner = licensing.get_scanner() result = list(scanner.scan(expr)) - from license_expression._pyahocorasick import Result, Output expected = [ Result(0, 0, ' ', None), - Result(1, 7, 'GPL-2.0', Output('GPL-2.0', LicenseSymbol(key='GPL-2.0'))), - Result(8, 8, ' ', None), - Result(9, 16, 'or later', Output('or later', Keyword(value='or later', type=TOKEN_OR_LATER))), - - Result(17, 17, ' ', None), - Result(18, 21, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), - Result(22, 22, ' ', None), - Result(23, 41, 'classpath Exception', Output('classpath Exception', cpex_plain)), - - Result(42, 42, ' ', None), - Result(43, 45, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), - Result(46, 46, ' ', None), - Result(47, 49, 'mit', Output('mit', mit)), - Result(50, 50, ' ', None), - Result(51, 53, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), - Result(54, 54, ' ', None), - - Result(55, 57, 'mit', Output('mit', mit)), - Result(58, 58, ' ', None), - Result(59, 62, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), - Result(63, 63, ' ', None), + Result(8, 16, ' or later', Output(' or later', Keyword(value=' or later', type=TOKEN_OR_LATER))), + Result(17, 22, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), + Result(23, 41, 'classpath Exception', Output('classpath Exception', LicenseSymbol(key='classpath Exception'))), + Result(42, 46, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))), + Result(47, 49, 'mit', Output('mit', LicenseSymbol(key='mit'))), + Result(50, 54, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))), + Result(55, 57, 'mit', Output('mit', LicenseSymbol(key='mit'))), + Result(58, 63, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), Result(64, 82, 'SOMETHING with ELSE', Output('something with else', LicenseSymbol(key='something with else'))), Result(83, 83, '+', Output('+', Keyword(value='+', type=TOKEN_OR_LATER))), - - Result(84, 84, ' ', None), - Result(85, 86, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(87, 87, ' ', None), - Result(88, 95, 'LGPL 2.1', Output('LGPL 2.1', lgpl)), - Result(96, 96, ' ', None), - Result(97, 99, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), - Result(100, 100, ' ', None), - + Result(84, 87, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))), + Result(88, 95, 'LGPL 2.1', Output('LGPL 2.1', LicenseSymbol(key='LGPL 2.1'))), + Result(96, 100, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))), Result(101, 107, 'GPL-2.0', Output('GPL-2.0', LicenseSymbol(key='GPL-2.0'))), - Result(108, 108, ' ', None), - Result(109, 116, 'or LATER', Output('or later', Keyword(value='or later', type=TOKEN_OR_LATER))), - Result(117, 117, ' ', None), - Result(118, 121, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), - Result(122, 122, ' ', None), - Result(123, 141, 'Classpath Exception', Output('classpath Exception', cpex_plain)), - - Result(142, 142, ' ', None), - Result(143, 145, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), - Result(146, 146, ' ', None), - + Result(108, 116, ' or LATER', Output(' or later', Keyword(value=' or later', type=TOKEN_OR_LATER))), + Result(117, 122, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), + Result(123, 141, 'Classpath Exception', Output('classpath Exception', LicenseSymbol(key='classpath Exception'))), + Result(142, 146, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))), Result(147, 149, 'mit', Output('mit', LicenseSymbol(key='mit'))), - Result(150, 150, ' ', None), - Result(151, 158, 'or later', Output('or later', Keyword(value='or later', type=TOKEN_OR_LATER))), - - Result(159, 159, ' ', None), - Result(160, 161, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(162, 162, ' ', None), - Result(163, 170, 'LGPL 2.1', Output('LGPL 2.1', lgpl)), - Result(171, 171, ' ', None), - Result(172, 173, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(174, 174, ' ', None), - Result(175, 177, 'mit', Output('mit', mit)), - Result(178, 178, ' ', None), - Result(179, 180, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(181, 181, ' ', None), - + Result(150, 158, ' or later', Output(' or later', Keyword(value=' or later', type=TOKEN_OR_LATER))), + Result(159, 162, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))), + Result(163, 170, 'LGPL 2.1', Output('LGPL 2.1', LicenseSymbol(key='LGPL 2.1'))), + Result(171, 174, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))), + Result(175, 177, 'mit', Output('mit', LicenseSymbol(key='mit'))), + Result(178, 181, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))), Result(182, 188, 'GPL-2.0', Output('GPL-2.0', LicenseSymbol(key='GPL-2.0'))), - Result(189, 189, ' ', None), - Result(190, 197, 'or LATER', Output('or later', Keyword(value='or later', type=TOKEN_OR_LATER))), - Result(198, 198, ' ', None), - Result(199, 202, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), - Result(203, 203, ' ', None), + Result(189, 197, ' or LATER', Output(' or later', Keyword(value=' or later', type=TOKEN_OR_LATER))), + Result(198, 203, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), Result(204, 222, 'SOMETHING with ELSE', Output('something with else', LicenseSymbol(key='something with else'))), Result(223, 223, '+', Output('+', Keyword(value='+', type=TOKEN_OR_LATER))), - - Result(224, 224, ' ', None), - Result(225, 227, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), - Result(228, 228, ' ', None), - Result(229, 236, 'lgpl 2.1', Output('LGPL 2.1', lgpl)) + Result(224, 228, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))), + Result(229, 236, 'lgpl 2.1', Output('LGPL 2.1', LicenseSymbol(key='LGPL 2.1'))) ] + assert expected == result assert expected[-1].end + 1 == 237 == sum(len(r.string) for r in result) # skip spaces result = list(strip_and_skip_spaces(result)) - assert 64 == len(expected) + assert 34 == len(expected) assert 33 == len(result) # merge or later result = list(merge_or_later_results(result)) expected_merged = [ - Result(1, 16, u'GPL-2.0 or later', Output(u'GPL-2.0 or later', LicenseSymbol(key=u'GPL-2.0', or_later=True))), - Result(18, 21, u'with', Output(u'with', Keyword(value=u'with', type=TOKEN_WITH))), - Result(23, 41, u'classpath Exception', Output(u'classpath Exception', LicenseSymbol(key=u'classpath Exception'))), - Result(43, 45, u'and', Output(u'and', Keyword(value=u'and', type=TOKEN_AND))), - Result(47, 49, u'mit', Output(u'mit', LicenseSymbol(key=u'mit'))), - Result(51, 53, u'and', Output(u'and', Keyword(value=u'and', type=TOKEN_AND))), - Result(55, 57, u'mit', Output(u'mit', LicenseSymbol(key=u'mit'))), - Result(59, 62, u'with', Output(u'with', Keyword(value=u'with', type=TOKEN_WITH))), - Result(64, 83, u'SOMETHING with ELSE+', Output(u'SOMETHING with ELSE+', LicenseSymbol(key=u'something with else', or_later=True))), - Result(85, 86, u'or', Output(u'or', Keyword(value=u'or', type=TOKEN_OR))), - Result(88, 95, u'LGPL 2.1', Output(u'LGPL 2.1', LicenseSymbol(key=u'LGPL 2.1'))), - Result(97, 99, u'and', Output(u'and', Keyword(value=u'and', type=TOKEN_AND))), - Result(101, 116, u'GPL-2.0 or LATER', Output(u'GPL-2.0 or LATER', LicenseSymbol(key=u'GPL-2.0', or_later=True))), - Result(118, 121, u'with', Output(u'with', Keyword(value=u'with', type=TOKEN_WITH))), - Result(123, 141, u'Classpath Exception', Output(u'classpath Exception', LicenseSymbol(key=u'classpath Exception'))), - Result(143, 145, u'and', Output(u'and', Keyword(value=u'and', type=TOKEN_AND))), - Result(147, 158, u'mit or later', Output(u'mit or later', LicenseSymbol(key=u'mit', or_later=True))), - Result(160, 161, u'or', Output(u'or', Keyword(value=u'or', type=TOKEN_OR))), - Result(163, 170, u'LGPL 2.1', Output(u'LGPL 2.1', LicenseSymbol(key=u'LGPL 2.1'))), - Result(172, 173, u'or', Output(u'or', Keyword(value=u'or', type=TOKEN_OR))), - Result(175, 177, u'mit', Output(u'mit', LicenseSymbol(key=u'mit'))), - Result(179, 180, u'or', Output(u'or', Keyword(value=u'or', type=TOKEN_OR))), - Result(182, 197, u'GPL-2.0 or LATER', Output(u'GPL-2.0 or LATER', LicenseSymbol(key=u'GPL-2.0', or_later=True))), - Result(199, 202, u'with', Output(u'with', Keyword(value=u'with', type=TOKEN_WITH))), - Result(204, 223, u'SOMETHING with ELSE+', Output(u'SOMETHING with ELSE+', LicenseSymbol(key=u'something with else', or_later=True))), - Result(225, 227, u'and', Output(u'and', Keyword(value=u'and', type=TOKEN_AND))), - Result(229, 236, u'lgpl 2.1', Output(u'LGPL 2.1', LicenseSymbol(key=u'LGPL 2.1'))) + Result(1, 16, 'GPL-2.0 or later', Output('GPL-2.0 or later', LicenseSymbol(key='GPL-2.0', or_later=True))), + Result(17, 22, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), + Result(23, 41, 'classpath Exception', Output('classpath Exception', LicenseSymbol(key='classpath Exception'))), + Result(42, 46, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))), + Result(47, 49, 'mit', Output('mit', LicenseSymbol(key='mit'))), + Result(50, 54, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))), + Result(55, 57, 'mit', Output('mit', LicenseSymbol(key='mit'))), + Result(58, 63, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), + Result(64, 83, 'SOMETHING with ELSE+', Output('SOMETHING with ELSE+', LicenseSymbol(key='something with else', or_later=True))), + Result(84, 87, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))), + Result(88, 95, 'LGPL 2.1', Output('LGPL 2.1', LicenseSymbol(key='LGPL 2.1'))), + Result(96, 100, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))), + Result(101, 116, 'GPL-2.0 or LATER', Output('GPL-2.0 or LATER', LicenseSymbol(key='GPL-2.0', or_later=True))), + Result(117, 122, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), + Result(123, 141, 'Classpath Exception', Output('classpath Exception', LicenseSymbol(key='classpath Exception'))), + Result(142, 146, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))), + Result(147, 158, 'mit or later', Output('mit or later', LicenseSymbol(key='mit', or_later=True))), + Result(159, 162, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))), + Result(163, 170, 'LGPL 2.1', Output('LGPL 2.1', LicenseSymbol(key='LGPL 2.1'))), + Result(171, 174, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))), + Result(175, 177, 'mit', Output('mit', LicenseSymbol(key='mit'))), + Result(178, 181, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))), + Result(182, 197, 'GPL-2.0 or LATER', Output('GPL-2.0 or LATER', LicenseSymbol(key='GPL-2.0', or_later=True))), + Result(198, 203, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), + Result(204, 223, 'SOMETHING with ELSE+', Output('SOMETHING with ELSE+', LicenseSymbol(key='something with else', or_later=True))), + Result(224, 228, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))), + Result(229, 236, 'lgpl 2.1', Output('LGPL 2.1', LicenseSymbol(key='LGPL 2.1'))) ] assert expected_merged == result - + # group results expected_groups = [ (Result(1, 16, 'GPL-2.0 or later', Output('GPL-2.0 or later', gpl2plus)), - Result(18, 21, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), + Result(17, 22, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), Result(23, 41, 'classpath Exception', Output('classpath Exception', cpex_plain))), - (Result(43, 45, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))),), + (Result(42, 46, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))),), (Result(47, 49, 'mit', Output('mit', LicenseSymbol(key='mit'))),), - (Result(51, 53, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))),), + (Result(50, 54, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))),), (Result(55, 57, 'mit', Output('mit', mit)), - Result(59, 62, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), + Result(58, 63, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), Result(64, 83, 'SOMETHING with ELSE+', Output('SOMETHING with ELSE+', someplus))), - (Result(85, 86, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))),), + (Result(84, 87, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))),), (Result(88, 95, 'LGPL 2.1', Output('LGPL 2.1', lgpl)),), - (Result(97, 99, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))),), + (Result(96, 100, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))),), (Result(101, 116, 'GPL-2.0 or LATER', Output('GPL-2.0 or LATER', gpl2plus)), - Result(118, 121, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), + Result(117, 122, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), Result(123, 141, 'Classpath Exception', Output('classpath Exception', cpex_plain))), - (Result(143, 145, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))),), + (Result(142, 146, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))),), (Result(147, 158, 'mit or later', Output('mit or later', mitplus)),), - (Result(160, 161, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))),), + (Result(159, 162, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))),), (Result(163, 170, 'LGPL 2.1', Output('LGPL 2.1', lgpl)),), - (Result(172, 173, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))),), + (Result(171, 174, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))),), (Result(175, 177, 'mit', Output('mit', mit)),), - (Result(179, 180, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))),), + (Result(178, 181, ' or ', Output(' or ', Keyword(value=' or ', type=TOKEN_OR))),), (Result(182, 197, 'GPL-2.0 or LATER', Output('GPL-2.0 or LATER', gpl2plus)), - Result(199, 202, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), + Result(198, 203, ' with ', Output(' with ', Keyword(value=' with ', type=TOKEN_WITH))), Result(204, 223, 'SOMETHING with ELSE+', Output('SOMETHING with ELSE+', someplus))), - (Result(225, 227, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))),), + (Result(224, 228, ' and ', Output(' and ', Keyword(value=' and ', type=TOKEN_AND))),), (Result(229, 236, 'lgpl 2.1', Output('LGPL 2.1', lgpl)),), - ] result_groups = list(group_results_for_with_subexpression(result)) assert expected_groups == result_groups # finally retest it all with tokenize - expected = [ - (gpl2plus_with_cpex, 'GPL-2.0 or later with classpath Exception', 1), - (1, 'and', 43), + expected = [(gpl2plus_with_cpex, 'GPL-2.0 or later with classpath Exception', 1), + (TOKEN_AND, ' and ', 42), (mit, 'mit', 47), - (1, 'and', 51), + (TOKEN_AND, ' and ', 50), (mit_with_some, 'mit with SOMETHING with ELSE+', 55), - (2, 'or', 85), + (TOKEN_OR, ' or ', 84), (lgpl, 'LGPL 2.1', 88), - (1, 'and', 97), + (TOKEN_AND, ' and ', 96), (gpl2plus_with_cpex, 'GPL-2.0 or LATER with Classpath Exception', 101), - (1, 'and', 143), + (TOKEN_AND, ' and ', 142), (mitplus, 'mit or later', 147), - (2, 'or', 160), + (TOKEN_OR, ' or ', 159), (lgpl, 'LGPL 2.1', 163), - (2, 'or', 172), + (TOKEN_OR, ' or ', 171), (mit, 'mit', 175), - (2, 'or', 179), + (TOKEN_OR, ' or ', 178), (gpl2plus_with_someplus, 'GPL-2.0 or LATER with SOMETHING with ELSE+', 182), - (1, 'and', 225), + (TOKEN_AND, ' and ', 224), (lgpl, 'lgpl 2.1', 229) ] @@ -1280,21 +1352,63 @@ def test_merge_or_later_results(self): ] results = list(merge_or_later_results(tests)) expected = [ - Result(1, 118, u'GPL-2.0 or later', - Output(u'GPL-2.0 or later', - LicenseSymbol(key=u'GPL-2.0', is_exception=False, or_later=True))), + Result(1, 118, 'GPL-2.0 or later', + Output('GPL-2.0 or later', LicenseSymbol(key='GPL-2.0', is_exception=False, or_later=True))), Result(43, 45, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), - Result(47, 56, u'mit +', - Output(u'mit +', - LicenseSymbol(key=u'mit', is_exception=False, or_later=True))) + Result(47, 56, 'mit +', + Output('mit +', LicenseSymbol(key='mit', is_exception=False, or_later=True))) ] assert expected == results assert results[0].output.value.or_later assert results[2].output.value.or_later + def test_Licensing_can_scan_valid_expressions_with_symbols_that_contain_and_with_or(self): + licensing = Licensing() + expression = 'orgpl or withbsd with orclasspath and andmit or andlgpl and ormit or withme' + result = [r.string for r in licensing.get_scanner().scan(expression)] + expected = [ + 'orgpl', ' or ', 'withbsd', ' with ', 'orclasspath', + ' and ', 'andmit', ' or ', 'andlgpl', ' and ', 'ormit', + ' or ', 'withme' + ] + + assert expected == result + + def test_Licensing_can_tokenize_valid_expressions_with_symbols_that_contain_and_with_or(self): + licensing = Licensing() + expression = 'orgpl or withbsd with orclasspath and andmit or anlgpl and ormit or withme' + + result = list(licensing.tokenize(expression)) + expected = [ + (LicenseSymbol(key='orgpl'), 'orgpl', 0), + (2, 'or', 6), + (LicenseWithExceptionSymbol( + license_symbol=LicenseSymbol(key='withbsd'), + exception_symbol=LicenseSymbol(key='orclasspath', is_exception=True)), + 'withbsd with orclasspath', 9), + (1, 'and', 34), + (LicenseSymbol(key='andmit'), 'andmit', 38), + (2, 'or', 45), + (LicenseSymbol(key='anlgpl'), 'anlgpl', 48), + (1, 'and', 55), + (LicenseSymbol(key='ormit'), 'ormit', 59), + (2, 'or', 65), + (LicenseSymbol(key='withme'), 'withme', 68) + ] + + assert expected == result + + def test_Licensing_can_parse_valid_expressions_with_symbols_that_contain_and_with_or(self): + licensing = Licensing() + expression = 'orgpl or withbsd with orclasspath and andmit or anlgpl and ormit or withme' + + result = licensing.parse(expression) + expected = 'orgpl OR (withbsd WITH orclasspath AND andmit) OR (anlgpl AND ormit) OR withme' + assert expected == result.render('{symbol.key}') + def _parse_error_as_dict(pe): """