From f4c4270b30deb1a850e7c141b7e3a6b2ce3e3c12 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Thu, 2 Aug 2018 12:20:08 +0200 Subject: [PATCH 1/9] Add failing tests for #29 This is just the tests for now, to support alternative implementation Signed-off-by: Philippe Ombredanne --- .travis.yml | 43 +++++++++---- configure | 12 ++-- setup.py | 2 +- tests/test_license_expression.py | 100 +++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 18 deletions(-) diff --git a/.travis.yml b/.travis.yml index a979755..1fe01c4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,17 +1,40 @@ -# This deliberately is not "python" as a work-around to support -# multi-os builds with custom Python versions in Travis CI. -language: cpp - -os: - - osx - - linux +language: python env: matrix: - - PYTHON_EXE="`pyenv install -s 2.7.13 && pyenv local 2.7.13`" - - PYTHON_EXE="`pyenv install -s 3.5.3 && pyenv local 3.5.3`" + - PYTHON_EXE="`pyenv install -s 2.7.14 && pyenv local 2.7.14`" - PYTHON_EXE="`pyenv install -s 3.6.1 && pyenv local 3.6.1`" + +# Travis does not offer OSX with arbitrary python versions (like 2.7.13 above) +# So, you cannot simply have the following section in your build matrix: +# os: +# - linux +# - osx +# Instead, you have to include OSX entries into the build matrix manually. +# In particular, this means specifying the environment variables again. + +# The following was adapted from here: +# https://docs.travis-ci.com/user/multi-os/ +# Set `language: generic` to clear `language: python` from above +# Set `python:` (to empty) to clear it from the travis-ci web interface +# Set `osx_image: xcode7.3` to pin OSX version see here: +# https://docs.travis-ci.com/user/osx-ci-environment/ + +matrix: + include: + - os: osx + language: generic + python: + osx_image: xcode7.3 + env: PYTHON_EXE="`pyenv install -s 2.7.14 && pyenv local 2.7.14`" + - os: osx + language: generic + python: + osx_image: xcode7.3 + env: PYTHON_EXE="`pyenv install -s 3.6.1 && pyenv local 3.6.1`" + + install: - pyenv install --list - ./configure @@ -32,4 +55,4 @@ notifications: use_notice: true skip_join: true template: - - "%{repository_slug}#%{build_number} (%{branch} - %{commit} : %{author}): %{message} : %{build_url}" + - "%{repository_slug}#%{build_number} (%{branch}-%{commit}:%{author})-%{message}- %{build_url}" diff --git a/configure b/configure index 8ceb9d6..4f9fdcc 100755 --- a/configure +++ b/configure @@ -16,17 +16,15 @@ CONF_DEFAULT="etc/conf/dev" CFG_CMD_LINE_ARGS="$@" -if [ "$1" == "--init" ]; then - CFG_CMD_LINE_ARGS=$CONF_INIT -fi - -if [ "$1" == "" ]; then +if [[ "$1" == "" ]]; then # default for dev conf if not argument is provided CFG_CMD_LINE_ARGS=$CONF_DEFAULT fi -if [ "$PYTHON_EXE" == "" ]; then +CONFIGURE_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +if [[ "$PYTHON_EXE" == "" ]]; then PYTHON_EXE=python fi -$PYTHON_EXE etc/configure.py $CFG_CMD_LINE_ARGS +$PYTHON_EXE "$CONFIGURE_ROOT_DIR/etc/configure.py" $CFG_CMD_LINE_ARGS diff --git a/setup.py b/setup.py index ea2eae5..38f993d 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ setup( name='license-expression', - version='0.98', + version='0.99', license='apache-2.0', description=desc, long_description=desc, diff --git a/tests/test_license_expression.py b/tests/test_license_expression.py index 58312c4..22da4c7 100644 --- a/tests/test_license_expression.py +++ b/tests/test_license_expression.py @@ -250,6 +250,40 @@ def test_tokenize_unknown_as_trailing_single_attached_character(self): ] assert expected == result + def 
test_tokenize_with_unknown_symbol_containing_known_symbol_leading(self): + l = Licensing(['gpl-2.0']) + result = list(l.tokenize('gpl-2.0 AND gpl-2.0-plus', strict=False)) + result = [s for s, _, _ in result] + expected = [ + LicenseSymbol(key='gpl-2.0'), + TOKEN_AND, + LicenseSymbol(key='gpl-2.0-plus'), + ] + assert expected == result + + def test_tokenize_with_unknown_symbol_containing_known_symbol_contained(self): + l = Licensing(['gpl-2.0']) + result = list(l.tokenize('gpl-2.0 WITH exception-gpl-2.0-plus', strict=False)) + result = [s for s, _, _ in result] + expected = [ + LicenseWithExceptionSymbol( + LicenseSymbol(u'gpl-2.0'), + LicenseSymbol(u'exception-gpl-2.0-plus') + ) + ] + assert expected == result + + def test_tokenize_with_unknown_symbol_containing_known_symbol_trailing(self): + l = Licensing(['gpl-2.0']) + result = list(l.tokenize('gpl-2.0 AND exception-gpl-2.0', strict=False)) + result = [s for s, _, _ in result] + expected = [ + LicenseSymbol(u'gpl-2.0'), + TOKEN_AND, + LicenseSymbol(u'exception-gpl-2.0') + ] + assert expected == result + class LicensingParseTest(TestCase): @@ -278,6 +312,7 @@ def test_parse_raise_ExpressionError_when_validating(self): licensing = Licensing() try: licensing.parse(expression, validate=True) + self.fail('Exception not raised') except ExpressionError as ee: assert 'Unknown license key(s): gpl, bsd, lgpl, exception' == str(ee) @@ -286,6 +321,7 @@ def test_parse_raise_ExpressionError_when_validating_strict(self): licensing = Licensing() try: licensing.parse(expression, validate=True, strict=True) + self.fail('Exception not raised') except ExpressionError as ee: assert str(ee).startswith('exception_symbol must be an exception with "is_exception" set to True:') @@ -687,6 +723,38 @@ def test_Licensing_can_tokenize_valid_expressions_with_symbols_that_contain_and_ assert expected == result + def test_Licensing_can_split_valid_expressions_with_symbols_that_contain_and_with_or(self): + expression = 'orgpl or withbsd with orclasspath and andmit or andlgpl and ormit or withme' + result = [r.string for r in splitter(expression)] + expected = [ + 'orgpl', + ' ', + 'or', + ' ', + 'withbsd', + ' ', + 'with', + ' ', + 'orclasspath', + ' ', + 'and', + ' ', + 'andmit', + ' ', + 'or', + ' ', + 'andlgpl', + ' ', + 'and', + ' ', + 'ormit', + ' ', + 'or', + ' ', + 'withme' + ] + assert expected == result + def test_Licensing_can_parse_valid_expressions_with_symbols_that_contain_and_with_or(self): licensing = Licensing() expression = 'orgpl or withbsd with orclasspath and andmit or anlgpl and ormit or withme' @@ -708,6 +776,7 @@ def test_Licensing_with_illegal_symbols_raise_Exception(self): 'LGPL 2.1', 'mit or later' ]) + self.fail('Exception not raised') except ExpressionError as ee: expected = ('Invalid license key: "or later" words are reserved and ' 'cannot be used in a key: "GPL-2.0 or LATER"') @@ -881,6 +950,7 @@ def test_parse_raise_ParseError_when_validating_strict_with_non_exception_symbol expression = 'gpl and bsd or lgpl with exception' try: licensing.parse(expression, validate=True, strict=True) + self.fail('Exception not raised') except ParseError as pe: expected = { 'error_code': PARSE_INVALID_SYMBOL_AS_EXCEPTION, @@ -895,6 +965,7 @@ def test_parse_raise_ParseError_when_validating_strict_with_exception_symbols_in licensing.parse('gpl with exception', validate=True, strict=True) try: licensing.parse('exception with gpl', validate=True, strict=True) + self.fail('Exception not raised') except ParseError as pe: expected = { 'error_code': 
PARSE_INVALID_EXCEPTION, @@ -905,6 +976,7 @@ def test_parse_raise_ParseError_when_validating_strict_with_exception_symbols_in try: licensing.parse('gpl with gpl', validate=True, strict=True) + self.fail('Exception not raised') except ParseError as pe: expected = { 'error_code': PARSE_INVALID_SYMBOL_AS_EXCEPTION, @@ -913,6 +985,31 @@ def test_parse_raise_ParseError_when_validating_strict_with_exception_symbols_in 'token_type': TOKEN_SYMBOL} assert expected == _parse_error_as_dict(pe) + def test_with_unknown_symbol_string_contained_in_known_symbol_does_not_crash_with(self): + l = Licensing(['lgpl-3.0-plus']) + license_expression = 'lgpl-3.0-plus WITH openssl-exception-lgpl-3.0-plus' + l.parse(license_expression) + + def test_with_unknown_symbol_string_contained_in_known_symbol_does_not_crash_and(self): + l = Licensing(['lgpl-3.0-plus']) + license_expression = 'lgpl-3.0-plus AND openssl-exception-lgpl-3.0-plus' + l.parse(license_expression) + + def test_with_unknown_symbol_string_contained_in_known_symbol_does_not_crash_or(self): + l = Licensing(['lgpl-3.0-plus']) + license_expression = 'lgpl-3.0-plus OR openssl-exception-lgpl-3.0-plus' + l.parse(license_expression) + + def test_with_known_symbol_string_contained_in_known_symbol_does_not_crash_or(self): + l = Licensing(['lgpl-3.0-plus', 'openssl-exception-lgpl-3.0-plus']) + license_expression = 'lgpl-3.0-plus OR openssl-exception-lgpl-3.0-plus' + l.parse(license_expression) + + def test_with_known_symbol_string_contained_in_known_symbol_does_not_crash_with(self): + l = Licensing(['lgpl-3.0-plus', 'openssl-exception-lgpl-3.0-plus']) + license_expression = 'lgpl-3.0-plus WITH openssl-exception-lgpl-3.0-plus' + l.parse(license_expression) + class LicensingSymbolsReplacement(TestCase): @@ -1015,6 +1112,7 @@ def test_parse_trailing_char_raise_exception(self): _gpl2, _gpl2plus, _lgpl, _mit, _mitand2, licensing = self.get_symbols_and_licensing() try: licensing.parse('The GNU GPL 20 or LGPL-2.1 and mit2') + self.fail('Exception not raised') except ParseError as pe: expected = {'error_code': PARSE_INVALID_SYMBOL_SEQUENCE, 'position': 34, 'token_string': '2', 'token_type': LicenseSymbol('2')} @@ -1043,6 +1141,7 @@ def test_parse_expression_with_trailing_unknown_should_raise_exception(self): try: licensing.parse('The GNU GPL 20 or later or (LGPL-2.1 and mit) or The GNU GPL 20 or mit 123') + self.fail('Exception not raised') except ParseError as pe: expected = {'error_code': PARSE_INVALID_SYMBOL_SEQUENCE, 'position': 70, 'token_string': ' 123', 'token_type': unknown} @@ -1053,6 +1152,7 @@ def test_parse_expression_with_trailing_unknown_should_raise_exception2(self): unknown = LicenseSymbol(key='123') try: licensing.parse('The GNU GPL 20 or mit 123') + self.fail('Exception not raised') except ParseError as pe: expected = {'error_code': PARSE_INVALID_SYMBOL_SEQUENCE, 'position': 21, 'token_string': ' 123', 'token_type': unknown} From 9760b3e62fdf1085af798950977641a04a279435 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Fri, 3 Aug 2018 21:24:53 +0200 Subject: [PATCH 2/9] Support nested license names #29 These can include spaces or not. 
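For instance, a known symbol whose key is nested inside a longer unknown symbol name must now tokenize cleanly. A minimal sketch of the intended behavior, using the same names as the tests added in this patch:

    licensing = Licensing(['lgpl-3.0-plus'])
    # 'openssl-exception-lgpl-3.0-plus' is unknown and contains the known
    # 'lgpl-3.0-plus' key: parsing must neither crash nor mis-split it
    licensing.parse('lgpl-3.0-plus WITH openssl-exception-lgpl-3.0-plus')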
Signed-off-by: Philippe Ombredanne --- README.rst | 2 + src/license_expression/__init__.py | 367 ++++++++----- src/license_expression/_pyahocorasick.py | 669 +++++++++++------------ tests/test__pyahocorasick.py | 237 +++++--- tests/test_license_expression.py | 633 ++++++++++++--------- 5 files changed, 1088 insertions(+), 820 deletions(-) diff --git a/README.rst b/README.rst index 77f24fb..09587d0 100644 --- a/README.rst +++ b/README.rst @@ -104,6 +104,8 @@ And expression can be simplified: >>> expression2 = ' GPL-2.0 or (mit and LGPL 2.1) or bsd Or GPL-2.0 or (mit and LGPL 2.1)' >>> parsed2 = licensing.parse(expression2) + >>> str(parsed2) + 'GPL-2.0 OR (mit AND LGPL 2.1) OR BSD OR GPL-2.0 OR (mit AND LGPL 2.1)' >>> assert str(parsed2.simplify()) == 'BSD OR GPL-2.0 OR (LGPL 2.1 AND mit)' Two expressions can be compared for equivalence and containment: diff --git a/src/license_expression/__init__.py b/src/license_expression/__init__.py index 43c7b2c..e67740e 100644 --- a/src/license_expression/__init__.py +++ b/src/license_expression/__init__.py @@ -40,11 +40,16 @@ # Python 3 unicode = str # NOQA -import collections +from collections import defaultdict +from collections import deque +from collections import namedtuple +from collections import OrderedDict from copy import copy from copy import deepcopy from functools import total_ordering import itertools +import logging +from pprint import pprint import re import string @@ -67,8 +72,25 @@ from boolean.boolean import TOKEN_RPAR from license_expression._pyahocorasick import Trie as Scanner -from license_expression._pyahocorasick import Output -from license_expression._pyahocorasick import Result +from license_expression._pyahocorasick import Token + +TRACE = False + +logger = logging.getLogger(__name__) + + +def logger_debug(*args): + pass + + +if TRACE: + + def logger_debug(*args): + return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) + + import sys + logging.basicConfig(stream=sys.stdout) + logger.setLevel(logging.DEBUG) # append new error codes to PARSE_ERRORS by monkey patching PARSE_EXPRESSION_NOT_UNICODE = 100 @@ -98,24 +120,25 @@ class ExpressionError(Exception): # Used for tokenizing -Keyword = collections.namedtuple('Keyword', 'value type') +Keyword = namedtuple('Keyword', 'value type') +Keyword.__len__ = lambda self: len(self.value) # id for "with" token which is not a proper boolean symbol but an expression symbol TOKEN_WITH = 10 -# actual keyword types +# keyword types that include operators and parens + KW_LPAR = Keyword('(', TOKEN_LPAR) KW_RPAR = Keyword(')', TOKEN_RPAR) -_KEYWORDS = [ - Keyword(' and ', TOKEN_AND), - Keyword(' or ', TOKEN_OR), - KW_LPAR, - KW_RPAR, - Keyword(' with ', TOKEN_WITH), -] +KW_AND = Keyword('and', TOKEN_AND) +KW_OR = Keyword('or', TOKEN_OR) +KW_WITH = Keyword('with', TOKEN_WITH) -KEYWORDS = tuple(kw.value for kw in _KEYWORDS) -KEYWORDS_STRIPPED = tuple(k.strip() for k in KEYWORDS) +KEYWORDS = (KW_AND, KW_OR, KW_LPAR, KW_RPAR, KW_WITH,) +KEYWORDS_STRINGS = set(kw.value for kw in KEYWORDS) + +# mapping of lowercase operator strings to an operator object +OPERATORS = {'and': KW_AND, 'or': KW_OR, 'with': KW_WITH} class Licensing(boolean.BooleanAlgebra): @@ -178,11 +201,12 @@ def __init__(self, symbols=tuple(), quiet=True): if errors: raise ValueError('\n'.join(warns + errors)) - # mapping of known symbol used for parsing and resolution as (key, symbol) - # TODO: inject lpar, rpar and spaces sourround, before and after - # e.g "(sym)" "(sym " "sym)" " sym " + # mapping of 
known symbol key to symbol for reference self.known_symbols = {symbol.key: symbol for symbol in symbols} + # mapping of lowercase key and aliases to symbol used to resolve symbols + self.symbol_by_key = get_symbols_by_key(symbols) + # Aho-Corasick automaton-based Scanner used for expression tokenizing self.scanner = None @@ -337,7 +361,7 @@ def parse(self, expression, validate=False, strict=False, **kwargs): string. Check that the expression syntax is valid and raise an Exception, ExpressionError or ParseError on errors. Return None for empty expressions. `expression` is either a string or a LicenseExpression object. If this is a - LicenseExpression it is retruned as-si. + LicenseExpression it is returned as-is. Symbols are always recognized from known symbols if `symbols` were provided Licensing creation time: each license and exception is recognized from known @@ -415,30 +439,61 @@ def tokenize(self, expression, strict=False): such as "XXX with ZZZ" if the XXX symbol has is_exception` set to True or the ZZZ symbol has `is_exception` set to False. """ - if self.known_symbols: - # scan with an automaton, recognize whole symbols+keywords or only keywords - scanner = self.get_scanner() - results = scanner.scan(expression) - else: - # scan with a simple regex-based splitter - results = splitter(expression) - results = strip_and_skip_spaces(results) - result_groups = group_results_for_with_subexpression(results) +# if self.known_symbols: + if TRACE: + logger_debug('tokenize, using known_symbols') + # scan with an automaton, recognize whole symbols+keywords or only keywords + scanner = self.get_scanner() + tokens = scanner.scan(expression) +# else: +# if TRACE: +# logger_debug('tokenize, using plain splitter') +# tokens = splitter(expression) + + tokens = list(tokens) + if TRACE: + logger_debug('tokenize: tokens') + pprint(tokens) + + # Assign a symbol to unknown tokens + tokens = list(build_symbols_from_unmatched_tokens(tokens)) + if TRACE: + logger_debug('tokenize: token with symbols') + pprint(tokens) + + # skip whitespace-only tokens + tokens = [t for t in tokens if t.string and t.string.strip()] + if TRACE: + logger_debug('tokenize: token NO spaces') + pprint(tokens) + + # group Symbol or operator tokens separated only by spaces + # attempt to look up this token_group of symbols in a table. + # use the symbol if available + # otherwise ....?
+ + token_groups = build_token_groups_for_with_subexpression(tokens) + + if TRACE: + token_groups = list(token_groups) + logger_debug('tokenize: token_groups') + pprint(token_groups) + + for token_group in token_groups: + len_group = len(token_group) - for group in result_groups: - len_group = len(group) if not len_group: # This should never happen continue + if len_group == 1: # a single token - result = group[0] + result = token_group[0] pos = result.start token_string = result.string - output = result.output - if output: - val = output.value + val = result.value + if val: if isinstance(val, Keyword): # keyword token = val.type @@ -470,40 +525,38 @@ def tokenize(self, expression, strict=False): else: if len_group != 3: # this should never happen - string = ' '.join([res.string for res in group]) - start = group[0].start + string = ' '.join([tok.string for tok in token_group]) + start = token_group[0].start raise ParseError( TOKEN_SYMBOL, string, start, PARSE_INVALID_EXPRESSION) - # this is a A with B seq of three results - lic_res, WITH , exc_res = group - pos = lic_res.start - WITHs = ' ' + WITH.string.strip() + ' ' - token_string = ''.join([lic_res.string, WITHs, exc_res.string]) + # this is an A with B seq of three tokens + lic_token, WITH, exc_token = token_group + pos = lic_token.start + WITHs = WITH.string.strip() + token_string = ' '.join([lic_token.string, WITHs, exc_token.string]) # licenses - lic_out = lic_res.output - lic_sym = lic_out and lic_out.value + lic_sym = lic_token.value # this should not happen if lic_sym and not isinstance(lic_sym, LicenseSymbol): - raise ParseError(TOKEN_SYMBOL, lic_res.string, lic_res.start, + raise ParseError(TOKEN_SYMBOL, lic_token.string, lic_token.start, PARSE_INVALID_SYMBOL) if not lic_sym: - lic_sym = LicenseSymbol(lic_res.string, is_exception=False) + lic_sym = LicenseSymbol(lic_token.string, is_exception=False) if not isinstance(lic_sym, LicenseSymbol): - raise ParseError(TOKEN_SYMBOL, lic_res.string, lic_res.start, + raise ParseError(TOKEN_SYMBOL, lic_token.string, lic_token.start, PARSE_INVALID_SYMBOL) if strict and lic_sym.is_exception: - raise ParseError(TOKEN_SYMBOL, lic_res.string, lic_res.start, + raise ParseError(TOKEN_SYMBOL, lic_token.string, lic_token.start, PARSE_INVALID_EXCEPTION) # exception - exc_out = exc_res.output - exc_sym = exc_out and exc_out.value + exc_sym = exc_token.value # this should not happen if exc_sym and not isinstance(exc_sym, LicenseSymbol): @@ -513,14 +566,14 @@ def tokenize(self, expression, strict=False): exc_sym = copy(exc_sym) if not exc_sym: - exc_sym = LicenseSymbol(exc_res.string) + exc_sym = LicenseSymbol(exc_token.string) if not isinstance(exc_sym, LicenseSymbol): - raise ParseError(TOKEN_SYMBOL, exc_res.string, exc_res.start, + raise ParseError(TOKEN_SYMBOL, exc_token.string, exc_token.start, PARSE_INVALID_SYMBOL) if strict and self.known_symbols and not exc_sym.is_exception: - raise ParseError(TOKEN_SYMBOL, exc_res.string, exc_res.start, + raise ParseError(TOKEN_SYMBOL, exc_token.string, exc_token.start, PARSE_INVALID_SYMBOL_AS_EXCEPTION) token = LicenseWithExceptionSymbol(lic_sym, exc_sym, strict) @@ -537,29 +590,43 @@ def get_scanner(self): if self.scanner is not None: return self.scanner - self.scanner = scanner = Scanner(ignore_case=True) + self.scanner = scanner = Scanner() - for keyword in _KEYWORDS: - scanner.add(keyword.value, keyword, priority=0) + for keyword in KEYWORDS: + scanner.add(keyword.value, keyword) # self.known_symbols has been created at Licensing initialization time and
is # already validated and trusted here for key, symbol in self.known_symbols.items(): # always use the key even if there are no aliases. - scanner.add(key, symbol, priority=1) + scanner.add(key, symbol) aliases = getattr(symbol, 'aliases', []) for alias in aliases: # normalize spaces for each alias. The Scanner will lowercase them - # since we created it with ignore_case=True if alias: alias = ' '.join(alias.split()) - if alias: - scanner.add(alias, symbol, priority=2) + scanner.add(alias, symbol) scanner.make_automaton() return scanner +def get_symbols_by_key(symbols): + """ + Return a mapping of key->symbol given an iterable of symbols + """ + by_key = {} + for symbol in symbols: + by_key[symbol.key.lower()] = symbol + aliases = getattr(symbol, 'aliases', []) + for alias in aliases: + if alias: + alias = ' '.join(alias.split()) + if alias: + by_key[alias.lower()] = symbol + return by_key + + class Renderable(object): """ An interface for renderable objects. @@ -646,7 +713,7 @@ def __init__(self, key, aliases=tuple(), is_exception=False, *args, **kwargs): # normalize for spaces key = ' '.join(key.split()) - if key.lower() in KEYWORDS_STRIPPED: + if key.lower() in KEYWORDS_STRINGS: raise ExpressionError( 'Invalid license key: a key cannot be a reserved keyword: "or", "and" or "with: "%(key)s"' % locals()) @@ -662,7 +729,7 @@ def __init__(self, key, aliases=tuple(), is_exception=False, *args, **kwargs): def decompose(self): """ - Return an iterable the underlying symbols for this symbol + Return an iterable of the underlying symbols for this symbol. """ yield self @@ -698,6 +765,9 @@ def render(self, template='{symbol.key}', *args, **kwargs): def __str__(self): return self.key + def __len__(self): + return len(self.key) + def __repr__(self): cls = self.__class__.__name__ key = self.key @@ -949,75 +1019,109 @@ def ordered_unique(seq): return uniques -def strip_and_skip_spaces(results): +def build_symbols_from_unmatched_tokens(tokens): """ - Yield results given a sequence of Result skipping whitespace-only results + Yield Token given a sequence of Token replacing unmatched contiguous Tokens + by a single token with a LicenseSymbol. """ - for result in results: - if result.string.strip(): - yield result + tokens = list(tokens) + + unmatched = deque() + + def build_token_with_symbol(): + """ + Build and return a new Token from accumulated unmatched tokens or None. + """ + if not unmatched: + return + # strip trailing spaces + trailing_spaces = [] + while unmatched and not unmatched[-1].string.strip(): + trailing_spaces.append(unmatched.pop()) + + if unmatched: + string = ' '.join(t.string for t in unmatched if t.string.strip()) + start = unmatched[0].start + end = unmatched[-1].end + toksym = LicenseSymbol(string) + unmatched.clear() + yield Token(start, end, string, toksym) + + for ts in trailing_spaces: + yield ts + + for tok in tokens: + if tok.value: + for symtok in build_token_with_symbol(): + yield symtok + yield tok + else: + if not unmatched and not tok.string.strip(): + # skip leading spaces + yield tok + else: + unmatched.append(tok) + + # end remainders + for symtok in build_token_with_symbol(): + yield symtok -def group_results_for_with_subexpression(results): +def build_token_groups_for_with_subexpression(tokens): """ - Yield tuples of (Result) given a sequence of Result such that: - - all symbol-with-symbol subsequences of three results are grouped in a three-tuple - - other results are the single result in a tuple. 
+ Yield tuples of Token given a sequence of Token such that: + - all symbol-with-symbol sequences of 3 tokens are grouped in a three-tuple + - other tokens are a single token wrapped in a tuple. """ - # if n-1 is sym, n is with and n+1 is sym: yield this as a group for a with exp - # otherwise: yield each single result as a group + # if n-1 is sym, n is with and n+1 is sym: yield this as a group for a with + # exp; otherwise: yield each single token as a group - results = list(results) + tokens = list(tokens) - # check three contiguous result from scanning at a time + # check three contiguous tokens from scanning at a time triple_len = 3 # shortcut if there are no grouping possible - if len(results) < triple_len: - for res in results: - yield (res,) + if len(tokens) < triple_len: + for tok in tokens: + yield (tok,) return - # accumulate three contiguous results - triple = collections.deque() + # accumulate three contiguous tokens + triple = deque() triple_popleft = triple.popleft triple_clear = triple.clear tripple_append = triple.append - for res in results: + for tok in tokens: if len(triple) == triple_len: if is_with_subexpression(triple): yield tuple(triple) triple_clear() else: - prev_res = triple_popleft() - yield (prev_res,) - tripple_append(res) + prev_tok = triple_popleft() + yield (prev_tok,) + tripple_append(tok) # end remainders if triple: if len(triple) == triple_len and is_with_subexpression(triple): yield tuple(triple) else: - for res in triple: - yield (res,) - - -def is_symbol(result): - # either the output value is a known sym, or we have no output for unknown sym - return result.output and isinstance(result.output.value, LicenseSymbol) or not result.output - + for tok in triple: + yield (tok,) -def is_with_keyword(result): - return (result.output - and isinstance(result.output.value, Keyword) - and result.output.value.type == TOKEN_WITH) - -def is_with_subexpression(results): - lic, wit, exc = results - return (is_symbol(lic) and is_with_keyword(wit) and is_symbol(exc)) +def is_with_subexpression(tokens_triple): + """ + Return True if a Token triple is a WITH license sub-expression. + """ + lic, wit, exc = tokens_triple + return (isinstance(lic.value, LicenseSymbol) + and wit.value == KW_WITH + and isinstance(exc.value, LicenseSymbol) + ) def as_symbols(symbols): @@ -1053,7 +1157,7 @@ def as_symbols(symbols): 'or a LicenseSymbol-like instance.' % locals()) -def validate_symbols(symbols, validate_keys=False, _keywords=KEYWORDS): +def validate_symbols(symbols, validate_keys=False): """ Return a tuple of (`warnings`, `errors`) given a sequence of `symbols` LicenseSymbol-like objects.
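For reference, a minimal sketch of how validate_symbols is consumed by Licensing.__init__ (see the first hunk of this patch); the duplicated key here is a hypothetical example:

    symbols = [LicenseSymbol('gpl-2.0'), LicenseSymbol('gpl-2.0')]
    warnings, errors = validate_symbols(symbols)
    # the duplicated 'gpl-2.0' key is reported in `errors`; Licensing.__init__
    # joins warns + errors and raises ValueError when errors is non-empty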
@@ -1075,9 +1179,9 @@ def validate_symbols(symbols, validate_keys=False, _keywords=KEYWORDS): not_symbol_classes = [] dupe_keys = set() dupe_exceptions = set() - dupe_aliases = collections.defaultdict(list) + dupe_aliases = defaultdict(list) invalid_keys_as_kw = set() - invalid_alias_as_kw = collections.defaultdict(list) + invalid_alias_as_kw = defaultdict(list) # warning warning_dupe_aliases = set() @@ -1096,7 +1200,7 @@ def validate_symbols(symbols, validate_keys=False, _keywords=KEYWORDS): dupe_keys.add(key) # key cannot be an expression keyword - if keyl in _keywords: + if keyl in KEYWORDS_STRINGS: invalid_keys_as_kw.add(key) # keep a set of unique seen keys @@ -1129,7 +1233,7 @@ def validate_symbols(symbols, validate_keys=False, _keywords=KEYWORDS): dupe_aliases[alias].append(key) # an alias cannot be an expression keyword - if alias in _keywords: + if alias in KEYWORDS_STRINGS: invalid_alias_as_kw[key].append(alias) seen_aliases[alias] = keyl @@ -1171,7 +1275,7 @@ def validate_symbols(symbols, validate_keys=False, _keywords=KEYWORDS): return warnings, errors -_splitter = re.compile(''' +_tokenizer = re.compile(''' (?P[^\s\(\)]+) | (?P\s+) @@ -1181,17 +1285,16 @@ def validate_symbols(symbols, validate_keys=False, _keywords=KEYWORDS): (?P\)) ''', re.VERBOSE | re.MULTILINE | re.UNICODE -).finditer +) def splitter(expression): """ - Return an iterable of Result describing each token given an - expression unicode string. + Return an iterable of Tokens describing each token given an expression + unicode string. - This is a simpler tokenizer used when the Licensing does not have - known symbols. The split is done on spaces and parens. Anything else - is either a token or a symbol. + The split is done on spaces and parens. Anything else is either a token or a + symbol. 
""" if not expression: return @@ -1199,45 +1302,33 @@ def splitter(expression): if not isinstance(expression, str): raise ParseError(error_code=PARSE_EXPRESSION_NOT_UNICODE) - # mapping of lowercase token strings to a token type id - TOKENS = { - 'and': Keyword(value='and', type=TOKEN_AND), - 'or': Keyword(value='or', type=TOKEN_OR), - 'with': Keyword(value='with', type=TOKEN_WITH), - } - - for match in _splitter(expression): + for match in _tokenizer.finditer(expression): if not match: continue - + # set start and end as string indexes start, end = match.span() end = end - 1 - mgd = match.groupdict() + match_getter = match.groupdict().get - space = mgd.get('space') + space = match_getter('space') if space: - yield Result(start, end, space, None) + yield Token(start, end, space, None) - lpar = mgd.get('lpar') + lpar = match_getter('lpar') if lpar: - yield Result(start, end, lpar, Output(lpar, KW_LPAR)) + yield Token(start, end, lpar, KW_LPAR) - rpar = mgd.get('rpar') + rpar = match_getter('rpar') if rpar: - yield Result(start, end, rpar, Output(rpar, KW_RPAR)) + yield Token(start, end, rpar, KW_RPAR) - token_or_sym = mgd.get('symbol') - if not token_or_sym: + operator_or_sym = match_getter('symbol') + if not operator_or_sym: continue - token = TOKENS.get(token_or_sym.lower()) - if token: - yield Result(start, end, token_or_sym, Output(token_or_sym, token)) -# elif token_or_sym.endswith('+') and token_or_sym != '+': -# val = token_or_sym[:-1] -# sym = LicenseSymbol(key=val) -# yield Result(start, end - 1, val, Output(val, sym)) -# yield Result(end, end, '+', Output('+', KW_PLUS)) + operator = OPERATORS.get(operator_or_sym.lower()) + if operator: + yield Token(start, end, operator_or_sym, operator) else: - sym = LicenseSymbol(key=token_or_sym) - yield Result(start, end, token_or_sym, Output(token_or_sym, sym)) + sym = LicenseSymbol(key=operator_or_sym) + yield Token(start, end, operator_or_sym, sym) diff --git a/src/license_expression/_pyahocorasick.py b/src/license_expression/_pyahocorasick.py index 4c73709..35b9f00 100644 --- a/src/license_expression/_pyahocorasick.py +++ b/src/license_expression/_pyahocorasick.py @@ -6,10 +6,9 @@ WWW : http://0x80.pl License : public domain -Modified for use in the license_expression library and in particular: - - add support for unicode key strinsg. - - rename word to key and output to value (to be more like a mapping/dict) - - case insensitive search +Modified for use in the license_expression library: + - add support for unicode strings. + - case insensitive search using sequence of words and not characters - improve returned results with the actual start,end and matched string. - support returning non-matched parts of a string """ @@ -21,104 +20,148 @@ from collections import deque from collections import OrderedDict import logging +import re + +TRACE = False logger = logging.getLogger(__name__) def logger_debug(*args): - return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) + pass + + +if TRACE: -# uncomment for local debug logging -# import sys -# logging.basicConfig(stream=sys.stdout) -# logger.setLevel(logging.DEBUG) + def logger_debug(*args): + return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) + import sys + logging.basicConfig(stream=sys.stdout) + logger.setLevel(logging.DEBUG) # used to distinguish from None nil = object() +class TrieNode(object): + """ + Node of the Trie/Aho-Corasick automaton. 
+ """ + __slots__ = ['token', 'output', 'fail', 'children'] + + def __init__(self, token, output=nil): + # token of a tokens string added to the Trie as a string + self.token = token + + # an output function (in the Aho-Corasick meaning) for this node: this + # is an object that contains the original key string and any + # additional value data associated to that key. Or "nil" for a node that + # is not a terminal leave for a key. It will be returned with a match. + self.output = output + + # failure link used by the Aho-Corasick automaton and its search procedure + self.fail = nil + + # children of this node as a mapping of char->node + self.children = {} + + def __repr__(self): + if self.output is not nil: + return 'TrieNode(%r, %r)' % (self.token, self.output) + else: + return 'TrieNode(%r)' % self.token + + class Trie(object): """ A Trie and Aho-Corasick automaton. This behaves more or less like a mapping of key->value. This is the main entry point. """ - def __init__(self, ignore_case=True): + def __init__(self): """ Initialize a new Trie. - - If `ignore_case` is True, searches in the Trie will be case insensitive. """ self.root = TrieNode('') - self.ignore_case = ignore_case - # set of any unique character in the trie, updated on each addition - # we keep track of the set of chars added to the trie to build the automaton + # set of any unique tokens in the trie, updated on each addition we keep + # track of the set of tokens added to the trie to build the automaton # these are needed to created the first level children failure links - self._known_chars = set() + self._known_tokens = set() # Flag set to True once a Trie has been converted to an Aho-Corasick automaton self._converted = False - def add(self, key, value=None, priority=0): + def add(self, tokens_string, value=None): """ - Add a new (key string, value) pair to the trie. If the key already exists in - the Trie, its value is replaced with the provided value. - A key is any unicode string. + Add a new tokens_string and its associated value to the trie. If the + tokens_string already exists in the Trie, its value is replaced with the + provided value, typically a Token object. If a value is not provided, + the tokens_string is used as value. + + A tokens_string is any unicode string. It will be tokenized when added + to the Trie. 
""" if self._converted: raise Exception('This Trie has been converted to an Aho-Corasick ' - 'automaton and cannot be further modified.') - if not key: + 'automaton and cannot be modified.') + + if not tokens_string or not isinstance(tokens_string, basestring): return - stored_key = self.ignore_case and key.lower() or key + tokens = [t for t in get_tokens(tokens_string) if t.strip()] - # we keep track of the set of chars added to the trie to build the automaton - # these are needed to created the first level children failure links - self._known_chars.update(stored_key) + # we keep track of the set of tokens added to the trie to build the + # automaton these are needed to created the first level children failure + # links + + self._known_tokens.update(tokens) node = self.root - for char in stored_key: + for token in tokens: try: - node = node.children[char] + node = node.children[token] except KeyError: - child = TrieNode(char) - node.children[char] = child + child = TrieNode(token) + node.children[token] = child node = child - # we always store the original key, not a possibly lowercased version - node.output = Output(key, value, priority) + node.output = (tokens_string, value or tokens_string) - def __get_node(self, key): + def __get_node(self, tokens_string): """ - Return a node for this key or None if the trie does not contain the key. - Private function retrieving a final node of trie for given key. + Return a node for this tokens_string or None if the trie does not + contain the tokens_string. Private function retrieving a final node of + the Trie for a given tokens_string. """ - key = self.ignore_case and key.lower() or key + if not tokens_string or not isinstance(tokens_string, basestring): + return + + tokens = [t for t in get_tokens(tokens_string) if t.strip()] node = self.root - for char in key: + for token in tokens: try: - node = node.children[char] + node = node.children[token] except KeyError: return None return node - def get(self, key, default=nil): + def get(self, tokens_string, default=nil): """ - Return the Output tuple associated with a `key`. - If there is no such key in the Trie, return the default value (other - than nil): if default is not given or nil, raise a KeyError exception. + Return the output value found associated with a `tokens_string`. If + there is no such tokens_string in the Trie, return the default value + (other than nil). If `default` is not provided or is `nil`, raise a + KeyError. """ - node = self.__get_node(key) + node = self.__get_node(tokens_string) output = nil if node: output = node.output if output is nil: if default is nil: - raise KeyError(key) + raise KeyError(tokens_string) else: return default else: @@ -142,37 +185,36 @@ def items(self): """ items = [] - def walk(node, key): + def walk(node, tokens): """ Walk the trie, depth first. """ - key = key + node.char + tokens = [t for t in tokens + [node.token] if t] if node.output is not nil: - items.append((node.output.key, node.output.value)) + items.append((node.output[0], node.output[1],)) for child in node.children.values(): if child is not node: - walk(child, key) + walk(child, tokens) - walk(self.root, key='') + walk(self.root, tokens=[]) return iter(items) - def exists(self, key): + def exists(self, tokens_string): """ Return True if the key is present in this trie. 
""" - # TODO: add __contains__ magic for this - node = self.__get_node(key) + node = self.__get_node(tokens_string) if node: return bool(node.output != nil) return False - def is_prefix(self, key): + def is_prefix(self, tokens_string): """ - Return True if key is a prefix of any existing key in the trie. + Return True if tokens_string is a prefix of any existing tokens_string in the trie. """ - return (self.__get_node(key) is not None) + return bool(self.__get_node(tokens_string) is not None) def make_automaton(self): """ @@ -181,45 +223,45 @@ def make_automaton(self): converted to an Automaton. """ queue = deque() - queue_append = queue.append - queue_popleft = queue.popleft # 1. create root children for each known items range (e.g. all unique - # characters from all the added keys), failing to root. + # characters from all the added tokens), failing to root. # And build a queue of these - for char in self._known_chars: - if char in self.root.children: - node = self.root.children[char] + for token in self._known_tokens: + if token in self.root.children: + node = self.root.children[token] # e.g. f(s) = 0, Aho-Corasick-wise node.fail = self.root - queue_append(node) + queue.append(node) else: - self.root.children[char] = self.root + self.root.children[token] = self.root # 2. using the queue of all possible top level items/chars, walk the trie and # add failure links to nodes as needed while queue: - current_node = queue_popleft() + current_node = queue.popleft() for node in current_node.children.values(): - queue_append(node) + queue.append(node) state = current_node.fail - while node.char not in state.children: + while node.token not in state.children: state = state.fail - node.fail = state.children.get(node.char, self.root) + node.fail = state.children.get(node.token, self.root) # Mark the trie as converted so it cannot be modified anymore self._converted = True - def iter(self, string): + def iter(self, tokens_string, include_unmatched=False, include_space=False): """ - Yield Result objects for matched strings by performing the Aho-Corasick search procedure. + Yield Token objects for matched strings by performing the Aho-Corasick + search procedure. - The Result start and end positions in the searched string are such that the - matched string is "search_string[start:end+1]". And the start is computed - from the end_index collected by the Aho-Corasick search procedure such that - "start=end_index - n + 1" where n is the length of a matched key. + The Token start and end positions in the searched string are such that + the matched string is "tokens_string[start:end+1]". And the start is + computed from the end_index collected by the Aho-Corasick search + procedure such that + "start=end_index - n + 1" where n is the length of a matched string. - The Result.output is an Output object for a matched key. + The Token.value is an object associated with a matched string. For example: >>> a = Trie() @@ -229,17 +271,14 @@ def iter(self, string): >>> a.add('EFGH') >>> a.add('KL') >>> a.make_automaton() - >>> string = 'abcdefghijklm' - >>> results = Result.sort(a.iter(string)) - + >>> tokens_string = 'a bcdef ghij kl m' + >>> strings = Token.sort(a.iter(tokens_string)) >>> expected = [ - ... Result(1, 5, 'bcdef', Output('BCDEF')), - ... Result(2, 4, 'cde', Output('CDE')), - ... Result(3, 7, 'defgh', Output('DEFGH')), - ... Result(4, 7, 'efgh', Output('EFGH')), - ... Result(10, 11, 'kl', Output('KL')), + ... Token(2, 6, u'bcdef', u'BCDEF'), + ... Token(13, 14, u'kl', u'KL') ... 
] - >>> results == expected + + >>> strings == expected True >>> list(a.iter('')) == [] @@ -248,38 +287,78 @@ def iter(self, string): >>> list(a.iter(' ')) == [] True """ - if not string: + if not tokens_string: return - # keep a copy for results - original_string = string - string = self.ignore_case and string.lower() or string - - known_chars = self._known_chars + tokens = get_tokens(tokens_string) state = self.root - for end, char in enumerate(string): - if char not in known_chars: + + if TRACE: + logger_debug('Trie.iter() with:', repr(tokens_string)) + logger_debug(' tokens:', tokens) + + end_pos = -1 + for token_string in tokens: + end_pos += len(token_string) + if TRACE: + logger_debug() + logger_debug('token_string', repr(token_string)) + logger_debug(' end_pos', end_pos) + + if not include_space and not token_string.strip(): + if TRACE: + logger_debug(' include_space skipped') + continue + + if token_string not in self._known_tokens: state = self.root + if TRACE: + logger_debug(' unmatched') + if include_unmatched: + n = len(token_string) + start_pos = end_pos - n + 1 + tok = Token(start_pos, end_pos, tokens_string[start_pos: end_pos + 1], None) + if TRACE: + logger_debug(' unmatched tok:', tok) + yield tok continue - # search for a matching character in the children, starting at root - while char not in state.children: + yielded = False + + # search for a matching token_string in the children, starting at root + while token_string not in state.children: state = state.fail - # we have a matching starting character - state = state.children.get(char, self.root) + + # we have a matching starting token_string + state = state.children.get(token_string, self.root) match = state while match is not nil: if match.output is not nil: - # TODO: this could be precomputed or cached - n = len(match.output.key) - start = end - n + 1 - yield Result(start, end, original_string[start:end + 1], match.output) + matched_string, output_value = match.output + if TRACE: + logger_debug(' type output', repr(output_value), type(matched_string)) + n = len(matched_string) + start_pos = end_pos - n + 1 + if TRACE: logger_debug(' start_pos', start_pos) + yield Token(start_pos, end_pos, tokens_string[start_pos: end_pos + 1], output_value) + yielded = True match = match.fail + if not yielded and include_unmatched: + if TRACE: + logger_debug(' unmatched but known token') + n = len(token_string) + start_pos = end_pos - n + 1 + tok = Token(start_pos, end_pos, tokens_string[start_pos: end_pos + 1], None) + if TRACE: + logger_debug(' unmatched tok 2:', tok) + yield tok - def scan(self, string): + logger_debug() + + def scan(self, string, include_unmatched=True, include_space=False): """ Scan a string for matched and unmatched sub-sequences and yield non- - overlapping Result objects performing a modified Aho-Corasick search + overlapping Token objects performing a modified Aho-Corasick search procedure: - return both matched and unmatched sub-sequences. @@ -293,10 +372,8 @@ def scan(self, string): return the non-overlapping portion of the other discarded match as a non-match. - Each Result contains the start and end position, the corresponding string and - an Output object (with original key and any associated associated value). The - string and key are in their original case even if the automaton has the - `ignore_case` attribute. + Each Token contains the start and end position, the corresponding string + and an associated value object. 
For example: >>> a = Trie() @@ -306,144 +383,175 @@ def scan(self, string): >>> a.add('EFGH') >>> a.add('KL') >>> a.make_automaton() - >>> string = 'abcdefghijkl' - >>> results = list(a.scan(string)) + >>> string = 'a bcdef ghij kl' + >>> tokens = list(a.scan(string, include_space=True)) >>> expected = [ - ... Result(start=0, end=0, string='a', output=None), - ... Result(start=1, end=5, string='bcdef', output=Output('BCDEF')), - ... Result(start=6, end=9, string='ghij', output=None), - ... Result(start=10, end=11, string='kl', output=Output('KL')), + ... Token(0, 0, u'a', None), + ... Token(1, 1, u' ', None), + ... Token(2, 6, u'bcdef', u'BCDEF'), + ... Token(7, 7, u' ', None), + ... Token(8, 11, u'ghij', None), + ... Token(12, 12, u' ', None), + ... Token(13, 14, u'kl', u'KL') ... ] - - >>> results == expected + >>> tokens == expected True """ - results = self.iter(string) - results = filter_overlapping(results) - results = add_unmatched(string, results) - return results + tokens = self.iter(string, + include_unmatched=include_unmatched, include_space=include_space) + tokens = list(tokens) + if TRACE: + logger_debug('scan.tokens:', tokens) + if not include_space: + tokens = [t for t in tokens if t.string.strip()] + tokens = filter_overlapping(tokens) + return tokens -class TrieNode(object): - """ - Node of the Trie/Aho-Corasick automaton. +def filter_overlapping(tokens): """ - __slots__ = ['char', 'output', 'fail', 'children'] - - def __init__(self, char, output=nil): - # character of a key string added to the Trie - self.char = char - - # an output function (in the Aho-Corasick meaning) for this node: this is an - # Output object that contains the original key string and any additional - # value data associated to that key. Or "nil" for a node that is not a - # terminal leave for a key. It will be returned with a match. - self.output = output - - # failure link used by the Aho-Corasick automaton and its search procedure - self.fail = nil - - # children of this node as a mapping of char->node - self.children = {} + Return a new list from an iterable of `tokens` discarding contained and + overlaping Tokens using these rules: - def __repr__(self): - if self.output is not nil: - return 'TrieNode(%r, %r)' % (self.char, self.output) - else: - return 'TrieNode(%r)' % self.char + - skip a token fully contained in another token. + - keep the biggest, left-most token of two overlapping tokens and skip the other + For example: + >>> tokens = [ + ... Token(0, 0, 'a'), + ... Token(1, 5, 'bcdef'), + ... Token(2, 4, 'cde'), + ... Token(3, 7, 'defgh'), + ... Token(4, 7, 'efgh'), + ... Token(8, 9, 'ij'), + ... Token(10, 13, 'klmn'), + ... Token(11, 15, 'lmnop'), + ... Token(16, 16, 'q'), + ... ] -class Output(object): - """ - An Output is used to track a key added to the Trie as a TrieNode and any - arbitrary value object corresponding to that key. + >>> expected = [ + ... Token(0, 0, 'a'), + ... Token(1, 5, 'bcdef'), + ... Token(8, 9, 'ij'), + ... Token(11, 15, 'lmnop'), + ... Token(16, 16, 'q'), + ... ] - - `key` is the original key unmodified unicode string. - - `value` is the associated value for this key as provided when adding this key. - - `priority` is an optional priority for this key used to disambiguate overalpping matches. 
+ >>> filtered = list(filter_overlapping(tokens)) + >>> filtered == expected + True """ - __slots__ = 'key', 'value', 'priority' - - def __init__(self, key, value=None, priority=0): - self.key = key - self.value = value - self.priority = priority - - def __repr__(self): - return self.__class__.__name__ + '(%(key)r, %(value)r, %(priority)r)' % self.as_dict() + tokens = Token.sort(tokens) - def __eq__(self, other): - return ( - isinstance(other, Output) - and self.key == other.key - and self.value == other.value - and self.priority == other.priority) + # compare pair of tokens in the sorted sequence: current and next + i = 0 + while i < len(tokens) - 1: + j = i + 1 + while j < len(tokens): + curr_tok = tokens[i] + next_tok = tokens[j] + + logger_debug('curr_tok, i, next_tok, j:', curr_tok, i, next_tok, j) + # disjoint tokens: break, there is nothing to do + if next_tok.is_after(curr_tok): + logger_debug(' break to next', curr_tok) + break - def __hash__(self): - return hash((self.key, self.value, self.priority,)) + # contained token: discard the contained token + if next_tok in curr_tok: + logger_debug(' del next_tok contained:', next_tok) + del tokens[j] + continue - def as_dict(self): - return OrderedDict([(s, getattr(self, s)) for s in self.__slots__]) + # overlap: Keep the longest token and skip the smallest overlapping + # tokens. In case of length tie: keep the left-most + if curr_tok.overlap(next_tok): + if len(curr_tok) >= len(next_tok): + logger_debug(' del next_tok smaller overlap:', next_tok) + del tokens[j] + continue + else: + logger_debug(' del curr_tok smaller overlap:', curr_tok) + del tokens[i] + break + j += 1 + i += 1 + return tokens -class Result(object): +class Token(object): """ - A Result is used to track the result of a search with its start and end as - index position in the original string and other attributes: + A Token is used to track the tokenization of an expression with its + start and end as index position in the original string and other attributes: - `start` and `end` are zero-based index in the original string S such that S[start:end+1] will yield `string`. - - `string` is the sub-string from the original searched string for this Result. - - `output` is the Output object for a matched string and is a marker that this is a - matched string. None otherwise for a Result for unmatched text. + - `string` is the matched substring from the original string for this Token. + - `value` is the corresponding object for this token as one of: + - a LicenseSymbol object + - a "Keyword" object (and, or, with, left and right parens) + - None if this is a space.
""" - __slots__ = 'start', 'end', 'string', 'output' + __slots__ = 'start', 'end', 'string', 'value', - def __init__(self, start, end, string='', output=None): + def __init__(self, start, end, string='', value=None): self.start = start self.end = end self.string = string - self.output = output + self.value = value def __repr__(self): - return self.__class__.__name__ + '(%(start)r, %(end)r, %(string)r, %(output)r)' % self.as_dict() + return self.__class__.__name__ + '(%(start)r, %(end)r, %(string)r, %(value)r)' % self.as_dict() def as_dict(self): return OrderedDict([(s, getattr(self, s)) for s in self.__slots__]) def __len__(self): - return self.end + 1 - self.start + return self.end - self.start + 1 def __eq__(self, other): - return isinstance(other, Result) and ( + return isinstance(other, Token) and ( self.start == other.start and self.end == other.end and self.string == other.string and - self.output == other.output + self.value == other.value ) def __hash__(self): - tup = self.start, self.end, self.string, self.output + tup = self.start, self.end, self.string, self.value return hash(tup) - @property - def priority(self): - return getattr(self.output, 'priority', 0) + @classmethod + def sort(cls, tokens): + """ + Return a new sorted sequence of tokens given a sequence of tokens. The + primary sort is on start and the secondary sort is on longer lengths. + Therefore if two tokens have the same start, the longer token will sort + first. + + For example: + >>> tokens = [Token(0, 0), Token(5, 5), Token(1, 1), Token(2, 4), Token(2, 5)] + >>> expected = [Token(0, 0), Token(1, 1), Token(2, 5), Token(2, 4), Token(5, 5)] + >>> expected == Token.sort(tokens) + True + """ + key = lambda s: (s.start, -len(s),) + return sorted(tokens, key=key) def is_after(self, other): """ - Return True if this result is after the other result. + Return True if this token is after the other token. For example: - >>> Result(1, 2).is_after(Result(5, 6)) + >>> Token(1, 2).is_after(Token(5, 6)) False - >>> Result(5, 6).is_after(Result(5, 6)) + >>> Token(5, 6).is_after(Token(5, 6)) False - >>> Result(2, 3).is_after(Result(1, 2)) + >>> Token(2, 3).is_after(Token(1, 2)) False - >>> Result(5, 6).is_after(Result(3, 4)) + >>> Token(5, 6).is_after(Token(3, 4)) True """ return self.start > other.end @@ -453,188 +561,57 @@ def is_before(self, other): def __contains__(self, other): """ - Return True if this result contains the other result. + Return True if this token contains the other token. For example: - >>> Result(5, 7) in Result(5, 7) + >>> Token(5, 7) in Token(5, 7) True - >>> Result(6, 8) in Result(5, 7) + >>> Token(6, 8) in Token(5, 7) False - >>> Result(6, 6) in Result(4, 8) + >>> Token(6, 6) in Token(4, 8) True - >>> Result(3, 9) in Result(4, 8) + >>> Token(3, 9) in Token(4, 8) False - >>> Result(4, 8) in Result(3, 9) + >>> Token(4, 8) in Token(3, 9) True """ return self.start <= other.start and other.end <= self.end def overlap(self, other): """ - Return True if this result and the other result overlap. + Return True if this token and the other token overlap. 
For example: - >>> Result(1, 2).overlap(Result(5, 6)) + >>> Token(1, 2).overlap(Token(5, 6)) False - >>> Result(5, 6).overlap(Result(5, 6)) + >>> Token(5, 6).overlap(Token(5, 6)) True - >>> Result(4, 5).overlap(Result(5, 6)) + >>> Token(4, 5).overlap(Token(5, 6)) True - >>> Result(4, 5).overlap(Result(5, 7)) + >>> Token(4, 5).overlap(Token(5, 7)) True - >>> Result(4, 5).overlap(Result(6, 7)) + >>> Token(4, 5).overlap(Token(6, 7)) False """ start = self.start end = self.end return (start <= other.start <= end) or (start <= other.end <= end) - @classmethod - def sort(cls, results): - """ - Return a new sorted sequence of results given a sequence of results. The - primary sort is on start and the secondary sort is on longer lengths. - Therefore if two results have the same start, the longer result will sort - first. - For example: - >>> results = [Result(0, 0), Result(5, 5), Result(1, 1), Result(2, 4), Result(2, 5)] - >>> expected = [Result(0, 0), Result(1, 1), Result(2, 5), Result(2, 4), Result(5, 5)] - >>> expected == Result.sort(results) - True - """ - key = lambda s: (s.start, -len(s),) - return sorted(results, key=key) +# tokenize to separate text from parens +_tokenizer = re.compile(''' + (?P[^\s\(\)]+) + | + (?P\s+) + | + (?P[\(\)]) + ''', + re.VERBOSE | re.MULTILINE | re.UNICODE +) -def filter_overlapping(results): +def get_tokens(tokens_string): """ - Return a new list from an iterable of `results` discarding contained and - overlaping Results using these rules: - - - skip a result fully contained in another result. - - keep the biggest, left-most result of two overlapping results and skip the other - - For example: - >>> results = [ - ... Result(0, 0, 'a'), - ... Result(1, 5, 'bcdef'), - ... Result(2, 4, 'cde'), - ... Result(3, 7, 'defgh'), - ... Result(4, 7, 'efgh'), - ... Result(8, 9, 'ij'), - ... Result(10, 13, 'klmn'), - ... Result(11, 15, 'lmnop'), - ... Result(16, 16, 'q'), - ... ] - - >>> expected = [ - ... Result(0, 0, 'a'), - ... Result(1, 5, 'bcdef'), - ... Result(8, 9, 'ij'), - ... Result(11, 15, 'lmnop'), - ... Result(16, 16, 'q'), - ... 
] - - >>> filtered = list(filter_overlapping(results)) - >>> filtered == expected - True - """ - results = Result.sort(results) - - # compare pair of results in the sorted sequence: current and next - i = 0 - while i < len(results) - 1: - j = i + 1 - while j < len(results): - curr_res = results[i] - next_res = results[j] - - logger_debug('curr_res, i, next_res, j:', curr_res, i, next_res, j) - # disjoint results: break, there is nothing to do - if next_res.is_after(curr_res): - logger_debug(' break to next', curr_res) - break - - # contained result: discard the contained result - if next_res in curr_res: - logger_debug(' del next_res contained:', next_res) - del results[j] - continue - - # overlap: keep the biggest result and skip the smallest overlapping results - # in case of length tie: keep the left most - if curr_res.overlap(next_res): - if curr_res.priority < next_res.priority: - logger_debug(' del next_res lower priority:', next_res) - del results[j] - continue - elif curr_res.priority > next_res.priority: - logger_debug(' del curr_res lower priority:', curr_res) - del results[i] - break - else: - if len(curr_res) >= len(next_res): - logger_debug(' del next_res smaller overlap:', next_res) - del results[j] - continue - else: - logger_debug(' del curr_res smaller overlap:', curr_res) - del results[i] - break - j += 1 - i += 1 - return results - - -def add_unmatched(string, results): - """ - Yield Result object from the original `string` and the search `results` iterable - of non-overlapping matched substring Result object. New unmatched Results are - added to the stream for unmatched parts. - - For example: - >>> string ='abcdefghijklmn' - >>> results = [ - ... Result(2, 3, 'cd'), - ... Result(7, 7, 'h', None), - ... Result(9, 10, 'jk', None), - ... ] - >>> expected = [ - ... Result(0, 1, 'ab'), - ... Result(2, 3, 'cd'), - ... Result(4, 6, 'efg'), - ... Result(7, 7, 'h'), - ... Result(8, 8, 'i'), - ... Result(9, 10, 'jk'), - ... Result(11, 13, 'lmn') - ... ] - >>> expected == list(add_unmatched(string, results)) - True - - >>> string ='abc2' - >>> results = [ - ... Result(0, 2, 'abc'), - ... ] - >>> expected = [ - ... Result(0, 2, 'abc'), - ... Result(3, 3, '2', None), - ... ] - >>> expected == list(add_unmatched(string, results)) - True - + Return an iterable of strings splitting on spaces and parens. """ - string_pos = 0 - for result in Result.sort(results): - if result.start > string_pos: - start = string_pos - end = result.start - 1 - yield Result(start, end, string[start:end + 1]) - yield result - string_pos = result.end + 1 - - len_string = len(string) - if string_pos < len_string: - start = string_pos - end = len_string - 1 - yield Result(start, end, string[start:end + 1]) + return [match for match in _tokenizer.split(tokens_string.lower()) if match] diff --git a/tests/test__pyahocorasick.py b/tests/test__pyahocorasick.py index 7b346b6..22fc917 100644 --- a/tests/test__pyahocorasick.py +++ b/tests/test__pyahocorasick.py @@ -6,12 +6,7 @@ WWW : http://0x80.pl License : public domain -Modified for use in the license_expression library and in particular: - - add support for unicode key strinsg. - - rename word to key and output to value (to be more like a mapping/dict) - - case insensitive search - - improve returned results with the actual start,end and matched string. - - support returning non-matched parts of a string +Modified for use in the license_expression library. 
""" from __future__ import unicode_literals @@ -21,35 +16,34 @@ import unittest from license_expression._pyahocorasick import Trie -from license_expression._pyahocorasick import Output -from license_expression._pyahocorasick import Result +from license_expression._pyahocorasick import Token class TestTrie(unittest.TestCase): - def testAddedWordShouldBeCountedAndAvailableForRetrieval(self): + def test_add_can_get(self): t = Trie() t.add('python', 'value') - assert Output('python', 'value') == t.get('python') + assert ('python', 'value') == t.get('python') - def testAddingExistingWordShouldReplaceAssociatedValue(self): + def test_add_existing_WordShouldReplaceAssociatedValue(self): t = Trie() t.add('python', 'value') - assert Output('python', 'value') == t.get('python') + assert ('python', 'value') == t.get('python') t.add('python', 'other') - assert Output('python', 'other') == t.get('python') + assert ('python', 'other') == t.get('python') - def testGetUnknowWordWithoutDefaultValueShouldRaiseException(self): + def test_get_UnknowWordWithoutDefaultValueShouldRaiseException(self): t = Trie() with self.assertRaises(KeyError): t.get('python') - def testGetUnknowWordWithDefaultValueShouldReturnDefault(self): + def test_get_UnknowWordWithDefaultValueShouldReturnDefault(self): t = Trie() self.assertEqual(t.get('python', 'default'), 'default') - def testExistShouldDetectAddedWords(self): + def test_exists_ShouldDetectAddedWords(self): t = Trie() t.add('python', 'value') t.add('ada', 'value') @@ -57,7 +51,7 @@ def testExistShouldDetectAddedWords(self): self.assertTrue(t.exists('python')) self.assertTrue(t.exists('ada')) - def testExistShouldReturnFailOnUnknownWord(self): + def test_exists_ShouldReturnFailOnUnknownWord(self): t = Trie() t.add('python', 'value') @@ -66,20 +60,22 @@ def testExistShouldReturnFailOnUnknownWord(self): def test_is_prefix_ShouldDetecAllPrefixesIncludingWord(self): t = Trie() t.add('python', 'value') - t.add('ada', 'value') + t.add('ada lovelace', 'value') - self.assertTrue(t.is_prefix('a')) - self.assertTrue(t.is_prefix('ad')) + self.assertFalse(t.is_prefix('a')) + self.assertFalse(t.is_prefix('ad')) self.assertTrue(t.is_prefix('ada')) - self.assertTrue(t.is_prefix('p')) - self.assertTrue(t.is_prefix('py')) - self.assertTrue(t.is_prefix('pyt')) - self.assertTrue(t.is_prefix('pyth')) - self.assertTrue(t.is_prefix('pytho')) + self.assertFalse(t.is_prefix('p')) + self.assertFalse(t.is_prefix('py')) + self.assertFalse(t.is_prefix('pyt')) + self.assertFalse(t.is_prefix('pyth')) + self.assertFalse(t.is_prefix('pytho')) self.assertTrue(t.is_prefix('python')) - def testItemsShouldReturnAllItemsAlreadyAddedToTheTrie(self): + self.assertFalse(t.is_prefix('lovelace')) + + def test_items_ShouldReturnAllItemsAlreadyAddedToTheTrie(self): t = Trie() t.add('python', 1) @@ -87,6 +83,7 @@ def testItemsShouldReturnAllItemsAlreadyAddedToTheTrie(self): t.add('perl', 3) t.add('pascal', 4) t.add('php', 5) + t.add('php that', 6) result = list(t.items()) self.assertIn(('python', 1), result) @@ -94,8 +91,9 @@ def testItemsShouldReturnAllItemsAlreadyAddedToTheTrie(self): self.assertIn(('perl', 3), result) self.assertIn(('pascal', 4), result) self.assertIn(('php', 5), result) + self.assertIn(('php that', 6), result) - def testKeysShouldReturnAllKeysAlreadyAddedToTheTrie(self): + def test_keys_ShouldReturnAllKeysAlreadyAddedToTheTrie(self): t = Trie() t.add('python', 1) @@ -103,6 +101,7 @@ def testKeysShouldReturnAllKeysAlreadyAddedToTheTrie(self): t.add('perl', 3) t.add('pascal', 4) t.add('php', 5) + 
t.add('php that', 6) result = list(t.keys()) self.assertIn('python', result) @@ -110,8 +109,9 @@ def testKeysShouldReturnAllKeysAlreadyAddedToTheTrie(self): self.assertIn('perl', result) self.assertIn('pascal', result) self.assertIn('php', result) + self.assertIn('php that', result) - def testValuesShouldReturnAllValuesAlreadyAddedToTheTrie(self): + def test_values_ShouldReturnAllValuesAlreadyAddedToTheTrie(self): t = Trie() t.add('python', 1) @@ -127,28 +127,52 @@ def testValuesShouldReturnAllValuesAlreadyAddedToTheTrie(self): self.assertIn(4, result) self.assertIn(5, result) - def test_iter_should_not_return_non_matches(self): + def test_iter_should_not_return_non_matches_by_default(self): def get_test_automaton(): - words = "he her hers his she hi him man himan".split() + words = 'he her hers his she hi him man himan'.split() t = Trie() for w in words: t.add(w, w) t.make_automaton() return t - test_string = "he she himan" + test_string = 'he she himan' t = get_test_automaton() result = list(t.iter(test_string)) + assert 'he she himan'.split() == [r.value for r in result] + + def test_iter_can_return_non_matches_optionally(self): + + def get_test_automaton(): + words = 'he her hers his she hi him man himan'.split() + t = Trie() + for w in words: + t.add(w, w) + t.make_automaton() + return t + + test_string = ' he she junk himan other stuffs ' + # 111111111122222222223333333 + # 0123456789012345678901234567890123456 + + t = get_test_automaton() + result = list(t.iter(test_string, include_unmatched=True, include_space=True)) expected = [ - Result(start=0, end=1, string='he', output=Output('he', 'he')), - Result(start=3, end=5, string='she', output=Output('she', 'she')), - Result(start=4, end=5, string='he', output=Output('he', 'he')), - Result(start=7, end=8, string='hi', output=Output('hi', 'hi')), - Result(start=7, end=9, string='him', output=Output('him', 'him')), - Result(start=7, end=11, string='himan', output=Output('himan', 'himan')), - Result(start=9, end=11, string='man', output=Output('man', 'man')) + Token(0, 1, u' ', None), + Token(2, 3, u'he', u'he'), + Token(4, 4, u' ', None), + Token(5, 7, u'she', u'she'), + Token(8, 8, u' ', None), + Token(9, 12, u'junk', None), + Token(13, 14, u' ', None), + Token(15, 19, u'himan', u'himan'), + Token(20, 21, u' ', None), + Token(22, 26, u'other', None), + Token(27, 27, u' ', None), + Token(28, 33, u'stuffs', None), + Token(34, 36, u' ', None), ] assert expected == result @@ -156,7 +180,7 @@ def get_test_automaton(): def test_iter_vs_scan(self): def get_test_automaton(): - words = "( AND ) OR".split() + words = '( AND ) OR'.split() t = Trie() for w in words: t.add(w, w) @@ -166,41 +190,38 @@ def get_test_automaton(): test_string = '((l-a + AND l-b) OR (l -c+))' t = get_test_automaton() - result = list(t.iter(test_string)) + result = list(t.iter(test_string, include_unmatched=True, include_space=True)) expected = [ - Result(0, 0, '(', Output('(', '(')), - Result(1, 1, '(', Output('(', '(')), - Result(8, 10, 'AND', Output('AND', 'AND')), - Result(15, 15, ')', Output(')', ')')), - Result(17, 18, 'OR', Output('OR', 'OR')), - Result(20, 20, '(', Output('(', '(')), - Result(26, 26, ')', Output(')', ')')), - Result(27, 27, ')', Output(')', ')')) + Token(0, 0, u'(', u'('), + Token(1, 1, u'(', u'('), + Token(2, 4, u'l-a', None), + Token(5, 5, u' ', None), + Token(6, 6, u'+', None), + Token(7, 7, u' ', None), + Token(8, 10, u'AND', u'AND'), + Token(11, 11, u' ', None), + Token(12, 14, u'l-b', None), + Token(15, 15, u')', u')'), + Token(16, 16,
u' ', None), + Token(17, 18, u'OR', u'OR'), + Token(19, 19, u' ', None), + Token(20, 20, u'(', u'('), + Token(21, 21, u'l', None), + Token(22, 22, u' ', None), + Token(23, 25, u'-c+', None), + Token(26, 26, u')', u')'), + Token(27, 27, u')', u')') ] + assert expected == result - result = list(t.scan(test_string)) - expected = [ - Result(0, 0, '(', Output('(', '(')), - Result(1, 1, '(', Output('(', '(')), - Result(2, 7, 'l-a + ', None), - Result(8, 10, 'AND', Output('AND', 'AND')), - Result(11, 14, ' l-b', None), - Result(15, 15, ')', Output(')', ')')), - Result(16, 16, ' ', None), - Result(17, 18, 'OR', Output('OR', 'OR')), - Result(19, 19, ' ', None), - Result(20, 20, '(', Output('(', '(')), - Result(21, 25, 'l -c+', None), - Result(26, 26, ')', Output(')', ')')), - Result(27, 27, ')', Output(')', ')')) - ] + result = list(t.scan(test_string, include_unmatched=True, include_space=True)) assert expected == result - def test_scan_with_unmatched(self): + def test_scan_with_unmatched_and_space(self): def get_test_automaton(): - words = "( AND ) OR".split() + words = '( AND ) OR'.split() t = Trie() for w in words: t.add(w, w) @@ -208,18 +229,44 @@ def get_test_automaton(): return t test_string = '((l-a + AND l-b) OR an (l -c+))' - + # 111111111122222222223 + # 0123456789012345678901234567890 t = get_test_automaton() - result = list(t.scan(test_string)) - assert test_string == ''.join(r.string for r in result) + result = list(t.scan(test_string, include_unmatched=True, include_space=True)) + expected = [ + Token(0, 0, u'(', u'('), + Token(1, 1, u'(', u'('), + Token(2, 4, u'l-a', None), + Token(5, 5, u' ', None), + Token(6, 6, u'+', None), + Token(7, 7, u' ', None), + Token(8, 10, u'AND', u'AND'), + Token(11, 11, u' ', None), + Token(12, 14, u'l-b', None), + Token(15, 15, u')', u')'), + Token(16, 16, u' ', None), + Token(17, 18, u'OR', u'OR'), + Token(19, 19, u' ', None), + Token(20, 21, u'an', None), + Token(22, 22, u' ', None), + Token(23, 23, u'(', u'('), + Token(24, 24, u'l', None), + Token(25, 25, u' ', None), + Token(26, 28, u'-c+', None), + Token(29, 29, u')', u')'), + Token(30, 30, u')', u')') + ] + + assert expected == result + assert test_string == ''.join(t.string for t in result) def test_iter_with_unmatched_simple(self): t = Trie() - t.add('AND', 'AND') + t.add('And', 'And') t.make_automaton() - test_string = 'AND an a and' + test_string = 'AND an a And' result = list(t.iter(test_string)) - assert 'ANDand' == ''.join(r.string for r in result) + assert ['And', 'And'] == [r.value for r in result] def test_iter_with_unmatched_simple2(self): t = Trie() @@ -227,5 +274,49 @@ def test_iter_with_unmatched_simple2(self): t.make_automaton() test_string = 'AND an a and' result = list(t.iter(test_string)) - assert 'ANDand' == ''.join(r.string for r in result) + assert ['AND', 'AND'] == [r.value for r in result] + + def test_iter_with_unmatched_simple3(self): + t = Trie() + t.add('AND', 'AND') + t.make_automaton() + test_string = 'AND an a andersom' + result = list(t.iter(test_string)) + assert ['AND'] == [r.value for r in result] + def test_iter_simple(self): + t = Trie() + t.add('AND', 'AND') + t.add('OR', 'OR') + t.add('WITH', 'WITH') + t.add('(', '(') + t.add(')', ')') + t.add('GPL-2.0', 'GPL-2.0') + t.add('mit', 'MIT') + t.add('Classpath', 'Classpath') + t.make_automaton() + test_string = '(GPL-2.0 with Classpath) or (gpl-2.0) and (classpath or gpl-2.0 OR mit) ' + # 111111111122222222223333333333444444444455555555556666666666777 + # 
0123456789012345678901234567890123456789012345678901234567890123456789012 + result = list(t.iter(test_string)) + expected = [ + Token(0, 0, u'(', u'('), + Token(1, 7, u'GPL-2.0', u'GPL-2.0'), + Token(9, 12, u'with', u'WITH'), + Token(14, 22, u'Classpath', u'Classpath'), + Token(23, 23, u')', u')'), + Token(25, 26, u'or', u'OR'), + Token(28, 28, u'(', u'('), + Token(29, 35, u'gpl-2.0', u'GPL-2.0'), + Token(36, 36, u')', u')'), + Token(38, 40, u'and', u'AND'), + Token(42, 42, u'(', u'('), + Token(43, 51, u'classpath', u'Classpath'), + Token(53, 54, u'or', u'OR'), + Token(57, 63, u'gpl-2.0', u'GPL-2.0'), + Token(65, 66, u'OR', u'OR'), + Token(68, 70, u'mit', u'MIT'), + Token(71, 71, u')', u')') + ] + + assert expected == result diff --git a/tests/test_license_expression.py b/tests/test_license_expression.py index 22da4c7..dde755a 100644 --- a/tests/test_license_expression.py +++ b/tests/test_license_expression.py @@ -37,12 +37,10 @@ from license_expression import LicenseSymbolLike from license_expression import LicenseWithExceptionSymbol from license_expression import ParseError -from license_expression import Result -from license_expression import Output +from license_expression import Token -from license_expression import group_results_for_with_subexpression +from license_expression import build_token_groups_for_with_subexpression from license_expression import splitter -from license_expression import strip_and_skip_spaces from license_expression import validate_symbols from license_expression import TOKEN_AND @@ -198,27 +196,46 @@ def get_symbols_and_licensing(self): licensing = Licensing(symbols) return gpl_20, gpl_20_plus, lgpl_21, mit, licensing - def test_tokenize_1(self): + def test_tokenize_1_with_symbols(self): gpl_20, _gpl_20_plus, lgpl_21, mit, licensing = self.get_symbols_and_licensing() - result = licensing.tokenize('The GNU GPL 20 or LGPL-2.1 and mit') + + result = licensing.tokenize('The GNU GPL 20 or LGPL v2.1 AND MIT license ') + # 111111111122222222223333333333444 + # 0123456789012345678901234567890123456789012 + expected = [ - (gpl_20, 'The GNU GPL 20', 0), - (TOKEN_OR, ' or ', 14), - (lgpl_21, 'LGPL-2.1', 18), - (TOKEN_AND, ' and ', 26), - (mit, 'mit', 31)] + (gpl_20, u'The GNU GPL 20', 0), + (TOKEN_OR, u'or', 15), + (lgpl_21, u'LGPL v2.1', 18), + (TOKEN_AND, u'AND', 28), + (mit, u'MIT license', 32) + ] + assert expected == list(result) + + def test_tokenize_1_no_symbols(self): + licensing = Licensing() + + result = licensing.tokenize('The GNU GPL 20 or LGPL v2.1 AND MIT license') + + expected = [ + (LicenseSymbol(u'The GNU GPL 20'), u'The GNU GPL 20', 0), + (TOKEN_OR, u'or', 15), + (LicenseSymbol(u'LGPL v2.1'), u'LGPL v2.1', 18), + (TOKEN_AND, u'AND', 28), + (LicenseSymbol(u'MIT license'), u'MIT license', 32) + ] + assert expected == list(result) def test_tokenize_with_trailing_unknown(self): - gpl_20, _gpl_20_plus, lgpl_21, mit, licensing = self.get_symbols_and_licensing() + gpl_20, _gpl_20_plus, lgpl_21, _mit, licensing = self.get_symbols_and_licensing() result = licensing.tokenize('The GNU GPL 20 or LGPL-2.1 and mit2') expected = [ (gpl_20, 'The GNU GPL 20', 0), - (TOKEN_OR, ' or ', 14), + (TOKEN_OR, 'or', 15), (lgpl_21, 'LGPL-2.1', 18), - (TOKEN_AND, ' and ', 26), - (mit, 'mit', 31), - (LicenseSymbol(key='2'), '2', 34) + (TOKEN_AND, 'and', 27), + (LicenseSymbol(key='mit2'), 'mit2', 31), ] assert expected == list(result) @@ -228,14 +245,15 @@ def test_tokenize_3(self): result = licensing.tokenize('The GNU GPL 20 or later or (LGPL-2.1 and mit) or The GNU GPL 20 or 
mit') expected = [ (gpl_20_plus, 'The GNU GPL 20 or later', 0), - (TOKEN_OR, ' or ', 23), + (TOKEN_OR, 'or', 24), (TOKEN_LPAR, '(', 27), (lgpl_21, 'LGPL-2.1', 28), - (TOKEN_AND, ' and ', 36), + (TOKEN_AND, 'and', 37), (mit, 'mit', 41), (TOKEN_RPAR, ')', 44), - (TOKEN_OR, ' or ', 45), - (gpl_20, 'The GNU GPL 20', 49), (2, ' or ', 63), + (TOKEN_OR, 'or', 46), + (gpl_20, 'The GNU GPL 20', 49), + (2, 'or', 64), (mit, 'mit', 67) ] assert expected == list(result) @@ -245,8 +263,7 @@ def test_tokenize_unknown_as_trailing_single_attached_character(self): l = Licensing(symbols) result = list(l.tokenize('mit2')) expected = [ - (LicenseSymbol(key='MIT', aliases=('MIT license',)), 'mit', 0), - (LicenseSymbol(key='2'), '2', 3), + (LicenseSymbol(u'mit2', is_exception=False), u'mit2', 0), ] assert expected == result @@ -616,15 +633,11 @@ def test_create_from_python(self): ) assert a == b - def test_parse_with_repeated_or_later_raise_parse_error(self): + def test_parse_with_repeated_or_later_does_not_raise_parse_error(self): l = Licensing() expr = 'LGPL2.1+ + and mit' - try: - l.parse(expr) - self.fail('Exception not raised') - except ParseError as ee: - expected = 'Invalid symbols sequence such as (A B) for token: "+" at position: 9' - assert expected == str(ee) + parsed = l.parse(expr) + assert 'LGPL2.1+ + AND mit' == str(parsed) def test_render_complex(self): licensing = Licensing() @@ -692,9 +705,9 @@ def test_Licensing_can_scan_valid_expressions_with_symbols_that_contain_and_with expression = 'orgpl or withbsd with orclasspath and andmit or andlgpl and ormit or withme' result = [r.string for r in licensing.get_scanner().scan(expression)] expected = [ - 'orgpl', ' or ', 'withbsd', ' with ', 'orclasspath', - ' and ', 'andmit', ' or ', 'andlgpl', ' and ', 'ormit', - ' or ', 'withme' + 'orgpl', 'or', 'withbsd', 'with', 'orclasspath', + 'and', 'andmit', 'or', 'andlgpl', 'and', 'ormit', + 'or', 'withme' ] assert expected == result @@ -763,25 +776,25 @@ def test_Licensing_can_parse_valid_expressions_with_symbols_that_contain_and_wit expected = 'orgpl OR (withbsd WITH orclasspath AND andmit) OR (anlgpl AND ormit) OR withme' assert expected == result.render('{symbol.key}') + def test_Licensing_can_parse_valid_expressions_with_symbols_that_contain_spaces(self): + licensing = Licensing() + expression = ' GPL-2.0 or (mit and LGPL 2.1) or bsd Or GPL-2.0 or (mit and LGPL 2.1)' + parsed = licensing.parse(expression) + expected = 'GPL-2.0 OR (mit AND LGPL 2.1) OR bsd OR GPL-2.0 OR (mit AND LGPL 2.1)' + assert expected == str(parsed) -class LicensingParseWithSymbolsSimpleTest(TestCase): - def test_Licensing_with_illegal_symbols_raise_Exception(self): - try: - Licensing([ - 'GPL-2.0 or LATER', - 'classpath Exception', - 'something with else+', - 'mit', - 'LGPL 2.1', - 'mit or later' - ]) - self.fail('Exception not raised') - except ExpressionError as ee: - expected = ('Invalid license key: "or later" words are reserved and ' - 'cannot be used in a key: "GPL-2.0 or LATER"') +class LicensingParseWithSymbolsSimpleTest(TestCase): - assert expected == str(ee) + def test_Licensing_with_overlapping_symbols_with_keywords_does_not_raise_Exception(self): + Licensing([ + 'GPL-2.0 or LATER', + 'classpath Exception', + 'something with else+', + 'mit', + 'LGPL 2.1', + 'mit or later' + ]) def get_syms_and_licensing(self): a = LicenseSymbol('l-a') @@ -1108,15 +1121,18 @@ def get_symbols_and_licensing(self): licensing = Licensing(symbols) return gpl2, gpl2plus, lgpl, mit, mitand2, licensing - def 
test_parse_trailing_char_raise_exception(self): + def test_parse_trailing_char_does_not_raise_exception_without_validate(self): + _gpl2, _gpl2plus, _lgpl, _mit, _mitand2, licensing = self.get_symbols_and_licensing() + e = licensing.parse('The GNU GPL 20 or LGPL-2.1 and mit2', validate=False) + assert 'gpl-2.0 OR (LGPL-2.1 AND mit2)' == str(e) + + def test_parse_trailing_char_raise_exception_with_validate(self): _gpl2, _gpl2plus, _lgpl, _mit, _mitand2, licensing = self.get_symbols_and_licensing() try: - licensing.parse('The GNU GPL 20 or LGPL-2.1 and mit2') + licensing.parse('The GNU GPL 20 or LGPL-2.1 and mit2', validate=True) self.fail('Exception not raised') - except ParseError as pe: - expected = {'error_code': PARSE_INVALID_SYMBOL_SEQUENCE, 'position': 34, - 'token_string': '2', 'token_type': LicenseSymbol('2')} - assert expected == _parse_error_as_dict(pe) + except ExpressionError as ee: + assert 'Unknown license key(s): mit2' == str(ee) def test_parse_expression_with_trailing_unknown_should_raise_exception(self): gpl2, gpl2plus, lgpl, mit, _mitand2, licensing = self.get_symbols_and_licensing() @@ -1125,17 +1141,17 @@ def test_parse_expression_with_trailing_unknown_should_raise_exception(self): tokens = list(licensing.tokenize('The GNU GPL 20 or later or (LGPL-2.1 and mit) or The GNU GPL 20 or mit 123')) expected = [ (gpl2plus, 'The GNU GPL 20 or later', 0), - (TOKEN_OR, ' or ', 23), + (TOKEN_OR, 'or', 24), (TOKEN_LPAR, '(', 27), (lgpl, 'LGPL-2.1', 28), - (TOKEN_AND, ' and ', 36), + (TOKEN_AND, 'and', 37), (mit, 'mit', 41), (TOKEN_RPAR, ')', 44), - (TOKEN_OR, ' or ', 45), + (TOKEN_OR, 'or', 46), (gpl2, 'The GNU GPL 20', 49), - (TOKEN_OR, ' or ', 63), + (TOKEN_OR, 'or', 64), (mit, 'mit', 67), - (unknown, ' 123', 70) + (unknown, '123', 71) ] assert expected == tokens @@ -1143,8 +1159,8 @@ def test_parse_expression_with_trailing_unknown_should_raise_exception(self): licensing.parse('The GNU GPL 20 or later or (LGPL-2.1 and mit) or The GNU GPL 20 or mit 123') self.fail('Exception not raised') except ParseError as pe: - expected = {'error_code': PARSE_INVALID_SYMBOL_SEQUENCE, 'position': 70, - 'token_string': ' 123', 'token_type': unknown} + expected = {'error_code': PARSE_INVALID_SYMBOL_SEQUENCE, 'position': 71, + 'token_string': '123', 'token_type': unknown} assert expected == _parse_error_as_dict(pe) def test_parse_expression_with_trailing_unknown_should_raise_exception2(self): @@ -1152,10 +1168,11 @@ def test_parse_expression_with_trailing_unknown_should_raise_exception2(self): unknown = LicenseSymbol(key='123') try: licensing.parse('The GNU GPL 20 or mit 123') + # 01234567890123456789012345 self.fail('Exception not raised') except ParseError as pe: - expected = {'error_code': PARSE_INVALID_SYMBOL_SEQUENCE, 'position': 21, - 'token_string': ' 123', 'token_type': unknown} + expected = {'error_code': PARSE_INVALID_SYMBOL_SEQUENCE, 'position': 22, + 'token_string': '123', 'token_type': unknown} assert expected == _parse_error_as_dict(pe) def test_parse_expression_with_WITH(self): @@ -1169,15 +1186,15 @@ def test_parse_expression_with_WITH(self): tokens = list(licensing.tokenize(expr)) expected = [ (gpl_20_or_later, 'The GNU GPL 20 or later', 0), - (TOKEN_OR, ' or ', 23), + (TOKEN_OR, 'or', 24), (TOKEN_LPAR, '(', 27), (lgpl, 'LGPL-2.1', 28), - (TOKEN_AND, ' and ', 36), + (TOKEN_AND, 'and', 37), (mit, 'mit', 41), (TOKEN_RPAR, ')', 44), - (TOKEN_OR, ' or ', 45), + (TOKEN_OR, 'or', 46), (gpl2, 'The GNU GPL 20', 49), - (TOKEN_OR, ' or ', 63), + (TOKEN_OR, 'or', 64), 
(LicenseWithExceptionSymbol(mit, mitexp), 'mit with mit exp', 67) ] @@ -1223,19 +1240,68 @@ def test_unknown_keys_with_trailing_char(self): assert [] == licensing.unknown_license_keys(parsed) assert [] == licensing.unknown_license_keys(expr) - def test_unknown_keys_with_trailing_char_2(self): + def test_unknown_keys_with_trailing_char_2_with_validate(self): _gpl2, _gpl2plus, _lgpl, _mit, _mitand2, licensing = self.get_symbols_and_licensing() expr = 'The GNU GPL 20 or LGPL-2.1 and mitand3' try: - licensing.parse(expr) - self.fail('ParseError should be raised') - except ParseError as pe: - expected = {'error_code': 5, 'position': 34, 'token_string': u'and3', 'token_type': LicenseSymbol(key=u'and3')} + licensing.parse(expr, validate=True) + self.fail('Exception should be raised') + except ExpressionError as ee: + assert 'Unknown license key(s): mitand3' == str(ee) - assert expected == _parse_error_as_dict(pe) + def test_unknown_keys_with_trailing_char_2_without_validate(self): + _gpl2, _gpl2plus, _lgpl, _mit, _mitand2, licensing = self.get_symbols_and_licensing() + expr = 'The GNU GPL 20 or LGPL-2.1 and mitand3' + parsed = licensing.parse(expr, validate=False) + assert 'gpl-2.0 OR (LGPL-2.1 AND mitand3)' == str(parsed) + + def test_parse_with_overlapping_key_without_symbols(self): + expression = 'mit or mit AND zlib or mit or mit with verylonglicense' + # 1111111111222222222233333333334444444444555555555566666 + # 0123456789012345678901234567890123456789012345678901234 + + licensing = Licensing() + results = str(licensing.parse(expression)) + expected = 'mit OR (mit AND zlib) OR mit OR mit WITH verylonglicense' + assert expected == results + + def test_scan_with_overlapping_key_with_symbols_and_trailing_unknown(self): + expression = 'mit or mit AND zlib or mit or mit with verylonglicense' + # 111111111122222222223333333333444444444455555 + # 0123456789012345678901234567890123456789012345678901234 + + symbols = [ + LicenseSymbol('MIT', ['MIT license']), + LicenseSymbol('LGPL-2.1', ['LGPL v2.1']), + LicenseSymbol('zlib', ['zlib']), + LicenseSymbol('d-zlib', ['D zlib']), + LicenseSymbol('mito', ['mit o']), + LicenseSymbol('hmit', ['h verylonglicense']), + ] + licensing = Licensing(symbols) + results = list(licensing.get_scanner().scan(expression)) + expected = [ + Token(0, 2, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(4, 5, u'or', Keyword(value=u'or', type=2)), + Token(7, 9, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(11, 13, u'AND', Keyword(value=u'and', type=1)), + Token(15, 18, u'zlib', LicenseSymbol(u'zlib', aliases=(u'zlib',))), + Token(20, 21, u'or', Keyword(value=u'or', type=2)), + Token(23, 25, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(27, 28, u'or', Keyword(value=u'or', type=2)), + Token(30, 32, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(34, 37, u'with', Keyword(value=u'with', type=10)), + Token(39, 53, u'verylonglicense', None), + ] + + assert expected == results + + def test_iter_with_overlapping_key_with_symbols_and_trailing_unknown(self): + expression = 'mit or mit AND zlib or mit or mit with verylonglicense' + # 111111111122222222223333333333444444444455555 + # 0123456789012345678901234567890123456789012345678901234 - def test_parse_with_overlapping_key_with_licensing(self): symbols = [ LicenseSymbol('MIT', ['MIT license']), LicenseSymbol('LGPL-2.1', ['LGPL v2.1']), @@ -1245,11 +1311,73 @@ def test_parse_with_overlapping_key_with_licensing(self): LicenseSymbol('hmit', ['h 
verylonglicense']), ] licensing = Licensing(symbols) + results = list(licensing.get_scanner().iter(expression, include_unmatched=True)) + expected = [ + Token(0, 2, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(4, 5, u'or', Keyword(value=u'or', type=2)), + Token(7, 9, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(11, 13, u'AND', Keyword(value=u'and', type=1)), + Token(15, 18, u'zlib', LicenseSymbol(u'zlib', aliases=(u'zlib',))), + Token(20, 21, u'or', Keyword(value=u'or', type=2)), + Token(23, 25, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(27, 28, u'or', Keyword(value=u'or', type=2)), + Token(30, 32, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(34, 37, u'with', Keyword(value=u'with', type=10)), + Token(39, 53, u'verylonglicense', None), + ] + assert expected == results + + def test_iter_with_overlapping_key_with_symbols_and_trailing_unknown2(self): + expression = 'mit with verylonglicense' + symbols = [ + LicenseSymbol('MIT', ['MIT license']), + LicenseSymbol('hmit', ['h verylonglicense']), + ] + licensing = Licensing(symbols) + results = list(licensing.get_scanner().iter(expression, include_unmatched=True)) + expected = [ + Token(0, 2, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(4, 7, u'with', Keyword(value=u'with', type=10)), + Token(9, 23, u'verylonglicense', None), + ] + assert expected == results + def test_tokenize_with_overlapping_key_with_symbols_and_trailing_unknown(self): expression = 'mit or mit AND zlib or mit or mit with verylonglicense' + # 1111111111222222222233333333334444444444555555555566666 + # 0123456789012345678901234567890123456789012345678901234 + + symbols = [ + LicenseSymbol('MIT', ['MIT license']), + LicenseSymbol('LGPL-2.1', ['LGPL v2.1']), + LicenseSymbol('zlib', ['zlib']), + LicenseSymbol('d-zlib', ['D zlib']), + LicenseSymbol('mito', ['mit o']), + LicenseSymbol('hmit', ['h verylonglicense']), + ] + licensing = Licensing(symbols) + + results = list(licensing.tokenize(expression)) + expected = [ + (LicenseSymbol(u'MIT', aliases=(u'MIT license',)), u'mit', 0), + (2, u'or', 4), + (LicenseSymbol(u'MIT', aliases=(u'MIT license',)), u'mit', 7), + (1, u'AND', 11), + (LicenseSymbol(u'zlib', aliases=(u'zlib',)), u'zlib', 15), + (2, u'or', 20), + (LicenseSymbol(u'MIT', aliases=(u'MIT license',)), u'mit', 23), + (2, u'or', 27), + (LicenseWithExceptionSymbol( + license_symbol=LicenseSymbol(u'MIT', aliases=(u'MIT license',)), + exception_symbol=LicenseSymbol(u'verylonglicense')), u'mit with verylonglicense', + 30) + ] + + assert expected == results + results = str(licensing.parse(expression)) - expected = 'mit OR (MIT AND zlib) OR mit OR MIT WITH verylonglicense' - self.assertEqual(expected, results) + expected = 'MIT OR (MIT AND zlib) OR MIT OR MIT WITH verylonglicense' + assert expected == results class LicensingSymbolsTest(TestCase): @@ -1394,96 +1522,96 @@ def test_splitter(self): 'with SOMETHING with ELSE+ and lgpl 2.1') results = list(splitter(expr)) expected = [ - Result(0, 0, ' ', None), - Result(1, 7, 'GPL-2.0', Output('GPL-2.0', LicenseSymbol(key='GPL-2.0',))), - Result(8, 8, ' ', None), - Result(9, 10, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(11, 11, ' ', None), - Result(12, 16, 'later', Output('later', LicenseSymbol(key='later',))), - Result(17, 17, ' ', None), - Result(18, 21, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), - Result(22, 22, ' ', None), - Result(23, 31, 'classpath', Output('classpath', 
LicenseSymbol(key='classpath',))), - Result(32, 32, ' ', None), - Result(33, 41, 'Exception', Output('Exception', LicenseSymbol(key='Exception',))), - Result(42, 42, ' ', None), - Result(43, 45, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), - Result(46, 46, ' ', None), - Result(47, 49, 'mit', Output('mit', LicenseSymbol(key='mit',))), - Result(50, 50, ' ', None), - Result(51, 53, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), - Result(54, 54, ' ', None), - Result(55, 57, 'mit', Output('mit', LicenseSymbol(key='mit',))), - Result(58, 58, ' ', None), - Result(59, 62, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), - Result(63, 63, ' ', None), - Result(64, 72, 'SOMETHING', Output('SOMETHING', LicenseSymbol(key='SOMETHING',))), - Result(73, 73, ' ', None), - Result(74, 77, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), - Result(78, 78, ' ', None), - Result(79, 83, 'ELSE+', Output('ELSE+', LicenseSymbol(key='ELSE+',))), - Result(84, 84, ' ', None), - Result(85, 86, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(87, 87, ' ', None), - Result(88, 91, 'LGPL', Output('LGPL', LicenseSymbol(key='LGPL',))), - Result(92, 92, ' ', None), - Result(93, 95, '2.1', Output('2.1', LicenseSymbol(key='2.1',))), - Result(96, 96, ' ', None), - Result(97, 99, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), - Result(100, 100, ' ', None), - Result(101, 107, 'GPL-2.0', Output('GPL-2.0', LicenseSymbol(key='GPL-2.0',))), - Result(108, 108, ' ', None), - Result(109, 110, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(111, 111, ' ', None), - Result(112, 116, 'LATER', Output('LATER', LicenseSymbol(key='LATER',))), - Result(117, 117, ' ', None), - Result(118, 121, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), - Result(122, 122, ' ', None), - Result(123, 123, '(', Output('(', Keyword(value='(', type=TOKEN_LPAR))), - Result(124, 132, 'Classpath', Output('Classpath', LicenseSymbol(key='Classpath',))), - Result(133, 133, ' ', None), - Result(134, 142, 'Exception', Output('Exception', LicenseSymbol(key='Exception',))), - Result(143, 143, ' ', None), - Result(144, 146, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), - Result(147, 147, ' ', None), - Result(148, 150, 'mit', Output('mit', LicenseSymbol(key='mit',))), - Result(151, 151, ' ', None), - Result(152, 153, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(154, 154, ' ', None), - Result(155, 159, 'later', Output('later', LicenseSymbol(key='later',))), - Result(160, 160, ')', Output(')', Keyword(value=')', type=TOKEN_RPAR))), - Result(161, 161, ' ', None), - Result(162, 163, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(164, 164, ' ', None), - Result(165, 168, 'LGPL', Output('LGPL', LicenseSymbol(key='LGPL',))), - Result(169, 169, ' ', None), - Result(170, 172, '2.1', Output('2.1', LicenseSymbol(key='2.1',))), - Result(173, 173, ' ', None), - Result(174, 175, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(176, 176, ' ', None), - Result(177, 179, 'mit', Output('mit', LicenseSymbol(key='mit',))), - Result(180, 180, ' ', None), - Result(181, 182, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(183, 183, ' ', None), - Result(184, 190, 'GPL-2.0', Output('GPL-2.0', LicenseSymbol(key='GPL-2.0',))), - Result(191, 191, ' ', None), - Result(192, 193, 'or', Output('or', Keyword(value='or', type=TOKEN_OR))), - Result(194, 194, ' ', None), - Result(195, 199, 'LATER', 
Output('LATER', LicenseSymbol(key='LATER',))), - Result(200, 200, ' ', None), - Result(201, 204, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), - Result(205, 205, ' ', None), - Result(206, 214, 'SOMETHING', Output('SOMETHING', LicenseSymbol(key='SOMETHING',))), - Result(215, 215, ' ', None), - Result(216, 219, 'with', Output('with', Keyword(value='with', type=TOKEN_WITH))), - Result(220, 220, ' ', None), - Result(221, 225, 'ELSE+', Output('ELSE+', LicenseSymbol(key='ELSE+',))), - Result(226, 226, ' ', None), - Result(227, 229, 'and', Output('and', Keyword(value='and', type=TOKEN_AND))), - Result(230, 230, ' ', None), - Result(231, 234, 'lgpl', Output('lgpl', LicenseSymbol(key='lgpl',))), - Result(235, 235, ' ', None), - Result(236, 238, '2.1', Output('2.1', LicenseSymbol(key='2.1',))) + Token(0, 0, ' ', None), + Token(1, 7, 'GPL-2.0', LicenseSymbol(key='GPL-2.0')), + Token(8, 8, ' ', None), + Token(9, 10, 'or', Keyword(value='or', type=TOKEN_OR)), + Token(11, 11, ' ', None), + Token(12, 16, 'later', LicenseSymbol(key='later')), + Token(17, 17, ' ', None), + Token(18, 21, 'with', Keyword(value='with', type=TOKEN_WITH)), + Token(22, 22, ' ', None), + Token(23, 31, 'classpath', LicenseSymbol(key='classpath')), + Token(32, 32, ' ', None), + Token(33, 41, 'Exception', LicenseSymbol(key='Exception')), + Token(42, 42, ' ', None), + Token(43, 45, 'and', Keyword(value='and', type=TOKEN_AND)), + Token(46, 46, ' ', None), + Token(47, 49, 'mit', LicenseSymbol(key='mit')), + Token(50, 50, ' ', None), + Token(51, 53, 'and', Keyword(value='and', type=TOKEN_AND)), + Token(54, 54, ' ', None), + Token(55, 57, 'mit', LicenseSymbol(key='mit')), + Token(58, 58, ' ', None), + Token(59, 62, 'with', Keyword(value='with', type=TOKEN_WITH)), + Token(63, 63, ' ', None), + Token(64, 72, 'SOMETHING', LicenseSymbol(key='SOMETHING')), + Token(73, 73, ' ', None), + Token(74, 77, 'with', Keyword(value='with', type=TOKEN_WITH)), + Token(78, 78, ' ', None), + Token(79, 83, 'ELSE+', LicenseSymbol(key='ELSE+')), + Token(84, 84, ' ', None), + Token(85, 86, 'or', Keyword(value='or', type=TOKEN_OR)), + Token(87, 87, ' ', None), + Token(88, 91, 'LGPL', LicenseSymbol(key='LGPL')), + Token(92, 92, ' ', None), + Token(93, 95, '2.1', LicenseSymbol(key='2.1')), + Token(96, 96, ' ', None), + Token(97, 99, 'and', Keyword(value='and', type=TOKEN_AND)), + Token(100, 100, ' ', None), + Token(101, 107, 'GPL-2.0', LicenseSymbol(key='GPL-2.0')), + Token(108, 108, ' ', None), + Token(109, 110, 'or', Keyword(value='or', type=TOKEN_OR)), + Token(111, 111, ' ', None), + Token(112, 116, 'LATER', LicenseSymbol(key='LATER')), + Token(117, 117, ' ', None), + Token(118, 121, 'with', Keyword(value='with', type=TOKEN_WITH)), + Token(122, 122, ' ', None), + Token(123, 123, '(', Keyword(value='(', type=TOKEN_LPAR)), + Token(124, 132, 'Classpath', LicenseSymbol(key='Classpath')), + Token(133, 133, ' ', None), + Token(134, 142, 'Exception', LicenseSymbol(key='Exception')), + Token(143, 143, ' ', None), + Token(144, 146, 'and', Keyword(value='and', type=TOKEN_AND)), + Token(147, 147, ' ', None), + Token(148, 150, 'mit', LicenseSymbol(key='mit')), + Token(151, 151, ' ', None), + Token(152, 153, 'or', Keyword(value='or', type=TOKEN_OR)), + Token(154, 154, ' ', None), + Token(155, 159, 'later', LicenseSymbol(key='later')), + Token(160, 160, ')', Keyword(value=')', type=TOKEN_RPAR)), + Token(161, 161, ' ', None), + Token(162, 163, 'or', Keyword(value='or', type=TOKEN_OR)), + Token(164, 164, ' ', None), + Token(165, 168, 'LGPL', 
LicenseSymbol(key='LGPL')), + Token(169, 169, ' ', None), + Token(170, 172, '2.1', LicenseSymbol(key='2.1')), + Token(173, 173, ' ', None), + Token(174, 175, 'or', Keyword(value='or', type=TOKEN_OR)), + Token(176, 176, ' ', None), + Token(177, 179, 'mit', LicenseSymbol(key='mit')), + Token(180, 180, ' ', None), + Token(181, 182, 'or', Keyword(value='or', type=TOKEN_OR)), + Token(183, 183, ' ', None), + Token(184, 190, 'GPL-2.0', LicenseSymbol(key='GPL-2.0')), + Token(191, 191, ' ', None), + Token(192, 193, 'or', Keyword(value='or', type=TOKEN_OR)), + Token(194, 194, ' ', None), + Token(195, 199, 'LATER', LicenseSymbol(key='LATER')), + Token(200, 200, ' ', None), + Token(201, 204, 'with', Keyword(value='with', type=TOKEN_WITH)), + Token(205, 205, ' ', None), + Token(206, 214, 'SOMETHING', LicenseSymbol(key='SOMETHING')), + Token(215, 215, ' ', None), + Token(216, 219, 'with', Keyword(value='with', type=TOKEN_WITH)), + Token(220, 220, ' ', None), + Token(221, 225, 'ELSE+', LicenseSymbol(key='ELSE+')), + Token(226, 226, ' ', None), + Token(227, 229, 'and', Keyword(value='and', type=TOKEN_AND)), + Token(230, 230, ' ', None), + Token(231, 234, 'lgpl', LicenseSymbol(key='lgpl')), + Token(235, 235, ' ', None), + Token(236, 238, '2.1', LicenseSymbol(key='2.1',)) ] assert expected == results @@ -1525,100 +1653,79 @@ def test_tokenize_step_by_step_does_not_munge_trailing_symbols(self): # fist scan scanner = licensing.get_scanner() result = list(scanner.scan(expr)) - - WITH_KW = Keyword(value=' with ', type=10) - AND_KW = Keyword(value=' and ', type=1) - OR_KW = Keyword(value=' or ', type=2) - expected = [ - Result(0, 0, ' ', None), - Result(1, 16, 'GPL-2.0 or later', Output('GPL-2.0 or LATER', gpl2plus, 1)), - Result(17, 22, ' with ', Output(' with ', WITH_KW, 0)), - Result(23, 41, 'classpath Exception', Output('classpath Exception', cpex, 1)), - Result(42, 46, ' and ', Output(' and ', AND_KW, 0)), - Result(47, 49, 'mit', Output('mit', mit, 1)), - Result(50, 54, ' and ', Output(' and ', AND_KW, 0)), - Result(55, 57, 'mit', Output('mit', mit, 1)), - Result(58, 63, ' with ', Output(' with ', WITH_KW, 0)), - Result(64, 82, 'mitthing with ELSE+', Output('mitthing with else+', mitthing_with_else, 1)), - Result(83, 86, ' or ', Output(' or ', OR_KW, 0)), - Result(87, 94, 'LGPL 2.1', Output('LGPL 2.1', lgpl, 1)), - Result(95, 99, ' and ', Output(' and ', AND_KW, 0)), - Result(100, 115, 'GPL-2.0 or LATER', Output('GPL-2.0 or LATER', gpl2plus, 1)), - Result(116, 121, ' with ', Output(' with ', WITH_KW, 0)), - Result(122, 140, 'Classpath Exception', Output('classpath Exception', cpex, 1)), - Result(141, 145, ' and ', Output(' and ', AND_KW, 0)), - Result(146, 157, 'mit or later', Output('mit or later', mitplus, 1)), - Result(158, 161, ' or ', Output(' or ', OR_KW, 0)), - Result(162, 169, 'LGPL 2.1', Output('LGPL 2.1', lgpl, 1)), - Result(170, 173, ' or ', Output(' or ', OR_KW, 0)), - Result(174, 176, 'mit', Output('mit', mit, 1)), - Result(177, 180, ' or ', Output(' or ', OR_KW, 0)), - Result(181, 196, 'GPL-2.0 or LATER', Output('GPL-2.0 or LATER', gpl2plus, 1)), - Result(197, 202, ' with ', Output(' with ', WITH_KW, 0)), - Result(203, 221, 'mitthing with ELSE+', Output('mitthing with else+', mitthing_with_else, 1)), - Result(222, 226, ' and ', Output(' and ', AND_KW, 0)), - Result(227, 234, 'lgpl 2.1', Output('LGPL 2.1', lgpl, 1)), - Result(235, 238, ' or ', Output(' or ', OR_KW, 0)), - Result(239, 245, 'gpl-2.0', Output('GPL-2.0', gpl2, 1)) + Token(1, 16, u'GPL-2.0 or later', LicenseSymbol(u'GPL-2.0 or 
LATER', is_exception=False)), + Token(18, 21, u'with', Keyword(value=u'with', type=10)), + Token(23, 41, u'classpath Exception', LicenseSymbol(u'classpath Exception', is_exception=True)), + Token(43, 45, u'and', Keyword(value=u'and', type=1)), + Token(47, 49, u'mit', LicenseSymbol(u'mit', is_exception=False)), + Token(51, 53, u'and', Keyword(value=u'and', type=1)), + Token(55, 57, u'mit', LicenseSymbol(u'mit', is_exception=False)), + Token(59, 62, u'with', Keyword(value=u'with', type=10)), + Token(64, 82, u'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+', is_exception=False)), + Token(84, 85, u'or', Keyword(value=u'or', type=2)), + Token(87, 94, u'LGPL 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)), + Token(96, 98, u'and', Keyword(value=u'and', type=1)), + Token(100, 115, u'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER', is_exception=False)), + Token(117, 120, u'with', Keyword(value=u'with', type=10)), + Token(122, 140, u'Classpath Exception', LicenseSymbol(u'classpath Exception', is_exception=True)), + Token(142, 144, u'and', Keyword(value=u'and', type=1)), + Token(146, 157, u'mit or later', LicenseSymbol(u'mit or later', is_exception=False)), + Token(159, 160, u'or', Keyword(value=u'or', type=2)), + Token(162, 169, u'LGPL 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)), + Token(171, 172, u'or', Keyword(value=u'or', type=2)), + Token(174, 176, u'mit', LicenseSymbol(u'mit', is_exception=False)), + Token(178, 179, u'or', Keyword(value=u'or', type=2)), + Token(181, 196, u'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER', is_exception=False)), + Token(198, 201, u'with', Keyword(value=u'with', type=10)), + Token(203, 221, u'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+', is_exception=False)), + Token(223, 225, u'and', Keyword(value=u'and', type=1)), + Token(227, 234, u'lgpl 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)), + Token(236, 237, u'or', Keyword(value=u'or', type=2)), + Token(239, 245, u'gpl-2.0', LicenseSymbol(u'GPL-2.0', is_exception=False)) ] assert expected == result - assert 246 == expected[-1].end + 1 - assert 246 == sum(len(r.string) for r in result) - - # skip spaces - result = list(strip_and_skip_spaces(result)) - # here only the first token is a space - assert expected[1:] == result - - # group results - - gpl2pluso = Output('GPL-2.0 or LATER', LicenseSymbol('GPL-2.0 or LATER', is_exception=False), 1) - cpex0 = Output('classpath Exception', LicenseSymbol('classpath Exception', is_exception=True), 1) - mito = Output('mit', LicenseSymbol('mit', is_exception=False), 1) - mieo1 = Output('mitthing with else+', LicenseSymbol('mitthing with else+', is_exception=False), 1) - lgplo = Output('LGPL 2.1', LicenseSymbol('LGPL 2.1', is_exception=False), 1) - mitoo = Output('mit or later', LicenseSymbol('mit or later', is_exception=False), 1) - gpl202 = Output('GPL-2.0', LicenseSymbol('GPL-2.0', is_exception=False), 1) - - with_kw = Output(' with ', WITH_KW, 0) - and_kw = Output(' and ', AND_KW, 0) - or_kw = Output(' or ', OR_KW, 0) expected_groups = [ - (Result(1, 16, 'GPL-2.0 or later', gpl2pluso), - Result(17, 22, ' with ', with_kw), - Result(23, 41, 'classpath Exception', cpex0)), - (Result(42, 46, ' and ', and_kw),), - (Result(47, 49, 'mit', mito),), - (Result(50, 54, ' and ', and_kw),), - (Result(55, 57, 'mit', mito), - Result(58, 63, ' with ', with_kw), - Result(64, 82, 'mitthing with ELSE+', mieo1)), - (Result(83, 86, ' or ', or_kw),), - (Result(87, 94, 'LGPL 2.1', lgplo),), - (Result(95, 99, ' and ', and_kw),), - (Result(100, 
115, 'GPL-2.0 or LATER', gpl2pluso), - Result(116, 121, ' with ', with_kw), - Result(122, 140, 'Classpath Exception', cpex0)), - (Result(141, 145, ' and ', and_kw),), - (Result(146, 157, 'mit or later', mitoo),), - (Result(158, 161, ' or ', or_kw),), - (Result(162, 169, 'LGPL 2.1', lgplo),), - (Result(170, 173, ' or ', or_kw),), - (Result(174, 176, 'mit', mito),), - (Result(177, 180, ' or ', or_kw),), - (Result(181, 196, 'GPL-2.0 or LATER', gpl2pluso), - Result(197, 202, ' with ', with_kw), - Result(203, 221, 'mitthing with ELSE+', mieo1)), - (Result(222, 226, ' and ', and_kw),), - (Result(227, 234, 'lgpl 2.1', lgplo),), - (Result(235, 238, ' or ', or_kw),), - (Result(239, 245, 'gpl-2.0', gpl202),) + (Token(1, 16, u'GPL-2.0 or later', LicenseSymbol(u'GPL-2.0 or LATER', is_exception=False)), + Token(18, 21, u'with', Keyword(value=u'with', type=10)), + Token(23, 41, u'classpath Exception', LicenseSymbol(u'classpath Exception', is_exception=True))), + + (Token(43, 45, u'and', Keyword(value=u'and', type=1)),), + (Token(47, 49, u'mit', LicenseSymbol(u'mit', is_exception=False)),), + (Token(51, 53, u'and', Keyword(value=u'and', type=1)),), + + (Token(55, 57, u'mit', LicenseSymbol(u'mit', is_exception=False)), + Token(59, 62, u'with', Keyword(value=u'with', type=10)), + Token(64, 82, u'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+', is_exception=False))), + + (Token(84, 85, u'or', Keyword(value=u'or', type=2)),), + (Token(87, 94, u'LGPL 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)),), + (Token(96, 98, u'and', Keyword(value=u'and', type=1)),), + + (Token(100, 115, u'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER', is_exception=False)), + Token(117, 120, u'with', Keyword(value=u'with', type=10)), + Token(122, 140, u'Classpath Exception', LicenseSymbol(u'classpath Exception', is_exception=True))), + + (Token(142, 144, u'and', Keyword(value=u'and', type=1)),), + (Token(146, 157, u'mit or later', LicenseSymbol(u'mit or later', is_exception=False)),), + (Token(159, 160, u'or', Keyword(value=u'or', type=2)),), + (Token(162, 169, u'LGPL 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)),), + (Token(171, 172, u'or', Keyword(value=u'or', type=2)),), + (Token(174, 176, u'mit', LicenseSymbol(u'mit', is_exception=False)),), + (Token(178, 179, u'or', Keyword(value=u'or', type=2)),), + + (Token(181, 196, u'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER', is_exception=False)), + Token(198, 201, u'with', Keyword(value=u'with', type=10)), + Token(203, 221, u'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+', is_exception=False))), + + (Token(223, 225, u'and', Keyword(value=u'and', type=1)),), + (Token(227, 234, u'lgpl 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)),), + (Token(236, 237, u'or', Keyword(value=u'or', type=2)),), + (Token(239, 245, u'gpl-2.0', LicenseSymbol(u'GPL-2.0', is_exception=False)),) ] - - result_groups = list(group_results_for_with_subexpression(result)) + result_groups = list(build_token_groups_for_with_subexpression(result)) assert expected_groups == result_groups # finally retest it all with tokenize @@ -1630,25 +1737,25 @@ def test_tokenize_step_by_step_does_not_munge_trailing_symbols(self): expected = [ (gpl2plus_with_cpex, 'GPL-2.0 or later with classpath Exception', 1), - (TOKEN_AND, ' and ', 42), + (TOKEN_AND, 'and', 43), (mit, 'mit', 47), - (TOKEN_AND, ' and ', 50), + (TOKEN_AND, 'and', 51), (mit_with_mitthing_with_else, 'mit with mitthing with ELSE+', 55), - (TOKEN_OR, ' or ', 83), + (TOKEN_OR, 'or', 84), (lgpl, 'LGPL 2.1', 87), - 
(TOKEN_AND, ' and ', 95), + (TOKEN_AND, 'and', 96), (gpl2plus_with_cpex, 'GPL-2.0 or LATER with Classpath Exception', 100), - (TOKEN_AND, ' and ', 141), + (TOKEN_AND, 'and', 142), (mitplus, 'mit or later', 146), - (TOKEN_OR, ' or ', 158), + (TOKEN_OR, 'or', 159), (lgpl, 'LGPL 2.1', 162), - (TOKEN_OR, ' or ', 170), + (TOKEN_OR, 'or', 171), (mit, 'mit', 174), - (TOKEN_OR, ' or ', 177), + (TOKEN_OR, 'or', 178), (gpl2plus_with_someplus, 'GPL-2.0 or LATER with mitthing with ELSE+', 181), - (TOKEN_AND, ' and ', 222), + (TOKEN_AND, 'and', 223), (lgpl, 'lgpl 2.1', 227), - (TOKEN_OR, ' or ', 235), + (TOKEN_OR, 'or', 236), (gpl2, 'gpl-2.0', 239), ] From 04ed06521c847705aaec35f20f924496d39022ec Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Fri, 3 Aug 2018 23:01:23 +0200 Subject: [PATCH 3/9] Do not use basestring for Python 3 #29 Signed-off-by: Philippe Ombredanne --- src/license_expression/__init__.py | 26 ++++++++++++------------ src/license_expression/_pyahocorasick.py | 15 +++++++++++--- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/license_expression/__init__.py b/src/license_expression/__init__.py index e67740e..fb61475 100644 --- a/src/license_expression/__init__.py +++ b/src/license_expression/__init__.py @@ -31,15 +31,6 @@ from __future__ import unicode_literals from __future__ import print_function -# Python 2 and 3 support -try: - # Python 2 - unicode - str = unicode # NOQA -except NameError: - # Python 3 - unicode = str # NOQA - from collections import defaultdict from collections import deque from collections import namedtuple @@ -74,6 +65,15 @@ from license_expression._pyahocorasick import Trie as Scanner from license_expression._pyahocorasick import Token +# Python 2 and 3 support +try: + # Python 2 + unicode + str = unicode # NOQA +except NameError: + # Python 3 + unicode = str # NOQA + TRACE = False logger = logging.getLogger(__name__) @@ -393,7 +393,7 @@ def parse(self, expression, validate=False, strict=False, **kwargs): if isinstance(expression, bytes): try: - expression = unicode(expression) + expression = str(expression) except: ext = type(expression) raise ExpressionError('expression must be a string and not: %(ext)r' % locals()) @@ -690,7 +690,7 @@ def __init__(self, key, aliases=tuple(), is_exception=False, *args, **kwargs): if not isinstance(key, str): if isinstance(key, bytes): try: - key = unicode(key) + key = str(key) except: raise ExpressionError( 'A license key must be a unicode string: %(key)r' % locals()) @@ -1138,11 +1138,11 @@ def as_symbols(symbols): continue if isinstance(symbol, bytes): try: - symbol = unicode(symbol) + symbol = str(symbol) except: raise TypeError('%(symbol)r is not a unicode string.' 
% locals()) - if isinstance(symbol, unicode): + if isinstance(symbol, str): if symbol.strip(): yield LicenseSymbol(symbol) diff --git a/src/license_expression/_pyahocorasick.py b/src/license_expression/_pyahocorasick.py index 35b9f00..ec84c0a 100644 --- a/src/license_expression/_pyahocorasick.py +++ b/src/license_expression/_pyahocorasick.py @@ -13,15 +13,24 @@ - support returning non-matched parts of a string """ -from __future__ import unicode_literals from __future__ import absolute_import from __future__ import print_function +from __future__ import unicode_literals from collections import deque from collections import OrderedDict import logging import re +# Python 2 and 3 support +try: + # Python 2 + unicode + str = unicode # NOQA +except NameError: + # Python 3 + unicode = str # NOQA + TRACE = False logger = logging.getLogger(__name__) @@ -107,7 +116,7 @@ def add(self, tokens_string, value=None): raise Exception('This Trie has been converted to an Aho-Corasick ' 'automaton and cannot be modified.') - if not tokens_string or not isinstance(tokens_string, basestring): + if not tokens_string or not isinstance(tokens_string, str): return tokens = [t for t in get_tokens(tokens_string) if t.strip()] @@ -135,7 +144,7 @@ def __get_node(self, tokens_string): contain the tokens_string. Private function retrieving a final node of the Trie for a given tokens_string. """ - if not tokens_string or not isinstance(tokens_string, basestring): + if not tokens_string or not isinstance(tokens_string, str): return tokens = [t for t in get_tokens(tokens_string) if t.strip()] From 0d1af80df1d094985c92be2949e38079affb8ec6 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Fri, 3 Aug 2018 23:06:21 +0200 Subject: [PATCH 4/9] Attempt to use a proper Python version Signed-off-by: Philippe Ombredanne --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1fe01c4..6e20f55 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,4 @@ -language: python +language: generic env: matrix: @@ -37,6 +37,8 @@ matrix: install: - pyenv install --list + - echo $PYTHON_EXE + - python --version - ./configure before_script: From 3d39c71c44602816d685d7f2e001afd21b390db7 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Sat, 4 Aug 2018 23:08:13 +0200 Subject: [PATCH 5/9] Rename scan to tokenize #29 Signed-off-by: Philippe Ombredanne --- src/license_expression/_pyahocorasick.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/license_expression/_pyahocorasick.py b/src/license_expression/_pyahocorasick.py index ec84c0a..fefe51f 100644 --- a/src/license_expression/_pyahocorasick.py +++ b/src/license_expression/_pyahocorasick.py @@ -364,9 +364,9 @@ def iter(self, tokens_string, include_unmatched=False, include_space=False): logger_debug() - def scan(self, string, include_unmatched=True, include_space=False): + def tokenize(self, string, include_unmatched=True, include_space=False): """ - Scan a string for matched and unmatched sub-sequences and yield non- + Tokenize a string for matched and unmatched sub-sequences and yield non- overlapping Token objects performing a modified Aho-Corasick search procedure: @@ -393,7 +393,7 @@ def tokenize(self, string, include_unmatched=True, include_space=False): >>> a.add('KL') >>> a.make_automaton() >>> string = 'a bcdef ghij kl' - >>> tokens = list(a.scan(string, include_space=True)) + >>> tokens = list(a.tokenize(string, include_space=True)) >>> expected = [ ...
Token(0, 0, u'a', None), @@ -411,7 +411,7 @@ def tokenize(self, string, include_unmatched=True, include_space=False): include_unmatched=include_unmatched, include_space=include_space) tokens = list(tokens) if TRACE: - logger_debug('scan.tokens:', tokens) + logger_debug('tokenize.tokens:', tokens) if not include_space: tokens = [t for t in tokens if t.string.strip()] tokens = filter_overlapping(tokens) From 1e34fc53d8e33f363193237d289e36784eabb9b1 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Sat, 4 Aug 2018 23:11:58 +0200 Subject: [PATCH 6/9] Add simple tokenizer parse option Cleanup Licensing.tokenize() Also refactor code to move tokenizing stages in their own functions Signed-off-by: Philippe Ombredanne --- src/license_expression/__init__.py | 750 +++++++++++++++-------------- tests/test__pyahocorasick.py | 8 +- tests/test_license_expression.py | 415 ++++++++++------ 3 files changed, 664 insertions(+), 509 deletions(-) diff --git a/src/license_expression/__init__.py b/src/license_expression/__init__.py index fb61475..b0ef76d 100644 --- a/src/license_expression/__init__.py +++ b/src/license_expression/__init__.py @@ -62,7 +62,7 @@ from boolean.boolean import TOKEN_LPAR from boolean.boolean import TOKEN_RPAR -from license_expression._pyahocorasick import Trie as Scanner +from license_expression._pyahocorasick import Trie as AdvancedTokenizer from license_expression._pyahocorasick import Token # Python 2 and 3 support @@ -140,10 +140,45 @@ class ExpressionError(Exception): # mapping of lowercase operator strings to an operator object OPERATORS = {'and': KW_AND, 'or': KW_OR, 'with': KW_WITH} +_simple_tokenizer = re.compile(''' + (?P<symop>[^\s\(\)]+) + | + (?P<space>\s+) + | + (?P<lpar>\() + | + (?P<rpar>\)) + ''', + re.VERBOSE | re.MULTILINE | re.UNICODE +).finditer + class Licensing(boolean.BooleanAlgebra): """ - Define a mini language to parse, validate and compare license expressions. + Licensing defines a mini language to parse, validate and compare license + expressions. This is the main entry point of this library. + + Some of the features are: + + - licenses can be validated against user-provided lists of known license + "symbols" (such as ScanCode licenses or the SPDX list). + + - flexible expression parsing and recognition of licenses (including + licenses with spaces and keywords (such as AND, OR, WITH) or parens in + their names). + + - in an expression, licenses can be more than just identifiers, such as + short or long names. + + - A license can have multiple aliases (such as GPLv2 or GPL2) and each will + be properly recognized when parsing. + + - expressions can be simplified, normalized, sorted and compared for + containment and/or logical equivalence thanks to a built-in boolean logic + engine. + + - Once parsed, expressions can be rendered using simple templates (for + instance to render HTML links in a GUI). For example: @@ -177,9 +212,9 @@ class Licensing(boolean.BooleanAlgebra): def __init__(self, symbols=tuple(), quiet=True): """ - Initialize a Licensing with an optional `symbols` sequence of LicenseSymbol - or LicenseSymbol-like objects or license key strings. If provided and this - list data is invalid, raise a ValueError. + Initialize a Licensing with an optional `symbols` sequence of + LicenseSymbol or LicenseSymbol-like objects or license key strings. If + provided and this list data is invalid, raise a ValueError.
""" super(Licensing, self).__init__(Symbol_class=LicenseSymbol, AND_class=AND, OR_class=OR) @@ -191,9 +226,11 @@ def __init__(self, symbols=tuple(), quiet=True): if symbols: symbols = tuple(as_symbols(symbols)) warns, errors = validate_symbols(symbols) + if warns and not quiet: for w in warns: print(w) + if errors and not quiet: for e in errors: print(e) @@ -202,13 +239,13 @@ def __init__(self, symbols=tuple(), quiet=True): raise ValueError('\n'.join(warns + errors)) # mapping of known symbol key to symbol for reference - self.known_symbols = {symbol.key: symbol for symbol in symbols} + self.known_symbols_by_key = {symbol.key: symbol for symbol in symbols} - # mapping of lowercase key and aliaes to symbol used to resolve symbols - self.symbol_by_key = get_symbols_by_key(symbols) + # mapping of known symbol lowercase key to symbol for reference + self.known_symbols_by_keylow = {symbol.key.lower(): symbol for symbol in symbols} - # Aho-Corasick automaton-based Scanner used for expression tokenizing - self.scanner = None + # Aho-Corasick automaton-based Advanced Tokenizer + self.advanced_tokenizer = None def is_equivalent(self, expression1, expression2, **kwargs): """ @@ -338,7 +375,7 @@ def unknown_license_symbols(self, expression, unique=True, **kwargs): Extra kwargs are passed down to the parse() function. """ return [ls for ls in self.license_symbols(expression, unique=unique, decompose=True, **kwargs) - if not ls.key in self.known_symbols] + if not ls.key in self.known_symbols_by_key] def unknown_license_keys(self, expression, unique=True, **kwargs): """ @@ -355,29 +392,33 @@ def unknown_license_keys(self, expression, unique=True, **kwargs): symbols = self.unknown_license_symbols(expression, unique=False, **kwargs) return self._keys(symbols, unique) - def parse(self, expression, validate=False, strict=False, **kwargs): + def parse(self, expression, validate=False, strict=False, simple=False, **kwargs): """ - Return a new license LicenseExpression object by parsing a license expression - string. Check that the expression syntax is valid and raise an Exception, - ExpressionError or ParseError on errors. Return None for empty expressions. - `expression` is either a string or a LicenseExpression object. If this is a - LicenseExpression it is retruned as-is. - - Symbols are always recognized from known symbols if `symbols` were provided - Licensing creation time: each license and exception is recognized from known - license keys (and from aliases for a symbol if available). - - If `validate` is True and a symbol is unknown, an ExpressionError error + Return a new license LicenseExpression object by parsing a license + `expression` string. Check that the expression syntax is valid and raise + an Exception, an ExpressionError or a ParseError on errors. + Return None for empty expressions. + `expression` is either a string or a LicenseExpression object. If this + is a LicenseExpression it is returned as-is. + Symbols are always recognized from known symbols if `symbols` were + provided at Licensing creation time: each license and exception is + recognized from known license keys (and from aliases for a symbol if + available). + + If `validate` is True and a license is unknown, an ExpressionError error is raised with a message listing the unknown license keys. - If `validate` is False, no error is triggered. + If `validate` is False, no error is raised. 
You can call the
+        `unknown_license_keys` or `unknown_license_symbols` methods to get
+        unknown license keys or symbols found in a parsed LicenseExpression.

-        You can call the `unknown_license_keys` or `unknown_license_symbols` methods
-        to get unknown license keys or symbols found in a parsed LicenseExpression.
+        If `strict` is True, additional exceptions will be raised in a
+        "WITH" expression such as "XXX with ZZZ" if the XXX symbol has
+        `is_exception` set to True or the ZZZ symbol has `is_exception` set to
+        False. This checks that symbols are used strictly as constructed.

-        If `strict` is True, additional exceptions will be raised if in a expression
-        such as "XXX with ZZZ" if the XXX symbol has `is_exception` set to True or
-        the YYY symbol has `is_exception` set to False.
+        If `simple` is True, parsing will use a simple tokenizer that assumes
+        that license symbols are all license keys that cannot contain spaces.

         For example:
         >>> expression = 'EPL-1.0 and Apache-1.1 OR GPL-2.0 with Classpath-exception'
@@ -423,7 +464,7 @@ def parse(self, expression, validate=False, strict=False, **kwargs):

         return expression

-    def tokenize(self, expression, strict=False):
+    def tokenize(self, expression, strict=False, simple=False):
         """
         Return an iterable of 3-tuple describing each token given an expression
         unicode string. See boolean.BooleanAlgebra.tokenize() for API details.

         This 3-tuple contains these items: (token, token string, position):
         - token: either a Symbol instance or one of the TOKEN_* token types.
         - token string: the original token unicode string.
-        - position: some simple object describing the starting position of the
-          original token string in the `expr` string. It can be an int for a
-          character offset, or a tuple of starting (row/line, column).
+        - position: the starting index of the token string in the `expr` string.
+
+        If `strict` is True, additional exceptions will be raised in an
+        expression such as "XXX with ZZZ" if the XXX symbol has `is_exception`
+        set to True or the ZZZ symbol has `is_exception` set to False.

-        If `strict` is True, additional exceptions will be raised in a expression
-        such as "XXX with ZZZ" if the XXX symbol has is_exception` set to True or the
-        ZZZ symbol has `is_exception` set to False.
+        If `simple` is True, use a simple tokenizer that assumes that license
+        symbols are all license keys that cannot contain spaces.
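A hedged sketch of the expected tokenize() output with no known symbols (token objects elided; positions assume the starting-index semantics described above):

    >>> licensing = Licensing()
    >>> triples = list(licensing.tokenize('mit AND gpl-2.0'))
    >>> [(s, p) for _t, s, p in triples] == [('mit', 0), ('AND', 4), ('gpl-2.0', 8)]
    True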
""" + if not expression: + return + + if not isinstance(expression, str): + raise ParseError(error_code=PARSE_EXPRESSION_NOT_UNICODE) + + if simple: + if TRACE: logger_debug('using simple tokenizer') + tokens = self.simple_tokenizer(expression) + else: + if TRACE: logger_debug('using advanced tokenizer') + advanced_tokenizer = self.get_advanced_tokenizer() + tokens = advanced_tokenizer.tokenize(expression) -# if self.known_symbols: - if TRACE: - logger_debug('tokenize, using known_symbols') - # scan with an automaton, recognize whole symbols+keywords or only keywords - scanner = self.get_scanner() - tokens = scanner.scan(expression) -# else: -# if TRACE: -# logger_debug('tokenize, using plain splitter') -# tokens = splitter(expression) - - tokens = list(tokens) if TRACE: + tokens = list(tokens) logger_debug('tokenize: tokens') pprint(tokens) # Assign symbol for unknown tokens - tokens = list(build_symbols_from_unmatched_tokens(tokens)) + tokens = build_symbols_from_unknown_tokens(tokens) if TRACE: + tokens = list(tokens) logger_debug('tokenize: token with symbols') pprint(tokens) # skip whitespace-only tokens - tokens = [t for t in tokens if t.string and t.string.strip()] + tokens = (t for t in tokens if t.string and t.string.strip()) if TRACE: + tokens = list(tokens) logger_debug('tokenize: token NO spaces') pprint(tokens) - # group Symbols or operators tokens separated only by spaces - # attempt to look this token_group of symbols in a table. - # use symbol if available - # otherwise ....? - - token_groups = build_token_groups_for_with_subexpression(tokens) + # create atomic LicenseWithExceptionSymbol from WITH subexpressions + tokens = replace_with_subexpression_by_license_symbol(tokens, strict) if TRACE: - token_groups = list(token_groups) - logger_debug('tokenize: token_groups') - pprint(token_groups) + tokens = list(tokens) + logger_debug('tokenize: LicenseWithExceptionSymbol replaced') + pprint(tokens) - for token_group in token_groups: - len_group = len(token_group) + # finally yield the actual args expected by the boolean parser + for token in tokens: + pos = token.start + token_string = token.string + token_value = token.value - if not len_group: - # This should never happen - continue + if isinstance(token_value, BaseSymbol): + token_obj = token_value + elif isinstance(token_value, Keyword): + token_obj = token_value.type + else: + raise ParseError(error_code=PARSE_INVALID_EXPRESSION) - if len_group == 1: - # a single token - result = token_group[0] - pos = result.start - token_string = result.string - val = result.value - if val: - if isinstance(val, Keyword): - # keyword - token = val.type - # WITH is not known from the boolean parser as a proper - # boolean element so we handle validation ourselves: by - # design a single group cannot be a single 'WITH' keyword: - # this is an error that we catch and raise here. - if token == TOKEN_WITH: - raise ParseError(token_type=TOKEN_WITH, - token_string=result.string, - position=result.start, - error_code=PARSE_INVALID_EXPRESSION) - - elif isinstance(val, LicenseSymbol): - if strict and val.is_exception: - raise ParseError(token_type=TOKEN_SYMBOL, - token_string=result.string, - position=result.start, - error_code=PARSE_INVALID_EXCEPTION) - - # known symbol: The strict check above handled possible errors before. 
- token = val - else: - # this should not be possible by design - raise Exception('Licensing.tokenize is internally confused...') - else: - token = LicenseSymbol(result.string) + yield token_obj, token_string, pos - else: - if len_group != 3: - # this should never happen - string = ' '.join([tok.string for tok in token_group]) - start = token_group[0].start - raise ParseError( - TOKEN_SYMBOL, string, start, PARSE_INVALID_EXPRESSION) + def get_advanced_tokenizer(self): + """ + Return an AdvancedTokenizer instance either cached or created as needed. - # this is a A with B seq of three tokens - lic_token, WITH , exc_token = token_group - pos = lic_token.start - WITHs = WITH.string.strip() - token_string = ' '.join([lic_token.string, WITHs, exc_token.string]) + If symbols were provided when this Licensing object was created, the + tokenizer will recognize known symbol keys and aliases (ignoring case) + when tokenizing expressions. - # licenses - lic_sym = lic_token.value + A license symbol is any string separated by keywords and parens (and it + can include spaces). + """ + if self.advanced_tokenizer is not None: + return self.advanced_tokenizer - # this should not happen - if lic_sym and not isinstance(lic_sym, LicenseSymbol): - raise ParseError(TOKEN_SYMBOL, lic_token.string, lic_token.start, - PARSE_INVALID_SYMBOL) + self.advanced_tokenizer = tokenizer = AdvancedTokenizer() - if not lic_sym: - lic_sym = LicenseSymbol(lic_token.string, is_exception=False) + add_item = tokenizer.add + for keyword in KEYWORDS: + add_item(keyword.value, keyword) - if not isinstance(lic_sym, LicenseSymbol): - raise ParseError(TOKEN_SYMBOL, lic_token.string, lic_token.start, - PARSE_INVALID_SYMBOL) + # self.known_symbols_by_key has been created at Licensing initialization time and is + # already validated and trusted here + for key, symbol in self.known_symbols_by_key.items(): + # always use the key even if there are no aliases. + add_item(key, symbol) + aliases = getattr(symbol, 'aliases', []) + for alias in aliases: + # normalize spaces for each alias. The AdvancedTokenizer will lowercase them + if alias: + alias = ' '.join(alias.split()) + add_item(alias, symbol) - if strict and lic_sym.is_exception: - raise ParseError(TOKEN_SYMBOL, lic_token.string, lic_token.start, - PARSE_INVALID_EXCEPTION) + tokenizer.make_automaton() + return tokenizer - # exception - exc_sym = exc_token.value + def advanced_tokenizer(self, expression): + """ + Return an iterable of Token describing each token given an expression + unicode string. + """ + tokenizer = self.get_advanced_tokenizer() + return tokenizer.tokenize(expression) - # this should not happen - if exc_sym and not isinstance(exc_sym, LicenseSymbol): - raise ParseError(TOKEN_SYMBOL, lic_sym.string, lic_sym.start, - PARSE_INVALID_SYMBOL) - if exc_sym: - exc_sym = copy(exc_sym) + def simple_tokenizer(self, expression): + """ + Return an iterable of Token describing each token given an expression + unicode string. - if not exc_sym: - exc_sym = LicenseSymbol(exc_token.string) + The split is done on spaces, keywords and parens. Anything else is a + symbol token, e.g. a typically license key or license id (that contains + no spaces or parens). - if not isinstance(exc_sym, LicenseSymbol): - raise ParseError(TOKEN_SYMBOL, exc_token.string, exc_token.start, - PARSE_INVALID_SYMBOL) + If symbols were provided when this Licensing object was created, the + tokenizer will recognize known symbol keys (ignoring case) when + tokenizing expressions. 
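For instance, a minimal sketch of the simple tokenizer on an expression with parens (assuming no known symbols, so plain strings become LicenseSymbol values and spaces are yielded as-is):

    >>> licensing = Licensing()
    >>> [t.string for t in licensing.simple_tokenizer('mit AND (gpl-2.0)')] == ['mit', ' ', 'AND', ' ', '(', 'gpl-2.0', ')']
    True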
+ """ - if strict and self.known_symbols and not exc_sym.is_exception: - raise ParseError(TOKEN_SYMBOL, exc_token.string, exc_token.start, - PARSE_INVALID_SYMBOL_AS_EXCEPTION) + symbols = self.known_symbols_by_keylow or {} + + for match in _simple_tokenizer(expression): + if not match: + continue + # set start and end as string indexes + start, end = match.span() + end = end - 1 + match_getter = match.groupdict().get + + space = match_getter('space') + if space: + yield Token(start, end, space, None) + + lpar = match_getter('lpar') + if lpar: + yield Token(start, end, lpar, KW_LPAR) + + rpar = match_getter('rpar') + if rpar: + yield Token(start, end, rpar, KW_RPAR) + + sym_or_op = match_getter('symop') + if sym_or_op: + sym_or_op_lower = sym_or_op.lower() + + operator = OPERATORS.get(sym_or_op_lower) + if operator: + yield Token(start, end, sym_or_op, operator) + else: + sym = symbols.get(sym_or_op_lower) + if not sym: + sym = LicenseSymbol(key=sym_or_op) + yield Token(start, end, sym_or_op, sym) - token = LicenseWithExceptionSymbol(lic_sym, exc_sym, strict) - yield token, token_string, pos +def build_symbols_from_unknown_tokens(tokens): + """ + Yield Token given a sequence of Token replacing unmatched contiguous Tokens + by a single token with a LicenseSymbol. + """ + tokens = list(tokens) + + unmatched = deque() - def get_scanner(self): + def build_token_with_symbol(): """ - Return a scanner either cached or created as needed. If symbols were provided - when this Licensing object was created, the scanner will recognize known - symbols when tokenizing expressions. Otherwise, only keywords are recognized - and a license symbol is anything in between keywords. + Build and return a new Token from accumulated unmatched tokens or None. """ - if self.scanner is not None: - return self.scanner + if not unmatched: + return + # strip trailing spaces + trailing_spaces = [] + while unmatched and not unmatched[-1].string.strip(): + trailing_spaces.append(unmatched.pop()) - self.scanner = scanner = Scanner() + if unmatched: + string = ' '.join(t.string for t in unmatched if t.string.strip()) + start = unmatched[0].start + end = unmatched[-1].end + toksym = LicenseSymbol(string) + unmatched.clear() + yield Token(start, end, string, toksym) - for keyword in KEYWORDS: - scanner.add(keyword.value, keyword) + for ts in trailing_spaces: + yield ts - # self.known_symbols has been created at Licensing initialization time and is - # already validated and trusted here - for key, symbol in self.known_symbols.items(): - # always use the key even if there are no aliases. - scanner.add(key, symbol) - aliases = getattr(symbol, 'aliases', []) - for alias in aliases: - # normalize spaces for each alias. The Scanner will lowercase them - if alias: - alias = ' '.join(alias.split()) - scanner.add(alias, symbol) + for tok in tokens: + if tok.value: + for symtok in build_token_with_symbol(): + yield symtok + yield tok + else: + if not unmatched and not tok.string.strip(): + # skip leading spaces + yield tok + else: + unmatched.append(tok) - scanner.make_automaton() - return scanner + # end remainders + for symtok in build_token_with_symbol(): + yield symtok -def get_symbols_by_key(symbols): +def build_token_groups_for_with_subexpression(tokens): """ - Return a mapping of key->symbol given an iterable of symbols + Yield tuples of Token given a sequence of Token such that: + - all symbol-with-symbol sequences of 3 tokens are grouped in a three-tuple + - other tokens are a single token wrapped in a tuple. 
""" - by_key = {} - for symbol in symbols: - by_key[symbol.key.lower()] = symbol - aliases = getattr(symbol, 'aliases', []) - for alias in aliases: - if alias: - alias = ' '.join(alias.split()) - if alias: - by_key[alias.lower()] = symbol - return by_key + + # if n-1 is sym, n is with and n+1 is sym: yield this as a group for a with + # exp otherwise: yield each single token as a group + + tokens = list(tokens) + + # check three contiguous tokens that may form "lic WITh exception" sequence + triple_len = 3 + + # shortcut if there are no grouping possible + if len(tokens) < triple_len: + for tok in tokens: + yield (tok,) + return + + # accumulate three contiguous tokens + triple = deque() + triple_popleft = triple.popleft + triple_clear = triple.clear + tripple_append = triple.append + + for tok in tokens: + if len(triple) == triple_len: + if is_with_subexpression(triple): + yield tuple(triple) + triple_clear() + else: + prev_tok = triple_popleft() + yield (prev_tok,) + tripple_append(tok) + + # end remainders + if triple: + if len(triple) == triple_len and is_with_subexpression(triple): + yield tuple(triple) + else: + for tok in triple: + yield (tok,) + + +def is_with_subexpression(tokens_tripple): + """ + Return True if a Token tripple is a WITH license sub-expression. + """ + lic, wit, exc = tokens_tripple + return (isinstance(lic.value, LicenseSymbol) + and wit.value == KW_WITH + and isinstance(exc.value, LicenseSymbol) + ) + + +def replace_with_subexpression_by_license_symbol(tokens, strict=False): + """ + Given an iterable of Token, yiled token, replacing any XXX WITH ZZZ + subexpression by a LicenseWithExceptionSymbol symbol. + + Check validity of with subexpessions and raise ParseError as needed. + + If `strict` is True also raise ParseError if the left hand side + LicenseSymbol has is_exception True or if the right hand side + LicenseSymbol has is_exception False. + """ + token_groups = build_token_groups_for_with_subexpression(tokens) + + for token_group in token_groups: + len_group = len(token_group) + + if not len_group: + # This should never happen + continue + + if len_group == 1: + # a single token + token = token_group[0] + tval = token.value + + if isinstance(tval, Keyword): + if tval.type == TOKEN_WITH: + # keyword + # a single group cannot be a single 'WITH' keyword: + # this is an error that we catch and raise here. 
+ raise ParseError( + token_type=TOKEN_WITH, token_string=token.string, + position=token.start, error_code=PARSE_INVALID_EXPRESSION) + + elif isinstance(tval, LicenseSymbol): + if strict and tval.is_exception: + raise ParseError( + token_type=TOKEN_SYMBOL, token_string=token.string, + position=token.start, error_code=PARSE_INVALID_EXCEPTION) + + else: + # this should not be possible by design + raise Exception('Licensing.tokenize is internally confused...:' + repr(tval)) + + yield token + continue + + if len_group != 3: + # this should never happen + string = ' '.join([tok.string for tok in token_group]) + start = token_group[0].start + raise ParseError( + TOKEN_SYMBOL, string, start, PARSE_INVALID_EXPRESSION) + + # from now on we have a tripple of tokens: a WITH sub-expression such as "A with + # B" seq of three tokens + lic_token, WITH , exc_token = token_group + + token_string = ' '.join([ + lic_token.string, + WITH.string.strip(), + exc_token.string + ]) + + # the left hand side license symbol + lic_sym = lic_token.value + + # this should not happen + if not isinstance(lic_sym, LicenseSymbol): + raise ParseError( + TOKEN_SYMBOL, lic_token.string, lic_token.start, + PARSE_INVALID_SYMBOL) + + if strict and lic_sym.is_exception: + raise ParseError( + TOKEN_SYMBOL, lic_token.string, lic_token.start, + PARSE_INVALID_EXCEPTION) + + # the right hand side exception symbol + exc_sym = exc_token.value + + if not isinstance(exc_sym, LicenseSymbol): + raise ParseError( + TOKEN_SYMBOL, lic_sym.string, lic_sym.start, + PARSE_INVALID_SYMBOL) + + if strict and not exc_sym.is_exception: + raise ParseError( + TOKEN_SYMBOL, exc_token.string, exc_token.start, + PARSE_INVALID_SYMBOL_AS_EXCEPTION) + + lic_exc_sym = LicenseWithExceptionSymbol(lic_sym, exc_sym, strict) + + token = Token( + lic_token.start, + exc_token.end, + token_string, + lic_exc_sym, + ) + yield token class Renderable(object): @@ -675,7 +885,7 @@ def __contains__(self, other): is_valid_license_key = re.compile(r'^[-\w\s\.\+]+$', re.UNICODE).match -#FIXME: we need to implement comparison!!!! +# TODO: we need to implement comparison by hand instead @total_ordering class LicenseSymbol(BaseSymbol): """ @@ -786,12 +996,12 @@ def symbol_like(cls, symbol): return hasattr(symbol, 'key') and hasattr(symbol, 'is_exception') -#FIXME: we need to implement comparison!!!! +# TODO: we need to implement comparison by hand instead @total_ordering class LicenseSymbolLike(LicenseSymbol): """ - A LicenseSymbolLike object wraps a symbol-like object to expose a LicenseSymbol - behavior. + A LicenseSymbolLike object wraps a symbol-like object to expose a + LicenseSymbol behavior. """ def __init__(self, symbol_like, *args, **kwargs): @@ -847,7 +1057,7 @@ def __lt__(self, other): return NotImplemented -#FIXME: we need to implement comparison!!!! 
+# TODO: we need to implement comparison by hand instead @total_ordering class LicenseWithExceptionSymbol(BaseSymbol): """ @@ -991,6 +1201,8 @@ class AND(RenderableFunction, boolean.AND): """ def __init__(self, *args): + if len(args) < 2: + raise ExpressionError('AND requires two or more licenses as in: MIT AND BSD') super(AND, self).__init__(*args) self.operator = ' AND ' @@ -1001,6 +1213,8 @@ class OR(RenderableFunction, boolean.OR): """ def __init__(self, *args): + if len(args) < 2: + raise ExpressionError('OR requires two or more licenses as in: MIT OR BSD') super(OR, self).__init__(*args) self.operator = ' OR ' @@ -1019,118 +1233,13 @@ def ordered_unique(seq): return uniques -def build_symbols_from_unmatched_tokens(tokens): - """ - Yield Token given a sequence of Token replacing unmatched contiguous Tokens - by a single token with a LicenseSymbol. - """ - tokens = list(tokens) - - unmatched = deque() - - def build_token_with_symbol(): - """ - Build and return a new Token from accumulated unmatched tokens or None. - """ - if not unmatched: - return - # strip trailing spaces - trailing_spaces = [] - while unmatched and not unmatched[-1].string.strip(): - trailing_spaces.append(unmatched.pop()) - - if unmatched: - string = ' '.join(t.string for t in unmatched if t.string.strip()) - start = unmatched[0].start - end = unmatched[-1].end - toksym = LicenseSymbol(string) - unmatched.clear() - yield Token(start, end, string, toksym) - - for ts in trailing_spaces: - yield ts - - for tok in tokens: - if tok.value: - for symtok in build_token_with_symbol(): - yield symtok - yield tok - else: - if not unmatched and not tok.string.strip(): - # skip leading spaces - yield tok - else: - unmatched.append(tok) - - # end remainders - for symtok in build_token_with_symbol(): - yield symtok - - -def build_token_groups_for_with_subexpression(tokens): - """ - Yield tuples of Token given a sequence of Token such that: - - all symbol-with-symbol sequences of 3 tokens are grouped in a three-tuple - - other tokens are a single token wrapped in a tuple. - """ - - # if n-1 is sym, n is with and n+1 is sym: yield this as a group for a with - # exp otherwise: yield each single token as a group - - tokens = list(tokens) - - # check three contiguous token from scanning at a time - triple_len = 3 - - # shortcut if there are no grouping possible - if len(tokens) < triple_len: - for tok in tokens: - yield (tok,) - return - - # accumulate three contiguous tokens - triple = deque() - triple_popleft = triple.popleft - triple_clear = triple.clear - tripple_append = triple.append - - for tok in tokens: - if len(triple) == triple_len: - if is_with_subexpression(triple): - yield tuple(triple) - triple_clear() - else: - prev_tok = triple_popleft() - yield (prev_tok,) - tripple_append(tok) - - # end remainders - if triple: - if len(triple) == triple_len and is_with_subexpression(triple): - yield tuple(triple) - else: - for tok in triple: - yield (tok,) - - -def is_with_subexpression(tokens_tripple): - """ - Return True if a Token tripple is a WITH license sub-expression. - """ - lic, wit, exc = tokens_tripple - return (isinstance(lic.value, LicenseSymbol) - and wit.value == KW_WITH - and isinstance(exc.value, LicenseSymbol) - ) - - def as_symbols(symbols): """ Return an iterable of LicenseSymbol objects from a sequence of `symbols` or - strings. If an item is a string, then create a new LicenseSymbol for it using the - string as key. If this is not a string it must be a LicenseSymbol-like type. 
It - will raise a TypeError expection if an item is neither a string or LicenseSymbol- - like. + strings. If an item is a string, then create a new LicenseSymbol for it + using the string as key. If this is not a string it must be a LicenseSymbol- + like type. It will raise a TypeError expection if an item is neither a + string or LicenseSymbol- like. """ if symbols: for symbol in symbols: @@ -1273,62 +1382,3 @@ def validate_symbols(symbols, validate_keys=False): errors.append('Duplicated or empty aliases ignored for license key: %(dupeal)r.' % locals()) return warnings, errors - - -_tokenizer = re.compile(''' - (?P[^\s\(\)]+) - | - (?P\s+) - | - (?P\() - | - (?P\)) - ''', - re.VERBOSE | re.MULTILINE | re.UNICODE -) - - -def splitter(expression): - """ - Return an iterable of Tokens describing each token given an expression - unicode string. - - The split is done on spaces and parens. Anything else is either a token or a - symbol. - """ - if not expression: - return - - if not isinstance(expression, str): - raise ParseError(error_code=PARSE_EXPRESSION_NOT_UNICODE) - - for match in _tokenizer.finditer(expression): - if not match: - continue - # set start and end as string indexes - start, end = match.span() - end = end - 1 - match_getter = match.groupdict().get - - space = match_getter('space') - if space: - yield Token(start, end, space, None) - - lpar = match_getter('lpar') - if lpar: - yield Token(start, end, lpar, KW_LPAR) - - rpar = match_getter('rpar') - if rpar: - yield Token(start, end, rpar, KW_RPAR) - - operator_or_sym = match_getter('symbol') - if not operator_or_sym: - continue - - operator = OPERATORS.get(operator_or_sym.lower()) - if operator: - yield Token(start, end, operator_or_sym, operator) - else: - sym = LicenseSymbol(key=operator_or_sym) - yield Token(start, end, operator_or_sym, sym) diff --git a/tests/test__pyahocorasick.py b/tests/test__pyahocorasick.py index 22fc917..e7ad883 100644 --- a/tests/test__pyahocorasick.py +++ b/tests/test__pyahocorasick.py @@ -177,7 +177,7 @@ def get_test_automaton(): assert expected == result - def test_iter_vs_scan(self): + def test_iter_vs_tokenize(self): def get_test_automaton(): words = '( AND ) OR'.split() @@ -215,10 +215,10 @@ def get_test_automaton(): assert expected == result - result = list(t.scan(test_string, include_unmatched=True, include_space=True)) + result = list(t.tokenize(test_string, include_unmatched=True, include_space=True)) assert expected == result - def test_scan_with_unmatched_and_space(self): + def test_tokenize_with_unmatched_and_space(self): def get_test_automaton(): words = '( AND ) OR'.split() @@ -232,7 +232,7 @@ def get_test_automaton(): # 111111111122222222223 # 0123456789012345678901234567890 t = get_test_automaton() - result = list(t.scan(test_string, include_unmatched=True, include_space=True)) + result = list(t.tokenize(test_string, include_unmatched=True, include_space=True)) expected = [ Token(0, 0, u'(', u'('), Token(1, 1, u'(', u'('), diff --git a/tests/test_license_expression.py b/tests/test_license_expression.py index dde755a..f42324e 100644 --- a/tests/test_license_expression.py +++ b/tests/test_license_expression.py @@ -17,6 +17,7 @@ from __future__ import print_function from __future__ import unicode_literals +from collections import namedtuple from collections import OrderedDict from unittest import TestCase import sys @@ -40,7 +41,6 @@ from license_expression import Token from license_expression import build_token_groups_for_with_subexpression -from license_expression import splitter from 
license_expression import validate_symbols from license_expression import TOKEN_AND @@ -148,9 +148,9 @@ def test_tokenize_plain4(self): expected = [ (TOKEN_LPAR, '(', 0), (TOKEN_LPAR, '(', 1), - (LicenseSymbol(key=u'l-a+'), u'l-a+', 2), + (LicenseSymbol(key=u'l-a+'), 'l-a+', 2), (TOKEN_AND, 'AND', 7), - (LicenseSymbol(key=u'l-b'), u'l-b', 11), + (LicenseSymbol(key=u'l-b'), 'l-b', 11), (TOKEN_RPAR, ')', 14), (TOKEN_OR, 'OR', 16), (TOKEN_LPAR, '(', 19), @@ -204,11 +204,11 @@ def test_tokenize_1_with_symbols(self): # 0123456789012345678901234567890123456789012 expected = [ - (gpl_20, u'The GNU GPL 20', 0), - (TOKEN_OR, u'or', 15), - (lgpl_21, u'LGPL v2.1', 18), - (TOKEN_AND, u'AND', 28), - (mit, u'MIT license', 32) + (gpl_20, 'The GNU GPL 20', 0), + (TOKEN_OR, 'or', 15), + (lgpl_21, 'LGPL v2.1', 18), + (TOKEN_AND, 'AND', 28), + (mit, 'MIT license', 32) ] assert expected == list(result) @@ -218,11 +218,11 @@ def test_tokenize_1_no_symbols(self): result = licensing.tokenize('The GNU GPL 20 or LGPL v2.1 AND MIT license') expected = [ - (LicenseSymbol(u'The GNU GPL 20'), u'The GNU GPL 20', 0), - (TOKEN_OR, u'or', 15), - (LicenseSymbol(u'LGPL v2.1'), u'LGPL v2.1', 18), - (TOKEN_AND, u'AND', 28), - (LicenseSymbol(u'MIT license'), u'MIT license', 32) + (LicenseSymbol(u'The GNU GPL 20'), 'The GNU GPL 20', 0), + (TOKEN_OR, 'or', 15), + (LicenseSymbol(u'LGPL v2.1'), 'LGPL v2.1', 18), + (TOKEN_AND, 'AND', 28), + (LicenseSymbol(u'MIT license'), 'MIT license', 32) ] assert expected == list(result) @@ -263,7 +263,7 @@ def test_tokenize_unknown_as_trailing_single_attached_character(self): l = Licensing(symbols) result = list(l.tokenize('mit2')) expected = [ - (LicenseSymbol(u'mit2', is_exception=False), u'mit2', 0), + (LicenseSymbol(u'mit2'), 'mit2', 0), ] assert expected == result @@ -321,7 +321,12 @@ def test_parse_raise_ParseError(self): licensing.parse(expression) self.fail('ParseError should be raised') except ParseError as pe: - expected = {'error_code': PARSE_UNBALANCED_CLOSING_PARENS, 'position': 48, 'token_string': ')', 'token_type': TOKEN_RPAR} + expected = { + 'error_code': PARSE_UNBALANCED_CLOSING_PARENS, + 'position': 48, + 'token_string': ')', + 'token_type': TOKEN_RPAR + } assert expected == _parse_error_as_dict(pe) def test_parse_raise_ExpressionError_when_validating(self): @@ -333,29 +338,61 @@ def test_parse_raise_ExpressionError_when_validating(self): except ExpressionError as ee: assert 'Unknown license key(s): gpl, bsd, lgpl, exception' == str(ee) - def test_parse_raise_ExpressionError_when_validating_strict(self): + def test_parse_raise_ParseError_when_validating_strict(self): expression = 'gpl and bsd or lgpl with exception' licensing = Licensing() try: licensing.parse(expression, validate=True, strict=True) self.fail('Exception not raised') + except ParseError as pe: + expected = { + 'error_code': PARSE_INVALID_SYMBOL_AS_EXCEPTION, + 'position': 25, + 'token_string': 'exception', + 'token_type': TOKEN_SYMBOL + } + assert expected == _parse_error_as_dict(pe) + + def test_parse_raise_ParseError_when_strict_no_validate(self): + expression = 'gpl and bsd or lgpl with exception' + licensing = Licensing() + try: + licensing.parse(expression, validate=False, strict=True) + self.fail('Exception not raised') + except ParseError as pe: + expected = { + 'error_code': PARSE_INVALID_SYMBOL_AS_EXCEPTION, + 'position': 25, + 'token_string': 'exception', + 'token_type': TOKEN_SYMBOL + } + assert expected == _parse_error_as_dict(pe) + + def 
test_parse_raise_ExpressionError_when_validating_strict_with_unknown(self): + expression = 'gpl and bsd or lgpl with exception' + licensing = Licensing(symbols=[LicenseSymbol('exception', is_exception=True)]) + try: + licensing.parse(expression, validate=True, strict=True) except ExpressionError as ee: - assert str(ee).startswith('exception_symbol must be an exception with "is_exception" set to True:') + assert 'Unknown license key(s): gpl, bsd, lgpl' == str(ee) def test_parse_in_strict_mode_for_solo_symbol(self): expression = 'lgpl' licensing = Licensing() licensing.parse(expression, strict=True) - def test_parse_invalid_expression_raise_expression(self): + def test_parse_invalid_expression_raise_exception(self): licensing = Licensing() - expr = 'wrong' licensing.parse(expr) + def test_parse_not_invalid_expression_rais_not_exception(self): + licensing = Licensing() expr = 'l-a AND none' licensing.parse(expr) + def test_parse_invalid_expression_raise_exception3(self): + licensing = Licensing() expr = '(l-a + AND l-b' try: licensing.parse(expr) @@ -363,6 +400,8 @@ def test_parse_invalid_expression_raise_expression(self): except ParseError: pass + def test_parse_invalid_expression_raise_exception4(self): + licensing = Licensing() expr = '(l-a + AND l-b))' try: licensing.parse(expr) @@ -370,20 +409,26 @@ def test_parse_invalid_expression_raise_expression(self): except ParseError: pass + def test_parse_invalid_expression_raise_exception5(self): + licensing = Licensing() expr = 'l-a AND' try: licensing.parse(expr) self.fail("Exception not raised when validating '%s'" % expr) - except ParseError: - pass + except ExpressionError as ee: + assert 'AND requires two or more licenses as in: MIT AND BSD' == str(ee) + def test_parse_invalid_expression_raise_exception6(self): + licensing = Licensing() expr = 'OR l-a' try: licensing.parse(expr) self.fail("Exception not raised when validating '%s'" % expr) - except ParseError: - pass + except ExpressionError as ee: + assert 'OR requires two or more licenses as in: MIT OR BSD' == str(ee) + def test_parse_not_invalid_expression_raise_no_exception2(self): + licensing = Licensing() expr = '+l-a' licensing.parse(expr) @@ -408,7 +453,12 @@ def test_parse_errors_catch_invalid_nesting(self): licensing.parse('mit (and LGPL 2.1)') self.fail('Exception not raised') except ParseError as pe: - expected = {'error_code': PARSE_INVALID_NESTING, 'position': 4, 'token_string': '(', 'token_type': TOKEN_LPAR} + expected = { + 'error_code': PARSE_INVALID_NESTING, + 'position': 4, + 'token_string': '(', + 'token_type': TOKEN_LPAR + } assert expected == _parse_error_as_dict(pe) def test_parse_errors_catch_invalid_expression_with_bare_and(self): @@ -416,18 +466,18 @@ def test_parse_errors_catch_invalid_expression_with_bare_and(self): try: licensing.parse('and') self.fail('Exception not raised') - except ParseError as pe: - expected = {'error_code': PARSE_INVALID_EXPRESSION, 'position':-1, 'token_string': '', 'token_type': None} - assert expected == _parse_error_as_dict(pe) + except ExpressionError as pe: + expected = 'AND requires two or more licenses as in: MIT AND BSD' + assert expected == str(pe) def test_parse_errors_catch_invalid_expression_with_or_and_no_other(self): licensing = Licensing() try: licensing.parse('or that') self.fail('Exception not raised') - except ParseError as pe: - expected = {'error_code': PARSE_INVALID_EXPRESSION, 'position':-1, 'token_string': '', 'token_type': None} - assert expected == _parse_error_as_dict(pe) + except ExpressionError as pe: + 
expected = 'OR requires two or more licenses as in: MIT OR BSD' + assert expected == str(pe) def test_parse_errors_catch_invalid_expression_with_empty_parens(self): licensing = Licensing() @@ -435,7 +485,12 @@ def test_parse_errors_catch_invalid_expression_with_empty_parens(self): licensing.parse('with ( )this') self.fail('Exception not raised') except ParseError as pe: - expected = {'error_code': PARSE_INVALID_EXPRESSION, 'position': 0, 'token_string': 'with', 'token_type': TOKEN_WITH} + expected = { + 'error_code': PARSE_INVALID_EXPRESSION, + 'position': 0, + 'token_string': 'with', + 'token_type': TOKEN_WITH + } assert expected == _parse_error_as_dict(pe) def test_parse_errors_catch_invalid_non_unicode_byte_strings_on_python3(self): @@ -700,18 +755,6 @@ def test_parse_complex2(self): expected = 'GPL-2.0 OR (LGPL-2.1 AND mit)' assert expected == expr.render('{symbol.key}') - def test_Licensing_can_scan_valid_expressions_with_symbols_that_contain_and_with_or(self): - licensing = Licensing() - expression = 'orgpl or withbsd with orclasspath and andmit or andlgpl and ormit or withme' - result = [r.string for r in licensing.get_scanner().scan(expression)] - expected = [ - 'orgpl', 'or', 'withbsd', 'with', 'orclasspath', - 'and', 'andmit', 'or', 'andlgpl', 'and', 'ormit', - 'or', 'withme' - ] - - assert expected == result - def test_Licensing_can_tokenize_valid_expressions_with_symbols_that_contain_and_with_or(self): licensing = Licensing() expression = 'orgpl or withbsd with orclasspath and andmit or anlgpl and ormit or withme' @@ -736,9 +779,11 @@ def test_Licensing_can_tokenize_valid_expressions_with_symbols_that_contain_and_ assert expected == result - def test_Licensing_can_split_valid_expressions_with_symbols_that_contain_and_with_or(self): + def test_Licensing_can_simple_tokenize_valid_expressions_with_symbols_that_contain_and_with_or(self): + licensing = Licensing() expression = 'orgpl or withbsd with orclasspath and andmit or andlgpl and ormit or withme' - result = [r.string for r in splitter(expression)] + + result = [r.string for r in licensing.simple_tokenizer(expression)] expected = [ 'orgpl', ' ', @@ -1266,7 +1311,7 @@ def test_parse_with_overlapping_key_without_symbols(self): expected = 'mit OR (mit AND zlib) OR mit OR mit WITH verylonglicense' assert expected == results - def test_scan_with_overlapping_key_with_symbols_and_trailing_unknown(self): + def test_advanced_tokenizer_tokenize_with_overlapping_key_with_symbols_and_trailing_unknown(self): expression = 'mit or mit AND zlib or mit or mit with verylonglicense' # 111111111122222222223333333333444444444455555 # 0123456789012345678901234567890123456789012345678901234 @@ -1280,24 +1325,24 @@ def test_scan_with_overlapping_key_with_symbols_and_trailing_unknown(self): LicenseSymbol('hmit', ['h verylonglicense']), ] licensing = Licensing(symbols) - results = list(licensing.get_scanner().scan(expression)) + results = list(licensing.get_advanced_tokenizer().tokenize(expression)) expected = [ - Token(0, 2, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), - Token(4, 5, u'or', Keyword(value=u'or', type=2)), - Token(7, 9, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), - Token(11, 13, u'AND', Keyword(value=u'and', type=1)), - Token(15, 18, u'zlib', LicenseSymbol(u'zlib', aliases=(u'zlib',))), - Token(20, 21, u'or', Keyword(value=u'or', type=2)), - Token(23, 25, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), - Token(27, 28, u'or', Keyword(value=u'or', type=2)), - Token(30, 32, u'mit', 
LicenseSymbol(u'MIT', aliases=(u'MIT license',))), - Token(34, 37, u'with', Keyword(value=u'with', type=10)), - Token(39, 53, u'verylonglicense', None), + Token(0, 2, 'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(4, 5, 'or', Keyword(value=u'or', type=2)), + Token(7, 9, 'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(11, 13, 'AND', Keyword(value=u'and', type=1)), + Token(15, 18, 'zlib', LicenseSymbol(u'zlib', aliases=(u'zlib',))), + Token(20, 21, 'or', Keyword(value=u'or', type=2)), + Token(23, 25, 'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(27, 28, 'or', Keyword(value=u'or', type=2)), + Token(30, 32, 'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(34, 37, 'with', Keyword(value=u'with', type=10)), + Token(39, 53, 'verylonglicense', None), ] assert expected == results - def test_iter_with_overlapping_key_with_symbols_and_trailing_unknown(self): + def test_advanced_tokenizer_iter_with_overlapping_key_with_symbols_and_trailing_unknown(self): expression = 'mit or mit AND zlib or mit or mit with verylonglicense' # 111111111122222222223333333333444444444455555 # 0123456789012345678901234567890123456789012345678901234 @@ -1311,34 +1356,34 @@ def test_iter_with_overlapping_key_with_symbols_and_trailing_unknown(self): LicenseSymbol('hmit', ['h verylonglicense']), ] licensing = Licensing(symbols) - results = list(licensing.get_scanner().iter(expression, include_unmatched=True)) + results = list(licensing.get_advanced_tokenizer().iter(expression, include_unmatched=True)) expected = [ - Token(0, 2, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), - Token(4, 5, u'or', Keyword(value=u'or', type=2)), - Token(7, 9, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), - Token(11, 13, u'AND', Keyword(value=u'and', type=1)), - Token(15, 18, u'zlib', LicenseSymbol(u'zlib', aliases=(u'zlib',))), - Token(20, 21, u'or', Keyword(value=u'or', type=2)), - Token(23, 25, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), - Token(27, 28, u'or', Keyword(value=u'or', type=2)), - Token(30, 32, u'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), - Token(34, 37, u'with', Keyword(value=u'with', type=10)), - Token(39, 53, u'verylonglicense', None), + Token(0, 2, 'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(4, 5, 'or', Keyword(value=u'or', type=2)), + Token(7, 9, 'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(11, 13, 'AND', Keyword(value=u'and', type=1)), + Token(15, 18, 'zlib', LicenseSymbol(u'zlib', aliases=(u'zlib',))), + Token(20, 21, 'or', Keyword(value=u'or', type=2)), + Token(23, 25, 'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(27, 28, 'or', Keyword(value=u'or', type=2)), + Token(30, 32, 'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(34, 37, 'with', Keyword(value=u'with', type=10)), + Token(39, 53, 'verylonglicense', None), ] assert expected == results - def test_iter_with_overlapping_key_with_symbols_and_trailing_unknown2(self): + def test_advanced_tokenizer_iter_with_overlapping_key_with_symbols_and_trailing_unknown2(self): expression = 'mit with verylonglicense' symbols = [ LicenseSymbol('MIT', ['MIT license']), LicenseSymbol('hmit', ['h verylonglicense']), ] licensing = Licensing(symbols) - results = list(licensing.get_scanner().iter(expression, include_unmatched=True)) + results = list(licensing.get_advanced_tokenizer().iter(expression, include_unmatched=True)) expected = [ - Token(0, 2, u'mit', LicenseSymbol(u'MIT', 
aliases=(u'MIT license',))), - Token(4, 7, u'with', Keyword(value=u'with', type=10)), - Token(9, 23, u'verylonglicense', None), + Token(0, 2, 'mit', LicenseSymbol(u'MIT', aliases=(u'MIT license',))), + Token(4, 7, 'with', Keyword(value=u'with', type=10)), + Token(9, 23, 'verylonglicense', None), ] assert expected == results @@ -1359,17 +1404,17 @@ def test_tokenize_with_overlapping_key_with_symbols_and_trailing_unknown(self): results = list(licensing.tokenize(expression)) expected = [ - (LicenseSymbol(u'MIT', aliases=(u'MIT license',)), u'mit', 0), - (2, u'or', 4), - (LicenseSymbol(u'MIT', aliases=(u'MIT license',)), u'mit', 7), - (1, u'AND', 11), - (LicenseSymbol(u'zlib', aliases=(u'zlib',)), u'zlib', 15), - (2, u'or', 20), - (LicenseSymbol(u'MIT', aliases=(u'MIT license',)), u'mit', 23), - (2, u'or', 27), + (LicenseSymbol(u'MIT', aliases=(u'MIT license',)), 'mit', 0), + (2, 'or', 4), + (LicenseSymbol(u'MIT', aliases=(u'MIT license',)), 'mit', 7), + (1, 'AND', 11), + (LicenseSymbol(u'zlib', aliases=(u'zlib',)), 'zlib', 15), + (2, 'or', 20), + (LicenseSymbol(u'MIT', aliases=(u'MIT license',)), 'mit', 23), + (2, 'or', 27), (LicenseWithExceptionSymbol( license_symbol=LicenseSymbol(u'MIT', aliases=(u'MIT license',)), - exception_symbol=LicenseSymbol(u'verylonglicense')), u'mit with verylonglicense', + exception_symbol=LicenseSymbol(u'verylonglicense')), 'mit with verylonglicense', 30) ] @@ -1514,13 +1559,14 @@ def test_primary_license_symbol_and_primary_license_key(self): class SplitAndTokenizeTest(TestCase): - def test_splitter(self): + def test_simple_tokenizer(self): expr = (' GPL-2.0 or later with classpath Exception and mit and ' 'mit with SOMETHING with ELSE+ or LGPL 2.1 and ' 'GPL-2.0 or LATER with (Classpath Exception and ' 'mit or later) or LGPL 2.1 or mit or GPL-2.0 or LATER ' 'with SOMETHING with ELSE+ and lgpl 2.1') - results = list(splitter(expr)) + licensing = Licensing() + results = list(licensing.simple_tokenizer(expr)) expected = [ Token(0, 0, ' ', None), Token(1, 7, 'GPL-2.0', LicenseSymbol(key='GPL-2.0')), @@ -1650,80 +1696,80 @@ def test_tokenize_step_by_step_does_not_munge_trailing_symbols(self): 'mit or later or LGPL 2.1 or mit or GPL-2.0 or LATER ' 'with mitthing with ELSE+ and lgpl 2.1 or gpl-2.0') - # fist scan - scanner = licensing.get_scanner() - result = list(scanner.scan(expr)) + # fist tokenize + tokenizer = licensing.get_advanced_tokenizer() + result = list(tokenizer.tokenize(expr)) expected = [ - Token(1, 16, u'GPL-2.0 or later', LicenseSymbol(u'GPL-2.0 or LATER', is_exception=False)), - Token(18, 21, u'with', Keyword(value=u'with', type=10)), - Token(23, 41, u'classpath Exception', LicenseSymbol(u'classpath Exception', is_exception=True)), - Token(43, 45, u'and', Keyword(value=u'and', type=1)), - Token(47, 49, u'mit', LicenseSymbol(u'mit', is_exception=False)), - Token(51, 53, u'and', Keyword(value=u'and', type=1)), - Token(55, 57, u'mit', LicenseSymbol(u'mit', is_exception=False)), - Token(59, 62, u'with', Keyword(value=u'with', type=10)), - Token(64, 82, u'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+', is_exception=False)), - Token(84, 85, u'or', Keyword(value=u'or', type=2)), - Token(87, 94, u'LGPL 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)), - Token(96, 98, u'and', Keyword(value=u'and', type=1)), - Token(100, 115, u'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER', is_exception=False)), - Token(117, 120, u'with', Keyword(value=u'with', type=10)), - Token(122, 140, u'Classpath Exception', LicenseSymbol(u'classpath Exception', 
is_exception=True)), - Token(142, 144, u'and', Keyword(value=u'and', type=1)), - Token(146, 157, u'mit or later', LicenseSymbol(u'mit or later', is_exception=False)), - Token(159, 160, u'or', Keyword(value=u'or', type=2)), - Token(162, 169, u'LGPL 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)), - Token(171, 172, u'or', Keyword(value=u'or', type=2)), - Token(174, 176, u'mit', LicenseSymbol(u'mit', is_exception=False)), - Token(178, 179, u'or', Keyword(value=u'or', type=2)), - Token(181, 196, u'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER', is_exception=False)), - Token(198, 201, u'with', Keyword(value=u'with', type=10)), - Token(203, 221, u'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+', is_exception=False)), - Token(223, 225, u'and', Keyword(value=u'and', type=1)), - Token(227, 234, u'lgpl 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)), - Token(236, 237, u'or', Keyword(value=u'or', type=2)), - Token(239, 245, u'gpl-2.0', LicenseSymbol(u'GPL-2.0', is_exception=False)) + Token(1, 16, 'GPL-2.0 or later', LicenseSymbol(u'GPL-2.0 or LATER')), + Token(18, 21, 'with', Keyword(value=u'with', type=10)), + Token(23, 41, 'classpath Exception', LicenseSymbol(u'classpath Exception', is_exception=True)), + Token(43, 45, 'and', Keyword(value=u'and', type=1)), + Token(47, 49, 'mit', LicenseSymbol(u'mit')), + Token(51, 53, 'and', Keyword(value=u'and', type=1)), + Token(55, 57, 'mit', LicenseSymbol(u'mit')), + Token(59, 62, 'with', Keyword(value=u'with', type=10)), + Token(64, 82, 'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+')), + Token(84, 85, 'or', Keyword(value=u'or', type=2)), + Token(87, 94, 'LGPL 2.1', LicenseSymbol(u'LGPL 2.1')), + Token(96, 98, 'and', Keyword(value=u'and', type=1)), + Token(100, 115, 'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER')), + Token(117, 120, 'with', Keyword(value=u'with', type=10)), + Token(122, 140, 'Classpath Exception', LicenseSymbol(u'classpath Exception', is_exception=True)), + Token(142, 144, 'and', Keyword(value=u'and', type=1)), + Token(146, 157, 'mit or later', LicenseSymbol(u'mit or later')), + Token(159, 160, 'or', Keyword(value=u'or', type=2)), + Token(162, 169, 'LGPL 2.1', LicenseSymbol(u'LGPL 2.1')), + Token(171, 172, 'or', Keyword(value=u'or', type=2)), + Token(174, 176, 'mit', LicenseSymbol(u'mit')), + Token(178, 179, 'or', Keyword(value=u'or', type=2)), + Token(181, 196, 'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER')), + Token(198, 201, 'with', Keyword(value=u'with', type=10)), + Token(203, 221, 'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+')), + Token(223, 225, 'and', Keyword(value=u'and', type=1)), + Token(227, 234, 'lgpl 2.1', LicenseSymbol(u'LGPL 2.1')), + Token(236, 237, 'or', Keyword(value=u'or', type=2)), + Token(239, 245, 'gpl-2.0', LicenseSymbol(u'GPL-2.0')) ] assert expected == result expected_groups = [ - (Token(1, 16, u'GPL-2.0 or later', LicenseSymbol(u'GPL-2.0 or LATER', is_exception=False)), - Token(18, 21, u'with', Keyword(value=u'with', type=10)), - Token(23, 41, u'classpath Exception', LicenseSymbol(u'classpath Exception', is_exception=True))), - - (Token(43, 45, u'and', Keyword(value=u'and', type=1)),), - (Token(47, 49, u'mit', LicenseSymbol(u'mit', is_exception=False)),), - (Token(51, 53, u'and', Keyword(value=u'and', type=1)),), - - (Token(55, 57, u'mit', LicenseSymbol(u'mit', is_exception=False)), - Token(59, 62, u'with', Keyword(value=u'with', type=10)), - Token(64, 82, u'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+', is_exception=False))), - - 
(Token(84, 85, u'or', Keyword(value=u'or', type=2)),), - (Token(87, 94, u'LGPL 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)),), - (Token(96, 98, u'and', Keyword(value=u'and', type=1)),), - - (Token(100, 115, u'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER', is_exception=False)), - Token(117, 120, u'with', Keyword(value=u'with', type=10)), - Token(122, 140, u'Classpath Exception', LicenseSymbol(u'classpath Exception', is_exception=True))), - - (Token(142, 144, u'and', Keyword(value=u'and', type=1)),), - (Token(146, 157, u'mit or later', LicenseSymbol(u'mit or later', is_exception=False)),), - (Token(159, 160, u'or', Keyword(value=u'or', type=2)),), - (Token(162, 169, u'LGPL 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)),), - (Token(171, 172, u'or', Keyword(value=u'or', type=2)),), - (Token(174, 176, u'mit', LicenseSymbol(u'mit', is_exception=False)),), - (Token(178, 179, u'or', Keyword(value=u'or', type=2)),), - - (Token(181, 196, u'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER', is_exception=False)), - Token(198, 201, u'with', Keyword(value=u'with', type=10)), - Token(203, 221, u'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+', is_exception=False))), - - (Token(223, 225, u'and', Keyword(value=u'and', type=1)),), - (Token(227, 234, u'lgpl 2.1', LicenseSymbol(u'LGPL 2.1', is_exception=False)),), - (Token(236, 237, u'or', Keyword(value=u'or', type=2)),), - (Token(239, 245, u'gpl-2.0', LicenseSymbol(u'GPL-2.0', is_exception=False)),) + (Token(1, 16, 'GPL-2.0 or later', LicenseSymbol(u'GPL-2.0 or LATER')), + Token(18, 21, 'with', Keyword(value=u'with', type=10)), + Token(23, 41, 'classpath Exception', LicenseSymbol(u'classpath Exception', is_exception=True))), + + (Token(43, 45, 'and', Keyword(value=u'and', type=1)),), + (Token(47, 49, 'mit', LicenseSymbol(u'mit')),), + (Token(51, 53, 'and', Keyword(value=u'and', type=1)),), + + (Token(55, 57, 'mit', LicenseSymbol(u'mit')), + Token(59, 62, 'with', Keyword(value=u'with', type=10)), + Token(64, 82, 'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+'))), + + (Token(84, 85, 'or', Keyword(value=u'or', type=2)),), + (Token(87, 94, 'LGPL 2.1', LicenseSymbol(u'LGPL 2.1')),), + (Token(96, 98, 'and', Keyword(value=u'and', type=1)),), + + (Token(100, 115, 'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER')), + Token(117, 120, 'with', Keyword(value=u'with', type=10)), + Token(122, 140, 'Classpath Exception', LicenseSymbol(u'classpath Exception', is_exception=True))), + + (Token(142, 144, 'and', Keyword(value=u'and', type=1)),), + (Token(146, 157, 'mit or later', LicenseSymbol(u'mit or later')),), + (Token(159, 160, 'or', Keyword(value=u'or', type=2)),), + (Token(162, 169, 'LGPL 2.1', LicenseSymbol(u'LGPL 2.1')),), + (Token(171, 172, 'or', Keyword(value=u'or', type=2)),), + (Token(174, 176, 'mit', LicenseSymbol(u'mit')),), + (Token(178, 179, 'or', Keyword(value=u'or', type=2)),), + + (Token(181, 196, 'GPL-2.0 or LATER', LicenseSymbol(u'GPL-2.0 or LATER')), + Token(198, 201, 'with', Keyword(value=u'with', type=10)), + Token(203, 221, 'mitthing with ELSE+', LicenseSymbol(u'mitthing with else+'))), + + (Token(223, 225, 'and', Keyword(value=u'and', type=1)),), + (Token(227, 234, 'lgpl 2.1', LicenseSymbol(u'LGPL 2.1')),), + (Token(236, 237, 'or', Keyword(value=u'or', type=2)),), + (Token(239, 245, 'gpl-2.0', LicenseSymbol(u'GPL-2.0')),) ] result_groups = list(build_token_groups_for_with_subexpression(result)) assert expected_groups == result_groups @@ -1868,3 +1914,62 @@ def __init__(self, key, 
is_exception=False): expected = [l1, lx, lx2, lx3, l3, l2, l4] assert expected == sorted([l4, l3, l2, l1, lx , lx2, lx3]) + + +class MockLicensesTest(TestCase): + + def test_licensing_can_use_mocklicense_tuple(self): + MockLicense = namedtuple('MockLicense', 'key aliases is_exception') + + licenses = [ + MockLicense('gpl-2.0', ['GPL-2.0'], False), + MockLicense('classpath-2.0', ['Classpath-Exception-2.0'], True), + MockLicense('gpl-2.0-plus', ['GPL-2.0-or-later', 'GPL-2.0 or-later'], False), + MockLicense('lgpl-2.1-plus', ['LGPL-2.1-or-later'], False), + ] + licensing = Licensing(licenses) + + ex1 = '(GPL-2.0-or-later with Classpath-Exception-2.0 or GPL-2.0 or-later) and LGPL-2.1-or-later' + expression1 = licensing.parse(ex1, validate=False, strict=False) + assert ['gpl-2.0-plus', 'classpath-2.0', 'lgpl-2.1-plus'] == licensing.license_keys(expression1) + + ex2 = 'LGPL-2.1-or-later and (GPL-2.0-or-later oR GPL-2.0-or-later with Classpath-Exception-2.0)' + expression2 = licensing.parse(ex2, validate=True, strict=False) + + ex3 = 'LGPL-2.1-or-later and (GPL-2.0-or-later oR GPL-2.0-or-later)' + expression3 = licensing.parse(ex3, validate=True, strict=False) + + self.assertTrue(licensing.is_equivalent(expression1, expression2)) + self.assertTrue(licensing.is_equivalent(expression2, expression1)) + self.assertFalse(licensing.is_equivalent(expression1, expression3)) + self.assertFalse(licensing.is_equivalent(expression2, expression3)) + + def test_and_and_or_is_invalid(self): + expression = 'gpl-2.0 with classpath and and or gpl-2.0-plus' + licensing = Licensing() + try: + licensing.parse(expression) + self.fail('Exception not raised') + except ExpressionError as e: + assert 'AND requires two or more licenses as in: MIT AND BSD' == str(e) + + def test_or_or_is_not_invalid(self): + expression = 'gpl-2.0 with classpath or or or or gpl-2.0-plus' + licensing = Licensing() + result = str(licensing.parse(expression)) + assert 'gpl-2.0 WITH classpath OR gpl-2.0-plus' == result + + def test_tokenize_or_or(self): + expression = 'gpl-2.0 with classpath or or gpl-2.0-plus' + licensing = Licensing() + results = list(licensing.tokenize(expression)) + expected = [ + (LicenseWithExceptionSymbol( + license_symbol=LicenseSymbol(u'gpl-2.0'), + exception_symbol=LicenseSymbol(u'classpath')), 'gpl-2.0 with classpath', 0), + (2, 'or', 23), + (2, 'or', 26), + (LicenseSymbol(u'gpl-2.0-plus'), 'gpl-2.0-plus', 29) + ] + + assert expected == results From ccfe96b740cc1028c70923f3818d6c53a7a0b3f7 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 6 Aug 2018 09:43:17 +0200 Subject: [PATCH 7/9] Add simple tokenizer and improve error checks #29 Also update boolean.py to 3.6 Signed-off-by: Philippe Ombredanne --- setup.py | 4 +- src/license_expression/__init__.py | 56 ++------ tests/test_license_expression.py | 134 ++++++++++++++++-- .../prod/boolean.py-3.5-py2.py3-none-any.whl | Bin 22898 -> 0 bytes .../prod/boolean.py-3.6-py2.py3-none-any.whl | Bin 0 -> 21969 bytes .../boolean.py-3.6-py2.py3-none-any.whl.ABOUT | 18 +++ ...oolean.py-3.6-py2.py3-none-any.whl.NOTICE} | 44 +++--- thirdparty/prod/boolean.py.ABOUT | 11 -- thirdparty/prod/bsd-simplified.LICENSE | 20 +++ 9 files changed, 191 insertions(+), 96 deletions(-) delete mode 100644 thirdparty/prod/boolean.py-3.5-py2.py3-none-any.whl create mode 100644 thirdparty/prod/boolean.py-3.6-py2.py3-none-any.whl create mode 100644 thirdparty/prod/boolean.py-3.6-py2.py3-none-any.whl.ABOUT rename thirdparty/prod/{boolean.py.LICENSE => 
boolean.py-3.6-py2.py3-none-any.whl.NOTICE} (93%) delete mode 100644 thirdparty/prod/boolean.py.ABOUT create mode 100644 thirdparty/prod/bsd-simplified.LICENSE diff --git a/setup.py b/setup.py index 38f993d..87a527c 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ include_package_data=True, zip_safe=False, classifiers=[ - 'Development Status :: 4 - Beta', + 'Development Status :: 5 - Production/Stable', 'License :: OSI Approved :: Apache Software License', 'Intended Audience :: Developers', 'Operating System :: OS Independent', @@ -48,6 +48,6 @@ 'licence' ], install_requires=[ - 'boolean.py >= 3.5, < 4.0.0', + 'boolean.py >= 3.6, < 4.0.0', ] ) diff --git a/src/license_expression/__init__.py b/src/license_expression/__init__.py index b0ef76d..e37f183 100644 --- a/src/license_expression/__init__.py +++ b/src/license_expression/__init__.py @@ -39,8 +39,6 @@ from copy import deepcopy from functools import total_ordering import itertools -import logging -from pprint import pprint import re import string @@ -52,9 +50,11 @@ from boolean.boolean import PARSE_ERRORS from boolean.boolean import PARSE_INVALID_EXPRESSION from boolean.boolean import PARSE_INVALID_NESTING +from boolean.boolean import PARSE_INVALID_OPERATOR_SEQUENCE from boolean.boolean import PARSE_INVALID_SYMBOL_SEQUENCE from boolean.boolean import PARSE_UNBALANCED_CLOSING_PARENS from boolean.boolean import PARSE_UNKNOWN_TOKEN + from boolean.boolean import ParseError from boolean.boolean import TOKEN_SYMBOL from boolean.boolean import TOKEN_AND @@ -65,6 +65,7 @@ from license_expression._pyahocorasick import Trie as AdvancedTokenizer from license_expression._pyahocorasick import Token + # Python 2 and 3 support try: # Python 2 @@ -74,23 +75,6 @@ # Python 3 unicode = str # NOQA -TRACE = False - -logger = logging.getLogger(__name__) - - -def logger_debug(*args): - pass - - -if TRACE: - - def logger_debug(*args): - return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) - - import sys - logging.basicConfig(stream=sys.stdout) - logger.setLevel(logging.DEBUG) # append new error codes to PARSE_ERRORS by monkey patching PARSE_EXPRESSION_NOT_UNICODE = 100 @@ -239,10 +223,10 @@ def __init__(self, symbols=tuple(), quiet=True): raise ValueError('\n'.join(warns + errors)) # mapping of known symbol key to symbol for reference - self.known_symbols_by_key = {symbol.key: symbol for symbol in symbols} + self.known_symbols = {symbol.key: symbol for symbol in symbols} # mapping of known symbol lowercase key to symbol for reference - self.known_symbols_by_keylow = {symbol.key.lower(): symbol for symbol in symbols} + self.known_symbols_lowercase = {symbol.key.lower(): symbol for symbol in symbols} # Aho-Corasick automaton-based Advanced Tokenizer self.advanced_tokenizer = None @@ -375,7 +359,7 @@ def unknown_license_symbols(self, expression, unique=True, **kwargs): Extra kwargs are passed down to the parse() function. 
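For instance, a minimal sketch (the 'mit' symbol and the 'foo-bar' key are illustrative only):

    >>> from license_expression import Licensing, LicenseSymbol
    >>> licensing = Licensing(['mit'])
    >>> licensing.unknown_license_symbols('mit AND foo-bar') == [LicenseSymbol('foo-bar')]
    True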
""" return [ls for ls in self.license_symbols(expression, unique=unique, decompose=True, **kwargs) - if not ls.key in self.known_symbols_by_key] + if not ls.key in self.known_symbols] def unknown_license_keys(self, expression, unique=True, **kwargs): """ @@ -447,7 +431,7 @@ def parse(self, expression, validate=False, strict=False, simple=False, **kwargs return try: # this will raise a ParseError on errors - tokens = list(self.tokenize(expression, strict=strict)) + tokens = list(self.tokenize(expression, strict=strict, simple=simple)) expression = super(Licensing, self).parse(tokens) except TypeError as e: msg = 'Invalid expression syntax: ' + repr(e) @@ -488,40 +472,20 @@ def tokenize(self, expression, strict=False, simple=False): raise ParseError(error_code=PARSE_EXPRESSION_NOT_UNICODE) if simple: - if TRACE: logger_debug('using simple tokenizer') tokens = self.simple_tokenizer(expression) else: - if TRACE: logger_debug('using advanced tokenizer') advanced_tokenizer = self.get_advanced_tokenizer() tokens = advanced_tokenizer.tokenize(expression) - if TRACE: - tokens = list(tokens) - logger_debug('tokenize: tokens') - pprint(tokens) - # Assign symbol for unknown tokens tokens = build_symbols_from_unknown_tokens(tokens) - if TRACE: - tokens = list(tokens) - logger_debug('tokenize: token with symbols') - pprint(tokens) # skip whitespace-only tokens tokens = (t for t in tokens if t.string and t.string.strip()) - if TRACE: - tokens = list(tokens) - logger_debug('tokenize: token NO spaces') - pprint(tokens) # create atomic LicenseWithExceptionSymbol from WITH subexpressions tokens = replace_with_subexpression_by_license_symbol(tokens, strict) - if TRACE: - tokens = list(tokens) - logger_debug('tokenize: LicenseWithExceptionSymbol replaced') - pprint(tokens) - # finally yield the actual args expected by the boolean parser for token in tokens: pos = token.start @@ -557,9 +521,9 @@ def get_advanced_tokenizer(self): for keyword in KEYWORDS: add_item(keyword.value, keyword) - # self.known_symbols_by_key has been created at Licensing initialization time and is + # self.known_symbols has been created at Licensing initialization time and is # already validated and trusted here - for key, symbol in self.known_symbols_by_key.items(): + for key, symbol in self.known_symbols.items(): # always use the key even if there are no aliases. add_item(key, symbol) aliases = getattr(symbol, 'aliases', []) @@ -594,7 +558,7 @@ def simple_tokenizer(self, expression): tokenizing expressions. 
""" - symbols = self.known_symbols_by_keylow or {} + symbols = self.known_symbols_lowercase or {} for match in _simple_tokenizer(expression): if not match: diff --git a/tests/test_license_expression.py b/tests/test_license_expression.py index f42324e..6ceed6c 100644 --- a/tests/test_license_expression.py +++ b/tests/test_license_expression.py @@ -20,6 +20,7 @@ from collections import namedtuple from collections import OrderedDict from unittest import TestCase +from unittest.case import expectedFailure import sys from boolean.boolean import PARSE_UNBALANCED_CLOSING_PARENS @@ -29,6 +30,7 @@ from license_expression import PARSE_INVALID_NESTING from license_expression import PARSE_INVALID_EXCEPTION from license_expression import PARSE_INVALID_SYMBOL_AS_EXCEPTION +from license_expression import PARSE_INVALID_OPERATOR_SEQUENCE from license_expression import ExpressionError from license_expression import Keyword @@ -424,8 +426,15 @@ def test_parse_invalid_expression_raise_exception6(self): try: licensing.parse(expr) self.fail("Exception not raised when validating '%s'" % expr) - except ExpressionError as ee: - assert 'OR requires two or more licenses as in: MIT OR BSD' == str(ee) + self.fail('Exception not raised') + except ParseError as pe: + expected = { + 'error_code': PARSE_INVALID_OPERATOR_SEQUENCE, + 'position': 0, + 'token_string': 'OR', + 'token_type': TOKEN_OR + } + assert expected == _parse_error_as_dict(pe) def test_parse_not_invalid_expression_raise_no_exception2(self): licensing = Licensing() @@ -466,18 +475,28 @@ def test_parse_errors_catch_invalid_expression_with_bare_and(self): try: licensing.parse('and') self.fail('Exception not raised') - except ExpressionError as pe: - expected = 'AND requires two or more licenses as in: MIT AND BSD' - assert expected == str(pe) + except ParseError as pe: + expected = { + 'error_code': PARSE_INVALID_OPERATOR_SEQUENCE, + 'position': 0, + 'token_string': 'and', + 'token_type': TOKEN_AND + } + assert expected == _parse_error_as_dict(pe) def test_parse_errors_catch_invalid_expression_with_or_and_no_other(self): licensing = Licensing() try: licensing.parse('or that') self.fail('Exception not raised') - except ExpressionError as pe: - expected = 'OR requires two or more licenses as in: MIT OR BSD' - assert expected == str(pe) + except ParseError as pe: + expected = { + 'error_code': PARSE_INVALID_OPERATOR_SEQUENCE, + 'position': 0, + 'token_string': 'or', + 'token_type': TOKEN_OR + } + assert expected == _parse_error_as_dict(pe) def test_parse_errors_catch_invalid_expression_with_empty_parens(self): licensing = Licensing() @@ -828,6 +847,78 @@ def test_Licensing_can_parse_valid_expressions_with_symbols_that_contain_spaces( expected = 'GPL-2.0 OR (mit AND LGPL 2.1) OR bsd OR GPL-2.0 OR (mit AND LGPL 2.1)' assert expected == str(parsed) + def test_parse_invalid_expression_with_trailing_or(self): + licensing = Licensing() + expr = 'mit or' + try: + licensing.parse(expr) + self.fail("Exception not raised when validating '%s'" % expr) + except ExpressionError as ee: + assert 'OR requires two or more licenses as in: MIT OR BSD' == str(ee) + + def test_parse_invalid_expression_with_trailing_or_and_valid_start_does_not_raise_exception(self): + licensing = Licensing() + expression = ' mit or mit or ' + parsed = licensing.parse(expression) + # ExpressionError: OR requires two or more licenses as in: MIT OR BSD + expected = 'mit OR mit' + assert expected == str(parsed) + + def test_parse_invalid_expression_with_repeated_trailing_or_raise_exception(self): + 
+        licensing = Licensing()
+        expression = 'mit or mit or or'
+        try:
+            licensing.parse(expression, simple=False)
+            self.fail('Exception not raised')
+        except ParseError as pe:
+            expected = {
+                'error_code': PARSE_INVALID_OPERATOR_SEQUENCE,
+                'position': 14,
+                'token_string': 'or',
+                'token_type': TOKEN_OR
+            }
+            assert expected == _parse_error_as_dict(pe)
+
+    @expectedFailure
+    def test_parse_invalid_expression_with_single_trailing_or_raise_exception(self):
+        licensing = Licensing()
+        expression = 'mit or mit or'
+        try:
+            licensing.parse(expression, simple=False)
+            self.fail('Exception not raised')
+        except ParseError as pe:
+            expected = {
+                'error_code': PARSE_INVALID_OPERATOR_SEQUENCE,
+                'position': 14,
+                'token_string': 'or',
+                'token_type': TOKEN_OR
+            }
+            assert expected == _parse_error_as_dict(pe)
+
+    def test_parse_invalid_expression_with_single_trailing_and_raise_exception(self):
+        licensing = Licensing()
+        expression = 'mit or mit and'
+        try:
+            licensing.parse(expression, simple=False)
+            self.fail('Exception not raised')
+        except ExpressionError as ee:
+            assert 'AND requires two or more licenses as in: MIT AND BSD' == str(ee)
+
+    def test_parse_invalid_expression_with_single_leading_or_raise_exception(self):
+        licensing = Licensing()
+        expression = 'or mit or mit'
+        try:
+            licensing.parse(expression, simple=False)
+            self.fail('Exception not raised')
+        except ParseError as pe:
+            expected = {
+                'error_code': PARSE_INVALID_OPERATOR_SEQUENCE,
+                'position': 0,
+                'token_string': 'or',
+                'token_type': TOKEN_OR
+            }
+            assert expected == _parse_error_as_dict(pe)
+

 class LicensingParseWithSymbolsSimpleTest(TestCase):

@@ -1950,17 +2040,30 @@ def test_and_and_or_is_invalid(self):
         try:
             licensing.parse(expression)
             self.fail('Exception not raised')
-        except ExpressionError as e:
-            assert 'AND requires two or more licenses as in: MIT AND BSD' == str(e)
+        except ParseError as pe:
+            expected = {
+                'error_code': PARSE_INVALID_OPERATOR_SEQUENCE,
+                'position': 27,
+                'token_string': 'and',
+                'token_type': TOKEN_AND}
+            assert expected == _parse_error_as_dict(pe)

-    def test_or_or_is_not_invalid(self):
+    def test_or_or_is_invalid(self):
         expression = 'gpl-2.0 with classpath or or or or gpl-2.0-plus'
         licensing = Licensing()
-        result = str(licensing.parse(expression))
-        assert 'gpl-2.0 WITH classpath OR gpl-2.0-plus' == result
+        try:
+            licensing.parse(expression)
+            self.fail('Exception not raised')
+        except ParseError as pe:
+            expected = {
+                'error_code': PARSE_INVALID_OPERATOR_SEQUENCE,
+                'position': 26,
+                'token_string': 'or',
+                'token_type': TOKEN_OR}
+            assert expected == _parse_error_as_dict(pe)

     def test_tokenize_or_or(self):
-        expression = 'gpl-2.0 with classpath or or gpl-2.0-plus'
+        expression = 'gpl-2.0 with classpath or or or gpl-2.0-plus'
         licensing = Licensing()
         results = list(licensing.tokenize(expression))
         expected = [
@@ -1969,7 +2072,8 @@ def test_tokenize_or_or(self):
             exception_symbol=LicenseSymbol(u'classpath')),
          'gpl-2.0 with classpath', 0),
         (2, 'or', 23),
         (2, 'or', 26),
-        (LicenseSymbol(u'gpl-2.0-plus'), 'gpl-2.0-plus', 29)
+        (2, 'or', 29),
+        (LicenseSymbol(u'gpl-2.0-plus'), 'gpl-2.0-plus', 32)
         ]
         assert expected == results
diff --git a/thirdparty/prod/boolean.py-3.5-py2.py3-none-any.whl b/thirdparty/prod/boolean.py-3.5-py2.py3-none-any.whl
deleted file mode 100644
index 5fe68e51b48f657d0ad6274ef8b6c08a28363154..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 22898
[base85-encoded binary payload omitted: boolean.py-3.5 wheel removed]
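(Editorial sketch, not part of the patch series: the tests above pin the new behavior where consecutive operators fail fast with a ParseError instead of an ExpressionError raised deep inside boolean.py. A minimal, hypothetical usage example, assuming this series is applied together with boolean.py >= 3.6:)

# Minimal sketch only; assumes this patch series and boolean.py >= 3.6.
from boolean.boolean import ParseError
from license_expression import Licensing, PARSE_INVALID_OPERATOR_SEQUENCE

licensing = Licensing()

# A well-formed expression parses as before.
print(licensing.parse('mit OR gpl-2.0'))  # mit OR gpl-2.0

# A repeated operator such as 'or or' now raises a ParseError carrying the
# new PARSE_INVALID_OPERATOR_SEQUENCE error code, instead of the former
# ExpressionError ('OR requires two or more licenses as in: MIT OR BSD').
try:
    licensing.parse('mit or mit or or')
except ParseError as pe:
    # The offending token and its character offset are reported.
    assert pe.error_code == PARSE_INVALID_OPERATOR_SEQUENCE
    print(pe.token_string, pe.position)  # or 14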
diff --git a/thirdparty/prod/boolean.py-3.6-py2.py3-none-any.whl b/thirdparty/prod/boolean.py-3.6-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..b1f55798d533bebaff07df4b5cf9100f242fb29e
GIT binary patch
literal 21969
[base85-encoded binary payload omitted: boolean.py-3.6 wheel added]

literal 0
HcmV?d00001
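(Editorial sketch, not part of the patch: the known_symbols_by_key to known_symbols rename earlier in this series touches the mapping that unknown_license_symbols() and the advanced tokenizer are built from. A hypothetical example, assuming symbols are provided as LicenseSymbol objects:)

# Minimal sketch only; the 'mit' and 'gpl-2.0' symbols are hypothetical examples.
from license_expression import Licensing, LicenseSymbol

licensing = Licensing([LicenseSymbol('mit'), LicenseSymbol('gpl-2.0')])

# The renamed mapping of known symbol key -> symbol:
print(sorted(licensing.known_symbols))  # ['gpl-2.0', 'mit']

# unknown_license_keys() checks parsed symbols against known_symbols:
print(licensing.unknown_license_keys('mit AND foo'))  # ['foo']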
diff --git a/thirdparty/prod/boolean.py-3.6-py2.py3-none-any.whl.ABOUT b/thirdparty/prod/boolean.py-3.6-py2.py3-none-any.whl.ABOUT
new file mode 100644
index 0000000..a050700
--- /dev/null
+++ b/thirdparty/prod/boolean.py-3.6-py2.py3-none-any.whl.ABOUT
@@ -0,0 +1,18 @@
+about_resource: boolean.py-3.6-py2.py3-none-any.whl
+attribute: true
+checksum_md5: da39999eb131b589e84ad935dc4ca642
+checksum_sha1: d31b55e7ad2ee917232b3213afe3ae9678156a9f
+copyright: Copyright (c) 2009-2016 Sebastian Kraemer, basti.kr@gmail.com and others
+description: Implements boolean algebra in one module.
+download_url: https://files.pythonhosted.org/packages/9b/27/d22062a221010e17935237ba4b574cd828238ea02e0765337c238466a512/boolean.py-3.6-py2.py3-none-any.whl +homepage_url: https://github.com/bastikr/boolean.py +license_expression: bsd-simplified +licenses: +- file: bsd-simplified.LICENSE + key: bsd-simplified + name: BSD-2-Clause +name: boolean.py +notice_file: boolean.py-3.6-py2.py3-none-any.whl.NOTICE +notice_url: https://github.com/bastikr/boolean.py/blob/master/LICENSE.txt +owner: Sebastian Kraemer +version: '3.6' diff --git a/thirdparty/prod/boolean.py.LICENSE b/thirdparty/prod/boolean.py-3.6-py2.py3-none-any.whl.NOTICE similarity index 93% rename from thirdparty/prod/boolean.py.LICENSE rename to thirdparty/prod/boolean.py-3.6-py2.py3-none-any.whl.NOTICE index a0c637f..8819ea1 100644 --- a/thirdparty/prod/boolean.py.LICENSE +++ b/thirdparty/prod/boolean.py-3.6-py2.py3-none-any.whl.NOTICE @@ -1,23 +1,23 @@ -Copyright (c) 2009-2016 Sebastian Kraemer, basti.kr@gmail.com and others -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation and/or -other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +Copyright (c) 2009-2017 Sebastian Kraemer, basti.kr@gmail.com +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/thirdparty/prod/boolean.py.ABOUT b/thirdparty/prod/boolean.py.ABOUT deleted file mode 100644 index 31eb628..0000000 --- a/thirdparty/prod/boolean.py.ABOUT +++ /dev/null @@ -1,11 +0,0 @@ -about_resource: boolean.py-3.5-py2.py3-none-any.whl -version: 3.5 -download_url: https://pypi.python.org/packages/80/f3/0508ae7ba76b02f7fd666b705766edc1863fc8ef29d0519b4c95d60ab1bb/boolean.py-3.5-py2.py3-none-any.whl#md5=cf90b0c0530663bbf71a53fb58f6fa72 - -name: boolean.py - -copyright: Copyright (c) 2009-2016 Sebastian Kraemer, basti.kr@gmail.com and others -license_expression: bsd-simplified -license_file: boolean.py.LICENSE - -homepage_url: https://github.com/bastikr/boolean.py diff --git a/thirdparty/prod/bsd-simplified.LICENSE b/thirdparty/prod/bsd-simplified.LICENSE new file mode 100644 index 0000000..d99a0b1 --- /dev/null +++ b/thirdparty/prod/bsd-simplified.LICENSE @@ -0,0 +1,20 @@ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
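(Editorial sketch between patches, not part of the series: the parse() change above now forwards the existing simple flag down to tokenize(), so the regex-based simple tokenizer can be selected end to end instead of the default Aho-Corasick advanced tokenizer. A hypothetical example, assuming LicenseSymbol accepts an aliases tuple as the tokenizer-building code suggests:)

# Minimal sketch only; the symbols and the alias are hypothetical examples.
from license_expression import Licensing, LicenseSymbol

licensing = Licensing([
    LicenseSymbol('GPL-2.0', aliases=('GPL 2.0',)),
    LicenseSymbol('mit'),
])

# Default: the advanced tokenizer resolves multi-word aliases.
print(licensing.parse('GPL 2.0 or mit'))  # GPL-2.0 OR mit

# simple=True now reaches the simple tokenizer through parse(); it matches
# bare keys case-insensitively (via known_symbols_lowercase) but cannot
# resolve multi-word aliases such as 'GPL 2.0'.
print(licensing.parse('gpl-2.0 or mit', simple=True))  # GPL-2.0 OR mit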
From 22178970c75cb7fa5bfae4b68a76fd0a0191be43 Mon Sep 17 00:00:00 2001
From: Philippe Ombredanne
Date: Mon, 6 Aug 2018 15:54:13 +0200
Subject: [PATCH 8/9] Update and correct ABOUT files #29

Signed-off-by: Philippe Ombredanne
---
 ...aboutcode_toolkit-3.0.2-py2.py3-none-any.whl | Bin 42728 -> 0 bytes
 ...aboutcode_toolkit-3.1.1-py2.py3-none-any.whl | Bin 0 -> 43546 bytes
 ...de_toolkit-3.1.1-py2.py3-none-any.whl.ABOUT} | 11 +++++------
 thirdparty/dev/more-itertools-py2.ABOUT         |  2 +-
 thirdparty/dev/more-itertools-py3.ABOUT         |  2 +-
 .../boolean.py-3.6-py2.py3-none-any.whl.ABOUT   |  5 +----
 6 files changed, 8 insertions(+), 12 deletions(-)
 delete mode 100644 thirdparty/dev/aboutcode_toolkit-3.0.2-py2.py3-none-any.whl
 create mode 100644 thirdparty/dev/aboutcode_toolkit-3.1.1-py2.py3-none-any.whl
 rename thirdparty/dev/{aboutcode_toolkit-3.0.2-py2.py3-none-any.whl.ABOUT => aboutcode_toolkit-3.1.1-py2.py3-none-any.whl.ABOUT} (55%)

diff --git a/thirdparty/dev/aboutcode_toolkit-3.0.2-py2.py3-none-any.whl b/thirdparty/dev/aboutcode_toolkit-3.0.2-py2.py3-none-any.whl
deleted file mode 100644
index 0710ef9f82bfa6ecc50c6ade579a45a78d209d10..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 42728
[base85-encoded binary payload omitted: aboutcode_toolkit-3.0.2 wheel removed]
zg%lW{>=(6ZVqv0n%KvK7`E`JCaLza#nkQSFH4krP|I?|1_5d)+PH_XlTX|rG_AIWto79R8b@6ss)Lb zcq9^pEh?dnDZ0eS-BYgsYuW}>Z#YdjpJ;n~P=sfS9|{nvm)1z*D$^x!#)zRtTL%Y? z*te&WN_70=&%v&36}nA9$49q|QbHZEf@s2*H%>CZtL4EbSd*tgn*hIyWZd_|T&uw| z02K!M*uTawPO{SCIAwgFEI9s*3G^uc&;sqjmxhM@dzQ$S0cI$EyNKXfXg-&=_(O5x^J<(J$4J#qU*ykJ`#nYQD=nB0!_Sm-|KS~Q)BMaRbbJSRPT zzbE}wEAw9W;{Q0Rl)B+u`5J4i9pKxOeic7Cx?~++>Xr4LdHK76c}MDx7ESk({FB zO&Ca|T!AQIo{!bIyqk|CcOOh(MGzCTXZ3nd`zQ3pJZ>wcleI~q4>G%7H6)mnyFj1- z*yhvWM1@?@|2hQP4(GKK-H3agT`s0(qmpp0CYl0-(VX|RHMN;cNvw9XEI$N$2nfgt zT?Uc*pNA_J^J|6k_XST}Rq52CTT~gZS{j!jQgfSg_w#rlp*;fkq6qX%qyTH@p1*!_ zLh?H7Ed{YNJ^pKK1ByW-`VeP~XoJcdI%QIt2Y?1L8TX19_24_-F(twK@%yiIOD)ND}jciCYtdc#AP-1Vn7N&65|1Fk0VS#-?5#(<%_AJvN|>>LSGu z^U+>G>A{V@^VX+usmYZ; zwEyIMI%?DQrLw6ar)ioj7EU^6ZM}ujD9LgjZYl$5iOn2qnFw^1;0$hYuAkOXbwYewsb4ghg*9D~m+|6F7Um)_wf#;y`= z42IpZrdTykj=eF@uIcykEu`qkJuk