From 065a3a0b7bd270318bb322c4c9d21f78173023ea Mon Sep 17 00:00:00 2001 From: Peter Kolbus Date: Sat, 29 Jun 2019 10:56:21 -0500 Subject: [PATCH 1/5] Fix anomalous backslashes in strings Python 3.8 emits a SyntaxWarning for a string with an anomalous backslash, e.g.: :127: SyntaxWarning: invalid escape sequence \s Fix by adding the missing 'r' prefix. Change-Id: If9ad77c1e1707fa2432c8fc31eeb5e6d2d7200a5 Signed-off-by: Peter Kolbus --- src/license_expression/__init__.py | 2 +- src/license_expression/_pyahocorasick.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/license_expression/__init__.py b/src/license_expression/__init__.py index f856e82..9256ae3 100644 --- a/src/license_expression/__init__.py +++ b/src/license_expression/__init__.py @@ -128,7 +128,7 @@ class ExpressionParseError(ParseError, ExpressionError): # mapping of lowercase operator strings to an operator object OPERATORS = {'and': KW_AND, 'or': KW_OR, 'with': KW_WITH} -_simple_tokenizer = re.compile(''' +_simple_tokenizer = re.compile(r''' (?P[^\s\(\)]+) | (?P\s+) diff --git a/src/license_expression/_pyahocorasick.py b/src/license_expression/_pyahocorasick.py index fefe51f..655de94 100644 --- a/src/license_expression/_pyahocorasick.py +++ b/src/license_expression/_pyahocorasick.py @@ -608,7 +608,7 @@ def overlap(self, other): # tokenize to separate text from parens -_tokenizer = re.compile(''' +_tokenizer = re.compile(r''' (?P[^\s\(\)]+) | (?P\s+) From e379ae00cf30ec3ec89ddd861f1dd29827138a19 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Thu, 10 Jun 2021 12:18:38 +0200 Subject: [PATCH 2/5] Update documentation for release Signed-off-by: Philippe Ombredanne --- AUTHORS.rst | 14 ++-- CHANGELOG.rst | 51 +++++++----- README.rst | 214 +++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 208 insertions(+), 71 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index df7d6db..d6a6344 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -1,11 +1,15 @@ The following 
organizations or individuals have contributed to this code: -- nexB Inc. @nexB -- Philippe Ombredanne @pombredanne -- Thomas Druez @tdruez +- Ayan Sinha Mahapatra @AyanSinhaMahapatra - Carmen Bianca Bakker @carmenbianca - Chin-Yeung Li @chinyeungli -- Steven Esser @majurg -- Sebastian Schuberth @sschuberth +- Dennis Clark @DennisClark +- John Horan @johnmhoran +- Jono Yang @JonoYang - Max Mehl @mxmehl +- nexB Inc. @nexB - Peter Kolbus @pkolbus +- Philippe Ombredanne @pombredanne +- Sebastian Schuberth @sschuberth +- Steven Esser @majurg +- Thomas Druez @tdruez diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 246afc4..f8837dc 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,55 +1,68 @@ Changelog ========= -[Unreleased] +next ------------ -[2.0] - 2021-06-04 ------------------- +v21.6.10 +--------- Added ~~~~~ -- Add ability to simplify license expressions without over simplifying by not - using using boolean logic. + +- Switch to calver for package versioning to better convey the currency of the + bundled data. + +- Include https://scancode-licensedb.aboutcode.org/ licenses list with + ScanCode (v21.6.7) and SPDX licenses (v3.13) keys. Add new functions to + create Licensing using these licenses as LicenseSymbol. + +- Add new License.dedup() method to deduplicate and simplify license expressions + without over simplifying. + +- Add new License.validate() method to return a new ExpressionInfo object with + details on a license expression validation. + Changed ~~~~~~~ -- Drop support for python27 -- Adopt the skeleton from https://github.com/nexB/skeleton +- Drop support for Python 2. 
+- Adopt the project skeleton from https://github.com/nexB/skeleton + and its new configure script -[1.2] - 2019-11-14 +v1.2 - 2019-11-14 ------------------ Added ~~~~~ -- Add ability to render WITH expression wrapped in parenthesis +- Add ability to render WITH expression wrapped in parenthesis Fixes ~~~~~ -- Fix anomalous backslashes in strings +- Fix anomalous backslashes in strings Changed ~~~~~~~ -- Update the thirdparty directory structure. +- Update the thirdparty directory structure. -[1.0] - 2019-10-16 +v1.0 - 2019-10-16 ------------------ Added ~~~~~ -- New version of boolean.py library -- Add ability to leave license expressions unsorted when simplifying +- New version of boolean.py library +- Add ability to leave license expressions unsorted when simplifying Changed ~~~~~~~ -- updated travis CI settings +- updated travis CI settings -[0.999] - 2019-04-29 +v0.999 - 2019-04-29 -------------------- -- Initial release -- license-expression is small utility library to parse, compare and - simplify and normalize license expressions. +- Initial release +- license-expression is a small utility library to parse, compare and + simplify and normalize license expressions. diff --git a/README.rst b/README.rst index 1cd3eca..11c7b4e 100644 --- a/README.rst +++ b/README.rst @@ -2,51 +2,101 @@ license-expression ================== -license-expression is a comprehensive utility library to parse, compare, +``license-expression`` is a comprehensive utility library to parse, compare, simplify and normalize license expressions (such as SPDX license expressions) +using boolean logic. + +- License: Apache-2.0 +- Python: 3.6+ +- Homepage: https://github.com/nexB/license-expression/ +- Install: `pip install license-expression` also available in most Linux distros. + +Software project licenses are often a combination of several free and open +source software licenses. 
License expressions -- as specified by SPDX -- provide +a concise and human readable way to express these licenses without having to +read long license texts, while still being machine-readable. + +License expressions are used by key FOSS projects such as Linux; several +package ecosystems use them to document package licensing metadata such as +npm and Rubygems; they are important when exchanging software data (such as with +SPDX and SBOM in general) as a way to express licensing precisely. + +``license-expression`` is a comprehensive utility library to parse, compare, +simplify and normalize these license expressions (such as SPDX license expressions) using boolean logic like in: `GPL-2.0 or later WITH Classpath Exception AND MIT`. +It includes the license keys from SPDX https://spdx.org/licenses/ (version 3.13) +and ScanCode license DB (version 21.6.7) https://scancode-licensedb.aboutcode.org/ +to get started quickly. + +``license-expression`` is both powerful and simple to use and is used as the +license expression engine in several projects and products such as: + +- AboutCode-toolkit https://github.com/nexB/aboutcode-toolkit +- AlekSIS (School Information System) https://github.com/AlekSIS-org/AlekSIS-Core +- Barista https://github.com/Optum/barista +- Conda forge tools https://github.com/conda-forge/conda-smithy +- DejaCode https://dejacode.com +- DeltaCode https://github.com/nexB/deltacode +- FenixscanX https://github.com/SmartsYoung/FenixscanX +- FetchCode https://github.com/nexB/fetchcode +- Flict https://github.com/vinland-technology/flict and https://github.com/vinland-technology +- license.sh https://github.com/webscopeio/license.sh +- liferay_inbound_checker https://github.com/carmenbianca/liferay_inbound_checker +- REUSE https://reuse.software/ and https://github.com/fsfe/reuse-tool +- ScanCode-io https://github.com/nexB/scancode.io +- ScanCode-toolkit https://github.com/nexB/scancode-toolkit + See also for details: 
-https://spdx.org/sites/cpstandard/files/pages/files/spdxversion2.1.pdf#page=95&zoom=auto +- https://spdx.github.io/spdx-spec/appendix-IV-SPDX-license-expressions/ -license: apache-2.0 +``license-expression`` is also packaged for most Linux distributions. See below. -Python: 3.6+ +Alternative: -Build and tests status -====================== +There is no known alternative library for Python, but there are several similar +libraries in other languages (but not as powerful of course!): -.. |travis-master-icon| image:: https://api.travis-ci.org/nexB/license-expression.png?branch=master - :target: https://travis-ci.org/nexB/license-expression - :alt: MacOSX Master branch tests status - :align: middle +- JavaScript https://github.com/jslicense/spdx-expression-parse.js +- Rust https://github.com/ehuss/license-exprs +- Haskell https://github.com/phadej/spdx +- Go https://github.com/kyoh86/go-spdx +- Ada https://github.com/Fabien-Chouteau/spdx_ada +- Java https://github.com/spdx/tools and https://github.com/aschet/spdx-license-expression-tools -.. 
|appveyor-master-icon| image:: https://ci.appveyor.com/api/projects/status/github/nexB/license-expression?svg=true - :target: https://ci.appveyor.com/project/nexB/license-expression - :alt: Windows Master branch tests status - :align: middle +Build and tests status +====================== -+-------+-----------------------+----------------------+------------------------+ -|Branch |**Linux (Travis)** |**MacOSX (Travis)** |**Windows (AppVeyor)** | -+=======+=======================+======================+========================+ -| | | | | -|Master | |travis-master-icon| | |travis-master-icon| | |appveyor-master-icon| | -| | | | | -+-------+-----------------------+----------------------+------------------------+ ++--------------------------+------------------------+----------------------------------+ +|**Linux & macOS (Travis)**| **Windows (AppVeyor)** |**Linux, Windows & macOS (Azure)**| ++==========================+========================+==================================+ +| | | | +| |travis-badge-icon| | |appveyor-badge-icon| | |azure-badge-icon| | +| | | | ++--------------------------+------------------------+----------------------------------+ Source code and download ======================== -* https://github.com/nexB/license-expression.git -* https://pypi.python.org/pypi/license-expression -* https://aur.archlinux.org/packages/python-license-expression/ (Arch Linux through AUR) +- GitHub https://github.com/nexB/license-expression.git +- PyPI https://pypi.python.org/pypi/license-expression + +Also available in several Linux distros: + +- Arch Linux https://aur.archlinux.org/packages/python-license-expression/ +- Debian https://packages.debian.org/unstable/source/license-expression +- DragonFly BSD https://github.com/DragonFlyBSD/DPorts/tree/master/textproc/py-license-expression +- Fedora https://src.fedoraproject.org/rpms/python-license-expression/ +- FreeBSD https://www.freshports.org/textproc/py-license-expression +- NixOS 
https://github.com/NixOS/nixpkgs/blob/release-21.05/pkgs/development/python-modules/license-expression/default.nix +- openSUSE https://build.opensuse.org/package/show/openSUSE:Factory/python-license-expression + Support ======= -Submit bugs and questions at: - -* https://github.com/nexB/license-expression/issues +- Submit bugs and questions at: https://github.com/nexB/license-expression/issues +- Join the chat at: https://gitter.im/aboutcode-org/discuss Description =========== @@ -55,53 +105,104 @@ This module defines a mini language to parse, validate, simplify, normalize and compare license expressions using a boolean logic engine. This supports SPDX license expressions and also accepts other license naming -conventions and license identifiers aliases to resolve and normalize licenses. +conventions and license identifiers aliases to resolve and normalize any license +expressions. Using boolean logic, license expressions can be tested for equality, containment, equivalence and can be normalized or simplified. -The main entry point is the Licensing object. +It also bundles the SPDX License list (3.13 as of now) and the ScanCode license +DB (based on ScanCode 21.6.7) to easily parse and validate expressions using +the license symbols. + Usage examples ============== -For example: - -.. code-block:: python +The main entry point is the ``Licensing`` object that you can use to parse, +validate, compare, simplify and normalize license expressions. 
+ +Create an SPDX Licensing and parse expressions:: + + >>> from license_expression import get_spdx_licensing + >>> licensing = get_spdx_licensing() + >>> expression = ' GPL-2.0 or LGPL-2.1 and mit ' + >>> parsed = licensing.parse(expression) + >>> print(parsed.pretty()) + OR( + LicenseSymbol('GPL-2.0-only'), + AND( + LicenseSymbol('LGPL-2.1-only'), + LicenseSymbol('MIT') + ) + ) + + >>> str(parsed) + 'GPL-2.0-only OR (LGPL-2.1-only AND MIT)' + + >>> licensing.parse('unknwon with foo', validate=True, strict=True) + license_expression.ExpressionParseError: A plain license symbol cannot be used + as an exception in a "WITH symbol" statement. for token: "foo" at position: 13 + + >>> licensing.parse('unknwon with foo', validate=True) + license_expression.ExpressionError: Unknown license key(s): unknwon, foo + + >>> licensing.validate('foo and MIT and GPL-2.0+') + ExpressionInfo( + original_expression='foo and MIT and GPL-2.0+', + normalized_expression=None, + errors=['Unknown license key(s): foo'], + invalid_symbols=['foo'] + ) + + +Create a simple Licensing and parse expressions:: >>> from license_expression import Licensing, LicenseSymbol >>> licensing = Licensing() >>> expression = ' GPL-2.0 or LGPL-2.1 and mit ' >>> parsed = licensing.parse(expression) - >>> expected = 'GPL-2.0 OR (LGPL-2.1 AND mit)' - >>> assert expected == parsed.render('{symbol.key}') + >>> expression = ' GPL-2.0 or LGPL-2.1 and mit ' + >>> expected = 'GPL-2.0-only OR (LGPL-2.1-only AND mit)' + >>> assert str(parsed) == expected + >>> assert parsed.render('{symbol.key}') == expected + + +Create a Licensing with your own license symbols:: >>> expected = [ ... LicenseSymbol('GPL-2.0'), ... LicenseSymbol('LGPL-2.1'), ... LicenseSymbol('mit') ... 
] - >>> assert expected == licensing.license_symbols(expression) - >>> assert expected == licensing.license_symbols(parsed) + >>> assert licensing.license_symbols(expression) == expected + >>> assert licensing.license_symbols(parsed) == expected >>> symbols = ['GPL-2.0+', 'Classpath', 'BSD'] >>> licensing = Licensing(symbols) >>> expression = 'GPL-2.0+ with Classpath or (bsd)' >>> parsed = licensing.parse(expression) >>> expected = 'GPL-2.0+ WITH Classpath OR BSD' - >>> assert expected == parsed.render('{symbol.key}') + >>> assert parsed.render('{symbol.key}') == expected >>> expected = [ ... LicenseSymbol('GPL-2.0+'), ... LicenseSymbol('Classpath'), ... LicenseSymbol('BSD') ... ] - >>> assert expected == licensing.license_symbols(parsed) - >>> assert expected == licensing.license_symbols(expression) + >>> assert licensing.license_symbols(parsed) == expected + >>> assert licensing.license_symbols(expression) == expected + +An expression can be deduplicated, to remove duplicate license subexpressions +without changing the order and without considering license choices as simplifiable:: -And expression can be simplified: + >>> expression2 = ' GPL-2.0 or (mit and LGPL 2.1) or bsd Or GPL-2.0 or (mit and LGPL 2.1)' + >>> parsed2 = licensing.parse(expression2) + >>> str(parsed2) + 'GPL-2.0 OR (mit AND LGPL 2.1) OR BSD OR GPL-2.0 OR (mit AND LGPL 2.1)' + >>> assert str(parsed2.simplify()) == 'BSD OR GPL-2.0 OR (LGPL 2.1 AND mit)' -.. code-block:: python +Expressions can be simplified, treating them as boolean expressions:: >>> expression2 = ' GPL-2.0 or (mit and LGPL 2.1) or bsd Or GPL-2.0 or (mit and LGPL 2.1)' >>> parsed2 = licensing.parse(expression2) @@ -111,8 +212,6 @@ And expression can be simplified: Two expressions can be compared for equivalence and containment: -.. 
code-block:: python - >>> expr1 = licensing.parse(' GPL-2.0 or (LGPL 2.1 and mit) ') >>> expr2 = licensing.parse(' (mit and LGPL 2.1) or GPL-2.0 ') >>> licensing.is_equivalent(expr1, expr2) @@ -134,8 +233,29 @@ Two expressions can be compared for equivalence and containment: Development =========== -* Checkout a clone from https://github.com/nexB/license-expression.git -* Then run ``./configure`` (or ``configure.bat``) and then ``source bin/activate``. - This will install all vendored dependencies in a local virtualenv, including +- Checkout a clone from https://github.com/nexB/license-expression.git + +- Then run ``./configure --dev`` and then ``source tmp/bin/activate``. + This will install all dependencies in a local virtualenv, including development deps. -* To run the tests, run ``py.test -vvs`` + +- On Windows run ``configure.bat --dev`` and then ``Scripts\bin\activate``. + +- To run the tests, run ``pytest -vvs`` + + +.. |travis-badge-icon| image:: https://api.travis-ci.org/nexB/license-expression.png?branch=master + :target: https://travis-ci.org/nexB/license-expression + :alt: Travis tests status + :align: middle + +.. |appveyor-badge-icon| image:: https://ci.appveyor.com/api/projects/status/github/nexB/license-expression?svg=true + :target: https://ci.appveyor.com/project/nexB/license-expression + :alt: Appveyor tests status + :align: middle + +.. 
|azure-badge-icon| image:: https://dev.azure.com/nexB/license-expression/_apis/build/status/nexB.license-expression?branchName=master + :target: https://dev.azure.com/nexB/license-expression/_build/latest?definitionId=2&branchName=master + :alt: Azure pipelines tests status + :align: middle + From 0760576902ded401b8d07a7daf2dfa8a9a628d39 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Thu, 10 Jun 2021 15:27:28 +0200 Subject: [PATCH 3/5] Assert result == expected This is the new pytest (and natural way) Signed-off-by: Philippe Ombredanne --- tests/test_license_expression.py | 230 ++++++++++++++++--------------- 1 file changed, 119 insertions(+), 111 deletions(-) diff --git a/tests/test_license_expression.py b/tests/test_license_expression.py index 0b09340..1a4ae99 100644 --- a/tests/test_license_expression.py +++ b/tests/test_license_expression.py @@ -9,10 +9,10 @@ import pathlib import sys from collections import namedtuple -from unittest import TestCase from os.path import abspath from os.path import join from os.path import dirname +from unittest import TestCase from boolean.boolean import PARSE_UNBALANCED_CLOSING_PARENS from boolean.boolean import PARSE_INVALID_SYMBOL_SEQUENCE @@ -45,7 +45,6 @@ from license_expression import build_licensing from license_expression import build_spdx_licensing from license_expression import get_license_index -from license_expression import load_licensing_from_license_index def _parse_error_as_dict(pe): @@ -116,7 +115,7 @@ def test_tokenize_plain1(self): (TOKEN_AND, 'and', 9), (LicenseSymbol(key='gpl'), 'gpl', 13) ] - assert expected == list(licensing.tokenize(' ( mit ) and gpl')) + assert list(licensing.tokenize(' ( mit ) and gpl')) == expected def test_tokenize_plain2(self): licensing = Licensing() @@ -127,7 +126,7 @@ def test_tokenize_plain2(self): (LicenseSymbol(key='gpl'), 'gpl', 9), (TOKEN_RPAR, ')', 12) ] - assert expected == list(licensing.tokenize('(mit and gpl)')) + assert list(licensing.tokenize('(mit and 
gpl)')) == expected def test_tokenize_plain3(self): licensing = Licensing() @@ -138,7 +137,7 @@ def test_tokenize_plain3(self): (TOKEN_OR, 'or', 12), (LicenseSymbol(key='gpl'), 'gpl', 15) ] - assert expected == list(licensing.tokenize('mit AND gpl or gpl')) + assert list(licensing.tokenize('mit AND gpl or gpl')) == expected def test_tokenize_plain4(self): licensing = Licensing() @@ -155,7 +154,7 @@ def test_tokenize_plain4(self): (TOKEN_RPAR, ')', 24), (TOKEN_RPAR, ')', 25) ] - assert expected == list(licensing.tokenize('((l-a+ AND l-b) OR (l-c+))')) + assert list(licensing.tokenize('((l-a+ AND l-b) OR (l-c+))')) == expected def test_tokenize_plain5(self): licensing = Licensing() @@ -178,7 +177,10 @@ def test_tokenize_plain5(self): 'gpl with classpath', 31 ) ] - assert expected == list(licensing.tokenize('((l-a+ AND l-b) OR (l-c+)) and gpl with classpath')) + tokens = licensing.tokenize( + '((l-a+ AND l-b) OR (l-c+)) and gpl with classpath' + ) + assert list(tokens) == expected class LicensingTokenizeWithSymbolsTest(TestCase): @@ -207,7 +209,7 @@ def test_tokenize_1_with_symbols(self): (TOKEN_AND, 'AND', 28), (mit, 'MIT license', 32) ] - assert expected == list(result) + assert list(result) == expected def test_tokenize_1_no_symbols(self): licensing = Licensing() @@ -222,7 +224,7 @@ def test_tokenize_1_no_symbols(self): (LicenseSymbol(u'MIT license'), 'MIT license', 32) ] - assert expected == list(result) + assert list(result) == expected def test_tokenize_with_trailing_unknown(self): gpl_20, _gpl_20_plus, lgpl_21, _mit, licensing = self.get_symbols_and_licensing() @@ -234,7 +236,7 @@ def test_tokenize_with_trailing_unknown(self): (TOKEN_AND, 'and', 27), (LicenseSymbol(key='mit2'), 'mit2', 31), ] - assert expected == list(result) + assert list(result) == expected def test_tokenize_3(self): gpl_20, gpl_20_plus, lgpl_21, mit, licensing = self.get_symbols_and_licensing() @@ -253,7 +255,7 @@ def test_tokenize_3(self): (2, 'or', 64), (mit, 'mit', 67) ] - assert expected 
== list(result) + assert list(result) == expected def test_tokenize_unknown_as_trailing_single_attached_character(self): symbols = [LicenseSymbol('MIT', ['MIT license'])] @@ -262,7 +264,7 @@ def test_tokenize_unknown_as_trailing_single_attached_character(self): expected = [ (LicenseSymbol(u'mit2'), 'mit2', 0), ] - assert expected == result + assert result == expected def test_tokenize_with_unknown_symbol_containing_known_symbol_leading(self): l = Licensing(['gpl-2.0']) @@ -273,7 +275,7 @@ def test_tokenize_with_unknown_symbol_containing_known_symbol_leading(self): TOKEN_AND, LicenseSymbol(key='gpl-2.0-plus'), ] - assert expected == result + assert result == expected def test_tokenize_with_unknown_symbol_containing_known_symbol_contained(self): l = Licensing(['gpl-2.0']) @@ -285,7 +287,7 @@ def test_tokenize_with_unknown_symbol_containing_known_symbol_contained(self): LicenseSymbol(u'exception-gpl-2.0-plus') ) ] - assert expected == result + assert result == expected def test_tokenize_with_unknown_symbol_containing_known_symbol_trailing(self): l = Licensing(['gpl-2.0']) @@ -296,7 +298,7 @@ def test_tokenize_with_unknown_symbol_containing_known_symbol_trailing(self): TOKEN_AND, LicenseSymbol(u'exception-gpl-2.0') ] - assert expected == result + assert result == expected class LicensingParseTest(TestCase): @@ -324,7 +326,7 @@ def test_parse_raise_ParseError(self): 'token_string': ')', 'token_type': TOKEN_RPAR } - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_raise_ExpressionError_when_validating(self): expression = 'gpl and bsd or lgpl with exception' @@ -348,7 +350,7 @@ def test_parse_raise_ParseError_when_validating_strict(self): 'token_string': 'exception', 'token_type': TOKEN_SYMBOL } - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_raise_ParseError_when_strict_no_validate(self): expression = 'gpl and bsd or lgpl with exception' @@ -363,7 +365,7 @@ 
def test_parse_raise_ParseError_when_strict_no_validate(self): 'token_string': 'exception', 'token_type': TOKEN_SYMBOL } - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_raise_ExpressionError_when_validating_strict_with_unknown(self): expression = 'gpl and bsd or lgpl with exception' @@ -429,7 +431,7 @@ def test_parse_invalid_expression_raise_exception6(self): 'token_string': 'OR', 'token_type': TOKEN_OR } - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_not_invalid_expression_raise_no_exception2(self): licensing = Licensing() @@ -449,7 +451,7 @@ def test_parse_can_parse(self): self.assertEqual('GPL-2.0 OR (LGPL2.1 AND mit)', str(parsed)) expected = licensing.OR(gpl2, licensing.AND(lgpl, mit)) - assert expected == parsed + assert parsed == expected def test_parse_errors_catch_invalid_nesting(self): licensing = Licensing() @@ -463,7 +465,7 @@ def test_parse_errors_catch_invalid_nesting(self): 'token_string': '(', 'token_type': TOKEN_LPAR } - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_errors_catch_invalid_expression_with_bare_and(self): licensing = Licensing() @@ -477,7 +479,7 @@ def test_parse_errors_catch_invalid_expression_with_bare_and(self): 'token_string': 'and', 'token_type': TOKEN_AND } - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_errors_catch_invalid_expression_with_or_and_no_other(self): licensing = Licensing() @@ -491,7 +493,7 @@ def test_parse_errors_catch_invalid_expression_with_or_and_no_other(self): 'token_string': 'or', 'token_type': TOKEN_OR } - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_errors_catch_invalid_expression_with_empty_parens(self): licensing = Licensing() @@ -505,7 +507,7 @@ def 
test_parse_errors_catch_invalid_expression_with_empty_parens(self): 'token_string': 'with', 'token_type': TOKEN_WITH } - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_errors_catch_invalid_non_unicode_byte_strings_on_python3(self): py2 = sys.version_info[0] == 2 @@ -609,14 +611,14 @@ def test_end_to_end(self): l = Licensing() expr = l.parse(' GPL-2.0 or LGPL-2.1 and mit ') expected = 'GPL-2.0 OR (LGPL-2.1 AND mit)' - assert expected == str(expr) + assert str(expr) == expected expected = [ LicenseSymbol('GPL-2.0'), LicenseSymbol('LGPL-2.1'), LicenseSymbol('mit'), ] - assert expected == l.license_symbols(expr) + assert l.license_symbols(expr) == expected def test_pretty(self): l = Licensing() @@ -629,7 +631,7 @@ def test_pretty(self): LicenseSymbol('mit') ) )''' - assert expected == expr.pretty() + assert expr.pretty() == expected def test_simplify_and_contains(self): l = Licensing() @@ -716,7 +718,7 @@ def test_simplify_and_equivalent_and_contains(self): # note thats simplification does SORT the symbols such that they can # eventually be compared sequence-wise. 
This sorting is based on license key expected = 'GPL-2.0 OR bsd OR (LGPL-2.1 AND mit)' - assert expected == str(expr2.simplify()) + assert str(expr2.simplify()) == expected # Two expressions can be compared for equivalence: expr1 = l.parse(' GPL-2.0 or (LGPL-2.1 and mit) ') @@ -803,7 +805,7 @@ def test_render_complex(self): 'AND SAX-PD AND Unicode-Inc-License-Agreement ' 'AND W3C-Software-Notice AND License AND W3C-Documentation-License') - assert expected == result.render('{symbol.key}') + assert result.render('{symbol.key}') == expected expectedkey = ('EPL-1.0 AND Apache-1.1 AND Apache-2.0 AND BSD-Modified AND ' 'CPL-1.0 AND ICU-Composite-License AND JPEG-License AND JDOM-License AND ' 'LGPL-2.0 AND MIT-Open-Group AND MPL-1.1 AND SAX-PD AND ' @@ -817,7 +819,7 @@ def test_render_with(self): result = licensing.parse(expression) expected = 'GPL-2.0 WITH Classpath-2.0 OR BSD-new' - assert expected == result.render('{symbol.key}') + assert result.render('{symbol.key}') == expected expected_html = ( 'GPL-2.0 WITH ' @@ -826,7 +828,7 @@ def test_render_with(self): assert expected_html == result.render('{symbol.key}') expected = 'GPL-2.0 WITH Classpath-2.0 OR BSD-new' - assert expected == result.render('{symbol.key}') + assert result.render('{symbol.key}') == expected def test_parse_complex(self): licensing = Licensing() @@ -835,7 +837,7 @@ def test_parse_complex(self): # this may look weird, but we did not provide symbols hence in "or later", # "later" is treated as if it were a license expected = 'GPL-2.0 OR (later WITH classpath-Exception AND mit) OR (LPL-2.1 AND mit) OR later' - assert expected == result.render('{symbol.key}') + assert result.render('{symbol.key}') == expected def test_parse_complex2(self): licensing = Licensing() @@ -845,9 +847,9 @@ def test_parse_complex2(self): LicenseSymbol('LGPL-2.1'), LicenseSymbol('mit') ] - assert expected == sorted(licensing.license_symbols(expr)) + assert sorted(licensing.license_symbols(expr)) == expected expected = 
'GPL-2.0 OR (LGPL-2.1 AND mit)' - assert expected == expr.render('{symbol.key}') + assert expr.render('{symbol.key}') == expected def test_Licensing_can_tokenize_valid_expressions_with_symbols_that_contain_and_with_or(self): licensing = Licensing() @@ -871,7 +873,7 @@ def test_Licensing_can_tokenize_valid_expressions_with_symbols_that_contain_and_ (LicenseSymbol(key='withme'), 'withme', 68) ] - assert expected == result + assert result == expected def test_Licensing_can_simple_tokenize_valid_expressions_with_symbols_that_contain_and_with_or(self): licensing = Licensing() @@ -905,7 +907,7 @@ def test_Licensing_can_simple_tokenize_valid_expressions_with_symbols_that_conta ' ', 'withme' ] - assert expected == result + assert result == expected def test_Licensing_can_parse_valid_expressions_with_symbols_that_contain_and_with_or(self): licensing = Licensing() @@ -913,14 +915,14 @@ def test_Licensing_can_parse_valid_expressions_with_symbols_that_contain_and_wit result = licensing.parse(expression) expected = 'orgpl OR (withbsd WITH orclasspath AND andmit) OR (anlgpl AND ormit) OR withme' - assert expected == result.render('{symbol.key}') + assert result.render('{symbol.key}') == expected def test_Licensing_can_parse_valid_expressions_with_symbols_that_contain_spaces(self): licensing = Licensing() expression = ' GPL-2.0 or (mit and LGPL 2.1) or bsd Or GPL-2.0 or (mit and LGPL 2.1)' parsed = licensing.parse(expression) expected = 'GPL-2.0 OR (mit AND LGPL 2.1) OR bsd OR GPL-2.0 OR (mit AND LGPL 2.1)' - assert expected == str(parsed) + assert str(parsed) == expected def test_parse_invalid_expression_with_trailing_or(self): licensing = Licensing() @@ -937,7 +939,7 @@ def test_parse_invalid_expression_with_trailing_or_and_valid_start_does_not_rais parsed = licensing.parse(expression) # ExpressionError: OR requires two or more licenses as in: MIT OR BSD expected = 'mit OR mit' - assert expected == str(parsed) + assert str(parsed) == expected def 
test_parse_invalid_expression_with_repeated_trailing_or_raise_exception(self): licensing = Licensing() @@ -952,7 +954,7 @@ def test_parse_invalid_expression_with_repeated_trailing_or_raise_exception(self 'token_string': 'or', 'token_type': TOKEN_OR } - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_invalid_expression_drops_single_trailing_or(self): licensing = Licensing() @@ -988,7 +990,7 @@ def test_parse_invalid_expression_with_single_leading_or_raise_exception(self): 'token_string': 'or', 'token_type': TOKEN_OR } - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_Licensing_can_parse_expressions_with_symbols_that_contain_a_colon(self): licensing = Licensing() @@ -996,7 +998,7 @@ def test_Licensing_can_parse_expressions_with_symbols_that_contain_a_colon(self) result = licensing.parse(expression) expected = 'DocumentRef-James-1.0:LicenseRef-Eric-2.0' - assert expected == result.render('{symbol.key}') + assert result.render('{symbol.key}') == expected class LicensingParseWithSymbolsSimpleTest(TestCase): @@ -1025,7 +1027,7 @@ def test_parse_license_expression1(self): result = licensing.parse(express_string) assert express_string == str(result) expected = a - assert expected == result + assert result == expected assert [] == licensing.unknown_license_keys(result) def test_parse_license_expression_with_alias(self): @@ -1034,7 +1036,7 @@ def test_parse_license_expression_with_alias(self): result = licensing.parse(express_string) assert 'L-a+' == str(result) expected = ap - assert expected == result + assert result == expected assert [] == licensing.unknown_license_keys(result) def test_parse_license_expression3(self): @@ -1043,7 +1045,7 @@ def test_parse_license_expression3(self): result = licensing.parse(express_string) assert 'L-a+' == str(result) expected = ap - assert expected == result + assert result == expected assert [] == 
licensing.unknown_license_keys(result) def test_parse_license_expression4(self): @@ -1052,7 +1054,7 @@ def test_parse_license_expression4(self): result = licensing.parse(express_string) assert 'l-a' == str(result) expected = LicenseSymbol(key='l-a', aliases=()) - assert expected == result + assert result == expected assert [] == licensing.unknown_license_keys(result) def test_parse_license_expression5(self): @@ -1061,7 +1063,7 @@ def test_parse_license_expression5(self): result = licensing.parse(express_string) assert '(L-a+ AND l-b) OR l-c' == str(result) expected = licensing.OR(licensing.AND(ap, b), c) - assert expected == result + assert result == expected assert [] == licensing.unknown_license_keys(result) def test_parse_license_expression6(self): @@ -1070,7 +1072,7 @@ def test_parse_license_expression6(self): result = licensing.parse(express_string) assert 'l-a AND l-b' == str(result) expected = licensing.AND(a, b) - assert expected == result + assert result == expected assert [] == licensing.unknown_license_keys(result) def test_parse_license_expression7(self): @@ -1079,7 +1081,7 @@ def test_parse_license_expression7(self): result = licensing.parse(express_string) assert 'l-a OR l-b' == str(result) expected = licensing.OR(a, b) - assert expected == result + assert result == expected assert [] == licensing.unknown_license_keys(result) def test_parse_license_expression8(self): @@ -1088,7 +1090,7 @@ def test_parse_license_expression8(self): result = licensing.parse(express_string) assert '(l-a AND l-b) OR l-c' == str(result) expected = licensing.OR(licensing.AND(a, b), c) - assert expected == result + assert result == expected assert [] == licensing.unknown_license_keys(result) def test_parse_license_expression8_twice(self): @@ -1111,39 +1113,39 @@ def test_parse_license_expression_with_trailing_space_plus(self): expresssion_str = 'l-a' result = licensing.parse(expresssion_str) - assert expresssion_str == str(result) - assert [] == 
licensing.unknown_license_keys(result) + assert str(result) == expresssion_str + assert licensing.unknown_license_keys(result) == [] # plus sign is not attached to the symbol, but an alias expresssion_str = 'l-a +' result = licensing.parse(expresssion_str) - assert 'l-a+' == str(result).lower() - assert [] == licensing.unknown_license_keys(result) + assert str(result).lower() == 'l-a+' + assert licensing.unknown_license_keys(result) == [] expresssion_str = '(l-a)' result = licensing.parse(expresssion_str) - assert 'l-a' == str(result).lower() - assert [] == licensing.unknown_license_keys(result) + assert str(result).lower() == 'l-a' + assert licensing.unknown_license_keys(result) == [] expresssion_str = '((l-a+ AND l-b) OR (l-c))' result = licensing.parse(expresssion_str) - assert '(L-a+ AND l-b) OR l-c' == str(result) - assert [] == licensing.unknown_license_keys(result) + assert str(result) == '(L-a+ AND l-b) OR l-c' + assert licensing.unknown_license_keys(result) == [] expresssion_str = 'l-a and l-b' result = licensing.parse(expresssion_str) - assert 'l-a AND l-b' == str(result) - assert [] == licensing.unknown_license_keys(result) + assert str(result) == 'l-a AND l-b' + assert licensing.unknown_license_keys(result) == [] expresssion_str = 'l-a or l-b' result = licensing.parse(expresssion_str) - assert 'l-a OR l-b' == str(result) - assert [] == licensing.unknown_license_keys(result) + assert str(result) == 'l-a OR l-b' + assert licensing.unknown_license_keys(result) == [] expresssion_str = 'l-a and l-b OR l-c' result = licensing.parse(expresssion_str) - assert '(l-a AND l-b) OR l-c' == str(result) - assert [] == licensing.unknown_license_keys(result) + assert str(result) == '(l-a AND l-b) OR l-c' + assert licensing.unknown_license_keys(result) == [] def test_parse_of_side_by_side_symbols_raise_exception(self): gpl2 = LicenseSymbol('gpl') @@ -1162,12 +1164,14 @@ def test_validate_symbols(self): LicenseSymbol('l-c'), ] warnings, errors = validate_symbols(symbols) 
+ expectedw = [] - assert expectedw == warnings + assert warnings == expectedw + expectede = [ - 'Invalid duplicated license key: l-a.', + "Invalid duplicated license key: 'l-a'.", ] - assert expectede == errors + assert errors == expectede class LicensingParseWithSymbolsTest(TestCase): @@ -1185,7 +1189,7 @@ def test_parse_raise_ParseError_when_validating_strict_with_non_exception_symbol 'position': 25, 'token_string': 'exception', 'token_type': TOKEN_SYMBOL} - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_raise_ParseError_when_validating_strict_with_exception_symbols_in_incorrect_spot(self): licensing = Licensing([LicenseSymbol('gpl', is_exception=False), @@ -1200,7 +1204,7 @@ def test_parse_raise_ParseError_when_validating_strict_with_exception_symbols_in 'position': 0, 'token_string': 'exception', 'token_type': TOKEN_SYMBOL} - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected try: licensing.parse('gpl with gpl', validate=True, strict=True) @@ -1211,7 +1215,7 @@ def test_parse_raise_ParseError_when_validating_strict_with_exception_symbols_in 'position': 9, 'token_string': 'gpl', 'token_type': TOKEN_SYMBOL} - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_with_unknown_symbol_string_contained_in_known_symbol_does_not_crash_with(self): l = Licensing(['lgpl-3.0-plus']) @@ -1368,7 +1372,7 @@ def test_parse_expression_with_trailing_unknown_should_raise_exception(self): (mit, 'mit', 67), (unknown, '123', 71) ] - assert expected == tokens + assert tokens == expected try: licensing.parse('The GNU GPL 20 or later or (LGPL-2.1 and mit) or The GNU GPL 20 or mit 123') @@ -1376,7 +1380,7 @@ def test_parse_expression_with_trailing_unknown_should_raise_exception(self): except ParseError as pe: expected = {'error_code': PARSE_INVALID_SYMBOL_SEQUENCE, 'position': 71, 'token_string': '123', 'token_type': unknown} - assert expected == 
_parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_expression_with_trailing_unknown_should_raise_exception2(self): _gpl2, _gpl2_plus, _lgpl, _mit, _mitand2, licensing = self.get_symbols_and_licensing() @@ -1388,7 +1392,7 @@ def test_parse_expression_with_trailing_unknown_should_raise_exception2(self): except ParseError as pe: expected = {'error_code': PARSE_INVALID_SYMBOL_SEQUENCE, 'position': 22, 'token_string': '123', 'token_type': unknown} - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_parse_expression_with_WITH(self): gpl2, _gpl2plus, lgpl, mit, mitand2, _ = self.get_symbols_and_licensing() @@ -1413,13 +1417,13 @@ def test_parse_expression_with_WITH(self): (LicenseWithExceptionSymbol(mit, mitexp), 'mit with mit exp', 67) ] - assert expected == tokens + assert tokens == expected parsed = licensing.parse(expr) expected = 'GPL-2.0+ OR (LGPL-2.1 AND MIT) OR gpl-2.0 OR MIT WITH mitexp' - assert expected == str(parsed) + assert str(parsed) == expected expected = 'GPL-2.0+ OR (LGPL-2.1 AND MIT) OR gpl-2.0 OR MIT WITH mitexp' - assert expected == parsed.render() + assert parsed.render() == expected def test_parse_expression_with_WITH_and_unknown_symbol(self): gpl2, _gpl2plus, lgpl, mit, mitand2, _ = self.get_symbols_and_licensing() @@ -1439,7 +1443,7 @@ def test_unknown_keys(self): expr = 'The GNU GPL 20 or LGPL-2.1 and mit' parsed = licensing.parse(expr) expected = 'gpl-2.0 OR (LGPL-2.1 AND MIT)' - assert expected == str(parsed) + assert str(parsed) == expected assert 'gpl-2.0 OR (LGPL-2.1 AND MIT)' == parsed.render('{symbol.key}') assert [] == licensing.unknown_license_keys(parsed) assert [] == licensing.unknown_license_keys(expr) @@ -1449,9 +1453,9 @@ def test_unknown_keys_with_trailing_char(self): expr = 'The GNU GPL 20 or LGPL-2.1 and mitand2' parsed = licensing.parse(expr) expected = [gpl2, lgpl, mitand2] - assert expected == licensing.license_symbols(parsed) - assert 
expected == licensing.license_symbols(licensing.parse(parsed)) - assert expected == licensing.license_symbols(expr) + assert licensing.license_symbols(parsed) == expected + assert licensing.license_symbols(licensing.parse(parsed)) == expected + assert licensing.license_symbols(expr) == expected assert [] == licensing.unknown_license_keys(parsed) assert [] == licensing.unknown_license_keys(expr) @@ -1479,7 +1483,7 @@ def test_parse_with_overlapping_key_without_symbols(self): licensing = Licensing() results = str(licensing.parse(expression)) expected = 'mit OR (mit AND zlib) OR mit OR mit WITH verylonglicense' - assert expected == results + assert results == expected def test_advanced_tokenizer_tokenize_with_overlapping_key_with_symbols_and_trailing_unknown(self): expression = 'mit or mit AND zlib or mit or mit with verylonglicense' @@ -1510,7 +1514,7 @@ def test_advanced_tokenizer_tokenize_with_overlapping_key_with_symbols_and_trail Token(39, 53, 'verylonglicense', None), ] - assert expected == results + assert results == expected def test_advanced_tokenizer_iter_with_overlapping_key_with_symbols_and_trailing_unknown(self): expression = 'mit or mit AND zlib or mit or mit with verylonglicense' @@ -1540,7 +1544,7 @@ def test_advanced_tokenizer_iter_with_overlapping_key_with_symbols_and_trailing_ Token(34, 37, 'with', Keyword(value=u'with', type=10)), Token(39, 53, 'verylonglicense', None), ] - assert expected == results + assert results == expected def test_advanced_tokenizer_iter_with_overlapping_key_with_symbols_and_trailing_unknown2(self): expression = 'mit with verylonglicense' @@ -1555,7 +1559,7 @@ def test_advanced_tokenizer_iter_with_overlapping_key_with_symbols_and_trailing_ Token(4, 7, 'with', Keyword(value=u'with', type=10)), Token(9, 23, 'verylonglicense', None), ] - assert expected == results + assert results == expected def test_tokenize_with_overlapping_key_with_symbols_and_trailing_unknown(self): expression = 'mit or mit AND zlib or mit or mit with 
verylonglicense' @@ -1588,11 +1592,11 @@ def test_tokenize_with_overlapping_key_with_symbols_and_trailing_unknown(self): 30) ] - assert expected == results + assert results == expected results = str(licensing.parse(expression)) expected = 'MIT OR (MIT AND zlib) OR MIT OR MIT WITH verylonglicense' - assert expected == results + assert results == expected class LicensingSymbolsTest(TestCase): @@ -1624,7 +1628,7 @@ def test_get_license_symbols2(self): LicenseSymbol('mit'), LicenseSymbol('Foo exception', is_exception=True), ] - assert expected == l.license_symbols(l.parse(expr), unique=False) + assert l.license_symbols(l.parse(expr), unique=False) == expected def test_get_license_symbols3(self): symbols = [ @@ -1661,7 +1665,7 @@ def test_get_license_symbols4(self): LicenseSymbol('Foo exception', is_exception=True), ] - assert expected == l.license_symbols(l.parse(expr), unique=False) + assert l.license_symbols(l.parse(expr), unique=False) == expected def test_license_symbols(self): licensing = Licensing([ @@ -1692,16 +1696,16 @@ def test_license_symbols(self): parsed = licensing.parse(expr) expected = [gpl_with_cp, mit, mit_with_some, lgpl, gpl_with_cp, mitplus, lgpl, mit, gpl2_with_someplus, lgpl] - assert expected == licensing.license_symbols(parsed, unique=False, decompose=False) + assert licensing.license_symbols(parsed, unique=False, decompose=False) == expected expected = [gpl_with_cp, mit, mit_with_some, lgpl, mitplus, gpl2_with_someplus] - assert expected == licensing.license_symbols(parsed, unique=True, decompose=False) + assert licensing.license_symbols(parsed, unique=True, decompose=False) == expected expected = [gpl2plus, cpex, mit, mit, someplus, lgpl, gpl2plus, cpex, mitplus, lgpl, mit, gpl2plus, someplus, lgpl] - assert expected == licensing.license_symbols(parsed, unique=False, decompose=True) + assert licensing.license_symbols(parsed, unique=False, decompose=True) == expected expected = [gpl2plus, cpex, mit, someplus, lgpl, mitplus] - assert expected 
== licensing.license_symbols(parsed, unique=True, decompose=True) + assert licensing.license_symbols(parsed, unique=True, decompose=True) == expected def test_primary_license_symbol_and_primary_license_key(self): licensing = Licensing([ @@ -1717,32 +1721,35 @@ def test_primary_license_symbol_and_primary_license_key(self): cpex = LicenseSymbol('classpath Exception') expected = LicenseWithExceptionSymbol(gpl, cpex) parsed = licensing.parse(expr) - assert expected == licensing.primary_license_symbol(parsed, decompose=False) + assert licensing.primary_license_symbol(parsed, decompose=False) == expected assert gpl == licensing.primary_license_symbol(parsed, decompose=True) assert 'GPL-2.0 or LATER' == licensing.primary_license_key(parsed) expr = ' GPL-2.0 or later with classpath Exception and mit or LGPL 2.1 and mit or later ' expected = 'GPL-2.0 or LATER WITH classpath Exception' - assert expected == licensing.primary_license_symbol( - parsed, decompose=False).render('{symbol.key}') + result = licensing.primary_license_symbol( + parsed, + decompose=False + ).render('{symbol.key}') + assert result == expected def test_render_plain(self): l = Licensing() result = l.parse('gpl-2.0 WITH exception-gpl-2.0-plus or MIT').render() expected = 'gpl-2.0 WITH exception-gpl-2.0-plus OR MIT' - assert expected == result + assert result == expected def test_render_as_readable_does_not_wrap_in_parens_single_with(self): l = Licensing() result = l.parse('gpl-2.0 WITH exception-gpl-2.0-plus').render_as_readable() expected = 'gpl-2.0 WITH exception-gpl-2.0-plus' - assert expected == result + assert result == expected def test_render_as_readable_wraps_in_parens_with_and_other_subexpressions(self): l = Licensing() result = l.parse('mit AND gpl-2.0 WITH exception-gpl-2.0-plus').render_as_readable() expected = 'mit AND (gpl-2.0 WITH exception-gpl-2.0-plus)' - assert expected == result + assert result == expected def test_render_as_readable_does_not_wrap_in_parens_if_no_with(self): l = 
Licensing() @@ -1853,7 +1860,7 @@ def test_simple_tokenizer(self): Token(235, 235, ' ', None), Token(236, 238, '2.1', LicenseSymbol(key='2.1',)) ] - assert expected == results + assert results == expected def test_tokenize_can_handle_expressions_with_symbols_that_contain_a_colon(self): licensing = Licensing() @@ -1865,7 +1872,7 @@ def test_tokenize_can_handle_expressions_with_symbols_that_contain_a_colon(self) u'DocumentRef-James-1.0:LicenseRef-Eric-2.0', 0) ] - assert expected == result + assert result == expected def test_tokenize_simple_can_handle_expressions_with_symbols_that_contain_a_colon(self): licensing = Licensing() @@ -1877,7 +1884,7 @@ def test_tokenize_simple_can_handle_expressions_with_symbols_that_contain_a_colo u'DocumentRef-James-1.0:LicenseRef-Eric-2.0', 0) ] - assert expected == result + assert result == expected def test_tokenize_can_handle_expressions_with_tabs_and_new_lines(self): licensing = Licensing() @@ -1887,7 +1894,7 @@ def test_tokenize_can_handle_expressions_with_tabs_and_new_lines(self): (LicenseSymbol(u'this is an expression', is_exception=False), u'this is an expression', 0) ] - assert expected == result + assert result == expected def test_tokenize_simple_can_handle_expressions_with_tabs_and_new_lines(self): licensing = Licensing() @@ -1899,7 +1906,7 @@ def test_tokenize_simple_can_handle_expressions_with_tabs_and_new_lines(self): (LicenseSymbol(u'an', is_exception=False), u'an', 13), (LicenseSymbol(u'expression', is_exception=False), u'expression', 16) ] - assert expected == result + assert result == expected def test_tokenize_step_by_step_does_not_munge_trailing_symbols(self): gpl2 = LicenseSymbol(key='GPL-2.0') @@ -1971,7 +1978,7 @@ def test_tokenize_step_by_step_does_not_munge_trailing_symbols(self): Token(239, 245, 'gpl-2.0', LicenseSymbol(u'GPL-2.0')) ] - assert expected == result + assert result == expected expected_groups = [ (Token(1, 16, 'GPL-2.0 or later', LicenseSymbol(u'GPL-2.0 or LATER')), @@ -2045,7 +2052,7 @@ def 
test_tokenize_step_by_step_does_not_munge_trailing_symbols(self): (gpl2, 'gpl-2.0', 239), ] - assert expected == list(licensing.tokenize(expr)) + assert list(licensing.tokenize(expr)) == expected class LicensingExpression(TestCase): @@ -2153,7 +2160,7 @@ def __init__(self, key, is_exception=False): l4 = LicenseSymbolLike(SymLike('c')) expected = [l1, lx, lx2, lx3, l3, l2, l4] - assert expected == sorted([l4, l3, l2, l1, lx , lx2, lx3]) + assert sorted([l4, l3, l2, l1, lx , lx2, lx3]) == expected class MockLicensesTest(TestCase): @@ -2196,7 +2203,7 @@ def test_and_and_or_is_invalid(self): 'position': 27, 'token_string': 'and', 'token_type': TOKEN_AND} - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_or_or_is_invalid(self): expression = 'gpl-2.0 with classpath or or or or gpl-2.0-plus' @@ -2209,7 +2216,7 @@ def test_or_or_is_invalid(self): 'position': 26, 'token_string': 'or', 'token_type': TOKEN_OR} - assert expected == _parse_error_as_dict(pe) + assert _parse_error_as_dict(pe) == expected def test_tokenize_or_or(self): expression = 'gpl-2.0 with classpath or or or gpl-2.0-plus' @@ -2225,7 +2232,8 @@ def test_tokenize_or_or(self): (LicenseSymbol(u'gpl-2.0-plus'), 'gpl-2.0-plus', 32) ] - assert expected == results + assert results == expected + class LicensingValidateTest(TestCase): licensing = Licensing( @@ -2347,7 +2355,7 @@ def test_get_license_key_info(self): with open(test_license_index_location) as f: expected = json.load(f) result = get_license_index(test_license_index_location) - assert expected == result + assert result == expected def test_get_license_key_info_vendored(self): curr_dir = dirname(abspath(__file__)) @@ -2361,4 +2369,4 @@ def test_get_license_key_info_vendored(self): with open(vendored_license_key_index_location) as f: expected = json.load(f) result = get_license_index() - assert expected == result + assert result == expected From f0423e8fb9ee6accb3a1cf46532716d43a839a34 Mon Sep 17 00:00:00 
2001 From: Philippe Ombredanne Date: Thu, 10 Jun 2021 15:29:32 +0200 Subject: [PATCH 4/5] Add new functions to get_spdx_licensing #56 Improve documentation strings and code format. Signed-off-by: Philippe Ombredanne --- src/license_expression/__init__.py | 795 ++++++++++++++++++----------- 1 file changed, 499 insertions(+), 296 deletions(-) diff --git a/src/license_expression/__init__.py b/src/license_expression/__init__.py index 4c9b239..0e78496 100644 --- a/src/license_expression/__init__.py +++ b/src/license_expression/__init__.py @@ -6,14 +6,15 @@ # See https://aboutcode.org for more information about nexB OSS projects. # """ -This module defines a mini language to parse, validate, simplify, normalize and -compare license expressions using a boolean logic engine. +This module defines a mini language to parse, validate, deduplicate, simplify, +normalize and compare license expressions using a boolean logic engine. -This supports SPDX license expressions and also accepts other license naming -conventions and license identifiers aliases to recognize and normalize licenses. +This supports SPDX and ScanCode license expressions and also accepts other +license naming conventions and license identifiers aliases to recognize and +normalize licenses. -Using boolean logic, license expressions can be tested for equality, containment, -equivalence and can be normalized or simplified. +Using boolean logic, license expressions can be tested for equality, +containment, equivalence and can be normalized, deduplicated or simplified. The main entry point is the Licensing object. 
""" @@ -55,33 +56,39 @@ from license_expression._pyahocorasick import Trie as AdvancedTokenizer from license_expression._pyahocorasick import Token - curr_dir = dirname(abspath(__file__)) data_dir = join(curr_dir, 'data') -vendored_scancode_licensedb_index_location = join(data_dir, 'scancode-licensedb-index.json') - +vendored_scancode_licensedb_index_location = join( + data_dir, + 'scancode-licensedb-index.json', +) # append new error codes to PARSE_ERRORS by monkey patching PARSE_EXPRESSION_NOT_UNICODE = 100 if PARSE_EXPRESSION_NOT_UNICODE not in PARSE_ERRORS: - PARSE_ERRORS[PARSE_EXPRESSION_NOT_UNICODE] = 'Expression string must be unicode.' + PARSE_ERRORS[PARSE_EXPRESSION_NOT_UNICODE] = ( + 'Expression string must be a string.' + ) PARSE_INVALID_EXCEPTION = 101 if PARSE_INVALID_EXCEPTION not in PARSE_ERRORS: PARSE_ERRORS[PARSE_INVALID_EXCEPTION] = ( 'A license exception symbol can only be used as an exception ' - 'in a "WITH exception" statement.') + 'in a "WITH exception" statement.' + ) PARSE_INVALID_SYMBOL_AS_EXCEPTION = 102 if PARSE_INVALID_SYMBOL_AS_EXCEPTION not in PARSE_ERRORS: PARSE_ERRORS[PARSE_INVALID_SYMBOL_AS_EXCEPTION] = ( 'A plain license symbol cannot be used as an exception ' - 'in a "WITH symbol" statement.') + 'in a "WITH symbol" statement.' + ) PARSE_INVALID_SYMBOL = 103 if PARSE_INVALID_SYMBOL not in PARSE_ERRORS: PARSE_ERRORS[PARSE_INVALID_SYMBOL] = ( - 'A proper license symbol is needed.') + 'A proper license symbol is needed.' 
+ ) class ExpressionError(Exception): @@ -96,7 +103,8 @@ class ExpressionParseError(ParseError, ExpressionError): Keyword = namedtuple('Keyword', 'value type') Keyword.__len__ = lambda self: len(self.value) -# id for "with" token which is not a proper boolean symbol but an expression symbol +# id for the "WITH" token which is not a proper boolean symbol but an expression +# symbol TOKEN_WITH = 10 # keyword types that include operators and parens @@ -134,19 +142,24 @@ class ExpressionInfo: The ExpressionInfo class has the following fields: - original_expression: str. - - This is the license expression that was originally passed into Licensing.validate() + - This is the license expression that was originally passed into + Licensing.validate() + - normalized_expression: str. - If a valid license expression has been passed into `validate()`, then the license expression string will be set in this field. + - errors: list - If there were errors validating a license expression, the error messages will be appended here. + - invalid_symbols: list - If the license expression that has been passed into `validate()` has license keys that are invalid (either that they are unknown or not used in the right context), or the syntax is incorrect because an invalid symbol was used, then those symbols will be appended here. """ + def __init__( self, original_expression, @@ -173,7 +186,7 @@ def __repr__(self): class Licensing(boolean.BooleanAlgebra): """ Licensing defines a mini language to parse, validate and compare license - expressions. This is the main entry point this library. + expressions. This is the main entry point in this library. Some of the features are: @@ -184,20 +197,21 @@ class Licensing(boolean.BooleanAlgebra): licenses with spaces and keywords (such as AND, OR WITH) or parens in their names). 
- - in an expression licenses can be more than just identifiers such short or - long names + - in an expression licenses can be more than just identifiers such as short + or long names with spaces, symbols and even parenthesis. - - A license can have multiple aliases (such as GPLv2 or GPL2) and each will - be properly recognized when parsing. + - A license can have multiple aliases (such as GPL-2.0, GPLv2 or GPL2) and + each will be properly recognized when parsing. The expression is rendered + normalized using the canonical license keys. - - expressions can be simplified, normalized, sorted and compared for - containment and/or logical equivalence thanks to a built-in boolean logic - engine. + - expressions can be deduplicated, simplified, normalized, sorted and + compared for containment and/or logical equivalence thanks to a built-in + boolean logic engine. - Once parsed, expressions can be rendered using simple templates (for - instance to render HTML links in a GUI). + instance to render as HTML links in a web UI). - For example: + For example:: >>> l = Licensing() >>> expr = l.parse(" GPL-2.0 or LGPL-2.1 and mit ") @@ -229,11 +243,16 @@ class Licensing(boolean.BooleanAlgebra): def __init__(self, symbols=tuple(), quiet=True): """ - Initialize a Licensing with an optional `symbols` sequence of + Initialize a Licensing with an optional ``symbols`` sequence of LicenseSymbol or LicenseSymbol-like objects or license key strings. If - provided and this list data is invalid, raise a ValueError. + provided and this list data is invalid, raise a ValueError. Print + warning and errors found in the symbols unless ``quiet`` is True.
""" - super(Licensing, self).__init__(Symbol_class=LicenseSymbol, AND_class=AND, OR_class=OR) + super(Licensing, self).__init__( + Symbol_class=LicenseSymbol, + AND_class=AND, + OR_class=OR, + ) # FIXME: this should be instead a super class of all symbols self.LicenseSymbol = self.Symbol @@ -256,19 +275,27 @@ def __init__(self, symbols=tuple(), quiet=True): raise ValueError('\n'.join(warns + errors)) # mapping of known symbol key to symbol for reference - self.known_symbols = {symbol.key: symbol for symbol in symbols} + self.known_symbols = { + symbol.key: symbol + for symbol in symbols + } # mapping of known symbol lowercase key to symbol for reference - self.known_symbols_lowercase = {symbol.key.lower(): symbol for symbol in symbols} + self.known_symbols_lowercase = { + symbol.key.lower(): symbol + for symbol in symbols + } # Aho-Corasick automaton-based Advanced Tokenizer self.advanced_tokenizer = None def is_equivalent(self, expression1, expression2, **kwargs): """ - Return True if both `expressions` LicenseExpression are equivalent. - If a string is provided, it will be parsed and simplified. - Extra kwargs are passed down to the parse() function. + Return True if both ``expression1`` and ``expression2`` + LicenseExpression objects are equivalent. If a string is provided, it + will be parsed and simplified. Extra ``kwargs`` are passed down to the + parse() function. + Raise ExpressionError on parse errors. """ ex1 = self._parse_and_simplify(expression1, **kwargs) ex2 = self._parse_and_simplify(expression2, **kwargs) @@ -276,11 +303,11 @@ def is_equivalent(self, expression1, expression2, **kwargs): def contains(self, expression1, expression2, **kwargs): """ - Return True if `expression1` contains `expression2`. - Expressions are either a string or a LicenseExpression object. - If a string is provided, it will be parsed and simplified. + Return True if ``expression1`` contains ``expression2``. 
Each + expression is either a string or a LicenseExpression object. If a string + is provided, it will be parsed and simplified. - Extra kwargs are passed down to the parse() function. + Extra ``kwargs`` are passed down to the parse() function. """ ex1 = self._parse_and_simplify(expression1, **kwargs) ex2 = self._parse_and_simplify(expression2, **kwargs) @@ -292,24 +319,27 @@ def _parse_and_simplify(self, expression, **kwargs): return None if not isinstance(expression, LicenseExpression): - raise TypeError('expressions must be LicenseExpression objects: %(expression1)r, %(expression2)r' % locals()) + raise TypeError( + f'expression must be LicenseExpression object: {expression!r}' + ) + return expression.simplify() def license_symbols(self, expression, unique=True, decompose=True, **kwargs): """ - Return a list of LicenseSymbol objects used in an expression in - the same order as they first appear in the expression tree. + Return a list of LicenseSymbol objects used in an expression in the same + order as they first appear in the expression tree. - `expression` is either a string or a LicenseExpression object. + ``expression`` is either a string or a LicenseExpression object. If a string is provided, it will be parsed. - If `unique` is True only return unique symbols. + If ``unique`` is True only return unique symbols. - If `decompose` is True then composite LicenseWithExceptionSymbol instance are - not returned directly but their underlying license and exception symbols are - retruned instead. + If ``decompose`` is True then composite LicenseWithExceptionSymbol + instances are not returned directly; instead their underlying license + and exception symbols are returned. - Extra kwargs are passed down to the parse() function. + Extra ``kwargs`` are passed down to the parse() function.
For example: >>> l = Licensing() @@ -332,15 +362,15 @@ def license_symbols(self, expression, unique=True, decompose=True, **kwargs): def primary_license_symbol(self, expression, decompose=True, **kwargs): """ - Return the left-most license symbol of an `expression` or None. - `expression` is either a string or a LicenseExpression object. + Return the left-most license symbol of an ``expression`` or None. + ``expression`` is either a string or a LicenseExpression object. - If `decompose` is True, only the left-hand license symbol of a decomposed - LicenseWithExceptionSymbol symbol will be returned if this is the left most - member. Otherwise a composite LicenseWithExceptionSymbol is returned in this - case. + If ``decompose`` is True, only the left-hand license symbol of a + decomposed LicenseWithExceptionSymbol symbol will be returned if this is + the left most member. Otherwise a composite LicenseWithExceptionSymbol + is returned in this case. - Extra kwargs are passed down to the parse() function. + Extra ``kwargs`` are passed down to the parse() function. """ symbols = self.license_symbols(expression, decompose=decompose, **kwargs) if symbols: @@ -348,23 +378,29 @@ def primary_license_symbol(self, expression, decompose=True, **kwargs): def primary_license_key(self, expression, **kwargs): """ - Return the left-most license key of an `expression` or None. The underlying - symbols are decomposed. - `expression` is either a string or a LicenseExpression object. + Return the left-most license key of an ``expression`` or None. The + underlying symbols are decomposed. - Extra kwargs are passed down to the parse() function. + ``expression`` is either a string or a LicenseExpression object. + + Extra ``kwargs`` are passed down to the parse() function. 
""" - prim = self.primary_license_symbol(expression, decompose=True, **kwargs) + prim = self.primary_license_symbol( + expression=expression, + decompose=True, + **kwargs, + ) if prim: return prim.key def license_keys(self, expression, unique=True, **kwargs): """ - Return a list of licenses keys used in an expression in the same order as - they first appear in the expression. - `expression` is either a string or a LicenseExpression object. + Return a list of licenses keys used in an ``expression`` in the same + order as they first appear in the expression. ``expression`` is either a + string or a LicenseExpression object. - Extra kwargs are passed down to the parse() function. + If ``unique`` is True only return unique symbols. + Extra ``kwargs`` are passed down to the parse() function. For example: >>> l = Licensing() @@ -372,7 +408,12 @@ def license_keys(self, expression, unique=True, **kwargs): >>> expected = ['GPL-2.0', 'mit+', 'blabla', 'mit', 'LGPL-2.1'] >>> assert expected == l.license_keys(l.parse(expr)) """ - symbols = self.license_symbols(expression, unique=False, decompose=True, **kwargs) + symbols = self.license_symbols( + expression=expression, + unique=False, + decompose=True, + **kwargs, + ) return self._keys(symbols, unique) def _keys(self, symbols, unique=True): @@ -385,28 +426,37 @@ def _keys(self, symbols, unique=True): def unknown_license_symbols(self, expression, unique=True, **kwargs): """ - Return a list of unknown licenses symbols used in an `expression` in the same - order as they first appear in the `expression`. - `expression` is either a string or a LicenseExpression object. + Return a list of unknown license symbols used in an ``expression`` in + the same order as they first appear in the ``expression``. + ``expression`` is either a string or a LicenseExpression object. - Extra kwargs are passed down to the parse() function. + If ``unique`` is True only return unique symbols. + Extra ``kwargs`` are passed down to the parse() function. 
""" - return [ls for ls in self.license_symbols(expression, unique=unique, decompose=True, **kwargs) - if not ls.key in self.known_symbols] + symbols = self.license_symbols( + expression=expression, + unique=unique, + decompose=True, + **kwargs, + ) + return [ls for ls in symbols if not ls.key in self.known_symbols] def unknown_license_keys(self, expression, unique=True, **kwargs): """ - Return a list of unknown licenses keys used in an `expression` in the same - order as they first appear in the `expression`. + Return a list of unknown licenses keys used in an ``expression`` in the + same order as they first appear in the ``expression``. - `expression` is either a string or a LicenseExpression object. + ``expression`` is either a string or a LicenseExpression object. If a string is provided, it will be parsed. - If `unique` is True only return unique keys. - - Extra kwargs are passed down to the parse() function. + If ``unique`` is True only return unique keys. + Extra ``kwargs`` are passed down to the parse() function. """ - symbols = self.unknown_license_symbols(expression, unique=False, **kwargs) + symbols = self.unknown_license_symbols( + expression=expression, + unique=False, + **kwargs, + ) return self._keys(symbols, unique) def validate_license_keys(self, expression): @@ -415,33 +465,45 @@ def validate_license_keys(self, expression): msg = 'Unknown license key(s): {}'.format(', '.join(unknown_keys)) raise ExpressionError(msg) - def parse(self, expression, validate=False, strict=False, simple=False, **kwargs): + def parse( + self, + expression, + validate=False, + strict=False, + simple=False, + **kwargs + ): """ Return a new license LicenseExpression object by parsing a license - `expression` string. Check that the expression syntax is valid and raise - an ExpressionError or an ExpressionParseError on errors. - Return None for empty expressions. - `expression` is either a string or a LicenseExpression object. 
If this - is a LicenseExpression it is returned as-is. - Symbols are always recognized from known symbols if `symbols` were - provided at Licensing creation time: each license and exception is + ``expression``. Check that the ``expression`` syntax is valid and + raise an ExpressionError or an ExpressionParseError on errors. + + Return None for empty expressions. ``expression`` is either a string or + a LicenseExpression object. If ``expression`` is a LicenseExpression it + is returned as-is. + + Symbols are always recognized from known Licensing symbols if `symbols` + were provided at Licensing creation time: each license and exception is recognized from known license keys (and from aliases for a symbol if available). - If `validate` is True and a license is unknown, an ExpressionError error - is raised with a message listing the unknown license keys. + If ``validate`` is True and a license is unknown, an ExpressionError + error is raised with a message listing the unknown license keys. - If `validate` is False, no error is raised. You can call the - `unknown_license_keys` or `unknown_license_symbols` methods to get - unknown license keys or symbols found in a parsed LicenseExpression. + If ``validate`` is False, no error is raised if the ``expression`` + syntax is correct. You can call further call the + `unknown_license_keys()` or `unknown_license_symbols()` methods to get + unknown license keys or symbols found in the parsed LicenseExpression. - If `strict` is True, additional exceptions will be raised if in a + If ``strict`` is True, an ExpressionError will be raised if in a "WITH" expression such as "XXX with ZZZ" if the XXX symbol has `is_exception` set to True or the YYY symbol has `is_exception` set to - False. This checks that symbols are used strictly as constructed. + False. This checks that symbols are used strictly as intended in a + "WITH" subexpression using a license on the left and an exception on thr + right. 
- If `simple` is True, parsing will use a simple tokenizer that assumes - that license symbols are all license keys that cannot contain spaces. + If ``simple`` is True, parsing will use a simple tokenizer that assumes + that license symbols are all license keys and do not contain spaces. For example: >>> expression = 'EPL-1.0 and Apache-1.1 OR GPL-2.0 with Classpath-exception' @@ -460,26 +522,38 @@ def parse(self, expression, validate=False, strict=False, simple=False, **kwargs expression = str(expression) except: ext = type(expression) - raise ExpressionError('expression must be a string and not: %(ext)r' % locals()) + raise ExpressionError( + f'expression must be a string and not: {ext!r}' + ) if not isinstance(expression, str): ext = type(expression) - raise ExpressionError('expression must be a string and not: %(ext)r' % locals()) + raise ExpressionError( + f'expression must be a string and not: {ext!r}' + ) if not expression or not expression.strip(): return try: # this will raise a ParseError on errors - tokens = list(self.tokenize(expression, strict=strict, simple=simple)) + tokens = list(self.tokenize( + expression=expression, + strict=strict, + simple=simple, + )) expression = super(Licensing, self).parse(tokens) + except ParseError as e: - new_error = ExpressionParseError( - token_type=e.token_type, token_string=e.token_string, - position=e.position, error_code=e.error_code) - raise new_error + raise ExpressionParseError( + token_type=e.token_type, + token_string=e.token_string, + position=e.position, + error_code=e.error_code, + ) from e if not isinstance(expression, LicenseExpression): - raise ExpressionError('expression must be a LicenseExpression once parsed.') + raise ExpressionError( + 'expression must be a LicenseExpression once parsed.') if validate: self.validate_license_keys(expression) @@ -488,20 +562,21 @@ def parse(self, expression, validate=False, strict=False, simple=False, **kwargs def tokenize(self, expression, strict=False, 
simple=False): """ - Return an iterable of 3-tuple describing each token given an expression - unicode string. See boolean.BooleanAlgreba.tokenize() for API details. + Return an iterable of 3-tuple describing each token given an + ``expression`` string. See boolean.BooleanAlgreba.tokenize() for API + details. This 3-tuple contains these items: (token, token string, position): - token: either a Symbol instance or one of TOKEN_* token types.. - - token string: the original token unicode string. + - token string: the original token string. - position: the starting index of the token string in the `expr` string. - If `strict` is True, additional exceptions will be raised in a + If ``strict`` is True, additional exceptions will be raised in a expression such as "XXX with ZZZ" if the XXX symbol has is_exception` set to True or the ZZZ symbol has `is_exception` set to False. - If `simple` is True, use a simple tokenizer that assumes that license - symbols are all license keys that cannot contain spaces. + If ``simple`` is True, use a simple tokenizer that assumes that license + symbols are all license keys that do not contain spaces. """ if not expression: return @@ -541,7 +616,8 @@ def tokenize(self, expression, strict=False, simple=False): def get_advanced_tokenizer(self): """ - Return an AdvancedTokenizer instance either cached or created as needed. + Return an AdvancedTokenizer instance for this Licensing either cached or + created as needed. 
If symbols were provided when this Licensing object was created, the tokenizer will recognize known symbol keys and aliases (ignoring case) @@ -559,14 +635,15 @@ def get_advanced_tokenizer(self): for keyword in KEYWORDS: add_item(keyword.value, keyword) - # self.known_symbols has been created at Licensing initialization time and is - # already validated and trusted here + # self.known_symbols has been created at Licensing initialization time + # and is already validated and trusted here for key, symbol in self.known_symbols.items(): # always use the key even if there are no aliases. add_item(key, symbol) aliases = getattr(symbol, 'aliases', []) for alias in aliases: - # normalize spaces for each alias. The AdvancedTokenizer will lowercase them + # normalize spaces for each alias. The AdvancedTokenizer will + # lowercase them if alias: alias = ' '.join(alias.split()) add_item(alias, symbol) @@ -576,16 +653,14 @@ def get_advanced_tokenizer(self): def advanced_tokenizer(self, expression): """ - Return an iterable of Token describing each token given an expression - unicode string. + Return an iterable of Token from an ``expression`` string. """ tokenizer = self.get_advanced_tokenizer() return tokenizer.tokenize(expression) def simple_tokenizer(self, expression): """ - Return an iterable of Token describing each token given an expression - unicode string. + Return an iterable of Token from an ``expression`` string. The split is done on spaces, keywords and parens. Anything else is a symbol token, e.g. a typically license key or license id (that contains @@ -633,21 +708,22 @@ def simple_tokenizer(self, expression): def dedup(self, expression): """ - Return a deduplicated LicenseExpression given a license expression + Return a deduplicated LicenseExpression given a license ``expression`` string or LicenseExpression object. - The deduplication process is similar to simplification but is specialized - for working with license expressions. 
Simplification on the other hand - is a generic boolean operation not aware of the specifis of license expressions. + The deduplication process is similar to simplification but is + specialized for working with license expressions. Simplification is + otherwise a generic boolean operation that is not aware of the specifics + of license expressions. The deduplication: - Does not sort the licenses of sub-expression in an expression. They - stay in the same order as in the original. + stay in the same order as in the original expression. - - Choices (as in MIT or GPL) are kept as-is and not treated as - simplifiable. This way this avoid droping important choice options in - complex expressions which is never desirable. + - Choices (as in "MIT or GPL") are kept as-is and not treated as + simplifiable. This avoids droping important choice options in complex + expressions which is never desirable. """ exp = self.parse(expression) @@ -671,31 +747,30 @@ def dedup(self, expression): licensing=self, ) else: - raise Exception(f'Unknown expression type: {expression!r}') + raise ExpressionError(f'Unknown expression type: {expression!r}') return deduped def validate(self, expression, strict=True, **kwargs): """ Return a ExpressionInfo object that contains information about - the validation of an `expression` license expression string. + the validation of an ``expression`` license expression string. - If the syntax and license keys of `expression` is valid, then + If the syntax and license keys of ``expression`` is valid, then `ExpressionInfo.normalized_license_expression` is set. - If an error was encountered when validating `expression`, + If an error was encountered when validating ``expression``, `ExpressionInfo.errors` will be populated with strings containing the error message that has occured. 
If an error has occured due to unknown license keys or an invalid license symbol, the offending keys or symbols will be present in `ExpressionInfo.invalid_symbols` - If `strict` is True, validation error messages will be included if in a "WITH" - expression such as "XXX with ZZZ" if the XXX symbol has `is_exception` - set to True or the YYY symbol has `is_exception` set to False. This - checks that symbols are used strictly as intended. + If ``strict`` is True, validation error messages will be included if in + a "WITH" expression such as "XXX with ZZZ" if the XXX symbol has + `is_exception` set to True or the YYY symbol has `is_exception` set to + False. This checks that exception symbols are used strictly as intended + on the right side of a "WITH" statement. """ - expression_info = ExpressionInfo( - original_expression=str(expression) - ) + expression_info = ExpressionInfo(original_expression=str(expression)) # Check `expression` type and syntax try: @@ -722,10 +797,34 @@ def validate(self, expression, strict=True, **kwargs): return expression_info -def get_license_index(license_index_location=vendored_scancode_licensedb_index_location): +def get_scancode_licensing( + license_index_location=vendored_scancode_licensedb_index_location +): + """ + Return a Licensing object using ScanCode license keys loaded from a + ``license_index_location`` location of a license db JSON index files + See https://scancode-licensedb.aboutcode.org/index.json + """ + return build_licensing(get_license_index(license_index_location)) + + +def get_spdx_licensing( + license_index_location=vendored_scancode_licensedb_index_location +): + """ + Return a Licensing object using SPDX license keys loaded from a + ``license_index_location`` location of a license db JSON index files + See https://scancode-licensedb.aboutcode.org/index.json """ - Return a list of dictionaries that contain license key information from - `license_index_location` + return 
build_spdx_licensing(get_license_index(license_index_location)) + + +def get_license_index( + license_index_location=vendored_scancode_licensedb_index_location +): + """ + Return a list of mappings that contain license key information from + ``license_index_location`` The default value of `license_index_location` points to a vendored copy of the license index from https://scancode-licensedb.aboutcode.org/ @@ -737,7 +836,7 @@ def get_license_index(license_index_location=vendored_scancode_licensedb_index_l def load_licensing_from_license_index(license_index): """ Return a Licensing object that has been loaded with license keys and - attributes from `license_index`. + attributes from a ``license_index`` list of license mappings. """ syms = [LicenseSymbol(**l) for l in license_index] return Licensing(syms) @@ -745,7 +844,8 @@ def load_licensing_from_license_index(license_index): def build_licensing(license_index): """ - Return a Licensing object that has been loaded with license keys. + Return a Licensing object that has been loaded with license keys and + attributes from a ``license_index`` list of simple ScanCode license mappings. """ lics = [ { @@ -758,7 +858,8 @@ def build_licensing(license_index): def build_spdx_licensing(license_index): """ - Return a Licensing object that has been loaded with SPDX license keys. + Return a Licensing object that has been loaded with license keys and + attributes from a ``license_index`` list of simple SPDX license mappings. """ # Massage data such that SPDX license key is the primary license key lics = [ @@ -775,8 +876,8 @@ def build_spdx_licensing(license_index): def build_symbols_from_unknown_tokens(tokens): """ - Yield Token given a sequence of Token replacing unmatched contiguous Tokens - by a single token with a LicenseSymbol. + Yield Token given a ``token`` sequence of Token replacing unmatched + contiguous tokens by a single token with a LicenseSymbol. 
""" tokens = list(tokens) @@ -823,9 +924,9 @@ def build_token_with_symbol(): def build_token_groups_for_with_subexpression(tokens): """ - Yield tuples of Token given a sequence of Token such that: - - all symbol-with-symbol sequences of 3 tokens are grouped in a three-tuple - - other tokens are a single token wrapped in a tuple. + Yield tuples of Token given a ``tokens`` sequence of Token such that: + - all "XXX WITH YYY" sequences of 3 tokens are grouped in a three-tuple + - single tokens are just wrapped in a tuple for consistency. """ # if n-1 is sym, n is with and n+1 is sym: yield this as a group for a with @@ -869,7 +970,8 @@ def build_token_groups_for_with_subexpression(tokens): def is_with_subexpression(tokens_tripple): """ - Return True if a Token tripple is a WITH license sub-expression. + Return True if a ``tokens_tripple`` Token tripple is a "WITH" license sub- + expression. """ lic, wit, exc = tokens_tripple return (isinstance(lic.value, LicenseSymbol) @@ -880,14 +982,14 @@ def is_with_subexpression(tokens_tripple): def replace_with_subexpression_by_license_symbol(tokens, strict=False): """ - Given an iterable of Token, yiled token, replacing any XXX WITH ZZZ - subexpression by a LicenseWithExceptionSymbol symbol. + Given a ``tokens`` iterable of Token, yield updated Token(s) replacing any + "XXX WITH ZZZ" subexpression by a LicenseWithExceptionSymbol symbol. - Check validity of with subexpessions and raise ParseError as needed. + Check validity of WITH subexpessions and raise ParseError on errors. - If `strict` is True also raise ParseError if the left hand side - LicenseSymbol has is_exception True or if the right hand side LicenseSymbol - has is_exception False. + If ``strict`` is True also raise ParseError if the left hand side + LicenseSymbol has `is_exception` True or if the right hand side + LicenseSymbol has `is_exception` False. 
""" token_groups = build_token_groups_for_with_subexpression(tokens) @@ -909,18 +1011,25 @@ def replace_with_subexpression_by_license_symbol(tokens, strict=False): # a single group cannot be a single 'WITH' keyword: # this is an error that we catch and raise here. raise ParseError( - token_type=TOKEN_WITH, token_string=token.string, - position=token.start, error_code=PARSE_INVALID_EXPRESSION) + token_type=TOKEN_WITH, + token_string=token.string, + position=token.start, + error_code=PARSE_INVALID_EXPRESSION, + ) elif isinstance(tval, LicenseSymbol): if strict and tval.is_exception: raise ParseError( - token_type=TOKEN_SYMBOL, token_string=token.string, - position=token.start, error_code=PARSE_INVALID_EXCEPTION) + token_type=TOKEN_SYMBOL, + token_string=token.string, + position=token.start, + error_code=PARSE_INVALID_EXCEPTION, + ) else: # this should not be possible by design - raise Exception(f'Licensing.tokenize is internally confused...: {tval!r}') + raise Exception( + f'Licensing.tokenize is internally confused...: {tval!r}') yield token continue @@ -930,17 +1039,20 @@ def replace_with_subexpression_by_license_symbol(tokens, strict=False): string = ' '.join([tok.string for tok in token_group]) start = token_group[0].start raise ParseError( - TOKEN_SYMBOL, string, start, PARSE_INVALID_EXPRESSION) + token_type=TOKEN_SYMBOL, + token_string=string, + position=start, + error_code=PARSE_INVALID_EXPRESSION, + ) # from now on we have a tripple of tokens: a WITH sub-expression such as # "A with B" seq of three tokens lic_token, WITH , exc_token = token_group - token_string = ' '.join([ - lic_token.string, - WITH.string.strip(), - exc_token.string - ]) + lic = lic_token.string + exc = exc_token.string + WITH = WITH.string.strip() + token_string = f'{lic} {WITH} {exc}' # the left hand side license symbol lic_sym = lic_token.value @@ -948,34 +1060,50 @@ def replace_with_subexpression_by_license_symbol(tokens, strict=False): # this should not happen if not 
isinstance(lic_sym, LicenseSymbol): raise ParseError( - TOKEN_SYMBOL, lic_token.string, lic_token.start, - PARSE_INVALID_SYMBOL) + token_type=TOKEN_SYMBOL, + token_string=lic_token.string, + position=lic_token.start, + error_code=PARSE_INVALID_SYMBOL, + ) if strict and lic_sym.is_exception: raise ParseError( - TOKEN_SYMBOL, lic_token.string, lic_token.start, - PARSE_INVALID_EXCEPTION) + token_type=TOKEN_SYMBOL, + token_string=lic_token.string, + position=lic_token.start, + error_code=PARSE_INVALID_EXCEPTION, + ) # the right hand side exception symbol exc_sym = exc_token.value if not isinstance(exc_sym, LicenseSymbol): raise ParseError( - TOKEN_SYMBOL, lic_sym.string, lic_sym.start, - PARSE_INVALID_SYMBOL) + token_type=TOKEN_SYMBOL, + token_string=lic_sym.string, + position=lic_sym.start, + error_code=PARSE_INVALID_SYMBOL, + ) if strict and not exc_sym.is_exception: raise ParseError( - TOKEN_SYMBOL, exc_token.string, exc_token.start, - PARSE_INVALID_SYMBOL_AS_EXCEPTION) + token_type=TOKEN_SYMBOL, + token_string=exc_token.string, + position=exc_token.start, + error_code=PARSE_INVALID_SYMBOL_AS_EXCEPTION, + ) - lic_exc_sym = LicenseWithExceptionSymbol(lic_sym, exc_sym, strict) + lic_exc_sym = LicenseWithExceptionSymbol( + license_symbol=lic_sym, + exception_symbol=exc_sym, + strict=strict, + ) token = Token( - lic_token.start, - exc_token.end, - token_string, - lic_exc_sym, + start=lic_token.start, + end=exc_token.end, + string=token_string, + value=lic_exc_sym, ) yield token @@ -988,39 +1116,39 @@ class Renderable(object): def render(self, template='{symbol.key}', *args, **kwargs): """ Return a formatted string rendering for this expression using the - `template` format string to render each symbol. The variable available - are `symbol.key` and any other attribute that was attached to a license - symbol instance and a custom template can be provided to handle custom - HTML rendering or similar. + ``template`` format string to render each license symbol. 
The variables + available are `symbol.key` and any other attribute attached to a + LicenseSymbol-like instance; a custom ``template`` can be provided to + handle custom rendering such as HTML. - For symbols that hold multiple licenses (e.g. a WITH statement) the - template is applied to each symbol individually. + For symbols that hold multiple licenses (e.g. in a "XXX WITH YYY" + statement) the template is applied to each symbol individually. - Note that when render() is called the *args and **kwargs are propagated - recursively to any Renderable object render() method. + Note that when render() is called the ``*args`` and ``**kwargs`` are + passed down recursively to any Renderable object render() method. """ return NotImplementedError def render_as_readable(self, template='{symbol.key}', *args, **kwargs): """ Return a formatted string rendering for this expression using the - `template` format string to render each symbol. Add extra parenthesis - around WITH sub-expressions for improved readbility. See `render()` for - other arguments. + ``template`` format string to render each symbol. Add extra parenthesis + around "WITH" sub-expressions such as in "(XXX WITH YYY)"for improved + readbility. See ``render()`` for other arguments. """ if isinstance(self, LicenseWithExceptionSymbol): return self.render( template=template, wrap_with_in_parens=False, *args, - **kwargs, + **kwargs ) return self.render( template=template, wrap_with_in_parens=True, *args, - **kwargs, + **kwargs ) @@ -1037,7 +1165,7 @@ def decompose(self): def __contains__(self, other): """ - Test if expr is contained in this symbol. + Test if the ``other`` symbol is contained in this symbol. """ if not isinstance(other, BaseSymbol): return False @@ -1056,13 +1184,13 @@ def __contains__(self, other): @total_ordering class LicenseSymbol(BaseSymbol): """ - A LicenseSymbol represents a license as used in a license expression. 
+ A LicenseSymbol represents a license key or identifier as used in a license + expression. """ def __init__(self, key, aliases=tuple(), is_exception=False, *args, **kwargs): if not key: - raise ExpressionError( - 'A license key cannot be empty: %(key)r' % locals()) + raise ExpressionError(f'A license key cannot be empty: {key!r}') if not isinstance(key, str): if isinstance(key, bytes): @@ -1070,36 +1198,39 @@ def __init__(self, key, aliases=tuple(), is_exception=False, *args, **kwargs): key = str(key) except: raise ExpressionError( - 'A license key must be a unicode string: %(key)r' % locals()) + f'A license key must be a string: {key!r}') else: raise ExpressionError( - 'A license key must be a unicode string: %(key)r' % locals()) + f'A license key must be a string: {key!r}') key = key.strip() if not key: - raise ExpressionError( - 'A license key cannot be blank: "%(key)s"' % locals()) + raise ExpressionError(f'A license key cannot be blank: {key!r}') # note: key can contain spaces if not is_valid_license_key(key): raise ExpressionError( - 'Invalid license key: the valid characters are: letters and numbers, ' - f'underscore, dot, colon or hyphen signs and spaces: {key!r}' + 'Invalid license key: the valid characters are: letters and ' + 'numbers, underscore, dot, colon or hyphen signs and ' + f'spaces: {key!r}' ) - # normalize for spaces + # normalize spaces key = ' '.join(key.split()) if key.lower() in KEYWORDS_STRINGS: raise ExpressionError( 'Invalid license key: a key cannot be a reserved keyword: "or",' - ' "and" or "with: "%(key)s"' % locals()) + f' "and" or "with": {key!r}') self.key = key if aliases and not isinstance(aliases, (list, tuple,)): - raise TypeError('License aliases must be a sequence.') + raise TypeError( + f'License aliases: {aliases!r} must be a sequence ' + f'and not: {type(aliases)}.' 
+ ) self.aliases = aliases and tuple(aliases) or tuple() self.is_exception = is_exception @@ -1108,7 +1239,7 @@ def __init__(self, key, aliases=tuple(), is_exception=False, *args, **kwargs): def decompose(self): """ - Return an iterable of the underlying symbols for this symbol. + Return an iterable of the underlying license symbols for this symbol. """ yield self @@ -1118,20 +1249,29 @@ def __hash__(self, *args, **kwargs): def __eq__(self, other): if self is other: return True + if not (isinstance(other, self.__class__) or self.symbol_like(other)): return False + return self.key == other.key and self.is_exception == other.is_exception def __ne__(self, other): if self is other: return False + if not (isinstance(other, self.__class__) or self.symbol_like(other)): return True - return (self.key != other.key or self.is_exception != other.is_exception) + + return ( + self.key != other.key + or self.is_exception != other.is_exception + ) def __lt__(self, other): if isinstance( - other, (LicenseSymbol, LicenseWithExceptionSymbol, LicenseSymbolLike)): + other, + (LicenseSymbol, LicenseWithExceptionSymbol, LicenseSymbolLike), + ): return str(self) < str(other) else: return NotImplemented @@ -1150,17 +1290,22 @@ def __len__(self): def __repr__(self): cls = self.__class__.__name__ key = self.key - aliases = self.aliases and ('aliases=%(a)r, ' % {'a': self.aliases}) or '' + aliases = self.aliases and f'aliases={self.aliases!r}, ' or '' is_exception = self.is_exception - return '%(cls)s(%(key)r, %(aliases)sis_exception=%(is_exception)r)' % locals() + return f'{cls}({key!r}, {aliases}is_exception={is_exception!r})' def __copy__(self): - return LicenseSymbol(self.key, tuple(self.aliases), self.is_exception) + return LicenseSymbol( + key=self.key, + aliases=self.aliases and tuple(self.aliases) or tuple(), + is_exception=self.is_exception, + ) @classmethod def symbol_like(cls, symbol): """ - Return True if `symbol` is a symbol-like object with its essential attributes. 
+ Return True if ``symbol`` is a symbol-like object with its essential + attributes. """ return hasattr(symbol, 'key') and hasattr(symbol, 'is_exception') @@ -1169,17 +1314,20 @@ def symbol_like(cls, symbol): @total_ordering class LicenseSymbolLike(LicenseSymbol): """ - A LicenseSymbolLike object wraps a symbol-like object to expose a + A LicenseSymbolLike object wraps a symbol-like object to expose it's LicenseSymbol behavior. """ def __init__(self, symbol_like, *args, **kwargs): if not self.symbol_like(symbol_like): - raise ExpressionError( - 'Not a symbol-like object: %(symbol_like)r' % locals()) + raise ExpressionError(f'Not a symbol-like object: {symbol_like!r}') self.wrapped = symbol_like - super(LicenseSymbolLike, self).__init__(self.wrapped.key, *args, **kwargs) + super(LicenseSymbolLike, self).__init__( + key=self.wrapped.key, + *args, + **kwargs + ) self.is_exception = self.wrapped.is_exception self.aliases = getattr(self.wrapped, 'aliases', tuple()) @@ -1197,6 +1345,7 @@ def __copy__(self): def render(self, template='{symbol.key}', *args, **kwargs): if self._render: return self._render(template, *args, **kwargs) + return super(LicenseSymbolLike, self).render(template, *args, **kwargs) __nonzero__ = __bool__ = lambda s: True @@ -1230,37 +1379,53 @@ def __lt__(self, other): @total_ordering class LicenseWithExceptionSymbol(BaseSymbol): """ - A LicenseWithExceptionSymbol represents a license "with" an exception as - used in a license expression. It holds two LicenseSymbols objects: one for - the left-hand license proper and one for the right-hand exception to this - license and deals with the specifics of resolution, validation and - representation. + A LicenseWithExceptionSymbol represents a license with a "WITH" keyword and + a license exception such as the Classpath exception. When used in a license + expression, this is treated as a single Symbol. 
It holds two LicenseSymbols + objects: one for the left-hand side license proper and one for the right- + hand side exception to the license and deals with the specifics of + resolution, validation and representation. """ - def __init__(self, license_symbol, exception_symbol, strict=False, *args, **kwargs): + def __init__( + self, + license_symbol, + exception_symbol, + strict=False, + *args, + **kwargs + ): """ - Initialize a new LicenseWithExceptionSymbol from a `license_symbol` and a - `exception_symbol` symbol-like objects. + Initialize a new LicenseWithExceptionSymbol from a ``license_symbol`` + and a ``exception_symbol`` symbol-like objects. - Raise a ExpressionError exception if strict is True and either: - - license_symbol.is_exception is True - - exception_symbol.is_exception is not True + Raise a ExpressionError exception if ``strict`` is True and either: + - ``license_symbol``.is_exception is True + - ``exception_symbol``.is_exception is not True """ if not LicenseSymbol.symbol_like(license_symbol): raise ExpressionError( - 'license_symbol must be a LicenseSymbol-like object: %(license_symbol)r' % locals()) + 'license_symbol must be a LicenseSymbol-like object: ' + f'{license_symbol!r}', + ) if strict and license_symbol.is_exception: raise ExpressionError( - 'license_symbol cannot be an exception with "is_exception" set to True: %(license_symbol)r' % locals()) + 'license_symbol cannot be an exception with the "is_exception" ' + f'attribute set to True:{license_symbol!r}', + ) if not LicenseSymbol.symbol_like(exception_symbol): raise ExpressionError( - 'exception_symbol must be a LicenseSymbol-like object: %(exception_symbol)r' % locals()) + 'exception_symbol must be a LicenseSymbol-like object: ' + f'{exception_symbol!r}', + ) if strict and not exception_symbol.is_exception: raise ExpressionError( - 'exception_symbol must be an exception with "is_exception" set to True: %(exception_symbol)r' % locals()) + 'exception_symbol must be an exception with 
"is_exception" ' + f'set to True: {exception_symbol!r}', + ) self.license_symbol = license_symbol self.exception_symbol = exception_symbol @@ -1268,25 +1433,32 @@ def __init__(self, license_symbol, exception_symbol, strict=False, *args, **kwar super(LicenseWithExceptionSymbol, self).__init__(str(self)) def __copy__(self): - return LicenseWithExceptionSymbol(copy(self.license_symbol), copy(self.exception_symbol)) + return LicenseWithExceptionSymbol( + license_symbol=copy(self.license_symbol), + exception_symbol=copy(self.exception_symbol), + ) def decompose(self): yield self.license_symbol yield self.exception_symbol - def render(self, template='{symbol.key}', wrap_with_in_parens=False, *args, **kwargs): + def render( + self, + template='{symbol.key}', + wrap_with_in_parens=False, + *args, + **kwargs + ): """ - Return a formatted WITH expression. If `wrap_with_in_parens`, wrap in - parens a WITH expression, unless it is alone and not used with other AND - or OR sub-expressions. + Return a formatted "WITH" expression. If ``wrap_with_in_parens``, wrap + the expression in parens as in "(XXX WITH YYY)". 
""" lic = self.license_symbol.render(template, *args, **kwargs) exc = self.exception_symbol.render(template, *args, **kwargs) + rend = f'{lic} WITH {exc}' if wrap_with_in_parens: - temp = '(%(lic)s WITH %(exc)s)' - else: - temp = '%(lic)s WITH %(exc)s' - return temp % locals() + rend = f'({rend})' + return rend def __hash__(self, *args, **kwargs): return hash((self.license_symbol, self.exception_symbol,)) @@ -1294,22 +1466,34 @@ def __hash__(self, *args, **kwargs): def __eq__(self, other): if self is other: return True + if not isinstance(other, self.__class__): return False - return (self.license_symbol == other.license_symbol - and self.exception_symbol == other.exception_symbol) + + return ( + self.license_symbol == other.license_symbol + and self.exception_symbol == other.exception_symbol + ) def __ne__(self, other): if self is other: return False + if not isinstance(other, self.__class__): return True - return not (self.license_symbol == other.license_symbol - and self.exception_symbol == other.exception_symbol) + + return ( + not ( + self.license_symbol == other.license_symbol + and self.exception_symbol == other.exception_symbol + ) + ) def __lt__(self, other): if isinstance( - other, (LicenseSymbol, LicenseWithExceptionSymbol, LicenseSymbolLike)): + other, + (LicenseSymbol, LicenseWithExceptionSymbol, LicenseSymbolLike) + ): return str(self) < str(other) else: return NotImplemented @@ -1317,14 +1501,17 @@ def __lt__(self, other): __nonzero__ = __bool__ = lambda s: True def __str__(self): - lkey = self.license_symbol.key - ekey = self.exception_symbol.key - return '%(lkey)s WITH %(ekey)s' % locals() + return f'{self.license_symbol.key} WITH {self.exception_symbol.key}' def __repr__(self): + cls = self.__class__.__name__ data = dict(cls=self.__class__.__name__) data.update(self.__dict__) - return '%(cls)s(license_symbol=%(license_symbol)r, exception_symbol=%(exception_symbol)r)' % data + return ( + f'{cls}(' + f'license_symbol={self.license_symbol!r}, ' + 
f'exception_symbol={self.exception_symbol!r})' + ) class RenderableFunction(Renderable): @@ -1332,8 +1519,8 @@ class RenderableFunction(Renderable): def render(self, template='{symbol.key}', *args, **kwargs): """ - Render an expression as a string, recursively applying the string `template` - to every symbols and operators. + Render an expression as a string, recursively applying the string + ``template`` to every symbols and operators. """ expression_args = self.args if len(expression_args) == 1: @@ -1343,15 +1530,18 @@ def render(self, template='{symbol.key}', *args, **kwargs): sym = sym.render(template, *args, **kwargs) else: - print('WARNING: symbol is not renderable: using plain string representation.') - # FIXME: CAN THIS REALLY HAPPEN since we only have symbols, OR, AND? + # FIXME: CAN THIS EVER HAPPEN since we only have symbols OR and AND? + print( + 'WARNING: symbol is not renderable: using plain string ' + f'representation: {sym!r}' + ) sym = str(sym) + # NB: the operator str already has a leading and trailing space if self.isliteral: - rendered = '%s%s' % (self.operator, sym) + rendered = f'{self.operator}{sym}' else: - # NB: the operator str already has a leading and trailing space - rendered = '%s(%s)' % (self.operator, sym) + rendered = f'{self.operator}({sym})' return rendered rendered_items = [] @@ -1362,7 +1552,7 @@ def render(self, template='{symbol.key}', *args, **kwargs): rendered = arg.render(template, *args, **kwargs) else: - # FIXME: CAN THIS REALLY HAPPEN since we only have symbols, or and AND? + # FIXME: CAN THIS EVER HAPPEN since we only have symbols OR and AND? print( 'WARNING: object in expression is not renderable: ' f'falling back to plain string representation: {arg!r}.' 
@@ -1372,7 +1562,7 @@ def render(self, template='{symbol.key}', *args, **kwargs): if arg.isliteral: rendered_items_append(rendered) else: - rendered_items_append('(%s)' % rendered) + rendered_items_append(f'({rendered})') return self.operator.join(rendered_items) @@ -1405,7 +1595,7 @@ def __init__(self, *args): def ordered_unique(seq): """ - Return unique items in a sequence seq preserving the original order. + Return unique items in a sequence ``seq`` preserving their original order. """ if not seq: return [] @@ -1419,11 +1609,13 @@ def ordered_unique(seq): def as_symbols(symbols): """ - Return an iterable of LicenseSymbol objects from a sequence of `symbols` or - strings. If an item is a string, then create a new LicenseSymbol for it - using the string as key. If this is not a string it must be a LicenseSymbol- - like type. It will raise a TypeError expection if an item is neither a - string or LicenseSymbol- like. + Return an iterable of LicenseSymbol objects from a ``symbols`` sequence of + strings or LicenseSymbol-like objects. + + If an item is a string, then create a new LicenseSymbol for it using the + string as key. + If this is not a string it must be a LicenseSymbol-like type. Raise a + TypeError exception if an item is neither a string nor LicenseSymbol-like. """ if symbols: for symbol in symbols: @@ -1433,7 +1625,7 @@ def as_symbols(symbols): try: symbol = str(symbol) except: - raise TypeError('%(symbol)r is not a unicode string.' % locals()) + raise TypeError(f'{symbol!r} is not a string.') if isinstance(symbol, str): if symbol.strip(): @@ -1447,21 +1639,22 @@ def as_symbols(symbols): else: raise TypeError( - '%(symbol)r is not a unicode string ' - 'or a LicenseSymbol-like instance.' % locals()) + f'{symbol!r} is neither a string nor LicenseSymbol-like.' 
+ ) def validate_symbols(symbols, validate_keys=False): """ - Return a tuple of (`warnings`, `errors`) given a sequence of `symbols` + Return a tuple of (`warnings`, `errors`) given a sequence of ``symbols`` LicenseSymbol-like objects. - - warnings is a list of validation warnings messages (possibly empty if there - were no warnings). - - errors is a list of validation error messages (possibly empty if there were no - errors). - + - `warnings` is a list of validation warnings messages (possibly empty if + there were no warnings). + - `errors` is a list of validation error messages (possibly empty if there + were no errors). Keys and aliases are cleaned and validated for uniqueness. + + If ``validate_keys`` also validate that license keys are known keys. """ # collection used for checking unicity and correctness @@ -1505,7 +1698,9 @@ def validate_symbols(symbols, validate_keys=False): initial_alias_len = len(aliases) # always normalize aliases for spaces and case - aliases = set([' '.join(alias.lower().strip().split()) for alias in aliases]) + aliases = set([ + ' '.join(alias.lower().strip().split()) for alias in aliases + ]) # KEEP UNIQUES, remove empties aliases = set(a for a in aliases if a) @@ -1541,30 +1736,39 @@ def validate_symbols(symbols, validate_keys=False): # build warning and error messages from invalid data errors = [] for ind in sorted(not_symbol_classes): - errors.append('Invalid item: not a LicenseSymbol object: %(ind)s.' % locals()) + errors.append(f'Invalid item: not a LicenseSymbol object: {ind!r}.') for dupe in sorted(dupe_keys): - errors.append('Invalid duplicated license key: %(dupe)s.' % locals()) + errors.append(f'Invalid duplicated license key: {dupe!r}.') for dalias, dkeys in sorted(dupe_aliases.items()): dkeys = ', '.join(dkeys) - errors.append('Invalid duplicated alias pointing to multiple keys: ' - '%(dalias)s point to keys: %(dkeys)s.' 
% locals()) + errors.append( + f'Invalid duplicated alias pointing to multiple keys: ' + f'{dalias} point to keys: {dkeys!r}.' + ) for ikey, ialiases in sorted(invalid_alias_as_kw.items()): ialiases = ', '.join(ialiases) - errors.append('Invalid aliases: an alias cannot be an expression keyword. ' - 'key: "%(ikey)s", aliases: %(ialiases)s.' % locals()) + errors.append( + f'Invalid aliases: an alias cannot be an expression keyword. ' + f'key: {ikey!r}, aliases: {ialiases}.' + ) for dupe in sorted(dupe_exceptions): - errors.append('Invalid duplicated license exception key: %(dupe)s.' % locals()) + errors.append(f'Invalid duplicated license exception key: {dupe}.') for ikw in sorted(invalid_keys_as_kw): - errors.append('Invalid key: a key cannot be an expression keyword: %(ikw)s.' % locals()) + errors.append( + f'Invalid key: a key cannot be an expression keyword: {ikw}.' + ) warnings = [] for dupe_alias in sorted(dupe_aliases): - errors.append('Duplicated or empty aliases ignored for license key: %(dupe_alias)r.' % locals()) + errors.append( + f'Duplicated or empty aliases ignored for license key: ' + f'{dupe_alias!r}.' + ) return warnings, errors @@ -1577,30 +1781,29 @@ def combine_expressions( ): """ Return a combined LicenseExpression object with the `relation`, given a list - of license `expressions` strings or LicenseExpression. If unique is True - remove duplicates before combining expressions. + of license ``expressions`` strings or LicenseExpression objects. If + ``unique`` is True remove duplicates before combining expressions. 
For example:: - - >>> a = 'mit' - >>> b = 'gpl' - >>> str(combine_expressions([a, b])) - 'mit AND gpl' - >>> assert 'mit' == str(combine_expressions([a])) - >>> combine_expressions([]) - >>> combine_expressions(None) - >>> str(combine_expressions(('gpl', 'mit', 'apache',))) - 'gpl AND mit AND apache' - >>> str(combine_expressions(('gpl', 'mit', 'apache',), relation='OR')) - 'gpl OR mit OR apache' + >>> a = 'mit' + >>> b = 'gpl' + >>> str(combine_expressions([a, b])) + 'mit AND gpl' + >>> assert 'mit' == str(combine_expressions([a])) + >>> combine_expressions([]) + >>> combine_expressions(None) + >>> str(combine_expressions(('gpl', 'mit', 'apache',))) + 'gpl AND mit AND apache' + >>> str(combine_expressions(('gpl', 'mit', 'apache',), relation='OR')) + 'gpl OR mit OR apache' """ if not expressions: return if not isinstance(expressions, (list, tuple)): raise TypeError( - 'expressions should be a list or tuple and not: {}'.format( - type(expressions))) + f'expressions should be a list or tuple and not: {type(expressions)}' + ) # only del with LicenseExpression objects expressions = [licensing.parse(le, simple=True) for le in expressions] From ffc9da55567ae5190c0aed8f540eb359f0f301a3 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Mon, 14 Jun 2021 09:46:26 -0700 Subject: [PATCH 5/5] Update README.rst * Add space in doctest Signed-off-by: Jono Yang --- README.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 11c7b4e..51bea77 100644 --- a/README.rst +++ b/README.rst @@ -44,7 +44,7 @@ license expression engine in several projects and products such as: - license.sh https://github.com/webscopeio/license.sh - liferay_inbound_checker https://github.com/carmenbianca/liferay_inbound_checker - REUSE https://reuse.software/ and https://github.com/fsfe/reuse-tool -- ScanCode-io https://github.com/nexB/scancode.io +- ScanCode-io https://github.com/nexB/scancode.io - ScanCode-toolkit 
https://github.com/nexB/scancode-toolkit See also for details: @@ -136,17 +136,17 @@ Create an SPDX Licensing and parse expressions:: LicenseSymbol('MIT') ) ) - + >>> str(parsed) 'GPL-2.0-only OR (LGPL-2.1-only AND MIT)' - + >>> licensing.parse('unknwon with foo', validate=True, strict=True) license_expression.ExpressionParseError: A plain license symbol cannot be used as an exception in a "WITH symbol" statement. for token: "foo" at position: 13 - + >>> licensing.parse('unknwon with foo', validate=True) license_expression.ExpressionError: Unknown license key(s): unknwon, foo - + >>> licensing.validate('foo and MIT and GPL-2.0+') ExpressionInfo( original_expression='foo and MIT and GPL-2.0+', @@ -183,7 +183,7 @@ Create a Licensing with your own license symbols:: >>> expression = 'GPL-2.0+ with Classpath or (bsd)' >>> parsed = licensing.parse(expression) >>> expected = 'GPL-2.0+ WITH Classpath OR BSD' - >>> assertparsed.render('{symbol.key}') == expected + >>> assert parsed.render('{symbol.key}') == expected >>> expected = [ ... LicenseSymbol('GPL-2.0+'),