From 60712ccc852e9aec4702ba28a63aa879231478db Mon Sep 17 00:00:00 2001 From: "M. Sonntag" Date: Fri, 24 Jan 2020 15:37:17 +0100 Subject: [PATCH 01/10] Cleanup yaml.load usage 'yaml.load' should not be used any longer without providing a specific loader. The instances where a specific loader is not required were now replaced with the 'yaml.safe_load' method that uses the 'SafeLoader' class by default. --- odml/tools/converters/version_converter.py | 2 +- odml/tools/rdf_converter.py | 2 +- test/test_parser_yaml.py | 6 +++--- test/test_rdf_writer.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/odml/tools/converters/version_converter.py b/odml/tools/converters/version_converter.py index ef8b18b3..804fc68a 100644 --- a/odml/tools/converters/version_converter.py +++ b/odml/tools/converters/version_converter.py @@ -64,7 +64,7 @@ def _parse_json(self): def _parse_yaml(self): with open(self.filename) as file: - parsed_doc = yaml.load(file) + parsed_doc = yaml.safe_load(file) return self._parse_dict_document(parsed_doc) diff --git a/odml/tools/rdf_converter.py b/odml/tools/rdf_converter.py index a357315f..9fb010c3 100644 --- a/odml/tools/rdf_converter.py +++ b/odml/tools/rdf_converter.py @@ -46,7 +46,7 @@ def load_rdf_subclasses(): with open(subclass_file, "r") as yaml_file: try: - section_subclasses = yaml.load(yaml_file) + section_subclasses = yaml.safe_load(yaml_file) except yaml.parser.ParserError as err: print("[Error] Loading RDF subclass file: %s" % err) diff --git a/test/test_parser_yaml.py b/test/test_parser_yaml.py index 0c77dd18..6d59da70 100644 --- a/test/test_parser_yaml.py +++ b/test/test_parser_yaml.py @@ -19,7 +19,7 @@ def test_missing_root(self): message = "Missing root element" with open(os.path.join(self.basepath, filename)) as raw_data: - parsed_doc = yaml.load(raw_data) + parsed_doc = yaml.safe_load(raw_data) with self.assertRaises(ParserException) as exc: _ = self.yaml_reader.to_odml(parsed_doc) @@ -31,7 +31,7 @@ def test_missing_version(self): message = "Could not find odml-version" with open(os.path.join(self.basepath, filename)) as raw_data: - parsed_doc = yaml.load(raw_data) + parsed_doc = yaml.safe_load(raw_data) with self.assertRaises(ParserException) as exc: _ = self.yaml_reader.to_odml(parsed_doc) @@ -42,7 +42,7 @@ def test_invalid_version(self): filename = "invalid_version.yaml" with open(os.path.join(self.basepath, filename)) as raw_data: - parsed_doc = yaml.load(raw_data) + parsed_doc = yaml.safe_load(raw_data) with self.assertRaises(InvalidVersionException): _ = self.yaml_reader.to_odml(parsed_doc) diff --git a/test/test_rdf_writer.py b/test/test_rdf_writer.py index 224ac205..54040b93 100644 --- a/test/test_rdf_writer.py +++ b/test/test_rdf_writer.py @@ -134,7 +134,7 @@ def test_adding_values(self): def test_section_subclass(self): p = os.path.join(odml.__path__[0], 'resources', 'section_subclasses.yaml') with open(p, "r") as f: - subclass = yaml.load(f) + subclass = yaml.safe_load(f) doc = odml.Document() subclass_key = next(iter(subclass)) From 78024f5904d526fef95e59a93275d5bd8ad1ce8e Mon Sep 17 00:00:00 2001 From: "M. Sonntag" Date: Thu, 30 Jan 2020 16:09:23 +0100 Subject: [PATCH 02/10] [test/dtypes] Address assert regexp depr warning --- test/test_dtypes.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/test/test_dtypes.py b/test/test_dtypes.py index 7f0013c0..0f2fa11c 100644 --- a/test/test_dtypes.py +++ b/test/test_dtypes.py @@ -1,11 +1,25 @@ import datetime import unittest +from sys import version_info + import odml.dtypes as typ class TestTypes(unittest.TestCase): + def assertLocalRegExp(self, text, regular_expression): + """ + Python 2 is dead and assertRegexpMatches is deprecated and + will be removed, but keep compatibility until py 2 support is + fully dropped. + """ + + if version_info.major < 3: + self.assertRegexpMatches(text, regular_expression) + else: + self.assertRegex(text, regular_expression) + def setUp(self): pass @@ -36,8 +50,8 @@ def test_date(self): self.assertIsInstance(typ.date_get(""), datetime.date) re = "^[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])$" - self.assertRegexpMatches(typ.date_get(None).strftime(typ.FORMAT_DATE), re) - self.assertRegexpMatches(typ.date_get("").strftime(typ.FORMAT_DATE), re) + self.assertLocalRegExp(typ.date_get(None).strftime(typ.FORMAT_DATE), re) + self.assertLocalRegExp(typ.date_get("").strftime(typ.FORMAT_DATE), re) date = datetime.date(2011, 12, 1) date_string = '2011-12-01' @@ -68,8 +82,8 @@ def test_time(self): self.assertIsInstance(typ.time_get(""), datetime.time) re = "^[0-5][0-9]:[0-5][0-9]:[0-5][0-9]$" - self.assertRegexpMatches(typ.time_get(None).strftime(typ.FORMAT_TIME), re) - self.assertRegexpMatches(typ.time_get("").strftime(typ.FORMAT_TIME), re) + self.assertLocalRegExp(typ.time_get(None).strftime(typ.FORMAT_TIME), re) + self.assertLocalRegExp(typ.time_get("").strftime(typ.FORMAT_TIME), re) time = datetime.time(12, 34, 56) time_string = '12:34:56' @@ -101,8 +115,8 @@ def test_datetime(self): re = "^[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1]) " \ "[0-5][0-9]:[0-5][0-9]:[0-5][0-9]$" - self.assertRegexpMatches(typ.datetime_get(None).strftime(typ.FORMAT_DATETIME), re) - self.assertRegexpMatches(typ.datetime_get("").strftime(typ.FORMAT_DATETIME), re) + self.assertLocalRegExp(typ.datetime_get(None).strftime(typ.FORMAT_DATETIME), re) + self.assertLocalRegExp(typ.datetime_get("").strftime(typ.FORMAT_DATETIME), re) date = datetime.datetime(2011, 12, 1, 12, 34, 56) date_string = '2011-12-01 12:34:56' From a96fb5f7f9f79a9f0263253dc26d547733df15ec Mon Sep 17 00:00:00 2001 From: "M. Sonntag" Date: Thu, 30 Jan 2020 16:16:48 +0100 Subject: [PATCH 03/10] [init] Add untested Python version warning Add a warning when importing odml with an outdated Python version. --- odml/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/odml/__init__.py b/odml/__init__.py index 3760bfcf..31832219 100644 --- a/odml/__init__.py +++ b/odml/__init__.py @@ -1,3 +1,7 @@ +import warnings + +from sys import version_info + _property = property from . import doc @@ -8,6 +12,11 @@ from .info import VERSION from .tools.parser_utils import SUPPORTED_PARSERS as PARSERS +if version_info.major < 3 or version_info.major == 3 and version_info.minor < 6: + msg = "The '%s' package is not tested with your Python version. " % __name__ + msg += "Please consider upgrading to the latest Python distribution." + warnings.warn(msg) + __version__ = VERSION From c59a841ca8dd289383565e8ec140af4c75054f55 Mon Sep 17 00:00:00 2001 From: "M. Sonntag" Date: Mon, 17 Feb 2020 13:45:15 +0100 Subject: [PATCH 04/10] [init] Rename version_info import Since 'version_info' is available via the odmltools package import, rename it to python_version to clarify the context of this function. --- odml/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/odml/__init__.py b/odml/__init__.py index 31832219..bba3d1b7 100644 --- a/odml/__init__.py +++ b/odml/__init__.py @@ -1,6 +1,6 @@ import warnings -from sys import version_info +from sys import version_info as _python_version _property = property @@ -12,7 +12,7 @@ from .info import VERSION from .tools.parser_utils import SUPPORTED_PARSERS as PARSERS -if version_info.major < 3 or version_info.major == 3 and version_info.minor < 6: +if _python_version.major < 3 or _python_version.major == 3 and _python_version.minor < 6: msg = "The '%s' package is not tested with your Python version. " % __name__ msg += "Please consider upgrading to the latest Python distribution." warnings.warn(msg) From 9d1cb88580e7c814a2fcbabb07497c8ee532a697 Mon Sep 17 00:00:00 2001 From: "M. Sonntag" Date: Mon, 17 Feb 2020 16:34:02 +0100 Subject: [PATCH 05/10] [MANIFEST] Update packaged files - update README file extension rtf->md - use recursive-include to add odml/resources - add test/resources files --- MANIFEST.in | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 610ff1b7..070d3b58 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,4 @@ -include LICENSE -include README.rst +include LICENSE README.md CHANGELOG.md include odml/info.json -include odml/resources/section_subclasses.yaml -include odml/resources/odml-ontology.ttl +recursive-include odml/resources * +recursive-include test/resources * From 70be7848cc130f89b967456886421ac07a9b4e59 Mon Sep 17 00:00:00 2001 From: "M. Sonntag" Date: Mon, 17 Feb 2020 18:16:33 +0100 Subject: [PATCH 06/10] [tools/ParserUtils] Add odml tuple export func --- odml/tools/parser_utils.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/odml/tools/parser_utils.py b/odml/tools/parser_utils.py index 9f498dbe..66d17ead 100644 --- a/odml/tools/parser_utils.py +++ b/odml/tools/parser_utils.py @@ -34,3 +34,24 @@ class InvalidVersionException(ParserException): Exception wrapper to indicate a non-compatible odML version. """ pass + + +def odml_tuple_export(odml_tuples): + """ + Converts odml style tuples to a parsable string representation. + Every tuple is represented by brackets '()'. The individual elements of a tuple are + separated by a semicolon ';'. The individual tuples are separated by a comma ','. + An odml 3-tuple list of 2 tuples would be serialized to: "[(11;12;13),(21;22;23)]". + + :param odml_tuples: List of odml style tuples. + :return: string + """ + str_tuples = "" + for val in odml_tuples: + str_val = ";".join(val) + if str_tuples: + str_tuples = "%s,(%s)" % (str_tuples, str_val) + else: + str_tuples = "(%s)" % str_val + + return "[%s]" % str_tuples From 8bec9a263d986908365c4328be419a172eadc025 Mon Sep 17 00:00:00 2001 From: "M. Sonntag" Date: Mon, 17 Feb 2020 18:17:02 +0100 Subject: [PATCH 07/10] [tools] Use odml tuple export Closes #353, closes #250 Use the parser_utils odml_tuple_export function in xmlparser and dict_parser. It is required to properly serialize odml style tuples to XML, JSON and YAML files. --- odml/tools/dict_parser.py | 6 +++--- odml/tools/xmlparser.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/odml/tools/dict_parser.py b/odml/tools/dict_parser.py index 35e0f2fc..4b22783c 100644 --- a/odml/tools/dict_parser.py +++ b/odml/tools/dict_parser.py @@ -5,7 +5,7 @@ from .. import format as odmlfmt from ..info import FORMAT_VERSION -from .parser_utils import InvalidVersionException, ParserException +from .parser_utils import InvalidVersionException, ParserException, odml_tuple_export class DictWriter: @@ -107,8 +107,8 @@ def get_properties(props_list): elif (tag == []) or tag: # Even if 'values' is empty, allow '[]' # Custom odML tuples require special handling. if attr == "values" and prop.dtype and \ - prop.dtype.endswith("-tuple") and len(prop.values) > 0: - prop_dict["value"] = "(%s)" % ";".join(prop.values[0]) + prop.dtype.endswith("-tuple") and prop.values: + prop_dict["value"] = odml_tuple_export(prop.values) else: # Always use the arguments key attribute name when saving prop_dict[i] = tag diff --git a/odml/tools/xmlparser.py b/odml/tools/xmlparser.py index f400e9c6..7e0bcad0 100644 --- a/odml/tools/xmlparser.py +++ b/odml/tools/xmlparser.py @@ -22,7 +22,7 @@ from .. import format as ofmt from ..info import FORMAT_VERSION -from .parser_utils import InvalidVersionException, ParserException +from .parser_utils import InvalidVersionException, ParserException, odml_tuple_export try: unicode = unicode @@ -121,8 +121,8 @@ def save_element(curr_el): continue if isinstance(fmt, ofmt.Property.__class__) and k == "value": # Custom odML tuples require special handling for save loading from file. - if curr_el.dtype and curr_el.dtype.endswith("-tuple") and len(val) > 0: - ele = E(k, "(%s)" % ";".join(val[0])) + if curr_el.dtype and curr_el.dtype.endswith("-tuple") and val: + ele = E(k, odml_tuple_export(val)) else: ele = E(k, to_csv(val)) cur.append(ele) From 4af3bc00d24bc232146bc5ff782907edb379ca30 Mon Sep 17 00:00:00 2001 From: "M. Sonntag" Date: Mon, 17 Feb 2020 18:36:08 +0100 Subject: [PATCH 08/10] [odml/dtypes] Raise ValueError in tuple_get Closes #354 Also updates the corresponding test. --- odml/dtypes.py | 12 +++++++++--- test/test_dtypes.py | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/odml/dtypes.py b/odml/dtypes.py index 931a934e..51473342 100644 --- a/odml/dtypes.py +++ b/odml/dtypes.py @@ -328,12 +328,18 @@ def tuple_get(string, count=None): """ if not string: return None + string = string.strip() - assert string.startswith("(") and string.endswith(")") + if not (string.startswith("(") and string.endswith(")")): + msg = "Tuple value misses brackets: '%s'" % string + raise ValueError(msg) + string = string[1:-1] res = [x.strip() for x in string.split(";")] - if count is not None: # be strict - assert len(res) == count + if count is not None and not len(res) == count: + msg = "%s-tuple value does not match required item length" % count + raise ValueError(msg) + return res diff --git a/test/test_dtypes.py b/test/test_dtypes.py index 0f2fa11c..2d54a03a 100644 --- a/test/test_dtypes.py +++ b/test/test_dtypes.py @@ -213,10 +213,10 @@ def test_tuple(self): self.assertEqual(typ.tuple_get("(39.12; 67.19)"), ["39.12", "67.19"]) # Test fail on missing parenthesis. - with self.assertRaises(AssertionError): + with self.assertRaises(ValueError): _ = typ.tuple_get("fail") # Test fail on mismatching element count and count number. - with self.assertRaises(AssertionError): + with self.assertRaises(ValueError): _ = typ.tuple_get("(1; 2; 3)", 2) def test_dtype_none(self): From 8fe2d4319a16872a6da584c6ce3f26d74cccb0fe Mon Sep 17 00:00:00 2001 From: "M. Sonntag" Date: Mon, 17 Feb 2020 18:55:14 +0100 Subject: [PATCH 09/10] [test/dtypes] Add multiple tuple integration --- test/test_dtypes_integration.py | 68 ++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 9 deletions(-) diff --git a/test/test_dtypes_integration.py b/test/test_dtypes_integration.py index 6550124a..2b1ab50d 100644 --- a/test/test_dtypes_integration.py +++ b/test/test_dtypes_integration.py @@ -324,34 +324,84 @@ def test_bool(self): self.assertEqual(self.doc, ydoc) def test_tuple(self): + # test single tuple value val_type = "3-tuple" val_in = "(1; 1; 1)" - val_odml = ["1", "1", "1"] + val_odml = [["1", "1", "1"]] parent_sec = self.doc.sections[0] - _ = odml.Property(name="tuple test single", dtype=val_type, - value=val_in, parent=parent_sec) + sec_name = parent_sec.name + prop_name = "tuple_test_single" + _ = odml.Property(name=prop_name, dtype=val_type, + values=val_in, parent=parent_sec) + + # Test correct json save and load. + odml.save(self.doc, self.json_file, "JSON") + jdoc = odml.load(self.json_file, "JSON") + + self.assertEqual(jdoc.sections[sec_name].properties[prop_name].dtype, val_type) + self.assertEqual(jdoc.sections[sec_name].properties[prop_name].values, val_odml) + self.assertEqual(jdoc.sections[sec_name].properties[prop_name].values, + self.doc.sections[sec_name].properties[prop_name].values) + self.assertEqual(self.doc, jdoc) + + # Test correct xml save and load. + odml.save(self.doc, self.xml_file) + xdoc = odml.load(self.xml_file) + + self.assertEqual(xdoc.sections[sec_name].properties[prop_name].dtype, val_type) + self.assertEqual(xdoc.sections[sec_name].properties[prop_name].values, val_odml) + self.assertEqual(xdoc.sections[sec_name].properties[prop_name].values, + self.doc.sections[sec_name].properties[prop_name].values) + self.assertEqual(self.doc, xdoc) + + # Test correct yaml save and load. + odml.save(self.doc, self.yaml_file, "YAML") + ydoc = odml.load(self.yaml_file, "YAML") + + self.assertEqual(ydoc.sections[sec_name].properties[prop_name].dtype, val_type) + self.assertEqual(ydoc.sections[sec_name].properties[prop_name].values, val_odml) + self.assertEqual(ydoc.sections[sec_name].properties[prop_name].values, + self.doc.sections[sec_name].properties[prop_name].values) + self.assertEqual(self.doc, ydoc) + + # test multiple tuple values + val_type = "3-tuple" + val_in = ["(1; 1; 1)", "(2; 2; 2)", "(3; 3; 3)"] + val_odml = [["1", "1", "1"], ["2", "2", "2"], ["3", "3", "3"]] + + parent_sec = self.doc.sections[0] + sec_name = parent_sec.name + prop_name = "tuple_test_multiple" + _ = odml.Property(name=prop_name, dtype=val_type, + values=val_in, parent=parent_sec) # Test correct json save and load. odml.save(self.doc, self.json_file, "JSON") jdoc = odml.load(self.json_file, "JSON") - self.assertEqual(jdoc.sections[0].properties[0].dtype, val_type) - self.assertEqual(jdoc.sections[0].properties[0].values, [val_odml]) + self.assertEqual(jdoc.sections[sec_name].properties[prop_name].dtype, val_type) + self.assertEqual(jdoc.sections[sec_name].properties[prop_name].values, val_odml) + self.assertEqual(jdoc.sections[sec_name].properties[prop_name].values, + self.doc.sections[sec_name].properties[prop_name].values) self.assertEqual(self.doc, jdoc) # Test correct xml save and load. odml.save(self.doc, self.xml_file) xdoc = odml.load(self.xml_file) - self.assertEqual(xdoc.sections[0].properties[0].dtype, val_type) - self.assertEqual(xdoc.sections[0].properties[0].values, [val_odml]) + self.assertEqual(xdoc.sections[sec_name].properties[prop_name].dtype, val_type) + self.assertEqual(xdoc.sections[sec_name].properties[prop_name].values, val_odml) + self.assertEqual(xdoc.sections[sec_name].properties[prop_name].values, + self.doc.sections[sec_name].properties[prop_name].values) self.assertEqual(self.doc, xdoc) # Test correct yaml save and load. odml.save(self.doc, self.yaml_file, "YAML") ydoc = odml.load(self.yaml_file, "YAML") - self.assertEqual(ydoc.sections[0].properties[0].dtype, val_type) - self.assertEqual(ydoc.sections[0].properties[0].values, [val_odml]) + self.assertEqual(ydoc.sections[sec_name].properties[prop_name].dtype, val_type) + self.assertEqual(ydoc.sections[sec_name].properties[prop_name].values, val_odml) + self.assertEqual(ydoc.sections[sec_name].properties[prop_name].values, + self.doc.sections[sec_name].properties[prop_name].values) self.assertEqual(self.doc, ydoc) From f4b72026850068f18d2dbd8539fb8108d0900f12 Mon Sep 17 00:00:00 2001 From: "M. Sonntag" Date: Mon, 17 Feb 2020 20:19:14 +0100 Subject: [PATCH 10/10] [odml/property] Fix py2 odml tuple issue When loading odml style tuples from a yaml or json file with Python 2, the string containing the tuples is not properly parsed from a unicode string to a list of odml style tuples. With JSON and YAML in Python 3 this conversion works out of the box but to not break Python 2 compatibility already this hotfix is introduced. --- odml/property.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/odml/property.py b/odml/property.py index 26a391d5..09443e6d 100644 --- a/odml/property.py +++ b/odml/property.py @@ -10,6 +10,38 @@ from .tools.doc_inherit import inherit_docstring, allow_inherit_docstring +def odml_tuple_import(t_count, new_value): + """ + Checks via a heuristic if the values in a string fit the general + odml style tuple format and the individual items match the + required number of tuple values. + Legacy Python2 code required to parse unicode strings to a list + of odml style tuples. + + :param t_count: integer, required values within a single odml style tuple. + :param new_value: string containing an odml style tuple list. + :return: list of odml style tuples. + """ + try: + unicode = unicode + except NameError: + unicode = str + + if len(new_value) != 1 and not isinstance(new_value[0], unicode): + return new_value + + cln = new_value[0].strip() + l_check = cln.startswith("[") and cln.endswith("]") + br_check = cln.count("(") == cln.count(")") + com_check = cln.count("(") == (cln.count(",") + 1) + sep_check = t_count == 1 or cln.count("(") == (cln.count(";") / (t_count - 1)) + + if l_check and br_check and com_check and sep_check: + new_value = cln[1:-1].split(",") + + return new_value + + @allow_inherit_docstring class BaseProperty(base.BaseObject): """ @@ -346,6 +378,12 @@ def values(self, new_value): if self._dtype is None: self._dtype = dtypes.infer_dtype(new_value[0]) + # Python2 legacy code for loading odml style tuples from YAML or JSON. + # Works from Python 3 onwards. + if self._dtype.endswith("-tuple") and not self._validate_values(new_value): + t_count = int(self._dtype.split("-")[0]) + new_value = odml_tuple_import(t_count, new_value) + if not self._validate_values(new_value): if self._dtype in ("date", "time", "datetime"): req_format = dtypes.default_values(self._dtype)