From 3dafab54467bffc998913020dbc32140289bb2a2 Mon Sep 17 00:00:00 2001 From: "M. Sonntag" Date: Mon, 24 Sep 2018 15:40:51 +0200 Subject: [PATCH] [tools/VersionConv] Drop broken tag support The VersionConverter covered an edge case of xml files with opening tags that were missing their closing tags. Support for this is dropped to make sure files are always opened with the proper encoding via lxml instead. --- odml/tools/version_converter.py | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/odml/tools/version_converter.py b/odml/tools/version_converter.py index 5dc3a04b..99fdda93 100644 --- a/odml/tools/version_converter.py +++ b/odml/tools/version_converter.py @@ -24,42 +24,28 @@ class VersionConverter(object): 'dtype': 'type' } - _error_strings = { - '': '' - } - def __init__(self, filename): self.filename = filename self.conversion_log = [] def _parse_xml(self): """ - _parse_xml checks whether the provided file object can be parsed, - fixes known mismatching elements and returns the parsed lxml tree. + _parse_xml checks whether the provided file object can be parsed + and returns the parsed lxml tree. :return: ElementTree """ + # Make pretty print available by resetting format + parser = ET.XMLParser(remove_blank_text=True) if isinstance(self.filename, io.StringIO): doc = self.filename.getvalue() + tree = ET.ElementTree(ET.fromstring(doc, parser)) + elif os.path.exists(self.filename) and os.path.getsize(self.filename) > 0: - with open(self.filename, 'r+') as file: - doc = file.read() + tree = ET.parse(self.filename, parser) else: msg = "Cannot parse provided file object '%s'." 
% self.filename raise Exception(msg) - # Fix known mismatching elements - for elem, val in self._error_strings.items(): - if elem in doc: - doc = doc.replace(elem, val) - - # Make sure encoding is present for the xml parser - if sys.version_info.major > 2: - doc = doc.encode('utf-8') - - # Make pretty print available by resetting format - parser = ET.XMLParser(remove_blank_text=True) - tree = ET.ElementTree(ET.fromstring(doc, parser)) - return tree def _parse_json(self):