From c26690f614696f1e45ff15bae14b35d2353a5c62 Mon Sep 17 00:00:00 2001 From: Moriyoshi Koizumi Date: Thu, 31 May 2018 10:21:49 +0000 Subject: [PATCH 1/2] Fix serialization / deserialization of char array. --- javaobj.py | 9 +++++---- tests/testCharArray.ser | Bin 0 -> 41 bytes tests/tests.py | 7 +++++++ 3 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 tests/testCharArray.ser diff --git a/javaobj.py b/javaobj.py index 79da5f4..6c1e1cf 100644 --- a/javaobj.py +++ b/javaobj.py @@ -126,6 +126,8 @@ def read_to_str(data): """ return ''.join(chr(char) for char in data) + unichr = chr + else: # Python 2 interpreter : str & unicode def to_str(data, encoding="UTF-8"): @@ -1078,10 +1080,7 @@ def _read_value(self, field_type, ident, name=""): elif field_type == self.TYPE_BYTE: (res,) = self._readStruct(">b") elif field_type == self.TYPE_CHAR: - # TYPE_CHAR is defined by the serialization specification - # but not used in the implementation, so this is - # a hypothetical code - res = bytes(self._readStruct(">bb")).decode("utf-16-be") + res = unichr(self._readStruct(">H")[0]) elif field_type == self.TYPE_SHORT: (res,) = self._readStruct(">h") elif field_type == self.TYPE_INTEGER: @@ -1531,6 +1530,8 @@ def _write_value(self, field_type, value): self._writeStruct(">B", 1, (1 if value else 0,)) elif field_type == self.TYPE_BYTE: self._writeStruct(">b", 1, (value,)) + elif field_type == self.TYPE_CHAR: + self._writeStruct(">H", 1, (ord(value),)) elif field_type == self.TYPE_SHORT: self._writeStruct(">h", 1, (value,)) elif field_type == self.TYPE_INTEGER: diff --git a/tests/testCharArray.ser b/tests/testCharArray.ser new file mode 100644 index 0000000000000000000000000000000000000000..1e551fcbc6c65d1027f3cd0b74d75870d97ae72b GIT binary patch literal 41 xcmZ4UmVvdjh=D2Ed4pQohDWh2YnT`qDhe1F7}yyYZZI$~-eF*1`v0GS82}g#3@QKs literal 0 HcmV?d00001 diff --git a/tests/tests.py b/tests/tests.py index 3247cf6..7c1b548 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -280,6 +280,13 @@ def test_arrays(self): self._try_marshalling(jobj, pobj) + def test_char_array(self): + jobj = self.read_file("testCharArray.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + self.assertEqual(pobj, ['\u0000', '\ud800', '\u0001', '\udc00', '\u0002', '\uffff', '\u0003']) + self._try_marshalling(jobj, pobj) + def test_enums(self): jobj = self.read_file("objEnums.ser") pobj = javaobj.loads(jobj) From 38c7b922dc486f223343bfa7b1e1093198aa9d44 Mon Sep 17 00:00:00 2001 From: Moriyoshi Koizumi Date: Thu, 31 May 2018 11:00:46 +0000 Subject: [PATCH 2/2] Fix logging unprintable characters on Py2. --- javaobj.py | 21 +++++++++++++++++---- tests/tests.py | 2 +- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/javaobj.py b/javaobj.py index 6c1e1cf..821bee7 100644 --- a/javaobj.py +++ b/javaobj.py @@ -127,6 +127,7 @@ def read_to_str(data): return ''.join(chr(char) for char in data) unichr = chr + unicode = str else: # Python 2 interpreter : str & unicode @@ -983,7 +984,11 @@ def do_array(self, parent=None, ident=0): else: for _ in range(size): res = self._read_value(type_char, ident) - log_debug("Native value: {0}".format(res), ident) + _res = res + # py2 + if str is not unicode and isinstance(res, unicode): + _res = res.encode('ascii', 'replace') + log_debug("Native value: {0}".format(_res), ident) array.append(res) return array @@ -1074,13 +1079,15 @@ def _read_value(self, field_type, ident, name=""): # We don't need details for arrays and objects field_type = field_type[0] + _res = None if field_type == self.TYPE_BOOLEAN: (val,) = self._readStruct(">B") res = bool(val) elif field_type == self.TYPE_BYTE: (res,) = self._readStruct(">b") elif field_type == self.TYPE_CHAR: - res = unichr(self._readStruct(">H")[0]) + _res = self._readStruct(">H")[0] + res = unichr(_res) elif field_type == self.TYPE_SHORT: (res,) = self._readStruct(">h") elif field_type == self.TYPE_INTEGER: @@ -1096,7 +1103,10 @@ def _read_value(self, field_type, ident, name=""): else: raise RuntimeError("Unknown typecode: {0}".format(field_type)) - log_debug("* {0} {1}: {2}".format(field_type, name, res), ident) + if _res is None: + _res = res + + log_debug("* {0} {1}: {2}".format(field_type, name, _res), ident) return res def _convert_char_to_type(self, type_char): @@ -1512,7 +1522,10 @@ def write_array(self, obj): else: log_debug("Write array of type %s" % type_char) for v in obj: - log_debug("Writing: %s" % v) + _v = v + if str is not unicode and isinstance(v, unicode): + _v = v.encode('ascii', 'replace') + log_debug("Writing: %s" % _v) self._write_value(type_char, v) def _write_value(self, field_type, value): diff --git a/tests/tests.py b/tests/tests.py index 7c1b548..cfe358f 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -284,7 +284,7 @@ def test_char_array(self): jobj = self.read_file("testCharArray.ser") pobj = javaobj.loads(jobj) _logger.debug(pobj) - self.assertEqual(pobj, ['\u0000', '\ud800', '\u0001', '\udc00', '\u0002', '\uffff', '\u0003']) + self.assertEqual(pobj, [u'\u0000', u'\ud800', u'\u0001', u'\udc00', u'\u0002', u'\uffff', u'\u0003']) self._try_marshalling(jobj, pobj) def test_enums(self):