From 492e5e77d58e9bf999a9c293d03d7c78974dbafc Mon Sep 17 00:00:00 2001 From: willem Date: Thu, 29 Jun 2017 15:10:13 +0200 Subject: [PATCH] Add support for decoding CESU-8 encoded strings. This works around Java's non-standard (modified) UTF-8 encoding. --- javaobj.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/javaobj.py b/javaobj.py index 51e320d..9fec6f4 100644 --- a/javaobj.py +++ b/javaobj.py @@ -47,6 +47,12 @@ # Python 3+ from io import BytesIO +try: + import ftfy.bad_codecs + javacodec = "utf-8-var" +except ImportError: + javacodec = "utf-8" + # ------------------------------------------------------------------------------ # Module version @@ -641,7 +647,7 @@ def _readString(self, length_fmt="H"): """ (length,) = self._readStruct(">{0}".format(length_fmt)) ba = self.object_stream.read(length) - return to_str(ba) + return to_str(ba.decode(javacodec)) def do_classdesc(self, parent=None, ident=0): """