From 5a953a1f0210b19e55f4230a3726fbe76df0b689 Mon Sep 17 00:00:00 2001 From: "EKC (Erik Cederstrand)" Date: Thu, 25 Oct 2018 20:01:49 +0200 Subject: [PATCH 1/9] Support string buffer input as well as bytes. Fixes #21 --- src/base64io/__init__.py | 14 +++++++++----- test/unit/test_base64_stream.py | 24 ++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/base64io/__init__.py b/src/base64io/__init__.py index a952212..074fa30 100644 --- a/src/base64io/__init__.py +++ b/src/base64io/__init__.py @@ -226,8 +226,8 @@ def _read_additional_data_removing_whitespace(self, data, total_bytes_to_read): # case the base64 module happily removes any whitespace. return data - _data_buffer = io.BytesIO() - _data_buffer.write(b"".join(data.split())) + _data_buffer = io.BytesIO() if isinstance(data, bytes) else io.StringIO() + _data_buffer.write(type(data)().join(data.split())) _remaining_bytes_to_read = total_bytes_to_read - _data_buffer.tell() while _remaining_bytes_to_read > 0: @@ -236,7 +236,7 @@ def _read_additional_data_removing_whitespace(self, data, total_bytes_to_read): # No more data to read from wrapped stream. break - _data_buffer.write(b"".join(_raw_additional_data.split())) + _data_buffer.write(type(data)().join(_raw_additional_data.split())) _remaining_bytes_to_read = total_bytes_to_read - _data_buffer.tell() return _data_buffer.getvalue() @@ -273,8 +273,12 @@ def read(self, b=-1): data = self.__wrapped.read(_bytes_to_read) # Remove whitespace from read data and attempt to read more data to get the desired # number of bytes. - if any([char.encode("utf-8") in data for char in string.whitespace]): - data = self._read_additional_data_removing_whitespace(data, _bytes_to_read) + if isinstance(data, bytes): + if any([char.encode("utf-8") in data for char in string.whitespace]): + data = self._read_additional_data_removing_whitespace(data, _bytes_to_read) + else: + if any([char in data for char in string.whitespace]): + data = self._read_additional_data_removing_whitespace(data, _bytes_to_read) results = io.BytesIO() # First, load any stashed bytes diff --git a/test/unit/test_base64_stream.py b/test/unit/test_base64_stream.py index de8410c..4492516 100644 --- a/test/unit/test_base64_stream.py +++ b/test/unit/test_base64_stream.py @@ -154,6 +154,22 @@ def test_base64io_decode(bytes_to_generate, bytes_per_round, number_of_rounds, t assert test == plaintext_source[:total_bytes_to_expect] +@pytest.mark.parametrize( + "bytes_to_generate, bytes_per_round, number_of_rounds, total_bytes_to_expect", build_test_cases() +) +def test_base64io_decode_str(bytes_to_generate, bytes_per_round, number_of_rounds, total_bytes_to_expect): + plaintext_source = os.urandom(bytes_to_generate) + plaintext_b64 = io.StringIO(base64.b64encode(plaintext_source).decode('ascii')) + plaintext_wrapped = Base64IO(plaintext_b64) + + test = b"" + for _round in range(number_of_rounds): + test += plaintext_wrapped.read(bytes_per_round) + + assert len(test) == total_bytes_to_expect + assert test == plaintext_source[:total_bytes_to_expect] + + @pytest.mark.parametrize( "bytes_to_generate, bytes_per_round, number_of_rounds, total_bytes_to_expect", build_test_cases() ) @@ -297,6 +313,14 @@ def test_base64io_decode_with_whitespace(plaintext_source, b64_plaintext_with_wh assert test == plaintext_source[:read_bytes] +@pytest.mark.parametrize("plaintext_source, b64_plaintext_with_whitespace, read_bytes", build_whitespace_testcases()) +def test_base64io_decode_with_whitespace_str(plaintext_source, b64_plaintext_with_whitespace, read_bytes): + with Base64IO(io.StringIO(b64_plaintext_with_whitespace.decode('ascii'))) as decoder: + test = decoder.read(read_bytes) + + assert test == plaintext_source[:read_bytes] + + @pytest.mark.parametrize( "plaintext_source, b64_plaintext_with_whitespace, read_bytes", ((b"\x00\x00\x00", b"AAAA", 3),) ) From 2bac88f9f02af7549bd7dc415cbec869eee42398 Mon Sep 17 00:00:00 2001 From: "EKC (Erik Cederstrand)" Date: Thu, 1 Nov 2018 13:24:26 +0100 Subject: [PATCH 2/9] Fix type annotation --- src/base64io/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base64io/__init__.py b/src/base64io/__init__.py index 074fa30..1c0bae9 100644 --- a/src/base64io/__init__.py +++ b/src/base64io/__init__.py @@ -209,7 +209,7 @@ def writelines(self, lines): self.write(line) def _read_additional_data_removing_whitespace(self, data, total_bytes_to_read): - # type: (bytes, int) -> bytes + # type: (AnyStr, int) -> AnyStr """Read additional data from wrapped stream until we reach the desired number of bytes. .. note:: From 2281abd5765fe16717d23ded88999fc1bd862b8e Mon Sep 17 00:00:00 2001 From: "EKC (Erik Cederstrand)" Date: Thu, 1 Nov 2018 13:24:46 +0100 Subject: [PATCH 3/9] Reduce code duplication --- src/base64io/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/base64io/__init__.py b/src/base64io/__init__.py index 1c0bae9..2ec635c 100644 --- a/src/base64io/__init__.py +++ b/src/base64io/__init__.py @@ -274,11 +274,12 @@ def read(self, b=-1): # Remove whitespace from read data and attempt to read more data to get the desired # number of bytes. if isinstance(data, bytes): - if any([char.encode("utf-8") in data for char in string.whitespace]): - data = self._read_additional_data_removing_whitespace(data, _bytes_to_read) + whitespace = [char.encode("utf-8") for char in string.whitespace] else: - if any([char in data for char in string.whitespace]): - data = self._read_additional_data_removing_whitespace(data, _bytes_to_read) + whitespace = string.whitespace + + if any([char in data for char in whitespace]): + data = self._read_additional_data_removing_whitespace(data, _bytes_to_read) results = io.BytesIO() # First, load any stashed bytes From 611ee2b90dfc4a930d2380d7fbbd50a12e829d52 Mon Sep 17 00:00:00 2001 From: "EKC (Erik Cederstrand)" Date: Fri, 2 Nov 2018 10:38:30 +0100 Subject: [PATCH 4/9] Add missing import --- src/base64io/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base64io/__init__.py b/src/base64io/__init__.py index 2ec635c..76b266d 100644 --- a/src/base64io/__init__.py +++ b/src/base64io/__init__.py @@ -23,7 +23,7 @@ try: # Python 3.5.0 and 3.5.1 have incompatible typing modules from types import TracebackType # noqa pylint: disable=unused-import - from typing import IO, Iterable, List, Type, Optional # noqa pylint: disable=unused-import + from typing import IO, Iterable, List, Type, Optional, AnyStr # noqa pylint: disable=unused-import except ImportError: # pragma: no cover # We only actually need these imports when running the mypy checks pass From cacfb1ba41e7195b77fbdd1cf4277c5742b9625d Mon Sep 17 00:00:00 2001 From: "EKC (Erik Cederstrand)" Date: Fri, 2 Nov 2018 10:53:16 +0100 Subject: [PATCH 5/9] Just encode the string - no need to encode chars one at a time --- src/base64io/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base64io/__init__.py b/src/base64io/__init__.py index 76b266d..b5672bf 100644 --- a/src/base64io/__init__.py +++ b/src/base64io/__init__.py @@ -274,7 +274,7 @@ def read(self, b=-1): # Remove whitespace from read data and attempt to read more data to get the desired # number of bytes. if isinstance(data, bytes): - whitespace = [char.encode("utf-8") for char in string.whitespace] + whitespace = string.whitespace.encode("utf-8") else: whitespace = string.whitespace From 8995c132c29f4d0be4efeb897b4f2fd804b6b59f Mon Sep 17 00:00:00 2001 From: "EKC (Erik Cederstrand)" Date: Fri, 2 Nov 2018 11:20:27 +0100 Subject: [PATCH 6/9] Add type hint for _data_buffer in attempt to fix mypy test --- src/base64io/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/base64io/__init__.py b/src/base64io/__init__.py index b5672bf..0a073da 100644 --- a/src/base64io/__init__.py +++ b/src/base64io/__init__.py @@ -23,7 +23,7 @@ try: # Python 3.5.0 and 3.5.1 have incompatible typing modules from types import TracebackType # noqa pylint: disable=unused-import - from typing import IO, Iterable, List, Type, Optional, AnyStr # noqa pylint: disable=unused-import + from typing import Union, IO, Iterable, List, Type, Optional, AnyStr # noqa pylint: disable=unused-import except ImportError: # pragma: no cover # We only actually need these imports when running the mypy checks pass @@ -226,7 +226,8 @@ def _read_additional_data_removing_whitespace(self, data, total_bytes_to_read): # case the base64 module happily removes any whitespace. return data - _data_buffer = io.BytesIO() if isinstance(data, bytes) else io.StringIO() + _data_buffer = io.BytesIO() if isinstance(data, bytes) \ + else io.StringIO() # type: Union[io.BytesIO, io.StringIO] _data_buffer.write(type(data)().join(data.split())) _remaining_bytes_to_read = total_bytes_to_read - _data_buffer.tell() From 74ac0e8f0a09499fc42b82afd4e4924645b5b215 Mon Sep 17 00:00:00 2001 From: "EKC (Erik Cederstrand)" Date: Fri, 2 Nov 2018 11:58:34 +0100 Subject: [PATCH 7/9] Fix mypy test for py3 --- src/base64io/__init__.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/base64io/__init__.py b/src/base64io/__init__.py index 0a073da..161784a 100644 --- a/src/base64io/__init__.py +++ b/src/base64io/__init__.py @@ -226,9 +226,9 @@ def _read_additional_data_removing_whitespace(self, data, total_bytes_to_read): # case the base64 module happily removes any whitespace. return data - _data_buffer = io.BytesIO() if isinstance(data, bytes) \ - else io.StringIO() # type: Union[io.BytesIO, io.StringIO] - _data_buffer.write(type(data)().join(data.split())) + _data_buffer = io.BytesIO() if isinstance(data, bytes) else io.StringIO() + join_char = b'' if isinstance(data, bytes) else '' + _data_buffer.write(join_char.join(data.split())) _remaining_bytes_to_read = total_bytes_to_read - _data_buffer.tell() while _remaining_bytes_to_read > 0: @@ -237,7 +237,7 @@ def _read_additional_data_removing_whitespace(self, data, total_bytes_to_read): # No more data to read from wrapped stream. break - _data_buffer.write(type(data)().join(_raw_additional_data.split())) + _data_buffer.write(join_char.join(_raw_additional_data.split())) _remaining_bytes_to_read = total_bytes_to_read - _data_buffer.tell() return _data_buffer.getvalue() @@ -274,10 +274,8 @@ def read(self, b=-1): data = self.__wrapped.read(_bytes_to_read) # Remove whitespace from read data and attempt to read more data to get the desired # number of bytes. - if isinstance(data, bytes): - whitespace = string.whitespace.encode("utf-8") - else: - whitespace = string.whitespace + whitespace = string.whitespace.encode("utf-8") if isinstance(data, bytes) \ + else string.whitespace # type: Union[bytes, str] if any([char in data for char in whitespace]): data = self._read_additional_data_removing_whitespace(data, _bytes_to_read) From 9b6069ecb1b5a38bbf018d366b9de69a6af334ab Mon Sep 17 00:00:00 2001 From: "EKC (Erik Cederstrand)" Date: Fri, 2 Nov 2018 12:01:22 +0100 Subject: [PATCH 8/9] Ignore mypy false positives on py2 --- src/base64io/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/base64io/__init__.py b/src/base64io/__init__.py index 161784a..43b56dd 100644 --- a/src/base64io/__init__.py +++ b/src/base64io/__init__.py @@ -228,8 +228,8 @@ def _read_additional_data_removing_whitespace(self, data, total_bytes_to_read): _data_buffer = io.BytesIO() if isinstance(data, bytes) else io.StringIO() join_char = b'' if isinstance(data, bytes) else '' - _data_buffer.write(join_char.join(data.split())) - _remaining_bytes_to_read = total_bytes_to_read - _data_buffer.tell() + _data_buffer.write(join_char.join(data.split())) # type: ignore + _remaining_bytes_to_read = total_bytes_to_read - _data_buffer.tell() # type: ignore while _remaining_bytes_to_read > 0: _raw_additional_data = self.__wrapped.read(_remaining_bytes_to_read) @@ -237,9 +237,9 @@ def _read_additional_data_removing_whitespace(self, data, total_bytes_to_read): # No more data to read from wrapped stream. break - _data_buffer.write(join_char.join(_raw_additional_data.split())) - _remaining_bytes_to_read = total_bytes_to_read - _data_buffer.tell() - return _data_buffer.getvalue() + _data_buffer.write(join_char.join(_raw_additional_data.split())) # type: ignore + _remaining_bytes_to_read = total_bytes_to_read - _data_buffer.tell() # type: ignore + return _data_buffer.getvalue() # type: ignore def read(self, b=-1): # type: (int) -> bytes From bc06b20fafe2abf7bb0423c2c76db9dcb150c1d8 Mon Sep 17 00:00:00 2001 From: "EKC (Erik Cederstrand)" Date: Fri, 2 Nov 2018 15:27:40 +0100 Subject: [PATCH 9/9] Fix tests on py2? --- src/base64io/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base64io/__init__.py b/src/base64io/__init__.py index 43b56dd..d399080 100644 --- a/src/base64io/__init__.py +++ b/src/base64io/__init__.py @@ -227,7 +227,7 @@ def _read_additional_data_removing_whitespace(self, data, total_bytes_to_read): return data _data_buffer = io.BytesIO() if isinstance(data, bytes) else io.StringIO() - join_char = b'' if isinstance(data, bytes) else '' + join_char = b'' if isinstance(data, bytes) else u'' _data_buffer.write(join_char.join(data.split())) # type: ignore _remaining_bytes_to_read = total_bytes_to_read - _data_buffer.tell() # type: ignore