From 9053757dee3fa12ec5a82a022e94c9017632eb17 Mon Sep 17 00:00:00 2001 From: Eduardo Rodrigues Date: Thu, 31 Jan 2019 14:20:42 +0100 Subject: [PATCH 1/3] Strip BOM from JSON responses --- msrest/pipeline/universal.py | 6 ++++++ msrest/universal_http/__init__.py | 4 ++-- tests/test_universal_pipeline.py | 6 ++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/msrest/pipeline/universal.py b/msrest/pipeline/universal.py index 08b0787fd6..71b4b2cd63 100644 --- a/msrest/pipeline/universal.py +++ b/msrest/pipeline/universal.py @@ -31,6 +31,7 @@ import os import xml.etree.ElementTree as ET import platform +import codecs from typing import Mapping, Any, Optional, AnyStr, Union, IO, cast, TYPE_CHECKING # pylint: disable=unused-import @@ -156,6 +157,11 @@ def deserialize_from_text(cls, data, content_type=None): # Explain to mypy the correct type. data_as_str = cast(str, data) + # Remove Byte Order Mark if present in string + bom = codecs.BOM_UTF8.decode(encoding='utf-8') + if data_as_str.startswith(bom): + data_as_str = data_as_str.lstrip(bom) + if content_type is None: return data diff --git a/msrest/universal_http/__init__.py b/msrest/universal_http/__init__.py index 3c54222a31..e6f242d3a9 100644 --- a/msrest/universal_http/__init__.py +++ b/msrest/universal_http/__init__.py @@ -338,10 +338,10 @@ def text(self, encoding=None): # type: (str) -> str """Return the whole body as a string. - :param str encoding: The encoding to apply. If None, use "utf-8". + :param str encoding: The encoding to apply. If None, use "utf-8-sig". Implementation can be smarter if they want (using headers). """ - return self.body().decode(encoding or "utf-8") + return self.body().decode(encoding or "utf-8-sig") def raise_for_status(self): """Raise for status. Should be overriden, but basic implementation provided. diff --git a/tests/test_universal_pipeline.py b/tests/test_universal_pipeline.py index 0291f196d3..6defd1cd3c 100644 --- a/tests/test_universal_pipeline.py +++ b/tests/test_universal_pipeline.py @@ -145,6 +145,12 @@ def body(self): result = response.context["deserialized_data"] assert result["success"] is True + # JSON with UTF-8 BOM + response = build_response(b'\xef\xbb\xbf{"success": true}', content_type="application/json; charset=utf-8") + raw_deserializer.on_response(None, response, stream=False) + result = response.context["deserialized_data"] + assert result["success"] is True + # For compat, if no content-type, decode JSON response = build_response(b'"data"') raw_deserializer.on_response(None, response, stream=False) From 26bbef6487a64c1286a3b85a2b48bc2d8a794ec1 Mon Sep 17 00:00:00 2001 From: Eduardo Rodrigues Date: Wed, 6 Feb 2019 03:55:05 +0100 Subject: [PATCH 2/3] change as requested to put bom as constant --- msrest/pipeline/universal.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/msrest/pipeline/universal.py b/msrest/pipeline/universal.py index 71b4b2cd63..cf3086deb7 100644 --- a/msrest/pipeline/universal.py +++ b/msrest/pipeline/universal.py @@ -46,6 +46,8 @@ _LOGGER = logging.getLogger(__name__) +BOM = codecs.BOM_UTF8.decode(encoding='utf-8') + class HeadersPolicy(SansIOHTTPPolicy): """A simple policy that sends the given headers @@ -158,9 +160,7 @@ def deserialize_from_text(cls, data, content_type=None): data_as_str = cast(str, data) # Remove Byte Order Mark if present in string - bom = codecs.BOM_UTF8.decode(encoding='utf-8') - if data_as_str.startswith(bom): - data_as_str = data_as_str.lstrip(bom) + data_as_str = data_as_str.lstrip(BOM) if content_type is None: return data From f55b93057234d1acd72905ff4956dae65a065e00 Mon Sep 17 00:00:00 2001 From: Eduardo Rodrigues Date: Sat, 9 Feb 2019 10:22:25 +0100 Subject: [PATCH 3/3] make bom constant private --- msrest/pipeline/universal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/msrest/pipeline/universal.py b/msrest/pipeline/universal.py index cf3086deb7..6aa1b105e8 100644 --- a/msrest/pipeline/universal.py +++ b/msrest/pipeline/universal.py @@ -46,7 +46,7 @@ _LOGGER = logging.getLogger(__name__) -BOM = codecs.BOM_UTF8.decode(encoding='utf-8') +_BOM = codecs.BOM_UTF8.decode(encoding='utf-8') class HeadersPolicy(SansIOHTTPPolicy): @@ -160,7 +160,7 @@ def deserialize_from_text(cls, data, content_type=None): data_as_str = cast(str, data) # Remove Byte Order Mark if present in string - data_as_str = data_as_str.lstrip(BOM) + data_as_str = data_as_str.lstrip(_BOM) if content_type is None: return data