diff --git a/msrest/pipeline/universal.py b/msrest/pipeline/universal.py index 08b0787fd6..6aa1b105e8 100644 --- a/msrest/pipeline/universal.py +++ b/msrest/pipeline/universal.py @@ -31,6 +31,7 @@ import os import xml.etree.ElementTree as ET import platform +import codecs from typing import Mapping, Any, Optional, AnyStr, Union, IO, cast, TYPE_CHECKING # pylint: disable=unused-import @@ -45,6 +46,8 @@ _LOGGER = logging.getLogger(__name__) +_BOM = codecs.BOM_UTF8.decode(encoding='utf-8') + class HeadersPolicy(SansIOHTTPPolicy): """A simple policy that sends the given headers @@ -156,6 +159,9 @@ def deserialize_from_text(cls, data, content_type=None): # Explain to mypy the correct type. data_as_str = cast(str, data) + # Remove Byte Order Mark if present in string + data_as_str = data_as_str.lstrip(_BOM) + if content_type is None: return data diff --git a/msrest/universal_http/__init__.py b/msrest/universal_http/__init__.py index 3c54222a31..e6f242d3a9 100644 --- a/msrest/universal_http/__init__.py +++ b/msrest/universal_http/__init__.py @@ -338,10 +338,10 @@ def text(self, encoding=None): # type: (str) -> str """Return the whole body as a string. - :param str encoding: The encoding to apply. If None, use "utf-8". + :param str encoding: The encoding to apply. If None, use "utf-8-sig". Implementation can be smarter if they want (using headers). """ - return self.body().decode(encoding or "utf-8") + return self.body().decode(encoding or "utf-8-sig") def raise_for_status(self): """Raise for status. Should be overriden, but basic implementation provided. diff --git a/tests/test_universal_pipeline.py b/tests/test_universal_pipeline.py index 0291f196d3..6defd1cd3c 100644 --- a/tests/test_universal_pipeline.py +++ b/tests/test_universal_pipeline.py @@ -145,6 +145,12 @@ def body(self): result = response.context["deserialized_data"] assert result["success"] is True + # JSON with UTF-8 BOM + response = build_response(b'\xef\xbb\xbf{"success": true}', content_type="application/json; charset=utf-8") + raw_deserializer.on_response(None, response, stream=False) + result = response.context["deserialized_data"] + assert result["success"] is True + # For compat, if no content-type, decode JSON response = build_response(b'"data"') raw_deserializer.on_response(None, response, stream=False)