Skip to content
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1165,3 +1165,4 @@ Other

- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`)
- Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existent option key in some cases (:issue:`19789`)
- Bug in :func:`assert_series_equal` and :func:`assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`)
64 changes: 64 additions & 0 deletions pandas/tests/util/test_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,24 @@ def test_numpy_array_equal_message(self):
assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]),
obj='Index')

def test_numpy_array_equal_unicode_message(self):
    # Check that both `assert_numpy_array_equal` and `assert_almost_equal`
    # raise an AssertionError carrying the expected message when the
    # compared np.arrays hold differing unicode objects (#20503)

    left = np.array([u'á', u'à', u'ä'])
    # identical to `left` except for the last element
    right = np.array([u'á', u'à', u'å'])

    expected = """numpy array are different

numpy array values are different \\(33\\.33333 %\\)
\\[left\\]: \\[á, à, ä\\]
\\[right\\]: \\[á, à, å\\]"""

    # the same message is expected from either assertion helper
    for check in (assert_numpy_array_equal, assert_almost_equal):
        with tm.assert_raises_regex(AssertionError, expected):
            check(left, right)

@td.skip_if_windows
def test_numpy_array_equal_object_message(self):

Expand Down Expand Up @@ -499,10 +517,13 @@ def _assert_not_equal(self, a, b, **kwargs):
def test_equal(self):
    # Two Series built from the same data must compare equal, for
    # integer, plain string and non-ASCII unicode values alike.
    for data in (range(3), list('abc'), list(u'áàä')):
        self._assert_equal(Series(data), Series(data))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a test where left is unicode and right is non-unicode (but string)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 1f7e231


def test_not_equal(self):
self._assert_not_equal(Series(range(3)), Series(range(3)) + 1)
self._assert_not_equal(Series(list('abc')), Series(list('xyz')))
self._assert_not_equal(Series(list(u'áàä')), Series(list(u'éèë')))
self._assert_not_equal(Series(list(u'áàä')), Series(list(b'aaa')))
self._assert_not_equal(Series(range(3)), Series(range(4)))
self._assert_not_equal(
Series(range(3)), Series(
Expand Down Expand Up @@ -678,6 +699,49 @@ def test_frame_equal_message(self):
pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}),
by_blocks=True)

def test_frame_equal_message_unicode(self):
    # Check that `assert_frame_equal` raises an AssertionError carrying
    # the expected message when the compared DataFrames hold differing
    # unicode objects (#20503)

    accented = pd.DataFrame({'A': [u'á', u'à', u'ä'],
                             'E': [u'é', u'è', u'ë']})
    # same as `accented` except for the last value of column 'E'
    one_off = pd.DataFrame({'A': [u'á', u'à', u'ä'],
                            'E': [u'é', u'è', u'e̊']})
    # plain-string frame whose values all differ from `accented`
    plain = pd.DataFrame({'A': ['a', 'a', 'a'],
                          'E': ['e', 'e', 'e']})

    one_off_msg = """DataFrame\\.iloc\\[:, 1\\] are different

DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
\\[left\\]: \\[é, è, ë\\]
\\[right\\]: \\[é, è, e̊\\]"""

    plain_msg = """DataFrame\\.iloc\\[:, 0\\] are different

DataFrame\\.iloc\\[:, 0\\] values are different \\(100\\.0 %\\)
\\[left\\]: \\[á, à, ä\\]
\\[right\\]: \\[a, a, a\\]"""

    # each pairing is checked with the default comparison first and then
    # with by_blocks=True; the message must be the same in both modes
    for other, message in ((one_off, one_off_msg), (plain, plain_msg)):
        for options in ({}, {'by_blocks': True}):
            with tm.assert_raises_regex(AssertionError, message):
                assert_frame_equal(accented, other, **options)


class TestAssertCategoricalEqual(object):

Expand Down
11 changes: 10 additions & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import pandas.compat as compat
from pandas.compat import (
filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter,
raise_with_traceback, httplib, StringIO, PY3)
raise_with_traceback, httplib, StringIO, string_types, PY3, PY2)

from pandas import (bdate_range, CategoricalIndex, Categorical, IntervalIndex,
DatetimeIndex, TimedeltaIndex, PeriodIndex, RangeIndex,
Expand Down Expand Up @@ -992,11 +992,20 @@ def raise_assert_detail(obj, message, left, right, diff=None):
left = pprint_thing(left)
elif is_categorical_dtype(left):
left = repr(left)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you import PY2 and string_types up top

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍575b2e8

if PY2 and isinstance(left, string_types):
# left needs to be printable in native text type in python2
left = left.encode('utf-8')

if isinstance(right, np.ndarray):
right = pprint_thing(right)
elif is_categorical_dtype(right):
right = repr(right)

if PY2 and isinstance(right, string_types):
# right needs to be printable in native text type in python2
right = right.encode('utf-8')

msg = """{obj} are different

{message}
Expand Down