Skip to content
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1165,3 +1165,4 @@ Other

- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`)
- Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existant option key in some cases (:issue:`19789`)
- Bug in :func:`assert_series_equal` and :func:`assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`)
44 changes: 44 additions & 0 deletions pandas/tests/util/test_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,24 @@ def test_numpy_array_equal_message(self):
assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]),
obj='Index')

def test_numpy_array_equal_unicode_message(self):
# Test ensures that `assert_numpy_array_equals` raises the right
# exception when comparing np.arrays containing differing
# unicode objects (#20503)

expected = """numpy array are different

numpy array values are different \\(33\\.33333 %\\)
\\[left\\]: \\[á, à, ä\\]
\\[right\\]: \\[á, à, å\\]"""

with tm.assert_raises_regex(AssertionError, expected):
assert_numpy_array_equal(np.array([u'á', u'à', u'ä']),
np.array([u'á', u'à', u'å']))
with tm.assert_raises_regex(AssertionError, expected):
assert_almost_equal(np.array([u'á', u'à', u'ä']),
np.array([u'á', u'à', u'å']))

@td.skip_if_windows
def test_numpy_array_equal_object_message(self):

Expand Down Expand Up @@ -499,10 +517,12 @@ def _assert_not_equal(self, a, b, **kwargs):
def test_equal(self):
self._assert_equal(Series(range(3)), Series(range(3)))
self._assert_equal(Series(list('abc')), Series(list('abc')))
self._assert_equal(Series(list(u'áàä')), Series(list(u'áàä')))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a test where left is unicode and right is non-unicode (but string)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 1f7e231


def test_not_equal(self):
self._assert_not_equal(Series(range(3)), Series(range(3)) + 1)
self._assert_not_equal(Series(list('abc')), Series(list('xyz')))
self._assert_not_equal(Series(list(u'áàä')), Series(list(u'éèë')))
self._assert_not_equal(Series(range(3)), Series(range(4)))
self._assert_not_equal(
Series(range(3)), Series(
Expand Down Expand Up @@ -678,6 +698,30 @@ def test_frame_equal_message(self):
pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}),
by_blocks=True)

def test_frame_equal_message_unicode(self):
# Test ensures that `assert_frame_equals` raises the right
# exception when comparing DataFrames containing differing
# unicode objects (#20503)

expected = """DataFrame\\.iloc\\[:, 1\\] are different
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 6b087f2


DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
\\[left\\]: \\[é, è, ë\\]
\\[right\\]: \\[é, è, e̊\\]"""

with tm.assert_raises_regex(AssertionError, expected):
assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
'E': [u'é', u'è', u'ë']}),
pd.DataFrame({'A': [u'á', u'à', u'ä'],
'E': [u'é', u'è', u'e̊']}))

with tm.assert_raises_regex(AssertionError, expected):
assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
'E': [u'é', u'è', u'ë']}),
pd.DataFrame({'A': [u'á', u'à', u'ä'],
'E': [u'é', u'è', u'e̊']}),
by_blocks=True)


class TestAssertCategoricalEqual(object):

Expand Down
7 changes: 7 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,11 +992,18 @@ def raise_assert_detail(obj, message, left, right, diff=None):
left = pprint_thing(left)
elif is_categorical_dtype(left):
left = repr(left)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you import PY2 and string_types up top

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍575b2e8

if compat.PY2 and isinstance(left, compat.string_types):
left = left.encode('utf-8')

if isinstance(right, np.ndarray):
right = pprint_thing(right)
elif is_categorical_dtype(right):
right = repr(right)

if compat.PY2 and isinstance(right, compat.string_types):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment on these.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 45d2b8e

right = right.encode('utf-8')

msg = """{obj} are different

{message}
Expand Down