From bac5fa7e225c321c3d9c0c4aad0c093a9ccb9253 Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 16 Sep 2017 20:49:56 -0500 Subject: [PATCH 1/3] Raise ValueError when using levels with non-unique values in MultiIndex constructor --- pandas/core/indexes/multi.py | 5 +++++ pandas/tests/indexes/test_multi.py | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8b2cf0e7c0b40..84b85fe956336 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -147,6 +147,11 @@ def _verify_integrity(self, labels=None, levels=None): " level (%d). NOTE: this index is in an" " inconsistent state" % (i, label.max(), len(level))) + for i, level in enumerate(levels): + if len(level) != len(set(level)): + raise ValueError("Level values must be unique: %s " + "on level %d" % ([value for value + in level], i)) def _get_levels(self): return self._levels diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 86308192c9166..959d72e49b676 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -588,6 +588,14 @@ def test_constructor_mismatched_label_levels(self): with tm.assert_raises_regex(ValueError, label_error): self.index.copy().labels = [[0, 0, 0, 0], [0, 0]] + def test_constructor_non_unique_level_values(self): + # GH #17464 + with tm.assert_raises_regex(ValueError, '^Level values'): + MultiIndex(levels=[[0, 1], [0, 0, 1, 1]], + labels=[[0, 0, 0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 0, 0, 1, 1]], + names=[u'idx0', u'idx1']) + def assert_multiindex_copied(self, copy, original): # Levels should be (at least, shallow copied) tm.assert_copy(copy.levels, original.levels) From 372d88ba283f3dd9bf9ba9e4e0d4fb670cf69e03 Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 16 Sep 2017 21:43:34 -0500 Subject: [PATCH 2/3] Change % to .format string notation --- pandas/core/indexes/multi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 84b85fe956336..71e16fdf75f91 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -149,9 +149,9 @@ def _verify_integrity(self, labels=None, levels=None): len(level))) for i, level in enumerate(levels): if len(level) != len(set(level)): - raise ValueError("Level values must be unique: %s " - "on level %d" % ([value for value - in level], i)) + raise ValueError("Level values must be unique: {0}" + " on level {1}".format([value for value + in level], i)) def _get_levels(self): return self._levels From 2483ffeb2f8128bdaf41ced82018affab8ce4efa Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sun, 17 Sep 2017 21:20:38 -0500 Subject: [PATCH 3/3] Check for uniqueness with is_unique and add whatsnew --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/indexes/multi.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 722e19d2703b5..6cfc90c14ec70 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -432,7 +432,7 @@ Other API Changes - :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). - :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) - +- :class:`MultiIndex` constructor now checks if the values of each level are unique when ``verify_integrity=True`` (:issue:`17464`) .. _whatsnew_0210.deprecations: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 71e16fdf75f91..5e463d276d655 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -148,8 +148,8 @@ def _verify_integrity(self, labels=None, levels=None): " inconsistent state" % (i, label.max(), len(level))) for i, level in enumerate(levels): - if len(level) != len(set(level)): - raise ValueError("Level values must be unique: {0}" + if not level.is_unique: + raise ValueError("Level values must be unique: {0!r}" " on level {1}".format([value for value in level], i))