Skip to content

Commit 361021b

Browse files
authored
TST: add validation checks on levels keyword from pd.concat (#46654)
1 parent f3b8439 commit 361021b

File tree

3 files changed

+25
-0
lines changed

3 files changed

+25
-0
lines changed

doc/source/whatsnew/v1.5.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ Other enhancements
9292
- :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`)
9393
- Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`)
9494
- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`)
95+
- :func:`concat` now raises a ``ValueError`` when ``levels`` is given but ``keys`` is ``None`` (:issue:`46653`)
96+
- :func:`concat` now raises a ``ValueError`` when any entry of ``levels`` contains duplicate values (:issue:`46653`)
97+
-
9598

9699
.. ---------------------------------------------------------------------------
97100
.. _whatsnew_150.notable_bug_fixes:

pandas/core/reshape/concat.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,8 @@ def _get_concat_axis(self) -> Index:
668668
return idx
669669

670670
if self.keys is None:
671+
if self.levels is not None:
672+
raise ValueError("levels supported only when keys is not None")
671673
concat_axis = _concat_indexes(indexes)
672674
else:
673675
concat_axis = _make_concat_multiindex(
@@ -712,6 +714,10 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
712714
else:
713715
levels = [ensure_index(x) for x in levels]
714716

717+
for level in levels:
718+
if not level.is_unique:
719+
raise ValueError(f"Level values not unique: {level.tolist()}")
720+
715721
if not all_indexes_same(indexes) or not all(level.is_unique for level in levels):
716722
codes_list = []
717723

pandas/tests/reshape/concat/test_index.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,3 +371,19 @@ def test_concat_with_key_not_unique(self):
371371
out_b = df_b.loc[("x", 0), :]
372372

373373
tm.assert_frame_equal(out_a, out_b)
374+
375+
def test_concat_with_duplicated_levels(self):
376+
# keyword levels should be unique
377+
df1 = DataFrame({"A": [1]}, index=["x"])
378+
df2 = DataFrame({"A": [1]}, index=["y"])
379+
msg = r"Level values not unique: \['x', 'y', 'y'\]"
380+
with pytest.raises(ValueError, match=msg):
381+
concat([df1, df2], keys=["x", "y"], levels=[["x", "y", "y"]])
382+
383+
@pytest.mark.parametrize("levels", [[["x", "y"]], [["x", "y", "y"]]])
384+
def test_concat_with_levels_with_none_keys(self, levels):
385+
df1 = DataFrame({"A": [1]}, index=["x"])
386+
df2 = DataFrame({"A": [1]}, index=["y"])
387+
msg = "levels supported only when keys is not None"
388+
with pytest.raises(ValueError, match=msg):
389+
concat([df1, df2], levels=levels)

0 commit comments

Comments
 (0)