From e10a799ebe2c628d1a2e2296028f65a14da6296a Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 3 Dec 2020 23:35:05 +0100 Subject: [PATCH] BUG: Validate drop_duplicates ignore_index argument for bool --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/frame.py | 1 + pandas/tests/frame/methods/test_drop_duplicates.py | 9 +++++++++ 3 files changed, 11 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 84ac2d0c17676..216139b13dfc5 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -793,6 +793,7 @@ Reshaping - Bug in :func:`merge_ordered` couldn't handle list-like ``left_by`` or ``right_by`` (:issue:`35269`) - Bug in :func:`merge_ordered` returned wrong join result when length of ``left_by`` or ``right_by`` equals to the rows of ``left`` or ``right`` (:issue:`38166`) - Bug in :func:`merge_ordered` didn't raise when elements in ``left_by`` or ``right_by`` not exist in ``left`` columns or ``right`` columns (:issue:`38167`) +- Bug in :func:`DataFrame.drop_duplicates` not validating bool dtype for ``ignore_index`` keyword (:issue:`38274`) Sparse ^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5f149f10b05d3..f710660d6ad8e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5273,6 +5273,7 @@ def drop_duplicates( return self.copy() inplace = validate_bool_kwarg(inplace, "inplace") + ignore_index = validate_bool_kwarg(ignore_index, "ignore_index") duplicated = self.duplicated(subset, keep=keep) result = self[-duplicated] diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 79b152b677dfd..b1d3890540bf9 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -459,3 +459,12 @@ def test_drop_duplicates_series_vs_dataframe(keep): dropped_frame = df[[column]].drop_duplicates(keep=keep) dropped_series = 
df[column].drop_duplicates(keep=keep)
     tm.assert_frame_equal(dropped_frame, dropped_series.to_frame())
+
+
+@pytest.mark.parametrize("arg", [[1], 1, "True", [], 0])
+def test_drop_duplicates_non_boolean_ignore_index(arg):
+    # GH#38274: non-bool ignore_index raises ValueError (final '.' matched literally)
+    df = DataFrame({"a": [1, 2, 1, 3]})
+    msg = r'^For argument "ignore_index" expected type bool, received type .*\.$'
+    with pytest.raises(ValueError, match=msg):
+        df.drop_duplicates(ignore_index=arg)