Skip to content

Commit a835b37

Browse files
committed
ENH: add DataFrame.is_unique method
1 parent a2f42ac commit a835b37

File tree

4 files changed

+95
-2
lines changed

4 files changed

+95
-2
lines changed

doc/source/reference/frame.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ Reindexing / selection / label manipulation
190190
DataFrame.head
191191
DataFrame.idxmax
192192
DataFrame.idxmin
193+
DataFrame.is_unique
193194
DataFrame.last
194195
DataFrame.reindex
195196
DataFrame.reindex_like

pandas/core/base.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,16 +1105,19 @@ def nunique(self, dropna: bool = True) -> int:
11051105
obj = remove_na_arraylike(self) if dropna else self
11061106
return len(obj.unique())
11071107

1108+
def _is_unique(self) -> bool:
1109+
return self.nunique(dropna=False) == len(self)
1110+
11081111
@property
11091112
def is_unique(self) -> bool:
11101113
"""
1111-
Return boolean if values in the object are unique.
1114+
Return True if values in the object are unique, else False.
11121115
11131116
Returns
11141117
-------
11151118
bool
11161119
"""
1117-
return self.nunique(dropna=False) == len(self)
1120+
return self._is_unique()
11181121

11191122
@property
11201123
def is_monotonic(self) -> bool:

pandas/core/frame.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5372,6 +5372,34 @@ def drop_duplicates(
53725372
else:
53735373
return result
53745374

5375+
def is_unique(
    self, subset: Optional[Union[Hashable, Sequence[Hashable]]] = None
) -> Series:
    """
    Return boolean Series denoting columns with unique values.

    Parameters
    ----------
    subset : column label or sequence of labels, optional
        Only consider certain columns when checking for uniqueness.
        By default use all of the columns.

    Returns
    -------
    Series
        Boolean Series indexed by column label, True where the column
        holds no repeated values.

    See Also
    --------
    DataFrame.duplicated : Indicate duplicate rows.
    """
    if subset is not None:
        # A tuple may be one label (e.g. a single MultiIndex column) rather
        # than a sequence of labels. Treat it as one label when it names an
        # existing column, the same way a scalar label is wrapped.
        is_single_label = not is_list_like(subset) or (
            isinstance(subset, tuple) and subset in self.columns
        )
        if is_single_label:
            # Wrapping in a list makes ``.loc`` hand back a DataFrame, so the
            # recursive call reaches this method and not the Series property.
            subset = [subset]
        return self.loc[:, subset].is_unique()

    if len(self.columns):
        return self.apply(Series._is_unique)

    # No columns to inspect: return an empty boolean Series directly.
    return self._constructor_sliced(dtype=bool)
5402+
53755403
def duplicated(
53765404
self,
53775405
subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
@@ -5405,6 +5433,7 @@ def duplicated(
54055433
Series.duplicated : Equivalent method on Series.
54065434
Series.drop_duplicates : Remove duplicate values from Series.
54075435
DataFrame.drop_duplicates : Remove duplicate values from DataFrame.
5436+
DataFrame.is_unique : Indicate columns with unique values.
54085437
54095438
Examples
54105439
--------
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import re
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas import DataFrame, Series, date_range
7+
import pandas._testing as tm
8+
9+
10+
@pytest.mark.parametrize(
    "frame, expected",
    [
        # no columns
        (DataFrame(), Series(dtype=bool)),
        # single column
        (DataFrame({"a": ["x"]}), Series({"a": True})),
        (DataFrame({"a": ["x", "y"]}), Series({"a": True})),
        (DataFrame({"a": ["x", "x"]}), Series({"a": False})),
        (DataFrame({"a": ["x", "y", "y"]}), Series({"a": False})),
        # multiple columns
        (DataFrame(columns=["a", "b"]), Series({"a": True, "b": True})),
        (DataFrame({"a": ["x"], "b": ["y"]}), Series({"a": True, "b": True})),
        (
            DataFrame({"a": ["x", "y"], "b": ["x", "x"]}),
            Series({"a": True, "b": False}),
        ),
        # multiple columns, same column name
        (DataFrame(columns=["a", "a"]), Series([True, True], index=["a", "a"])),
        (
            DataFrame([["x", "y"]], columns=["a", "a"]),
            Series([True, True], index=["a", "a"]),
        ),
        (
            DataFrame([["x", "y"], ["y", "y"]], columns=["a", "a"]),
            Series([True, False], index=["a", "a"]),
        ),
    ],
)
def test_is_unique(frame, expected):
    """Check the per-column result of ``is_unique`` called without a subset."""
    # GH37565
    observed = frame.is_unique()
    tm.assert_series_equal(observed, expected)
42+
43+
44+
@pytest.mark.parametrize(
    "frame, subset, expected",
    [
        (DataFrame(columns=["a", "b"]), ["a"], Series({"a": True})),
        (DataFrame({"a": ["x"], "b": ["y"]}), "a", Series({"a": True})),
        (DataFrame({"a": ["x"], "b": ["y"]}), ["a"], Series({"a": True})),
        (
            DataFrame({"a": ["x", "y"], "b": ["x", "x"]}),
            ["a", "b"],
            Series({"a": True, "b": False}),
        ),
    ],
)
def test_is_unique_subsetting(frame, subset, expected):
    """Check ``is_unique`` when ``subset`` is a single label or a list of labels."""
    # GH37565
    observed = frame.is_unique(subset=subset)
    tm.assert_series_equal(observed, expected)

0 commit comments

Comments
 (0)