diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 52bdcd03d3b49..ecbb5367febc5 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -675,13 +675,18 @@ def value_counts(self, dropna: bool = True) -> Series: vc = self._data.value_counts() - # Index cannot hold ExtensionArrays yet - index = Index(type(self)(vc.field(0)).astype(object)) + values = vc.field(0) + counts = vc.field(1) + if dropna and self._data.null_count > 0: + mask = values.is_valid() + values = values.filter(mask) + counts = counts.filter(mask) + # No missing values so we can adhere to the interface and return a numpy array. - counts = np.array(vc.field(1)) + counts = np.array(counts) - if dropna and self._data.null_count > 0: - raise NotImplementedError("yo") + # Index cannot hold ExtensionArrays yet + index = Index(type(self)(values)).astype(object) return Series(counts, index=index).astype("Int64") diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 2fec1925149ad..2b2db49c62ba2 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -476,12 +476,7 @@ def test_arrow_roundtrip(dtype, dtype_object): assert result.loc[2, "a"] is pd.NA -def test_value_counts_na(dtype, request): - if dtype == "arrow_string": - reason = "TypeError: boolean value of NA is ambiguous" - mark = pytest.mark.xfail(reason=reason) - request.node.add_marker(mark) - +def test_value_counts_na(dtype): arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype) result = arr.value_counts(dropna=False) expected = pd.Series([2, 1, 1], index=["a", "b", pd.NA], dtype="Int64") @@ -492,12 +487,7 @@ def test_value_counts_na(dtype, request): tm.assert_series_equal(result, expected) -def test_value_counts_with_normalize(dtype, request): - if dtype == "arrow_string": - reason = "TypeError: boolean value of NA is ambiguous" - mark = pytest.mark.xfail(reason=reason) - request.node.add_marker(mark) - +def test_value_counts_with_normalize(dtype): s = pd.Series(["a", "b", "a", pd.NA], dtype=dtype) result = s.value_counts(normalize=True) expected = pd.Series([2, 1], index=["a", "b"], dtype="Float64") / 3