diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index e2a74ea6f5351..ddb2f01898ec7 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1012,6 +1012,26 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): ------ TypeError : subclass does not define reductions """ + pa_type = self._data.type + + data_to_reduce = self._data + + if name in ["any", "all"] and ( + pa.types.is_integer(pa_type) + or pa.types.is_floating(pa_type) + or pa.types.is_duration(pa_type) + ): + # pyarrow only supports any/all for boolean dtype, we allow + # for other dtypes, matching our non-pyarrow behavior + + if pa.types.is_duration(pa_type): + data_to_cmp = self._data.cast(pa.int64()) + else: + data_to_cmp = self._data + + not_eq = pc.not_equal(data_to_cmp, 0) + data_to_reduce = not_eq + if name == "sem": def pyarrow_meth(data, skip_nulls, **kwargs): @@ -1033,8 +1053,9 @@ def pyarrow_meth(data, skip_nulls, **kwargs): if pyarrow_meth is None: # Let ExtensionArray._reduce raise the TypeError return super()._reduce(name, skipna=skipna, **kwargs) + try: - result = pyarrow_meth(self._data, skip_nulls=skipna, **kwargs) + result = pyarrow_meth(data_to_reduce, skip_nulls=skipna, **kwargs) except (AttributeError, NotImplementedError, TypeError) as err: msg = ( f"'{type(self).__name__}' with dtype {self.dtype} " diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index a7c243cdfe74f..af3952a532113 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -566,10 +566,24 @@ def test_reduce_series( f"pyarrow={pa.__version__} for {pa_dtype}" ), ) - if not pa.types.is_boolean(pa_dtype): + if pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype): + # We *might* want to make this behave like the non-pyarrow cases, + # but have not yet decided. request.node.add_marker(xfail_mark) + op_name = all_boolean_reductions ser = pd.Series(data) + + if pa.types.is_temporal(pa_dtype) and not pa.types.is_duration(pa_dtype): + # xref GH#34479 we support this in our non-pyarrow datetime64 dtypes, + # but it isn't obvious we _should_. For now, we keep the pyarrow + # behavior which does not support this. + + with pytest.raises(TypeError, match="does not support reduction"): + getattr(ser, op_name)(skipna=skipna) + + return + result = getattr(ser, op_name)(skipna=skipna) assert result is (op_name == "any")