diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 610592bb3ce1f..ed0cfe0408ba9 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -462,6 +462,7 @@ Reshaping - Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`) - Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`) - Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`) +- Bug in concatenation with ``IntegerDtype`` or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`) - Sparse diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 5294b4061ad44..b40d7904e33e4 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -137,18 +137,6 @@ def __from_arrow__( else: return BooleanArray._concat_same_type(results) - def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: - # Handle only boolean + np.bool_ -> boolean, since other cases like - # Int64 + boolean -> Int64 will be handled by the other type - if all( - isinstance(t, BooleanDtype) - or (isinstance(t, np.dtype) and (np.issubdtype(t, np.bool_))) - for t in dtypes - ): - return BooleanDtype() - else: - return None - def coerce_to_array( values, mask=None, copy: bool = False diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 49a71922f331b..0c14fac57d9db 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -2,8 +2,6 @@ import numpy as np -from pandas._typing import DtypeObj - from pandas.core.dtypes.common import is_float_dtype from pandas.core.dtypes.dtypes import register_extension_dtype @@ -37,20
+35,6 @@ def construct_array_type(cls) -> type[FloatingArray]: """ return FloatingArray - def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: - # for now only handle other floating types - if not all(isinstance(t, FloatingDtype) for t in dtypes): - return None - np_dtype = np.find_common_type( - # error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype]" has no - # attribute "numpy_dtype" - [t.numpy_dtype for t in dtypes], # type: ignore[union-attr] - [], - ) - if np.issubdtype(np_dtype, np.floating): - return FLOAT_STR_TO_DTYPE[str(np_dtype)] - return None - @classmethod def _str_to_dtype_mapping(cls): return FLOAT_STR_TO_DTYPE diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 9ef3939656ecd..24e5fa1bef552 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -2,12 +2,9 @@ import numpy as np -from pandas._typing import DtypeObj - from pandas.core.dtypes.base import register_extension_dtype from pandas.core.dtypes.common import is_integer_dtype -from pandas.core.arrays.masked import BaseMaskedDtype from pandas.core.arrays.numeric import ( NumericArray, NumericDtype, @@ -38,38 +35,6 @@ def construct_array_type(cls) -> type[IntegerArray]: """ return IntegerArray - def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: - # we only handle nullable EA dtypes and numeric numpy dtypes - if not all( - isinstance(t, BaseMaskedDtype) - or ( - isinstance(t, np.dtype) - and (np.issubdtype(t, np.number) or np.issubdtype(t, np.bool_)) - ) - for t in dtypes - ): - return None - np_dtype = np.find_common_type( - # error: List comprehension has incompatible type List[Union[Any, - # dtype, ExtensionDtype]]; expected List[Union[dtype, None, type, - # _SupportsDtype, str, Tuple[Any, Union[int, Sequence[int]]], - # List[Any], _DtypeDict, Tuple[Any, Any]]] - [ - t.numpy_dtype # type: ignore[misc] - if isinstance(t, BaseMaskedDtype) - else t - for t in dtypes - ], - [], - ) - if 
np.issubdtype(np_dtype, np.integer): - return INT_STR_TO_DTYPE[str(np_dtype)] - elif np.issubdtype(np_dtype, np.floating): - from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE - - return FLOAT_STR_TO_DTYPE[str(np_dtype)] - return None - @classmethod def _str_to_dtype_mapping(cls): return INT_STR_TO_DTYPE diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 946892822720c..66eed0a75fa19 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1437,3 +1437,22 @@ def from_numpy_dtype(cls, dtype: np.dtype) -> BaseMaskedDtype: return FLOAT_STR_TO_DTYPE[dtype.name] else: raise NotImplementedError(dtype) + + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: + # We unwrap any masked dtypes, find the common dtype we would use + # for that, then re-mask the result. + from pandas.core.dtypes.cast import find_common_type + + new_dtype = find_common_type( + [ + dtype.numpy_dtype if isinstance(dtype, BaseMaskedDtype) else dtype + for dtype in dtypes + ] + ) + if not isinstance(new_dtype, np.dtype): + # If we ever support e.g. Masked[DatetimeArray] then this will change + return None + try: + return type(self).from_numpy_dtype(new_dtype) + except (KeyError, NotImplementedError): + return None diff --git a/pandas/tests/arrays/integer/test_concat.py b/pandas/tests/arrays/integer/test_concat.py index 2e8ef506140af..feba574da548f 100644 --- a/pandas/tests/arrays/integer/test_concat.py +++ b/pandas/tests/arrays/integer/test_concat.py @@ -15,11 +15,13 @@ (["UInt8", "Int8"], "Int16"), (["Int32", "UInt32"], "Int64"), (["Int64", "UInt64"], "Float64"), - (["Int64", "boolean"], "Int64"), - (["UInt8", "boolean"], "UInt8"), + (["Int64", "boolean"], "object"), + (["UInt8", "boolean"], "object"), ], ) def test_concat_series(to_concat_dtypes, result_dtype): + # we expect the same dtypes as we would get with non-masked inputs, + # just masked where available. 
result = pd.concat([pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes]) expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype( @@ -47,11 +49,13 @@ def test_concat_series(to_concat_dtypes, result_dtype): (["UInt8", "int8"], "Int16"), (["Int32", "uint32"], "Int64"), (["Int64", "uint64"], "Float64"), - (["Int64", "bool"], "Int64"), - (["UInt8", "bool"], "UInt8"), + (["Int64", "bool"], "object"), + (["UInt8", "bool"], "object"), ], ) def test_concat_series_with_numpy(to_concat_dtypes, result_dtype): + # we expect the same dtypes as we would get with non-masked inputs, + # just masked where available. s1 = pd.Series([0, 1, pd.NA], dtype=to_concat_dtypes[0]) s2 = pd.Series(np.array([0, 1], dtype=to_concat_dtypes[1]))