diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 383f8a49fd02c..a19b304529383 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1071,7 +1071,7 @@ def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray: fill_value = Index(self._left, copy=False)._na_value empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1)) else: - empty = self._from_sequence([fill_value] * empty_len) + empty = self._from_sequence([fill_value] * empty_len, dtype=self.dtype) if periods > 0: a = empty diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 21fe7cd8180ad..ebf5f2636bf12 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -150,7 +150,7 @@ def __len__(self) -> int: return len(self._pa_array) @classmethod - def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False): + def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False): from pandas.core.arrays.masked import BaseMaskedArray _chk_pyarrow_available() diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c43c16cded852..7bc250ae957b2 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -52,7 +52,6 @@ ensure_object, is_bool, is_bool_dtype, - is_extension_array_dtype, is_float_dtype, is_integer, is_integer_dtype, @@ -1385,20 +1384,22 @@ def _maybe_coerce_merge_keys(self) -> None: if lk.dtype.kind == rk.dtype.kind: continue - if is_extension_array_dtype(lk.dtype) and not is_extension_array_dtype( - rk.dtype + if isinstance(lk.dtype, ExtensionDtype) and not isinstance( + rk.dtype, ExtensionDtype ): ct = find_common_type([lk.dtype, rk.dtype]) - if is_extension_array_dtype(ct): - rk = ct.construct_array_type()._from_sequence(rk) # type: ignore[union-attr] + if isinstance(ct, ExtensionDtype): + com_cls = ct.construct_array_type() + rk = com_cls._from_sequence(rk, dtype=ct, copy=False) else: - rk = rk.astype(ct) # type: ignore[arg-type] - elif is_extension_array_dtype(rk.dtype): + rk = rk.astype(ct) + elif isinstance(rk.dtype, ExtensionDtype): ct = find_common_type([lk.dtype, rk.dtype]) - if is_extension_array_dtype(ct): - lk = ct.construct_array_type()._from_sequence(lk) # type: ignore[union-attr] + if isinstance(ct, ExtensionDtype): + com_cls = ct.construct_array_type() + lk = com_cls._from_sequence(lk, dtype=ct, copy=False) else: - lk = lk.astype(ct) # type: ignore[arg-type] + lk = lk.astype(ct) # check whether ints and floats if is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype): @@ -2500,15 +2501,15 @@ def _convert_arrays_and_get_rizer_klass( if not isinstance(lk, ExtensionArray): lk = cls._from_sequence(lk, dtype=dtype, copy=False) else: - lk = lk.astype(dtype) + lk = lk.astype(dtype, copy=False) if not isinstance(rk, ExtensionArray): rk = cls._from_sequence(rk, dtype=dtype, copy=False) else: - rk = rk.astype(dtype) + rk = rk.astype(dtype, copy=False) else: - lk = lk.astype(dtype) - rk = rk.astype(dtype) + lk = lk.astype(dtype, copy=False) + rk = rk.astype(dtype, copy=False) if isinstance(lk, BaseMaskedArray): # Invalid index type "type" for "Dict[Type[object], Type[Factorizer]]"; # expected type "Type[object]" diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index 6459b315c684d..a5a2dd33940b8 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -242,7 +242,8 @@ def test_coerce_to_numpy_array(): def test_to_boolean_array_from_strings(): result = BooleanArray._from_sequence_of_strings( - np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object) + np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object), + dtype="boolean", ) expected = BooleanArray( np.array([True, False, True, True, False, False, False]), @@ -254,7 +255,7 @@ def test_to_boolean_array_from_strings(): def test_to_boolean_array_from_strings_invalid_string(): with pytest.raises(ValueError, match="cannot be cast"): - BooleanArray._from_sequence_of_strings(["donkey"]) + BooleanArray._from_sequence_of_strings(["donkey"], dtype="boolean") @pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 36aba388f1b30..373f1c95463fc 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -755,12 +755,12 @@ def test_categorical_extension_array_nullable(self, nulls_fixture): def test_from_sequence_copy(self): cat = Categorical(np.arange(5).repeat(2)) - result = Categorical._from_sequence(cat, dtype=None, copy=False) + result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=False) # more generally, we'd be OK with a view assert result._codes is cat._codes - result = Categorical._from_sequence(cat, dtype=None, copy=True) + result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=True) assert not tm.shares_memory(result, cat) diff --git a/pandas/tests/arrays/datetimes/test_cumulative.py b/pandas/tests/arrays/datetimes/test_cumulative.py index 428d6b13d28ed..e9d2dfdd0048a 100644 --- a/pandas/tests/arrays/datetimes/test_cumulative.py +++ b/pandas/tests/arrays/datetimes/test_cumulative.py @@ -26,6 +26,7 @@ def test_accumulators_freq(self): "2000-01-02", "2000-01-03", ], + dtype="M8[ns]", ) tm.assert_datetime_array_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py index 9ecfc51cb2208..64fe40e53a9d2 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -175,32 +175,34 @@ def test_to_integer_array_dtype_keyword(constructor): def test_to_integer_array_float(): - result = IntegerArray._from_sequence([1.0, 2.0]) + result = IntegerArray._from_sequence([1.0, 2.0], dtype="Int64") expected = pd.array([1, 2], dtype="Int64") tm.assert_extension_array_equal(result, expected) with pytest.raises(TypeError, match="cannot safely cast non-equivalent"): - IntegerArray._from_sequence([1.5, 2.0]) + IntegerArray._from_sequence([1.5, 2.0], dtype="Int64") # for float dtypes, the itemsize is not preserved - result = IntegerArray._from_sequence(np.array([1.0, 2.0], dtype="float32")) + result = IntegerArray._from_sequence( + np.array([1.0, 2.0], dtype="float32"), dtype="Int64" + ) assert result.dtype == Int64Dtype() def test_to_integer_array_str(): - result = IntegerArray._from_sequence(["1", "2", None]) + result = IntegerArray._from_sequence(["1", "2", None], dtype="Int64") expected = pd.array([1, 2, np.nan], dtype="Int64") tm.assert_extension_array_equal(result, expected) with pytest.raises( ValueError, match=r"invalid literal for int\(\) with base 10: .*" ): - IntegerArray._from_sequence(["1", "2", ""]) + IntegerArray._from_sequence(["1", "2", ""], dtype="Int64") with pytest.raises( ValueError, match=r"invalid literal for int\(\) with base 10: .*" ): - IntegerArray._from_sequence(["1.5", "2.0"]) + IntegerArray._from_sequence(["1.5", "2.0"], dtype="Int64") @pytest.mark.parametrize( diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 5d4aa54d1b4b9..4381469196e18 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -60,7 +60,11 @@ def test_dt64_array(dtype_unit): None, NumpyExtensionArray(np.array([], dtype=object)), ), - (np.array([1, 2], dtype="int64"), None, IntegerArray._from_sequence([1, 2])), + ( + np.array([1, 2], dtype="int64"), + None, + IntegerArray._from_sequence([1, 2], dtype="Int64"), + ), ( np.array([1.0, 2.0], dtype="float64"), None, @@ -284,7 +288,7 @@ def test_array_copy(): # datetime ( [pd.Timestamp("2000"), pd.Timestamp("2001")], - DatetimeArray._from_sequence(["2000", "2001"]), + DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"), ), ( [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], @@ -319,7 +323,7 @@ def test_array_copy(): # timedelta ( [pd.Timedelta("1h"), pd.Timedelta("2h")], - TimedeltaArray._from_sequence(["1h", "2h"]), + TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"), ), ( np.array([1, 2], dtype="m8[ns]"), @@ -330,35 +334,42 @@ def test_array_copy(): TimedeltaArray(np.array([1, 2], dtype="m8[us]")), ), # integer - ([1, 2], IntegerArray._from_sequence([1, 2])), - ([1, None], IntegerArray._from_sequence([1, None])), - ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA])), - ([1, np.nan], IntegerArray._from_sequence([1, np.nan])), + ([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")), + ([1, None], IntegerArray._from_sequence([1, None], dtype="Int64")), + ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA], dtype="Int64")), + ([1, np.nan], IntegerArray._from_sequence([1, np.nan], dtype="Int64")), # float - ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2])), - ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA])), - ([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA])), - ([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA])), + ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2], dtype="Float64")), + ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")), + ([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")), + ([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")), # integer-like float - ([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0])), - ([1.0, None], FloatingArray._from_sequence([1.0, pd.NA])), - ([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA])), - ([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA])), + ([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")), + ([1.0, None], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")), + ([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")), + ([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")), # mixed-integer-float - ([1, 2.0], FloatingArray._from_sequence([1.0, 2.0])), - ([1, np.nan, 2.0], FloatingArray._from_sequence([1.0, None, 2.0])), + ([1, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")), + ( + [1, np.nan, 2.0], + FloatingArray._from_sequence([1.0, None, 2.0], dtype="Float64"), + ), # string ( ["a", "b"], - pd.StringDtype().construct_array_type()._from_sequence(["a", "b"]), + pd.StringDtype() + .construct_array_type() + ._from_sequence(["a", "b"], dtype=pd.StringDtype()), ), ( ["a", None], - pd.StringDtype().construct_array_type()._from_sequence(["a", None]), + pd.StringDtype() + .construct_array_type() + ._from_sequence(["a", None], dtype=pd.StringDtype()), ), # Boolean - ([True, False], BooleanArray._from_sequence([True, False])), - ([True, None], BooleanArray._from_sequence([True, None])), + ([True, False], BooleanArray._from_sequence([True, False], dtype="boolean")), + ([True, None], BooleanArray._from_sequence([True, None], dtype="boolean")), ], ) def test_array_inference(data, expected): diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 8828f33b7c62c..c32a6a6a115ac 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -18,7 +18,7 @@ def test_from_sequence_from_cls(self, data): def test_array_from_scalars(self, data): scalars = [data[0], data[1], data[2]] - result = data._from_sequence(scalars) + result = data._from_sequence(scalars, dtype=data.dtype) assert isinstance(result, type(data)) def test_series_constructor(self, data): diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index a354e5767f37f..c803a8113b4a4 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -263,7 +263,7 @@ def test_duplicated(self, data, keep): @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) def test_unique(self, data, box, method): - duplicated = box(data._from_sequence([data[0], data[0]])) + duplicated = box(data._from_sequence([data[0], data[0]], dtype=data.dtype)) result = method(duplicated) diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index 5b8955087436e..f07585c0aec10 100644 --- a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -54,7 +54,7 @@ def __init__(self, values, dtype=None, copy=False) -> None: self.data = values @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence(cls, scalars, *, dtype=None, copy=False): data = np.empty(len(scalars), dtype=object) data[:] = scalars return cls(data) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 47cd3a51f664b..5cc279f65faa6 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -294,11 +294,13 @@ def test_from_dtype(self, data, request): def test_from_sequence_pa_array(self, data): # https://github.com/pandas-dev/pandas/pull/47034#discussion_r955500784 # data._pa_array = pa.ChunkedArray - result = type(data)._from_sequence(data._pa_array) + result = type(data)._from_sequence(data._pa_array, dtype=data.dtype) tm.assert_extension_array_equal(result, data) assert isinstance(result._pa_array, pa.ChunkedArray) - result = type(data)._from_sequence(data._pa_array.combine_chunks()) + result = type(data)._from_sequence( + data._pa_array.combine_chunks(), dtype=data.dtype + ) tm.assert_extension_array_equal(result, data) assert isinstance(result._pa_array, pa.ChunkedArray) diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index 0d5ed87306b48..abf7e093fd6cd 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -31,7 +31,7 @@ def test_array_of_dt64_nat_raises(self): TimedeltaIndex(arr) with pytest.raises(TypeError, match=msg): - TimedeltaArray._from_sequence(arr) + TimedeltaArray._from_sequence(arr, dtype="m8[ns]") with pytest.raises(TypeError, match=msg): to_timedelta(arr) diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index b3d4d9d67190f..b67694f1c58c7 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -98,7 +98,7 @@ def test_to_timedelta_oob_non_nano(self): TimedeltaIndex(arr) with pytest.raises(OutOfBoundsTimedelta, match=msg): - TimedeltaArray._from_sequence(arr) + TimedeltaArray._from_sequence(arr, dtype="m8[s]") @pytest.mark.parametrize( "arg", [np.arange(10).reshape(2, 5), pd.DataFrame(np.arange(10).reshape(2, 5))]