diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index f797222753183..79e8d2d90ab1e 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -310,6 +310,8 @@ def _from_sequence_not_strict(
         freq = freq if freq is not lib.no_default else None
         freq, freq_infer = dtl.maybe_infer_freq(freq)
 
+        # if the user either explicitly passes tz=None or a tz-naive dtype, we
+        # disallow inferring a tz.
         explicit_tz_none = tz is None
         if tz is lib.no_default:
             tz = None
@@ -2023,9 +2025,8 @@ def _sequence_to_dt64ns(
     Parameters
     ----------
     data : list-like
-    dtype : dtype, str, or None, default None
     copy : bool, default False
-    tz : tzinfo, str, or None, default None
+    tz : tzinfo or None, default None
     dayfirst : bool, default False
     yearfirst : bool, default False
    ambiguous : str, bool, or arraylike, default 'raise'
@@ -2116,7 +2117,6 @@ def _sequence_to_dt64ns(
 
         if tz is not None:
             # Convert tz-naive to UTC
-            tz = timezones.maybe_get_tz(tz)
             # TODO: if tz is UTC, are there situations where we *don't* want a
             #  copy? tz_localize_to_utc always makes one.
             data = tzconversion.tz_localize_to_utc(
@@ -2130,9 +2130,6 @@ def _sequence_to_dt64ns(
     else:
         # must be integer dtype otherwise
         # assume this data are epoch timestamps
-        if tz:
-            tz = timezones.maybe_get_tz(tz)
-
         if data.dtype != INT64_DTYPE:
             data = data.astype(np.int64, copy=False)
         result = data.view(DT64NS_DTYPE)
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 6eded11597f89..291af070f1f43 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -922,7 +922,7 @@ def sequence_to_td64ns(
 
     elif is_integer_dtype(data.dtype):
         # treat as multiples of the given unit
-        data, copy_made = ints_to_td64ns(data, unit=unit)
+        data, copy_made = _ints_to_td64ns(data, unit=unit)
         copy = copy and not copy_made
 
     elif is_float_dtype(data.dtype):
@@ -959,7 +959,7 @@ def sequence_to_td64ns(
     return data, inferred_freq
 
 
-def ints_to_td64ns(data, unit: str = "ns"):
+def _ints_to_td64ns(data, unit: str = "ns"):
     """
     Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating
     the integers as multiples of the given timedelta unit.
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index e3d766709f52f..a5c677bbd72fa 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -433,7 +433,7 @@ def _wrap_range_setop(self, other, res_i8):
             new_freq = to_offset(Timedelta(res_i8.step))
             res_i8 = res_i8
 
-        # TODO: we cannot just do
+        # TODO(GH#41493): we cannot just do
         #  type(self._data)(res_i8.values, dtype=self.dtype, freq=new_freq)
         #  because test_setops_preserve_freq fails with _validate_frequency raising.
         #  This raising is incorrect, as 'on_freq' is incorrect. This will
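The `_from_sequence_not_strict` hunk above relies on telling an explicitly passed `tz=None` apart from `tz` not being passed at all, via the `lib.no_default` sentinel. A minimal standalone sketch of that sentinel pattern, assuming nothing about pandas internals (the `no_default` and `resolve_tz` names below are illustrative only):

    class _NoDefault:
        def __repr__(self) -> str:
            return "<no_default>"


    no_default = _NoDefault()


    def resolve_tz(tz=no_default):
        # Record whether the caller explicitly asked for tz-naive output
        # before collapsing the sentinel to None.
        explicit_tz_none = tz is None
        if tz is no_default:
            tz = None
        return tz, explicit_tz_none


    # tz omitted entirely: a timezone may still be inferred from the data
    assert resolve_tz() == (None, False)
    # tz=None passed explicitly: inferring a timezone should be disallowed
    assert resolve_tz(tz=None) == (None, True)

Because `explicit_tz_none` is captured before the sentinel is collapsed to `None`, later code can refuse to infer a timezone when the caller asked for tz-naive data.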
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index b05340fcb96a5..d138b4f54b113 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -911,6 +911,8 @@ def _maybe_add_join_keys(
         left_has_missing = None
         right_has_missing = None
 
+        assert all(is_array_like(x) for x in self.left_join_keys)
+
        keys = zip(self.join_names, self.left_on, self.right_on)
        for i, (name, lname, rname) in enumerate(keys):
            if not _should_fill(lname, rname):
@@ -947,7 +949,7 @@ def _maybe_add_join_keys(
                    ):
                        take_right = self.right[name]._values
 
-            elif left_indexer is not None and is_array_like(self.left_join_keys[i]):
+            elif left_indexer is not None:
                take_left = self.left_join_keys[i]
                take_right = self.right_join_keys[i]
 
diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py
index 858eaacd67ec2..f66ebc451c239 100644
--- a/pandas/tests/base/test_constructors.py
+++ b/pandas/tests/base/test_constructors.py
@@ -20,11 +20,19 @@
 )
 
 
+def series_via_frame_from_dict(x, **kwargs):
+    return DataFrame({"a": x}, **kwargs)["a"]
+
+
+def series_via_frame_from_scalar(x, **kwargs):
+    return DataFrame(x, **kwargs)[0]
+
+
 @pytest.fixture(
     params=[
         Series,
-        lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"],
-        lambda x, **kwargs: DataFrame(x, **kwargs)[0],
+        series_via_frame_from_dict,
+        series_via_frame_from_scalar,
         Index,
     ],
     ids=["Series", "DataFrame-dict", "DataFrame-array", "Index"],
@@ -116,15 +124,6 @@ class TestConstruction:
     # test certain constructor behaviours on dtype inference across Series,
     # Index and DataFrame
 
-    @pytest.mark.parametrize(
-        "klass",
-        [
-            Series,
-            lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"],
-            lambda x, **kwargs: DataFrame(x, **kwargs)[0],
-            Index,
-        ],
-    )
     @pytest.mark.parametrize(
         "a",
         [
@@ -140,7 +139,7 @@ class TestConstruction:
             "object-string",
         ],
     )
-    def test_constructor_datetime_outofbound(self, a, klass):
+    def test_constructor_datetime_outofbound(self, a, constructor):
         # GH-26853 (+ bug GH-26206 out of bound non-ns unit)
 
         # No dtype specified (dtype inference)
@@ -149,9 +148,9 @@ def test_constructor_datetime_outofbound(self, a, klass):
         if a.dtype.kind == "M":
             msg = "Out of bounds"
             with pytest.raises(pd.errors.OutOfBoundsDatetime, match=msg):
-                klass(a)
+                constructor(a)
         else:
-            result = klass(a)
+            result = constructor(a)
             assert result.dtype == "object"
             tm.assert_numpy_array_equal(result.to_numpy(), a)
 
@@ -159,7 +158,7 @@ def test_constructor_datetime_outofbound(self, a, klass):
         # Forced conversion fails for all -> all cases raise error
         msg = "Out of bounds|Out of bounds .* present at position 0"
         with pytest.raises(pd.errors.OutOfBoundsDatetime, match=msg):
-            klass(a, dtype="datetime64[ns]")
+            constructor(a, dtype="datetime64[ns]")
 
     def test_constructor_datetime_nonns(self, constructor):
         arr = np.array(["2020-01-01T00:00:00.000000"], dtype="datetime64[us]")
diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
index d129f5b365ca4..907456e04300f 100644
--- a/pandas/tests/indexes/datetimes/test_constructors.py
+++ b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -696,6 +696,7 @@ def test_constructor_dtype(self):
         idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern")
         tm.assert_index_equal(idx, expected)
 
+    def test_constructor_dtype_tz_mismatch_raises(self):
         # if we already have a tz and its not the same, then raise
         idx = DatetimeIndex(
             ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
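The `_maybe_add_join_keys` change above hoists the `is_array_like` check into a single up-front assertion so the later `elif` only has to test the indexer. A rough standalone illustration of that refactor, with made-up helper names and a simplified stand-in for `is_array_like` (not the pandas function):

    import numpy as np


    def is_array_like(obj) -> bool:
        # simplified stand-in for pandas.api.types.is_array_like
        return hasattr(obj, "dtype") and hasattr(obj, "__len__")


    def take_all(join_keys, indexer):
        # The invariant is asserted once, up front...
        assert all(is_array_like(key) for key in join_keys)
        out = []
        for key in join_keys:
            # ...so the per-key branch no longer needs its own is_array_like check.
            out.append(key.take(indexer))
        return out


    keys = [np.array([1, 2, 3]), np.array([4.0, 5.0, 6.0])]
    print(take_all(keys, np.array([2, 0])))  # [array([3, 1]), array([6., 4.])]

Asserting the invariant once documents the assumption and keeps each branch condition focused on what actually varies.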
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index a7125e69f2a27..5cc6879f9660f 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -723,35 +723,27 @@ def test_join_append_timedeltas2(self):
         )
         tm.assert_frame_equal(result, expected)
 
-    def test_other_datetime_unit(self):
+    @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"])
+    def test_other_datetime_unit(self, unit):
         # GH 13389
         df1 = DataFrame({"entity_id": [101, 102]})
-        s = Series([None, None], index=[101, 102], name="days")
-
-        for dtype in [
-            "datetime64[D]",
-            "datetime64[h]",
-            "datetime64[m]",
-            "datetime64[s]",
-            "datetime64[ms]",
-            "datetime64[us]",
-            "datetime64[ns]",
-        ]:
+        ser = Series([None, None], index=[101, 102], name="days")
 
-            df2 = s.astype(dtype).to_frame("days")
-            # coerces to datetime64[ns], thus should not be affected
-            assert df2["days"].dtype == "datetime64[ns]"
+        dtype = f"datetime64[{unit}]"
+        df2 = ser.astype(dtype).to_frame("days")
+        # coerces to datetime64[ns], thus should not be affected
+        assert df2["days"].dtype == "datetime64[ns]"
 
-            result = df1.merge(df2, left_on="entity_id", right_index=True)
+        result = df1.merge(df2, left_on="entity_id", right_index=True)
 
-            exp = DataFrame(
-                {
-                    "entity_id": [101, 102],
-                    "days": np.array(["nat", "nat"], dtype="datetime64[ns]"),
-                },
-                columns=["entity_id", "days"],
-            )
-            tm.assert_frame_equal(result, exp)
+        exp = DataFrame(
+            {
+                "entity_id": [101, 102],
+                "days": np.array(["nat", "nat"], dtype="datetime64[ns]"),
+            },
+            columns=["entity_id", "days"],
+        )
+        tm.assert_frame_equal(result, exp)
 
     @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"])
     def test_other_timedelta_unit(self, unit):
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 4f355be7f0745..a33912178bfd6 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -921,21 +921,18 @@ def test_constructor_dtype_datetime64(self):
 
     def test_constructor_dtype_datetime64_10(self):
         # GH3416
-        dates = [
-            np.datetime64(datetime(2013, 1, 1)),
-            np.datetime64(datetime(2013, 1, 2)),
-            np.datetime64(datetime(2013, 1, 3)),
-        ]
+        pydates = [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3)]
+        dates = [np.datetime64(x) for x in pydates]
 
-        s = Series(dates)
-        assert s.dtype == "M8[ns]"
+        ser = Series(dates)
+        assert ser.dtype == "M8[ns]"
 
-        s.iloc[0] = np.nan
-        assert s.dtype == "M8[ns]"
+        ser.iloc[0] = np.nan
+        assert ser.dtype == "M8[ns]"
 
         # GH3414 related
         expected = Series(
-            [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3)],
+            pydates,
             dtype="datetime64[ns]",
         )
 
@@ -951,6 +948,10 @@ def test_constructor_dtype_datetime64_10(self):
         result = Series([np.nan] + dates[1:], dtype="datetime64[ns]")
         tm.assert_series_equal(result, expected)
 
+    def test_constructor_dtype_datetime64_11(self):
+        pydates = [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3)]
+        dates = [np.datetime64(x) for x in pydates]
+
         dts = Series(dates, dtype="datetime64[ns]")
 
         # valid astype
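The `test_other_datetime_unit` rewrite above replaces a `for dtype in [...]` loop with `@pytest.mark.parametrize`, so each unit runs and reports independently instead of the loop stopping at the first failure. A small self-contained example of the same pattern (the test name and assertion below are illustrative, not the pandas test):

    import numpy as np
    import pytest


    @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"])
    def test_datetime64_unit_roundtrip(unit):
        dtype = f"datetime64[{unit}]"
        arr = np.array(["2013-01-01"], dtype=dtype)
        # whatever the storage unit, the value still maps to the same calendar day
        assert str(arr.astype("datetime64[D]")[0]) == "2013-01-01"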
@@ -1151,12 +1152,13 @@ def test_constructor_no_partial_datetime_casting(self):
         assert all(ser[i] is vals[i] for i in range(len(vals)))
 
     @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
-    @pytest.mark.parametrize("dtype", ["M8", "m8"])
+    @pytest.mark.parametrize("kind", ["M", "m"])
     @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"])
-    def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit):
+    def test_construction_to_datetimelike_unit(self, arr_dtype, kind, unit):
         # tests all units
         # gh-19223
-        dtype = f"{dtype}[{unit}]"
+        # TODO: GH#19223 was about .astype, doesn't belong here
+        dtype = f"{kind}8[{unit}]"
         arr = np.array([1, 2, 3], dtype=arr_dtype)
         s = Series(arr)
         result = s.astype(dtype)
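The final hunk builds the dtype string from a kind character plus a unit, `f"{kind}8[{unit}]"`, relying on NumPy's shorthand where `"M8[ns]"` names the same dtype as `"datetime64[ns]"` and `"m8[ns]"` the same as `"timedelta64[ns]"`. A quick standalone check of that equivalence:

    import numpy as np

    for kind, longname in [("M", "datetime64"), ("m", "timedelta64")]:
        for unit in ["ns", "us", "ms", "s", "h", "m", "D"]:
            # the shorthand and long-form spellings construct identical dtypes
            assert np.dtype(f"{kind}8[{unit}]") == np.dtype(f"{longname}[{unit}]")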