pandas-dev · mroeschke · Oct 12, 2022 · Oct 11, 2022 · Oct 11, 2022 · Oct 12, 2022
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -310,6 +310,8 @@ def _from_sequence_not_strict(
         freq = freq if freq is not lib.no_default else None
         freq, freq_infer = dtl.maybe_infer_freq(freq)
 
+        # if the user either explicitly passes tz=None or a tz-naive dtype, we
+        #  disallow inferring a tz.
         explicit_tz_none = tz is None
         if tz is lib.no_default:
             tz = None
@@ -2023,9 +2025,8 @@ def _sequence_to_dt64ns(
     Parameters
     ----------
     data : list-like
-    dtype : dtype, str, or None, default None
     copy : bool, default False
-    tz : tzinfo, str, or None, default None
+    tz : tzinfo or None, default None
     dayfirst : bool, default False
     yearfirst : bool, default False
     ambiguous : str, bool, or arraylike, default 'raise'
@@ -2116,7 +2117,6 @@ def _sequence_to_dt64ns(
 
         if tz is not None:
             # Convert tz-naive to UTC
-            tz = timezones.maybe_get_tz(tz)
             # TODO: if tz is UTC, are there situations where we *don't* want a
             #  copy?  tz_localize_to_utc always makes one.
             data = tzconversion.tz_localize_to_utc(
@@ -2130,9 +2130,6 @@ def _sequence_to_dt64ns(
     else:
         # must be integer dtype otherwise
         # assume this data are epoch timestamps
-        if tz:
-            tz = timezones.maybe_get_tz(tz)
-
         if data.dtype != INT64_DTYPE:
             data = data.astype(np.int64, copy=False)
         result = data.view(DT64NS_DTYPE)

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -922,7 +922,7 @@ def sequence_to_td64ns(
 
     elif is_integer_dtype(data.dtype):
         # treat as multiples of the given unit
-        data, copy_made = ints_to_td64ns(data, unit=unit)
+        data, copy_made = _ints_to_td64ns(data, unit=unit)
         copy = copy and not copy_made
 
     elif is_float_dtype(data.dtype):
@@ -959,7 +959,7 @@ def sequence_to_td64ns(
     return data, inferred_freq
 
 
-def ints_to_td64ns(data, unit: str = "ns"):
+def _ints_to_td64ns(data, unit: str = "ns"):
     """
     Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating
     the integers as multiples of the given timedelta unit.

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -433,7 +433,7 @@ def _wrap_range_setop(self, other, res_i8):
             new_freq = to_offset(Timedelta(res_i8.step))
             res_i8 = res_i8
 
-        # TODO: we cannot just do
+        # TODO(GH#41493): we cannot just do
         #  type(self._data)(res_i8.values, dtype=self.dtype, freq=new_freq)
         # because test_setops_preserve_freq fails with _validate_frequency raising.
         # This raising is incorrect, as 'on_freq' is incorrect. This will

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -911,6 +911,8 @@ def _maybe_add_join_keys(
         left_has_missing = None
         right_has_missing = None
 
+        assert all(is_array_like(x) for x in self.left_join_keys)
+
         keys = zip(self.join_names, self.left_on, self.right_on)
         for i, (name, lname, rname) in enumerate(keys):
             if not _should_fill(lname, rname):
@@ -947,7 +949,7 @@ def _maybe_add_join_keys(
                             ):
                                 take_right = self.right[name]._values
 
-            elif left_indexer is not None and is_array_like(self.left_join_keys[i]):
+            elif left_indexer is not None:
                 take_left = self.left_join_keys[i]
                 take_right = self.right_join_keys[i]
 

diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py
@@ -20,11 +20,19 @@
 )
 
 
+def series_via_frame_from_dict(x, **kwargs):
+    return DataFrame({"a": x}, **kwargs)["a"]
+
+
+def series_via_frame_from_scalar(x, **kwargs):
+    return DataFrame(x, **kwargs)[0]
+
+
 @pytest.fixture(
     params=[
         Series,
-        lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"],
-        lambda x, **kwargs: DataFrame(x, **kwargs)[0],
+        series_via_frame_from_dict,
+        series_via_frame_from_scalar,
         Index,
     ],
     ids=["Series", "DataFrame-dict", "DataFrame-array", "Index"],
@@ -116,15 +124,6 @@ class TestConstruction:
     # test certain constructor behaviours on dtype inference across Series,
     # Index and DataFrame
 
-    @pytest.mark.parametrize(
-        "klass",
-        [
-            Series,
-            lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"],
-            lambda x, **kwargs: DataFrame(x, **kwargs)[0],
-            Index,
-        ],
-    )
     @pytest.mark.parametrize(
         "a",
         [
@@ -140,7 +139,7 @@ class TestConstruction:
             "object-string",
         ],
     )
-    def test_constructor_datetime_outofbound(self, a, klass):
+    def test_constructor_datetime_outofbound(self, a, constructor):
         # GH-26853 (+ bug GH-26206 out of bound non-ns unit)
 
         # No dtype specified (dtype inference)
@@ -149,17 +148,17 @@ def test_constructor_datetime_outofbound(self, a, klass):
         if a.dtype.kind == "M":
             msg = "Out of bounds"
             with pytest.raises(pd.errors.OutOfBoundsDatetime, match=msg):
-                klass(a)
+                constructor(a)
         else:
-            result = klass(a)
+            result = constructor(a)
             assert result.dtype == "object"
             tm.assert_numpy_array_equal(result.to_numpy(), a)
 
         # Explicit dtype specified
         # Forced conversion fails for all -> all cases raise error
         msg = "Out of bounds|Out of bounds .* present at position 0"
         with pytest.raises(pd.errors.OutOfBoundsDatetime, match=msg):
-            klass(a, dtype="datetime64[ns]")
+            constructor(a, dtype="datetime64[ns]")
 
     def test_constructor_datetime_nonns(self, constructor):
         arr = np.array(["2020-01-01T00:00:00.000000"], dtype="datetime64[us]")

diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -696,6 +696,7 @@ def test_constructor_dtype(self):
         idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern")
         tm.assert_index_equal(idx, expected)
 
+    def test_constructor_dtype_tz_mismatch_raises(self):
         # if we already have a tz and its not the same, then raise
         idx = DatetimeIndex(
             ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -723,35 +723,27 @@ def test_join_append_timedeltas2(self):
         )
         tm.assert_frame_equal(result, expected)
 
-    def test_other_datetime_unit(self):
+    @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"])
+    def test_other_datetime_unit(self, unit):
         # GH 13389
         df1 = DataFrame({"entity_id": [101, 102]})
-        s = Series([None, None], index=[101, 102], name="days")
-
-        for dtype in [
-            "datetime64[D]",
-            "datetime64[h]",
-            "datetime64[m]",
-            "datetime64[s]",
-            "datetime64[ms]",
-            "datetime64[us]",
-            "datetime64[ns]",
-        ]:
+        ser = Series([None, None], index=[101, 102], name="days")
 
-            df2 = s.astype(dtype).to_frame("days")
-            # coerces to datetime64[ns], thus should not be affected
-            assert df2["days"].dtype == "datetime64[ns]"
+        dtype = f"datetime64[{unit}]"
+        df2 = ser.astype(dtype).to_frame("days")
+        # coerces to datetime64[ns], thus should not be affected
+        assert df2["days"].dtype == "datetime64[ns]"
 
-            result = df1.merge(df2, left_on="entity_id", right_index=True)
+        result = df1.merge(df2, left_on="entity_id", right_index=True)
 
-            exp = DataFrame(
-                {
-                    "entity_id": [101, 102],
-                    "days": np.array(["nat", "nat"], dtype="datetime64[ns]"),
-                },
-                columns=["entity_id", "days"],
-            )
-            tm.assert_frame_equal(result, exp)
+        exp = DataFrame(
+            {
+                "entity_id": [101, 102],
+                "days": np.array(["nat", "nat"], dtype="datetime64[ns]"),
+            },
+            columns=["entity_id", "days"],
+        )
+        tm.assert_frame_equal(result, exp)
 
     @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"])
     def test_other_timedelta_unit(self, unit):

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -921,21 +921,18 @@ def test_constructor_dtype_datetime64(self):
 
     def test_constructor_dtype_datetime64_10(self):
         # GH3416
-        dates = [
-            np.datetime64(datetime(2013, 1, 1)),
-            np.datetime64(datetime(2013, 1, 2)),
-            np.datetime64(datetime(2013, 1, 3)),
-        ]
+        pydates = [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3)]
+        dates = [np.datetime64(x) for x in pydates]
 
-        s = Series(dates)
-        assert s.dtype == "M8[ns]"
+        ser = Series(dates)
+        assert ser.dtype == "M8[ns]"
 
-        s.iloc[0] = np.nan
-        assert s.dtype == "M8[ns]"
+        ser.iloc[0] = np.nan
+        assert ser.dtype == "M8[ns]"
 
         # GH3414 related
         expected = Series(
-            [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3)],
+            pydates,
             dtype="datetime64[ns]",
         )
 
@@ -951,6 +948,10 @@ def test_constructor_dtype_datetime64_10(self):
         result = Series([np.nan] + dates[1:], dtype="datetime64[ns]")
         tm.assert_series_equal(result, expected)
 
+    def test_constructor_dtype_datetime64_11(self):
+        pydates = [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3)]
+        dates = [np.datetime64(x) for x in pydates]
+
         dts = Series(dates, dtype="datetime64[ns]")
 
         # valid astype
@@ -1151,12 +1152,13 @@ def test_constructor_no_partial_datetime_casting(self):
         assert all(ser[i] is vals[i] for i in range(len(vals)))
 
     @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
-    @pytest.mark.parametrize("dtype", ["M8", "m8"])
+    @pytest.mark.parametrize("kind", ["M", "m"])
     @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"])
-    def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit):
+    def test_construction_to_datetimelike_unit(self, arr_dtype, kind, unit):
         # tests all units
         # gh-19223
-        dtype = f"{dtype}[{unit}]"
+        # TODO: GH#19223 was about .astype, doesn't belong here
+        dtype = f"{kind}8[{unit}]"
         arr = np.array([1, 2, 3], dtype=arr_dtype)
         s = Series(arr)
         result = s.astype(dtype)