pandas-dev · jreback · Dec 5, 2018 · Dec 3, 2018 · Dec 3, 2018 · Dec 5, 2018
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -6,7 +6,6 @@
 import numpy as np
 
 from pandas._libs import NaT, iNaT, lib
-from pandas._libs.tslibs import timezones
 from pandas._libs.tslibs.period import (
     DIFFERENT_FREQ_INDEX, IncompatibleFrequency, Period)
 from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
@@ -22,7 +21,6 @@
     is_datetime64tz_dtype, is_extension_array_dtype, is_float_dtype,
     is_integer_dtype, is_list_like, is_object_dtype, is_offsetlike,
     is_period_dtype, is_timedelta64_dtype, needs_i8_conversion, pandas_dtype)
-from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
 from pandas.core.dtypes.missing import isna
 
@@ -1127,6 +1125,41 @@ def validate_endpoints(closed):
     return left_closed, right_closed
 
 
+def validate_inferred_freq(freq, inferred_freq, freq_infer):
+    """
+    If the user passes a freq and another freq is inferred from passed data,
+    require that they match.
+
+    Parameters
+    ----------
+    freq : DateOffset or None
+    inferred_freq : DateOffset or None
+    freq_infer : bool
+
+    Returns
+    -------
+    freq : DateOffset or None
+    freq_infer : bool
+
+    Notes
+    -----
+    We assume at this point that `maybe_infer_freq` has been called, so
+    `freq` is either a DateOffset object or None.
+    """
+    if inferred_freq is not None:
+        if freq is not None and freq != inferred_freq:
+            raise ValueError('Inferred frequency {inferred} from passed '
+                             'values does not conform to passed frequency '
+                             '{passed}'
+                             .format(inferred=inferred_freq,
+                                     passed=freq.freqstr))
+        elif freq is None:
+            freq = inferred_freq
+        freq_infer = False
+
+    return freq, freq_infer
+
+
 def maybe_infer_freq(freq):
     """
     Comparing a DateOffset to the string "infer" raises, so we need to
@@ -1154,44 +1187,6 @@ def maybe_infer_freq(freq):
     return freq, freq_infer
 
 
-def validate_tz_from_dtype(dtype, tz):
-    """
-    If the given dtype is a DatetimeTZDtype, extract the implied
-    tzinfo object from it and check that it does not conflict with the given
-    tz.
-
-    Parameters
-    ----------
-    dtype : dtype, str
-    tz : None, tzinfo
-
-    Returns
-    -------
-    tz : consensus tzinfo
-
-    Raises
-    ------
-    ValueError : on tzinfo mismatch
-    """
-    if dtype is not None:
-        if isinstance(dtype, compat.string_types):
-            try:
-                dtype = DatetimeTZDtype.construct_from_string(dtype)
-            except TypeError:
-                # Things like `datetime64[ns]`, which is OK for the
-                # constructors, but also nonsense, which should be validated
-                # but not by us. We *do* allow non-existent tz errors to
-                # go through
-                pass
-        dtz = getattr(dtype, 'tz', None)
-        if dtz is not None:
-            if tz is not None and not timezones.tz_compare(tz, dtz):
-                raise ValueError("cannot supply both a tz and a dtype"
-                                 " with a tz")
-            tz = dtz
-    return tz
-
-
 def validate_dtype_freq(dtype, freq):
     """
     If both a dtype and a freq are available, ensure they match.  If only

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -14,9 +14,9 @@
 from pandas.util._decorators import Appender
 
 from pandas.core.dtypes.common import (
-    _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type,
-    is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype,
-    is_timedelta64_dtype)
+    _INT64_DTYPE, _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype,
+    is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype,
+    is_period_dtype, is_string_dtype, is_timedelta64_dtype)
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
 from pandas.core.dtypes.missing import isna
@@ -206,45 +206,35 @@ def _simple_new(cls, values, freq=None, tz=None):
         result._tz = timezones.tz_standardize(tz)
         return result
 
-    def __new__(cls, values, freq=None, tz=None, dtype=None):
+    def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False,
+                dayfirst=False, yearfirst=False, ambiguous='raise'):
+        return cls._from_sequence(
+            values, freq=freq, tz=tz, dtype=dtype, copy=copy,
+            dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous)
 
-        if freq is None and hasattr(values, "freq"):
-            # i.e. DatetimeArray, DatetimeIndex
-            freq = values.freq
+    @classmethod
+    def _from_sequence(cls, data, dtype=None, copy=False,
+                       tz=None, freq=None,
+                       dayfirst=False, yearfirst=False, ambiguous='raise'):
 
         freq, freq_infer = dtl.maybe_infer_freq(freq)
 
-        # if dtype has an embedded tz, capture it
-        tz = dtl.validate_tz_from_dtype(dtype, tz)
-
-        if is_object_dtype(values):
-            # kludge; dispatch until the DatetimeArray constructor is complete
-            from pandas import DatetimeIndex
-            values = DatetimeIndex(values, freq=freq, tz=tz)
+        subarr, tz, inferred_freq = sequence_to_dt64ns(
+            data, dtype=dtype, copy=copy, tz=tz,
+            dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous)
 
-        if isinstance(values, ABCSeries):
-            # extract to ndarray or DatetimeIndex
-            values = values._values
-
-        if isinstance(values, DatetimeArrayMixin):
-            # extract nanosecond unix timestamps
-            if tz is None:
-                tz = values.tz
-            values = values.asi8
+        freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq,
+                                                      freq_infer)
 
-        if values.dtype == 'i8':
-            values = values.view('M8[ns]')
+        result = cls._simple_new(subarr, freq=freq, tz=tz)
 
-        assert isinstance(values, np.ndarray), type(values)
-        assert is_datetime64_dtype(values)  # not yet assured nanosecond
-        values = conversion.ensure_datetime64ns(values, copy=False)
+        if inferred_freq is None and freq is not None:
+            # this condition precludes `freq_infer`
+            cls._validate_frequency(result, freq, ambiguous=ambiguous)
 
-        result = cls._simple_new(values, freq=freq, tz=tz)
-        if freq_infer:
+        elif freq_infer:
             result.freq = to_offset(result.inferred_freq)
 
-        # NB: Among other things not yet ported from the DatetimeIndex
-        # constructor, this does not call _deepcopy_if_needed
         return result
 
     @classmethod
@@ -1494,7 +1484,7 @@ def maybe_convert_dtype(data, copy):
     elif is_timedelta64_dtype(data):
         warnings.warn("Passing timedelta64-dtype data is deprecated, will "
                       "raise a TypeError in a future version",
-                      FutureWarning, stacklevel=3)
+                      FutureWarning, stacklevel=5)
         data = data.view(_NS_DTYPE)
 
     elif is_period_dtype(data):
@@ -1512,6 +1502,110 @@ def maybe_convert_dtype(data, copy):
     return data, copy
 
 
+def sequence_to_dt64ns(data, dtype=None, copy=False,
+                       tz=None,
+                       dayfirst=False, yearfirst=False, ambiguous='raise'):
+    """
+    Parameters
+    ----------
+    data : list-like
+    dtype : dtype, str, or None, default None
+    copy : bool, default False
+    tz : tzinfo, str, or None, default None
+    dayfirst : bool, default False
+    yearfirst : bool, default False
+    ambiguous : str, bool, or arraylike, default 'raise'
+        See pandas._libs.tslibs.conversion.tz_localize_to_utc
+
+    Returns
+    -------
+    result : numpy.ndarray
+        The sequence converted to a numpy array with dtype ``datetime64[ns]``.
+    tz : tzinfo or None
+        Either the user-provided tzinfo or one inferred from the data.
+    inferred_freq : Tick or None
+        The inferred frequency of the sequence.
+
+    Raises
+    ------
+    TypeError : PeriodDType data is passed
+    """
+
+    inferred_freq = None
+
+    if not hasattr(data, "dtype"):
+        # e.g. list, tuple
+        if np.ndim(data) == 0:
+            # i.e. generator
+            data = list(data)
+        data = np.asarray(data)
+        copy = False
+    elif isinstance(data, ABCSeries):
+        data = data._values
+
+    if hasattr(data, "freq"):
+        # i.e. DatetimeArray/Index
+        inferred_freq = data.freq
+
+    # if dtype has an embedded tz, capture it
+    tz = validate_tz_from_dtype(dtype, tz)
+
+    # By this point we are assured to have either a numpy array or Index
+    data, copy = maybe_convert_dtype(data, copy)
+
+    if is_object_dtype(data) or is_string_dtype(data):
+        # TODO: We do not have tests specific to string-dtypes,
+        #  also complex or categorical or other extension
+        copy = False
+        if lib.infer_dtype(data) == 'integer':
+            data = data.astype(np.int64)
+        else:
+            # data comes back here as either i8 to denote UTC timestamps
+            #  or M8[ns] to denote wall times
+            data, inferred_tz = objects_to_datetime64ns(
+                data, dayfirst=dayfirst, yearfirst=yearfirst)
+            tz = maybe_infer_tz(tz, inferred_tz)
+
+    if is_datetime64tz_dtype(data):
+        tz = maybe_infer_tz(tz, data.tz)
+        result = data._data
+
+    elif is_datetime64_dtype(data):
+        # tz-naive DatetimeArray/Index or ndarray[datetime64]
+        data = getattr(data, "_data", data)
+        if data.dtype != _NS_DTYPE:
+            data = conversion.ensure_datetime64ns(data)
+
+        if tz is not None:
+            # Convert tz-naive to UTC
+            tz = timezones.maybe_get_tz(tz)
+            data = conversion.tz_localize_to_utc(data.view('i8'), tz,
+                                                 ambiguous=ambiguous)
+            data = data.view(_NS_DTYPE)
+
+        assert data.dtype == _NS_DTYPE, data.dtype
+        result = data
+
+    else:
+        # must be integer dtype otherwise
+        # assume this data are epoch timestamps
+        if data.dtype != _INT64_DTYPE:
+            data = data.astype(np.int64, copy=False)
+        result = data.view(_NS_DTYPE)
+
+    if copy:
+        # TODO: should this be deepcopy?
+        result = result.copy()
+
+    assert isinstance(result, np.ndarray), type(result)
+    assert result.dtype == 'M8[ns]', result.dtype
+
+    # We have to call this again after possibly inferring a tz above
+    validate_tz_from_dtype(dtype, tz)
+
+    return result, tz, inferred_freq
+
+
 def objects_to_datetime64ns(data, dayfirst, yearfirst,
                             utc=False, errors="raise",
                             require_iso8601=False, allow_object=False):
@@ -1778,3 +1872,52 @@ def _maybe_localize_point(ts, is_none, is_not_none, freq, tz):
     if is_none is None and is_not_none is not None:
         ts = ts.tz_localize(**localize_args)
     return ts
+
+
+# -------------------------------------------------------------------
+# Validation and Inference
+
+def validate_tz_from_dtype(dtype, tz):
+    """
+    If the given dtype is a DatetimeTZDtype, extract the implied
+    tzinfo object from it and check that it does not conflict with the given
+    tz.
+
+    Parameters
+    ----------
+    dtype : dtype, str
+    tz : None, tzinfo
+
+    Returns
+    -------
+    tz : consensus tzinfo
+
+    Raises
+    ------
+    ValueError : on tzinfo mismatch
+    """
+    if dtype is not None:
+        if isinstance(dtype, compat.string_types):
+            try:
+                dtype = DatetimeTZDtype.construct_from_string(dtype)
+            except TypeError:
+                # Things like `datetime64[ns]`, which is OK for the
+                # constructors, but also nonsense, which should be validated
+                # but not by us. We *do* allow non-existent tz errors to
+                # go through
+                pass
+        dtz = getattr(dtype, 'tz', None)
+        if dtz is not None:
+            if tz is not None and not timezones.tz_compare(tz, dtz):
+                raise ValueError("cannot supply both a tz and a dtype"
+                                 " with a tz")
+            tz = dtz
+
+        if tz is not None and is_datetime64_dtype(dtype):
+            # We also need to check for the case where the user passed a
+            #  tz-naive dtype (i.e. datetime64[ns])
+            if tz is not None and not timezones.tz_compare(tz, dtz):
+                raise ValueError("cannot supply both a tz and a "
+                                 "timezone-naive dtype (i.e. datetime64[ns]")
+
+    return tz
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -138,27 +138,19 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE):
         return result
 
     def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False):
-        return cls._from_sequence(values, freq=freq, dtype=dtype, copy=copy)
+        return cls._from_sequence(values, dtype=dtype, copy=copy, freq=freq)
 
     @classmethod
-    def _from_sequence(cls, data, freq=None, unit=None,
-                       dtype=_TD_DTYPE, copy=False):
+    def _from_sequence(cls, data, dtype=_TD_DTYPE, copy=False,
+                       freq=None, unit=None):
         if dtype != _TD_DTYPE:
             raise ValueError("Only timedelta64[ns] dtype is valid.")
 
         freq, freq_infer = dtl.maybe_infer_freq(freq)
 
         data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
-        if inferred_freq is not None:
-            if freq is not None and freq != inferred_freq:
-                raise ValueError('Inferred frequency {inferred} from passed '
-                                 'values does not conform to passed frequency '
-                                 '{passed}'
-                                 .format(inferred=inferred_freq,
-                                         passed=freq.freqstr))
-            elif freq is None:
-                freq = inferred_freq
-            freq_infer = False
+        freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq,
+                                                      freq_infer)
 
         result = cls._simple_new(data, freq=freq)