Skip to content

Implement DatetimeArray._from_sequence #24074

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 5, 2018
75 changes: 35 additions & 40 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import numpy as np

from pandas._libs import NaT, iNaT, lib
from pandas._libs.tslibs import timezones
from pandas._libs.tslibs.period import (
DIFFERENT_FREQ_INDEX, IncompatibleFrequency, Period)
from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
Expand All @@ -22,7 +21,6 @@
is_datetime64tz_dtype, is_extension_array_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_offsetlike,
is_period_dtype, is_timedelta64_dtype, needs_i8_conversion, pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna

Expand Down Expand Up @@ -1127,6 +1125,41 @@ def validate_endpoints(closed):
return left_closed, right_closed


def validate_inferred_freq(freq, inferred_freq, freq_infer):
    """
    Reconcile a user-supplied frequency with one inferred from the data.

    When both are present they must be equal; when only the inferred one
    is present it is adopted.

    Parameters
    ----------
    freq : DateOffset or None
        Frequency passed by the user.
    inferred_freq : DateOffset or None
        Frequency inferred from the passed data.
    freq_infer : bool
        Whether the caller still intends to infer the frequency.

    Returns
    -------
    freq : DateOffset or None
        ``inferred_freq`` if ``freq`` was None and a frequency was
        inferred, otherwise the ``freq`` that was passed in.
    freq_infer : bool
        False whenever ``inferred_freq`` is not None; otherwise the value
        that was passed in.

    Raises
    ------
    ValueError
        If both frequencies are given and they differ.

    Notes
    -----
    We assume at this point that `maybe_infer_freq` has been called, so
    `freq` is either a DateOffset object or None.
    """
    if inferred_freq is None:
        # Nothing was inferred from the data; leave the inputs untouched.
        return freq, freq_infer

    if freq is None:
        # Only the inferred frequency is available, so adopt it.
        return inferred_freq, False

    if freq != inferred_freq:
        msg = ('Inferred frequency {inferred} from passed '
               'values does not conform to passed frequency '
               '{passed}')
        raise ValueError(msg.format(inferred=inferred_freq,
                                    passed=freq.freqstr))

    # Both agree; no further inference is required.
    return freq, False


def maybe_infer_freq(freq):
"""
Comparing a DateOffset to the string "infer" raises, so we need to
Expand Down Expand Up @@ -1154,44 +1187,6 @@ def maybe_infer_freq(freq):
return freq, freq_infer


def validate_tz_from_dtype(dtype, tz):
    """
    Extract the tzinfo implied by ``dtype`` (if any) and make sure it does
    not conflict with the explicitly given ``tz``.

    Parameters
    ----------
    dtype : dtype, str
    tz : None, tzinfo

    Returns
    -------
    tz : consensus tzinfo

    Raises
    ------
    ValueError : on tzinfo mismatch
    """
    if dtype is None:
        return tz

    if isinstance(dtype, compat.string_types):
        try:
            dtype = DatetimeTZDtype.construct_from_string(dtype)
        except TypeError:
            # Strings such as `datetime64[ns]` are acceptable to the
            # constructors; outright nonsense should be validated, but
            # not here. Errors for non-existent timezones are deliberately
            # allowed to propagate.
            pass

    dtype_tz = getattr(dtype, 'tz', None)
    if dtype_tz is None:
        # The dtype carries no timezone, so the passed tz stands.
        return tz

    if tz is not None and not timezones.tz_compare(tz, dtype_tz):
        raise ValueError("cannot supply both a tz and a dtype"
                         " with a tz")
    return dtype_tz


def validate_dtype_freq(dtype, freq):
"""
If both a dtype and a freq are available, ensure they match. If only
Expand Down
209 changes: 176 additions & 33 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
from pandas.util._decorators import Appender

from pandas.core.dtypes.common import (
_NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type,
is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype,
is_timedelta64_dtype)
_INT64_DTYPE, _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype,
is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype,
is_period_dtype, is_string_dtype, is_timedelta64_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -206,45 +206,35 @@ def _simple_new(cls, values, freq=None, tz=None):
result._tz = timezones.tz_standardize(tz)
return result

def __new__(cls, values, freq=None, tz=None, dtype=None):
def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False,
dayfirst=False, yearfirst=False, ambiguous='raise'):
return cls._from_sequence(
values, freq=freq, tz=tz, dtype=dtype, copy=copy,
dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous)

if freq is None and hasattr(values, "freq"):
# i.e. DatetimeArray, DatetimeIndex
freq = values.freq
@classmethod
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a reason you don't want to add verify_integrity here (as maybe _verify_integrity=True)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We've deprecated the kwarg in the DatetimeIndex constructor to get rid of it. In cases where verify_integrity is not needed, a different constructor (e.g. simple_new) should be used.

def _from_sequence(cls, data, dtype=None, copy=False,
tz=None, freq=None,
dayfirst=False, yearfirst=False, ambiguous='raise'):

freq, freq_infer = dtl.maybe_infer_freq(freq)

# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

if is_object_dtype(values):
# kludge; dispatch until the DatetimeArray constructor is complete
from pandas import DatetimeIndex
values = DatetimeIndex(values, freq=freq, tz=tz)
subarr, tz, inferred_freq = sequence_to_dt64ns(
data, dtype=dtype, copy=copy, tz=tz,
dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous)

if isinstance(values, ABCSeries):
# extract to ndarray or DatetimeIndex
values = values._values

if isinstance(values, DatetimeArrayMixin):
# extract nanosecond unix timestamps
if tz is None:
tz = values.tz
values = values.asi8
freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq,
freq_infer)

if values.dtype == 'i8':
values = values.view('M8[ns]')
result = cls._simple_new(subarr, freq=freq, tz=tz)

assert isinstance(values, np.ndarray), type(values)
assert is_datetime64_dtype(values) # not yet assured nanosecond
values = conversion.ensure_datetime64ns(values, copy=False)
if inferred_freq is None and freq is not None:
# this condition precludes `freq_infer`
cls._validate_frequency(result, freq, ambiguous=ambiguous)

result = cls._simple_new(values, freq=freq, tz=tz)
if freq_infer:
elif freq_infer:
result.freq = to_offset(result.inferred_freq)

# NB: Among other things not yet ported from the DatetimeIndex
# constructor, this does not call _deepcopy_if_needed
return result

@classmethod
Expand Down Expand Up @@ -1494,7 +1484,7 @@ def maybe_convert_dtype(data, copy):
elif is_timedelta64_dtype(data):
warnings.warn("Passing timedelta64-dtype data is deprecated, will "
"raise a TypeError in a future version",
FutureWarning, stacklevel=3)
FutureWarning, stacklevel=5)
data = data.view(_NS_DTYPE)

elif is_period_dtype(data):
Expand All @@ -1512,6 +1502,110 @@ def maybe_convert_dtype(data, copy):
return data, copy


def sequence_to_dt64ns(data, dtype=None, copy=False,
tz=None,
dayfirst=False, yearfirst=False, ambiguous='raise'):
"""
Parameters
----------
data : list-like
dtype : dtype, str, or None, default None
copy : bool, default False
tz : tzinfo, str, or None, default None
dayfirst : bool, default False
yearfirst : bool, default False
ambiguous : str, bool, or arraylike, default 'raise'
See pandas._libs.tslibs.conversion.tz_localize_to_utc

Returns
-------
result : numpy.ndarray
The sequence converted to a numpy array with dtype ``datetime64[ns]``.
tz : tzinfo or None
Either the user-provided tzinfo or one inferred from the data.
inferred_freq : Tick or None
The inferred frequency of the sequence.

Raises
------
TypeError : PeriodDtype data is passed
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this explicitly handled?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Via maybe_convert_dtype

"""

inferred_freq = None

if not hasattr(data, "dtype"):
# e.g. list, tuple
if np.ndim(data) == 0:
# i.e. generator
data = list(data)
data = np.asarray(data)
copy = False
elif isinstance(data, ABCSeries):
data = data._values

if hasattr(data, "freq"):
# i.e. DatetimeArray/Index
inferred_freq = data.freq

# if dtype has an embedded tz, capture it
tz = validate_tz_from_dtype(dtype, tz)

# By this point we are assured to have either a numpy array or Index
data, copy = maybe_convert_dtype(data, copy)

if is_object_dtype(data) or is_string_dtype(data):
# TODO: We do not have tests specific to string-dtypes,
# also complex or categorical or other extension
copy = False
if lib.infer_dtype(data) == 'integer':
data = data.astype(np.int64)
else:
# data comes back here as either i8 to denote UTC timestamps
# or M8[ns] to denote wall times
data, inferred_tz = objects_to_datetime64ns(
data, dayfirst=dayfirst, yearfirst=yearfirst)
tz = maybe_infer_tz(tz, inferred_tz)

if is_datetime64tz_dtype(data):
tz = maybe_infer_tz(tz, data.tz)
result = data._data

elif is_datetime64_dtype(data):
# tz-naive DatetimeArray/Index or ndarray[datetime64]
data = getattr(data, "_data", data)
if data.dtype != _NS_DTYPE:
data = conversion.ensure_datetime64ns(data)

if tz is not None:
# Convert tz-naive to UTC
tz = timezones.maybe_get_tz(tz)
data = conversion.tz_localize_to_utc(data.view('i8'), tz,
ambiguous=ambiguous)
data = data.view(_NS_DTYPE)

assert data.dtype == _NS_DTYPE, data.dtype
result = data

else:
# must be integer dtype otherwise
# assume this data are epoch timestamps
if data.dtype != _INT64_DTYPE:
data = data.astype(np.int64, copy=False)
result = data.view(_NS_DTYPE)

if copy:
# TODO: should this be deepcopy?
result = result.copy()

assert isinstance(result, np.ndarray), type(result)
assert result.dtype == 'M8[ns]', result.dtype

# We have to call this again after possibly inferring a tz above
validate_tz_from_dtype(dtype, tz)

return result, tz, inferred_freq


def objects_to_datetime64ns(data, dayfirst, yearfirst,
utc=False, errors="raise",
require_iso8601=False, allow_object=False):
Expand Down Expand Up @@ -1778,3 +1872,52 @@ def _maybe_localize_point(ts, is_none, is_not_none, freq, tz):
if is_none is None and is_not_none is not None:
ts = ts.tz_localize(**localize_args)
return ts


# -------------------------------------------------------------------
# Validation and Inference

def validate_tz_from_dtype(dtype, tz):
    """
    If the given dtype is a DatetimeTZDtype, extract the implied
    tzinfo object from it and check that it does not conflict with the given
    tz.

    Parameters
    ----------
    dtype : dtype, str, or None
        If None, ``tz`` is returned unchanged.
    tz : None, tzinfo

    Returns
    -------
    tz : consensus tzinfo

    Raises
    ------
    ValueError
        If ``tz`` conflicts with the timezone carried by ``dtype``, or if a
        ``tz`` is combined with a timezone-naive datetime64 dtype.
    """
    if dtype is not None:
        if isinstance(dtype, compat.string_types):
            try:
                dtype = DatetimeTZDtype.construct_from_string(dtype)
            except TypeError:
                # Things like `datetime64[ns]`, which is OK for the
                # constructors, but also nonsense, which should be validated
                # but not by us. We *do* allow non-existent tz errors to
                # go through
                pass

        dtz = getattr(dtype, 'tz', None)
        if dtz is not None:
            if tz is not None and not timezones.tz_compare(tz, dtz):
                raise ValueError("cannot supply both a tz and a dtype"
                                 " with a tz")
            tz = dtz

        if tz is not None and is_datetime64_dtype(dtype):
            # We also need to check for the case where the user passed a
            # tz-naive dtype (i.e. datetime64[ns]). `tz` is already known
            # to be non-None here, so only the comparison is needed.
            if not timezones.tz_compare(tz, dtz):
                raise ValueError("cannot supply both a tz and a "
                                 "timezone-naive dtype "
                                 "(i.e. datetime64[ns])")

    return tz
18 changes: 5 additions & 13 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,27 +138,19 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE):
return result

def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False):
return cls._from_sequence(values, freq=freq, dtype=dtype, copy=copy)
return cls._from_sequence(values, dtype=dtype, copy=copy, freq=freq)

@classmethod
def _from_sequence(cls, data, freq=None, unit=None,
dtype=_TD_DTYPE, copy=False):
def _from_sequence(cls, data, dtype=_TD_DTYPE, copy=False,
freq=None, unit=None):
if dtype != _TD_DTYPE:
raise ValueError("Only timedelta64[ns] dtype is valid.")

freq, freq_infer = dtl.maybe_infer_freq(freq)

data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
if inferred_freq is not None:
if freq is not None and freq != inferred_freq:
raise ValueError('Inferred frequency {inferred} from passed '
'values does not conform to passed frequency '
'{passed}'
.format(inferred=inferred_freq,
passed=freq.freqstr))
elif freq is None:
freq = inferred_freq
freq_infer = False
freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq,
freq_infer)

result = cls._simple_new(data, freq=freq)

Expand Down
Loading