-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
Implement DatetimeArray._from_sequence #24074
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 6 commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
be745aa
Implement DatetimeArray._from_sequence
jbrockmendel d85aa7a
move check down
jbrockmendel ad88a79
Merge branch 'master' of https://github.com/pandas-dev/pandas into fr…
jbrockmendel d8f8d85
separate most of from_sequence into sequence_to_dt64ns
jbrockmendel e94cfff
isort fixup
jbrockmendel 96c8119
docstring
jbrockmendel dbb9677
Merge branch 'master' of https://github.com/pandas-dev/pandas into fr…
jbrockmendel 6ce2528
requested rearrangement
jbrockmendel 9d7cb39
Merge branch 'master' of https://github.com/pandas-dev/pandas into fr…
jbrockmendel File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,9 +14,9 @@ | |
from pandas.util._decorators import Appender | ||
|
||
from pandas.core.dtypes.common import ( | ||
_NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type, | ||
is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype, | ||
is_timedelta64_dtype) | ||
_INT64_DTYPE, _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, | ||
is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype, | ||
is_period_dtype, is_string_dtype, is_timedelta64_dtype) | ||
from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries | ||
from pandas.core.dtypes.missing import isna | ||
|
@@ -206,45 +206,35 @@ def _simple_new(cls, values, freq=None, tz=None): | |
result._tz = timezones.tz_standardize(tz) | ||
return result | ||
|
||
def __new__(cls, values, freq=None, tz=None, dtype=None): | ||
def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False, | ||
dayfirst=False, yearfirst=False, ambiguous='raise'): | ||
return cls._from_sequence( | ||
values, freq=freq, tz=tz, dtype=dtype, copy=copy, | ||
dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous) | ||
|
||
if freq is None and hasattr(values, "freq"): | ||
# i.e. DatetimeArray, DatetimeIndex | ||
freq = values.freq | ||
@classmethod | ||
def _from_sequence(cls, data, dtype=None, copy=False, | ||
tz=None, freq=None, | ||
dayfirst=False, yearfirst=False, ambiguous='raise'): | ||
|
||
freq, freq_infer = dtl.maybe_infer_freq(freq) | ||
|
||
# if dtype has an embedded tz, capture it | ||
tz = dtl.validate_tz_from_dtype(dtype, tz) | ||
|
||
if is_object_dtype(values): | ||
# kludge; dispatch until the DatetimeArray constructor is complete | ||
from pandas import DatetimeIndex | ||
values = DatetimeIndex(values, freq=freq, tz=tz) | ||
subarr, tz, inferred_freq = sequence_to_dt64ns( | ||
data, dtype=dtype, copy=copy, tz=tz, | ||
dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous) | ||
|
||
if isinstance(values, ABCSeries): | ||
# extract to ndarray or DatetimeIndex | ||
values = values._values | ||
|
||
if isinstance(values, DatetimeArrayMixin): | ||
# extract nanosecond unix timestamps | ||
if tz is None: | ||
tz = values.tz | ||
values = values.asi8 | ||
freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, | ||
freq_infer) | ||
|
||
if values.dtype == 'i8': | ||
values = values.view('M8[ns]') | ||
result = cls._simple_new(subarr, freq=freq, tz=tz) | ||
|
||
assert isinstance(values, np.ndarray), type(values) | ||
assert is_datetime64_dtype(values) # not yet assured nanosecond | ||
values = conversion.ensure_datetime64ns(values, copy=False) | ||
if inferred_freq is None and freq is not None: | ||
# this condition precludes `freq_infer` | ||
cls._validate_frequency(result, freq, ambiguous=ambiguous) | ||
|
||
result = cls._simple_new(values, freq=freq, tz=tz) | ||
if freq_infer: | ||
elif freq_infer: | ||
result.freq = to_offset(result.inferred_freq) | ||
|
||
# NB: Among other things not yet ported from the DatetimeIndex | ||
# constructor, this does not call _deepcopy_if_needed | ||
return result | ||
|
||
@classmethod | ||
|
@@ -1494,7 +1484,7 @@ def maybe_convert_dtype(data, copy): | |
elif is_timedelta64_dtype(data): | ||
warnings.warn("Passing timedelta64-dtype data is deprecated, will " | ||
"raise a TypeError in a future version", | ||
FutureWarning, stacklevel=3) | ||
FutureWarning, stacklevel=5) | ||
data = data.view(_NS_DTYPE) | ||
|
||
elif is_period_dtype(data): | ||
|
@@ -1512,6 +1502,110 @@ def maybe_convert_dtype(data, copy): | |
return data, copy | ||
|
||
|
||
def sequence_to_dt64ns(data, dtype=None, copy=False,
                       tz=None,
                       dayfirst=False, yearfirst=False, ambiguous='raise'):
    """
    Convert a list-like into a ``datetime64[ns]`` ndarray, also reporting
    the timezone and any frequency carried by the input.

    Parameters
    ----------
    data : list-like
    dtype : dtype, str, or None, default None
    copy : bool, default False
    tz : tzinfo, str, or None, default None
    dayfirst : bool, default False
    yearfirst : bool, default False
    ambiguous : str, bool, or arraylike, default 'raise'
        See pandas._libs.tslibs.conversion.tz_localize_to_utc

    Returns
    -------
    result : numpy.ndarray
        The sequence converted to a numpy array with dtype ``datetime64[ns]``.
    tz : tzinfo or None
        Either the user-provided tzinfo or one inferred from the data.
    inferred_freq : Tick or None
        The inferred frequency of the sequence.

    Raises
    ------
    TypeError : PeriodDtype data is passed
        Not checked explicitly in this function; per the review discussion
        on this change it is handled via ``maybe_convert_dtype``.
    """

    inferred_freq = None

    if not hasattr(data, "dtype"):
        # e.g. list, tuple
        if np.ndim(data) == 0:
            # i.e. generator
            data = list(data)
        data = np.asarray(data)
        # presumably no further copy is needed because np.asarray has just
        # built a new array from the non-array input
        copy = False
    elif isinstance(data, ABCSeries):
        data = data._values

    if hasattr(data, "freq"):
        # i.e. DatetimeArray/Index
        inferred_freq = data.freq

    # if dtype has an embedded tz, capture it
    tz = validate_tz_from_dtype(dtype, tz)

    # By this point we are assured to have either a numpy array or Index
    data, copy = maybe_convert_dtype(data, copy)

    if is_object_dtype(data) or is_string_dtype(data):
        # TODO: We do not have tests specific to string-dtypes,
        # also complex or categorical or other extension
        copy = False
        if lib.infer_dtype(data) == 'integer':
            data = data.astype(np.int64)
        else:
            # data comes back here as either i8 to denote UTC timestamps
            # or M8[ns] to denote wall times
            data, inferred_tz = objects_to_datetime64ns(
                data, dayfirst=dayfirst, yearfirst=yearfirst)
            tz = maybe_infer_tz(tz, inferred_tz)

    # Dispatch on the (possibly converted) dtype; every branch must leave
    # ``result`` as an M8[ns] ndarray, which is asserted further down.
    if is_datetime64tz_dtype(data):
        tz = maybe_infer_tz(tz, data.tz)
        result = data._data

    elif is_datetime64_dtype(data):
        # tz-naive DatetimeArray/Index or ndarray[datetime64]
        data = getattr(data, "_data", data)
        if data.dtype != _NS_DTYPE:
            data = conversion.ensure_datetime64ns(data)

        if tz is not None:
            # Convert tz-naive to UTC
            tz = timezones.maybe_get_tz(tz)
            data = conversion.tz_localize_to_utc(data.view('i8'), tz,
                                                 ambiguous=ambiguous)
            data = data.view(_NS_DTYPE)

        assert data.dtype == _NS_DTYPE, data.dtype
        result = data

    else:
        # must be integer dtype otherwise
        # assume this data are epoch timestamps
        if data.dtype != _INT64_DTYPE:
            data = data.astype(np.int64, copy=False)
        result = data.view(_NS_DTYPE)

    if copy:
        # TODO: should this be deepcopy?
        result = result.copy()

    assert isinstance(result, np.ndarray), type(result)
    assert result.dtype == 'M8[ns]', result.dtype

    # We have to call this again after possibly inferring a tz above.
    # The return value is deliberately discarded: the call is made only so
    # that a dtype/tz conflict raises.
    validate_tz_from_dtype(dtype, tz)

    return result, tz, inferred_freq
|
||
|
||
def objects_to_datetime64ns(data, dayfirst, yearfirst, | ||
utc=False, errors="raise", | ||
require_iso8601=False, allow_object=False): | ||
|
@@ -1778,3 +1872,52 @@ def _maybe_localize_point(ts, is_none, is_not_none, freq, tz): | |
if is_none is None and is_not_none is not None: | ||
ts = ts.tz_localize(**localize_args) | ||
return ts | ||
|
||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# ------------------------------------------------------------------- | ||
# Validation and Inference | ||
|
||
def validate_tz_from_dtype(dtype, tz):
    """
    If the given dtype is a DatetimeTZDtype, extract the implied
    tzinfo object from it and check that it does not conflict with the given
    tz.

    Parameters
    ----------
    dtype : dtype, str, or None
        When None, no validation is performed and ``tz`` is returned as-is.
    tz : None, tzinfo

    Returns
    -------
    tz : consensus tzinfo

    Raises
    ------
    ValueError : on tzinfo mismatch, or when a tz is supplied together with
        a timezone-naive datetime64 dtype
    """
    if dtype is None:
        # Nothing to reconcile against
        return tz

    if isinstance(dtype, compat.string_types):
        try:
            dtype = DatetimeTZDtype.construct_from_string(dtype)
        except TypeError:
            # Things like `datetime64[ns]`, which is OK for the
            # constructors, but also nonsense, which should be validated
            # but not by us. We *do* allow non-existent tz errors to
            # go through
            pass

    dtz = getattr(dtype, 'tz', None)
    if dtz is not None:
        if tz is not None and not timezones.tz_compare(tz, dtz):
            raise ValueError("cannot supply both a tz and a dtype"
                             " with a tz")
        tz = dtz

    if tz is not None and is_datetime64_dtype(dtype):
        # We also need to check for the case where the user passed a
        # tz-naive dtype (i.e. datetime64[ns]); ``tz`` is already known to
        # be non-None here, so only the comparison is needed.
        if not timezones.tz_compare(tz, dtz):
            raise ValueError("cannot supply both a tz and a "
                             "timezone-naive dtype (i.e. datetime64[ns])")

    return tz
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a reason you don't want to add `verify_integrity` here (as `maybe_verify_integrity=True`)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We've deprecated the kwarg in the DatetimeIndex constructor to get rid of it. In cases where verify_integrity is not needed, a different constructor (e.g. simple_new) should be used.