-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Add date dtype #34441
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add date dtype #34441
Changes from 23 commits
ac8e285
0ad60de
ae1a498
5c5ee4b
224b59d
5213efe
e000786
a9ac366
4b441f3
4ec5d72
539444e
6db4aea
6f4eb44
0e30fa5
a26a4f7
af37183
69b297f
9aab22d
2f3f579
eb947d7
a6d6bc5
61d07f9
85e71fd
5673cf3
79c9254
e9c8d96
c209de1
f207989
31fa485
73e278b
068e9bc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
from pandas.core.dtypes.base import ExtensionDtype | ||
from pandas.core.arrays.datetimelike import DatelikeOps, DatetimeLikeArrayMixin | ||
from pandas.core.arrays.datetimes import sequence_to_dt64ns | ||
from pandas.core.dtypes.common import ( | ||
is_integer_dtype, | ||
is_datetime64_dtype, | ||
is_object_dtype, | ||
pandas_dtype, | ||
) | ||
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass | ||
from pandas.core.dtypes.dtypes import DateDtype | ||
from pandas.core.construction import array | ||
from pandas._libs.tslibs import Timestamp | ||
from pandas._libs.tslibs.conversion import NS_DTYPE | ||
from pandas._libs import tslib, lib | ||
|
||
import numpy as np | ||
|
||
# numpy dtype string used for the stored values (day resolution).
D_DATETIME_DTYPE = "datetime64[D]"
# Integer dtype string that stored values are viewed as / constructed from.
INTEGER_BACKEND = "i8"
# dtype names quoted in the DateArray constructor's "incorrect dtype" error.
VALID_TYPES = {INTEGER_BACKEND, "datetime64[ns]", D_DATETIME_DTYPE, "object"}
|
||
|
||
def _to_date_values(values, copy=False):
    """
    Convert ``values`` to a day-resolution datetime64 array.

    Parameters
    ----------
    values : sequence or array-like
        Forwarded unchanged to ``sequence_to_dt64ns``.
    copy : bool, default False
        Forwarded to ``sequence_to_dt64ns``.

    Returns
    -------
    The first item returned by ``sequence_to_dt64ns``, cast to
    ``D_DATETIME_DTYPE``.
    """
    # Only the converted data is used; the other two returned items are
    # discarded (presumably the inferred tz and freq -- confirm against
    # sequence_to_dt64ns).
    dt64_values, _tz, _inferred_freq = sequence_to_dt64ns(values, copy=copy)
    day_values = dt64_values.astype(D_DATETIME_DTYPE)
    return day_values
|
||
|
||
class DateArray(DatetimeLikeArrayMixin, DatelikeOps):
    """
    Pandas ExtensionArray for date (year, month, day only) data.

    .. warning::

       DateArray is currently experimental, and its API may change
       without warning.

    Parameters
    ----------
    values : Series, Index, DateArray, ndarray
        The date data. Integer data is viewed as ``datetime64[D]`` values;
        ``datetime64[D]`` data is stored as is; any other accepted dtype is
        converted with ``_to_date_values``.
    copy : bool, default False
        Whether to copy the underlying array of values.

    Attributes
    ----------
    None

    Methods
    -------
    None
    """

    freq = "D"

    def __init__(self, values, copy=False):
        # Unwrap pandas containers down to the array they hold.
        if isinstance(values, (ABCSeries, ABCIndexClass)):
            values = values._values

        if isinstance(values, type(self)):
            values = values._data

        if not isinstance(values, np.ndarray):
            msg = (
                f"Unexpected type '{type(values).__name__}'. 'values' must be "
                "a DateArray, ndarray, or Series or Index containing one of"
                " those."
            )
            raise ValueError(msg)

        if not self._is_compatible_dtype(values.dtype):
            msg = (
                f"The dtype of 'values' is incorrect. Must be one of {VALID_TYPES}."
                f" Got {values.dtype} instead."
            )
            raise ValueError(msg)

        original = values
        if is_integer_dtype(values.dtype):
            # NOTE(review): a reviewer asked why ordinals since epoch are not
            # simply kept; the author's answer is that they are -- the view
            # only changes the outer representation, not the backing
            # integers.
            # Every integer width is normalised to i8 first so that all
            # integers accepted by _is_compatible_dtype are handled the same
            # way as in _from_sequence, instead of non-i8 integers falling
            # through to the datetime conversion below.
            values = values.astype(INTEGER_BACKEND, copy=False).view(
                D_DATETIME_DTYPE
            )
        elif values.dtype != D_DATETIME_DTYPE:
            # The cast to day resolution inside _to_date_values goes to a
            # different dtype, so no extra copy is requested here; the guard
            # below still copies if the result could alias the input.
            values = _to_date_values(values, copy=False)

        # Copy at most once: only when the result may still share memory
        # with what the caller passed in.
        if copy and np.may_share_memory(values, original):
            values = values.copy()

        self._data = values

    @staticmethod
    def _is_compatible_dtype(dtype):
        """
        Return True if ``dtype`` is an integer, object or datetime64 dtype.
        """
        # is_datetime64_dtype is expected to accept every datetime64 unit,
        # "datetime64[D]" included, so no separate check for it is needed.
        return (
            is_integer_dtype(dtype)
            or is_object_dtype(dtype)
            or is_datetime64_dtype(dtype)
        )

    @classmethod
    def _simple_new(cls, values, **kwargs):
        """
        Build a DateArray around ``values`` without running ``__init__``.

        ``values`` must be an ndarray; an ``i8`` array is viewed as
        ``datetime64[D]``, anything else is stored unchanged. Extra keyword
        arguments are accepted and ignored.
        """
        assert isinstance(values, np.ndarray)
        if values.dtype == INTEGER_BACKEND:
            values = values.view(D_DATETIME_DTYPE)

        result = object.__new__(cls)
        result._data = values
        return result

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """
        Construct a new ExtensionArray from a sequence of scalars.

        Parameters
        ----------
        scalars : Sequence
            Each element will be an instance of the scalar type for this
            array, ``cls.dtype.type``.
        dtype : dtype, optional
            Construct for this particular dtype. This should be a Dtype
            compatible with the ExtensionArray. Currently not used.
        copy : bool, default False
            If True, copy the underlying data. Only consulted on the
            non-integer path; the integer paths always build a new array.

        Returns
        -------
        DateArray
        """
        if (
            isinstance(scalars, np.ndarray)
            and lib.infer_dtype(scalars, skipna=True) == "integer"
        ):
            values = scalars.astype(INTEGER_BACKEND)
        elif is_integer_dtype(scalars):
            # Integer-dtype containers that are not ndarrays. Going through
            # np.array yields a real i8 ndarray for _simple_new; reading
            # ``scalars._data`` does not do so for every container type.
            values = np.array(scalars, dtype=INTEGER_BACKEND)
        else:
            values = _to_date_values(scalars, copy)
        return cls._simple_new(values)

    @property
    def dtype(self) -> ExtensionDtype:
        """Return a ``DateDtype`` instance."""
        return DateDtype()

    def __iter__(self):
        """Iterate over the elements of the underlying array."""
        return iter(self._data)

    @property
    def _box_func(self):
        """Return the callable used to box a single value as a Timestamp."""
        # TODO Implement Datestamp of a similar form in cython
        # NOTE(review): reviewers want this to be timezone naive by default
        # and suggested boxing to datetime.date (or Period[D]) objects
        # instead; the author describes the current Timestamp/"utc" boxing
        # as a placeholder.
        return lambda x: Timestamp(x, freq="D", tz="utc")

    @property
    def asi8(self) -> np.ndarray:
        """Return the underlying data viewed as ``i8``."""
        return self._data.view(INTEGER_BACKEND)

    @property
    def as_datetime_i8(self) -> np.ndarray:
        """Return the data cast to ``NS_DTYPE`` and then viewed as ``i8``."""
        return self._data.astype(NS_DTYPE).view(INTEGER_BACKEND)

    @property
    def date(self):
        """
        Return ``tslib.ints_to_pydatetime(..., box="date")`` applied to
        ``as_datetime_i8``.
        """
        timestamps = self.as_datetime_i8
        return tslib.ints_to_pydatetime(timestamps, box="date")

    def astype(self, dtype, copy=True):
        """
        Cast to ``dtype``.

        Parameters
        ----------
        dtype : str or dtype
            Target dtype; normalised with ``pandas_dtype``.
        copy : bool, default True
            Only consulted when ``dtype`` is this array's own dtype (return
            a copy or ``self``) and by the parent implementation.

        Returns
        -------
        A copy or ``self`` for the array's own dtype, the result of
        ``array(..., dtype=NS_DTYPE)`` for datetime64 dtypes, boxed values
        for object dtype, otherwise whatever the parent ``astype`` returns.
        """
        dtype = pandas_dtype(dtype)
        if isinstance(dtype, type(self.dtype)):
            if copy:
                return self.copy()
            return self
        if is_datetime64_dtype(dtype):
            return array(self._data, dtype=NS_DTYPE)
        if is_object_dtype(dtype):
            return self._box_values(self.as_datetime_i8)
        return super().astype(dtype, copy)

    def _format_native_types(self, na_rep="NaT", date_format=None):
        """
        Format the values via ``tslib.format_array_from_datetime``.

        Parameters
        ----------
        na_rep : str, default "NaT"
            Passed through as ``na_rep``.
        date_format : str, optional
            Passed to ``_get_format_datetime64_from_values`` to pick the
            format.
        """
        # Imported locally, as in the original code.
        from pandas.io.formats.format import _get_format_datetime64_from_values

        fmt = _get_format_datetime64_from_values(self, date_format)

        return tslib.format_array_from_datetime(
            self.as_datetime_i8, tz="utc", format=fmt, na_rep=na_rep
        )

    def __len__(self):
        """Return the number of elements in the underlying array."""
        return len(self._data)
Uh oh!
There was an error while loading. Please reload this page.