From 71a98938173bb1c2b31a9408b30af4ad00fb40ea Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 11 Jan 2019 11:49:18 +0100 Subject: [PATCH 1/3] TimedeltaArray freq validation without _from_sequence --- pandas/core/arrays/timedeltas.py | 70 ++++++++++++++++++++------ pandas/tests/arrays/test_timedeltas.py | 2 +- 2 files changed, 55 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 47b3f93f88b78..73639cb1ad2ee 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -134,28 +134,51 @@ def dtype(self): _attributes = ["freq"] def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): - if not hasattr(values, "dtype"): - raise ValueError( + if isinstance(values, (ABCSeries, ABCIndexClass)): + values = values._values + + if isinstance(values, type(self)): + values, freq, freq_infer = extract_values_freq(values, freq) + + if not isinstance(values, np.ndarray): + msg = ( "Unexpected type '{}'. 'values' must be a TimedeltaArray " "ndarray, or Series or Index containing one of those." - .format(type(values).__name__)) + ) + raise ValueError(msg.format(type(values).__name__)) + + if values.dtype == 'i8': + # for compat with datetime/timedelta/period shared methods, + # we can sometimes get here with int64 values. These represent + # nanosecond UTC (or tz-naive) unix timestamps + values = values.view(_TD_DTYPE) + + if values.dtype != _TD_DTYPE: + raise TypeError(_BAD_DTYPE.format(dtype=values.dtype)) + + try: + dtype_mismatch = dtype != _TD_DTYPE + except TypeError: + raise TypeError(_BAD_DTYPE.format(dtype=dtype)) + else: + if dtype_mismatch: + raise TypeError(_BAD_DTYPE.format(dtype=dtype)) + if freq == "infer": - raise ValueError( + msg = ( "Frequency inference not allowed in TimedeltaArray.__init__. " - "Use 'pd.array()' instead.") + "Use 'pd.array()' instead." + ) + raise ValueError(msg) - if dtype is not None and not is_dtype_equal(dtype, _TD_DTYPE): - raise TypeError("dtype {dtype} cannot be converted to " - "timedelta64[ns]".format(dtype=dtype)) + if copy: + values = values.copy() + if freq: + freq = to_offset(freq) - if values.dtype == 'i8': - values = values.view('timedelta64[ns]') - - result = type(self)._from_sequence(values, dtype=dtype, - copy=copy, freq=freq) - self._data = result._data - self._freq = result._freq - self._dtype = result._dtype + self._data = values + self._dtype = dtype + self._freq = freq @classmethod def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): @@ -982,3 +1005,18 @@ def _generate_regular_range(start, end, periods, offset): data = np.arange(b, e, stride, dtype=np.int64) return data + + +def extract_values_freq(arr, freq): + # type: (TimedeltaArray, Offset) -> Tuple[ndarray, Offset, bool] + freq_infer = False + if freq is None: + freq = arr.freq + elif freq and arr.freq: + freq = to_offset(freq) + freq, freq_infer = dtl.validate_inferred_freq( + freq, arr.freq, + freq_infer=False + ) + values = arr._data + return values, freq, freq_infer diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index af23b2467fcdf..a8745f78392ca 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -43,7 +43,7 @@ def test_incorrect_dtype_raises(self): def test_copy(self): data = np.array([1, 2, 3], dtype='m8[ns]') arr = TimedeltaArray(data, copy=False) - assert arr._data.base is data + assert arr._data is data arr = TimedeltaArray(data, copy=True) assert arr._data is not data From b95b243937606e88872fb1012210c3ad3e40a115 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 11 Jan 2019 11:55:11 +0100 Subject: [PATCH 2/3] add the actual freq validation --- pandas/core/arrays/timedeltas.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 73639cb1ad2ee..dd6310c0ee702 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -137,6 +137,8 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): if isinstance(values, (ABCSeries, ABCIndexClass)): values = values._values + inferred_freq = getattr(values, "_freq", None) + if isinstance(values, type(self)): values, freq, freq_infer = extract_values_freq(values, freq) @@ -180,6 +182,9 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): self._dtype = dtype self._freq = freq + if inferred_freq is None and freq is not None: + type(self)._validate_frequency(self, freq) + @classmethod def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): assert dtype == _TD_DTYPE, dtype From 19c5160467a9f2ba11398d1381db206be1bd5c10 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 11 Jan 2019 12:06:46 +0100 Subject: [PATCH 3/3] inline extract_values_freq --- pandas/core/arrays/timedeltas.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index dd6310c0ee702..be1a7097b0e0d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -15,8 +15,8 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( - _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_dtype_equal, - is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar, + _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype, + is_integer_dtype, is_list_like, is_object_dtype, is_scalar, is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -140,7 +140,12 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): inferred_freq = getattr(values, "_freq", None) if isinstance(values, type(self)): - values, freq, freq_infer = extract_values_freq(values, freq) + if freq is None: + freq = values.freq + elif freq and values.freq: + freq = to_offset(freq) + freq, _ = dtl.validate_inferred_freq(freq, values.freq, False) + values = values._data if not isinstance(values, np.ndarray): msg = ( @@ -1010,18 +1015,3 @@ def _generate_regular_range(start, end, periods, offset): data = np.arange(b, e, stride, dtype=np.int64) return data - - -def extract_values_freq(arr, freq): - # type: (TimedeltaArray, Offset) -> Tuple[ndarray, Offset, bool] - freq_infer = False - if freq is None: - freq = arr.freq - elif freq and arr.freq: - freq = to_offset(freq) - freq, freq_infer = dtl.validate_inferred_freq( - freq, arr.freq, - freq_infer=False - ) - values = arr._data - return values, freq, freq_infer