diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 4c1e98b236db7..86661eb5e4414 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -896,6 +896,7 @@ Reshaping - Bug in :func:`DataFrame.join` which does an ``outer`` instead of a ``left`` join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) - :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) - Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) +- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) Other ^^^^^ diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 777f08bd9db2b..359c030157bd3 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -279,18 +279,22 @@ def _trim_zeros(x): def _coerce_to_type(x): """ if the passed data is of datetime/timedelta type, - this method converts it to integer so that cut method can + this method converts it to numeric so that cut method can handle it """ dtype = None if is_timedelta64_dtype(x): - x = to_timedelta(x).view(np.int64) + x = to_timedelta(x) dtype = np.timedelta64 elif is_datetime64_dtype(x): - x = to_datetime(x).view(np.int64) + x = to_datetime(x) dtype = np.datetime64 + if dtype is not None: + # GH 19768: force NaT to NaN during integer conversion + x = np.where(x.notna(), x.view(np.int64), np.nan) + return x, dtype diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index f7262a2f0da63..ff914273d47b1 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -6,7 +6,8 @@ from pandas import (Series, isna, to_datetime, DatetimeIndex, Timestamp, Interval, IntervalIndex, Categorical, - cut, qcut, date_range) + cut, qcut, date_range, NaT, TimedeltaIndex) +from 
@pytest.mark.parametrize('s', [
    Series(DatetimeIndex(['20180101', NaT, '20180103'])),
    Series(TimedeltaIndex(['0 days', NaT, '2 days']))],
    ids=lambda x: str(x.dtype))
def test_qcut_nat(self, s):
    # GH 19768: qcut must accept datetime/timedelta data containing NaT
    first, last = s[0], s[2]
    # edge shared by the two expected bins: one day before the last value
    inner_edge = last - Day()

    # the NaT entry is expected to map to a missing interval
    expected_bins = [(first - Nano(), inner_edge), np.nan, (inner_edge, last)]
    intervals = IntervalIndex.from_tuples(expected_bins)
    expected = Series(Categorical(intervals, ordered=True))

    result = qcut(s, 2)
    tm.assert_series_equal(result, expected)