Skip to content

Commit 458dc94

Browse files
committed
FIX value_counts should skip NaT #7423
1 parent 18d0155 commit 458dc94

File tree

3 files changed

+21
-5
lines changed

3 files changed

+21
-5
lines changed

doc/source/v0.14.1.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ Bug Fixes
159159

160160

161161

162-
162+
- Bug in ``value_counts`` where ``NaT`` was counted as a regular value instead of being treated as missing (like ``NaN``) and skipped (:issue:`7423`)
163163

164164

165165

pandas/core/algorithms.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,25 +202,27 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
202202
raise TypeError("bins argument only works with numeric data.")
203203
values = cat.labels
204204

205-
if com.is_integer_dtype(values.dtype):
205+
dtype = values.dtype
206+
if com.is_integer_dtype(dtype):
206207
values = com._ensure_int64(values)
207208
keys, counts = htable.value_count_int64(values)
208209

209210
elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
210-
dtype = values.dtype
211211
values = values.view(np.int64)
212212
keys, counts = htable.value_count_int64(values)
213213

214+
from pandas.lib import NaT
215+
msk = keys != NaT.value
216+
keys, counts = keys[msk], counts[msk]
214217
# convert the keys back to the dtype we came in
215-
keys = Series(keys, dtype=dtype)
218+
keys = keys.astype(dtype)
216219

217220
else:
218221
mask = com.isnull(values)
219222
values = com._ensure_object(values)
220223
keys, counts = htable.value_count_object(values, mask)
221224

222225
result = Series(counts, index=com._values_from_object(keys))
223-
224226
if bins is not None:
225227
# TODO: This next line should be more efficient
226228
result = result.reindex(np.arange(len(cat.levels)), fill_value=0)

pandas/tests/test_algos.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,20 @@ def test_value_counts_dtypes(self):
237237

238238
self.assertRaises(TypeError, lambda s: algos.value_counts(s, bins=1), ['1', 1])
239239

240+
def test_value_counts_nat(self):
241+
td = Series([np.timedelta64(10000), pd.NaT], dtype='timedelta64[ns]')
242+
dt = pd.to_datetime(['NaT', '2014-01-01'])
243+
244+
res_td = algos.value_counts(td)
245+
res_dt = algos.value_counts(dt)
246+
247+
self.assertEqual(len(res_td), 1)
248+
self.assertEqual(len(res_dt), 1)
249+
250+
exp_dt = pd.Series({pd.Timestamp('2014-01-01 00:00:00'): 1})
251+
tm.assert_series_equal(res_dt, exp_dt)
252+
253+
# TODO same for res_td (timedelta)
240254

241255
def test_quantile():
242256
s = Series(np.random.randn(100))

0 commit comments

Comments
 (0)