Skip to content

Commit 90b252a

Browse files
committed
Merge pull request #3621 from jreback/nat
BUG: (GH3062) Correctly parse date columns with embedded (nan/NaT) into datetime64[ns] in read_csv
2 parents fe72b3d + c9e3372 commit 90b252a

File tree

3 files changed

+36
-2
lines changed

3 files changed

+36
-2
lines changed

RELEASE.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ pandas 0.11.1
122122
- Fix ``read_csv`` to correctly encode identical na_values, e.g. ``na_values=[-999.0,-999]``
123123
was failing (GH3611_)
124124
- Fix indexing issue in ndim >= 3 with ``iloc`` (GH3617_)
125+
- Correctly parse date columns with embedded (nan/NaT) into datetime64[ns] dtype in ``read_csv``
126+
when ``parse_dates`` is specified (GH3062_)
125127

126128
.. _GH3164: https://github.com/pydata/pandas/issues/3164
127129
.. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -172,6 +174,7 @@ pandas 0.11.1
172174
.. _GH3617: https://github.com/pydata/pandas/issues/3617
173175
.. _GH3435: https://github.com/pydata/pandas/issues/3435
174176
.. _GH3611: https://github.com/pydata/pandas/issues/3611
177+
.. _GH3062: https://github.com/pydata/pandas/issues/3062
175178
.. _GH1512: https://github.com/pydata/pandas/issues/1512
176179

177180

pandas/io/tests/test_parsers.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,28 @@ def test_custom_na_values(self):
531531
skiprows=[1])
532532
assert_almost_equal(df3.values, expected)
533533

534+
def test_nat_parse(self):
535+
536+
# GH 3062
537+
df = DataFrame(dict({
538+
'A' : np.asarray(range(10),dtype='float64'),
539+
'B' : pd.Timestamp('20010101') }))
540+
df.iloc[3:6,:] = np.nan
541+
542+
with ensure_clean('__nat_parse_.csv') as path:
543+
df.to_csv(path)
544+
result = read_csv(path,index_col=0,parse_dates=['B'])
545+
tm.assert_frame_equal(result,df)
546+
547+
expected = Series(dict( A = 'float64',B = 'datetime64[ns]'))
548+
tm.assert_series_equal(expected,result.dtypes)
549+
550+
# test with NaT for the nan_rep
551+
# we don't have a method to specify the Datetime na_rep (it defaults to '')
552+
df.to_csv(path)
553+
result = read_csv(path,index_col=0,parse_dates=['B'])
554+
tm.assert_frame_equal(result,df)
555+
534556
def test_skiprows_bug(self):
535557
# GH #505
536558
text = """#foo,a,b,c

pandas/tslib.pyx

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,8 +318,10 @@ class Timestamp(_Timestamp):
318318
ts.dts.us, ts.tzinfo)
319319

320320

321+
_nat_strings = set(['NaT','nat','NAT','nan','NaN','NAN'])
321322
class NaTType(_NaT):
322323
"""(N)ot-(A)-(T)ime, the time equivalent of NaN"""
324+
323325
def __new__(cls):
324326
cdef _NaT base
325327

@@ -647,8 +649,11 @@ cdef convert_to_tsobject(object ts, object tz):
647649
obj.value = ts
648650
pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts)
649651
elif util.is_string_object(ts):
650-
_string_to_dts(ts, &obj.dts)
651-
obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts)
652+
if ts in _nat_strings:
653+
obj.value = NPY_NAT
654+
else:
655+
_string_to_dts(ts, &obj.dts)
656+
obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts)
652657
elif PyDateTime_Check(ts):
653658
if tz is not None:
654659
# sort of a temporary hack
@@ -862,6 +867,10 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
862867
iresult[i] = iNaT
863868
continue
864869

870+
elif val in _nat_strings:
871+
iresult[i] = iNaT
872+
continue
873+
865874
_string_to_dts(val, &dts)
866875
iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns,
867876
&dts)

0 commit comments

Comments
 (0)