Skip to content

Commit eaa2e26

Browse files
committed
Merge pull request #4684 from jreback/timedelta_fill
ENH: GH3371 support timedelta fillna
2 parents 1055eae + b3f2444 commit eaa2e26

File tree

11 files changed

+296
-39
lines changed

11 files changed

+296
-39
lines changed

doc/source/release.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ pandas 0.13
6767
- A Series of dtype ``timedelta64[ns]`` can now be divided by another
6868
``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This
6969
is frequency conversion.
70+
- Timedelta64 supports ``fillna/ffill/bfill`` with an integer interpreted as seconds,
71+
or a ``timedelta`` (:issue:`3371`)
72+
- Datetime64 supports ``ffill/bfill``
7073
- Performance improvements with ``__getitem__`` on ``DataFrames``
7174
when the key is a column
7275
- Support for using a ``DatetimeIndex/PeriodsIndex`` directly in a datelike calculation
@@ -156,6 +159,8 @@ pandas 0.13
156159
- Remove undocumented/unused ``kind`` keyword argument from ``read_excel``, and ``ExcelFile``. (:issue:`4713`, :issue:`4712`)
157160
- The ``method`` argument of ``NDFrame.replace()`` is valid again, so that a
158161
list can be passed to ``to_replace`` (:issue:`4743`).
162+
- provide automatic dtype conversions on _reduce operations (:issue:`3371`)
163+
- exclude non-numerics if mixed types with datelike in _reduce operations (:issue:`3371`)
159164

160165
**Internal Refactoring**
161166

doc/source/timeseries.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,6 +1195,15 @@ issues). ``idxmin, idxmax`` are supported as well.
11951195
df.min().idxmax()
11961196
df.min(axis=1).idxmin()
11971197
1198+
You can fillna on timedeltas. Integers will be interpreted as seconds. You can
1199+
pass a timedelta to get a particular value.
1200+
1201+
.. ipython:: python
1202+
1203+
y.fillna(0)
1204+
y.fillna(10)
1205+
y.fillna(timedelta(days=-1,seconds=5))
1206+
11981207
.. _timeseries.timedeltas_convert:
11991208

12001209
Time Deltas & Conversions

doc/source/v0.13.0.txt

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ Enhancements
195195
- NaN handling in get_dummies (:issue:`4446`) with `dummy_na`
196196

197197
.. ipython:: python
198+
198199
# previously, nan was erroneously counted as 2 here
199200
# now it is not counted at all
200201
get_dummies([1, 2, np.nan])
@@ -237,10 +238,17 @@ Enhancements
237238
from pandas import offsets
238239
td + offsets.Minute(5) + offsets.Milli(5)
239240

240-
- ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and
241-
``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set
242-
the bandwidth, and to gkde.evaluate() to specify the indicies at which it
243-
is evaluated, respecttively. See scipy docs.
241+
- Fillna is now supported for timedeltas
242+
243+
.. ipython:: python
244+
245+
td.fillna(0)
246+
td.fillna(timedelta(days=1,seconds=5))
247+
248+
- ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and
249+
``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set
250+
the bandwidth, and to gkde.evaluate() to specify the indices at which it
251+
is evaluated, respectively. See scipy docs.
244252

245253
.. _whatsnew_0130.refactoring:
246254

pandas/core/common.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,54 @@ def diff(arr, n, axis=0):
705705
return out_arr
706706

707707

708+
def _coerce_scalar_to_timedelta_type(r):
    """Coerce a scalar to a timedelta-like value with nanosecond resolution.

    Parameters
    ----------
    r : integer, datetime.timedelta or np.timedelta64
        An integer is treated as a count of nanoseconds (it is divided by
        1000 and passed to ``timedelta`` as microseconds).

    Returns
    -------
    np.timedelta64 cast to ``timedelta64[ns]``; on numpy < 1.7 under
    Python 2 the ``datetime.timedelta`` itself is returned unchanged.

    Raises
    ------
    AssertionError
        If ``r`` is not one of the accepted scalar types.
    """
    # kludgy here until we have a timedelta scalar
    # handle the numpy < 1.7 case

    if is_integer(r):
        r = timedelta(microseconds=r/1000)

    if _np_version_under1p7:
        if not isinstance(r, timedelta):
            raise AssertionError("Invalid type for timedelta scalar: %s" % type(r))
        if compat.PY3:
            # Build the nanosecond count from the integer fields of the
            # timedelta. total_seconds() already includes the microseconds
            # component, so adding r.microseconds on top of it would count
            # the sub-second part twice (and go through float arithmetic).
            whole_seconds = r.days * 86400 + r.seconds
            nanos = (whole_seconds * 1000000 + r.microseconds) * 1000
            r = np.timedelta64(nanos)
        else:
            return r

    if isinstance(r, timedelta):
        r = np.timedelta64(r)
    elif not isinstance(r, np.timedelta64):
        raise AssertionError("Invalid type for timedelta scalar: %s" % type(r))
    return r.astype('timedelta64[ns]')
729+
730+
def _coerce_to_dtypes(result, dtypes):
731+
""" given a dtypes and a result set, coerce the result elements to the dtypes """
732+
if len(result) != len(dtypes):
733+
raise AssertionError("_coerce_to_dtypes requires equal len arrays")
734+
735+
def conv(r,dtype):
736+
try:
737+
if isnull(r):
738+
pass
739+
elif dtype == _NS_DTYPE:
740+
r = Timestamp(r)
741+
elif dtype == _TD_DTYPE:
742+
r = _coerce_scalar_to_timedelta_type(r)
743+
elif dtype == np.bool_:
744+
r = bool(r)
745+
elif dtype.kind == 'f':
746+
r = float(r)
747+
elif dtype.kind == 'i':
748+
r = int(r)
749+
except:
750+
pass
751+
752+
return r
753+
754+
return np.array([ conv(r,dtype) for r, dtype in zip(result,dtypes) ])
755+
708756
def _infer_dtype_from_scalar(val):
709757
""" interpret the dtype from a scalar, upcast floats and ints
710758
return the new value and the dtype """
@@ -1288,7 +1336,7 @@ def _possibly_cast_to_timedelta(value, coerce=True):
12881336
# coercion compatibility
12891337
if coerce == 'compat' and _np_version_under1p7:
12901338

1291-
def convert(td, type):
1339+
def convert(td, dtype):
12921340

12931341
# we have an array with a non-object dtype
12941342
if hasattr(td,'item'):
@@ -1317,6 +1365,7 @@ def convert(td, type):
13171365
# < 1.7 coercion
13181366
if not is_list_like(value):
13191367
value = np.array([ value ])
1368+
13201369
dtype = value.dtype
13211370
return np.array([ convert(v,dtype) for v in value ], dtype='m8[ns]')
13221371

pandas/core/frame.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323

2424
from pandas.core.common import (isnull, notnull, PandasError, _try_sort,
2525
_default_index, _maybe_upcast, _is_sequence,
26-
_infer_dtype_from_scalar, _values_from_object)
26+
_infer_dtype_from_scalar, _values_from_object,
27+
_coerce_to_dtypes, _DATELIKE_DTYPES)
2728
from pandas.core.generic import NDFrame
2829
from pandas.core.index import Index, MultiIndex, _ensure_index
2930
from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
@@ -4235,11 +4236,24 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None,
42354236
axis = self._get_axis_number(axis)
42364237
f = lambda x: op(x, axis=axis, skipna=skipna, **kwds)
42374238
labels = self._get_agg_axis(axis)
4239+
4240+
# exclude timedelta/datetime unless we are uniform types
4241+
if axis == 1 and self._is_mixed_type and len(set(self.dtypes) & _DATELIKE_DTYPES):
4242+
numeric_only = True
4243+
42384244
if numeric_only is None:
42394245
try:
42404246
values = self.values
42414247
result = f(values)
42424248
except Exception as e:
4249+
4250+
# try by-column first
4251+
if filter_type is None and axis == 0:
4252+
try:
4253+
return self.apply(f).iloc[0]
4254+
except:
4255+
pass
4256+
42434257
if filter_type is None or filter_type == 'numeric':
42444258
data = self._get_numeric_data()
42454259
elif filter_type == 'bool':
@@ -4273,9 +4287,11 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None,
42734287
result = result.astype(np.float64)
42744288
elif filter_type == 'bool' and notnull(result).all():
42754289
result = result.astype(np.bool_)
4276-
# otherwise, accept it
42774290
except (ValueError, TypeError):
4278-
pass
4291+
4292+
# try to coerce to the original dtypes item by item if we can
4293+
if axis == 0:
4294+
result = com._coerce_to_dtypes(result, self.dtypes)
42794295

42804296
return Series(result, index=labels)
42814297

pandas/core/generic.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,9 @@
2020
_infer_dtype_from_scalar, _maybe_promote,
2121
ABCSeries)
2222

23-
24-
2523
def is_dictlike(x):
    """Return True when ``x`` is a ``dict`` or a ``com.ABCSeries`` instance."""
    dictlike_types = (dict, com.ABCSeries)
    return isinstance(x, dictlike_types)
2725

28-
2926
def _single_replace(self, to_replace, method, inplace, limit):
3027
orig_dtype = self.dtype
3128
result = self if inplace else self.copy()
@@ -1906,7 +1903,21 @@ def abs(self):
19061903
abs: type of caller
19071904
"""
19081905
obj = np.abs(self)
1909-
obj = com._possibly_cast_to_timedelta(obj, coerce=False)
1906+
1907+
# suprimo numpy 1.6 hacking
1908+
if com._np_version_under1p7:
1909+
if self.ndim == 1:
1910+
if obj.dtype == 'm8[us]':
1911+
obj = obj.astype('m8[ns]')
1912+
elif self.ndim == 2:
1913+
def f(x):
1914+
if x.dtype == 'm8[us]':
1915+
x = x.astype('m8[ns]')
1916+
return x
1917+
1918+
if 'm8[us]' in obj.dtypes.values:
1919+
obj = obj.apply(f)
1920+
19101921
return obj
19111922

19121923
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,

0 commit comments

Comments
 (0)