
Commit 3138c7e

ENH: convert handles up and down sampling
Parent: 7b2c16e

4 files changed: +129, -25 lines

pandas/core/generic.py

Lines changed: 36 additions & 8 deletions
@@ -3,7 +3,7 @@
 import numpy as np

 from pandas.core.common import save, load
-from pandas.core.index import MultiIndex
+from pandas.core.index import MultiIndex, DatetimeIndex
 import pandas.core.datetools as datetools

 #-------------------------------------------------------------------------------
@@ -132,25 +132,53 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True):
         return groupby(self, by, axis=axis, level=level, as_index=as_index,
                        sort=sort)

-    def convert(self, rule, how='last', axis=0, as_index=True):
+    def convert(self, rule, method='pad', how='last', axis=0, as_index=True):
         """
-        Convenience method for frequency conversion of timestamped data
+        Convenience method for frequency conversion and resampling of regular
+        time-series data.
+
+        Parameters
+        ----------
+        rule : the offset string or object representing target conversion
+        how : string, method for down- or re-sampling, default 'last'
+        method : string, method for upsampling, default 'pad'
+        axis : int, optional, default 0
+        as_index : see synonymous argument of groupby
         """
         from pandas.core.groupby import Tinterval, translateGrouping

         if isinstance(rule, basestring):
             rule = datetools.toOffset(rule)

+        idx = self._get_axis(axis)
+        if not isinstance(idx, DatetimeIndex):
+            raise ValueError("Cannot call convert with non-DatetimeIndex")
+
+        if idx.offset is None:
+            raise ValueError("Cannot call convert with non-regular index")
+
         if not isinstance(rule, datetools.DateOffset):
             raise ValueError("Rule not a recognized offset")

-        interval = Tinterval(rule, label='right', closed='right')
-        grouped = self.groupby(interval, axis=axis, as_index=as_index)
+        interval = Tinterval(rule, label='right', closed='right', _obj=self)

-        if isinstance(how, basestring):
-            how = translateGrouping(how)
+        currfreq = len(idx)
+        targfreq = len(interval.binner) - 2  # since binner extends endpoints

-        return grouped.agg(how)
+        if targfreq <= currfreq:
+            # down- or re-sampling
+            grouped = self.groupby(interval, axis=axis, as_index=as_index)
+
+            if isinstance(how, basestring):
+                how = translateGrouping(how)
+
+            result = grouped.agg(how)
+        else:
+            # upsampling
+            result = self.reindex(interval.binner[1:-1].view('M8[us]'),
+                                  method=method)
+
+        return result


    def select(self, crit, axis=0):
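
Taken together with the tests added below, a minimal sketch of the new behavior (the Series values and variable names weekly/bars/minutely are illustrative only): a target frequency coarser than the index routes through groupby/agg, while a finer target reindexes onto the binner and fills with `method`:

    import numpy as np
    from datetime import datetime
    from pandas import Series
    from pandas.core.index import DatetimeIndex

    # regular daily index, as in the new tests
    dti = DatetimeIndex(start=datetime(2005, 1, 1),
                        end=datetime(2005, 1, 10), offset='D')
    s = Series(np.random.rand(len(dti)), dti)

    weekly = s.convert('W', how='last')        # downsample: last observation per week
    bars = s.convert('W', how='ohlc')          # downsample to open/high/low/close columns
    minutely = s.convert('Min', method='pad')  # upsample: forward-fill onto minute points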

pandas/core/groupby.py

Lines changed: 25 additions & 16 deletions
@@ -626,7 +626,7 @@ def get_group_levels(self):

         return name_list

-def generate_bins_generic(values, binner, closed, label):
+def generate_bins_generic(values, binner, closed, label, drop):
     """
     Generate bin edge offsets and bin labels for one array using another array
     which has bin edge values. Both arrays must be sorted.
@@ -690,9 +690,9 @@ def generate_bins_generic(values, binner, closed, label):
         if j >= lenidx:
             break

-        # if we've seen some values, mark bin
-        if vc != 0:
-            bins[bc] = j
+        # if we've seen some values or not ignoring empty bins
+        if vc != 0 or not drop:
+            bins[bc] = j
             bc += 1
             vc = 0

@@ -752,20 +752,27 @@ class Tinterval(Grouper, CustomGrouper):
     begin = None
     end = None
     nperiods = None
+    binner = None

     def __init__(self, interval='Min', closed='left', label='left',
-                 begin=None, end=None, nperiods=None):
+                 begin=None, end=None, nperiods=None, _obj=None):
         self.offset = interval
         self.closed = closed
         self.label = label
         self.begin = begin
         self.end = end
         self.nperiods = None

+        if _obj is not None:
+            self.set_obj(_obj)
+
     def set_obj(self, obj):
         """
         Injects the object we'll act on, which we use to initialize grouper
         """
+        if id(self.obj) == id(obj):
+            return
+
         self.obj = obj

         if not isinstance(obj.index, DatetimeIndex):
@@ -778,14 +785,14 @@ def set_obj(self, obj):
             self.binlabels = []
             return

-        binner = _generate_time_binner(obj.index, self.offset, self.begin,
-                                       self.end, self.nperiods)
+        self.binner = _generate_time_binner(obj.index, self.offset, self.begin,
+                                            self.end, self.nperiods)

-        if isinstance(binner, DatetimeIndex):
-            binner = binner.asi8
+        if isinstance(self.binner, DatetimeIndex):
+            self.binner = self.binner.asi8

         # general version, knowing nothing about relative frequencies
-        bins, labels = lib.generate_bins_dt64(index.asi8, binner,
+        bins, labels = lib.generate_bins_dt64(index.asi8, self.binner,
                                               self.closed, self.label)

         self.bins = bins
@@ -1767,14 +1774,16 @@ def numpy_groupby(data, labels, axis=0):
 # Helper functions

 def translateGrouping(how):
-    if how == 'olhc':
-        return {'open' : lambda arr: arr[0],
-                'low' : lambda arr: arr.min(),
-                'high' : lambda arr: arr.max(),
+    if set(how) == set('ohlc'):
+        return {'open' : lambda arr: arr[0],
+                'low' : lambda arr: arr.min(),
+                'high' : lambda arr: arr.max(),
                 'close' : lambda arr: arr[-1]}

-    if how == 'last':
-        return lambda arr: arr[-1]
+    if how in 'last':
+        def picker(arr):
+            return arr[-1] if arr is not None and len(arr) else np.nan
+        return picker

     raise ValueError("Unrecognized method: %s" % how)
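
A small sketch of what the reworked helper returns (the array below is made up for illustration): 'ohlc' now matches any ordering of the four letters and hands back a dict of per-bin reducers that grouped.agg applies column by column, while 'last' returns a picker that is NaN-safe for empty bins:

    import numpy as np
    from pandas.core.groupby import translateGrouping

    arr = np.array([3.0, 1.0, 4.0, 1.5])

    how = translateGrouping('ohlc')   # any permutation of 'ohlc' works now
    vals = dict((k, f(arr)) for k, f in how.items())
    # {'open': 3.0, 'low': 1.0, 'high': 4.0, 'close': 1.5}

    last = translateGrouping('last')
    last(arr)              # 1.5
    last(np.array([]))     # nan instead of an IndexError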

pandas/core/series.py

Lines changed: 1 addition & 0 deletions
@@ -599,6 +599,7 @@ def iget_value(self, i):
         return self[label]

     iget = iget_value
+    irow = iget_value

     def get_value(self, label):
         """

pandas/tests/test_datetime64.py

Lines changed: 67 additions & 1 deletion
@@ -345,12 +345,78 @@ def test_convert_basic(self):

         assert_series_equal(result, expect)

+        # from daily
+        dti = DatetimeIndex(start=datetime(2005,1,1),
+                            end=datetime(2005,1,10), offset='D')
+
+        s = Series(rand(len(dti)), dti)
+
+        # to weekly
+        result = s.convert('W') # implicitly @SUN
+
+        self.assertEquals(len(result), 3)
+        self.assert_((result.index.dayofweek == [6,6,6]).all())
+        self.assertEquals(result.irow(0), s['1/2/2005'])
+        self.assertEquals(result.irow(1), s['1/9/2005'])
+        self.assertEquals(result.irow(2), s.irow(-1))
+
+        result = s.convert('W@MON')
+        self.assertEquals(len(result), 2)
+        self.assert_((result.index.dayofweek == [0,0]).all())
+        self.assertEquals(result.irow(0), s['1/3/2005'])
+        self.assertEquals(result.irow(1), s['1/10/2005'])
+
+        result = s.convert('W@TUE')
+        self.assertEquals(len(result), 2)
+        self.assert_((result.index.dayofweek == [1,1]).all())
+        self.assertEquals(result.irow(0), s['1/4/2005'])
+        self.assertEquals(result.irow(1), s['1/10/2005'])
+
+        result = s.convert('W@WED')
+        self.assertEquals(len(result), 2)
+        self.assert_((result.index.dayofweek == [2,2]).all())
+        self.assertEquals(result.irow(0), s['1/5/2005'])
+        self.assertEquals(result.irow(1), s['1/10/2005'])
+
+        result = s.convert('W@THU')
+        self.assertEquals(len(result), 2)
+        self.assert_((result.index.dayofweek == [3,3]).all())
+        self.assertEquals(result.irow(0), s['1/6/2005'])
+        self.assertEquals(result.irow(1), s['1/10/2005'])
+
+        result = s.convert('W@FRI')
+        self.assertEquals(len(result), 2)
+        self.assert_((result.index.dayofweek == [4,4]).all())
+        self.assertEquals(result.irow(0), s['1/7/2005'])
+        self.assertEquals(result.irow(1), s['1/10/2005'])
+
+        # to biz day
+        result = s.convert('B')
+        self.assertEquals(len(result), 6)
+        self.assert_((result.index.dayofweek == [0,1,2,3,4,0]).all())
+        self.assertEquals(result.irow(0), s['1/3/2005'])
+        self.assertEquals(result.irow(1), s['1/4/2005'])
+        self.assertEquals(result.irow(5), s['1/10/2005'])
+
+    def test_convert_upsample(self):
+        # from daily
+        dti = DatetimeIndex(start=datetime(2005,1,1),
+                            end=datetime(2005,1,10), offset='D')
+
+        s = Series(rand(len(dti)), dti)
+
+        # to minutely, by padding
+        result = s.convert('Min', method='pad')
+        self.assertEquals(len(result), 12961)
+        self.assertEquals(result[0], s[0])
+        self.assertEquals(result[-1], s[-1])
+
     def test_convert_olhc(self):
         s = self.series

         grouper = Tinterval(Minute(5), closed='right', label='right')
         expect = s.groupby(grouper).agg(lambda x: x[-1])
-        result = s.convert('5Min', 'olhc')
+        result = s.convert('5Min', how='ohlc')

         self.assertEquals(len(result), len(expect))
         self.assertEquals(len(result.columns), 4)