Skip to content

Commit b9bb1b5

Browse files
committed
Merge pull request #3137 from jreback/GH3070
ENH: GH3070 allow string selection on a DataFrame with a datelike index, to have partial_string semantics (like Series)
2 parents 271fc95 + 914625c commit b9bb1b5

File tree

6 files changed

+93
-26
lines changed

6 files changed

+93
-26
lines changed

RELEASE.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,17 @@ pandas 0.11.0
9898
histograms. (GH2710_).
9999
- DataFrame.from_records now accepts not only dicts but any instance of
100100
the collections.Mapping ABC.
101+
- Allow string-based selection (e.g. a partial date string such as ``'2001'``) on a datelike index to work in both
102+
Series and DataFrames (GH3070_)
103+
104+
.. ipython:: python
105+
106+
idx = date_range("2001-10-1", periods=5, freq='M')
107+
ts = Series(np.random.rand(len(idx)),index=idx)
108+
ts['2001']
109+
110+
df = DataFrame(dict(A = ts))
111+
df['2001']
101112
102113
103114
**API Changes**
@@ -263,6 +274,7 @@ pandas 0.11.0
263274
.. _GH3059: https://github.com/pydata/pandas/issues/3059
264275
.. _GH2993: https://github.com/pydata/pandas/issues/2993
265276
.. _GH3115: https://github.com/pydata/pandas/issues/3115
277+
.. _GH3070: https://github.com/pydata/pandas/issues/3070
266278

267279
pandas 0.10.1
268280
=============

doc/source/cookbook.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ Expanding Data
105105
`Alignment and to-date
106106
<http://stackoverflow.com/questions/15489011/python-time-series-alignment-and-to-date-functions>`__
107107

108+
`Rolling Computation window based on values instead of counts
109+
<http://stackoverflow.com/questions/14300768/pandas-rolling-computation-with-window-based-on-values-instead-of-counts>`__
110+
108111
Splitting
109112
~~~~~~~~~
110113

@@ -171,6 +174,9 @@ CSV
171174
`Reading the first few lines of a frame
172175
<http://stackoverflow.com/questions/15008970/way-to-read-first-few-lines-for-pandas-dataframe>`__
173176

177+
`Inferring dtypes from a file
178+
<http://stackoverflow.com/questions/15555005/get-inferred-dataframe-types-iteratively-using-chunksize>`__
179+
174180
SQL
175181
~~~
176182

doc/source/v0.11.0.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,17 @@ Enhancements
245245

246246
- You can now select timestamps from an *unordered* timeseries similarly to an *ordered* timeseries (GH2437_)
247247

248+
- You can now select with a string from a DataFrame with a datelike index, in a similar way to a Series (GH3070_)
249+
250+
.. ipython:: python
251+
252+
idx = date_range("2001-10-1", periods=5, freq='M')
253+
ts = Series(np.random.rand(len(idx)),index=idx)
254+
ts['2001']
255+
256+
df = DataFrame(dict(A = ts))
257+
df['2001']
258+
248259
- ``Squeeze`` to possibly remove length 1 dimensions from an object.
249260

250261
.. ipython:: python
@@ -313,3 +324,4 @@ on GitHub for a complete list.
313324
.. _GH3011: https://github.com/pydata/pandas/issues/3011
314325
.. _GH3076: https://github.com/pydata/pandas/issues/3076
315326
.. _GH3059: https://github.com/pydata/pandas/issues/3059
327+
.. _GH3070: https://github.com/pydata/pandas/issues/3070

pandas/core/frame.py

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from pandas.core.generic import NDFrame
2929
from pandas.core.index import Index, MultiIndex, _ensure_index
3030
from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
31-
_is_index_slice, _check_bool_indexer,
31+
_convert_to_index_sliceable, _check_bool_indexer,
3232
_maybe_convert_indices)
3333
from pandas.core.internals import BlockManager, make_block, form_blocks
3434
from pandas.core.series import Series, _radd_compat
@@ -1864,10 +1864,13 @@ def iget_value(self, i, j):
18641864
return self.iat[i,j]
18651865

18661866
def __getitem__(self, key):
1867-
if isinstance(key, slice):
1868-
# slice rows
1869-
return self._getitem_slice(key)
1870-
elif isinstance(key, (np.ndarray, list)):
1867+
1868+
# see if we can slice the rows
1869+
indexer = _convert_to_index_sliceable(self, key)
1870+
if indexer is not None:
1871+
return self._getitem_slice(indexer)
1872+
1873+
if isinstance(key, (np.ndarray, list)):
18711874
# either boolean or fancy integer index
18721875
return self._getitem_array(key)
18731876
elif isinstance(key, DataFrame):
@@ -1879,14 +1882,7 @@ def __getitem__(self, key):
18791882
return self._get_item_cache(key)
18801883

18811884
def _getitem_slice(self, key):
1882-
idx_type = self.index.inferred_type
1883-
if idx_type == 'floating':
1884-
indexer = self.ix._convert_to_indexer(key, axis=0)
1885-
elif idx_type == 'integer' or _is_index_slice(key):
1886-
indexer = key
1887-
else:
1888-
indexer = self.ix._convert_to_indexer(key, axis=0)
1889-
return self._slice(indexer, axis=0)
1885+
return self._slice(key, axis=0)
18901886

18911887
def _getitem_array(self, key):
18921888
# also raises Exception if object array with NA values
@@ -1982,10 +1978,12 @@ def __setattr__(self, name, value):
19821978
object.__setattr__(self, name, value)
19831979

19841980
def __setitem__(self, key, value):
1985-
if isinstance(key, slice):
1986-
# slice rows
1987-
self._setitem_slice(key, value)
1988-
elif isinstance(key, (np.ndarray, list)):
1981+
# see if we can slice the rows
1982+
indexer = _convert_to_index_sliceable(self, key)
1983+
if indexer is not None:
1984+
return self._setitem_slice(indexer, value)
1985+
1986+
if isinstance(key, (np.ndarray, list)):
19891987
self._setitem_array(key, value)
19901988
elif isinstance(key, DataFrame):
19911989
self._setitem_frame(key, value)
@@ -1994,14 +1992,7 @@ def __setitem__(self, key, value):
19941992
self._set_item(key, value)
19951993

19961994
def _setitem_slice(self, key, value):
1997-
idx_type = self.index.inferred_type
1998-
if idx_type == 'floating':
1999-
indexer = self.ix._convert_to_indexer(key, axis=0)
2000-
elif idx_type == 'integer' or _is_index_slice(key):
2001-
indexer = key
2002-
else:
2003-
indexer = self.ix._convert_to_indexer(key, axis=0)
2004-
self.ix._setitem_with_indexer(indexer, value)
1995+
self.ix._setitem_with_indexer(key, value)
20051996

20061997
def _setitem_array(self, key, value):
20071998
# also raises Exception if object array with NA values

pandas/core/indexing.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,30 @@ def _convert_key(self, key):
827827
_eps = np.finfo('f4').eps
828828

829829

830+
def _convert_to_index_sliceable(obj, key):
831+
""" if we are index sliceable, then return my slicer, otherwise return None """
832+
idx = obj.index
833+
if isinstance(key, slice):
834+
idx_type = idx.inferred_type
835+
if idx_type == 'floating':
836+
indexer = obj.ix._convert_to_indexer(key, axis=0)
837+
elif idx_type == 'integer' or _is_index_slice(key):
838+
indexer = key
839+
else:
840+
indexer = obj.ix._convert_to_indexer(key, axis=0)
841+
return indexer
842+
843+
elif isinstance(key, basestring):
844+
845+
# we need a timelike key here
846+
if idx.is_all_dates:
847+
try:
848+
return idx._get_string_slice(key)
849+
except:
850+
return None
851+
852+
return None
853+
830854
def _is_index_slice(obj):
831855
def _is_valid_index(x):
832856
return (com.is_integer(x) or com.is_float(x)

pandas/tseries/tests/test_timeseries.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,12 +196,34 @@ def test_indexing_unordered(self):
196196
for t in result.index:
197197
self.assertTrue(t.year == 2005)
198198

199+
def test_indexing(self):
    # GH 3070: selecting with a string on a datelike index must give the
    # same rows for a Series and for a DataFrame built from that Series
    stamps = date_range("2001-1-1", periods=20, freq='M')
    ts = Series(np.random.rand(len(stamps)), index=stamps)

    # --- getting ---
    from_series = ts['2001']
    df = DataFrame({'A': ts})
    from_frame = df['2001']['A']
    assert_series_equal(from_series, from_frame)

    # --- setting ---
    # assign through the Series first, then mirror the same assignment on
    # the frame via .loc and compare the selected rows again
    ts['2001'] = 1
    from_series = ts['2001']

    df.loc['2001', 'A'] = 1

    from_frame = df['2001']['A']
    assert_series_equal(from_series, from_frame)

221+
199222
def assert_range_equal(left, right):
    # two ranges match only when left.equals(right) holds and both the
    # freq and tz attributes compare equal
    assert left.equals(right)
    assert left.freq == right.freq
    assert left.tz == right.tz
203226

204-
205227
class TestTimeSeries(unittest.TestCase):
206228
_multiprocess_can_split_ = True
207229

0 commit comments

Comments
 (0)