Merge pull request #3137 from jreback/GH3070

jreback · jreback · commit b9bb1b554879 · 2013-03-22T09:20:31.000-07:00
ENH: GH3070 allow string selection on a DataFrame with a datelike index, to have partial_string semantics (like Series)
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -98,6 +98,17 @@ pandas 0.11.0
     histograms. (GH2710_).
   - DataFrame.from_records now accepts not only dicts but any instance of
     the collections.Mapping ABC.
+  - Allow partial-string selection on a datelike index to work for both
+    Series and DataFrames (GH3070_)
+
+    .. ipython:: python
+
+        idx = date_range("2001-10-1", periods=5, freq='M')
+        ts = Series(np.random.rand(len(idx)),index=idx)
+        ts['2001']
+
+        df = DataFrame(dict(A = ts))
+        df['2001']
 
 
 **API Changes**
@@ -263,6 +274,7 @@ pandas 0.11.0
 .. _GH3059: https://github.com/pydata/pandas/issues/3059
 .. _GH2993: https://github.com/pydata/pandas/issues/2993
 .. _GH3115: https://github.com/pydata/pandas/issues/3115
+.. _GH3070: https://github.com/pydata/pandas/issues/3070
 
 pandas 0.10.1
 =============
diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst
@@ -105,6 +105,9 @@ Expanding Data
 `Alignment and to-date
 <http://stackoverflow.com/questions/15489011/python-time-series-alignment-and-to-date-functions>`__
 
+`Rolling Computation window based on values instead of counts
+<http://stackoverflow.com/questions/14300768/pandas-rolling-computation-with-window-based-on-values-instead-of-counts>`__
+
 Splitting
 ~~~~~~~~~
 
@@ -171,6 +174,9 @@ CSV
 `Reading the first few lines of a frame
 <http://stackoverflow.com/questions/15008970/way-to-read-first-few-lines-for-pandas-dataframe>`__
 
+`Inferring dtypes from a file
+<http://stackoverflow.com/questions/15555005/get-inferred-dataframe-types-iteratively-using-chunksize>`__
+
 SQL
 ~~~
 
diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt
@@ -245,6 +245,17 @@ Enhancements
 
   - You can now select timestamps from an *unordered* timeseries similarly to an *ordered* timeseries (GH2437_)
 
+  - You can now select with a string from a DataFrame with a datelike index, in a similar way to a Series (GH3070_)
+
+    .. ipython:: python
+
+        idx = date_range("2001-10-1", periods=5, freq='M')
+        ts = Series(np.random.rand(len(idx)),index=idx)
+        ts['2001']
+
+        df = DataFrame(dict(A = ts))
+        df['2001']
+
   - ``Squeeze`` to possibly remove length 1 dimensions from an object.
 
     .. ipython:: python
@@ -313,3 +324,4 @@ on GitHub for a complete list.
 .. _GH3011: https://github.com/pydata/pandas/issues/3011
 .. _GH3076: https://github.com/pydata/pandas/issues/3076
 .. _GH3059: https://github.com/pydata/pandas/issues/3059
+.. _GH3070: https://github.com/pydata/pandas/issues/3070
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -28,7 +28,7 @@
 from pandas.core.generic import NDFrame
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
-                                  _is_index_slice, _check_bool_indexer,
+                                  _convert_to_index_sliceable, _check_bool_indexer,
                                   _maybe_convert_indices)
 from pandas.core.internals import BlockManager, make_block, form_blocks
 from pandas.core.series import Series, _radd_compat
@@ -1864,10 +1864,13 @@ def iget_value(self, i, j):
         return self.iat[i,j]
 
     def __getitem__(self, key):
-        if isinstance(key, slice):
-            # slice rows
-            return self._getitem_slice(key)
-        elif isinstance(key, (np.ndarray, list)):
+
+        # see if we can slice the rows
+        indexer = _convert_to_index_sliceable(self, key)
+        if indexer is not None:
+            return self._getitem_slice(indexer)
+
+        if isinstance(key, (np.ndarray, list)):
             # either boolean or fancy integer index
             return self._getitem_array(key)
         elif isinstance(key, DataFrame):
@@ -1879,14 +1882,7 @@ def __getitem__(self, key):
             return self._get_item_cache(key)
 
     def _getitem_slice(self, key):
-        idx_type = self.index.inferred_type
-        if idx_type == 'floating':
-            indexer = self.ix._convert_to_indexer(key, axis=0)
-        elif idx_type == 'integer' or _is_index_slice(key):
-            indexer = key
-        else:
-            indexer = self.ix._convert_to_indexer(key, axis=0)
-        return self._slice(indexer, axis=0)
+        return self._slice(key, axis=0)
 
     def _getitem_array(self, key):
         # also raises Exception if object array with NA values
@@ -1982,10 +1978,12 @@ def __setattr__(self, name, value):
                 object.__setattr__(self, name, value)
 
     def __setitem__(self, key, value):
-        if isinstance(key, slice):
-            # slice rows
-            self._setitem_slice(key, value)
-        elif isinstance(key, (np.ndarray, list)):
+        # see if we can slice the rows
+        indexer = _convert_to_index_sliceable(self, key)
+        if indexer is not None:
+            return self._setitem_slice(indexer, value)
+
+        if isinstance(key, (np.ndarray, list)):
             self._setitem_array(key, value)
         elif isinstance(key, DataFrame):
             self._setitem_frame(key, value)
@@ -1994,14 +1992,7 @@ def __setitem__(self, key, value):
             self._set_item(key, value)
 
     def _setitem_slice(self, key, value):
-        idx_type = self.index.inferred_type
-        if idx_type == 'floating':
-            indexer = self.ix._convert_to_indexer(key, axis=0)
-        elif idx_type == 'integer' or _is_index_slice(key):
-            indexer = key
-        else:
-            indexer = self.ix._convert_to_indexer(key, axis=0)
-        self.ix._setitem_with_indexer(indexer, value)
+        self.ix._setitem_with_indexer(key, value)
 
     def _setitem_array(self, key, value):
         # also raises Exception if object array with NA values
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -827,6 +827,30 @@ def _convert_key(self, key):
 _eps = np.finfo('f4').eps
 
 
+def _convert_to_index_sliceable(obj, key):
+    """ return a row slicer for obj if key is index sliceable (a slice, or a
+    string on an all-dates index), otherwise return None """
+    idx = obj.index
+    if isinstance(key, slice):
+        idx_type = idx.inferred_type
+        if idx_type == 'floating':
+            indexer = obj.ix._convert_to_indexer(key, axis=0)
+        elif idx_type == 'integer' or _is_index_slice(key):
+            indexer = key
+        else:
+            indexer = obj.ix._convert_to_indexer(key, axis=0)
+        return indexer
+
+    elif isinstance(key, basestring):
+        # only an all-dates index can turn a string into a row slice
+        if idx.is_all_dates:
+            try:
+                return idx._get_string_slice(key)
+            except Exception:
+                # best effort: None tells callers this key is not a row slice
+                return None
+    return None
+
 def _is_index_slice(obj):
     def _is_valid_index(x):
         return (com.is_integer(x) or com.is_float(x)
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
@@ -196,12 +196,34 @@ def test_indexing_unordered(self):
         for t in result.index:
             self.assertTrue(t.year == 2005)
 
+    def test_indexing(self):
+        # 20 monthly periods from 2001-1-1, so the index extends past 2001
+        idx = date_range("2001-1-1", periods=20, freq='M')
+        ts = Series(np.random.rand(len(idx)),index=idx)
+
+        # getting
+        # GH 3070, make sure semantics work on Series/Frame
+        expected = ts['2001']
+
+        df = DataFrame(dict(A = ts))
+        result = df['2001']['A']
+        assert_series_equal(expected,result)
+
+        # setting
+        # assign through the Series and through .loc on the frame; the
+        # string selection on both must then agree
+        ts['2001'] = 1
+        expected = ts['2001']
+        df.loc['2001','A'] = 1
+
+        result = df['2001']['A']
+        assert_series_equal(expected,result)
+
 def assert_range_equal(left, right):
     assert(left.equals(right))
     assert(left.freq == right.freq)
     assert(left.tz == right.tz)
 
-
 class TestTimeSeries(unittest.TestCase):
     _multiprocess_can_split_ = True