Skip to content

Commit 42ce8ff

Browse files
committed
ENH: start tightening up API around integer slicing per #592
1 parent 79cc4e0 commit 42ce8ff

File tree

6 files changed

+107
-35
lines changed

6 files changed

+107
-35
lines changed

RELEASE.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ pandas 0.7.0
123123
- Improve the performance of ``DataFrame.sort_index`` by up to 5x or more
124124
when sorting by multiple columns
125125
- Substantially improve performance of DataFrame and Series constructors when
126-
passed a nested dict or dict, respectively (GH #540)
126+
passed a nested dict or dict, respectively (GH #540, GH #621)
127127
- Modified setup.py so that pip / setuptools will install dependencies (GH
128128
#507, various pull requests)
129129
- Unstack called on DataFrame with non-MultiIndex will return Series (GH

pandas/core/index.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -750,17 +750,25 @@ def slice_locs(self, start=None, end=None):
750750
"""
751751
if start is None:
752752
beg_slice = 0
753-
elif start in self:
754-
beg_slice = self.get_loc(start)
755753
else:
756-
beg_slice = self.searchsorted(start, side='left')
754+
try:
755+
beg_slice = self.get_loc(start)
756+
except KeyError:
757+
if self.is_monotonic:
758+
beg_slice = self.searchsorted(start, side='left')
759+
else:
760+
raise
757761

758762
if end is None:
759763
end_slice = len(self)
760-
elif end in self:
761-
end_slice = self.get_loc(end) + 1
762764
else:
763-
end_slice = self.searchsorted(end, side='right')
765+
try:
766+
end_slice = self.get_loc(end) + 1
767+
except KeyError:
768+
if self.is_monotonic:
769+
end_slice = self.searchsorted(end, side='right')
770+
else:
771+
raise
764772

765773
return beg_slice, end_slice
766774

pandas/core/indexing.py

Lines changed: 54 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -207,29 +207,44 @@ def _convert_to_indexer(self, obj, axis=0):
207207
raise AmbiguousIndexError with integer labels?
208208
- No, prefer label-based indexing
209209
"""
210-
index = self.obj._get_axis(axis)
210+
labels = self.obj._get_axis(axis)
211211

212212
try:
213-
return index.get_loc(obj)
213+
return labels.get_loc(obj)
214214
except (KeyError, TypeError):
215215
pass
216216

217-
is_int_index = _is_integer_index(index)
217+
is_int_index = _is_integer_index(labels)
218218
if isinstance(obj, slice):
219-
if _is_label_slice(index, obj):
220-
i, j = index.slice_locs(obj.start, obj.stop)
221219

222-
if obj.step is not None:
223-
raise Exception('Non-zero step not supported with '
224-
'label-based slicing')
225-
return slice(i, j)
220+
int_slice = _is_integer_slice(obj)
221+
null_slice = obj.start is None and obj.stop is None
222+
# could have integers in the first level of the MultiIndex
223+
position_slice = (int_slice
224+
and not labels.inferred_type == 'integer'
225+
and not isinstance(labels, MultiIndex))
226+
227+
if null_slice or position_slice:
228+
slicer = obj
226229
else:
227-
return obj
230+
try:
231+
i, j = labels.slice_locs(obj.start, obj.stop)
232+
slicer = slice(i, j, obj.step)
233+
except Exception:
234+
if _is_integer_slice(obj):
235+
if labels.inferred_type == 'integer':
236+
raise
237+
slicer = obj
238+
else:
239+
raise
240+
241+
return slicer
242+
228243
elif _is_list_like(obj):
229244
objarr = _asarray_tuplesafe(obj)
230245

231246
if objarr.dtype == np.bool_:
232-
if not obj.index.equals(index):
247+
if not obj.index.equals(labels):
233248
raise IndexingError('Cannot use boolean index with '
234249
'misaligned or unequal labels')
235250
return objarr
@@ -238,7 +253,7 @@ def _convert_to_indexer(self, obj, axis=0):
238253
if _is_integer_dtype(objarr) and not is_int_index:
239254
return objarr
240255

241-
indexer = index.get_indexer(objarr)
256+
indexer = labels.get_indexer(objarr)
242257
mask = indexer == -1
243258
if mask.any():
244259
raise KeyError('%s not in index' % objarr[mask])
@@ -247,7 +262,7 @@ def _convert_to_indexer(self, obj, axis=0):
247262
else:
248263
if com.is_integer(obj) and not is_int_index:
249264
return obj
250-
return index.get_loc(obj)
265+
return labels.get_loc(obj)
251266

252267
def _tuplify(self, loc):
253268
tup = [slice(None, None) for _ in range(self.ndim)]
@@ -259,21 +274,40 @@ def _get_slice_axis(self, slice_obj, axis=0):
259274

260275
axis_name = obj._get_axis_name(axis)
261276
labels = getattr(obj, axis_name)
262-
if _is_label_slice(labels, slice_obj):
263-
i, j = labels.slice_locs(slice_obj.start, slice_obj.stop)
264-
slicer = slice(i, j)
265277

266-
if slice_obj.step is not None:
267-
raise Exception('Non-zero step not supported with label-based '
268-
'slicing')
269-
else:
278+
int_slice = _is_integer_slice(slice_obj)
279+
280+
null_slice = slice_obj.start is None and slice_obj.stop is None
281+
# could have integers in the first level of the MultiIndex
282+
position_slice = (int_slice and not labels.inferred_type == 'integer'
283+
and not isinstance(labels, MultiIndex))
284+
if null_slice or position_slice:
270285
slicer = slice_obj
286+
else:
287+
try:
288+
i, j = labels.slice_locs(slice_obj.start, slice_obj.stop)
289+
slicer = slice(i, j, slice_obj.step)
290+
except Exception:
291+
if _is_integer_slice(slice_obj):
292+
if labels.inferred_type == 'integer':
293+
raise
294+
slicer = slice_obj
295+
else:
296+
raise
271297

272298
if not _need_slice(slice_obj):
273299
return obj
274300

275301
return obj._slice(slicer, axis=axis)
276302

303+
def _is_integer_slice(obj):
    """
    Tell whether a slice is bounded purely by integers

    Parameters
    ----------
    obj : slice

    Returns
    -------
    False when both ``start`` and ``stop`` are None (the null slice is
    handled separately by callers). Otherwise truthy only if every endpoint
    is either None or passes ``com.is_integer``. ``step`` is not examined.
    """
    start, stop = obj.start, obj.stop

    # a completely open slice never counts as an integer slice
    if start is None and stop is None:
        return False

    def _unset_or_int(bound):
        return bound is None or com.is_integer(bound)

    return _unset_or_int(start) and _unset_or_int(stop)
310+
277311
class _SeriesIndexer(_NDFrameIndexer):
278312
"""
279313
Class to support fancy indexing, potentially using labels

pandas/core/panel.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -269,10 +269,6 @@ def _init_dict(self, data, axes, dtype=None):
269269
if dtype is not None:
270270
v = v.astype(dtype)
271271
values = v.values
272-
273-
# if values.ndim == 2:
274-
# values = values[None, :, :]
275-
276272
reshaped_data[item] = values
277273

278274
# segregates dtypes and forms blocks matching to columns

pandas/tests/test_frame.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -328,8 +328,36 @@ def test_getitem_fancy_2d(self):
328328

329329
def test_getitem_fancy_slice_integers_step(self):
330330
df = DataFrame(np.random.randn(10, 5))
331-
self.assertRaises(Exception, df.ix.__getitem__, slice(0, 8, 2))
332-
self.assertRaises(Exception, df.ix.__setitem__, slice(0, 8, 2), np.nan)
331+
332+
# this is OK
333+
result = df.ix[:8:2]
334+
df.ix[:8:2] = np.nan
335+
self.assert_(isnull(df.ix[:8:2]).values.all())
336+
337+
def test_getitem_setitem_integer_slice_keyerrors(self):
    # integer index made of the even labels 0, 2, ..., 18
    frame = DataFrame(np.random.randn(10, 5), index=range(0, 20, 2))

    # on a monotonic index, setting through a slice works both when the
    # bounds are labels in the index (4:10) and when they are not (3:11)
    for lo, hi in [(4, 10), (3, 11)]:
        scratch = frame.copy()
        scratch.ix[lo:hi] = 0
        self.assert_((scratch.ix[lo:hi] == 0).values.all())

    # getting through either slice selects the same labels
    on_labels = frame.ix[4:10]
    off_labels = frame.ix[3:11]
    wanted = frame.reindex([4, 6, 8, 10])

    assert_frame_equal(on_labels, wanted)
    assert_frame_equal(off_labels, wanted)

    # reversed (non-monotonic) index: bounds that are not labels must
    # raise KeyError for both get and set
    reversed_frame = frame[::-1]
    missing_bounds = slice(3, 11)
    self.assertRaises(KeyError, reversed_frame.ix.__getitem__,
                      missing_bounds)
    self.assertRaises(KeyError, reversed_frame.ix.__setitem__,
                      missing_bounds, 0)
333361

334362
def test_setitem_fancy_2d(self):
335363
f = self.frame
@@ -1659,7 +1687,7 @@ def test_eng_float_formatter(self):
16591687

16601688
repr(self.frame)
16611689

1662-
com.set_printoptions(precision=4)
1690+
com.reset_printoptions()
16631691

16641692
def test_repr_tuples(self):
16651693
buf = StringIO()

pandas/tests/test_multilevel.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def test_getitem_toplevel(self):
235235
assert_frame_equal(result, expected)
236236
assert_frame_equal(result, result2)
237237

238-
def test_getitem_slice_integers(self):
238+
def test_getitem_setitem_slice_integers(self):
239239
index = MultiIndex(levels=[[0, 1, 2], [0, 2]],
240240
labels=[[0, 0, 1, 1, 2, 2],
241241
[0, 1, 0, 1, 0, 1]])
@@ -246,12 +246,18 @@ def test_getitem_slice_integers(self):
246246
exp = frame[2:]
247247
assert_frame_equal(res, exp)
248248

249+
frame.ix[1:2] = 7
250+
self.assert_((frame.ix[1:2] == 7).values.all())
251+
249252
series = Series(np.random.randn(len(index)), index=index)
250253

251254
res = series.ix[1:2]
252255
exp = series[2:]
253256
assert_series_equal(res, exp)
254257

258+
series.ix[1:2] = 7
259+
self.assert_((series.ix[1:2] == 7).values.all())
260+
255261
def test_getitem_int(self):
256262
levels = [[0, 1], [0, 1, 2]]
257263
labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]

0 commit comments

Comments
 (0)