Skip to content

Commit ed5d891

Browse files
committed
TST: indexing testing with minor Series.__getitem__ refactoring
1 parent 41ec919 commit ed5d891

File tree

7 files changed

+164
-51
lines changed

7 files changed

+164
-51
lines changed

pandas/core/format.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,8 @@ def _get_formatted_values(self):
101101
fmt_values.append(' %s' % self.formatter(v))
102102
elif is_float[i]:
103103
fmt_values.append(float_format(v))
104-
elif not leading_space:
105-
fmt_values.append(' %s' % self.formatter(v))
106104
else:
107-
fmt_values.append(self.formatter(v))
105+
fmt_values.append(' %s' % self.formatter(v))
108106
else:
109107
fmt_values = _format_fixed_width(self.series.values,
110108
self.formatter)

pandas/core/index.py

Lines changed: 77 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pandas.util.decorators import cache_readonly
1111
import pandas.core.common as com
1212
import pandas._tseries as lib
13-
import pandas._engines as _engines
13+
import pandas._engines as _gin
1414

1515
__all__ = ['Index']
1616

@@ -24,6 +24,8 @@ def wrapper(self, other):
2424
return func(other)
2525
return wrapper
2626

27+
class InvalidIndexError(Exception):
28+
pass
2729

2830
class Index(np.ndarray):
2931
"""
@@ -146,8 +148,8 @@ def _cleanup(self):
146148
def _engine(self):
147149
import weakref
148150
# property, for now, slow to look up
149-
return _engines.DictIndexEngine(weakref.ref(self),
150-
self._map_indices)
151+
return _gin.DictIndexEngine(weakref.ref(self),
152+
self._map_indices)
151153

152154
def _get_level_number(self, level):
153155
if not isinstance(level, int):
@@ -469,12 +471,31 @@ def get_loc(self, key):
469471
"""
470472
return self._engine.get_loc(key)
471473

472-
def get_value(self, arr, key):
474+
def get_value(self, series, key):
473475
"""
474476
Fast lookup of value from 1-dimensional ndarray. Only use this if you
475477
know what you're doing
476478
"""
477-
return self._engine.get_value(arr, key)
479+
try:
480+
return self._engine.get_value(series, key)
481+
except KeyError, e1:
482+
if self.inferred_type == 'integer':
483+
raise
484+
485+
try:
486+
return _gin.get_value_at(series, key)
487+
except IndexError:
488+
raise
489+
except TypeError:
490+
# generator/iterator-like
491+
if hasattr(key, 'next'):
492+
raise InvalidIndexError(key)
493+
else:
494+
raise e1
495+
except Exception: # pragma: no cover
496+
raise e1
497+
except TypeError:
498+
raise InvalidIndexError(key)
478499

479500
def set_value(self, arr, key, value):
480501
"""
@@ -1091,6 +1112,40 @@ def has_duplicates(self):
10911112

10921113
return False
10931114

1115+
def get_value(self, series, key):
1116+
# somewhat broken encapsulation
1117+
from pandas.core.indexing import _maybe_droplevels
1118+
from pandas.core.series import Series
1119+
1120+
# Label-based
1121+
try:
1122+
return self._engine.get_value(series, key)
1123+
except KeyError, e1:
1124+
try:
1125+
# TODO: what if a level contains tuples??
1126+
loc = self.get_loc(key)
1127+
new_values = series.values[loc]
1128+
new_index = self[loc]
1129+
new_index = _maybe_droplevels(new_index, key)
1130+
return Series(new_values, index=new_index, name=series.name)
1131+
except KeyError:
1132+
pass
1133+
1134+
try:
1135+
return _gin.get_value_at(series, key)
1136+
except IndexError:
1137+
raise
1138+
except TypeError:
1139+
# generator/iterator-like
1140+
if hasattr(key, 'next'):
1141+
raise InvalidIndexError(key)
1142+
else:
1143+
raise e1
1144+
except Exception: # pragma: no cover
1145+
raise e1
1146+
except TypeError:
1147+
raise InvalidIndexError(key)
1148+
10941149
def get_level_values(self, level):
10951150
"""
10961151
Return vector of label values for requested level, equal to the length
@@ -1641,22 +1696,31 @@ def get_loc_level(self, key, level=0):
16411696
else:
16421697
indexer = None
16431698
for i, k in enumerate(key):
1644-
if k is None:
1645-
continue
1699+
if not isinstance(k, slice):
1700+
k = self._get_level_indexer(k, level=i)
1701+
if isinstance(k, slice):
1702+
# everything
1703+
if k.start == 0 and k.stop == len(self):
1704+
k = slice(None, None)
1705+
else:
1706+
k_index = k
16461707

16471708
if isinstance(k, slice):
16481709
if k == slice(None, None):
1649-
continue
1710+
continue
16501711
else:
1651-
k_index = np.empty(len(self), dtype=bool)
1652-
k_index[k] = True
1653-
else:
1654-
k_index = self._get_level_indexer(k, level=i)
1712+
raise NotImplementedError
1713+
# if self.levels[i].inferred_type == 'integer':
1714+
# raise NotImplementedError
1715+
# k_index = np.zeros(len(self), dtype=bool)
1716+
# k_index[k] = True
16551717

16561718
if indexer is None:
16571719
indexer = k_index
1658-
else:
1720+
else: # pragma: no cover
16591721
indexer &= k_index
1722+
if indexer is None:
1723+
indexer = slice(None, None)
16601724
return indexer
16611725
else:
16621726
return self._get_level_indexer(key, level=level)

pandas/core/series.py

Lines changed: 13 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from itertools import izip
99
import csv
1010
import operator
11+
import types
1112

1213
from numpy import nan, ndarray
1314
import numpy as np
@@ -18,17 +19,16 @@
1819
_asarray_tuplesafe)
1920
from pandas.core.daterange import DateRange
2021
from pandas.core.format import SeriesFormatter
21-
from pandas.core.index import Index, MultiIndex, _ensure_index
22-
from pandas.core.indexing import _SeriesIndexer, _maybe_droplevels
22+
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
23+
_ensure_index)
24+
from pandas.core.indexing import _SeriesIndexer
2325
from pandas.util import py3compat
2426
from pandas.util.terminal import get_terminal_size
2527
import pandas.core.common as com
2628
import pandas.core.datetools as datetools
2729
import pandas.core.generic as generic
2830
import pandas.core.nanops as nanops
2931
import pandas._tseries as lib
30-
import pandas._engines as _gin
31-
3232
from pandas.util.decorators import Appender, Substitution
3333

3434
__all__ = ['Series', 'TimeSeries']
@@ -261,39 +261,17 @@ def ix(self):
261261
return self._ix
262262

263263
def __getitem__(self, key):
264-
index = self.index
265-
266-
# Label-based
267264
try:
268-
return index._engine.get_value(self, key)
269-
except KeyError, e1:
270-
if isinstance(index, MultiIndex):
271-
values = self.values
272-
try:
273-
loc = index.get_loc(key)
274-
# TODO: what if a level contains tuples??
275-
new_index = index[loc]
276-
new_index = _maybe_droplevels(new_index, key)
277-
return Series(values[loc], index=new_index,
278-
name=self.name)
279-
except KeyError:
280-
pass
281-
282-
if index.inferred_type == 'integer':
283-
raise
284-
285-
try:
286-
return _gin.get_value_at(self, key)
287-
except IndexError:
288-
raise
289-
except Exception, _:
290-
pass
291-
raise e1
292-
except TypeError:
265+
return self.index.get_value(self, key)
266+
except InvalidIndexError:
293267
pass
268+
except Exception:
269+
raise
294270

295-
# boolean
271+
if hasattr(key, 'next'):
272+
key = list(key)
296273

274+
# boolean
297275
# special handling of boolean data with NAs stored in object
298276
# arrays. Since we can't represent NA with dtype=bool
299277
if _is_bool_indexer(key):
@@ -311,7 +289,7 @@ def _get_with(self, key):
311289
if isinstance(key, tuple):
312290
return self._get_values_tuple(key)
313291

314-
if not isinstance(key, (list, np.ndarray)):
292+
if not isinstance(key, (list, np.ndarray)): # pragma: no cover
315293
key = list(key)
316294

317295
key_type = lib.infer_dtype(key)
@@ -348,7 +326,7 @@ def _get_values_tuple(self, key):
348326
# kludgearound
349327
new_index = result.index
350328
for i, k in reversed(list(enumerate(key))):
351-
if k != slice(None, None):
329+
if not isinstance(k, slice):
352330
new_index = new_index.droplevel(i)
353331
result.index = new_index
354332

pandas/tests/test_frame.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2322,6 +2322,8 @@ def test_append_series_dict(self):
23222322

23232323
series = df.ix[4]
23242324
self.assertRaises(Exception, df.append, series)
2325+
series.name = None
2326+
self.assertRaises(Exception, df.append, series)
23252327

23262328
result = df.append(series[::-1], ignore_index=True)
23272329
expected = df.append(DataFrame({0 : series[::-1]},

pandas/tests/test_index.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,31 @@ def test_get_loc_duplicates(self):
803803
index = Index([2, 2, 2, 2])
804804
self.assertRaises(Exception, index.get_loc, 2)
805805

806+
def test_get_loc_level(self):
807+
index = MultiIndex(levels=[Index(range(4)),
808+
Index(range(4)),
809+
Index(range(4))],
810+
labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
811+
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
812+
np.array([1, 0, 1, 1, 0, 0, 1, 0])])
813+
814+
loc = index.get_loc_level((0, 1))
815+
expected = slice(1, 2)
816+
self.assertEqual(loc, expected)
817+
818+
loc = index.get_loc_level((0, 1, 0))
819+
expected = 1
820+
self.assertEqual(loc, expected)
821+
822+
self.assertRaises(KeyError, index.get_loc_level, (2, 2))
823+
824+
index = MultiIndex(levels=[[2000], range(4)],
825+
labels=[np.array([0, 0, 0, 0]),
826+
np.array([0, 1, 2, 3])])
827+
result = index.get_loc_level((2000, slice(None, None)))
828+
expected = slice(None, None)
829+
self.assertEqual(result, expected)
830+
806831
def test_slice_locs(self):
807832
df = tm.makeTimeDataFrame()
808833
stacked = df.stack()

pandas/tests/test_multilevel.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,18 @@ def test_series_getitem(self):
175175
# key error
176176
self.assertRaises(KeyError, s.__getitem__, (2000, 3, 4))
177177

178+
def test_series_getitem_corner(self):
179+
s = self.ymd['A']
180+
181+
# don't segfault, GH #495
182+
# out of bounds access
183+
self.assertRaises(IndexError, s.__getitem__, len(self.ymd))
184+
185+
# generator
186+
result = s[(x > 0 for x in s)]
187+
expected = s[s > 0]
188+
assert_series_equal(result, expected)
189+
178190
def test_series_setitem(self):
179191
s = self.ymd['A']
180192

@@ -216,6 +228,24 @@ def test_xs_level_series(self):
216228
expected = self.frame.xs('two', level=1)['A']
217229
assert_series_equal(result, expected)
218230

231+
s = self.ymd['A']
232+
result = s[2000, 5]
233+
expected = self.ymd.ix[2000, 5]['A']
234+
assert_series_equal(result, expected)
235+
236+
# not implementing this for now
237+
238+
self.assertRaises(NotImplementedError, s.__getitem__,
239+
(2000, slice(3, 4)))
240+
241+
# result = s[2000, 3:4]
242+
# lv =s.index.get_level_values(1)
243+
# expected = s[(lv == 3) | (lv == 4)]
244+
# expected.index = expected.index.droplevel(0)
245+
# assert_series_equal(result, expected)
246+
247+
# can do this though
248+
219249
def test_fancy_2d(self):
220250
result = self.frame.ix['foo', 'B']
221251
expected = self.frame.xs('foo')['B']

pandas/tests/test_series.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,14 @@ def test_getitem_boolean(self):
361361
assert_series_equal(result, expected)
362362
self.assert_(np.array_equal(result.index, s.index[mask]))
363363

364+
def test_getitem_generator(self):
365+
gen = (x > 0 for x in self.series)
366+
result = self.series[gen]
367+
result2 = self.series[iter(self.series > 0)]
368+
expected = self.series[self.series > 0]
369+
assert_series_equal(result, expected)
370+
assert_series_equal(result2, expected)
371+
364372
def test_getitem_boolean_object(self):
365373
# using column from DataFrame
366374
s = self.series
@@ -737,6 +745,14 @@ def test_to_string_mixed(self):
737745
'3 baz')
738746
self.assertEqual(result, expected)
739747

748+
s = Series(['foo', 5, 'bar', 'baz'])
749+
result = s.to_string()
750+
expected = ('0 foo\n'
751+
'1 5\n'
752+
'2 bar\n'
753+
'3 baz')
754+
self.assertEqual(result, expected)
755+
740756
def test_to_string_float_na_spacing(self):
741757
s = Series([0., 1.5678, 2., -3., 4.])
742758
s[::2] = np.nan

0 commit comments

Comments
 (0)