Skip to content

Commit 00b6c89

Browse files
committed
API: iat/iloc will raise if enlargement is specified, as it's ambiguous
CLN: set_value in Series/Frame now goes through the indexing routines in core/indexing.py
1 parent ea6408c commit 00b6c89

File tree

12 files changed

+248
-77
lines changed

12 files changed

+248
-77
lines changed

doc/source/indexing.rst

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -412,40 +412,14 @@ Pandas will detect this and raise ``IndexError``, rather than return an empty st
412412
>>> df.iloc[:,3:6]
413413
IndexError: out-of-bounds on slice (end)
414414

415-
.. _indexing.basics.get_value:
416-
417-
Fast scalar value getting and setting
418-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
419-
420-
Since indexing with ``[]`` must handle a lot of cases (single-label access,
421-
slicing, boolean indexing, etc.), it has a bit of overhead in order to figure
422-
out what you're asking for. If you only want to access a scalar value, the
423-
fastest way is to use the ``at`` and ``iat`` methods, which are implemented on
424-
all of the data structures.
425-
426-
Similary to ``loc``, ``at`` provides **label** based scalar lookups, while, ``iat`` provides **integer** based lookups analagously to ``iloc``
427-
428-
.. ipython:: python
429-
430-
s.iat[5]
431-
df.at[dates[5], 'A']
432-
df.iat[3, 0]
433-
434-
You can also set using these same indexers. These have the additional
435-
capability of enlarging an object. This method *always* returns a reference to
436-
the object it modified, which in the case of enlargement, will be a **new object**:
437-
438-
.. ipython:: python
439-
440-
df.at[dates[5], 'E'] = 7
441-
df.iat[3, 0] = 7
442-
443415
.. _indexing.basics.partial_setting:
444416

445417
Setting With Enlargement
446418
~~~~~~~~~~~~~~~~~~~~~~~~
447419

448-
The ``.loc/.iloc/[]`` operations can perform enlargement when setting a non-existant key for that axis.
420+
.. versionadded:: 0.13
421+
422+
The ``.loc/.ix/[]`` operations can perform enlargement when setting a non-existent key for that axis.
449423

450424
In the ``Series`` case this is effectively an appending operation
451425

@@ -473,6 +447,38 @@ This is like an ``append`` operation on the ``DataFrame``.
473447
dfi.loc[3] = 5
474448
dfi
475449
450+
.. _indexing.basics.get_value:
451+
452+
Fast scalar value getting and setting
453+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
454+
455+
Since indexing with ``[]`` must handle a lot of cases (single-label access,
456+
slicing, boolean indexing, etc.), it has a bit of overhead in order to figure
457+
out what you're asking for. If you only want to access a scalar value, the
458+
fastest way is to use the ``at`` and ``iat`` methods, which are implemented on
459+
all of the data structures.
460+
461+
Similarly to ``loc``, ``at`` provides **label** based scalar lookups, while ``iat`` provides **integer** based lookups analogously to ``iloc``.
462+
463+
.. ipython:: python
464+
465+
s.iat[5]
466+
df.at[dates[5], 'A']
467+
df.iat[3, 0]
468+
469+
You can also set using these same indexers.
470+
471+
.. ipython:: python
472+
473+
df.at[dates[5], 'E'] = 7
474+
df.iat[3, 0] = 7
475+
476+
``at`` may enlarge the object in-place as above if the indexer is missing.
477+
478+
.. ipython:: python
479+
480+
df.at[6, 0] = 7
481+
df
476482
477483
Boolean indexing
478484
~~~~~~~~~~~~~~~~

doc/source/release.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ pandas 0.13
9292
an alias of iteritems used to get around ``2to3``'s changes).
9393
(:issue:`4384`, :issue:`4375`, :issue:`4372`)
9494
- ``Series.get`` with negative indexers now returns the same as ``[]`` (:issue:`4390`)
95+
- allow ``ix/loc`` for Series/DataFrame/Panel to set on any axis even when the single-key is not currently contained in
96+
the index for that axis (:issue:`2578`)
97+
- ``at`` now will enlarge the object inplace (and return the same) (:issue:`2578`)
9598

9699
- ``HDFStore``
97100

@@ -123,8 +126,6 @@ pandas 0.13
123126

124127
- added ``date_unit`` parameter to specify resolution of timestamps. Options
125128
are seconds, milliseconds, microseconds and nanoseconds. (:issue:`4362`, :issue:`4498`).
126-
- allow ``ix/loc/iloc`` for Series/DataFrame/Panel to set on any axis even when the single-key is not currently contained in
127-
the index for that axis (:issue:`2578`)
128129

129130
- ``Index`` and ``MultiIndex`` changes (:issue:`4039`):
130131

pandas/core/frame.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,18 +1731,12 @@ def set_value(self, index, col, value):
17311731
engine.set_value(series.values, index, value)
17321732
return self
17331733
except KeyError:
1734-
new_index, new_columns = self._expand_axes((index, col))
1735-
result = self.reindex(index=new_index, columns=new_columns,
1736-
copy=False)
1737-
likely_dtype, value = _infer_dtype_from_scalar(value)
17381734

1739-
made_bigger = not np.array_equal(new_columns, self.columns)
1735+
# set using a non-recursive method & reset the cache
1736+
self.loc[index,col] = value
1737+
self._item_cache.pop(col,None)
17401738

1741-
# how to make this logic simpler?
1742-
if made_bigger:
1743-
com._possibly_cast_item(result, col, likely_dtype)
1744-
1745-
return result.set_value(index, col, value)
1739+
return self
17461740

17471741
def irow(self, i, copy=False):
17481742
return self._ixs(i, axis=0)

pandas/core/indexing.py

Lines changed: 83 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,41 @@ def _convert_tuple(self, key, is_setter=False):
102102
keyidx.append(idx)
103103
return tuple(keyidx)
104104

105+
def _has_valid_setitem_indexer(self, indexer):
106+
return True
107+
108+
def _has_valid_positional_setitem_indexer(self, indexer):
109+
""" validate that an positional indexer cannot enlarge its target
110+
will raise if needed, does not modify the indexer externally """
111+
if isinstance(indexer, dict):
112+
raise IndexError("{0} cannot enlarge its target object".format(self.name))
113+
else:
114+
if not isinstance(indexer, tuple):
115+
indexer = self._tuplify(indexer)
116+
for ax, i in zip(self.obj.axes,indexer):
117+
if isinstance(i, slice):
118+
# should check the stop slice?
119+
pass
120+
elif is_list_like(i):
121+
# should check the elements?
122+
pass
123+
elif com.is_integer(i):
124+
if i >= len(ax):
125+
raise IndexError("{0} cannot enlarge its target object".format(self.name))
126+
elif isinstance(i, dict):
127+
raise IndexError("{0} cannot enlarge its target object".format(self.name))
128+
129+
return True
130+
105131
def _setitem_with_indexer(self, indexer, value):
106132

133+
self._has_valid_setitem_indexer(indexer)
134+
107135
# also has the side effect of consolidating in-place
108136
from pandas import Panel, DataFrame, Series
109137

110138
# maybe partial set
139+
take_split_path = self.obj._is_mixed_type
111140
if isinstance(indexer,tuple):
112141
nindexer = []
113142
for i, idx in enumerate(indexer):
@@ -116,10 +145,26 @@ def _setitem_with_indexer(self, indexer, value):
116145
# reindex the axis to the new value
117146
# and set inplace
118147
key,_ = _convert_missing_indexer(idx)
119-
labels = self.obj._get_axis(i) + Index([key])
148+
149+
# if this is the items axes, then take the main missing path
150+
# first; this correctly sets the dtype and avoids cache issues
151+
# essentially this separates out the block that is needed to possibly
152+
# be modified
153+
if self.ndim > 1 and i == self.obj._info_axis_number:
154+
155+
# add the new item, and set the value
156+
new_indexer = _convert_from_missing_indexer_tuple(indexer)
157+
self.obj[key] = np.nan
158+
self.obj.loc[new_indexer] = value
159+
return self.obj
160+
161+
# reindex the axis
162+
index = self.obj._get_axis(i)
163+
labels = _safe_append_to_index(index, key)
120164
self.obj._data = self.obj.reindex_axis(labels,i)._data
121165

122166
nindexer.append(labels.get_loc(key))
167+
123168
else:
124169
nindexer.append(idx)
125170

@@ -133,11 +178,19 @@ def _setitem_with_indexer(self, indexer, value):
133178
# reindex the axis to the new value
134179
# and set inplace
135180
if self.ndim == 1:
136-
self.obj._data = self.obj.append(Series(value,index=[indexer]))._data
137-
return
181+
index = self.obj.index
182+
if len(index) == 0:
183+
new_index = Index([indexer])
184+
else:
185+
new_index = _safe_append_to_index(index, indexer)
186+
187+
new_values = np.concatenate([self.obj.values, [value]])
188+
self.obj._data = self.obj._constructor(new_values, index=new_index, name=self.obj.name)
189+
return self.obj
138190

139191
elif self.ndim == 2:
140-
labels = self.obj._get_axis(0) + Index([indexer])
192+
index = self.obj._get_axis(0)
193+
labels = _safe_append_to_index(index, indexer)
141194
self.obj._data = self.obj.reindex_axis(labels,0)._data
142195
return getattr(self.obj,self.name).__setitem__(indexer,value)
143196

@@ -146,7 +199,7 @@ def _setitem_with_indexer(self, indexer, value):
146199
return self.obj.__setitem__(indexer,value)
147200

148201
# align and set the values
149-
if self.obj._is_mixed_type:
202+
if take_split_path:
150203
if not isinstance(indexer, tuple):
151204
indexer = self._tuplify(indexer)
152205

@@ -732,6 +785,10 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
732785

733786
mask = check == -1
734787
if mask.any():
788+
789+
# mi here
790+
if isinstance(obj, tuple) and is_setter:
791+
return { 'key' : obj }
735792
raise KeyError('%s not in index' % objarr[mask])
736793

737794
return indexer
@@ -742,7 +799,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
742799
except (KeyError):
743800

744801
# allow a not found key only if we are a setter
745-
if np.isscalar(obj) and is_setter:
802+
if not is_list_like(obj) and is_setter:
746803
return { 'key' : obj }
747804
raise
748805

@@ -933,6 +990,9 @@ def _has_valid_type(self, key, axis):
933990

934991
return isinstance(key, slice) or com.is_integer(key) or _is_list_like(key)
935992

993+
def _has_valid_setitem_indexer(self, indexer):
994+
self._has_valid_positional_setitem_indexer(indexer)
995+
936996
def _getitem_tuple(self, tup):
937997

938998
self._has_valid_tuple(tup)
@@ -965,7 +1025,6 @@ def _get_slice_axis(self, slice_obj, axis=0):
9651025
return self.obj.take(slice_obj, axis=axis)
9661026

9671027
def _getitem_axis(self, key, axis=0):
968-
9691028
if isinstance(key, slice):
9701029
self._has_valid_type(key,axis)
9711030
return self._get_slice_axis(key, axis=axis)
@@ -1005,14 +1064,12 @@ def __getitem__(self, key):
10051064
else:
10061065
raise ValueError('Invalid call for scalar access (getting)!')
10071066

1008-
if len(key) != self.obj.ndim:
1009-
raise ValueError('Not enough indexers for scalar access (getting)!')
10101067
key = self._convert_key(key)
10111068
return self.obj.get_value(*key)
10121069

10131070
def __setitem__(self, key, value):
10141071
if not isinstance(key, tuple):
1015-
raise ValueError('Invalid call for scalar access (setting)!')
1072+
key = self._tuplify(key)
10161073
if len(key) != self.obj.ndim:
10171074
raise ValueError('Not enough indexers for scalar access (setting)!')
10181075
key = self._convert_key(key)
@@ -1026,6 +1083,9 @@ class _AtIndexer(_ScalarAccessIndexer):
10261083
class _iAtIndexer(_ScalarAccessIndexer):
10271084
""" integer based scalar accessor """
10281085

1086+
def _has_valid_setitem_indexer(self, indexer):
1087+
self._has_valid_positional_setitem_indexer(indexer)
1088+
10291089
def _convert_key(self, key):
10301090
""" require integer args (and convert to label arguments) """
10311091
ckey = []
@@ -1179,6 +1239,19 @@ def _convert_missing_indexer(indexer):
11791239

11801240
return indexer, False
11811241

1242+
def _convert_from_missing_indexer_tuple(indexer):
1243+
""" create a filtered indexer that doesn't have any missing indexers """
1244+
def get_indexer(_idx):
1245+
return _idx['key'] if isinstance(_idx,dict) else _idx
1246+
return tuple([ get_indexer(_idx) for _i, _idx in enumerate(indexer) ])
1247+
1248+
def _safe_append_to_index(index, key):
1249+
""" a safe append to an index, if incorrect type, then catch and recreate """
1250+
try:
1251+
return index.insert(len(index), key)
1252+
except:
1253+
return Index(np.concatenate([index.asobject.values,np.array([key])]))
1254+
11821255
def _maybe_convert_indices(indices, n):
11831256
""" if we have negative indicies, translate to postive here
11841257
if have indicies that are out-of-bounds, raise an IndexError """

pandas/core/internals.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from pandas.core.common import (_possibly_downcast_to_dtype, isnull, notnull,
1111
_NS_DTYPE, _TD_DTYPE, ABCSeries, ABCSparseSeries,
12-
is_list_like)
12+
is_list_like, _infer_dtype_from_scalar)
1313
from pandas.core.index import (Index, MultiIndex, _ensure_index,
1414
_handle_legacy_indexes)
1515
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
@@ -460,6 +460,24 @@ def _try_cast_result(self, result, dtype=None):
460460
if self.is_integer or self.is_bool or self.is_datetime:
461461
pass
462462
elif self.is_float and result.dtype == self.dtype:
463+
464+
# protect against a bool/object showing up here
465+
if isinstance(dtype,compat.string_types) and dtype == 'infer':
466+
return result
467+
if not isinstance(dtype,type):
468+
dtype = dtype.type
469+
if issubclass(dtype,(np.bool_,np.object_)):
470+
if issubclass(dtype,np.bool_):
471+
if isnull(result).all():
472+
return result.astype(np.bool_)
473+
else:
474+
result = result.astype(np.object_)
475+
result[result==1] = True
476+
result[result==0] = False
477+
return result
478+
else:
479+
return result.astype(np.object_)
480+
463481
return result
464482

465483
# may need to change the dtype here
@@ -536,8 +554,12 @@ def setitem(self, indexer, value):
536554
values[indexer] = value
537555

538556
# coerce and try to infer the dtypes of the result
557+
if np.isscalar(value):
558+
dtype,_ = _infer_dtype_from_scalar(value)
559+
else:
560+
dtype = 'infer'
539561
values = self._try_coerce_result(values)
540-
values = self._try_cast_result(values, 'infer')
562+
values = self._try_cast_result(values, dtype)
541563
return [make_block(transf(values), self.items, self.ref_items, ndim=self.ndim, fastpath=True)]
542564
except:
543565
pass
@@ -902,7 +924,7 @@ def _can_hold_element(self, element):
902924
if is_list_like(element):
903925
element = np.array(element)
904926
return issubclass(element.dtype.type, (np.floating, np.integer))
905-
return isinstance(element, (float, int))
927+
return isinstance(element, (float, int, np.float_, np.int_)) and not isinstance(bool,np.bool_)
906928

907929
def _try_cast(self, element):
908930
try:

pandas/core/series.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,13 +1204,10 @@ def set_value(self, label, value):
12041204
self.index._engine.set_value(self.values, label, value)
12051205
return self
12061206
except KeyError:
1207-
if len(self.index) == 0:
1208-
new_index = Index([label])
1209-
else:
1210-
new_index = self.index.insert(len(self), label)
12111207

1212-
new_values = np.concatenate([self.values, [value]])
1213-
return self._constructor(new_values, index=new_index, name=self.name)
1208+
# set using a non-recursive method
1209+
self.loc[label] = value
1210+
return self
12141211

12151212
def reset_index(self, level=None, drop=False, name=None, inplace=False):
12161213
"""

pandas/sparse/tests/test_sparse.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,7 @@ def test_icol(self):
10751075
type(iframe.icol(0).sp_index))
10761076

10771077
def test_set_value(self):
1078+
10781079
res = self.frame.set_value('foobar', 'B', 1.5)
10791080
self.assert_(res is not self.frame)
10801081
self.assert_(res.index[-1] == 'foobar')

0 commit comments

Comments
 (0)