Skip to content

Commit ea6408c

Browse files
committed
ENH: GH2578, allow ix and friends to partially set when the key is not contained
in the object
1 parent f373864 commit ea6408c

File tree

8 files changed

+316
-29
lines changed

8 files changed

+316
-29
lines changed

doc/source/indexing.rst

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,40 @@ the object it modified, which in the case of enlargement, will be a **new object
440440
df.at[dates[5], 'E'] = 7
441441
df.iat[3, 0] = 7
442442
443+
.. _indexing.basics.partial_setting:
444+
445+
Setting With Enlargement
446+
~~~~~~~~~~~~~~~~~~~~~~~~
447+
448+
The ``.loc/.iloc/[]`` operations can perform enlargement when setting a non-existent key for that axis.
449+
450+
In the ``Series`` case this is effectively an appending operation
451+
452+
.. ipython:: python
453+
454+
se = Series([1,2,3])
455+
se
456+
se[5] = 5.
457+
se
458+
459+
A ``DataFrame`` can be enlarged on either axis via ``.loc``
460+
461+
.. ipython:: python
462+
463+
dfi = DataFrame(np.arange(6).reshape(3,2),
464+
columns=['A','B'])
465+
dfi
466+
dfi.loc[:,'C'] = dfi.loc[:,'A']
467+
dfi
468+
469+
This is like an ``append`` operation on the ``DataFrame``.
470+
471+
.. ipython:: python
472+
473+
dfi.loc[3] = 5
474+
dfi
475+
476+
443477
Boolean indexing
444478
~~~~~~~~~~~~~~~~
445479

doc/source/release.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ pandas 0.13
123123

124124
- added ``date_unit`` parameter to specify resolution of timestamps. Options
125125
are seconds, milliseconds, microseconds and nanoseconds. (:issue:`4362`, :issue:`4498`).
126+
- allow ``ix/loc/iloc`` for Series/DataFrame/Panel to set on any axis even when the single-key is not currently contained in
127+
the index for that axis (:issue:`2578`)
126128

127129
- ``Index`` and ``MultiIndex`` changes (:issue:`4039`):
128130

@@ -296,7 +298,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
296298
- ``tslib.get_period_field()`` and ``tslib.get_period_field_arr()`` now raise
297299
if code argument out of range (:issue:`4519`, :issue:`4520`)
298300
- Fix boolean indexing on an empty series loses index names (:issue:`4235`),
299-
infer_dtype works with empty arrays.
301+
infer_dtype works with empty arrays.
300302
- Fix reindexing with multiple axes; if an axes match was not replacing the current axes, leading
301303
to a possible lazy frequency inference issue (:issue:`3317`)
302304
- Fixed issue where ``DataFrame.apply`` was reraising exceptions incorrectly

doc/source/v0.13.0.txt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,54 @@ API changes
134134
df1 and df2
135135
s1 and s2
136136

137+
Indexing API Changes
138+
~~~~~~~~~~~~~~~~~~~~
139+
140+
Prior to 0.13, it was impossible to use an indexer (``.loc/.iloc/.ix``) to set a value that
141+
was not contained in the index of a particular axis (:issue:`2578`). See more at :ref:`here<indexing.basics.partial_setting>`
142+
143+
In the ``Series`` case this is effectively an appending operation
144+
145+
.. ipython:: python
146+
147+
s = Series([1,2,3])
148+
s
149+
s[5] = 5.
150+
s
151+
152+
.. ipython:: python
153+
154+
dfi = DataFrame(np.arange(6).reshape(3,2),
155+
columns=['A','B'])
156+
dfi
157+
158+
This would previously raise a ``KeyError``
159+
160+
.. ipython:: python
161+
162+
dfi.loc[:,'C'] = dfi.loc[:,'A']
163+
dfi
164+
165+
This is like an ``append`` operation.
166+
167+
.. ipython:: python
168+
169+
dfi.loc[3] = 5
170+
dfi
171+
172+
A Panel setting operation on an arbitrary axis aligns the input to the Panel
173+
174+
.. ipython:: python
175+
176+
p = pd.Panel(np.arange(16).reshape(2,4,2),
177+
items=['Item1','Item2'],
178+
major_axis=pd.date_range('2001/1/12',periods=4),
179+
minor_axis=['A','B'],dtype='float64')
180+
p
181+
p.loc[:,:,'C'] = Series([30,32],index=p.items)
182+
p
183+
p.loc[:,:,'C']
184+
137185
Enhancements
138186
~~~~~~~~~~~~
139187

pandas/core/indexing.py

Lines changed: 136 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# pylint: disable=W0223
22

33
from datetime import datetime
4-
from pandas.core.common import _asarray_tuplesafe
4+
from pandas.core.common import _asarray_tuplesafe, is_list_like
55
from pandas.core.index import Index, MultiIndex, _ensure_index
66
from pandas.compat import range, zip
77
import pandas.compat as compat
@@ -86,27 +86,66 @@ def __setitem__(self, key, value):
8686
if len(key) > self.ndim:
8787
raise IndexingError('only tuples of length <= %d supported',
8888
self.ndim)
89-
indexer = self._convert_tuple(key)
89+
indexer = self._convert_tuple(key, is_setter=True)
9090
else:
91-
indexer = self._convert_to_indexer(key)
91+
indexer = self._convert_to_indexer(key, is_setter=True)
9292

9393
self._setitem_with_indexer(indexer, value)
9494

9595
def _has_valid_tuple(self, key):
9696
pass
9797

98-
def _convert_tuple(self, key):
98+
def _convert_tuple(self, key, is_setter=False):
9999
keyidx = []
100100
for i, k in enumerate(key):
101-
idx = self._convert_to_indexer(k, axis=i)
101+
idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter)
102102
keyidx.append(idx)
103103
return tuple(keyidx)
104104

105105
def _setitem_with_indexer(self, indexer, value):
106106

107107
# also has the side effect of consolidating in-place
108-
# mmm, spaghetti
108+
from pandas import Panel, DataFrame, Series
109109

110+
# maybe partial set
111+
if isinstance(indexer,tuple):
112+
nindexer = []
113+
for i, idx in enumerate(indexer):
114+
if isinstance(idx, dict):
115+
116+
# reindex the axis to the new value
117+
# and set inplace
118+
key,_ = _convert_missing_indexer(idx)
119+
labels = self.obj._get_axis(i) + Index([key])
120+
self.obj._data = self.obj.reindex_axis(labels,i)._data
121+
122+
nindexer.append(labels.get_loc(key))
123+
else:
124+
nindexer.append(idx)
125+
126+
indexer = tuple(nindexer)
127+
else:
128+
129+
indexer, missing = _convert_missing_indexer(indexer)
130+
131+
if missing:
132+
133+
# reindex the axis to the new value
134+
# and set inplace
135+
if self.ndim == 1:
136+
self.obj._data = self.obj.append(Series(value,index=[indexer]))._data
137+
return
138+
139+
elif self.ndim == 2:
140+
labels = self.obj._get_axis(0) + Index([indexer])
141+
self.obj._data = self.obj.reindex_axis(labels,0)._data
142+
return getattr(self.obj,self.name).__setitem__(indexer,value)
143+
144+
# set using setitem (Panel and > dims)
145+
elif self.ndim >= 3:
146+
return self.obj.__setitem__(indexer,value)
147+
148+
# align and set the values
110149
if self.obj._is_mixed_type:
111150
if not isinstance(indexer, tuple):
112151
indexer = self._tuplify(indexer)
@@ -192,14 +231,73 @@ def setter(item, v):
192231
def _align_series(self, indexer, ser):
193232
# indexer to assign Series can be tuple or scalar
194233
if isinstance(indexer, tuple):
234+
235+
aligners = [ not _is_null_slice(idx) for idx in indexer ]
236+
single_aligner = sum(aligners) == 1
237+
is_frame = self.obj.ndim == 2
238+
is_panel = self.obj.ndim >= 3
239+
240+
# are we a single alignable value on a non-primary
241+
# dim (e.g. panel: 1,2, or frame: 0) ?
242+
# hence need to align to a single axis dimension
243+
# rather that find all valid dims
244+
245+
# frame
246+
if is_frame:
247+
single_aligner = single_aligner and aligners[0]
248+
249+
# panel
250+
elif is_panel:
251+
single_aligner = single_aligner and (aligners[1] or aligners[2])
252+
253+
obj = self.obj
195254
for i, idx in enumerate(indexer):
196-
ax = self.obj.axes[i]
255+
ax = obj.axes[i]
256+
257+
# multiple aligners (or null slices)
197258
if com._is_sequence(idx) or isinstance(idx, slice):
259+
if single_aligner and _is_null_slice(idx):
260+
continue
198261
new_ix = ax[idx]
262+
if not is_list_like(new_ix):
263+
new_ix = Index([new_ix])
199264
if ser.index.equals(new_ix):
200265
return ser.values.copy()
201266
return ser.reindex(new_ix).values
202267

268+
# 2 dims
269+
elif single_aligner and is_frame:
270+
271+
# reindex along index
272+
ax = self.obj.axes[1]
273+
if ser.index.equals(ax):
274+
return ser.values.copy()
275+
return ser.reindex(ax).values
276+
277+
# >2 dims
278+
elif single_aligner:
279+
280+
broadcast = []
281+
for n, labels in enumerate(self.obj._get_plane_axes(i)):
282+
283+
# reindex along the matching dimensions
284+
if len(labels & ser.index):
285+
ser = ser.reindex(labels)
286+
else:
287+
broadcast.append((n,len(labels)))
288+
289+
# broadcast along other dims
290+
ser = ser.values.copy()
291+
for (axis,l) in broadcast:
292+
shape = [ -1 ] * (len(broadcast)+1)
293+
shape[axis] = l
294+
ser = np.tile(ser,l).reshape(shape)
295+
296+
if self.obj.ndim == 3:
297+
ser = ser.T
298+
299+
return ser
300+
203301
elif np.isscalar(indexer):
204302
ax = self.obj._get_axis(1)
205303

@@ -521,7 +619,7 @@ def _reindex(keys, level=None):
521619

522620
return result
523621

524-
def _convert_to_indexer(self, obj, axis=0):
622+
def _convert_to_indexer(self, obj, axis=0, is_setter=False):
525623
"""
526624
Convert indexing key into something we can use to do actual fancy
527625
indexing on an ndarray
@@ -639,7 +737,14 @@ def _convert_to_indexer(self, obj, axis=0):
639737
return indexer
640738

641739
else:
642-
return labels.get_loc(obj)
740+
try:
741+
return labels.get_loc(obj)
742+
except (KeyError):
743+
744+
# allow a not found key only if we are a setter
745+
if np.isscalar(obj) and is_setter:
746+
return { 'key' : obj }
747+
raise
643748

644749
def _tuplify(self, loc):
645750
tup = [slice(None, None) for _ in range(self.ndim)]
@@ -877,7 +982,7 @@ def _getitem_axis(self, key, axis=0):
877982

878983
return self._get_loc(key,axis=axis)
879984

880-
def _convert_to_indexer(self, obj, axis=0):
985+
def _convert_to_indexer(self, obj, axis=0, is_setter=False):
881986
""" much simpler as we only have to deal with our valid types """
882987
if self._has_valid_type(obj,axis):
883988
return obj
@@ -1028,6 +1133,12 @@ def _slice(self, indexer, axis=0):
10281133
return self.obj._get_values(indexer)
10291134

10301135
def _setitem_with_indexer(self, indexer, value):
1136+
1137+
# need to delegate to the super setter
1138+
if isinstance(indexer, dict):
1139+
return super(_SeriesIndexer, self)._setitem_with_indexer(indexer, value)
1140+
1141+
# fast access
10311142
self.obj._set_values(indexer, value)
10321143

10331144
def _check_bool_indexer(ax, key):
@@ -1053,6 +1164,21 @@ def _check_bool_indexer(ax, key):
10531164
return result
10541165

10551166

1167+
def _convert_missing_indexer(indexer):
1168+
""" reverse convert a missing indexer, which is a dict
1169+
return the scalar indexer and a boolean indicating if we converted """
1170+
1171+
if isinstance(indexer, dict):
1172+
1173+
# a missing key (but not a tuple indexer)
1174+
indexer = indexer['key']
1175+
1176+
if isinstance(indexer, bool):
1177+
raise KeyError("cannot use a single bool to index into setitem")
1178+
return indexer, True
1179+
1180+
return indexer, False
1181+
10561182
def _maybe_convert_indices(indices, n):
10571183
""" if we have negative indicies, translate to postive here
10581184
if have indicies that are out-of-bounds, raise an IndexError """

pandas/core/internals.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2647,7 +2647,7 @@ def reindex_axis(self, new_axis, indexer=None, method=None, axis=0, fill_value=N
26472647
if method is not None or limit is not None:
26482648
return self.reindex_axis0_with_method(new_axis, indexer=indexer,
26492649
method=method, fill_value=fill_value, limit=limit, copy=copy)
2650-
return self.reindex_items(new_axis, copy=copy, fill_value=fill_value)
2650+
return self.reindex_items(new_axis, indexer=indexer, copy=copy, fill_value=fill_value)
26512651

26522652
new_axis, indexer = cur_axis.reindex(
26532653
new_axis, method, copy_if_needed=True)
@@ -2709,7 +2709,7 @@ def _reindex_indexer_items(self, new_items, indexer, fill_value):
27092709

27102710
return self.__class__(new_blocks, new_axes)
27112711

2712-
def reindex_items(self, new_items, copy=True, fill_value=None):
2712+
def reindex_items(self, new_items, indexer=None, copy=True, fill_value=None):
27132713
"""
27142714
27152715
"""
@@ -2719,8 +2719,8 @@ def reindex_items(self, new_items, copy=True, fill_value=None):
27192719
data = data.consolidate()
27202720
return data.reindex_items(new_items, copy=copy, fill_value=fill_value)
27212721

2722-
# TODO: this part could be faster (!)
2723-
new_items, indexer = self.items.reindex(new_items, copy_if_needed=True)
2722+
if indexer is None:
2723+
new_items, indexer = self.items.reindex(new_items, copy_if_needed=True)
27242724
new_axes = [new_items] + self.axes[1:]
27252725

27262726
# could have so me pathological (MultiIndex) issues here

pandas/tests/test_frame.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -866,9 +866,11 @@ def test_fancy_index_int_labels_exceptions(self):
866866
self.assertRaises(KeyError,
867867
self.frame.ix.__setitem__,
868868
(slice(None, None), ['E']), 1)
869-
self.assertRaises(KeyError,
870-
self.frame.ix.__setitem__,
871-
(slice(None, None), 'E'), 1)
869+
870+
# partial setting now allows this GH2578
871+
#self.assertRaises(KeyError,
872+
# self.frame.ix.__setitem__,
873+
# (slice(None, None), 'E'), 1)
872874

873875
def test_setitem_fancy_mixed_2d(self):
874876
self.mixed_frame.ix[:5, ['C', 'B', 'A']] = 5

0 commit comments

Comments
 (0)