diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index cd3c3848523f0..eafd4973b5253 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -70,7 +70,7 @@ Bug Fixes - Bug in ``HDFStore.append`` with strings whose encoded length exceded the max unencoded length (:issue:`11234`) - Bug in merging ``datetime64[ns, tz]`` dtypes (:issue:`11405`) - Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`) - +- Bug in using ``DataFrame.ix`` with a multi-index indexer (:issue:`11372`) - Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 5eb25a53d4533..0f3795fcad0c3 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -443,11 +443,14 @@ def can_do_equal_len(): # we have an equal len Frame if isinstance(value, ABCDataFrame) and value.ndim > 1: sub_indexer = list(indexer) + multiindex_indexer = isinstance(labels, MultiIndex) for item in labels: if item in value: sub_indexer[info_axis] = item - v = self._align_series(tuple(sub_indexer), value[item]) + v = self._align_series( + tuple(sub_indexer), value[item], multiindex_indexer + ) else: v = np.nan @@ -516,8 +519,28 @@ def can_do_equal_len(): self.obj._data = self.obj._data.setitem(indexer=indexer, value=value) self.obj._maybe_update_cacher(clear=True) - def _align_series(self, indexer, ser): - # indexer to assign Series can be tuple, slice, scalar + def _align_series(self, indexer, ser, multiindex_indexer=False): + """ + Parameters + ---------- + indexer : tuple, slice, scalar + The indexer used to get the locations that will be set to + `ser` + + ser : pd.Series + The values to assign to the locations specified by `indexer` + + multiindex_indexer : boolean, optional + Defaults to False. Should be set to True if `indexer` was from + a `pd.MultiIndex`, to avoid unnecessary broadcasting. 
+ + + Returns + ------- + `np.array` of `ser` broadcast to the appropriate shape for assignment + to the locations selected by `indexer` + + """ if isinstance(indexer, (slice, np.ndarray, list, Index)): indexer = tuple([indexer]) @@ -555,7 +578,7 @@ def _align_series(self, indexer, ser): ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values # single indexer - if len(indexer) > 1: + if len(indexer) > 1 and not multiindex_indexer: l = len(indexer[1]) ser = np.tile(ser, l).reshape(l, -1).T diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index a2d789aaf8b70..36e825924995a 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -762,32 +762,95 @@ def compare(result, expected): result2 = s.loc[0:3] assert_series_equal(result1,result2) - def test_loc_setitem_multiindex(self): + def test_setitem_multiindex(self): + for index_fn in ('ix', 'loc'): + def check(target, indexers, value, compare_fn, expected=None): + fn = getattr(target, index_fn) + fn.__setitem__(indexers, value) + result = fn.__getitem__(indexers) + if expected is None: + expected = value + compare_fn(result, expected) + # GH7190 + index = pd.MultiIndex.from_product([np.arange(0,100), np.arange(0, 80)], names=['time', 'firm']) + t, n = 0, 2 + df = DataFrame(np.nan,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index) + check( + target=df, indexers=((t,n), 'X'), + value=0, compare_fn=self.assertEqual + ) - # GH7190 - index = pd.MultiIndex.from_product([np.arange(0,100), np.arange(0, 80)], names=['time', 'firm']) - t, n = 0, 2 + df = DataFrame(-999,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index) + check( + target=df, indexers=((t,n), 'X'), + value=1, compare_fn=self.assertEqual + ) - df = DataFrame(np.nan,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index) - df.loc[(t,n),'X'] = 0 - result = df.loc[(t,n),'X'] - self.assertEqual(result, 0) + df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 
'profit'], index=index) + check( + target=df, indexers=((t,n), 'X'), + value=2, compare_fn=self.assertEqual + ) - df = DataFrame(-999,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index) - df.loc[(t,n),'X'] = 1 - result = df.loc[(t,n),'X'] - self.assertEqual(result, 1) + # GH 7218, assigning with 0-dim arrays + df = DataFrame(-999,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index) + check( + target=df, indexers=((t,n), 'X'), + value=np.array(3), compare_fn=self.assertEqual, + expected=3, + ) - df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index) - df.loc[(t,n),'X'] = 2 - result = df.loc[(t,n),'X'] - self.assertEqual(result, 2) + # GH5206 + df = pd.DataFrame( + np.arange(25).reshape(5, 5), columns='A,B,C,D,E'.split(','), + dtype=float + ) + df['F'] = 99 + row_selection = df['A'] % 2 == 0 + col_selection = ['B', 'C'] + df.ix[row_selection, col_selection] = df['F'] + output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) + assert_frame_equal(df.ix[row_selection, col_selection], output) + check( + target=df, indexers=(row_selection, col_selection), + value=df['F'], compare_fn=assert_frame_equal, + expected=output, + ) - # GH 7218, assinging with 0-dim arrays - df = DataFrame(-999,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index) - df.loc[(t,n), 'X'] = np.array(3) - result = df.loc[(t,n),'X'] - self.assertEqual(result,3) + # GH11372 + idx = pd.MultiIndex.from_product([ + ['A', 'B', 'C'], + pd.date_range('2015-01-01', '2015-04-01', freq='MS') + ]) + cols = pd.MultiIndex.from_product([ + ['foo', 'bar'], + pd.date_range('2016-01-01', '2016-02-01', freq='MS') + ]) + df = pd.DataFrame(np.random.random((12, 4)), index=idx, columns=cols) + subidx = pd.MultiIndex.from_tuples( + [('A', pd.Timestamp('2015-01-01')), ('A', pd.Timestamp('2015-02-01'))] + ) + subcols = pd.MultiIndex.from_tuples( + [('foo', pd.Timestamp('2016-01-01')), ('foo', pd.Timestamp('2016-02-01'))] + ) + vals = 
pd.DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols) + check( + target=df, indexers=(subidx, subcols), + value=vals, compare_fn=assert_frame_equal, + ) + # set all columns + vals = pd.DataFrame(np.random.random((2, 4)), index=subidx, columns=cols) + check( + target=df, indexers=(subidx, slice(None, None, None)), + value=vals, compare_fn=assert_frame_equal, + ) + # identity + copy = df.copy() + check( + target=df, indexers=(df.index, df.columns), + value=df, compare_fn=assert_frame_equal, + expected=copy + ) def test_indexing_with_datetime_tz(self):