Skip to content

Commit ec3fb68

Browse files
committed
Merge pull request #4729 from jreback/dups_iloc
BUG: (GH4726) bug in getting a cross-sectional using iloc/loc with a duplicate items index
2 parents be2a65c + 48e8fed commit ec3fb68

File tree

3 files changed

+30
-15
lines changed

3 files changed

+30
-15
lines changed

doc/source/release.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,8 +309,10 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
309309
- Fix boolean comparison with a DataFrame on the lhs, and a list/tuple on the rhs (:issue:`4576`)
310310
- Fix error/dtype conversion with setitem of ``None`` on ``Series/DataFrame`` (:issue:`4667`)
311311
- Fix decoding based on a passed in non-default encoding in ``pd.read_stata`` (:issue:`4626`)
312-
- Fix some inconsistencies with ``Index.rename`` and ``MultiIndex.rename`` (:issue:`4718`, :issue:`4628`)
313312
- Fix ``DataFrame.from_records`` with a plain-vanilla ``ndarray``. (:issue:`4727`)
313+
- Fix some inconsistencies with ``Index.rename`` and ``MultiIndex.rename``,
314+
etc. (:issue:`4718`, :issue:`4628`)
315+
- Bug in using ``iloc/loc`` to get a cross-section with duplicate indices (:issue:`4726`)
314316

315317
pandas 0.12
316318
===========

pandas/core/internals.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,6 @@ def __init__(self, values, items, ref_items, ndim=None, fastpath=False, placemen
7272
self.items = _ensure_index(items)
7373
self.ref_items = _ensure_index(ref_items)
7474

75-
def _gi(self, arg):
76-
return self.values[arg]
77-
7875
@property
7976
def _consolidate_key(self):
8077
return (self._can_consolidate, self.dtype.name)
@@ -1165,9 +1162,6 @@ def __init__(self, values, items, ref_items, fastpath=False, placement=None, **k
11651162
super(DatetimeBlock, self).__init__(values, items, ref_items,
11661163
fastpath=True, placement=placement, **kwargs)
11671164

1168-
def _gi(self, arg):
1169-
return lib.Timestamp(self.values[arg])
1170-
11711165
def _can_hold_element(self, element):
11721166
if is_list_like(element):
11731167
element = np.array(element)
@@ -1200,7 +1194,7 @@ def _try_coerce_result(self, result):
12001194
if result.dtype == 'i8':
12011195
result = tslib.array_to_datetime(
12021196
result.astype(object).ravel()).reshape(result.shape)
1203-
elif isinstance(result, np.integer):
1197+
elif isinstance(result, (np.integer, np.datetime64)):
12041198
result = lib.Timestamp(result)
12051199
return result
12061200

@@ -1267,10 +1261,9 @@ def set(self, item, value):
12671261
self.values[loc] = value
12681262

12691263
def get_values(self, dtype=None):
1264+
# return object dtype as Timestamps
12701265
if dtype == object:
1271-
flat_i8 = self.values.ravel().view(np.int64)
1272-
res = tslib.ints_to_pydatetime(flat_i8)
1273-
return res.reshape(self.values.shape)
1266+
return lib.map_infer(self.values.ravel(), lib.Timestamp).reshape(self.values.shape)
12741267
return self.values
12751268

12761269

@@ -2272,7 +2265,8 @@ def xs(self, key, axis=1, copy=True):
22722265

22732266
def fast_2d_xs(self, loc, copy=False):
22742267
"""
2275-
2268+
get a cross-section for a given location in the
2269+
items; handle duplicate items
22762270
"""
22772271
if len(self.blocks) == 1:
22782272
result = self.blocks[0].values[:, loc]
@@ -2284,15 +2278,20 @@ def fast_2d_xs(self, loc, copy=False):
22842278
raise Exception('cannot get view of mixed-type or '
22852279
'non-consolidated DataFrame')
22862280

2287-
dtype = _interleaved_dtype(self.blocks)
2288-
22892281
items = self.items
2282+
2283+
# non-unique (GH4726)
2284+
if not items.is_unique:
2285+
return self._interleave(items).ravel()
2286+
2287+
# unique
2288+
dtype = _interleaved_dtype(self.blocks)
22902289
n = len(items)
22912290
result = np.empty(n, dtype=dtype)
22922291
for blk in self.blocks:
22932292
for j, item in enumerate(blk.items):
22942293
i = items.get_loc(item)
2295-
result[i] = blk._gi((j, loc))
2294+
result[i] = blk._try_coerce_result(blk.iget((j, loc)))
22962295

22972296
return result
22982297

pandas/tests/test_indexing.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,6 +1215,20 @@ def test_astype_assignment_with_iloc(self):
12151215
result = df.get_dtype_counts().sort_index()
12161216
expected = Series({ 'int64' : 4, 'float64' : 1, 'object' : 2 }).sort_index()
12171217

1218+
def test_dups_loc(self):
1219+
1220+
# GH4726
1221+
# dup indexing with iloc/loc
1222+
df = DataFrame([[1,2,'foo','bar',Timestamp('20130101')]],
1223+
columns=['a','a','a','a','a'],index=[1])
1224+
expected = Series([1,2,'foo','bar',Timestamp('20130101')],index=['a','a','a','a','a'])
1225+
1226+
result = df.iloc[0]
1227+
assert_series_equal(result,expected)
1228+
1229+
result = df.loc[1]
1230+
assert_series_equal(result,expected)
1231+
12181232
if __name__ == '__main__':
12191233
import nose
12201234
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)