Skip to content

Commit b4f4e25

Browse files
committed
Merge remote branch 'y-p/fix1'
* y-p/fix1:
  - ENH: warn user when invoking to_dict() on df with non-unique columns
  - BUG: modify df.iteritems to support duplicate column labels #2219
  - TST: df.iteritems() should yield Series even with non-unique column labels
  - BUG: df with dupe cols should raise KeyError on accessing non-existent col via list #2218
  - TST: df with dupe cols should raise KeyError on accessing non-existent col via list
2 parents: ca8a6ba + bd45d39; commit: b4f4e25

File tree

2 files changed: 27 additions (+27) and 2 deletions (-2)

2 files changed: 27 additions (+27) and 2 deletions (-2)

pandas/core/frame.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -633,7 +633,8 @@ def keys(self):
633633

634634
def iteritems(self):
635635
"""Iterator over (column, series) pairs"""
636-
return ((k, self[k]) for k in self.columns)
636+
for i, k in enumerate(self.columns):
637+
yield (k,self.take([i],axis=1)[k])
637638

638639
def iterrows(self):
639640
"""
@@ -836,6 +837,10 @@ def to_dict(self, outtype='dict'):
836837
-------
837838
result : dict like {column -> {index -> value}}
838839
"""
840+
import warnings
841+
if not self.columns.is_unique:
842+
warnings.warn("DataFrame columns are not unique, some "
843+
"columns will be omitted.",UserWarning)
839844
if outtype.lower().startswith('d'):
840845
return dict((k, v.to_dict()) for k, v in self.iteritems())
841846
elif outtype.lower().startswith('l'):
@@ -1795,13 +1800,18 @@ def _getitem_array(self, key):
17951800
indexer = self.columns.get_indexer(key)
17961801
mask = indexer == -1
17971802
if mask.any():
1798-
raise KeyError("No column(s) named: %s" % str(key[mask]))
1803+
raise KeyError("No column(s) named: %s" %
1804+
com.pprint_thing(key[mask]))
17991805
result = self.reindex(columns=key)
18001806
if result.columns.name is None:
18011807
result.columns.name = self.columns.name
18021808
return result
18031809
else:
18041810
mask = self.columns.isin(key)
1811+
for k in key:
1812+
if k not in self.columns:
1813+
raise KeyError("No column(s) named: %s" %
1814+
com.pprint_thing(k))
18051815
return self.take(mask.nonzero()[0], axis=1)
18061816

18071817
def _slice(self, slobj, axis=0):

pandas/tests/test_frame.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,15 @@ def test_getitem(self):
6262
self.assert_('random' not in self.frame)
6363
self.assertRaises(Exception, self.frame.__getitem__, 'random')
6464

65+
def test_getitem_dupe_cols(self):
66+
df=DataFrame([[1,2,3],[4,5,6]],columns=['a','a','b'])
67+
try:
68+
df[['baf']]
69+
except KeyError:
70+
pass
71+
else:
72+
self.fail("Dataframe failed to raise KeyError")
73+
6574
def test_get(self):
6675
b = self.frame.get('B')
6776
assert_series_equal(b, self.frame['B'])
@@ -1147,6 +1156,11 @@ def test_get_value(self):
11471156
expected = self.frame[col][idx]
11481157
assert_almost_equal(result, expected)
11491158

1159+
def test_iteritems(self):
1160+
df=DataFrame([[1,2,3],[4,5,6]],columns=['a','a','b'])
1161+
for k,v in df.iteritems():
1162+
self.assertEqual(type(v),Series)
1163+
11501164
def test_lookup(self):
11511165
def alt(df, rows, cols):
11521166
result = []
@@ -7523,6 +7537,7 @@ def __nonzero__(self):
75237537
self.assert_(r0.all())
75247538
self.assert_(r1.all())
75257539

7540+
75267541
if __name__ == '__main__':
75277542
# unittest.main()
75287543
import nose

0 commit comments

Comments
 (0)