Skip to content
Closed
12 changes: 8 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,13 +1104,17 @@ def to_records(self, index=True, convert_datetime64=True):
count += 1
elif index_names[0] is None:
index_names = ['index']
names = lmap(str, index_names) + lmap(str, self.columns)
names = (lmap(compat.text_type, index_names) +
lmap(compat.text_type, self.columns))
else:
arrays = [self[c].get_values() for c in self.columns]
names = lmap(str, self.columns)
names = lmap(compat.text_type, self.columns)

dtype = np.dtype([(x, v.dtype) for x, v in zip(names, arrays)])
return np.rec.fromarrays(arrays, dtype=dtype, names=names)
formats = [v.dtype for v in arrays]
return np.rec.fromarrays(
arrays,
dtype={'names': names, 'formats': formats}
)

@classmethod
def from_items(cls, items, columns=None, orient='columns'):
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/frame/test_convert_to.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,16 @@ def test_to_records_with_unicode_index(self):
.to_records()
expected = np.rec.array([('x', 'y')], dtype=[('a', 'O'), ('b', 'O')])
tm.assert_almost_equal(result, expected)

def test_to_records_with_unicode_column_names(self):
# Issue #11879. to_records used to raise an exception when used
# with column names containing non-ASCII characters in Python 2.
result = DataFrame(data={u"accented_name_é": [1.0]}).to_records()
# Note that numpy allows for unicode field names but dtypes need
# to be specified using a dictionary instead of a list of tuples.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you have a reference for this? is it listed as a numpy bug? (if not it should be)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is referenced here:
numpy/numpy#2407

expected = np.rec.array(
[(0, 1.0)],
dtype={"names": ["index", u"accented_name_é"],
"formats": ['<i8', '<f8']}
)
tm.assert_almost_equal(result, expected)