Skip to content

Commit 2ec9862

Browse files
authored
BUG: Fix multi-index colname references in read_csv c engine. (#42519)
1 parent 9de158f commit 2ec9862

File tree

5 files changed

+75
-1
lines changed

5 files changed

+75
-1
lines changed

doc/source/whatsnew/v1.4.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ MultiIndex
233233
I/O
234234
^^^
235235
- Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`)
236-
-
236+
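- Bug in :func:`read_csv` with the C engine where, for multi-header input, arguments referencing column names as tuples (e.g. ``dtype``, ``converters``, ``na_values``) were not matched against the resulting :class:`MultiIndex` columns (:issue:`42446`)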
237237
-
238238

239239
Period

pandas/_libs/parsers.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,6 +1280,8 @@ cdef class TextReader:
12801280
# generate extra (bogus) headers if there are more columns than headers
12811281
if j >= len(self.header[0]):
12821282
return j
1283+
elif self.has_mi_columns:
1284+
return tuple(header_row[j] for header_row in self.header)
12831285
else:
12841286
return self.header[0][j]
12851287
else:

pandas/tests/io/parser/dtypes/test_dtypes_basic.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,3 +257,29 @@ def test_dtype_mangle_dup_cols_single_dtype(all_parsers):
257257
result = parser.read_csv(StringIO(data), dtype=str)
258258
expected = DataFrame({"a": ["1"], "a.1": ["1"]})
259259
tm.assert_frame_equal(result, expected)
260+
261+
262+
def test_dtype_multi_index(all_parsers):
    """Check that tuple keys in ``dtype`` address columns of a two-row header."""
    # GH 42446
    csv_text = "A,B,B\nX,Y,Z\n1,2,3"
    column_dtypes = {
        ("A", "X"): np.int32,
        ("B", "Y"): np.int32,
        ("B", "Z"): np.float32,
    }

    # Both header rows are consumed, so every column name is a 2-tuple.
    result = all_parsers.read_csv(
        StringIO(csv_text), header=[0, 1], dtype=column_dtypes
    )

    expected_columns = {
        ("A", "X"): np.int32([1]),
        ("B", "Y"): np.int32([2]),
        ("B", "Z"): np.float32([3]),
    }
    expected = DataFrame(expected_columns)

    tm.assert_frame_equal(result, expected)

pandas/tests/io/parser/test_converters.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,29 @@ def test_converter_index_col_bug(all_parsers):
161161

162162
xp = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A"))
163163
tm.assert_frame_equal(rs, xp)
164+
165+
166+
def test_converter_multi_index(all_parsers):
    """Check that tuple keys in ``converters`` address columns of a two-row header."""
    # GH 42446
    csv_text = "A,B,B\nX,Y,Z\n1,2,3"
    column_converters = {
        ("A", "X"): np.int32,
        ("B", "Y"): np.int32,
        ("B", "Z"): np.float32,
    }

    # Both header rows are consumed, so every column name is a 2-tuple.
    result = all_parsers.read_csv(
        StringIO(csv_text), header=[0, 1], converters=column_converters
    )

    expected_columns = {
        ("A", "X"): np.int32([1]),
        ("B", "Y"): np.int32([2]),
        ("B", "Z"): np.float32([3]),
    }
    expected = DataFrame(expected_columns)

    tm.assert_frame_equal(result, expected)

pandas/tests/io/parser/test_na_values.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,3 +570,23 @@ def test_str_nan_dropped(all_parsers):
570570
)
571571

572572
tm.assert_frame_equal(result, expected)
573+
574+
575+
def test_nan_multi_index(all_parsers):
    """Check that a tuple key in ``na_values`` addresses a column of a two-row header."""
    # GH 42446
    csv_text = "A,B,B\nX,Y,Z\n1,2,inf"
    per_column_na = {("B", "Z"): "inf"}

    # Both header rows are consumed, so every column name is a 2-tuple.
    result = all_parsers.read_csv(
        StringIO(csv_text), header=[0, 1], na_values=per_column_na
    )

    expected_columns = {
        ("A", "X"): [1],
        ("B", "Y"): [2],
        ("B", "Z"): [np.nan],
    }
    expected = DataFrame(expected_columns)

    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)