Closed
Description
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the master branch of pandas.
Reproducible Example
def test_date_col_as_index_col(all_parsers):
data = """\
KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
"""
parser = all_parsers
result = parser.read_csv(
StringIO(data), header=None, prefix="X", parse_dates=[1], index_col=1
)
index = Index(
[
datetime(1999, 1, 27, 19, 0),
datetime(1999, 1, 27, 20, 0),
datetime(1999, 1, 27, 21, 0),
datetime(1999, 1, 27, 21, 0),
datetime(1999, 1, 27, 22, 0),
],
name="X1",
)
expected = DataFrame(
[
["KORD", " 18:56:00", 0.81, 2.81, 7.2, 0.0, 280.0],
["KORD", " 19:56:00", 0.01, 2.21, 7.2, 0.0, 260.0],
["KORD", " 20:56:00", -0.59, 2.21, 5.7, 0.0, 280.0],
["KORD", " 21:18:00", -0.99, 2.01, 3.6, 0.0, 270.0],
["KORD", " 21:56:00", -0.59, 1.71, 5.1, 0.0, 290.0],
],
columns=["X0", "X2", "X3", "X4", "X5", "X6", "X7"],
index=index,
)
> tm.assert_frame_equal(result, expected)
pandas/tests/io/parser/test_parse_dates.py:434:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/_libs/testing.pyx:52: in pandas._libs.testing.assert_almost_equal
cpdef assert_almost_equal(a, b,
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> raise_assert_detail(obj, msg, lobj, robj, index_values=index_values)
E AssertionError: DataFrame.iloc[:, 1] (column name="X2") are different
E
E DataFrame.iloc[:, 1] (column name="X2") values are different (100.0 %)
E [index]: [1999-01-27T19:00:00.000000000, 1999-01-27T20:00:00.000000000, 1999-01-27T21:00:00.000000000, 1999-01-27T21:00:00.000000000, 1999-01-27T22:00:00.000000000]
E [left]: [18:56:00, 19:56:00, 20:56:00, 21:18:00, 21:56:00]
E [right]: [ 18:56:00, 19:56:00, 20:56:00, 21:18:00, 21:56:00]
Issue Description
It looks like the new PyArrow version strips the leading space from the string values: column "X2" comes back as "18:56:00" instead of " 18:56:00".
Expected Behavior
Not sure, but the leading space in the string values should probably be kept.
Installed Versions
https://github.com/pandas-dev/pandas/runs/4157976598?check_suite_focus=true
last working build on master:
https://github.com/pandas-dev/pandas/runs/4155457578?check_suite_focus=true