Skip to content

TST: change pyarrow skips to xfails #55576

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions pandas/tests/io/parser/common/test_common_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


def test_override_set_noconvert_columns():
Expand Down Expand Up @@ -515,8 +514,6 @@ def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
tm.assert_frame_equal(result, expected)


# Skip for now; actually only one test fails, but it's tricky to xfail
@skip_pyarrow
@pytest.mark.parametrize(
"sep,skip_blank_lines,exp_data",
[
Expand All @@ -536,7 +533,7 @@ def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
),
],
)
def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data):
def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data, request):
parser = all_parsers
data = """\
A,B,C
Expand All @@ -550,6 +547,12 @@ def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data):

if sep == r"\s+":
data = data.replace(",", " ")
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(
raises=ValueError,
reason="the 'pyarrow' engine does not support regex separators",
)
request.applymarker(mark)

result = parser.read_csv(StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines)
expected = DataFrame(exp_data, columns=["A", "B", "C"])
Expand Down
10 changes: 4 additions & 6 deletions pandas/tests/io/parser/common/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")

# GH#43650: Some expected failures with the pyarrow engine can occasionally
# cause a deadlock instead, so we skip these instead of xfailing
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@pytest.mark.parametrize(
"data,kwargs,expected",
Expand Down Expand Up @@ -278,7 +274,8 @@ def test_empty_with_index(all_parsers):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
# CSV parse error: Empty CSV file or block: cannot infer number of columns
@xfail_pyarrow
def test_empty_with_multi_index(all_parsers):
# see gh-10467
data = "x,y,z"
Expand All @@ -291,7 +288,8 @@ def test_empty_with_multi_index(all_parsers):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
# CSV parse error: Empty CSV file or block: cannot infer number of columns
@xfail_pyarrow
def test_empty_with_reversed_multi_index(all_parsers):
data = "x,y,z"
parser = all_parsers
Expand Down
19 changes: 11 additions & 8 deletions pandas/tests/io/parser/common/test_ints.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)

# GH#43650: Some expected failures with the pyarrow engine can occasionally
# cause a deadlock instead, so we skip these instead of xfailing
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


def test_int_conversion(all_parsers):
Expand Down Expand Up @@ -102,12 +100,16 @@ def test_parse_integers_above_fp_precision(all_parsers):
tm.assert_frame_equal(result, expected)


@skip_pyarrow # Flaky
@pytest.mark.parametrize("sep", [" ", r"\s+"])
def test_integer_overflow_bug(all_parsers, sep):
# see gh-2601
data = "65248E10 11\n55555E55 22\n"
parser = all_parsers
if parser.engine == "pyarrow" and sep != " ":
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=None, sep=sep)
return

result = parser.read_csv(StringIO(data), header=None, sep=sep)
expected = DataFrame([[6.5248e14, 11], [5.5555e59, 22]])
Expand All @@ -124,7 +126,8 @@ def test_int64_min_issues(all_parsers):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
# ValueError: The 'converters' option is not supported with the 'pyarrow' engine
@xfail_pyarrow
@pytest.mark.parametrize("conv", [None, np.int64, np.uint64])
def test_int64_overflow(all_parsers, conv):
data = """ID
Expand Down Expand Up @@ -168,7 +171,7 @@ def test_int64_overflow(all_parsers, conv):
parser.read_csv(StringIO(data), converters={"ID": conv})


@skip_pyarrow
@xfail_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min]
)
Expand All @@ -182,7 +185,7 @@ def test_int64_uint64_range(all_parsers, val):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
@xfail_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
)
Expand All @@ -196,7 +199,7 @@ def test_outside_int64_uint64_range(all_parsers, val):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
@xfail_pyarrow # gets float64 dtype instead of object
@pytest.mark.parametrize("exp_data", [[str(-1), str(2**63)], [str(2**63), str(-1)]])
def test_numeric_range_too_wide(all_parsers, exp_data):
# No numerical dtype can hold both negative and uint64
Expand Down
31 changes: 20 additions & 11 deletions pandas/tests/io/parser/common/test_read_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import pandas._testing as tm

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


def test_empty_decimal_marker(all_parsers):
Expand All @@ -44,7 +43,6 @@ def test_empty_decimal_marker(all_parsers):
parser.read_csv(StringIO(data), decimal="")


@skip_pyarrow
def test_bad_stream_exception(all_parsers, csv_dir_path):
# see gh-13652
#
Expand All @@ -65,7 +63,7 @@ def test_bad_stream_exception(all_parsers, csv_dir_path):
parser.read_csv(stream)


@skip_pyarrow
@xfail_pyarrow # ValueError: The 'comment' option is not supported
def test_malformed(all_parsers):
# see gh-6607
parser = all_parsers
Expand All @@ -80,7 +78,7 @@ def test_malformed(all_parsers):
parser.read_csv(StringIO(data), header=1, comment="#")


@skip_pyarrow
@xfail_pyarrow # ValueError: The 'iterator' option is not supported
@pytest.mark.parametrize("nrows", [5, 3, None])
def test_malformed_chunks(all_parsers, nrows):
data = """ignore
Expand All @@ -100,7 +98,7 @@ def test_malformed_chunks(all_parsers, nrows):
reader.read(nrows)


@skip_pyarrow
@xfail_pyarrow # does not raise
def test_catch_too_many_names(all_parsers):
# see gh-5156
data = """\
Expand All @@ -115,12 +113,17 @@ def test_catch_too_many_names(all_parsers):
else "Number of passed names did not match "
"number of header fields in the file"
)
depr_msg = "Passing a BlockManager to DataFrame is deprecated"
warn = None
if parser.engine == "pyarrow":
warn = DeprecationWarning

with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])
with tm.assert_produces_warning(warn, match=depr_msg, check_stacklevel=False):
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])


@skip_pyarrow
@xfail_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
def test_raise_on_no_columns(all_parsers, nrows):
parser = all_parsers
Expand Down Expand Up @@ -208,7 +211,6 @@ def test_read_csv_wrong_num_columns(all_parsers):
parser.read_csv(StringIO(data))


@skip_pyarrow
def test_null_byte_char(request, all_parsers):
# see gh-2741
data = "\x00,foo"
Expand All @@ -226,12 +228,19 @@ def test_null_byte_char(request, all_parsers):
out = parser.read_csv(StringIO(data), names=names)
tm.assert_frame_equal(out, expected)
else:
msg = "NULL byte detected"
if parser.engine == "pyarrow":
msg = (
"CSV parse error: Empty CSV file or block: "
"cannot infer number of columns"
)
else:
msg = "NULL byte detected"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), names=names)


@skip_pyarrow
# ValueError: the 'pyarrow' engine does not support sep=None with delim_whitespace=False
@xfail_pyarrow
@pytest.mark.filterwarnings("always::ResourceWarning")
def test_open_file(request, all_parsers):
# GH 39024
Expand Down
16 changes: 0 additions & 16 deletions pandas/tests/io/parser/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,19 +279,3 @@ def pyarrow_xfail(request):
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
request.applymarker(mark)


@pytest.fixture
def pyarrow_skip(request):
"""
Fixture that skips a test if the engine is pyarrow.
"""
if "all_parsers" in request.fixturenames:
parser = request.getfixturevalue("all_parsers")
elif "all_parsers_all_precisions" in request.fixturenames:
# Return value is tuple of (engine, precision)
parser = request.getfixturevalue("all_parsers_all_precisions")[0]
else:
return
if parser.engine == "pyarrow":
pytest.skip("pyarrow doesn't support this.")
14 changes: 10 additions & 4 deletions pandas/tests/io/parser/dtypes/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@xfail_pyarrow
Expand Down Expand Up @@ -55,9 +54,8 @@ def test_categorical_dtype(all_parsers, dtype):
tm.assert_frame_equal(actual, expected)


@skip_pyarrow # Flaky
@pytest.mark.parametrize("dtype", [{"b": "category"}, {1: "category"}])
def test_categorical_dtype_single(all_parsers, dtype):
def test_categorical_dtype_single(all_parsers, dtype, request):
# see gh-10153
parser = all_parsers
data = """a,b,c
Expand All @@ -67,6 +65,13 @@ def test_categorical_dtype_single(all_parsers, dtype):
expected = DataFrame(
{"a": [1, 1, 2], "b": Categorical(["a", "a", "b"]), "c": [3.4, 3.4, 4.5]}
)
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(
strict=False,
reason="Flaky test sometimes gives object dtype instead of Categorical",
)
request.applymarker(mark)

actual = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(actual, expected)

Expand Down Expand Up @@ -141,6 +146,7 @@ def test_categorical_dtype_utf16(all_parsers, csv_dir_path):
tm.assert_frame_equal(actual, expected)


# ValueError: The 'chunksize' option is not supported with the 'pyarrow' engine
@xfail_pyarrow
def test_categorical_dtype_chunksize_infer_categories(all_parsers):
# see gh-10153
Expand All @@ -161,6 +167,7 @@ def test_categorical_dtype_chunksize_infer_categories(all_parsers):
tm.assert_frame_equal(actual, expected)


# ValueError: The 'chunksize' option is not supported with the 'pyarrow' engine
@xfail_pyarrow
def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
# see gh-10153
Expand Down Expand Up @@ -253,7 +260,6 @@ def test_categorical_coerces_numeric(all_parsers):
tm.assert_frame_equal(result, expected)


@skip_pyarrow # Flaky
def test_categorical_coerces_datetime(all_parsers):
parser = all_parsers
dti = pd.DatetimeIndex(["2017-01-01", "2018-01-01", "2019-01-01"], freq=None)
Expand Down
12 changes: 0 additions & 12 deletions pandas/tests/io/parser/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)

skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@pytest.fixture(params=[True, False])
def buffer(request):
Expand All @@ -36,7 +34,6 @@ def parser_and_data(all_parsers, csv1):
return parser, data, expected


@skip_pyarrow
@pytest.mark.parametrize("compression", ["zip", "infer", "zip2"])
def test_zip(parser_and_data, compression):
parser, data, expected = parser_and_data
Expand All @@ -54,7 +51,6 @@ def test_zip(parser_and_data, compression):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
@pytest.mark.parametrize("compression", ["zip", "infer"])
def test_zip_error_multiple_files(parser_and_data, compression):
parser, data, expected = parser_and_data
Expand All @@ -70,7 +66,6 @@ def test_zip_error_multiple_files(parser_and_data, compression):
parser.read_csv(path, compression=compression)


@skip_pyarrow
def test_zip_error_no_files(parser_and_data):
parser, _, _ = parser_and_data

Expand All @@ -82,7 +77,6 @@ def test_zip_error_no_files(parser_and_data):
parser.read_csv(path, compression="zip")


@skip_pyarrow
def test_zip_error_invalid_zip(parser_and_data):
parser, _, _ = parser_and_data

Expand All @@ -92,7 +86,6 @@ def test_zip_error_invalid_zip(parser_and_data):
parser.read_csv(f, compression="zip")


@skip_pyarrow
@pytest.mark.parametrize("filename", [None, "test.{ext}"])
def test_compression(
request,
Expand Down Expand Up @@ -128,7 +121,6 @@ def test_compression(
tm.assert_frame_equal(result, expected)


@skip_pyarrow
@pytest.mark.parametrize("ext", [None, "gz", "bz2"])
def test_infer_compression(all_parsers, csv1, buffer, ext):
# see gh-9770
Expand All @@ -148,7 +140,6 @@ def test_infer_compression(all_parsers, csv1, buffer, ext):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding_fmt):
# see gh-18071, gh-24130
parser = all_parsers
Expand All @@ -166,7 +157,6 @@ def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding
tm.assert_frame_equal(result, expected)


@skip_pyarrow
@pytest.mark.parametrize("invalid_compression", ["sfark", "bz3", "zipper"])
def test_invalid_compression(all_parsers, invalid_compression):
parser = all_parsers
Expand All @@ -178,7 +168,6 @@ def test_invalid_compression(all_parsers, invalid_compression):
parser.read_csv("test_file.zip", **compress_kwargs)


@skip_pyarrow
def test_compression_tar_archive(all_parsers, csv_dir_path):
parser = all_parsers
path = os.path.join(csv_dir_path, "tar_csv.tar.gz")
Expand All @@ -200,7 +189,6 @@ def test_ignore_compression_extension(all_parsers):
tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)


@skip_pyarrow
def test_writes_tar_gz(all_parsers):
parser = all_parsers
data = DataFrame(
Expand Down
Loading