Skip to content

TST: change pyarrow skips to xfails #55637

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions pandas/tests/io/parser/common/test_chunksize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@
)
import pandas._testing as tm

pytestmark = pytest.mark.usefixtures("pyarrow_skip")
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)


@xfail_pyarrow # The 'chunksize' option is not supported
@pytest.mark.parametrize("index_col", [0, "index"])
def test_read_chunksize_with_index(all_parsers, index_col):
parser = all_parsers
Expand Down Expand Up @@ -51,6 +55,7 @@ def test_read_chunksize_with_index(all_parsers, index_col):
tm.assert_frame_equal(chunks[2], expected[4:])


@xfail_pyarrow # AssertionError: Regex pattern did not match
@pytest.mark.parametrize("chunksize", [1.3, "foo", 0])
def test_read_chunksize_bad(all_parsers, chunksize):
data = """index,A,B,C,D
Expand All @@ -69,6 +74,7 @@ def test_read_chunksize_bad(all_parsers, chunksize):
pass


@xfail_pyarrow # The 'nrows' option is not supported
@pytest.mark.parametrize("chunksize", [2, 8])
def test_read_chunksize_and_nrows(all_parsers, chunksize):
# see gh-15755
Expand All @@ -88,6 +94,7 @@ def test_read_chunksize_and_nrows(all_parsers, chunksize):
tm.assert_frame_equal(concat(reader), expected)


@xfail_pyarrow # The 'chunksize' option is not supported
def test_read_chunksize_and_nrows_changing_size(all_parsers):
data = """index,A,B,C,D
foo,2,3,4,5
Expand All @@ -109,6 +116,7 @@ def test_read_chunksize_and_nrows_changing_size(all_parsers):
reader.get_chunk(size=3)


@xfail_pyarrow # The 'chunksize' option is not supported
def test_get_chunk_passed_chunksize(all_parsers):
parser = all_parsers
data = """A,B,C
Expand All @@ -124,6 +132,7 @@ def test_get_chunk_passed_chunksize(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # The 'chunksize' option is not supported
@pytest.mark.parametrize("kwargs", [{}, {"index_col": 0}])
def test_read_chunksize_compat(all_parsers, kwargs):
# see gh-12185
Expand All @@ -141,6 +150,7 @@ def test_read_chunksize_compat(all_parsers, kwargs):
tm.assert_frame_equal(concat(reader), result)


@xfail_pyarrow # The 'chunksize' option is not supported
def test_read_chunksize_jagged_names(all_parsers):
# see gh-23509
parser = all_parsers
Expand Down Expand Up @@ -171,7 +181,11 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch):
data = "a\n" + "\n".join(integers + ["1.0", "2.0"] + integers)

# Coercions should work without warnings.
with tm.assert_produces_warning(None):
warn = None
if parser.engine == "pyarrow":
warn = DeprecationWarning
depr_msg = "Passing a BlockManager to DataFrame"
with tm.assert_produces_warning(warn, match=depr_msg, check_stacklevel=False):
with monkeypatch.context() as m:
m.setattr(libparsers, "DEFAULT_BUFFER_HEURISTIC", heuristic)
result = parser.read_csv(StringIO(data))
Expand All @@ -180,6 +194,7 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch):
assert result.a.dtype == float


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_warn_if_chunks_have_mismatched_type(all_parsers):
warning_type = None
parser = all_parsers
Expand Down Expand Up @@ -207,6 +222,7 @@ def test_warn_if_chunks_have_mismatched_type(all_parsers):
assert df.a.dtype == object


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
@pytest.mark.parametrize("iterator", [True, False])
def test_empty_with_nrows_chunksize(all_parsers, iterator):
# see gh-9535
Expand All @@ -225,6 +241,7 @@ def test_empty_with_nrows_chunksize(all_parsers, iterator):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_read_csv_memory_growth_chunksize(all_parsers):
# see gh-24805
#
Expand All @@ -242,6 +259,7 @@ def test_read_csv_memory_growth_chunksize(all_parsers):
pass


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_chunksize_with_usecols_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
Expand All @@ -267,6 +285,7 @@ def test_chunksize_with_usecols_second_block_shorter(all_parsers):
tm.assert_frame_equal(result, expected_frames[i])


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_chunksize_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
Expand Down
22 changes: 18 additions & 4 deletions pandas/tests/io/parser/common/test_file_buffer_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@
from pandas import DataFrame
import pandas._testing as tm

# TODO(1.4) Please xfail individual tests at release time
# instead of skip
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@pytest.mark.network
Expand Down Expand Up @@ -60,13 +62,15 @@ def test_local_file(all_parsers, csv_dir_path):
pytest.skip("Failing on: " + " ".join(platform.uname()))


@xfail_pyarrow  # AssertionError: DataFrame.index are different
def test_path_path_lib(all_parsers):
    """Check that a frame written with ``to_csv`` reads back equal.

    The writer (``DataFrame.to_csv``) and a reader built on the parser
    under test are handed to ``tm.round_trip_pathlib``; the frame that
    comes back must compare equal to the one that was written.
    NOTE(review): presumably the helper passes a ``pathlib.Path`` to both
    callables -- confirm against ``pandas._testing``.
    """
    expected = tm.makeDataFrame()

    def read_back(path):
        # The first CSV column holds the index that ``to_csv`` wrote out.
        return all_parsers.read_csv(path, index_col=0)

    actual = tm.round_trip_pathlib(expected.to_csv, read_back)
    tm.assert_frame_equal(expected, actual)


@xfail_pyarrow # AssertionError: DataFrame.index are different
def test_path_local_path(all_parsers):
parser = all_parsers
df = tm.makeDataFrame()
Expand Down Expand Up @@ -206,10 +210,14 @@ def test_no_permission(all_parsers):
"in-quoted-field",
],
)
def test_eof_states(all_parsers, data, kwargs, expected, msg):
def test_eof_states(all_parsers, data, kwargs, expected, msg, request):
# see gh-10728, gh-10548
parser = all_parsers

if parser.engine == "pyarrow" and "\r" not in data:
mark = pytest.mark.xfail(reason="The 'comment' option is not supported")
request.applymarker(mark)

if expected is None:
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
Expand All @@ -218,6 +226,7 @@ def test_eof_states(all_parsers, data, kwargs, expected, msg):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
def test_temporary_file(all_parsers):
# see gh-13398
parser = all_parsers
Expand Down Expand Up @@ -347,6 +356,7 @@ def test_read_csv_file_handle(all_parsers, io_class, encoding):
assert not handle.closed


@xfail_pyarrow # ValueError: The 'memory_map' option is not supported
def test_memory_map_compression(all_parsers, compression):
"""
Support memory map for compressed files.
Expand All @@ -365,6 +375,7 @@ def test_memory_map_compression(all_parsers, compression):
)


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_context_manager(all_parsers, datapath):
# make sure that opened files are closed
parser = all_parsers
Expand All @@ -381,6 +392,7 @@ def test_context_manager(all_parsers, datapath):
assert reader.handles.handle.closed


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_context_manageri_user_provided(all_parsers, datapath):
# make sure that user-provided handles are not closed
parser = all_parsers
Expand All @@ -396,6 +408,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):
assert not reader.handles.handle.closed


@xfail_pyarrow # ParserError: Empty CSV file
def test_file_descriptor_leak(all_parsers, using_copy_on_write):
# GH 31488
parser = all_parsers
Expand All @@ -404,6 +417,7 @@ def test_file_descriptor_leak(all_parsers, using_copy_on_write):
parser.read_csv(path)


@xfail_pyarrow # ValueError: The 'memory_map' option is not supported
def test_memory_map(all_parsers, csv_dir_path):
mmap_file = os.path.join(csv_dir_path, "test_mmap.csv")
parser = all_parsers
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/io/parser/common/test_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
from pandas import DataFrame
import pandas._testing as tm

pytestmark = pytest.mark.usefixtures("pyarrow_skip")
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow # ParserError: CSV parse error: Empty CSV file or block
def test_float_parser(all_parsers):
# see gh-9565
parser = all_parsers
Expand Down Expand Up @@ -46,6 +50,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
# GH#38753
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/io/parser/common/test_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
)
import pandas._testing as tm

pytestmark = pytest.mark.usefixtures("pyarrow_skip")
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow # ValueError: The 'iterator' option is not supported
def test_iterator(all_parsers):
# see gh-6607
data = """index,A,B,C,D
Expand All @@ -37,6 +41,7 @@ def test_iterator(all_parsers):
tm.assert_frame_equal(last_chunk, expected[3:])


@xfail_pyarrow # ValueError: The 'iterator' option is not supported
def test_iterator2(all_parsers):
parser = all_parsers
data = """A,B,C
Expand All @@ -56,6 +61,7 @@ def test_iterator2(all_parsers):
tm.assert_frame_equal(result[0], expected)


@xfail_pyarrow # ValueError: The 'chunksize' option is not supported
def test_iterator_stop_on_chunksize(all_parsers):
# gh-3967: stopping iteration when chunksize is specified
parser = all_parsers
Expand All @@ -77,6 +83,7 @@ def test_iterator_stop_on_chunksize(all_parsers):
tm.assert_frame_equal(concat(result), expected)


@xfail_pyarrow # AssertionError: Regex pattern did not match
@pytest.mark.parametrize(
"kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}]
)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/io/parser/common/test_verbose.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@

import pytest

pytestmark = pytest.mark.usefixtures("pyarrow_skip")
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow # ValueError: The 'verbose' option is not supported
def test_verbose_read(all_parsers, capsys):
parser = all_parsers
data = """a,b,c,d
Expand All @@ -32,6 +33,7 @@ def test_verbose_read(all_parsers, capsys):
assert captured.out == "Filled 3 NA values in column a\n"


@xfail_pyarrow # ValueError: The 'verbose' option is not supported
def test_verbose_read2(all_parsers, capsys):
parser = all_parsers
data = """a,b,c,d
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/io/parser/dtypes/test_empty.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
)
import pandas._testing as tm

# TODO(1.4): Change me into individual xfails at release time
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_dtype_all_columns_empty(all_parsers):
# see gh-12048
parser = all_parsers
Expand All @@ -30,6 +30,7 @@ def test_dtype_all_columns_empty(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_pass_dtype(all_parsers):
parser = all_parsers

Expand All @@ -42,6 +43,7 @@ def test_empty_pass_dtype(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_index_pass_dtype(all_parsers):
parser = all_parsers

Expand All @@ -56,6 +58,7 @@ def test_empty_with_index_pass_dtype(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_multi_index_pass_dtype(all_parsers):
parser = all_parsers

Expand All @@ -72,6 +75,7 @@ def test_empty_with_multi_index_pass_dtype(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
parser = all_parsers

Expand All @@ -84,6 +88,7 @@ def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
parser = all_parsers

Expand All @@ -96,6 +101,7 @@ def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers):
# see gh-9424
parser = all_parsers
Expand Down Expand Up @@ -165,6 +171,7 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers):
),
],
)
@xfail_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_dtype(all_parsers, dtype, expected):
# see gh-14712
parser = all_parsers
Expand Down
Loading