pandas-dev · phofl · Oct 23, 2023 · Oct 22, 2023 · Oct 23, 2023
diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py
@@ -16,9 +16,13 @@
 )
 import pandas._testing as tm
 
-pytestmark = pytest.mark.usefixtures("pyarrow_skip")
+xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
+)
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 @pytest.mark.parametrize("index_col", [0, "index"])
 def test_read_chunksize_with_index(all_parsers, index_col):
     parser = all_parsers
@@ -51,6 +55,7 @@ def test_read_chunksize_with_index(all_parsers, index_col):
     tm.assert_frame_equal(chunks[2], expected[4:])
 
 
+@xfail_pyarrow  # AssertionError: Regex pattern did not match
 @pytest.mark.parametrize("chunksize", [1.3, "foo", 0])
 def test_read_chunksize_bad(all_parsers, chunksize):
     data = """index,A,B,C,D
@@ -69,6 +74,7 @@ def test_read_chunksize_bad(all_parsers, chunksize):
             pass
 
 
+@xfail_pyarrow  # The 'nrows' option is not supported
 @pytest.mark.parametrize("chunksize", [2, 8])
 def test_read_chunksize_and_nrows(all_parsers, chunksize):
     # see gh-15755
@@ -88,6 +94,7 @@ def test_read_chunksize_and_nrows(all_parsers, chunksize):
         tm.assert_frame_equal(concat(reader), expected)
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_read_chunksize_and_nrows_changing_size(all_parsers):
     data = """index,A,B,C,D
 foo,2,3,4,5
@@ -109,6 +116,7 @@ def test_read_chunksize_and_nrows_changing_size(all_parsers):
             reader.get_chunk(size=3)
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_get_chunk_passed_chunksize(all_parsers):
     parser = all_parsers
     data = """A,B,C
@@ -124,6 +132,7 @@ def test_get_chunk_passed_chunksize(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 @pytest.mark.parametrize("kwargs", [{}, {"index_col": 0}])
 def test_read_chunksize_compat(all_parsers, kwargs):
     # see gh-12185
@@ -141,6 +150,7 @@ def test_read_chunksize_compat(all_parsers, kwargs):
         tm.assert_frame_equal(concat(reader), result)
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_read_chunksize_jagged_names(all_parsers):
     # see gh-23509
     parser = all_parsers
@@ -171,7 +181,11 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch):
     data = "a\n" + "\n".join(integers + ["1.0", "2.0"] + integers)
 
     # Coercions should work without warnings.
-    with tm.assert_produces_warning(None):
+    warn = None
+    if parser.engine == "pyarrow":
+        warn = DeprecationWarning
+    depr_msg = "Passing a BlockManager to DataFrame"
+    with tm.assert_produces_warning(warn, match=depr_msg, check_stacklevel=False):
         with monkeypatch.context() as m:
             m.setattr(libparsers, "DEFAULT_BUFFER_HEURISTIC", heuristic)
             result = parser.read_csv(StringIO(data))
@@ -180,6 +194,7 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch):
     assert result.a.dtype == float
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_warn_if_chunks_have_mismatched_type(all_parsers):
     warning_type = None
     parser = all_parsers
@@ -207,6 +222,7 @@ def test_warn_if_chunks_have_mismatched_type(all_parsers):
     assert df.a.dtype == object
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 @pytest.mark.parametrize("iterator", [True, False])
 def test_empty_with_nrows_chunksize(all_parsers, iterator):
     # see gh-9535
@@ -225,6 +241,7 @@ def test_empty_with_nrows_chunksize(all_parsers, iterator):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_read_csv_memory_growth_chunksize(all_parsers):
     # see gh-24805
     #
@@ -242,6 +259,7 @@ def test_read_csv_memory_growth_chunksize(all_parsers):
                 pass
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_chunksize_with_usecols_second_block_shorter(all_parsers):
     # GH#21211
     parser = all_parsers
@@ -267,6 +285,7 @@ def test_chunksize_with_usecols_second_block_shorter(all_parsers):
         tm.assert_frame_equal(result, expected_frames[i])
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_chunksize_second_block_shorter(all_parsers):
     # GH#21211
     parser = all_parsers

diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py
@@ -22,9 +22,11 @@
 from pandas import DataFrame
 import pandas._testing as tm
 
-# TODO(1.4) Please xfail individual tests at release time
-# instead of skip
-pytestmark = pytest.mark.usefixtures("pyarrow_skip")
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
+)
+
+xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
 
 @pytest.mark.network
@@ -60,13 +62,15 @@ def test_local_file(all_parsers, csv_dir_path):
         pytest.skip("Failing on: " + " ".join(platform.uname()))
 
 
+@xfail_pyarrow  # AssertionError: DataFrame.index are different
 def test_path_path_lib(all_parsers):
     parser = all_parsers
     df = tm.makeDataFrame()
     result = tm.round_trip_pathlib(df.to_csv, lambda p: parser.read_csv(p, index_col=0))
     tm.assert_frame_equal(df, result)
 
 
+@xfail_pyarrow  # AssertionError: DataFrame.index are different
 def test_path_local_path(all_parsers):
     parser = all_parsers
     df = tm.makeDataFrame()
@@ -206,10 +210,14 @@ def test_no_permission(all_parsers):
         "in-quoted-field",
     ],
 )
-def test_eof_states(all_parsers, data, kwargs, expected, msg):
+def test_eof_states(all_parsers, data, kwargs, expected, msg, request):
     # see gh-10728, gh-10548
     parser = all_parsers
 
+    if parser.engine == "pyarrow" and "\r" not in data:
+        mark = pytest.mark.xfail(reason="The 'comment' option is not supported")
+        request.applymarker(mark)
+
     if expected is None:
         with pytest.raises(ParserError, match=msg):
             parser.read_csv(StringIO(data), **kwargs)
@@ -218,6 +226,7 @@ def test_eof_states(all_parsers, data, kwargs, expected, msg):
         tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow  # ValueError: the 'pyarrow' engine does not support regex separators
 def test_temporary_file(all_parsers):
     # see gh-13398
     parser = all_parsers
@@ -347,6 +356,7 @@ def test_read_csv_file_handle(all_parsers, io_class, encoding):
     assert not handle.closed
 
 
+@xfail_pyarrow  # ValueError: The 'memory_map' option is not supported
 def test_memory_map_compression(all_parsers, compression):
     """
     Support memory map for compressed files.
@@ -365,6 +375,7 @@ def test_memory_map_compression(all_parsers, compression):
         )
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_context_manager(all_parsers, datapath):
     # make sure that opened files are closed
     parser = all_parsers
@@ -381,6 +392,7 @@ def test_context_manager(all_parsers, datapath):
         assert reader.handles.handle.closed
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_context_manageri_user_provided(all_parsers, datapath):
     # make sure that user-provided handles are not closed
     parser = all_parsers
@@ -396,6 +408,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):
             assert not reader.handles.handle.closed
 
 
+@xfail_pyarrow  # ParserError: Empty CSV file
 def test_file_descriptor_leak(all_parsers, using_copy_on_write):
     # GH 31488
     parser = all_parsers
@@ -404,6 +417,7 @@ def test_file_descriptor_leak(all_parsers, using_copy_on_write):
             parser.read_csv(path)
 
 
+@xfail_pyarrow  # ValueError: The 'memory_map' option is not supported
 def test_memory_map(all_parsers, csv_dir_path):
     mmap_file = os.path.join(csv_dir_path, "test_mmap.csv")
     parser = all_parsers

diff --git a/pandas/tests/io/parser/common/test_float.py b/pandas/tests/io/parser/common/test_float.py
@@ -12,9 +12,13 @@
 from pandas import DataFrame
 import pandas._testing as tm
 
-pytestmark = pytest.mark.usefixtures("pyarrow_skip")
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
+)
+xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
 
+@xfail_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
 def test_float_parser(all_parsers):
     # see gh-9565
     parser = all_parsers
@@ -46,6 +50,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow  # TODO(review): record why this fails under pyarrow
 @pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
 def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
     # GH#38753

diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py
@@ -12,9 +12,13 @@
 )
 import pandas._testing as tm
 
-pytestmark = pytest.mark.usefixtures("pyarrow_skip")
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
+)
+xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
 
+@xfail_pyarrow  # ValueError: The 'iterator' option is not supported
 def test_iterator(all_parsers):
     # see gh-6607
     data = """index,A,B,C,D
@@ -37,6 +41,7 @@ def test_iterator(all_parsers):
     tm.assert_frame_equal(last_chunk, expected[3:])
 
 
+@xfail_pyarrow  # ValueError: The 'iterator' option is not supported
 def test_iterator2(all_parsers):
     parser = all_parsers
     data = """A,B,C
@@ -56,6 +61,7 @@ def test_iterator2(all_parsers):
     tm.assert_frame_equal(result[0], expected)
 
 
+@xfail_pyarrow  # ValueError: The 'chunksize' option is not supported
 def test_iterator_stop_on_chunksize(all_parsers):
     # gh-3967: stopping iteration when chunksize is specified
     parser = all_parsers
@@ -77,6 +83,7 @@ def test_iterator_stop_on_chunksize(all_parsers):
     tm.assert_frame_equal(concat(result), expected)
 
 
+@xfail_pyarrow  # AssertionError: Regex pattern did not match
 @pytest.mark.parametrize(
     "kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}]
 )

diff --git a/pandas/tests/io/parser/common/test_verbose.py b/pandas/tests/io/parser/common/test_verbose.py
@@ -6,9 +6,10 @@
 
 import pytest
 
-pytestmark = pytest.mark.usefixtures("pyarrow_skip")
+xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
 
+@xfail_pyarrow  # ValueError: The 'verbose' option is not supported
 def test_verbose_read(all_parsers, capsys):
     parser = all_parsers
     data = """a,b,c,d
@@ -32,6 +33,7 @@ def test_verbose_read(all_parsers, capsys):
         assert captured.out == "Filled 3 NA values in column a\n"
 
 
+@xfail_pyarrow  # ValueError: The 'verbose' option is not supported
 def test_verbose_read2(all_parsers, capsys):
     parser = all_parsers
     data = """a,b,c,d

diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py
@@ -17,10 +17,10 @@
 )
 import pandas._testing as tm
 
-# TODO(1.4): Change me into individual xfails at release time
-pytestmark = pytest.mark.usefixtures("pyarrow_skip")
+xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
 
+@xfail_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
 def test_dtype_all_columns_empty(all_parsers):
     # see gh-12048
     parser = all_parsers
@@ -30,6 +30,7 @@ def test_dtype_all_columns_empty(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
 def test_empty_pass_dtype(all_parsers):
     parser = all_parsers
 
@@ -42,6 +43,7 @@ def test_empty_pass_dtype(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
 def test_empty_with_index_pass_dtype(all_parsers):
     parser = all_parsers
 
@@ -56,6 +58,7 @@ def test_empty_with_index_pass_dtype(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
 def test_empty_with_multi_index_pass_dtype(all_parsers):
     parser = all_parsers
 
@@ -72,6 +75,7 @@ def test_empty_with_multi_index_pass_dtype(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
 def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
     parser = all_parsers
 
@@ -84,6 +88,7 @@ def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
 def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
     parser = all_parsers
 
@@ -96,6 +101,7 @@ def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
 def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers):
     # see gh-9424
     parser = all_parsers
@@ -165,6 +171,7 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers):
         ),
     ],
 )
+@xfail_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
 def test_empty_dtype(all_parsers, dtype, expected):
     # see gh-14712
     parser = all_parsers