From 9ae9ec6b970d2144a506e645a2ebe971e2aed9b4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 21 Oct 2020 14:06:57 +0200 Subject: [PATCH] Backport PR #37304: TST: correct parquet test expected partition column dtype for pyarrow 2.0 --- pandas/tests/io/test_parquet.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 306b2a7849586..6df13278fcb75 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -563,16 +563,20 @@ def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col): # read_table uses the new Arrow Datasets API since pyarrow 1.0.0 # Previous behaviour was pyarrow partitioned columns become 'category' dtypes # These are added to back of dataframe on read. In new API category dtype is - # only used if partition field is string. - legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0") - if partition_col and legacy_read_table: - partition_col_type = "category" - else: - partition_col_type = "int32" - - expected_df[partition_col] = expected_df[partition_col].astype( - partition_col_type + # only used if partition field is string, but this changed again to use + # category dtype for all types (not only strings) in pyarrow 2.0.0 + pa10 = (LooseVersion(pyarrow.__version__) >= LooseVersion("1.0.0")) and ( + LooseVersion(pyarrow.__version__) < LooseVersion("2.0.0") ) + if partition_col: + if pa10: + partition_col_type = "int32" + else: + partition_col_type = "category" + + expected_df[partition_col] = expected_df[partition_col].astype( + partition_col_type + ) check_round_trip( df_compat,