From 9ae9ec6b970d2144a506e645a2ebe971e2aed9b4 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 21 Oct 2020 14:06:57 +0200
Subject: [PATCH] Backport PR #37304: TST: correct parquet test expected
 partition column dtype for pyarrow 2.0

---
 pandas/tests/io/test_parquet.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 306b2a7849586..6df13278fcb75 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -563,16 +563,20 @@ def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
         # read_table uses the new Arrow Datasets API since pyarrow 1.0.0
         # Previous behaviour was pyarrow partitioned columns become 'category' dtypes
         # These are added to back of dataframe on read. In new API category dtype is
-        # only used if partition field is string.
-        legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0")
-        if partition_col and legacy_read_table:
-            partition_col_type = "category"
-        else:
-            partition_col_type = "int32"
-
-        expected_df[partition_col] = expected_df[partition_col].astype(
-            partition_col_type
+        # only used if partition field is string, but this changed again to use
+        # category dtype for all types (not only strings) in pyarrow 2.0.0
+        pa10 = (LooseVersion(pyarrow.__version__) >= LooseVersion("1.0.0")) and (
+            LooseVersion(pyarrow.__version__) < LooseVersion("2.0.0")
         )
+        if partition_col:
+            if pa10:
+                partition_col_type = "int32"
+            else:
+                partition_col_type = "category"
+
+            expected_df[partition_col] = expected_df[partition_col].astype(
+                partition_col_type
+            )
 
         check_round_trip(
             df_compat,