DEPS: bump pyarrow version to 0.17.0 (#38870) · Pull Request #41476

Merged (9 commits, May 17, 2021)
Changes from 4 commits
2 changes: 1 addition & 1 deletion ci/deps/actions-37-db-min.yaml
@@ -31,7 +31,7 @@ dependencies:
- openpyxl
- pandas-gbq
- google-cloud-bigquery>=1.27.2 # GH 36436
-- pyarrow=0.17 # GH 38803
+- pyarrow=0.17.0 # GH 38803
- pytables>=3.5.1
- scipy
- xarray=0.12.3
2 changes: 1 addition & 1 deletion ci/deps/actions-37-db.yaml
@@ -31,7 +31,7 @@ dependencies:
- pandas-gbq
- google-cloud-bigquery>=1.27.2 # GH 36436
- psycopg2
-- pyarrow>=0.15.0
+- pyarrow>=0.17.0
- pymysql
- pytables
- python-snappy
2 changes: 1 addition & 1 deletion ci/deps/actions-37-minimum_versions.yaml
@@ -23,7 +23,7 @@ dependencies:
- pytables=3.5.1
- python-dateutil=2.7.3
- pytz=2017.3
-- pyarrow=0.15
+- pyarrow=0.17.0
- scipy=1.2
- xlrd=1.2.0
- xlsxwriter=1.0.2
2 changes: 1 addition & 1 deletion ci/deps/actions-37.yaml
@@ -18,7 +18,7 @@ dependencies:
- numpy=1.19
- python-dateutil
- nomkl
-- pyarrow=0.15.1
+- pyarrow=0.17.0
- pytz
- s3fs>=0.4.0
- moto>=1.3.14
3 changes: 2 additions & 1 deletion ci/deps/azure-macos-37.yaml
@@ -1,6 +1,7 @@
name: pandas-dev
channels:
- defaults
+- conda-forge
dependencies:
- python=3.7.*

@@ -21,7 +22,7 @@ dependencies:
- numexpr
- numpy=1.17.3
- openpyxl
-- pyarrow=0.15.1
+- pyarrow=0.17.0
- pytables
- python-dateutil==2.7.3
- pytz
2 changes: 1 addition & 1 deletion ci/deps/azure-windows-37.yaml
@@ -26,7 +26,7 @@ dependencies:
- numexpr
- numpy=1.17.*
- openpyxl
-- pyarrow=0.15
+- pyarrow=0.17.0
- pytables
- python-dateutil
- pytz
2 changes: 1 addition & 1 deletion ci/deps/azure-windows-38.yaml
@@ -25,7 +25,7 @@ dependencies:
- numpy=1.18.*
- openpyxl
- jinja2
-- pyarrow>=0.15.0
+- pyarrow>=0.17.0
- pytables
- python-dateutil
- pytz
2 changes: 1 addition & 1 deletion doc/source/getting_started/install.rst
@@ -358,7 +358,7 @@ PyTables 3.5.1 HDF5-based reading / writing
blosc 1.17.0 Compression for HDF5
zlib Compression for HDF5
fastparquet 0.4.0 Parquet reading / writing
-pyarrow 0.15.0 Parquet, ORC, and feather reading / writing
+pyarrow 0.17.0 Parquet, ORC, and feather reading / writing
pyreadstat SPSS files (.sav) reading
========================= ================== =============================================================

2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
@@ -578,7 +578,7 @@ Optional libraries below the lowest tested version may still work, but are not c
+-----------------+-----------------+---------+
| openpyxl | 3.0.0 | X |
+-----------------+-----------------+---------+
-| pyarrow | 0.15.0 | |
+| pyarrow | 0.17.0 | X |
+-----------------+-----------------+---------+
| pymysql | 0.8.1 | X |
+-----------------+-----------------+---------+
2 changes: 1 addition & 1 deletion environment.yml
@@ -100,7 +100,7 @@ dependencies:
- odfpy

- fastparquet>=0.3.2 # pandas.read_parquet, DataFrame.to_parquet
-- pyarrow>=0.15.0 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
+- pyarrow>=0.17.0 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
- python-snappy # required by pyarrow

- pyqt>=5.9.2 # pandas.read_clipboard
2 changes: 1 addition & 1 deletion pandas/compat/_optional.py
@@ -21,7 +21,7 @@
"odfpy": "1.3.0",
"openpyxl": "3.0.0",
"pandas_gbq": "0.12.0",
"pyarrow": "0.15.0",
"pyarrow": "0.17.0",
"pytest": "5.0.1",
"pyxlsb": "1.0.6",
"s3fs": "0.4.0",
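The VERSIONS table above is what pandas consults when an optional dependency is imported: if the installed package is older than the pinned floor, the import is rejected. A minimal, illustrative sketch of that kind of check (not the actual `import_optional_dependency` implementation; the helper name here is hypothetical):

```python
import importlib

from packaging.version import Version

VERSIONS = {"pyarrow": "0.17.0"}  # excerpt of the minimum-version table


def import_with_minimum(name: str):
    """Import ``name`` and verify it meets the pinned minimum version."""
    module = importlib.import_module(name)
    minimum = VERSIONS.get(name)
    if minimum is not None and Version(module.__version__) < Version(minimum):
        raise ImportError(
            f"pandas requires {name}>={minimum}, found {module.__version__}"
        )
    return module


pa = import_with_minimum("pyarrow")  # raises on pyarrow < 0.17.0
```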
2 changes: 1 addition & 1 deletion pandas/tests/arrays/interval/test_interval.py
@@ -165,7 +165,7 @@ def test_repr():
# Arrow interaction


-pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.16.0")
+pyarrow_skip = td.skip_if_no("pyarrow")


@pyarrow_skip
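With the floor raised to pyarrow 0.17.0, the per-test `min_version` arguments to `td.skip_if_no` become redundant and a bare presence check is enough. A rough plain-pytest equivalent of the same gating (a sketch, not pandas' `skip_if_no` helper):

```python
import pytest

import pandas as pd

# Skip the whole module unless pyarrow >= 0.17.0 is importable.
pa = pytest.importorskip("pyarrow", minversion="0.17.0")


def test_arrow_roundtrip_smoke():
    # pandas -> pyarrow Table -> pandas round trip, as in the tests above.
    df = pd.DataFrame({"a": [1, 2, 3]})
    result = pa.table(df).to_pandas()
    pd.testing.assert_frame_equal(result, df)
```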
16 changes: 6 additions & 10 deletions pandas/tests/arrays/masked/test_arrow_compat.py
@@ -6,7 +6,7 @@
import pandas as pd
import pandas._testing as tm

-pa = pytest.importorskip("pyarrow", minversion="0.15.0")
+pa = pytest.importorskip("pyarrow", minversion="0.17.0")

from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

@@ -21,8 +21,6 @@ def data(request):


def test_arrow_array(data):
-# protocol added in 0.15.0
-
arr = pa.array(data)
expected = pa.array(
data.to_numpy(object, na_value=None),
@@ -31,10 +29,8 @@ def test_arrow_array(data):
assert arr.equals(expected)


-@td.skip_if_no("pyarrow", min_version="0.16.0")
+@td.skip_if_no("pyarrow")
def test_arrow_roundtrip(data):
-# roundtrip possible from arrow 0.16.0
-
df = pd.DataFrame({"a": data})
table = pa.table(df)
assert table.field("a").type == str(data.dtype.numpy_dtype)
@@ -43,7 +39,7 @@ def test_arrow_roundtrip(data):
tm.assert_frame_equal(result, df)


-@td.skip_if_no("pyarrow", min_version="0.16.0")
+@td.skip_if_no("pyarrow")
def test_arrow_load_from_zero_chunks(data):
# GH-41040

@@ -58,7 +54,7 @@ def test_arrow_load_from_zero_chunks(data):
tm.assert_frame_equal(result, df)


-@td.skip_if_no("pyarrow", min_version="0.16.0")
+@td.skip_if_no("pyarrow")
def test_arrow_from_arrow_uint():
# https://github.com/pandas-dev/pandas/issues/31896
# possible mismatch in types
@@ -70,7 +66,7 @@ def test_arrow_from_arrow_uint():
tm.assert_extension_array_equal(result, expected)


-@td.skip_if_no("pyarrow", min_version="0.16.0")
+@td.skip_if_no("pyarrow")
def test_arrow_sliced(data):
# https://github.com/pandas-dev/pandas/issues/38525

@@ -165,7 +161,7 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays):
tm.assert_numpy_array_equal(mask, mask_expected_empty)


-@td.skip_if_no("pyarrow", min_version="0.16.0")
+@td.skip_if_no("pyarrow")
def test_from_arrow_type_error(request, data):
# ensure that __from_arrow__ returns a TypeError when getting a wrong
# array type
2 changes: 1 addition & 1 deletion pandas/tests/arrays/period/test_arrow_compat.py
@@ -11,7 +11,7 @@
period_array,
)

-pyarrow_skip = pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.16.0")
+pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.17.0")


@pyarrow_skip
6 changes: 3 additions & 3 deletions pandas/tests/arrays/string_/test_string.py
@@ -437,7 +437,7 @@ def test_fillna_args(dtype, request):
arr.fillna(value=1)


-@td.skip_if_no("pyarrow", min_version="0.15.0")
+@td.skip_if_no("pyarrow")
def test_arrow_array(dtype):
# protocol added in 0.15.0
import pyarrow as pa
@@ -451,7 +451,7 @@ def test_arrow_array(dtype):
assert arr.equals(expected)


-@td.skip_if_no("pyarrow", min_version="0.16.0")
+@td.skip_if_no("pyarrow")
def test_arrow_roundtrip(dtype, dtype_object):
# roundtrip possible from arrow 1.0.0
import pyarrow as pa
@@ -467,7 +467,7 @@ def test_arrow_roundtrip(dtype, dtype_object):
assert result.loc[2, "a"] is pd.NA


-@td.skip_if_no("pyarrow", min_version="0.16.0")
+@td.skip_if_no("pyarrow")
def test_arrow_load_from_zero_chunks(dtype, dtype_object):
# GH-41040
import pyarrow as pa
13 changes: 5 additions & 8 deletions pandas/tests/io/test_feather.py
@@ -6,14 +6,12 @@

import pandas as pd
import pandas._testing as tm
-from pandas.util.version import Version

from pandas.io.feather_format import read_feather, to_feather # isort:skip

pyarrow = pytest.importorskip("pyarrow")


-pyarrow_version = Version(pyarrow.__version__)
filter_sparse = pytest.mark.filterwarnings("ignore:The Sparse")


@@ -89,12 +87,11 @@ def test_basic(self):
),
}
)
-if pyarrow_version >= Version("0.17.0"):
-    df["periods"] = pd.period_range("2013", freq="M", periods=3)
-    df["timedeltas"] = pd.timedelta_range("1 day", periods=3)
-    # TODO temporary disable due to regression in pyarrow 0.17.1
-    # https://github.com/pandas-dev/pandas/issues/34255
-    # df["intervals"] = pd.interval_range(0, 3, 3)
+df["periods"] = pd.period_range("2013", freq="M", periods=3)
+df["timedeltas"] = pd.timedelta_range("1 day", periods=3)
+# TODO temporary disable due to regression in pyarrow 0.17.1
+# https://github.com/pandas-dev/pandas/issues/34255
+# df["intervals"] = pd.interval_range(0, 3, 3)

assert df.dttz.dtype.tz.zone == "US/Eastern"
self.check_round_trip(df)
44 changes: 15 additions & 29 deletions pandas/tests/io/test_parquet.py
@@ -17,6 +17,10 @@
PY38,
is_platform_windows,
)
+from pandas.compat.pyarrow import (
+    pa_version_under1p0,
+    pa_version_under2p0,
+)
import pandas.util._test_decorators as td

import pandas as pd
@@ -653,8 +657,6 @@ def test_categorical(self, pa):
)
def test_s3_roundtrip_explicit_fs(self, df_compat, s3_resource, pa, s3so):
s3fs = pytest.importorskip("s3fs")
-if Version(pyarrow.__version__) <= Version("0.17.0"):
-    pytest.skip()
s3 = s3fs.S3FileSystem(**s3so)
kw = {"filesystem": s3}
check_round_trip(
Expand All @@ -666,8 +668,6 @@ def test_s3_roundtrip_explicit_fs(self, df_compat, s3_resource, pa, s3so):
)

def test_s3_roundtrip(self, df_compat, s3_resource, pa, s3so):
-if Version(pyarrow.__version__) <= Version("0.17.0"):
-    pytest.skip()
# GH #19134
s3so = {"storage_options": s3so}
check_round_trip(
@@ -698,14 +698,12 @@ def test_s3_roundtrip_for_dir(
# These are added to back of dataframe on read. In new API category dtype is
# only used if partition field is string, but this changed again to use
# category dtype for all types (not only strings) in pyarrow 2.0.0
-pa10 = (Version(pyarrow.__version__) >= Version("1.0.0")) and (
-    Version(pyarrow.__version__) < Version("2.0.0")
-)
if partition_col:
-    if pa10:
-        partition_col_type = "int32"
-    else:
-        partition_col_type = "category"
+    partition_col_type = (
+        "int32"
+        if (not pa_version_under1p0) and pa_version_under2p0
+        else "category"
+    )

expected_df[partition_col] = expected_df[partition_col].astype(
partition_col_type
@@ -795,7 +793,7 @@ def test_write_with_schema(self, pa):
out_df = df.astype(bool)
check_round_trip(df, pa, write_kwargs={"schema": schema}, expected=out_df)

-@td.skip_if_no("pyarrow", min_version="0.15.0")
+@td.skip_if_no("pyarrow")
def test_additional_extension_arrays(self, pa):
# test additional ExtensionArrays that are supported through the
# __arrow_array__ protocol
@@ -806,22 +804,10 @@ def test_additional_extension_arrays(self, pa):
"c": pd.Series(["a", None, "c"], dtype="string"),
}
)
-if Version(pyarrow.__version__) >= Version("0.16.0"):
-    expected = df
-else:
-    # de-serialized as plain int / object
-    expected = df.assign(
-        a=df.a.astype("int64"), b=df.b.astype("int64"), c=df.c.astype("object")
-    )
-check_round_trip(df, pa, expected=expected)
+check_round_trip(df, pa, expected=df)

df = pd.DataFrame({"a": pd.Series([1, 2, 3, None], dtype="Int64")})
-if Version(pyarrow.__version__) >= Version("0.16.0"):
-    expected = df
-else:
-    # if missing values in integer, currently de-serialized as float
-    expected = df.assign(a=df.a.astype("float64"))
-check_round_trip(df, pa, expected=expected)
+check_round_trip(df, pa, expected=df)

@td.skip_if_no("pyarrow", min_version="1.0.0")
def test_pyarrow_backed_string_array(self, pa):
@@ -831,7 +817,7 @@ def test_pyarrow_backed_string_array(self, pa):
df = pd.DataFrame({"a": pd.Series(["a", None, "c"], dtype="arrow_string")})
check_round_trip(df, pa, expected=df)

-@td.skip_if_no("pyarrow", min_version="0.16.0")
+@td.skip_if_no("pyarrow")
def test_additional_extension_types(self, pa):
# test additional ExtensionArrays that are supported through the
# __arrow_array__ protocol + by defining a custom ExtensionType
@@ -844,7 +830,7 @@ def test_additional_extension_types(self, pa):
)
check_round_trip(df, pa)

-@td.skip_if_no("pyarrow", min_version="0.16.0")
+@td.skip_if_no("pyarrow")
def test_use_nullable_dtypes(self, pa):
import pyarrow.parquet as pq

@@ -880,7 +866,7 @@ def test_timestamp_nanoseconds(self, pa):
check_round_trip(df, pa, write_kwargs={"version": "2.0"})

def test_timezone_aware_index(self, pa, timezone_aware_date_list):
-if Version(pyarrow.__version__) >= Version("2.0.0"):
+if not pa_version_under2p0:
# temporary skip this test until it is properly resolved
# https://github.com/pandas-dev/pandas/issues/37286
pytest.skip()
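The `pa_version_under1p0` / `pa_version_under2p0` flags imported above replace the ad-hoc `Version(pyarrow.__version__)` comparisons removed throughout this file. Roughly how such flags are typically derived (an illustrative sketch, not necessarily the exact `pandas.compat.pyarrow` module):

```python
from packaging.version import Version

try:
    import pyarrow

    _pa_version = Version(pyarrow.__version__)
    pa_version_under1p0 = _pa_version < Version("1.0.0")
    pa_version_under2p0 = _pa_version < Version("2.0.0")
except ImportError:
    # Without pyarrow installed, every version gate reads as "too old".
    pa_version_under1p0 = True
    pa_version_under2p0 = True
```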
2 changes: 1 addition & 1 deletion requirements-dev.txt
@@ -65,7 +65,7 @@ xlsxwriter
xlwt
odfpy
fastparquet>=0.3.2
-pyarrow>=0.15.0
+pyarrow>=0.17.0
python-snappy
pyqt5>=5.9.2
tables>=3.5.1