Skip to content

Commit df43fe8

Browse files
committed
fix: dtype parameter ineffective in Series IO
1 parent f433ecf commit df43fe8

File tree

5 files changed

+72
-4
lines changed

5 files changed

+72
-4
lines changed

bigframes/dtypes.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,10 @@ def is_object_like(type_: Union[ExpressionType, str]) -> bool:
295295
# See: https://stackoverflow.com/a/40312924/101923 and
296296
# https://numpy.org/doc/stable/reference/generated/numpy.dtype.kind.html
297297
# for the way to identify object type.
298-
return type_ in ("object", "O") or getattr(type_, "kind", None) == "O"
298+
return type_ in ("object", "O") or (
299+
getattr(type_, "kind", None) == "O"
300+
and getattr(type_, "storage", None) != "pyarrow"
301+
)
299302

300303

301304
def is_string_like(type_: ExpressionType) -> bool:

tests/system/small/bigquery/test_json.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import geopandas as gpd # type: ignore
1818
import pandas as pd
19+
import pyarrow as pa
1920
import pytest
2021

2122
import bigframes.bigquery as bbq
@@ -174,7 +175,7 @@ def test_json_extract_array_from_json_strings():
174175
actual = bbq.json_extract_array(s, "$.a")
175176
expected = bpd.Series(
176177
[['"ab"', '"2"', '"3 xy"'], [], ['"4"', '"5"'], None],
177-
dtype=pd.StringDtype(storage="pyarrow"),
178+
dtype=pd.ArrowDtype(pa.list_(pa.string())),
178179
)
179180
pd.testing.assert_series_equal(
180181
actual.to_pandas(),
@@ -190,7 +191,7 @@ def test_json_extract_array_from_json_array_strings():
190191
actual = bbq.json_extract_array(s)
191192
expected = bpd.Series(
192193
[["1", "2", "3"], [], ["4", "5"]],
193-
dtype=pd.StringDtype(storage="pyarrow"),
194+
dtype=pd.ArrowDtype(pa.list_(pa.string())),
194195
)
195196
pd.testing.assert_series_equal(
196197
actual.to_pandas(),

tests/system/small/test_dataframe.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,19 @@ def test_df_construct_inline_respects_location():
166166
assert table.location == "europe-west1"
167167

168168

169+
def test_df_construct_dtype():
170+
data = {
171+
"int_col": [1, 2, 3],
172+
"string_col": ["1.1", "2.0", "3.5"],
173+
"float_col": [1.0, 2.0, 3.0],
174+
}
175+
dtype = pd.StringDtype(storage="pyarrow")
176+
bf_result = dataframe.DataFrame(data, dtype=dtype)
177+
pd_result = pd.DataFrame(data, dtype=dtype)
178+
pd_result.index = pd_result.index.astype("Int64")
179+
pandas.testing.assert_frame_equal(bf_result.to_pandas(), pd_result)
180+
181+
169182
def test_get_column(scalars_dfs):
170183
scalars_df, scalars_pandas_df = scalars_dfs
171184
col_name = "int64_col"

tests/system/small/test_series.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,57 @@ def test_series_construct_geodata():
228228
)
229229

230230

231+
@pytest.mark.parametrize(
232+
("dtype"),
233+
[
234+
pytest.param(pd.Int64Dtype(), id="int"),
235+
pytest.param(pd.Float64Dtype(), id="float"),
236+
pytest.param(pd.StringDtype(storage="pyarrow"), id="string"),
237+
],
238+
)
239+
def test_series_construct_w_dtype_for_int(dtype):
240+
data = [1, 2, 3]
241+
expected = pd.Series(data, dtype=dtype)
242+
expected.index = expected.index.astype("Int64")
243+
series = bigframes.pandas.Series(data, dtype=dtype)
244+
pd.testing.assert_series_equal(series.to_pandas(), expected)
245+
246+
247+
def test_series_construct_w_dtype_for_struct():
248+
# The struct fields in this data are out of order; this test checks that they
249+
# are still handled correctly during construction.
250+
data = [
251+
{"a": 1, "c": "pandas", "b": dt.datetime(2020, 1, 20, 20, 20, 20, 20)},
252+
{"a": 2, "c": "pandas", "b": dt.datetime(2019, 1, 20, 20, 20, 20, 20)},
253+
{"a": 1, "c": "numpy", "b": None},
254+
]
255+
dtype = pd.ArrowDtype(
256+
pa.struct([("a", pa.int64()), ("c", pa.string()), ("b", pa.timestamp("us"))])
257+
)
258+
series = bigframes.pandas.Series(data, dtype=dtype)
259+
expected = pd.Series(data, dtype=dtype)
260+
expected.index = expected.index.astype("Int64")
261+
pd.testing.assert_series_equal(series.to_pandas(), expected)
262+
263+
264+
def test_series_construct_w_dtype_for_array_string():
265+
data = [["1", "2", "3"], [], ["4", "5"]]
266+
dtype = pd.ArrowDtype(pa.list_(pa.string()))
267+
series = bigframes.pandas.Series(data, dtype=dtype)
268+
expected = pd.Series(data, dtype=dtype)
269+
expected.index = expected.index.astype("Int64")
270+
pd.testing.assert_series_equal(series.to_pandas(), expected)
271+
272+
273+
def test_series_construct_w_dtype_for_array_struct():
274+
data = [[{"a": 1, "c": "aa"}, {"a": 2, "c": "bb"}], [], [{"a": 3, "c": "cc"}]]
275+
dtype = pd.ArrowDtype(pa.list_(pa.struct([("a", pa.int64()), ("c", pa.string())])))
276+
series = bigframes.pandas.Series(data, dtype=dtype)
277+
expected = pd.Series(data, dtype=dtype)
278+
expected.index = expected.index.astype("Int64")
279+
pd.testing.assert_series_equal(series.to_pandas(), expected)
280+
281+
231282
def test_series_keys(scalars_dfs):
232283
scalars_df, scalars_pandas_df = scalars_dfs
233284
bf_result = scalars_df["int64_col"].keys().to_pandas()

third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,8 @@ def dtypes(self):
178178
... ))
179179
... )
180180
>>> s.struct.dtypes()
181-
project string[pyarrow]
182181
version Int64
182+
project string[pyarrow]
183183
dtype: object
184184
185185
Returns:

0 commit comments

Comments
 (0)