Skip to content

Commit 5a2baa6

Browse files
committed
fix test_df_drop_duplicates_w_json
1 parent fe263bb commit 5a2baa6

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

bigframes/core/array_value.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def from_table(
108108
raise ValueError("must set at most one of 'offests', 'primary_key'")
109109
if any(i.field_type == "JSON" for i in table.schema if i.name in schema.names):
110110
msg = bfe.format_message(
111-
"JSON column interpretation as a custom PyArrow extention in `db_dtypes` "
111+
"JSON column interpretation as a custom PyArrow extention in `db_dtypes` "
112112
"is a preview feature and subject to change."
113113
)
114114
warnings.warn(msg, bfe.PreviewWarning)

tests/system/small/test_dataframe.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import bigframes.dataframe as dataframe
3333
import bigframes.pandas as bpd
3434
import bigframes.series as series
35+
import bigframes.dtypes as dtypes
3536
from tests.system.utils import (
3637
assert_dfs_equivalent,
3738
assert_pandas_df_equal,
@@ -4584,7 +4585,19 @@ def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, sub
45844585
)
45854586
def test_df_drop_duplicates_w_json(json_df, keep):
45864587
bf_df = json_df.drop_duplicates(keep=keep).to_pandas()
4587-
pd_df = json_df.to_pandas().drop_duplicates(keep=keep)
4588+
4589+
# drop_duplicates relies on pa.compute.dictionary_encode, which is incompatible
4590+
# with Arrow string extension types. Temporary conversion to pyarrow-backed pandas
4591+
# strings is required.
4592+
json_pandas_df = json_df.to_pandas()
4593+
json_pandas_df["json_col"] = json_pandas_df["json_col"].astype(
4594+
pd.StringDtype(storage="pyarrow")
4595+
)
4596+
4597+
pd_df = json_pandas_df.drop_duplicates(keep=keep)
4598+
pd_df["json_col"] = pd_df["json_col"].astype(
4599+
dtypes.JSON_DTYPE
4600+
)
45884601
pd.testing.assert_frame_equal(
45894602
pd_df,
45904603
bf_df,

0 commit comments

Comments
 (0)