Skip to content

Commit a2edcbf

Browse files
committed
fix test_df_drop_duplicates_w_json
1 parent fe263bb commit a2edcbf

File tree

2 files changed

+13
-2
lines changed

2 files changed

+13
-2
lines changed

bigframes/core/array_value.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def from_table(
108108
raise ValueError("must set at most one of 'offests', 'primary_key'")
109109
if any(i.field_type == "JSON" for i in table.schema if i.name in schema.names):
110110
msg = bfe.format_message(
111-
"JSON column interpretation as a custom PyArrow extention in `db_dtypes` "
111+
"JSON column interpretation as a custom PyArrow extention in `db_dtypes` "
112112
"is a preview feature and subject to change."
113113
)
114114
warnings.warn(msg, bfe.PreviewWarning)

tests/system/small/test_dataframe.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import bigframes._config.display_options as display_options
3131
import bigframes.core.indexes as bf_indexes
3232
import bigframes.dataframe as dataframe
33+
import bigframes.dtypes as dtypes
3334
import bigframes.pandas as bpd
3435
import bigframes.series as series
3536
from tests.system.utils import (
@@ -4584,7 +4585,17 @@ def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, sub
45844585
)
45854586
def test_df_drop_duplicates_w_json(json_df, keep):
45864587
bf_df = json_df.drop_duplicates(keep=keep).to_pandas()
4587-
pd_df = json_df.to_pandas().drop_duplicates(keep=keep)
4588+
4589+
# drop_duplicates relies on pa.compute.dictionary_encode, which is incompatible
4590+
# with Arrow string extension types. Temporarily cast the JSON column to a
4591+
# pyarrow-backed pandas string dtype, then cast back after de-duplicating.
4592+
json_pandas_df = json_df.to_pandas()
4593+
json_pandas_df["json_col"] = json_pandas_df["json_col"].astype(
4594+
pd.StringDtype(storage="pyarrow")
4595+
)
4596+
4597+
pd_df = json_pandas_df.drop_duplicates(keep=keep)
4598+
pd_df["json_col"] = pd_df["json_col"].astype(dtypes.JSON_DTYPE)
45884599
pd.testing.assert_frame_equal(
45894600
pd_df,
45904601
bf_df,

0 commit comments

Comments
 (0)