From d5753efecefd6699ac0496704b9d5e8c0d682c17 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Wed, 12 Feb 2025 22:17:30 +0000
Subject: [PATCH 1/7] feat: use JSONArrowType for JSON data

---
 bigframes/core/compile/ibis_types.py    |  2 +-
 bigframes/dtypes.py                     |  2 +-
 bigframes/session/_io/pandas.py         |  3 ---
 tests/system/small/test_dataframe_io.py | 10 +++++-----
 tests/system/small/test_series.py       |  2 +-
 5 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py
index 54a5a37736..54b0a1408a 100644
--- a/bigframes/core/compile/ibis_types.py
+++ b/bigframes/core/compile/ibis_types.py
@@ -75,7 +75,7 @@
         IBIS_GEO_TYPE,
         gpd.array.GeometryDtype(),
     ),
-    (ibis_dtypes.json, db_dtypes.JSONDtype()),
+    (ibis_dtypes.json, pd.ArrowDtype(db_dtypes.JSONArrowType())),
 )
 
 BIGFRAMES_TO_IBIS: Dict[bigframes.dtypes.Dtype, ibis_dtypes.DataType] = {
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 5e9f1f108b..5f462222e0 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -62,7 +62,7 @@
 # No arrow equivalent
 GEO_DTYPE = gpd.array.GeometryDtype()
 # JSON
-JSON_DTYPE = db_dtypes.JSONDtype()
+JSON_DTYPE = pd.ArrowDtype(db_dtypes.JSONArrowType())
 OBJ_REF_DTYPE = pd.ArrowDtype(
     pa.struct(
         (
diff --git a/bigframes/session/_io/pandas.py b/bigframes/session/_io/pandas.py
index a1549238b3..ca70ee774c 100644
--- a/bigframes/session/_io/pandas.py
+++ b/bigframes/session/_io/pandas.py
@@ -18,7 +18,6 @@
 from typing import Collection, Union
 
 import bigframes_vendored.constants as constants
-import db_dtypes  # type: ignore
 import geopandas  # type: ignore
 import numpy as np
 import pandas
@@ -125,8 +124,6 @@ def arrow_to_pandas(
             )
         elif isinstance(dtype, pandas.ArrowDtype):
             series = _arrow_to_pandas_arrowdtype(column, dtype)
-        elif isinstance(dtype, db_dtypes.JSONDtype):
-            series = db_dtypes.JSONArray(column)
         else:
             series = column.to_pandas(types_mapper=lambda _: dtype)
 
diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index 4758c2d5b4..bfa920e940 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -304,7 +304,7 @@ def test_load_json_w_unboxed_py_value(session):
     """
     df = session.read_gbq(sql, index_col="id")
 
-    assert df.dtypes["json_col"] == db_dtypes.JSONDtype()
+    assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
     assert isinstance(df["json_col"][0], dict)
 
     assert df["json_col"][0]["boolean"]
@@ -321,13 +321,13 @@ def test_load_json_w_unboxed_py_value(session):
 
 def test_load_json_to_pandas_has_correct_result(session):
     df = session.read_gbq("SELECT JSON_OBJECT('foo', 10, 'bar', TRUE) AS json_col")
-    assert df.dtypes["json_col"] == db_dtypes.JSONDtype()
+    assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
     result = df.to_pandas()
 
     # The order of keys within the JSON object shouldn't matter for equality checks.
     pd_df = pd.DataFrame(
         {"json_col": [{"bar": True, "foo": 10}]},
-        dtype=db_dtypes.JSONDtype(),
+        dtype=pd.ArrowDtype(db_dtypes.JSONArrowType()),
     )
     pd_df.index = pd_df.index.astype("Int64")
     pd.testing.assert_series_equal(result.dtypes, pd_df.dtypes)
@@ -365,7 +365,7 @@ def test_load_json_in_struct(session):
     assert isinstance(df.dtypes["struct_col"].pyarrow_dtype, pa.StructType)
 
     data = df["struct_col"].struct.field("data")
-    assert data.dtype == db_dtypes.JSONDtype()
+    assert data.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
 
     assert data[0]["boolean"]
     assert data[1]["int"] == 100
@@ -406,7 +406,7 @@ def test_load_json_in_array(session):
 
     data = df["array_col"].list
     assert data.len()[0] == 7
-    assert data[0].dtype == db_dtypes.JSONDtype()
+    assert data[0].dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
 
     assert data[0][0]["boolean"]
     assert data[1][0]["int"] == 100
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 980f2226b7..c1594983d1 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -383,7 +383,7 @@ def test_get_column(scalars_dfs, col_name, expected_dtype):
 def test_get_column_w_json(json_df, json_pandas_df):
     series = json_df["json_col"]
     series_pandas = series.to_pandas()
-    assert series.dtype == db_dtypes.JSONDtype()
+    assert series.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
     assert series_pandas.shape[0] == json_pandas_df.shape[0]
 
 

From d80e641824a560200faaa2a3b4ae06bfa5ec78e9 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Wed, 5 Mar 2025 22:19:17 +0000
Subject: [PATCH 2/7] fix related system tests

---
 bigframes/core/array_value.py            |  4 +-
 setup.py                                 |  3 +-
 tests/system/small/bigquery/test_json.py | 59 ++++++++++----------
 tests/system/small/test_dataframe_io.py  | 71 ++++++++++++------------
 tests/system/small/test_series.py        | 25 +++++----
 tests/system/small/test_session.py       | 24 ++++----
 6 files changed, 94 insertions(+), 92 deletions(-)

diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py
index 9c44255941..431bc25464 100644
--- a/bigframes/core/array_value.py
+++ b/bigframes/core/array_value.py
@@ -108,8 +108,8 @@ def from_table(
             raise ValueError("must set at most one of 'offests', 'primary_key'")
         if any(i.field_type == "JSON" for i in table.schema if i.name in schema.names):
             msg = bfe.format_message(
-                "Interpreting JSON column(s) as the `db_dtypes.dbjson` extension type is "
-                "in preview; this behavior may change in future versions."
+                "JSON column interpretation as a custom PyArrow extention in `db_dtypes`  "
+                "is a preview feature and subject to change."
             )
             warnings.warn(msg, bfe.PreviewWarning)
         # define data source only for needed columns, this makes row-hashing cheaper
diff --git a/setup.py b/setup.py
index 9ea563b3cb..cb61043375 100644
--- a/setup.py
+++ b/setup.py
@@ -60,7 +60,8 @@
     "ipywidgets >=7.7.1",
     "humanize >=4.6.0",
     "matplotlib >=3.7.1",
-    "db-dtypes >=1.4.0",
+    "db-dtypes@ git+https://github.com/googleapis/python-db-dtypes-pandas.git@main",
+    # "db-dtypes >=1.4.0",
     # For vendored ibis-framework.
     "atpublic>=2.3,<6",
     "parsy>=2,<3",
diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py
index 492c0cf9b6..bade725733 100644
--- a/tests/system/small/bigquery/test_json.py
+++ b/tests/system/small/bigquery/test_json.py
@@ -12,30 +12,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import db_dtypes  # type: ignore
 import geopandas as gpd  # type: ignore
 import pandas as pd
 import pyarrow as pa
 import pytest
 
 import bigframes.bigquery as bbq
-import bigframes.dtypes
+import bigframes.dtypes as dtypes
 import bigframes.pandas as bpd
 
 
 @pytest.mark.parametrize(
     ("json_path", "expected_json"),
     [
-        pytest.param("$.a", [{"a": 10}], id="simple"),
-        pytest.param("$.a.b.c", [{"a": {"b": {"c": 10, "d": []}}}], id="nested"),
+        pytest.param("$.a", ['{"a": 10}'], id="simple"),
+        pytest.param("$.a.b.c", ['{"a": {"b": {"c": 10, "d": []}}}'], id="nested"),
     ],
 )
 def test_json_set_at_json_path(json_path, expected_json):
-    original_json = [{"a": {"b": {"c": "tester", "d": []}}}]
-    s = bpd.Series(original_json, dtype=db_dtypes.JSONDtype())
+    original_json = ['{"a": {"b": {"c": "tester", "d": []}}}']
+    s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE)
     actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)])
 
-    expected = bpd.Series(expected_json, dtype=db_dtypes.JSONDtype())
+    expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
     pd.testing.assert_series_equal(
         actual.to_pandas(),
         expected.to_pandas(),
@@ -45,18 +44,20 @@ def test_json_set_at_json_path(json_path, expected_json):
 @pytest.mark.parametrize(
     ("json_value", "expected_json"),
     [
-        pytest.param(10, [{"a": {"b": 10}}, {"a": {"b": 10}}], id="int"),
-        pytest.param(0.333, [{"a": {"b": 0.333}}, {"a": {"b": 0.333}}], id="float"),
-        pytest.param("eng", [{"a": {"b": "eng"}}, {"a": {"b": "eng"}}], id="string"),
-        pytest.param([1, 2], [{"a": {"b": 1}}, {"a": {"b": 2}}], id="series"),
+        pytest.param(10, ['{"a": {"b": 10}}', '{"a": {"b": 10}}'], id="int"),
+        pytest.param(0.333, ['{"a": {"b": 0.333}}', '{"a": {"b": 0.333}}'], id="float"),
+        pytest.param(
+            "eng", ['{"a": {"b": "eng"}}', '{"a": {"b": "eng"}}'], id="string"
+        ),
+        pytest.param([1, 2], ['{"a": {"b": 1}}', '{"a": {"b": 2}}'], id="series"),
     ],
 )
 def test_json_set_at_json_value_type(json_value, expected_json):
-    original_json = [{"a": {"b": "dev"}}, {"a": {"b": [1, 2]}}]
-    s = bpd.Series(original_json, dtype=db_dtypes.JSONDtype())
+    original_json = ['{"a": {"b": "dev"}}', '{"a": {"b": [1, 2]}}']
+    s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE)
     actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b", json_value)])
 
-    expected = bpd.Series(expected_json, dtype=db_dtypes.JSONDtype())
+    expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
     pd.testing.assert_series_equal(
         actual.to_pandas(),
         expected.to_pandas(),
@@ -64,14 +65,14 @@ def test_json_set_at_json_value_type(json_value, expected_json):
 
 
 def test_json_set_w_more_pairs():
-    original_json = [{"a": 2}, {"b": 5}, {"c": 1}]
-    s = bpd.Series(original_json, dtype=db_dtypes.JSONDtype())
+    original_json = ['{"a": 2}', '{"b": 5}', '{"c": 1}']
+    s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE)
     actual = bbq.json_set(
         s, json_path_value_pairs=[("$.a", 1), ("$.b", 2), ("$.a", [3, 4, 5])]
     )
 
-    expected_json = [{"a": 3, "b": 2}, {"a": 4, "b": 2}, {"a": 5, "b": 2, "c": 1}]
-    expected = bpd.Series(expected_json, dtype=db_dtypes.JSONDtype())
+    expected_json = ['{"a": 3, "b": 2}', '{"a": 4, "b": 2}', '{"a": 5, "b": 2, "c": 1}']
+    expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
     pd.testing.assert_series_equal(
         actual.to_pandas(),
         expected.to_pandas(),
@@ -79,13 +80,13 @@ def test_json_set_w_more_pairs():
 
 
 def test_json_set_w_invalid_json_path_value_pairs():
-    s = bpd.Series([{"a": 10}], dtype=db_dtypes.JSONDtype())
+    s = bpd.Series(['{"a": 10}'], dtype=dtypes.JSON_DTYPE)
     with pytest.raises(ValueError):
         bbq.json_set(s, json_path_value_pairs=[("$.a", 1, 100)])  # type: ignore
 
 
 def test_json_set_w_invalid_value_type():
-    s = bpd.Series([{"a": 10}], dtype=db_dtypes.JSONDtype())
+    s = bpd.Series(['{"a": 10}'], dtype=dtypes.JSON_DTYPE)
     with pytest.raises(TypeError):
         bbq.json_set(
             s,
@@ -101,17 +102,18 @@ def test_json_set_w_invalid_value_type():
 
 
 def test_json_set_w_invalid_series_type():
+    s = bpd.Series([1, 2])
     with pytest.raises(TypeError):
-        bbq.json_set(bpd.Series([1, 2]), json_path_value_pairs=[("$.a", 1)])
+        bbq.json_set(s, json_path_value_pairs=[("$.a", 1)])
 
 
 def test_json_extract_from_json():
     s = bpd.Series(
-        [{"a": {"b": [1, 2]}}, {"a": {"c": 1}}, {"a": {"b": 0}}],
-        dtype=db_dtypes.JSONDtype(),
+        ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}'],
+        dtype=dtypes.JSON_DTYPE,
     )
     actual = bbq.json_extract(s, "$.a.b").to_pandas()
-    expected = bpd.Series([[1, 2], None, 0], dtype=db_dtypes.JSONDtype()).to_pandas()
+    expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE).to_pandas()
     pd.testing.assert_series_equal(
         actual,
         expected,
@@ -132,14 +134,15 @@ def test_json_extract_from_string():
 
 
 def test_json_extract_w_invalid_series_type():
+    s = bpd.Series([1, 2])
     with pytest.raises(TypeError):
-        bbq.json_extract(bpd.Series([1, 2]), "$.a")
+        bbq.json_extract(s, "$.a")
 
 
 def test_json_extract_array_from_json():
     s = bpd.Series(
-        [{"a": ["ab", "2", "3 xy"]}, {"a": []}, {"a": ["4", "5"]}, {}],
-        dtype=db_dtypes.JSONDtype(),
+        ['{"a": ["ab", "2", "3 xy"]}', '{"a": []}', '{"a": ["4", "5"]}', "{}"],
+        dtype=dtypes.JSON_DTYPE,
     )
     actual = bbq.json_extract_array(s, "$.a")
 
@@ -225,7 +228,7 @@ def test_json_extract_string_array_from_array_strings():
 
 def test_json_extract_string_array_as_float_array_from_array_strings():
     s = bpd.Series(["[1, 2.5, 3]", "[]", "[4,5]"])
-    actual = bbq.json_extract_string_array(s, value_dtype=bigframes.dtypes.FLOAT_DTYPE)
+    actual = bbq.json_extract_string_array(s, value_dtype=dtypes.FLOAT_DTYPE)
     expected = bpd.Series([[1, 2.5, 3], [], [4, 5]])
     pd.testing.assert_series_equal(
         actual.to_pandas(),
diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index bfa920e940..149821a741 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -22,6 +22,7 @@
 import pyarrow as pa
 import pytest
 
+import bigframes.dtypes as dtypes
 from tests.system import utils
 
 try:
@@ -278,7 +279,7 @@ def test_to_arrow_override_global_option(scalars_df_index):
     assert scalars_df_index._query_job.destination.table_id == table_id
 
 
-def test_load_json_w_unboxed_py_value(session):
+def test_load_json_w_json_string_items(session):
     sql = """
         SELECT 0 AS id, JSON_OBJECT('boolean', True) AS json_col,
         UNION ALL
@@ -292,31 +293,32 @@ def test_load_json_w_unboxed_py_value(session):
         UNION ALL
         SELECT 5, JSON_OBJECT('null', null),
         UNION ALL
+        SELECT 6, JSON_OBJECT('b', 2, 'a', 1),
+        UNION ALL
         SELECT
-            6,
+            7,
             JSON_OBJECT(
                 'dict',
                 JSON_OBJECT(
                     'int', 1,
-                    'array', [JSON_OBJECT('bar', 'hello'), JSON_OBJECT('foo', 1)]
+                    'array', [JSON_OBJECT('foo', 1), JSON_OBJECT('bar', 'hello')]
                 )
             ),
     """
     df = session.read_gbq(sql, index_col="id")
 
     assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
-    assert isinstance(df["json_col"][0], dict)
 
-    assert df["json_col"][0]["boolean"]
-    assert df["json_col"][1]["int"] == 100
-    assert math.isclose(df["json_col"][2]["float"], 0.98)
-    assert df["json_col"][3]["string"] == "hello world"
-    assert df["json_col"][4]["array"] == [8, 9, 10]
-    assert df["json_col"][5]["null"] is None
-    assert df["json_col"][6]["dict"] == {
-        "int": 1,
-        "array": [{"bar": "hello"}, {"foo": 1}],
-    }
+    assert df["json_col"][0] == '{"boolean":true}'
+    assert df["json_col"][1] == '{"int":100}'
+    assert df["json_col"][2] == '{"float":0.98}'
+    assert df["json_col"][3] == '{"string":"hello world"}'
+    assert df["json_col"][4] == '{"array":[8,9,10]}'
+    assert df["json_col"][5] == '{"null":null}'
+
+    # Verifies JSON strings preserve array order, regardless of dictionary key order.
+    assert df["json_col"][6] == '{"a":1,"b":2}'
+    assert df["json_col"][7] == '{"dict":{"array":[{"bar":"hello"},{"foo":1}],"int":1}}'
 
 
 def test_load_json_to_pandas_has_correct_result(session):
@@ -324,9 +326,9 @@ def test_load_json_to_pandas_has_correct_result(session):
     assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
     result = df.to_pandas()
 
-    # The order of keys within the JSON object shouldn't matter for equality checks.
+    # These JSON strings are compatible with BigQuery's JSON storage.
     pd_df = pd.DataFrame(
-        {"json_col": [{"bar": True, "foo": 10}]},
+        {"json_col": ['{"bar":true,"foo":10}']},
         dtype=pd.ArrowDtype(db_dtypes.JSONArrowType()),
     )
     pd_df.index = pd_df.index.astype("Int64")
@@ -367,16 +369,13 @@ def test_load_json_in_struct(session):
     data = df["struct_col"].struct.field("data")
     assert data.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
 
-    assert data[0]["boolean"]
-    assert data[1]["int"] == 100
-    assert math.isclose(data[2]["float"], 0.98)
-    assert data[3]["string"] == "hello world"
-    assert data[4]["array"] == [8, 9, 10]
-    assert data[5]["null"] is None
-    assert data[6]["dict"] == {
-        "int": 1,
-        "array": [{"bar": "hello"}, {"foo": 1}],
-    }
+    assert data[0] == '{"boolean":true}'
+    assert data[1] == '{"int":100}'
+    assert data[2] == '{"float":0.98}'
+    assert data[3] == '{"string":"hello world"}'
+    assert data[4] == '{"array":[8,9,10]}'
+    assert data[5] == '{"null":null}'
+    assert data[6] == '{"dict":{"array":[{"bar":"hello"},{"foo":1}],"int":1}}'
 
 
 def test_load_json_in_array(session):
@@ -408,16 +407,13 @@ def test_load_json_in_array(session):
     assert data.len()[0] == 7
     assert data[0].dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
 
-    assert data[0][0]["boolean"]
-    assert data[1][0]["int"] == 100
-    assert math.isclose(data[2][0]["float"], 0.98)
-    assert data[3][0]["string"] == "hello world"
-    assert data[4][0]["array"] == [8, 9, 10]
-    assert data[5][0]["null"] is None
-    assert data[6][0]["dict"] == {
-        "int": 1,
-        "array": [{"bar": "hello"}, {"foo": 1}],
-    }
+    assert data[0][0] == '{"boolean":true}'
+    assert data[1][0] == '{"int":100}'
+    assert data[2][0] == '{"float":0.98}'
+    assert data[3][0] == '{"string":"hello world"}'
+    assert data[4][0] == '{"array":[8,9,10]}'
+    assert data[5][0] == '{"null":null}'
+    assert data[6][0] == '{"dict":{"array":[{"bar":"hello"},{"foo":1}],"int":1}}'
 
 
 def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index):
@@ -691,7 +687,8 @@ def test_to_gbq_w_json(bigquery_client):
     """Test the `to_gbq` API can get a JSON column."""
     s1 = bpd.Series([1, 2, 3, 4])
     s2 = bpd.Series(
-        ["a", 1, False, ["a", {"b": 1}], {"c": [1, 2, 3]}], dtype=db_dtypes.JSONDtype()
+        ['"a"', "1", "false", '["a", {"b": 1}]', '{"c": [1, 2, 3]}'],
+        dtype=dtypes.JSON_DTYPE,
     )
 
     df = bpd.DataFrame({"id": s1, "json_col": s2})
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index c1594983d1..7df115d5f7 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -26,6 +26,7 @@
 import pytest
 import shapely  # type: ignore
 
+import bigframes.dtypes as dtypes
 import bigframes.features
 import bigframes.pandas
 import bigframes.series as series
@@ -304,22 +305,22 @@ def test_series_construct_w_dtype_for_array_struct():
 
 def test_series_construct_w_dtype_for_json():
     data = [
-        1,
-        "str",
-        False,
-        ["a", {"b": 1}, None],
+        "1",
+        '"str"',
+        "false",
+        '["a", {"b": 1}, null]',
         None,
-        {"a": {"b": [1, 2, 3], "c": True}},
+        '{"a": {"b": [1, 2, 3], "c": true}}',
     ]
-    s = bigframes.pandas.Series(data, dtype=db_dtypes.JSONDtype())
+    s = bigframes.pandas.Series(data, dtype=dtypes.JSON_DTYPE)
 
-    assert s[0] == 1
-    assert s[1] == "str"
-    assert s[2] is False
-    assert s[3][0] == "a"
-    assert s[3][1]["b"] == 1
+    assert s[0] == "1"
+    assert s[1] == '"str"'
+    assert s[2] == "false"
+    # TODO: check old branch results for null.
+    assert s[3] == '["a",{"b":1},null]'
     assert pd.isna(s[4])
-    assert s[5]["a"] == {"b": [1, 2, 3], "c": True}
+    assert s[5] == '{"a":{"b":[1,2,3],"c":true}}'
 
 
 def test_series_keys(scalars_dfs):
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index 0c8da52774..c1a16492cc 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -759,13 +759,13 @@ def test_read_pandas_timedelta_index(session, write_engine):
 )
 def test_read_pandas_json_dataframes(session, write_engine):
     json_data = [
-        1,
+        "1",
         None,
-        ["1", "3", "5"],
-        {"a": 1, "b": ["x", "y"], "c": {"z": False, "x": []}},
+        '["1","3","5"]',
+        '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}',
     ]
     expected_df = pd.DataFrame(
-        {"my_col": pd.Series(json_data, dtype=db_dtypes.JSONDtype())}
+        {"my_col": pd.Series(json_data, dtype=bigframes.dtypes.JSON_DTYPE)}
     )
 
     actual_result = session.read_pandas(
@@ -783,12 +783,12 @@ def test_read_pandas_json_dataframes(session, write_engine):
 )
 def test_read_pandas_json_series(session, write_engine):
     json_data = [
-        1,
+        "1",
         None,
-        ["1", "3", "5"],
-        {"a": 1, "b": ["x", "y"], "c": {"z": False, "x": []}},
+        '["1","3","5"]',
+        '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}',
     ]
-    expected_series = pd.Series(json_data, dtype=db_dtypes.JSONDtype())
+    expected_series = pd.Series(json_data, dtype=bigframes.dtypes.JSON_DTYPE)
 
     actual_result = session.read_pandas(
         expected_series, write_engine=write_engine
@@ -807,12 +807,12 @@ def test_read_pandas_json_series(session, write_engine):
 )
 def test_read_pandas_json_index(session, write_engine):
     json_data = [
-        1,
+        "1",
         None,
-        ["1", "3", "5"],
-        {"a": 1, "b": ["x", "y"], "c": {"z": False, "x": []}},
+        '["1","3","5"]',
+        '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}',
     ]
-    expected_index = pd.Index(json_data, dtype=db_dtypes.JSONDtype())
+    expected_index: pd.Index = pd.Index(json_data, dtype=bigframes.dtypes.JSON_DTYPE)
     actual_result = session.read_pandas(
         expected_index, write_engine=write_engine
     ).to_pandas()

From 416a2268a18639296b9417042de48a409d551722 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Wed, 5 Mar 2025 23:09:50 +0000
Subject: [PATCH 3/7] fixes lint and doctest

---
 bigframes/bigquery/_operations/json.py  | 4 ++--
 bigframes/dtypes.py                     | 1 -
 tests/system/small/test_dataframe_io.py | 2 --
 tests/system/small/test_session.py      | 1 -
 4 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/bigframes/bigquery/_operations/json.py b/bigframes/bigquery/_operations/json.py
index 0223811ebc..07efc5fa51 100644
--- a/bigframes/bigquery/_operations/json.py
+++ b/bigframes/bigquery/_operations/json.py
@@ -53,7 +53,7 @@ def json_set(
         >>> s = bpd.read_gbq("SELECT JSON '{\\\"a\\\": 1}' AS data")["data"]
         >>> bbq.json_set(s, json_path_value_pairs=[("$.a", 100), ("$.b", "hi")])
             0    {"a":100,"b":"hi"}
-            Name: data, dtype: dbjson
+            Name: data, dtype: extension<dbjson<JSONArrowType>>[pyarrow]
 
     Args:
         input (bigframes.series.Series):
@@ -253,7 +253,7 @@ def parse_json(
         dtype: string
         >>> bbq.parse_json(s)
         0    {"class":{"students":[{"id":5},{"id":12}]}}
-        dtype: dbjson
+        dtype: extension<dbjson<JSONArrowType>>[pyarrow]
 
     Args:
         input (bigframes.series.Series):
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 5f462222e0..6b17fe0736 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -301,7 +301,6 @@ def is_object_like(type_: Union[ExpressionType, str]) -> bool:
     return type_ in ("object", "O") or (
         getattr(type_, "kind", None) == "O"
         and getattr(type_, "storage", None) != "pyarrow"
-        and getattr(type_, "name", None) != "dbjson"
     )
 
 
diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index 149821a741..16c3d19801 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import math
 from typing import Tuple
 
 import db_dtypes  # type:ignore
@@ -36,7 +35,6 @@
 from google.cloud import bigquery
 
 import bigframes
-from bigframes import dtypes
 import bigframes.dataframe
 import bigframes.features
 import bigframes.pandas as bpd
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index c1a16492cc..4b7495694b 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -22,7 +22,6 @@
 import warnings
 
 import bigframes_vendored.pandas.io.gbq as vendored_pandas_gbq
-import db_dtypes  # type: ignore
 import google
 import google.cloud.bigquery as bigquery
 import numpy as np

From 4dd298532d21244f105d1e6b25cbb1b199f2ab29 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Thu, 6 Mar 2025 00:00:57 +0000
Subject: [PATCH 4/7] switch db_dtypes to 1.4.2

---
 setup.py                    | 3 +--
 testing/constraints-3.9.txt | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index cb61043375..34e013c9a3 100644
--- a/setup.py
+++ b/setup.py
@@ -60,8 +60,7 @@
     "ipywidgets >=7.7.1",
     "humanize >=4.6.0",
     "matplotlib >=3.7.1",
-    "db-dtypes@ git+https://github.com/googleapis/python-db-dtypes-pandas.git@main",
-    # "db-dtypes >=1.4.0",
+    "db-dtypes >=1.4.2",
     # For vendored ibis-framework.
     "atpublic>=2.3,<6",
     "parsy>=2,<3",
diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt
index b355e0915b..8c7c69efa7 100644
--- a/testing/constraints-3.9.txt
+++ b/testing/constraints-3.9.txt
@@ -25,7 +25,7 @@ tabulate==0.9
 ipywidgets==7.7.1
 humanize==4.6.0
 matplotlib==3.7.1
-db-dtypes==1.4.0
+db-dtypes==1.4.2
 # For vendored ibis-framework.
 atpublic==2.3
 parsy==2.0

From fe263bbbde49b3cb7ca989f8067f7d8fb9e1df6b Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Thu, 6 Mar 2025 05:47:51 +0000
Subject: [PATCH 5/7] fix tests

---
 tests/system/small/test_dataframe_io.py | 6 +++---
 tests/system/small/test_series.py       | 1 -
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index 16c3d19801..e80668939a 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -316,7 +316,7 @@ def test_load_json_w_json_string_items(session):
 
     # Verifies JSON strings preserve array order, regardless of dictionary key order.
     assert df["json_col"][6] == '{"a":1,"b":2}'
-    assert df["json_col"][7] == '{"dict":{"array":[{"bar":"hello"},{"foo":1}],"int":1}}'
+    assert df["json_col"][7] == '{"dict":{"array":[{"foo":1},{"bar":"hello"}],"int":1}}'
 
 
 def test_load_json_to_pandas_has_correct_result(session):
@@ -355,7 +355,7 @@ def test_load_json_in_struct(session):
                 'dict',
                 JSON_OBJECT(
                     'int', 1,
-                    'array', [JSON_OBJECT('bar', 'hello'), JSON_OBJECT('foo', 1)]
+                    'array', [JSON_OBJECT('foo', 1), JSON_OBJECT('bar', 'hello')]
                 )
             ), 7),
     """
@@ -373,7 +373,7 @@ def test_load_json_in_struct(session):
     assert data[3] == '{"string":"hello world"}'
     assert data[4] == '{"array":[8,9,10]}'
     assert data[5] == '{"null":null}'
-    assert data[6] == '{"dict":{"array":[{"bar":"hello"},{"foo":1}],"int":1}}'
+    assert data[6] == '{"dict":{"array":[{"foo":1},{"bar":"hello"}],"int":1}}'
 
 
 def test_load_json_in_array(session):
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 7df115d5f7..d62af962fc 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -317,7 +317,6 @@ def test_series_construct_w_dtype_for_json():
     assert s[0] == "1"
     assert s[1] == '"str"'
     assert s[2] == "false"
-    # TODO: check old branch results for null.
     assert s[3] == '["a",{"b":1},null]'
     assert pd.isna(s[4])
     assert s[5] == '{"a":{"b":[1,2,3],"c":true}}'

From a2edcbf650e9874116d8e642fa658d012c38226c Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Thu, 6 Mar 2025 23:55:13 +0000
Subject: [PATCH 6/7] fix test_df_drop_duplicates_w_json

---
 bigframes/core/array_value.py        |  2 +-
 tests/system/small/test_dataframe.py | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py
index 431bc25464..7ede7b7e65 100644
--- a/bigframes/core/array_value.py
+++ b/bigframes/core/array_value.py
@@ -108,7 +108,7 @@ def from_table(
             raise ValueError("must set at most one of 'offests', 'primary_key'")
         if any(i.field_type == "JSON" for i in table.schema if i.name in schema.names):
             msg = bfe.format_message(
-                "JSON column interpretation as a custom PyArrow extention in `db_dtypes`  "
+                "JSON column interpretation as a custom PyArrow extension in `db_dtypes` "
                 "is a preview feature and subject to change."
             )
             warnings.warn(msg, bfe.PreviewWarning)
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index f80b811217..9415f9657e 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -30,6 +30,7 @@
 import bigframes._config.display_options as display_options
 import bigframes.core.indexes as bf_indexes
 import bigframes.dataframe as dataframe
+import bigframes.dtypes as dtypes
 import bigframes.pandas as bpd
 import bigframes.series as series
 from tests.system.utils import (
@@ -4584,7 +4585,17 @@ def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, sub
 )
 def test_df_drop_duplicates_w_json(json_df, keep):
     bf_df = json_df.drop_duplicates(keep=keep).to_pandas()
-    pd_df = json_df.to_pandas().drop_duplicates(keep=keep)
+
+    # drop_duplicates relies on pa.compute.dictionary_encode, which is incompatible
+    # with Arrow string extension types. Temporary conversion to standard Pandas
+    # strings is required.
+    json_pandas_df = json_df.to_pandas()
+    json_pandas_df["json_col"] = json_pandas_df["json_col"].astype(
+        pd.StringDtype(storage="pyarrow")
+    )
+
+    pd_df = json_pandas_df.drop_duplicates(keep=keep)
+    pd_df["json_col"] = pd_df["json_col"].astype(dtypes.JSON_DTYPE)
     pd.testing.assert_frame_equal(
         pd_df,
         bf_df,

From d30c4b56dcd7a390e0019f8b4d25e1bc2a8f6c52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <tswast@gmail.com>
Date: Mon, 10 Mar 2025 16:15:48 -0500
Subject: [PATCH 7/7] commit suggestion

---
 bigframes/dtypes.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 6b17fe0736..22cc521e8e 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -62,7 +62,9 @@
 # No arrow equivalent
 GEO_DTYPE = gpd.array.GeometryDtype()
 # JSON
-JSON_DTYPE = pd.ArrowDtype(db_dtypes.JSONArrowType())
+# TODO: switch to pyarrow.json_(pyarrow.string()) when available.
+JSON_ARROW_TYPE = db_dtypes.JSONArrowType()
+JSON_DTYPE = pd.ArrowDtype(JSON_ARROW_TYPE)
 OBJ_REF_DTYPE = pd.ArrowDtype(
     pa.struct(
         (
@@ -80,7 +82,7 @@
             ),
             pa.field(
                 "details",
-                db_dtypes.JSONArrowType(),
+                JSON_ARROW_TYPE,
             ),
         )
     )