Skip to content

Commit 1ddf360

Browse files
committed
feat: support string astype json
1 parent a84ee75 commit 1ddf360

File tree

4 files changed

+67
-0
lines changed

4 files changed

+67
-0
lines changed

bigframes/core/compile/ibis_types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ def cast_ibis_value(
146146
ibis_dtypes.date,
147147
ibis_dtypes.timestamp,
148148
ibis_dtypes.Timestamp(timezone="UTC"),
149+
ibis_dtypes.json,
149150
),
150151
ibis_dtypes.date: (
151152
ibis_dtypes.string,

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,6 +1148,10 @@ def astype_op_impl(x: ibis_types.Value, op: ops.AsTypeOp):
11481148
elif to_type == ibis_dtypes.time:
11491149
return x_converted.time()
11501150

1151+
if x.type() == ibis_dtypes.string and to_type == ibis_dtypes.json:
1152+
# TODO: test both the safe (SAFE.PARSE_JSON) and non-safe (PARSE_JSON) paths.
1153+
return parse_json_in_safe(x) if op.safe else parse_json(x)
1154+
11511155
# TODO: either inline this function, or push rest of this op into the function
11521156
return bigframes.core.compile.ibis_types.cast_ibis_value(x, to_type, safe=op.safe)
11531157

@@ -2031,6 +2035,11 @@ def parse_json(json_str: str) -> ibis_dtypes.JSON: # type: ignore[empty-body]
20312035
"""Converts a JSON-formatted STRING value to a JSON value."""
20322036

20332037

2038+
@ibis_udf.scalar.builtin(name="SAFE.PARSE_JSON")
2039+
def parse_json_in_safe(json_str: str) -> ibis_dtypes.JSON: # type: ignore[empty-body]
2040+
"""Converts a JSON-formatted STRING value to a JSON value via SAFE.PARSE_JSON.

Bound to the builtin named "SAFE.PARSE_JSON" (the non-safe counterpart is
``parse_json``). NOTE(review): presumably yields NULL instead of raising on
malformed input, per BigQuery's ``SAFE.`` prefix semantics -- confirm.
"""
2041+
2042+
20342043
@ibis_udf.scalar.builtin(name="json_set")
20352044
def json_set( # type: ignore[empty-body]
20362045
json_obj: ibis_dtypes.JSON, json_path: ibis_dtypes.String, json_value

tests/system/small/test_series.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import db_dtypes # type: ignore
2121
import geopandas as gpd # type: ignore
22+
import google.api_core.exceptions
2223
import numpy
2324
from packaging.version import Version
2425
import pandas as pd
@@ -3651,6 +3652,56 @@ def test_timestamp_astype_string():
36513652
assert bf_result.dtype == "string[pyarrow]"
36523653

36533654

3655+
def test_string_astype_json():
# Casts the same string data (valid JSON documents plus a None) to the JSON
# dtype in both pandas and BigQuery DataFrames and expects identical results.
3656+
json_data = [
3657+
"1",
3658+
None,
3659+
'["1","3","5"]',
3660+
'{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}',
3661+
]
3662+
pd_series = pd.Series(json_data, dtype=dtypes.STRING_DTYPE)
3663+
bf_series = series.Series(json_data, dtype=dtypes.STRING_DTYPE)
3664+
3665+
pd_result = pd_series.astype(dtypes.JSON_DTYPE)
3666+
bf_result = bf_series.astype(dtypes.JSON_DTYPE)
3667+
3668+
# Index type differences between the two libraries are deliberately ignored.
pd.testing.assert_series_equal(
3669+
bf_result.to_pandas(), pd_result, check_index_type=False
3670+
)
3671+
assert bf_result.dtype == dtypes.JSON_DTYPE
3672+
3673+
3674+
def test_string_astype_json_raise_error():
# With errors="raise", a string that is not valid JSON is expected to surface
# as a BadRequest from the astype(...).to_pandas() chain.
3675+
json_data = [
3676+
"this is not a valid json string",
3677+
]
3678+
bf_series = series.Series(json_data, dtype=dtypes.STRING_DTYPE)
3679+
with pytest.raises(
3680+
google.api_core.exceptions.BadRequest, match="syntax error while parsing value"
3681+
):
3682+
bf_series.astype(dtypes.JSON_DTYPE, errors="raise").to_pandas()
3683+
3684+
3685+
def test_string_astype_json_in_safe_mode():
# With errors="null", the one entry that is not valid JSON is expected to come
# back as None while the remaining entries are converted normally.
3686+
json_data = [
3687+
"1",
3688+
None,
3689+
"this is not a valid json string",
3690+
'["1","3","5"]',
3691+
'{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}',
3692+
]
3693+
bf_series = series.Series(json_data, dtype=dtypes.STRING_DTYPE)
3694+
bf_result = bf_series.astype(dtypes.JSON_DTYPE, errors="null")
3695+
3696+
# Build the expected series by nulling out the invalid entry (index 2).
json_data[2] = None
3697+
expected = pd.Series(json_data, dtype=dtypes.JSON_DTYPE)
3698+
3699+
pd.testing.assert_series_equal(
3700+
bf_result.to_pandas(), expected, check_index_type=False
3701+
)
3702+
assert bf_result.dtype == dtypes.JSON_DTYPE
3703+
3704+
36543705
@pytest.mark.parametrize(
36553706
"index",
36563707
[0, 5, -2],

third_party/bigframes_vendored/ibis/backends/sql/compilers/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,12 @@ def __sql_name__(self, op: ops.ScalarUDF | ops.AggUDF) -> str:
12221222
# not actually a table, but easier to quote individual namespace
12231223
# components this way
12241224
namespace = op.__udf_namespace__
1225+
1226+
# Function names prefixed with "SAFE.", such as `SAFE.PARSE_JSON`, are
1227+
# returned verbatim instead of being quoted as a table-style identifier.
1228+
if funcname.startswith("SAFE."):
1229+
return funcname
1230+
12251231
return sg.table(funcname, db=namespace.database, catalog=namespace.catalog).sql(
12261232
self.dialect
12271233
)

0 commit comments

Comments
 (0)