Skip to content

Commit 46a9c53

Browse files
authored
feat: add bigframes.bigquery.json_value (#1697)
1 parent 9d4a59d commit 46a9c53

File tree

3 files changed

+64
-0
lines changed

3 files changed

+64
-0
lines changed

bigframes/bigquery/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
json_extract_array,
3939
json_extract_string_array,
4040
json_set,
41+
json_value,
4142
parse_json,
4243
)
4344
from bigframes.bigquery._operations.search import create_vector_index, vector_search
@@ -61,6 +62,7 @@
6162
"json_extract",
6263
"json_extract_array",
6364
"json_extract_string_array",
65+
"json_value",
6466
"parse_json",
6567
# search ops
6668
"create_vector_index",

bigframes/bigquery/_operations/json.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,40 @@ def json_extract_string_array(
231231
return array_series
232232

233233

234+
def json_value(
    input: series.Series,
    json_path: str,
) -> series.Series:
    """Pulls a JSON scalar out of each element and returns it as a SQL ``STRING``.

    In addition to the extraction itself, this function:

    - Strips the outermost quotes and unescapes the value.
    - Yields a SQL ``NULL`` when the path selects a non-scalar value.
    - Uses double quotes to escape invalid ``JSON_PATH`` characters in JSON keys.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bpd.options.display.progress_bar = None

        >>> s = bpd.Series(['{"name": "Jakob", "age": "6"}', '{"name": "Jakob", "age": []}'])
        >>> bbq.json_value(s, json_path="$.age")
        0       6
        1    <NA>
        dtype: string

    Args:
        input (bigframes.series.Series):
            Series holding the JSON data, either as native JSON objects or as
            JSON-formatted strings.
        json_path (str):
            JSON path that selects the value to pull out of each element.

    Returns:
        bigframes.series.Series: A new Series of string dtype holding the
        extracted scalar for each element, or ``<NA>`` where the path does not
        select a scalar.
    """
    # Build the operator first, then apply it element-wise to the Series.
    value_op = ops.JSONValue(json_path=json_path)
    return input._apply_unary_op(value_op)
266+
267+
234268
@utils.preview(name="The JSON-related API `parse_json`")
235269
def parse_json(
236270
input: series.Series,

tests/system/small/bigquery/test_json.py

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -212,6 +212,34 @@ def test_json_extract_string_array_w_invalid_series_type():
212212
bbq.json_extract_string_array(s)
213213

214214

215+
def test_json_value_from_json():
    """``json_value`` on a JSON-dtype Series returns strings for scalars only."""
    json_docs = ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}']
    source = bpd.Series(json_docs, dtype=dtypes.JSON_DTYPE)

    result = bbq.json_value(source, "$.a.b")

    # Row 0 selects an array and row 1 has no "b" key, so both are expected to
    # be null; only row 2 selects a scalar.
    want = bpd.Series([None, None, "0"], dtype=dtypes.STRING_DTYPE)
    pd.testing.assert_series_equal(result.to_pandas(), want.to_pandas())
224+
225+
226+
def test_json_value_from_string():
    """``json_value`` accepts JSON-formatted strings stored in a string Series."""
    json_docs = ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}']
    source = bpd.Series(json_docs, dtype=pd.StringDtype(storage="pyarrow"))

    result = bbq.json_value(source, "$.a.b")

    # Same expectation as for JSON-dtype input: only the last row selects a
    # scalar, the other two are expected to be null.
    want = bpd.Series([None, None, "0"], dtype=dtypes.STRING_DTYPE)
    pd.testing.assert_series_equal(result.to_pandas(), want.to_pandas())
235+
236+
237+
def test_json_value_w_invalid_series_type():
    """``json_value`` raises ``TypeError`` when given an integer Series."""
    # Build the Series outside the ``raises`` block so that only the
    # ``json_value`` call itself is checked for the TypeError.
    int_series = bpd.Series([1, 2])

    with pytest.raises(TypeError):
        bbq.json_value(int_series, "$.a")
241+
242+
215243
def test_parse_json_w_invalid_series_type():
216244
s = bpd.Series([1, 2])
217245
with pytest.raises(TypeError):

0 commit comments

Comments
 (0)