Skip to content

Commit a9a23d2

Browse files
committed
increase test coverage
1 parent d94faf3 commit a9a23d2

File tree

2 files changed

+30
-22
lines changed

2 files changed

+30
-22
lines changed

bigframes/core/utils.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -245,17 +245,17 @@ def replace_timedeltas_with_micros(dataframe: pd.DataFrame) -> List[str]:
245245
return updated_columns
246246

247247

248-
def search_for_nested_json_type(arrow_type: pa.DataType) -> bool:
248+
def _search_for_nested_json_type(arrow_type: pa.DataType) -> bool:
249249
"""
250-
Searches recursively for pa.JsonType within a PyArrow DataType.
250+
Searches recursively for JSON array type within a PyArrow DataType.
251251
"""
252-
if isinstance(arrow_type, pa.JsonType):
252+
if arrow_type == dtypes.JSON_ARROW_TYPE:
253253
return True
254254
if pa.types.is_list(arrow_type):
255-
return search_for_nested_json_type(arrow_type.value_type)
255+
return _search_for_nested_json_type(arrow_type.value_type)
256256
if pa.types.is_struct(arrow_type):
257257
return any(
258-
search_for_nested_json_type(field.type) for field in arrow_type.fields
258+
_search_for_nested_json_type(field.type) for field in arrow_type.fields
259259
)
260260
return False
261261

@@ -274,7 +274,9 @@ def replace_json_with_string(dataframe: pd.DataFrame) -> List[str]:
274274
if column_type == dtypes.JSON_DTYPE:
275275
dataframe[col] = dataframe[col].astype(dtypes.STRING_DTYPE)
276276
updated_columns.append(col)
277-
elif isinstance(column_type, pd.ArrowDtype):
277+
elif isinstance(column_type, pd.ArrowDtype) and _search_for_nested_json_type(
278+
column_type.pyarrow_dtype
279+
):
278280
raise NotImplementedError(
279281
f"Nested JSON types, found in column `{col}`: `{column_type}`', "
280282
f"are currently unsupported for upload. {constants.FEEDBACK_LINK}"
@@ -283,9 +285,11 @@ def replace_json_with_string(dataframe: pd.DataFrame) -> List[str]:
283285
if dataframe.index.dtype == dtypes.JSON_DTYPE:
284286
dataframe.index = dataframe.index.astype(dtypes.STRING_DTYPE)
285287
updated_columns.append(dataframe.index.name)
286-
elif isinstance(column_type, pd.ArrowDtype):
288+
elif isinstance(
289+
dataframe.index.dtype, pd.ArrowDtype
290+
) and _search_for_nested_json_type(dataframe.index.dtype.pyarrow_dtype):
287291
raise NotImplementedError(
288-
f"Nested JSON types, found in column `{col}`: `{column_type}`', "
292+
f"Nested JSON types, found in the index: `{dataframe.index.dtype}`', "
289293
f"are currently unsupported for upload. {constants.FEEDBACK_LINK}"
290294
)
291295

tests/system/small/test_session.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -840,19 +840,21 @@ def test_read_pandas_json_index(session, write_engine):
840840
)
841841
def test_read_pandas_w_nested_json(session, write_engine):
842842
data = [
843-
{"json_field": "1"},
844-
{"json_field": None},
845-
{"json_field": '["1","3","5"]'},
846-
{"json_field": '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}'},
843+
[{"json_field": "1"}],
844+
[{"json_field": None}],
845+
[{"json_field": '["1","3","5"]'}],
846+
[{"json_field": '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}'}],
847847
]
848848
# PyArrow currently lacks support for creating structs or lists containing extension types.
849849
# See issue: https://github.com/apache/arrow/issues/45262
850-
pa_array = pa.array(data, type=pa.struct([("name", pa.string())]))
850+
pa_array = pa.array(data, type=pa.list_(pa.struct([("name", pa.string())])))
851851
pd_s = pd.Series(
852852
arrays.ArrowExtensionArray(pa_array), # type: ignore
853-
dtype=pd.ArrowDtype(pa.struct([("name", bigframes.dtypes.JSON_ARROW_TYPE)])),
853+
dtype=pd.ArrowDtype(
854+
pa.list_(pa.struct([("name", bigframes.dtypes.JSON_ARROW_TYPE)]))
855+
),
854856
)
855-
with pytest.raises(NotImplementedError):
857+
with pytest.raises(NotImplementedError, match="Nested JSON types, found in column"):
856858
# Until b/401630655 is resolved, json not compatible with allow_large_results=False
857859
session.read_pandas(pd_s, write_engine=write_engine).to_pandas(
858860
allow_large_results=True
@@ -868,19 +870,21 @@ def test_read_pandas_w_nested_json(session, write_engine):
868870
)
869871
def test_read_pandas_w_nested_json_index(session, write_engine):
870872
data = [
871-
{"json_field": "1"},
872-
{"json_field": None},
873-
{"json_field": '["1","3","5"]'},
874-
{"json_field": '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}'},
873+
[{"json_field": "1"}],
874+
[{"json_field": None}],
875+
[{"json_field": '["1","3","5"]'}],
876+
[{"json_field": '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}'}],
875877
]
876878
# PyArrow currently lacks support for creating structs or lists containing extension types.
877879
# See issue: https://github.com/apache/arrow/issues/45262
878-
pa_array = pa.array(data, type=pa.struct([("name", pa.string())]))
880+
pa_array = pa.array(data, type=pa.list_(pa.struct([("name", pa.string())])))
879881
pd_idx: pd.Index = pd.Index(
880882
arrays.ArrowExtensionArray(pa_array), # type: ignore
881-
dtype=pd.ArrowDtype(pa.struct([("name", bigframes.dtypes.JSON_ARROW_TYPE)])),
883+
dtype=pd.ArrowDtype(
884+
pa.list_(pa.struct([("name", bigframes.dtypes.JSON_ARROW_TYPE)]))
885+
),
882886
)
883-
with pytest.raises(NotImplementedError):
887+
with pytest.raises(NotImplementedError, match="Nested JSON types, found in the index"):
884888
# Until b/401630655 is resolved, json not compatible with allow_large_results=False
885889
session.read_pandas(pd_idx, write_engine=write_engine).to_pandas(
886890
allow_large_results=True

0 commit comments

Comments
 (0)