Skip to content

Commit 4a2197b

Browse files
committed
add whatsnew and other minor changes (pandas-dev#41876)
1 parent 1c7c10c commit 4a2197b

File tree

3 files changed

+36
-30
lines changed

3 files changed

+36
-30
lines changed

doc/source/whatsnew/v1.4.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ MultiIndex
172172

173173
I/O
174174
^^^
175-
-
175+
- Bug in :func:`json_normalize` where ``errors='ignore'`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`)
176176
-
177177

178178
Period

pandas/io/json/_normalize.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -380,14 +380,32 @@ def _json_normalize(
380380
Returns normalized data with columns prefixed with the given string.
381381
"""
382382

383-
def _pull_field(js: dict[str, Any], spec: list | str) -> Scalar | Iterable:
383+
def _pull_field(
384+
js: dict[str, Any], spec: list | str, extract_record: bool = False
385+
) -> Scalar | Iterable:
384386
"""Internal function to pull field"""
385387
result = js
386-
if isinstance(spec, list):
387-
for field in spec:
388-
result = result[field]
389-
else:
390-
result = result[spec]
388+
try:
389+
if isinstance(spec, list):
390+
for field in spec:
391+
result = result[field]
392+
else:
393+
result = result[spec]
394+
except KeyError as e:
395+
if extract_record:
396+
raise KeyError(
397+
f"Key {e} not found. If specifying a record_path, all elements of "
398+
f"data should have the path."
399+
) from e
400+
else:
401+
if errors == "ignore":
402+
result = np.nan
403+
else:
404+
raise KeyError(
405+
f"Key {e} not found. To replace missing values of {e} with "
406+
f"np.nan, pass in errors='ignore'"
407+
) from e
408+
391409
return result
392410

393411
def _pull_records(js: dict[str, Any], spec: list | str) -> list:
@@ -396,7 +414,7 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list:
396414
Like _pull_field, but requires the result to be a list, and raises an error
397415
if the result is a non-iterable value.
398416
"""
399-
result = _pull_field(js, spec)
417+
result = _pull_field(js, spec, extract_record=True)
400418

401419
# GH 31507 GH 30145, GH 26284 if result is not list, raise TypeError if not
402420
# null, otherwise return an empty list
@@ -469,16 +487,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
469487
for obj in data:
470488
for val, key in zip(_meta, meta_keys):
471489
if level + 1 == len(val):
472-
try:
473-
seen_meta[key] = _pull_field(obj, val[-1])
474-
except KeyError as e:
475-
if errors == "ignore":
476-
seen_meta[key] = np.nan
477-
else:
478-
raise KeyError(
479-
"Try running with errors='ignore' as key "
480-
f"{e} is not always present"
481-
) from e
490+
seen_meta[key] = _pull_field(obj, val[-1])
482491

483492
_recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1)
484493
else:
@@ -497,16 +506,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
497506
if level + 1 > len(val):
498507
meta_val = seen_meta[key]
499508
else:
500-
try:
501-
meta_val = _pull_field(obj, val[level:])
502-
except KeyError as e:
503-
if errors == "ignore":
504-
meta_val = np.nan
505-
else:
506-
raise KeyError(
507-
"Try running with errors='ignore' as key "
508-
f"{e} is not always present"
509-
) from e
509+
meta_val = _pull_field(obj, val[level:])
510510
meta_vals[key].append(meta_val)
511511
records.extend(recs)
512512

pandas/tests/io/json/test_normalize.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,10 @@ def test_json_normalize_errors(self, missing_metadata):
600600
# If meta keys are not always present a new option to set
601601
# errors='ignore' has been implemented
602602

603-
msg = "Try running with errors='ignore' as key 'name' is not always present"
603+
msg = (
604+
"Key 'name' not found. To replace missing values of "
605+
"'name' with np.nan, pass in errors='ignore'"
606+
)
604607
with pytest.raises(KeyError, match=msg):
605608
json_normalize(
606609
data=missing_metadata,
@@ -628,7 +631,10 @@ def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata
628631
# GH41876
629632
# Ensure errors='raise' works as intended even when a record_path of length
630633
# greater than one is passed in
631-
msg = "Try running with errors='ignore' as key 'name' is not always present"
634+
msg = (
635+
"Key 'name' not found. To replace missing values of "
636+
"'name' with np.nan, pass in errors='ignore'"
637+
)
632638
with pytest.raises(KeyError, match=msg):
633639
json_normalize(
634640
data=missing_metadata,

0 commit comments

Comments
 (0)