From 62138d08c1a682cf55707f43e3e084205c70559c Mon Sep 17 00:00:00 2001 From: Kyle Kelley Date: Tue, 28 Feb 2017 16:27:50 -0800 Subject: [PATCH] BUG: handle empty lists in json_normalize xref: #15534 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/json/normalize.py | 3 +++ pandas/tests/io/json/test_normalize.py | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 54df7514a882d..42116e5562e1e 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -190,6 +190,7 @@ Other enhancements - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs <categorical.union>` for more information. +- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) .. 
_ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index f29472155da17..0e7d025e81851 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -157,6 +157,9 @@ def _pull_field(js, spec): return result + if isinstance(data, list) and not data: + return DataFrame() + # A bit of a hackjob if isinstance(data, dict): data = [data] diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index c60b81ffe504d..f881f4dafe0f3 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -62,6 +62,11 @@ def test_simple_normalize(self): tm.assert_frame_equal(result, expected) + def test_empty_array(self): + result = json_normalize([]) + expected = DataFrame() + tm.assert_frame_equal(result, expected) + def test_more_deeply_nested(self): data = [{'country': 'USA', 'states': [{'name': 'California',