Skip to content

Robustness improvement for normalize.py #26328

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion pandas/io/json/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ def json_normalize(data, record_path=None, meta=None,
record_path : string or list of strings, default None
Path in each object to list of records. If not passed, data will be
assumed to be an array of records
For an array of objects with missing key-value pairs in each record,
the first record needs to include all key-value pairs
meta : list of paths (string or list of strings), default None
Fields to use as metadata for each record in resulting table
meta_prefix : string, default None
Expand Down Expand Up @@ -180,13 +182,21 @@ def json_normalize(data, record_path=None, meta=None,
0 1
1 2
"""

def _pull_field(js, spec):
result = js
if isinstance(spec, list):
for field in spec:
result = result[field]
else:
result = result[spec]
# GH26284
try:
result = result[spec]
if not (isinstance(result, list)):
# Allows import of single objects into dataframe GH26284
result = [result]
except KeyError:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have an errors parameter which this ignores. I think we'll need to be aware of that here in some way, though from my main comment I don't think we should try and tackle that here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I understand this. The only error we can catch here is for a missing key ? Any other error would happen in the existing baseline ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC this assumes that the user always wants to silently ignore missing keys, which is not desirable and makes for a confusing API since we have an "errors" parameter that controls that behavior for the meta

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, understood. If we want to give control, there are clearly two ways:
(i) redefine errors = 'ignore' to cover both meta and record path
(ii) introduce another error flag to differentiate between meta and record path
Is there a convention or a preference in pandas before I implement ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think option one

result = {}

return result

Expand Down Expand Up @@ -241,6 +251,12 @@ def _recursive_extract(data, path, seen_meta, level=0):
else:
for obj in data:
recs = _pull_field(obj, path[0])
if recs == {}:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment around missing key - need to be cognizant of the errors parameter

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I understand. Setting recs to an empty dictionary if there is a missing key (on line 199), is a convenient flag to then load the output with NaNs on line 258. How does the errors parameter come into this ?

# GH26284 Fill Missing key in this record
# requires all required keys in first record
for key in records[0]:
recs[key] = np.nan
recs = [recs]

# For repeating the metadata later
lengths.append(len(recs))
Expand Down
94 changes: 86 additions & 8 deletions pandas/tests/io/json/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ def author_missing_data():
return [
{'info': None},
{'info':
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
'author_name':
{'first': 'Jane', 'last_name': 'Doe'}
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
'author_name':
{'first': 'Jane', 'last_name': 'Doe'}
}]


Expand All @@ -85,6 +85,49 @@ def missing_metadata():
]


@pytest.fixture
def deep_nested_missing_keys():
    """Two deeply nested records where the second is missing several keys.

    The records differ in ways exercised by TestMissingKeys: the leaf at
    ``keyC[0].keyCA`` is a string in the first record but a dict in the
    second; the first record has ``keyD`` while the second has
    ``keyD Missing``; and ``keyE[0]`` has ``keyEB`` in the first record
    but ``Missing keyEB`` in the second.
    """
    first_record = {
        'keyC': [{
            'keyCA': 'StringCA1',
            'keyCB': {
                'keyCBA': 4,
                'keyCBB': 5,
                'keyCBC': [
                    {'keyCBCA': 6, 'keyCBCB': 7, 'keyCBCC': 8.2},
                    {'keyCBCA': 'keyCBCA', 'keyCBCB': 10, 'keyCBCC': 11},
                    {'keyCBCA': 12, 'keyCBCB': [13], 'keyCBCC': 14},
                ],
                'keyCBD': 15,
            },
            'keyCC': 16,
        }],
        'keyD': 17,
        'keyE': [{'keyEA': 18, 'keyEB': {'keyEBA': 19, 'keyEBB': 20}}],
    }
    second_record = {
        'keyC': [{
            'keyCA': {'StringCA2': 'StringCA2'},
            'keyCB': {
                'keyCBA': 34,
                'keyCBB': 35,
                'keyCBC': [
                    {'keyCBCA': 'keyCBCA', 'keyCBCB': 37.1, 'keyCBCC': 38},
                    {'keyCBCA': 39, 'keyCBCB': True, 'keyCBCC': 41},
                    {'keyCBCA': 42, 'keyCBCB': 43, 'keyCBCC': {'test': 44}},
                ],
                'keyCBD': 45,
            },
            'keyCC': False,
        }],
        'keyD Missing': 47,
        'keyE': [{'keyEA': 48, 'Missing keyEB': 49}],
    }
    return [first_record, second_record]


class TestJSONNormalize:

def test_simple_records(self):
Expand Down Expand Up @@ -262,8 +305,8 @@ def test_record_prefix(self, state_data):

def test_non_ascii_key(self):
testjson = (
b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' +
b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' +
b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
).decode('utf8')

testdata = {
Expand Down Expand Up @@ -383,12 +426,12 @@ def test_donot_drop_nonevalues(self):
data = [
{'info': None,
'author_name':
{'first': 'Smith', 'last_name': 'Appleseed'}
{'first': 'Smith', 'last_name': 'Appleseed'}
},
{'info':
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
'author_name':
{'first': 'Jane', 'last_name': 'Doe'}
{'first': 'Jane', 'last_name': 'Doe'}
}
]
result = nested_to_record(data)
Expand Down Expand Up @@ -460,3 +503,38 @@ def test_nonetype_multiple_levels(self):
'location.country.state.town.info.y': -33.148521423339844,
'location.country.state.town.info.z': 27.572303771972656}
assert result == expected


class TestMissingKeys:
    # GH26284
    """Tests for json_normalize on records whose key sets differ.

    All tests use the ``deep_nested_missing_keys`` fixture: two deeply
    nested records where the second record lacks some keys (or holds
    different value types) relative to the first.
    """

    def test_string(self, deep_nested_missing_keys):
        # keyC[0].keyCA is a plain string in record 1 and a dict in
        # record 2; both should be normalized into a single frame.
        data = ['StringCA1', {'StringCA2': 'StringCA2'}]
        result = json_normalize(data=deep_nested_missing_keys,
                                record_path=['keyC', 'keyCA'])
        expected = DataFrame(data)
        tm.assert_frame_equal(result, expected)

    def test_single_object(self, deep_nested_missing_keys):
        # keyC[0].keyCC is the scalar 16 in record 1 and False in record 2.
        # NOTE(review): ``{16, False}`` is a *set* literal, so element order
        # is not guaranteed and DataFrame construction from a set may be
        # rejected — presumably ``[16, False]`` was intended; confirm.
        data = {16, False}
        result = json_normalize(data=deep_nested_missing_keys,
                                record_path=['keyC', 'keyCC'])
        expected = DataFrame(data)
        tm.assert_frame_equal(result, expected)

    def test_object_array(self, deep_nested_missing_keys):
        # Both records provide three keyCBC entries with the full key set
        # (keyCBCA/keyCBCB/keyCBCC), but with heterogeneous value types
        # (ints, strings, lists, bools, floats, nested dicts).
        data = {'keyCBCA': [6, 'keyCBCA', 12, 'keyCBCA', 39, 42],
                'keyCBCB': [7, 10, [13], 37.1, True, 43],
                'keyCBCC': [8.2, 11, 14, 38, 41, {'test': 44}]}
        result = json_normalize(data=deep_nested_missing_keys,
                                record_path=['keyC', 'keyCB', 'keyCBC'])
        expected = DataFrame(data)
        tm.assert_frame_equal(result, expected)

    def test_Missing_Key(self, deep_nested_missing_keys):
        # Record 2 has 'Missing keyEB' instead of 'keyEB', so its row
        # should be filled with NaN for both keyEB sub-columns (the
        # behavior this PR adds for missing record_path keys).
        data = {'keyEBA': [19.0, np.nan],
                'keyEBB': [20.0, np.nan]}
        result = json_normalize(data=deep_nested_missing_keys,
                                record_path=['keyE', 'keyEB'])
        expected = DataFrame(data)
        tm.assert_frame_equal(result, expected)