diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index dbf7f4f49ce86..e34c8ea1fe899 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -162,14 +162,14 @@ def _write(self, obj, orient, double_precision, ensure_ascii, class JSONTableWriter(FrameWriter): - _default_orient = 'records' + _default_orient = 'values' def __init__(self, obj, orient, date_format, double_precision, ensure_ascii, date_unit, index, default_handler=None): """ Adds a `schema` attribute with the Table Schema, resets the index (can't do in caller, because the schema inference needs - to know what the index is, forces orient to records, and forces + to know what the index is, forces orient to values, and forces date_format to 'iso'. """ super(JSONTableWriter, self).__init__( @@ -177,9 +177,9 @@ def __init__(self, obj, orient, date_format, double_precision, date_unit, index, default_handler=default_handler) if date_format != 'iso': - msg = ("Trying to write with `orient='table'` and " - "`date_format='{fmt}'`. Table Schema requires dates " - "to be formatted with `date_format='iso'`" + msg = ("Trying to write with orient='table' and " + "date_format='{fmt}'. Table Schema requires dates " + "to be formatted with date_format='iso'" .format(fmt=date_format)) raise ValueError(msg) @@ -211,7 +211,7 @@ def __init__(self, obj, orient, date_format, double_precision, else: self.obj = obj.reset_index(drop=False) self.date_format = 'iso' - self.orient = 'records' + self.orient = 'values' self.index = index def _write(self, obj, orient, double_precision, ensure_ascii, @@ -221,7 +221,12 @@ def _write(self, obj, orient, double_precision, ensure_ascii, ensure_ascii, date_unit, iso_dates, default_handler) - serialized = '{{"schema": {schema}, "data": {data}}}'.format( + # add column names + column_names = dumps(obj.columns) + if len(data) > 2: + column_names = column_names + ',' + data = data[0] + column_names + data[1:] + serialized = '{{"schema":{schema},"data":{data}}}'.format( schema=dumps(self.schema), data=data) return serialized diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py index 971386c91944e..d22f85372accb 100644 --- a/pandas/io/json/table_schema.py +++ b/pandas/io/json/table_schema.py @@ -255,7 +255,7 @@ def build_table_schema(data, index=True, primary_key=None, version=True): schema['primaryKey'] = primary_key if version: - schema['pandas_version'] = '0.20.0' + schema['pandas_version'] = '0.25.0' return schema @@ -296,21 +296,28 @@ def parse_table_schema(json, precise_float): pandas.read_json """ table = loads(json, precise_float=precise_float) - col_order = [field['name'] for field in table['schema']['fields']] - df = DataFrame(table['data'], columns=col_order)[col_order] + version = table['schema']['pandas_version'] + if version == '0.20.0': + # Each table row is represented by a dict + col_order = [field['name'] for field in table['schema']['fields']] + df = DataFrame(table['data'], columns=col_order)[col_order] + elif version == '0.25.0': + # Each table row is represented by a list + col_order = table['data'][0] + df = DataFrame(table['data'][1:], columns=col_order)[col_order] dtypes = {field['name']: convert_json_field_to_pandas_type(field) for field in table['schema']['fields']} # Cannot directly use as_type with timezone data on object; raise for now if any(str(x).startswith('datetime64[ns, ') for x in dtypes.values()): - raise NotImplementedError('table="orient" can not yet read timezone ' - 'data') + raise NotImplementedError("orient='table' can not yet read timezone " + "data") # No ISO constructor for Timedelta as of yet, so need to raise if 'timedelta64' in dtypes.values(): - raise NotImplementedError('table="orient" can not yet read ' - 'ISO-formatted Timedelta data') + raise NotImplementedError("orient='table' can not yet read " + "ISO-formatted Timedelta data") df = df.astype(dtypes) @@ -322,5 +329,7 @@ def parse_table_schema(json, precise_float): else: df.index.names = [None if x.startswith('level_') else x for x in df.index.names] + # Reset columns dtype + df.columns = df.columns.values.tolist() return df diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 4cc62d3db124f..74a60d70cd194 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -208,8 +208,8 @@ def test_build_series(self): expected = OrderedDict([ ('schema', schema), - ('data', [OrderedDict([('id', 0), ('a', 1)]), - OrderedDict([('id', 1), ('a', 2)])])]) + ('data', [['id', 'a'], [0, 1], [1, 2]]) + ]) assert result == expected def test_to_json(self): @@ -243,32 +243,15 @@ def test_to_json(self): 'fields': fields, 'primaryKey': ['idx'], } - data = [ - OrderedDict([('idx', 0), ('A', 1), ('B', 'a'), - ('C', '2016-01-01T00:00:00.000Z'), - ('D', 'P0DT1H0M0S'), - ('E', 'a'), ('F', 'a'), ('G', 1.), - ('H', '2016-01-01T06:00:00.000Z') - ]), - OrderedDict([('idx', 1), ('A', 2), ('B', 'b'), - ('C', '2016-01-02T00:00:00.000Z'), - ('D', 'P0DT1H1M0S'), - ('E', 'b'), ('F', 'b'), ('G', 2.), - ('H', '2016-01-02T06:00:00.000Z') - ]), - OrderedDict([('idx', 2), ('A', 3), ('B', 'c'), - ('C', '2016-01-03T00:00:00.000Z'), - ('D', 'P0DT1H2M0S'), - ('E', 'c'), ('F', 'c'), ('G', 3.), - ('H', '2016-01-03T06:00:00.000Z') - ]), - OrderedDict([('idx', 3), ('A', 4), ('B', 'c'), - ('C', '2016-01-04T00:00:00.000Z'), - ('D', 'P0DT1H3M0S'), - ('E', 'c'), ('F', 'c'), ('G', 4.), - ('H', '2016-01-04T06:00:00.000Z') - ]), - ] + data = [['idx', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'], + [0, 1, 'a', '2016-01-01T00:00:00.000Z', 'P0DT1H0M0S', 'a', 'a', + 1., '2016-01-01T06:00:00.000Z'], + [1, 2, 'b', '2016-01-02T00:00:00.000Z', 'P0DT1H1M0S', 'b', 'b', + 2., '2016-01-02T06:00:00.000Z'], + [2, 3, 'c', '2016-01-03T00:00:00.000Z', 'P0DT1H2M0S', 'c', 'c', + 3., '2016-01-03T06:00:00.000Z'], + [3, 4, 'c', '2016-01-04T00:00:00.000Z', 'P0DT1H3M0S', 'c', 'c', + 4., '2016-01-04T06:00:00.000Z']] expected = OrderedDict([('schema', schema), ('data', data)]) assert result == expected @@ -277,16 +260,14 @@ def test_to_json_float_index(self): result = data.to_json(orient='table', date_format='iso') result = json.loads(result, object_pairs_hook=OrderedDict) result['schema'].pop('pandas_version') - - expected = ( - OrderedDict([('schema', { + expected = (OrderedDict([ + ('schema', { 'fields': [{'name': 'index', 'type': 'number'}, {'name': 'values', 'type': 'integer'}], 'primaryKey': ['index'] }), - ('data', [OrderedDict([('index', 1.0), ('values', 1)]), - OrderedDict([('index', 2.0), ('values', 1)])])]) - ) + ('data', [['index', 'values'], [1.0, 1], [2.0, 1]]) + ])) assert result == expected def test_to_json_period_index(self): @@ -300,10 +281,9 @@ def test_to_json_period_index(self): {'name': 'values', 'type': 'integer'}] schema = {'fields': fields, 'primaryKey': ['index']} - data = [OrderedDict([('index', '2015-11-01T00:00:00.000Z'), - ('values', 1)]), - OrderedDict([('index', '2016-02-01T00:00:00.000Z'), - ('values', 1)])] + data = [['index', 'values'], + ['2015-11-01T00:00:00.000Z', 1], + ['2016-02-01T00:00:00.000Z', 1]] expected = OrderedDict([('schema', schema), ('data', data)]) assert result == expected @@ -320,10 +300,7 @@ def test_to_json_categorical_index(self): 'ordered': False}, {'name': 'values', 'type': 'integer'}], 'primaryKey': ['index']}), - ('data', [ - OrderedDict([('index', 'a'), - ('values', 1)]), - OrderedDict([('index', 'b'), ('values', 1)])])]) + ('data', [['index', 'values'], ['a', 1], ['b', 1]])]) ) assert result == expected @@ -428,9 +405,7 @@ def test_categorical(self): expected = OrderedDict([ ('schema', {'fields': fields, 'primaryKey': ['idx']}), - ('data', [OrderedDict([('idx', 0), ('values', 'a')]), - OrderedDict([('idx', 1), ('values', 'b')]), - OrderedDict([('idx', 2), ('values', 'a')])])]) + ('data', [['idx', 'values'], [0, 'a'], [1, 'b'], [2, 'a']])]) assert result == expected @pytest.mark.parametrize('idx,nm,prop', [ diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index b222d679a6012..9b3e170ac1f5a 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1197,9 +1197,10 @@ def test_data_frame_size_after_to_json(self): @pytest.mark.parametrize('index', [None, [1, 2], [1., 2.], ['a', 'b'], ['1', '2'], ['1.', '2.']]) - @pytest.mark.parametrize('columns', [['a', 'b'], ['1', '2'], ['1.', '2.']]) + @pytest.mark.parametrize('columns', [None, [1, 2], [1., 2.], ['a', 'b'], + ['1', '2'], ['1.', '2.']]) def test_from_json_to_json_table_index_and_columns(self, index, columns): - # GH25433 GH25435 + # GH19129 GH25433 GH25435 expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns) dfjson = expected.to_json(orient='table') result = pd.read_json(dfjson, orient='table') @@ -1229,6 +1230,50 @@ def test_read_json_table_convert_axes_raises(self): with pytest.raises(ValueError, match=msg): pd.read_json(dfjson, orient='table', convert_axes=True) + @pytest.mark.parametrize('index, dfjson', [ + (None, + '{"schema":{"fields":[{"name":"index","type":"integer"},{"name":"a",' + '"type":"integer"},{"name":"b","type":"number"},{"name":"c",' + '"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},' + '"data":[{"index":0,"a":1,"b":3.0,"c":"5"},{"index":1,"a":2,"b":4.0,' + '"c":"6"}]}'), + ([1, 2], + '{"schema":{"fields":[{"name":"index","type":"integer"},{"name":"a",' + '"type":"integer"},{"name":"b","type":"number"},{"name":"c",' + '"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},' + '"data":[{"index":1,"a":1,"b":3.0,"c":"5"},{"index":2,"a":2,"b":4.0,' + '"c":"6"}]}'), + ([1., 2.], + '{"schema":{"fields":[{"name":"index","type":"number"},{"name":"a",' + '"type":"integer"},{"name":"b","type":"number"},{"name":"c",' + '"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},' + '"data":[{"index":1.0,"a":1,"b":3.0,"c":"5"},{"index":2.0,"a":2,' + '"b":4.0,"c":"6"}]}'), + (['a', 'b'], + '{"schema":{"fields":[{"name":"index","type":"string"},{"name":"a",' + '"type":"integer"},{"name":"b","type":"number"},{"name":"c",' + '"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},' + '"data":[{"index":"a","a":1,"b":3.0,"c":"5"},{"index":"b","a":2,' + '"b":4.0,"c":"6"}]}'), + (['1', '2'], + '{"schema":{"fields":[{"name":"index","type":"string"},{"name":"a",' + '"type":"integer"},{"name":"b","type":"number"},{"name":"c",' + '"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},' + '"data":[{"index":"1","a":1,"b":3.0,"c":"5"},{"index":"2","a":2,' + '"b":4.0,"c":"6"}]}'), + (['1.', '2.'], + '{"schema":{"fields":[{"name":"index","type":"string"},{"name":"a",' + '"type":"integer"},{"name":"b","type":"number"},{"name":"c",' + '"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},' + '"data":[{"index":"1.","a":1,"b":3.0,"c":"5"},{"index":"2.","a":2,' + '"b":4.0,"c":"6"}]}') + ]) + def test_read_json_table_version_0_20_0(self, index, dfjson): + expected = pd.DataFrame([[1, 3., '5'], [2, 4., '6']], + index=index, columns=['a', 'b', 'c']) + result = pd.read_json(dfjson, orient='table') + assert_frame_equal(result, expected) + @pytest.mark.parametrize('data, expected', [ (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']), {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}), @@ -1265,16 +1310,9 @@ def test_index_false_to_json_split(self, data, expected): def test_index_false_to_json_table(self, data): # GH 17394 # Testing index=False in to_json with orient='table' - result = data.to_json(orient='table', index=False) result = json.loads(result) - - expected = { - 'schema': pd.io.json.build_table_schema(data, index=False), - 'data': DataFrame(data).to_dict(orient='records') - } - - assert result == expected + assert 'primaryKey' not in result['schema'] @pytest.mark.parametrize('orient', [ 'records', 'index', 'columns', 'values'