From b0923d4b821aaa1bb209c5ec252b52d10b70294e Mon Sep 17 00:00:00 2001 From: Chandrasekar Sivaraman Date: Fri, 28 Feb 2025 01:11:33 +0100 Subject: [PATCH 1/2] changed approach from changing to_json method to improving read_json to parse schema for numeric column names --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/io/json/_table_schema.py | 25 +++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4d9a45abe17cd..9eee8495fc615 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -63,6 +63,7 @@ Other enhancements - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`) - :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``median``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) +- :func:`read_json` with ``orient="table"`` now correctly restores non-string column names when reading JSON data, ensuring that column names retain their original types as specified in the schema (:issue:`19129`). 
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 7879be18b52c9..0debe9c963dae 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -366,17 +366,29 @@ def parse_table_schema(json, precise_float: bool) -> DataFrame: :class:`Index` name of 'index' and :class:`MultiIndex` names starting with 'level_' are not supported. + To handle cases where column names are non-string types (e.g., integers), + all column names are first converted to strings when constructing the DataFrame. + After applying the correct data types using `astype(dtypes)`, the column names + are restored to their original types as specified in the schema. + This ensures compatibility with `to_json(orient="table")` while maintaining + the integrity of non-string column names. + See Also -------- build_table_schema : Inverse function. 
pandas.read_json """ table = ujson_loads(json, precise_float=precise_float) - col_order = [field["name"] for field in table["schema"]["fields"]] + col_order = [ + field["name"] if isinstance(field["name"], str) else str(field["name"]) + for field in table["schema"]["fields"] + ] df = DataFrame(table["data"], columns=col_order)[col_order] dtypes = { - field["name"]: convert_json_field_to_pandas_type(field) + field["name"] + if isinstance(field["name"], str) + else str(field["name"]): convert_json_field_to_pandas_type(field) for field in table["schema"]["fields"] } @@ -388,6 +400,15 @@ def parse_table_schema(json, precise_float: bool) -> DataFrame: df = df.astype(dtypes) + # Convert column names back to their original types + original_types = { + str(field["name"]) + if not isinstance(field["name"], str) + else field["name"]: field["name"] + for field in table["schema"]["fields"] + } + df.columns = [original_types[col] for col in df.columns] + if "primaryKey" in table["schema"]: df = df.set_index(table["schema"]["primaryKey"]) if len(df.index.names) == 1: From c6c23be7785166bb8b8539046649fb8b630944a3 Mon Sep 17 00:00:00 2001 From: Chandrasekar Sivaraman Date: Sat, 26 Apr 2025 18:17:44 +0200 Subject: [PATCH 2/2] added test for to_json method using all orients to check if a column with int type is preserved --- pandas/tests/io/json/test_pandas.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 144b36166261b..a5176863a3985 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -2283,3 +2283,21 @@ def test_large_number(): ) expected = Series([9999999999999999]) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "orient", ["records", "index", "columns", "split", "values", "table"] +) +def test_to_json_read_json_numeric_columns_all_orients(orient): + df = DataFrame([[1, 2, 3, 4]], columns=[5, 6, 7, 8]) + + with 
tm.ensure_clean("tmp.json") as path: + df.to_json(path, orient=orient) + result = read_json(path, orient=orient) + + # Column types must be int in all cases + assert all(isinstance(col, int) for col in result.columns) + + # Content check (relaxed for "values") + if orient != "values": + tm.assert_frame_equal(result, df, check_column_type=False)