diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index bc5d66e8e7955..f75242d32df1a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -876,6 +876,7 @@ I/O - Bug when a pickling a subset PyArrow-backed data that would serialize the entire data instead of the subset (:issue:`42600`) - Bug in :func:`read_sql_query` ignoring ``dtype`` argument when ``chunksize`` is specified and result is empty (:issue:`50245`) - Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`) +- Bug in :func:`read_json` with ``orient="table"`` raising when the data contains ``NA`` values (:issue:`40255`) - Bug in displaying ``string`` dtypes not showing storage option (:issue:`50099`) - Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`) - Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index db963b06d3282..d686c2f68a5dc 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -196,11 +196,11 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: if typ == "string": return "object" elif typ == "integer": - return "int64" + return field.get("extDtype", "int64") elif typ == "number": - return "float64" + return field.get("extDtype", "float64") elif typ == "boolean": - return "bool" + return field.get("extDtype", "bool") elif typ == "duration": return "timedelta64" elif typ == "datetime": diff --git a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py index ae926173e129b..cf521aafdc241 100644 --- a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +++ b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py @@ -8,9
+8,13 @@
 import pytest
 
 from pandas import (
+    NA,
     DataFrame,
+    Index,
     array,
+    read_json,
 )
+import pandas._testing as tm
 from pandas.core.arrays.integer import Int64Dtype
 from pandas.core.arrays.string_ import StringDtype
 from pandas.core.series import Series
@@ -273,3 +277,49 @@ def test_to_json(self, df):
         expected = OrderedDict([("schema", schema), ("data", data)])
 
         assert result == expected
+
+    def test_json_ext_dtype_reading_roundtrip(self):
+        # GH#40255
+        # Int64/Float64/boolean columns and an Int64 index, each holding an NA,
+        # are written with orient="table" and must read back as an equal frame.
+        df = DataFrame(
+            {
+                "a": Series([2, NA], dtype="Int64"),
+                "b": Series([1.5, NA], dtype="Float64"),
+                "c": Series([True, NA], dtype="boolean"),
+            },
+            index=Index([1, NA], dtype="Int64"),
+        )
+        expected = df.copy()
+        data_json = df.to_json(orient="table", indent=4)
+        result = read_json(data_json, orient="table")
+        tm.assert_frame_equal(result, expected)
+
+    def test_json_ext_dtype_reading(self):
+        # GH#40255
+        # A table-schema field carrying "extDtype": "Int64" together with a null
+        # value is expected to read back as an Int64 column holding NA.
+        # The payload is kept strict JSON (no trailing commas) so that parsing
+        # does not depend on decoder leniency.
+        data_json = """{
+            "schema":{
+                "fields":[
+                    {
+                        "name":"a",
+                        "type":"integer",
+                        "extDtype":"Int64"
+                    }
+                ]
+            },
+            "data":[
+                {
+                    "a":2
+                },
+                {
+                    "a":null
+                }
+            ]
+        }"""
+        result = read_json(data_json, orient="table")
+        expected = DataFrame({"a": Series([2, NA], dtype="Int64")})
+        tm.assert_frame_equal(result, expected)