From fbc21b9f1dc4b06938c5957ed32012b9bd2982cb Mon Sep 17 00:00:00 2001 From: joncrall Date: Sun, 3 Apr 2022 16:22:39 -0400 Subject: [PATCH 1/4] Fix NaN bug in ujson --- pandas/_libs/src/ujson/lib/ultrajson.h | 2 ++ pandas/_libs/src/ujson/lib/ultrajsondec.c | 4 ++-- pandas/_libs/src/ujson/python/JSONtoObj.c | 2 ++ pandas/tests/io/json/test_ujson.py | 3 +++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index 71df0c5a186b7..2bad0adbb9ef5 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -149,6 +149,7 @@ enum JSTYPES { JT_ARRAY, // Array structure JT_OBJECT, // Key/Value structure JT_INVALID, // Internal, do not return nor expect + JT_NAN, // Not A Number JT_POS_INF, // Positive infinity JT_NEG_INF, // Negative infinity }; @@ -289,6 +290,7 @@ typedef struct __JSONObjectDecoder { JSOBJ (*newTrue)(void *prv); JSOBJ (*newFalse)(void *prv); JSOBJ (*newNull)(void *prv); + JSOBJ (*newNaN)(void *prv); JSOBJ (*newPosInf)(void *prv); JSOBJ (*newNegInf)(void *prv); JSOBJ (*newObject)(void *prv, void *decoder); diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index c7779b8b428ae..bb9c22263d1bd 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -293,9 +293,9 @@ JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { if (*(offset++) != 'a') goto SET_NAN_ERROR; if (*(offset++) != 'N') goto SET_NAN_ERROR; - ds->lastType = JT_NULL; + ds->lastType = JT_NAN; ds->start = offset; - return ds->dec->newNull(ds->prv); + return ds->dec->newNaN(ds->prv); SET_NAN_ERROR: return SetError(ds, -1, "Unexpected character found when decoding 'NaN'"); diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index c58f25b8f99ea..6aeebf51f6d0a 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ 
b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -459,6 +459,8 @@ JSOBJ Object_newFalse(void *prv) { Py_RETURN_FALSE; } JSOBJ Object_newNull(void *prv) { Py_RETURN_NONE; } +JSOBJ Object_newNaN(void *prv) { return PyFloat_FromDouble(Py_NAN); } + JSOBJ Object_newPosInf(void *prv) { return PyFloat_FromDouble(Py_HUGE_VAL); } JSOBJ Object_newNegInf(void *prv) { return PyFloat_FromDouble(-Py_HUGE_VAL); } diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index e82a888f47388..e5537b527ac2d 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -417,6 +417,9 @@ def test_encode_time_conversion_dateutil(self): def test_encode_as_null(self, decoded_input): assert ujson.encode(decoded_input) == "null", "Expected null" + def test_decode_nan(self): + assert math.isnan(ujson.loads("[NaN]")[0]) + def test_datetime_units(self): val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504) stamp = Timestamp(val) From 0a6e3ae0ab8a8c401b309f08691f953f003ed02e Mon Sep 17 00:00:00 2001 From: joncrall Date: Mon, 30 May 2022 21:21:25 -0400 Subject: [PATCH 2/4] fix merge conflict --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index ac9f8b02c7acb..141dd5d434a19 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -819,7 +819,7 @@ I/O - Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`) - Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`) - :meth:`to_html` now excludes the ``border`` attribute from ``<table>`` elements when ``border`` keyword is set to ``False``. 
-- +- Bug in :func:`read_json` reading ``NaN`` values in corner cases (:issue:`46627`) Period ^^^^^^ From 679053cbb9a6d2b03dbf1ddd93c452ff705c5661 Mon Sep 17 00:00:00 2001 From: joncrall Date: Wed, 6 Apr 2022 00:42:08 -0400 Subject: [PATCH 3/4] add Object_newNaN to dec --- pandas/_libs/src/ujson/python/JSONtoObj.c | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index 6aeebf51f6d0a..5aab061f80209 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -512,6 +512,7 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) { JSONObjectDecoder dec = { Object_newString, Object_objectAddKey, Object_arrayAddItem, Object_newTrue, Object_newFalse, Object_newNull, + Object_newNaN, Object_newPosInf, Object_newNegInf, Object_newObject, Object_endObject, Object_newArray, Object_endArray, Object_newInteger, Object_newLong, Object_newUnsignedLong, From d04ec083790e0bb5d34ee8f4b13501b312120ad8 Mon Sep 17 00:00:00 2001 From: joncrall Date: Wed, 6 Apr 2022 00:42:25 -0400 Subject: [PATCH 4/4] test for bug #46627 --- pandas/tests/io/json/test_pandas.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 576d99f25e25c..4012b494b8847 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1326,6 +1326,16 @@ def test_read_json_large_numbers2(self): expected = DataFrame(1.404366e21, index=["articleId"], columns=[0]) tm.assert_frame_equal(result, expected) + def test_read_json_nans(self): + # GH 46627 + data = StringIO("[NaN, {}, null, 1]") + result = read_json(data) + assert result.iloc[0, 0] is not None # used to return None here + assert np.isnan(result.iloc[0, 0]) + assert result.iloc[1, 0] == {} + assert result.iloc[2, 0] is None + assert result.iloc[3, 0] == 1 + def 
test_to_jsonl(self): # GH9180 df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])