-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
BUG: Index with null value not serialized correctly to json #50400
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
f26cf16
23cd66d
8f5e6e5
147cd88
ef6195a
9afd12b
a6de6ce
52e20ea
53165b3
d3cecc5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1283,6 +1283,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
type_num = PyArray_TYPE(labels); | ||
|
||
for (i = 0; i < num; i++) { | ||
int is_null = 0; // Whether current val is a null | ||
item = PyArray_GETITEM(labels, dataptr); | ||
if (!item) { | ||
NpyArr_freeLabels(ret, num); | ||
|
@@ -1320,9 +1321,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
|
||
if (is_datetimelike) { | ||
if (nanosecVal == get_nat()) { | ||
len = 4; | ||
cLabel = PyObject_Malloc(len + 1); | ||
strncpy(cLabel, "null", len + 1); | ||
is_null = 1; | ||
} else { | ||
if (enc->datetimeIso) { | ||
if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) { | ||
|
@@ -1348,25 +1347,49 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
len = strlen(cLabel); | ||
} | ||
} | ||
} else { // Fallback to string representation | ||
// Replace item with the string to keep it alive. | ||
Py_SETREF(item, PyObject_Str(item)); | ||
if (item == NULL) { | ||
NpyArr_freeLabels(ret, num); | ||
ret = 0; | ||
break; | ||
} else { | ||
// NA values need special handling | ||
if (PyFloat_Check(item)) { | ||
double fval = PyFloat_AS_DOUBLE(item); | ||
is_null = npy_isnan(fval); | ||
} else if (item == Py_None || object_is_na_type(item)) { | ||
is_null = 1; | ||
} else if (object_is_decimal_type(item)) { | ||
PyObject *is_null_obj = PyObject_CallMethod(item, | ||
"is_nan", | ||
NULL); | ||
is_null = (is_null_obj == Py_True); | ||
if (!is_null_obj) { | ||
goto INVALID; | ||
} | ||
Py_DECREF(is_null); | ||
} else { | ||
// Otherwise, fallback to string representation | ||
// Replace item with the string to keep it alive. | ||
Py_SETREF(item, PyObject_Str(item)); | ||
if (item == NULL) { | ||
NpyArr_freeLabels(ret, num); | ||
ret = 0; | ||
break; | ||
} | ||
|
||
cLabel = (char *)PyUnicode_AsUTF8(item); | ||
len = strlen(cLabel); | ||
} | ||
} | ||
|
||
cLabel = (char *)PyUnicode_AsUTF8(item); | ||
len = strlen(cLabel); | ||
if (is_null) { | ||
len = 4; | ||
cLabel = PyObject_Malloc(len + 1); | ||
strncpy(cLabel, "null", len + 1); | ||
} | ||
|
||
// Add 1 to include NULL terminator | ||
ret[i] = PyObject_Malloc(len + 1); | ||
memcpy(ret[i], cLabel, len + 1); | ||
Py_DECREF(item); | ||
|
||
if (is_datetimelike) { | ||
if (is_datetimelike || is_null) { | ||
PyObject_Free(cLabel); | ||
} | ||
|
||
|
@@ -1512,8 +1535,20 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { | |
tc->type = JT_UTF8; | ||
return; | ||
} else if (object_is_decimal_type(obj)) { | ||
GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj); | ||
tc->type = JT_DOUBLE; | ||
/* Check for null, since null can't go thru double path */ | ||
PyObject *is_null_obj = PyObject_CallMethod(obj, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can this not be replaced with the function you are introducing? It seems like that should work and would keep the logic consistent. |
||
"is_nan", | ||
NULL); | ||
if (is_null_obj == Py_False) { | ||
GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj); | ||
tc->type = JT_DOUBLE; | ||
} else { | ||
tc->type = JT_NULL; | ||
} | ||
if (!is_null_obj) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you should move this up to 1542 before the condition of equating to Py_False. This makes for a more consistent pattern where we always check for NULL after a |
||
goto INVALID; | ||
} | ||
Py_DECREF(is_null_obj); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In a case where |
||
return; | ||
} else if (PyDateTime_Check(obj) || PyDate_Check(obj)) { | ||
if (object_is_nat_type(obj)) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have a similar expression already in missing.pyx? I wonder if there's a way for us to be consistent about our missing object evaluation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`checknull` is probably the equivalent. Without having a C API, I don't really think we can unify anything, sadly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gotcha. Not a blocker here, but it might be nice to create a separate missing.c file with the proper implementation and then wrap that from missing.pyx.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On a second read, I think we should at least create a separate function `int is_null(PyObject *obj)` here. It can just be static in the current translation unit, which makes the code more readable than the nested if statements.