-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: to_json not serializing non-nanosecond numpy dt64 correctly #53757
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e648188
4136ce1
c0e745d
4759340
13ddc60
b45657b
190f43a
cb08985
71fadf5
d95e7cf
f861b6a
c3299de
c9da070
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -131,6 +131,7 @@ typedef struct __PyObjectEncoder { | |
|
||
int datetimeIso; | ||
NPY_DATETIMEUNIT datetimeUnit; | ||
NPY_DATETIMEUNIT valueUnit; | ||
|
||
// output format style for pandas data types | ||
int outputFormat; | ||
|
@@ -350,7 +351,8 @@ static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, | |
static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), | ||
JSONTypeContext *tc, size_t *len) { | ||
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; | ||
GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, base, len); | ||
NPY_DATETIMEUNIT valueUnit = ((PyObjectEncoder *)tc->encoder)->valueUnit; | ||
GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, valueUnit, base, len); | ||
return GET_TC(tc)->cStr; | ||
} | ||
|
||
|
@@ -364,8 +366,9 @@ static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused), | |
/* JSON callback */ | ||
static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, | ||
size_t *len) { | ||
if (!PyDate_Check(obj)) { | ||
PyErr_SetString(PyExc_TypeError, "Expected date object"); | ||
if (!PyDate_Check(obj) && !PyDateTime_Check(obj)) { | ||
PyErr_SetString(PyExc_TypeError, "Expected date or datetime object"); | ||
((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Off-topic, but we should be setting the errorMsg whenever we return NULL in a callback. Otherwise, there is a possibility of a segfault, since ujson will still think it's in a valid state and try to continue encoding, instead of instantly returning (allowing the Python exception to propagate up). |
||
return NULL; | ||
} | ||
|
||
|
@@ -502,6 +505,10 @@ int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { | |
GET_TC(tc)->itemValue = obj; | ||
Py_INCREF(obj); | ||
((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array); | ||
// Also write the resolution (unit) of the ndarray | ||
PyArray_Descr *dtype = PyArray_DESCR(npyarr->array); | ||
((PyObjectEncoder *)tc->encoder)->valueUnit = | ||
get_datetime_metadata_from_dtype(dtype).base; | ||
((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr; | ||
((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; | ||
} else { | ||
|
@@ -1255,6 +1262,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
char **ret; | ||
char *dataptr, *cLabel; | ||
int type_num; | ||
PyArray_Descr *dtype; | ||
NPY_DATETIMEUNIT base = enc->datetimeUnit; | ||
|
||
if (!labels) { | ||
|
@@ -1283,6 +1291,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
stride = PyArray_STRIDE(labels, 0); | ||
dataptr = PyArray_DATA(labels); | ||
type_num = PyArray_TYPE(labels); | ||
dtype = PyArray_DESCR(labels); | ||
|
||
for (i = 0; i < num; i++) { | ||
item = PyArray_GETITEM(labels, dataptr); | ||
|
@@ -1293,7 +1302,8 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
} | ||
|
||
int is_datetimelike = 0; | ||
npy_int64 nanosecVal; | ||
npy_int64 i8date; | ||
NPY_DATETIMEUNIT dateUnit = NPY_FR_ns; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the idea here that this should default to NS for things that are stored within object arrays? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's for the other stuff like timedeltas and dates. We use nanosecond reso there. I didn't try to fix those cases as well. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do you think it would take to handle those consistently? I don't know how common this would happen but I guess its strange to only do this for numpy-typed arrays and not object arrays that may contain datetimes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. knee-jerk i think that for object-dtype we should just call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The object stuff definitely adds a lot of complexity that may not be worth it, but that would be a breaking change to just pass as a str. Particularly for nested containers within an object array that would not work There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it might work for object arrays, already. I haven't checked yet, I'm not on the right branch. Will update later in the day. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, it turns out that this didn't work already. Sorry for the confusion. I've fixed this in the latest commit. |
||
if (PyTypeNum_ISDATETIME(type_num)) { | ||
is_datetimelike = 1; | ||
PyArray_VectorUnaryFunc *castfunc = | ||
|
@@ -1303,35 +1313,37 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
"Cannot cast numpy dtype %d to long", | ||
enc->npyType); | ||
} | ||
castfunc(dataptr, &nanosecVal, 1, NULL, NULL); | ||
castfunc(dataptr, &i8date, 1, NULL, NULL); | ||
dateUnit = get_datetime_metadata_from_dtype(dtype).base; | ||
} else if (PyDate_Check(item) || PyDelta_Check(item)) { | ||
is_datetimelike = 1; | ||
if (PyObject_HasAttrString(item, "_value")) { | ||
// see test_date_index_and_values for case with non-nano | ||
nanosecVal = get_long_attr(item, "_value"); | ||
i8date = get_long_attr(item, "_value"); | ||
} else { | ||
if (PyDelta_Check(item)) { | ||
nanosecVal = total_seconds(item) * | ||
i8date = total_seconds(item) * | ||
1000000000LL; // nanoseconds per second | ||
} else { | ||
// datetime.* objects don't follow above rules | ||
nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns); | ||
i8date = PyDateTimeToEpoch(item, NPY_FR_ns); | ||
} | ||
} | ||
} | ||
|
||
if (is_datetimelike) { | ||
if (nanosecVal == get_nat()) { | ||
if (i8date == get_nat()) { | ||
len = 4; | ||
cLabel = PyObject_Malloc(len + 1); | ||
strncpy(cLabel, "null", len + 1); | ||
} else { | ||
if (enc->datetimeIso) { | ||
if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) { | ||
cLabel = int64ToIsoDuration(nanosecVal, &len); | ||
// TODO(username): non-nano timedelta support? | ||
cLabel = int64ToIsoDuration(i8date, &len); | ||
} else { | ||
if (type_num == NPY_DATETIME) { | ||
cLabel = int64ToIso(nanosecVal, base, &len); | ||
cLabel = int64ToIso(i8date, dateUnit, base, &len); | ||
} else { | ||
cLabel = PyDateTimeToIso(item, base, &len); | ||
} | ||
|
@@ -1346,7 +1358,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
int size_of_cLabel = 21; // 21 chars for int 64 | ||
cLabel = PyObject_Malloc(size_of_cLabel); | ||
snprintf(cLabel, size_of_cLabel, "%" NPY_DATETIME_FMT, | ||
NpyDateTimeToEpoch(nanosecVal, base)); | ||
NpyDateTimeToEpoch(i8date, base)); | ||
len = strlen(cLabel); | ||
} | ||
} | ||
|
@@ -1538,13 +1550,25 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { | |
tc->type = JT_UTF8; | ||
return; | ||
} else if (PyArray_IsScalar(obj, Datetime)) { | ||
npy_int64 longVal; | ||
if (((PyDatetimeScalarObject *)obj)->obval == get_nat()) { | ||
tc->type = JT_NULL; | ||
return; | ||
} | ||
PyArray_Descr *dtype = PyArray_DescrFromScalar(obj); | ||
if (!PyTypeNum_ISDATETIME(dtype->type_num)) { | ||
PyErr_Format(PyExc_ValueError, "Could not get resolution of datetime"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason we aren't returning here? Feels a bit off to let execution continue after setting error There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My bad, updated. |
||
return; | ||
} | ||
|
||
PyArray_Descr *outcode = PyArray_DescrFromType(NPY_INT64); | ||
PyArray_CastScalarToCtype(obj, &longVal, outcode); | ||
Py_DECREF(outcode); | ||
|
||
if (enc->datetimeIso) { | ||
pc->PyTypeToUTF8 = PyDateTimeToIsoCallback; | ||
GET_TC(tc)->longValue = longVal; | ||
pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback; | ||
enc->valueUnit = get_datetime_metadata_from_dtype(dtype).base; | ||
tc->type = JT_UTF8; | ||
} else { | ||
NPY_DATETIMEUNIT base = | ||
|
Uh oh!
There was an error while loading. Please reload this page.