From 431495b3776fe49c551c1b305a7b5ccfe7083ff9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 17 Feb 2023 19:56:37 -0800 Subject: [PATCH 01/36] initial file setup and build --- .../_libs/tslibs/src/datetime/pd_datetime.c | 50 +++++++++++++++++++ setup.py | 23 +++++++++ 2 files changed, 73 insertions(+) create mode 100644 pandas/_libs/tslibs/src/datetime/pd_datetime.c diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c new file mode 100644 index 0000000000000..c73185b7605e0 --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -0,0 +1,50 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt + +*/ + +#define PY_SSIZE_T_CLEAN +#include + +static PyObject * +spam_system(PyObject *self, PyObject *args) +{ + const char *command; + int sts; + + if (!PyArg_ParseTuple(args, "s", &command)) + return NULL; + sts = system(command); + return PyLong_FromLong(sts); +} + + +static PyMethodDef SpamMethods[] = { + {"system", spam_system, METH_VARARGS, "Execute a shell command."}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static struct PyModuleDef spammodule = { + PyModuleDef_HEAD_INIT, "spam", /* name of module */ + spam_doc, /* module documentation, may be NULL */ + -1, /* size of per-interpreter state of the module, + or -1 if the module keeps state in global variables. */ + SpamMethods}; + + +PyMODINIT_FUNC +PyInit_spam(void) +{ + return PyModule_Create(&spammodule); +} diff --git a/setup.py b/setup.py index 87ef0b6029a64..160e34cdabb07 100755 --- a/setup.py +++ b/setup.py @@ -133,6 +133,7 @@ def initialize_options(self): pjoin(ujson_lib, "ultrajsonenc.c"), pjoin(ujson_lib, "ultrajsondec.c"), pjoin(util, "move.c"), + pjoin(tsbase, "datetime", "pd_datetime.c"), ] for root, dirs, files in os.walk("pandas"): @@ -659,6 +660,28 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): # ---------------------------------------------------------------------- +# ---------------------------------------------------------------------- +# pd_datetime +pd_dt_ext = Extension( + "pandas._libs.tslibs.src.datetime.pd_datetime", + depends=["pandas._libs.tslibs.datetime.pd_datetime.h"], + sources=(["pandas/_libs/tslibs/src/datetime/pd_datetime.c"]), + include_dirs=[ + "pandas/_libs/src/ujson/python", + "pandas/_libs/src/ujson/lib", + "pandas/_libs/src/datetime", + numpy.get_include(), + ], + extra_compile_args=(extra_compile_args), + extra_link_args=extra_link_args, + define_macros=macros, +) + + +extensions.append(pd_dt_ext) + +# ---------------------------------------------------------------------- + if __name__ == "__main__": # Freeze to support parallel compilation when using spawn instead of fork From c3dca727770b94f5b1696c26811ee7087bd737c8 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 18 Feb 2023 08:04:54 -0800 Subject: [PATCH 02/36] Building standalone impl --- .../_libs/tslibs/src/datetime/pd_datetime.c | 235 ++++++++++++++++-- .../_libs/tslibs/src/datetime/pd_datetime.h | 42 ++++ setup.py | 3 - 3 files changed, 256 insertions(+), 24 deletions(-) create mode 100644 pandas/_libs/tslibs/src/datetime/pd_datetime.h diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index c73185b7605e0..d9aaa046d43c2 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -17,34 +17,227 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #define PY_SSIZE_T_CLEAN #include -static PyObject * -spam_system(PyObject *self, PyObject *args) -{ - const char *command; - int sts; +#include "datetime.h" +#include "pd_datetime.h" + +static const int days_per_month_table[2][12] = { + {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; - if (!PyArg_ParseTuple(args, "s", &command)) - return NULL; - sts = system(command); - return PyLong_FromLong(sts); +/* + * Returns 1 if the given year is a leap year, 0 otherwise. + */ +static int is_leapyear(npy_int64 year) { + return (year & 0x3) == 0 && /* year % 4 == 0 */ + ((year % 100) != 0 || (year % 400) == 0); } +/* + * Calculates the days offset from the 1970 epoch. + */ +static npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { + int i, month; + npy_int64 year, days = 0; + const int *month_lengths; -static PyMethodDef SpamMethods[] = { - {"system", spam_system, METH_VARARGS, "Execute a shell command."}, - {NULL, NULL, 0, NULL} /* Sentinel */ -}; + year = dts->year - 1970; + days = year * 365; + + /* Adjust for leap years */ + if (days >= 0) { + /* + * 1968 is the closest leap year before 1970. + * Exclude the current year, so add 1. + */ + year += 1; + /* Add one day for each 4 years */ + days += year / 4; + /* 1900 is the closest previous year divisible by 100 */ + year += 68; + /* Subtract one day for each 100 years */ + days -= year / 100; + /* 1600 is the closest previous year divisible by 400 */ + year += 300; + /* Add one day for each 400 years */ + days += year / 400; + } else { + /* + * 1972 is the closest later year after 1970. + * Include the current year, so subtract 2. + */ + year -= 2; + /* Subtract one day for each 4 years */ + days += year / 4; + /* 2000 is the closest later year divisible by 100 */ + year -= 28; + /* Add one day for each 100 years */ + days -= year / 100; + /* 2000 is also the closest later year divisible by 400 */ + /* Subtract one day for each 400 years */ + days += year / 400; + } + + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + month = dts->month - 1; + + /* Add the months */ + for (i = 0; i < month; ++i) { + days += month_lengths[i]; + } + + /* Add the days */ + days += dts->day - 1; + + return days; +} + +/* + * Converts a datetime from a datetimestruct to a datetime based + * on a metadata unit. The date is assumed to be valid. + */ +static npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, + const npy_datetimestruct *dts) { + npy_datetime ret; + + if (base == NPY_FR_Y) { + /* Truncate to the year */ + ret = dts->year - 1970; + } else if (base == NPY_FR_M) { + /* Truncate to the month */ + ret = 12 * (dts->year - 1970) + (dts->month - 1); + } else { + /* Otherwise calculate the number of days to start */ + npy_int64 days = get_datetimestruct_days(dts); + + switch (base) { + case NPY_FR_W: + /* Truncate to weeks */ + if (days >= 0) { + ret = days / 7; + } else { + ret = (days - 6) / 7; + } + break; + case NPY_FR_D: + ret = days; + break; + case NPY_FR_h: + ret = days * 24 + dts->hour; + break; + case NPY_FR_m: + ret = (days * 24 + dts->hour) * 60 + dts->min; + break; + case NPY_FR_s: + ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; + break; + case NPY_FR_ms: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000 + + dts->us / 1000; + break; + case NPY_FR_us: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us; + break; + case NPY_FR_ns: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000 + + dts->ps / 1000; + break; + case NPY_FR_ps: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps; + break; + case NPY_FR_fs: + /* only 2.6 hours */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000 + + dts->as / 1000; + break; + case NPY_FR_as: + /* only 9.2 secs */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000000 + + dts->as; + break; + default: + /* Something got corrupted */ + PyErr_SetString( + PyExc_ValueError, + "NumPy datetime metadata with corrupt unit value"); + return -1; + } + } + return ret; +} + +static void +pandas_datetime_destructor(PyObject *op) +{ + void *ptr = PyCapsule_GetPointer(op, PandasDateTime_CAPSULE_NAME); + PyMem_Free(ptr); +} -static struct PyModuleDef spammodule = { - PyModuleDef_HEAD_INIT, "spam", /* name of module */ - spam_doc, /* module documentation, may be NULL */ - -1, /* size of per-interpreter state of the module, - or -1 if the module keeps state in global variables. */ - SpamMethods}; +static PyMethodDef module_methods[] = { + {NULL, NULL} +}; +static struct PyModuleDef pandas_datetimemodule = { + PyModuleDef_HEAD_INIT, + .m_name = "_pandas_datetime", + + .m_doc = "Internal module with datetime support for other extensions", + .m_size = 01, + .m_methods = module_methods +}; PyMODINIT_FUNC -PyInit_spam(void) +PyInit_pandas_datetime(void) { - return PyModule_Create(&spammodule); + PyObject *module = PyModule_Create(&pandas_datetimemodule); + if (module == NULL) + return NULL; + + PyDateTime_IMPORT; + + PandasDateTime_CAPI *capi = PyMem_Malloc(sizeof(PandasDateTime_CAPI)); + if (capi == NULL) { + PyErr_NoMemory(); + return NULL; + } + capi->npy_datetimestruct_to_datetime = npy_datetimestruct_to_datetime; + + PyObject *capsule = PyCapsule_New(capi, PandasDateTime_CAPSULE_NAME, pandas_datetime_destructor); + if (capsule == NULL) { + PyMem_Free(capi); + return NULL; + } + + if (PyModule_AddObject(module, "datetime_CAPI", capsule) < 0) { + Py_DECREF(capsule); + // cpython doesn't PyMem_Free(capi) here - mistake or intentional? + return NULL; + } + + return module; } diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h new file mode 100644 index 0000000000000..50cea0bfb05ef --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -0,0 +1,42 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +*/ + +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H + + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + typedef struct { + npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT, const npy_datetimestruct *); + } PandasDateTime_CAPI; + + #define PandasDateTime_CAPSULE_NAME "pandas._libs.tslibs.src.datetime.pd_datetime_CAPI" + + /* block used as part of public API */ +#ifndef _PANDAS_DATETIME_IMPL + static PandasDateTime_CAPI *PandasDateTimeAPI = NULL; + + #define PandasDateTime_IMPORT \ + PandasDateTimeAPI = (PandasDateTime_CAPI *)PyCapsule_Import(PandasDateTime_CAPSULE_NAME, 0) +#endif /* !define(_PANDAS_DATETIME_IMPL) */ + +#ifdef __cplusplus +} +#endif + +#endif /* !defined(PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H) */ diff --git a/setup.py b/setup.py index 160e34cdabb07..4459a151d38ff 100755 --- a/setup.py +++ b/setup.py @@ -667,9 +667,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): depends=["pandas._libs.tslibs.datetime.pd_datetime.h"], sources=(["pandas/_libs/tslibs/src/datetime/pd_datetime.c"]), include_dirs=[ - "pandas/_libs/src/ujson/python", - "pandas/_libs/src/ujson/lib", - "pandas/_libs/src/datetime", numpy.get_include(), ], extra_compile_args=(extra_compile_args), From d77a2ed8e7c5d7ec59db27602c8c2eb35beb6847 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 18 Feb 2023 08:55:55 -0800 Subject: [PATCH 03/36] more build --- pandas/_libs/tslibs/np_datetime.pxd | 13 ++++++++++--- pandas/_libs/tslibs/np_datetime.pyx | 1 - pandas/_libs/tslibs/src/datetime/pd_datetime.c | 2 ++ pandas/_libs/tslibs/src/datetime/pd_datetime.h | 3 +-- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 3faef6ed5d46e..83cde31467377 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -54,14 +54,12 @@ cdef extern from "numpy/ndarraytypes.h": int64_t NPY_DATETIME_NAT # elswhere we call this NPY_NAT + cdef extern from "src/datetime/np_datetime.h": ctypedef struct pandas_timedeltastruct: int64_t days int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds - void pandas_datetime_to_datetimestruct(npy_datetime val, - NPY_DATETIMEUNIT fr, - npy_datetimestruct *result) nogil npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, npy_datetimestruct *d) nogil @@ -71,6 +69,15 @@ cdef extern from "src/datetime/np_datetime.h": pandas_timedeltastruct *result ) nogil +cdef extern from "src/datetime/pd_datetime.h": + void PandasDateTime_IMPORT() + + void pandas_datetime_to_datetimestruct(npy_datetime val, + NPY_DATETIMEUNIT fr, + npy_datetimestruct *result) nogil + +PandasDateTime_IMPORT + cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index aa3411385595b..430d786fc885a 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -56,7 +56,6 @@ cdef extern from "src/datetime/np_datetime_strings.h": const char *format, int format_len, FormatRequirement exact) - # ---------------------------------------------------------------------- # numpy object inspection diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index d9aaa046d43c2..58e690783c8bd 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -14,6 +14,8 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt */ +#define _PANDAS_DATETIME_IMPL + #define PY_SSIZE_T_CLEAN #include diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index 50cea0bfb05ef..f98cb6a51b08e 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -33,10 +33,9 @@ extern "C" { #define PandasDateTime_IMPORT \ PandasDateTimeAPI = (PandasDateTime_CAPI *)PyCapsule_Import(PandasDateTime_CAPSULE_NAME, 0) -#endif /* !define(_PANDAS_DATETIME_IMPL) */ +#endif /* !defined(_PANDAS_DATETIME_IMPL) */ #ifdef __cplusplus } #endif - #endif /* !defined(PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H) */ From 4525e7716a3ed01ee539f11387e906358b80e716 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 18 Feb 2023 09:14:39 -0800 Subject: [PATCH 04/36] linting --- pandas/_libs/tslibs/np_datetime.pxd | 3 +-- pandas/_libs/tslibs/src/datetime/pd_datetime.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 83cde31467377..bcdad71e96e99 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -60,7 +60,6 @@ cdef extern from "src/datetime/np_datetime.h": int64_t days int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds - npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, npy_datetimestruct *d) nogil @@ -76,7 +75,7 @@ cdef extern from "src/datetime/pd_datetime.h": NPY_DATETIMEUNIT fr, npy_datetimestruct *result) nogil -PandasDateTime_IMPORT +PandasDateTime_IMPORT cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index 58e690783c8bd..a16dd50e08a80 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -194,8 +194,7 @@ static npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, } static void -pandas_datetime_destructor(PyObject *op) -{ +pandas_datetime_destructor(PyObject *op) { void *ptr = PyCapsule_GetPointer(op, PandasDateTime_CAPSULE_NAME); PyMem_Free(ptr); } @@ -207,15 +206,14 @@ static PyMethodDef module_methods[] = { static struct PyModuleDef pandas_datetimemodule = { PyModuleDef_HEAD_INIT, .m_name = "_pandas_datetime", - + .m_doc = "Internal module with datetime support for other extensions", .m_size = 01, .m_methods = module_methods }; PyMODINIT_FUNC -PyInit_pandas_datetime(void) -{ +PyInit_pandas_datetime(void) { PyObject *module = PyModule_Create(&pandas_datetimemodule); if (module == NULL) return NULL; @@ -228,8 +226,10 @@ PyInit_pandas_datetime(void) return NULL; } capi->npy_datetimestruct_to_datetime = npy_datetimestruct_to_datetime; - - PyObject *capsule = PyCapsule_New(capi, PandasDateTime_CAPSULE_NAME, pandas_datetime_destructor); + + PyObject *capsule = PyCapsule_New(capi, + PandasDateTime_CAPSULE_NAME, + pandas_datetime_destructor); if (capsule == NULL) { PyMem_Free(capi); return NULL; @@ -240,6 +240,6 @@ PyInit_pandas_datetime(void) // cpython doesn't PyMem_Free(capi) here - mistake or intentional? return NULL; } - + return module; } From 782b9718897d93ac9df7a04ac7c12c42ac4ee1c0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 18 Feb 2023 15:13:05 -0800 Subject: [PATCH 05/36] more updates --- pandas/_libs/missing.pyx | 1 + .../_libs/src/ujson/python/date_conversions.c | 1 + pandas/_libs/src/ujson/python/ujson.c | 2 + pandas/_libs/tslibs/np_datetime.pxd | 5 +- .../_libs/tslibs/src/datetime/np_datetime.c | 101 ------------------ .../_libs/tslibs/src/datetime/np_datetime.h | 3 - .../_libs/tslibs/src/datetime/pd_datetime.h | 3 + 7 files changed, 11 insertions(+), 105 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index e6516b004a973..30078ba0f3029 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -34,6 +34,7 @@ from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_unit, get_datetime64_value, get_timedelta64_value, + import_pandas_datetime ) from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op diff --git a/pandas/_libs/src/ujson/python/date_conversions.c b/pandas/_libs/src/ujson/python/date_conversions.c index 86cb68f869cb0..369ea2c4ea551 100644 --- a/pandas/_libs/src/ujson/python/date_conversions.c +++ b/pandas/_libs/src/ujson/python/date_conversions.c @@ -10,6 +10,7 @@ The full license is in the LICENSE file, distributed with this software. #include "date_conversions.h" #include <../../../tslibs/src/datetime/np_datetime.h> +#include <../../../tslibs/src/datetime/pd_datetime.h> #include <../../../tslibs/src/datetime/np_datetime_strings.h> /* diff --git a/pandas/_libs/src/ujson/python/ujson.c b/pandas/_libs/src/ujson/python/ujson.c index c12f88d2f9354..6cbd18d6e6c8e 100644 --- a/pandas/_libs/src/ujson/python/ujson.c +++ b/pandas/_libs/src/ujson/python/ujson.c @@ -40,6 +40,7 @@ Numeric decoder derived from TCL library #include #define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY #include "numpy/arrayobject.h" +#include <../../../tslibs/src/datetime/pd_datetime.h> /* objToJSON */ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs); @@ -447,5 +448,6 @@ PyMODINIT_FUNC PyInit_json(void) { } */ + PandasDateTime_IMPORT; return module; } diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index bcdad71e96e99..7cfce180cd9a4 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -74,8 +74,11 @@ cdef extern from "src/datetime/pd_datetime.h": void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr, npy_datetimestruct *result) nogil + void PandasDateTime_IMPORT() -PandasDateTime_IMPORT +# You must call this before using the PandasDateTime CAPI functions +cdef inline void import_pandas_datetime(): + PandasDateTime_IMPORT cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index 2bac6c720c3b6..7d6ce3e1f9e5c 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -444,107 +444,6 @@ int convert_pydatetime_to_datetimestruct(PyObject *dtobj, return 0; } - -/* - * Converts a datetime from a datetimestruct to a datetime based - * on a metadata unit. The date is assumed to be valid. - */ -npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, - const npy_datetimestruct *dts) { - npy_datetime ret; - - if (base == NPY_FR_Y) { - /* Truncate to the year */ - ret = dts->year - 1970; - } else if (base == NPY_FR_M) { - /* Truncate to the month */ - ret = 12 * (dts->year - 1970) + (dts->month - 1); - } else { - /* Otherwise calculate the number of days to start */ - npy_int64 days = get_datetimestruct_days(dts); - - switch (base) { - case NPY_FR_W: - /* Truncate to weeks */ - if (days >= 0) { - ret = days / 7; - } else { - ret = (days - 6) / 7; - } - break; - case NPY_FR_D: - ret = days; - break; - case NPY_FR_h: - ret = days * 24 + dts->hour; - break; - case NPY_FR_m: - ret = (days * 24 + dts->hour) * 60 + dts->min; - break; - case NPY_FR_s: - ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; - break; - case NPY_FR_ms: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000 + - dts->us / 1000; - break; - case NPY_FR_us: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us; - break; - case NPY_FR_ns: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000 + - dts->ps / 1000; - break; - case NPY_FR_ps: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps; - break; - case NPY_FR_fs: - /* only 2.6 hours */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000 + - dts->as / 1000; - break; - case NPY_FR_as: - /* only 9.2 secs */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000000 + - dts->as; - break; - default: - /* Something got corrupted */ - PyErr_SetString( - PyExc_ValueError, - "NumPy datetime metadata with corrupt unit value"); - return -1; - } - } - return ret; -} - /* * Port numpy#13188 https://github.com/numpy/numpy/pull/13188/ * diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h index 6ab915e517cfb..3d4b59e056c4e 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h @@ -53,9 +53,6 @@ PyObject *extract_utc_offset(PyObject *obj); int convert_pydatetime_to_datetimestruct(PyObject *dtobj, npy_datetimestruct *out); -npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, - const npy_datetimestruct *dts); - void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr, npy_datetimestruct *result); diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index f98cb6a51b08e..a6ca7a1f1451c 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -33,6 +33,9 @@ extern "C" { #define PandasDateTime_IMPORT \ PandasDateTimeAPI = (PandasDateTime_CAPI *)PyCapsule_Import(PandasDateTime_CAPSULE_NAME, 0) + +#define npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT, npy_datetimestruct) \ + PandasDateTimeAPI->npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT, npy_datetimestruct) #endif /* !defined(_PANDAS_DATETIME_IMPL) */ #ifdef __cplusplus From 69abccdfdd6880b58b540663cbf22e3521a20261 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 19 Feb 2023 10:13:43 -0800 Subject: [PATCH 06/36] Capsule location change --- pandas/_libs/src/ujson/python/ujson.c | 3 +++ pandas/_libs/tslibs/src/datetime/pd_datetime.h | 6 ++++-- setup.py | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/src/ujson/python/ujson.c b/pandas/_libs/src/ujson/python/ujson.c index 6cbd18d6e6c8e..7e6deb1bb5c37 100644 --- a/pandas/_libs/src/ujson/python/ujson.c +++ b/pandas/_libs/src/ujson/python/ujson.c @@ -449,5 +449,8 @@ PyMODINIT_FUNC PyInit_json(void) { */ PandasDateTime_IMPORT; + if (PandasDateTimeAPI == NULL && PyErr_Occurred()) + return NULL; + return module; } diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index a6ca7a1f1451c..63acd4a86f486 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -10,7 +10,6 @@ Distributed under the terms of the BSD Simplified License. #ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H #define PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H - #ifndef NPY_NO_DEPRECATED_API #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #endif // NPY_NO_DEPRECATED_API @@ -25,7 +24,10 @@ extern "C" { npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT, const npy_datetimestruct *); } PandasDateTime_CAPI; - #define PandasDateTime_CAPSULE_NAME "pandas._libs.tslibs.src.datetime.pd_datetime_CAPI" + // Todo: the capsule name has to match an importable location? So can't + // provide full file path in current location given it is not a package + // see also setup.py + #define PandasDateTime_CAPSULE_NAME "pandas._libs.pandas_datetime" /* block used as part of public API */ #ifndef _PANDAS_DATETIME_IMPL diff --git a/setup.py b/setup.py index 4459a151d38ff..4dba468ba827e 100755 --- a/setup.py +++ b/setup.py @@ -663,7 +663,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): # ---------------------------------------------------------------------- # pd_datetime pd_dt_ext = Extension( - "pandas._libs.tslibs.src.datetime.pd_datetime", + "pandas._libs.pandas_datetime", depends=["pandas._libs.tslibs.datetime.pd_datetime.h"], sources=(["pandas/_libs/tslibs/src/datetime/pd_datetime.c"]), include_dirs=[ From 69460bd96899509f5616e7c0c425596c3c04f466 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 19 Feb 2023 20:07:33 -0800 Subject: [PATCH 07/36] working import --- pandas/_libs/__init__.py | 2 ++ pandas/_libs/tslibs/src/datetime/pd_datetime.c | 4 ++-- pandas/_libs/tslibs/src/datetime/pd_datetime.h | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py index f119e280f5867..0005b7226d865 100644 --- a/pandas/_libs/__init__.py +++ b/pandas/_libs/__init__.py @@ -7,9 +7,11 @@ "Timestamp", "iNaT", "Interval", + "pandas_datetime", ] +import pandas._libs.pandas_datetime as pandas_datetime from pandas._libs.interval import Interval from pandas._libs.tslibs import ( NaT, diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index a16dd50e08a80..b627411ef56c0 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -205,7 +205,7 @@ static PyMethodDef module_methods[] = { static struct PyModuleDef pandas_datetimemodule = { PyModuleDef_HEAD_INIT, - .m_name = "_pandas_datetime", + .m_name = "pandas._libs.pandas_datetime", .m_doc = "Internal module with datetime support for other extensions", .m_size = 01, @@ -235,7 +235,7 @@ PyInit_pandas_datetime(void) { return NULL; } - if (PyModule_AddObject(module, "datetime_CAPI", capsule) < 0) { + if (PyModule_AddObject(module, "pandas_datetime_CAPI", capsule) < 0) { Py_DECREF(capsule); // cpython doesn't PyMem_Free(capi) here - mistake or intentional? return NULL; diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index 63acd4a86f486..cc35d3e479a6e 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -27,7 +27,7 @@ extern "C" { // Todo: the capsule name has to match an importable location? So can't // provide full file path in current location given it is not a package // see also setup.py - #define PandasDateTime_CAPSULE_NAME "pandas._libs.pandas_datetime" + #define PandasDateTime_CAPSULE_NAME "pandas._libs.pandas_datetime.pandas_datetime_CAPI" /* block used as part of public API */ #ifndef _PANDAS_DATETIME_IMPL From 776f0f5497adf7a89f47969ccf56333ba85dd88f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 20 Feb 2023 14:12:15 -0800 Subject: [PATCH 08/36] working imports? --- pandas/__init__.py | 2 +- pandas/_libs/index.pyx | 5 ++ pandas/_libs/missing.pyx | 2 + pandas/_libs/tslib.pyx | 6 ++ pandas/_libs/tslibs/conversion.pyx | 5 +- pandas/_libs/tslibs/dtypes.pyx | 3 + pandas/_libs/tslibs/fields.pyx | 2 + pandas/_libs/tslibs/np_datetime.pxd | 1 - pandas/_libs/tslibs/offsets.pyx | 3 + pandas/_libs/tslibs/parsing.pyx | 3 + pandas/_libs/tslibs/period.pyx | 3 + .../_libs/tslibs/src/datetime/pd_datetime.c | 61 +++++++++++-------- .../_libs/tslibs/src/datetime/pd_datetime.h | 9 ++- pandas/_libs/tslibs/strptime.pyx | 4 ++ pandas/_libs/tslibs/timedeltas.pyx | 3 + pandas/_libs/tslibs/timestamps.pyx | 3 + pandas/_libs/tslibs/tzconversion.pyx | 4 ++ 17 files changed, 86 insertions(+), 33 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 1a549c09d22f7..2e5d19d52469e 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -183,7 +183,6 @@ __git_version__ = v.get("full-revisionid") del get_versions, v - # module level doc-string __doc__ = """ pandas - a powerful data analysis and manipulation library for Python @@ -303,6 +302,7 @@ "offsets", "option_context", "options", + "pandas_datetime_CAPI", "period_range", "pivot", "pivot_table", diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 2a6c26d595548..2db96c3d65cbe 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -20,7 +20,12 @@ from pandas._libs.tslibs.nattype cimport c_NaT as NaT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, get_unit_from_dtype, + import_pandas_datetime, ) + +import_pandas_datetime() + + from pandas._libs.tslibs.period cimport is_period_object from pandas._libs.tslibs.timedeltas cimport _Timedelta from pandas._libs.tslibs.timestamps cimport _Timestamp diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 30078ba0f3029..c6c154d3a8e88 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -37,6 +37,8 @@ from pandas._libs.tslibs.np_datetime cimport ( import_pandas_datetime ) +import_pandas_datetime() + from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op cdef: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 19dd7aabe6b8e..d33bca3555bc4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -39,7 +39,13 @@ from pandas._libs.tslibs.np_datetime cimport ( pandas_datetime_to_datetimestruct, pydate_to_dt64, string_to_dts, + import_pandas_datetime, ) + + +import_pandas_datetime() + + from pandas._libs.tslibs.strptime cimport parse_today_now from pandas._libs.util cimport ( is_datetime64_object, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 03a53b1b451e9..95c21fac85832 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -17,10 +17,10 @@ from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, datetime, - import_datetime, time, timedelta, tzinfo, + import_datetime, ) import_datetime() @@ -47,8 +47,11 @@ from pandas._libs.tslibs.np_datetime cimport ( pydatetime_to_dt64, pydatetime_to_dtstruct, string_to_dts, + import_pandas_datetime, ) +import_pandas_datetime() + from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.nattype cimport ( diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 699e8aba76dd6..140a305f1061c 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -5,8 +5,11 @@ from enum import Enum from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, get_conversion_factor, + import_pandas_datetime ) +import_pandas_datetime() + cdef class PeriodDtypeBase: """ diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 242e7159d29b5..2165c4ad9357e 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -47,8 +47,10 @@ from pandas._libs.tslibs.np_datetime cimport ( pandas_datetime_to_datetimestruct, pandas_timedelta_to_timedeltastruct, pandas_timedeltastruct, + import_pandas_datetime, ) +import_pandas_datetime() @cython.wraparound(False) @cython.boundscheck(False) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 7cfce180cd9a4..4bbcd4109157c 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -74,7 +74,6 @@ cdef extern from "src/datetime/pd_datetime.h": void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr, npy_datetimestruct *result) nogil - void PandasDateTime_IMPORT() # You must call this before using the PandasDateTime CAPI functions cdef inline void import_pandas_datetime(): diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index e8e0ddbb404de..7b9ce82c394af 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -64,8 +64,11 @@ from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, pydate_to_dtstruct, + import_pandas_datetime, ) +import_pandas_datetime() + from .dtypes cimport PeriodDtypeCode from .timedeltas cimport ( _Timedelta, diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 445683968c58f..ae94b85e92e1e 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -71,8 +71,11 @@ from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, npy_datetimestruct, string_to_dts, + import_pandas_datetime, ) +import_pandas_datetime() + from pandas._libs.tslibs.strptime import array_strptime from pandas._libs.tslibs.util cimport ( diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 77c8be6be07e4..22ac92149189d 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -52,8 +52,11 @@ from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, + import_pandas_datetime ) +import_pandas_datetime() + from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.ccalendar cimport ( diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index b627411ef56c0..ac279070b629b 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -199,31 +199,12 @@ pandas_datetime_destructor(PyObject *op) { PyMem_Free(ptr); } -static PyMethodDef module_methods[] = { - {NULL, NULL} -}; - -static struct PyModuleDef pandas_datetimemodule = { - PyModuleDef_HEAD_INIT, - .m_name = "pandas._libs.pandas_datetime", - - .m_doc = "Internal module with datetime support for other extensions", - .m_size = 01, - .m_methods = module_methods -}; - -PyMODINIT_FUNC -PyInit_pandas_datetime(void) { - PyObject *module = PyModule_Create(&pandas_datetimemodule); - if (module == NULL) - return NULL; - +static int pandas_datetime_exec(PyObject *module) { PyDateTime_IMPORT; - PandasDateTime_CAPI *capi = PyMem_Malloc(sizeof(PandasDateTime_CAPI)); if (capi == NULL) { PyErr_NoMemory(); - return NULL; + return -1; } capi->npy_datetimestruct_to_datetime = npy_datetimestruct_to_datetime; @@ -232,14 +213,44 @@ PyInit_pandas_datetime(void) { pandas_datetime_destructor); if (capsule == NULL) { PyMem_Free(capi); - return NULL; + return -1; + } + + // Monkeypatch the top level pandas module to have an attribute for the + // C-API. This is required because Python capsules do not support setting + // this attribute on anything but the top level package. Ideally not + // done when cpython gh-6898 gets implemented + PyObject *pandas = PyImport_ImportModule("pandas"); + if (!pandas) { + PyErr_SetString(PyExc_ImportError, "pd_datetime.c could not import module pandas"); + Py_DECREF(capsule); + // cpython doesn't PyMem_Free(capi) here - mistake or intentional? + return -1; } - if (PyModule_AddObject(module, "pandas_datetime_CAPI", capsule) < 0) { + if (PyModule_AddObject(pandas, "pandas_datetime_CAPI", capsule) < 0) { Py_DECREF(capsule); // cpython doesn't PyMem_Free(capi) here - mistake or intentional? - return NULL; + return -1; } - return module; + return 0; +} + +static PyModuleDef_Slot pandas_datetime_slots[] = { + {Py_mod_exec, pandas_datetime_exec}, {0, NULL}}; + +static struct PyModuleDef pandas_datetimemodule = { + PyModuleDef_HEAD_INIT, + .m_name = "pandas._libs.pandas_datetime", + + .m_doc = "Internal module with datetime support for other extensions", + .m_size = 0, + .m_methods = NULL, + .m_slots = pandas_datetime_slots +}; + +PyMODINIT_FUNC +PyInit_pandas_datetime(void) { + return PyModuleDef_Init(&pandas_datetimemodule); } diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index cc35d3e479a6e..66539e3d2f845 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -24,10 +24,9 @@ extern "C" { npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT, const npy_datetimestruct *); } PandasDateTime_CAPI; - // Todo: the capsule name has to match an importable location? So can't - // provide full file path in current location given it is not a package - // see also setup.py - #define PandasDateTime_CAPSULE_NAME "pandas._libs.pandas_datetime.pandas_datetime_CAPI" + // The capsule name appears limited to module.attributename; see bpo-32414 + // cpython has an open PR gh-6898 to fix, but hasn't had traction for years + #define PandasDateTime_CAPSULE_NAME "pandas.pandas_datetime_CAPI" /* block used as part of public API */ #ifndef _PANDAS_DATETIME_IMPL @@ -37,7 +36,7 @@ extern "C" { PandasDateTimeAPI = (PandasDateTime_CAPI *)PyCapsule_Import(PandasDateTime_CAPSULE_NAME, 0) #define npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT, npy_datetimestruct) \ - PandasDateTimeAPI->npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT, npy_datetimestruct) + PandasDateTimeAPI->npy_datetimestruct_to_datetime((NPY_DATETIMEUNIT), (npy_datetimestruct)) #endif /* !defined(_PANDAS_DATETIME_IMPL) */ #ifdef __cplusplus diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index cf847746f16cd..2d741cb2c2662 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -62,7 +62,11 @@ from pandas._libs.tslibs.np_datetime cimport ( pydate_to_dt64, pydatetime_to_dt64, string_to_dts, + import_pandas_datetime, ) + +import_pandas_datetime() + from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.util cimport ( diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index a8c5474f0f532..aaf809beceb94 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -58,8 +58,11 @@ from pandas._libs.tslibs.np_datetime cimport ( pandas_datetime_to_datetimestruct, pandas_timedelta_to_timedeltastruct, pandas_timedeltastruct, + import_pandas_datetime, ) +import_pandas_datetime() + from pandas._libs.tslibs.np_datetime import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9e9dab155a5cf..c320b7426506e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -92,8 +92,11 @@ from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, pydatetime_to_dtstruct, + import_pandas_datetime, ) +import_pandas_datetime() + from pandas._libs.tslibs.np_datetime import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index c5f3b0ab7154f..9073a7b3e0d9d 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -37,7 +37,11 @@ from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, pandas_datetime_to_datetimestruct, pydatetime_to_dt64, + import_pandas_datetime, ) + +import_pandas_datetime() + from pandas._libs.tslibs.timezones cimport ( get_dst_info, is_fixed_offset, From c4a05b56ce58c67e5f422638c98c59655eb1edf6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 20 Feb 2023 15:56:10 -0800 Subject: [PATCH 09/36] moved lots --- .../_libs/src/ujson/python/date_conversions.c | 164 -- .../_libs/src/ujson/python/date_conversions.h | 39 - pandas/_libs/src/ujson/python/objToJSON.c | 7 +- pandas/_libs/tslibs/np_datetime.pxd | 13 +- pandas/_libs/tslibs/np_datetime.pyx | 3 +- .../_libs/tslibs/src/datetime/np_datetime.c | 992 -------- .../_libs/tslibs/src/datetime/np_datetime.h | 99 - .../tslibs/src/datetime/np_datetime_strings.c | 1150 --------- .../tslibs/src/datetime/np_datetime_strings.h | 111 - .../_libs/tslibs/src/datetime/pd_datetime.c | 2132 +++++++++++++++++ .../_libs/tslibs/src/datetime/pd_datetime.h | 83 + setup.py | 30 +- 12 files changed, 2232 insertions(+), 2591 deletions(-) delete mode 100644 pandas/_libs/src/ujson/python/date_conversions.c delete mode 100644 pandas/_libs/src/ujson/python/date_conversions.h delete mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime.c delete mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime.h delete mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime_strings.c delete mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime_strings.h diff --git a/pandas/_libs/src/ujson/python/date_conversions.c b/pandas/_libs/src/ujson/python/date_conversions.c deleted file mode 100644 index 369ea2c4ea551..0000000000000 --- a/pandas/_libs/src/ujson/python/date_conversions.c +++ /dev/null @@ -1,164 +0,0 @@ -/* -Copyright (c) 2020, PyData Development Team -All rights reserved. -Distributed under the terms of the BSD Simplified License. -The full license is in the LICENSE file, distributed with this software. -*/ - -// Conversion routines that are useful for serialization, -// but which don't interact with JSON objects directly - -#include "date_conversions.h" -#include <../../../tslibs/src/datetime/np_datetime.h> -#include <../../../tslibs/src/datetime/pd_datetime.h> -#include <../../../tslibs/src/datetime/np_datetime_strings.h> - -/* - * Function: scaleNanosecToUnit - * ----------------------------- - * - * Scales an integer value representing time in nanoseconds to provided unit. - * - * Mutates the provided value directly. Returns 0 on success, non-zero on error. - */ -int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { - switch (unit) { - case NPY_FR_ns: - break; - case NPY_FR_us: - *value /= 1000LL; - break; - case NPY_FR_ms: - *value /= 1000000LL; - break; - case NPY_FR_s: - *value /= 1000000000LL; - break; - default: - return -1; - } - - return 0; -} - -/* Converts the int64_t representation of a datetime to ISO; mutates len */ -char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { - npy_datetimestruct dts; - int ret_code; - - pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); - - *len = (size_t)get_datetime_iso_8601_strlen(0, base); - char *result = PyObject_Malloc(*len); - - if (result == NULL) { - PyErr_NoMemory(); - return NULL; - } - // datetime64 is always naive - ret_code = make_iso_8601_datetime(&dts, result, *len, 0, base); - if (ret_code != 0) { - PyErr_SetString(PyExc_ValueError, - "Could not convert datetime value to string"); - PyObject_Free(result); - } - - // Note that get_datetime_iso_8601_strlen just gives a generic size - // for ISO string conversion, not the actual size used - *len = strlen(result); - return result; -} - -npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { - scaleNanosecToUnit(&dt, base); - return dt; -} - -/* Convert PyDatetime To ISO C-string. mutates len */ -char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, - size_t *len) { - npy_datetimestruct dts; - int ret; - - ret = convert_pydatetime_to_datetimestruct(obj, &dts); - if (ret != 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Could not convert PyDateTime to numpy datetime"); - } - return NULL; - } - - *len = (size_t)get_datetime_iso_8601_strlen(0, base); - char *result = PyObject_Malloc(*len); - // Check to see if PyDateTime has a timezone. - // Don't convert to UTC if it doesn't. - int is_tz_aware = 0; - if (PyObject_HasAttrString(obj, "tzinfo")) { - PyObject *offset = extract_utc_offset(obj); - if (offset == NULL) { - PyObject_Free(result); - return NULL; - } - is_tz_aware = offset != Py_None; - Py_DECREF(offset); - } - ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base); - - if (ret != 0) { - PyErr_SetString(PyExc_ValueError, - "Could not convert datetime value to string"); - PyObject_Free(result); - return NULL; - } - - // Note that get_datetime_iso_8601_strlen just gives a generic size - // for ISO string conversion, not the actual size used - *len = strlen(result); - return result; -} - -npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) { - npy_datetimestruct dts; - int ret; - - ret = convert_pydatetime_to_datetimestruct(dt, &dts); - if (ret != 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Could not convert PyDateTime to numpy datetime"); - } - // TODO(username): is setting errMsg required? - // ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; - // return NULL; - } - - npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); - return NpyDateTimeToEpoch(npy_dt, base); -} - -/* Converts the int64_t representation of a duration to ISO; mutates len */ -char *int64ToIsoDuration(int64_t value, size_t *len) { - pandas_timedeltastruct tds; - int ret_code; - - pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds); - - // Max theoretical length of ISO Duration with 64 bit day - // as the largest unit is 70 characters + 1 for a null terminator - char *result = PyObject_Malloc(71); - if (result == NULL) { - PyErr_NoMemory(); - return NULL; - } - - ret_code = make_iso_8601_timedelta(&tds, result, len); - if (ret_code == -1) { - PyErr_SetString(PyExc_ValueError, - "Could not convert timedelta value to string"); - PyObject_Free(result); - return NULL; - } - - return result; -} diff --git a/pandas/_libs/src/ujson/python/date_conversions.h b/pandas/_libs/src/ujson/python/date_conversions.h deleted file mode 100644 index efd707f04197c..0000000000000 --- a/pandas/_libs/src/ujson/python/date_conversions.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright (c) 2020, PyData Development Team -All rights reserved. -Distributed under the terms of the BSD Simplified License. -The full license is in the LICENSE file, distributed with this software. -*/ - -#ifndef PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ -#define PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ - -#define PY_SSIZE_T_CLEAN -#include -#include - -// Scales value inplace from nanosecond resolution to unit resolution -int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit); - -// Converts an int64 object representing a date to ISO format -// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z -// while base="ns" yields "2020-01-01T00:00:00.000000000Z" -// len is mutated to save the length of the returned string -char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len); - -// TODO(username): this function doesn't do a lot; should augment or -// replace with scaleNanosecToUnit -npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base); - -// Converts a Python object representing a Date / Datetime to ISO format -// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z -// while base="ns" yields "2020-01-01T00:00:00.000000000Z" -// len is mutated to save the length of the returned string -char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, size_t *len); - -// Convert a Python Date/Datetime to Unix epoch with resolution base -npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base); - -char *int64ToIsoDuration(int64_t value, size_t *len); - -#endif // PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index e892b50515327..2e96e92d6b63f 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -47,8 +47,8 @@ Numeric decoder derived from TCL library #include #include #include -#include "date_conversions.h" #include "datetime.h" +#include "pd_datetime.h" npy_int64 get_nat(void) { return NPY_MIN_INT64; } @@ -1977,6 +1977,11 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, return NULL; } + PandasDateTime_IMPORT; + if (PandasDateTimeAPI == NULL) { + return NULL; + } + static char *kwlist[] = {"obj", "ensure_ascii", "double_precision", diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 4bbcd4109157c..72a0d6252957a 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -55,7 +55,7 @@ cdef extern from "numpy/ndarraytypes.h": int64_t NPY_DATETIME_NAT # elswhere we call this NPY_NAT -cdef extern from "src/datetime/np_datetime.h": +cdef extern from "src/datetime/pd_datetime.h": ctypedef struct pandas_timedeltastruct: int64_t days int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds @@ -68,13 +68,17 @@ cdef extern from "src/datetime/np_datetime.h": pandas_timedeltastruct *result ) nogil -cdef extern from "src/datetime/pd_datetime.h": void PandasDateTime_IMPORT() void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr, npy_datetimestruct *result) nogil + ctypedef enum FormatRequirement: + PARTIAL_MATCH + EXACT_MATCH + INFER_FORMAT + # You must call this before using the PandasDateTime CAPI functions cdef inline void import_pandas_datetime(): PandasDateTime_IMPORT @@ -133,8 +137,3 @@ cdef int64_t convert_reso( bint round_ok, ) except? -1 -cdef extern from "src/datetime/np_datetime_strings.h": - ctypedef enum FormatRequirement: - PARTIAL_MATCH - EXACT_MATCH - INFER_FORMAT diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 430d786fc885a..837d1e089cca7 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -35,7 +35,7 @@ from numpy cimport ( from pandas._libs.tslibs.util cimport get_c_string_buf_and_size -cdef extern from "src/datetime/np_datetime.h": +cdef extern from "src/datetime/pd_datetime.h": int cmp_npy_datetimestruct(npy_datetimestruct *a, npy_datetimestruct *b) @@ -48,7 +48,6 @@ cdef extern from "src/datetime/np_datetime.h": PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype) -cdef extern from "src/datetime/np_datetime_strings.h": int parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, NPY_DATETIMEUNIT *out_bestunit, diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c deleted file mode 100644 index 7d6ce3e1f9e5c..0000000000000 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ /dev/null @@ -1,992 +0,0 @@ -/* - -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. - -Copyright (c) 2005-2011, NumPy Developers -All rights reserved. - -This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt - -*/ - -#define NO_IMPORT - -#ifndef NPY_NO_DEPRECATED_API -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#endif // NPY_NO_DEPRECATED_API - -#include - -#include -#include -#include -#include "np_datetime.h" - - -const npy_datetimestruct _AS_MIN_DTS = { - 1969, 12, 31, 23, 59, 50, 776627, 963145, 224193}; -const npy_datetimestruct _FS_MIN_DTS = { - 1969, 12, 31, 21, 26, 16, 627963, 145224, 193000}; -const npy_datetimestruct _PS_MIN_DTS = { - 1969, 9, 16, 5, 57, 7, 963145, 224193, 0}; -const npy_datetimestruct _NS_MIN_DTS = { - 1677, 9, 21, 0, 12, 43, 145224, 193000, 0}; -const npy_datetimestruct _US_MIN_DTS = { - -290308, 12, 21, 19, 59, 05, 224193, 0, 0}; -const npy_datetimestruct _MS_MIN_DTS = { - -292275055, 5, 16, 16, 47, 4, 193000, 0, 0}; -const npy_datetimestruct _S_MIN_DTS = { - -292277022657, 1, 27, 8, 29, 53, 0, 0, 0}; -const npy_datetimestruct _M_MIN_DTS = { - -17536621475646, 5, 4, 5, 53, 0, 0, 0, 0}; - -const npy_datetimestruct _AS_MAX_DTS = { - 1970, 1, 1, 0, 0, 9, 223372, 36854, 775807}; -const npy_datetimestruct _FS_MAX_DTS = { - 1970, 1, 1, 2, 33, 43, 372036, 854775, 807000}; -const npy_datetimestruct _PS_MAX_DTS = { - 1970, 4, 17, 18, 2, 52, 36854, 775807, 0}; -const npy_datetimestruct _NS_MAX_DTS = { - 2262, 4, 11, 23, 47, 16, 854775, 807000, 0}; -const npy_datetimestruct _US_MAX_DTS = { - 294247, 1, 10, 4, 0, 54, 775807, 0, 0}; -const npy_datetimestruct _MS_MAX_DTS = { - 292278994, 8, 17, 7, 12, 55, 807000, 0, 0}; -const npy_datetimestruct _S_MAX_DTS = { - 292277026596, 12, 4, 15, 30, 7, 0, 0, 0}; -const npy_datetimestruct _M_MAX_DTS = { - 17536621479585, 8, 30, 18, 7, 0, 0, 0, 0}; - - -const int days_per_month_table[2][12] = { - {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, - {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; - -/* - * Returns 1 if the given year is a leap year, 0 otherwise. - */ -int is_leapyear(npy_int64 year) { - return (year & 0x3) == 0 && /* year % 4 == 0 */ - ((year % 100) != 0 || (year % 400) == 0); -} - -/* - * Adjusts a datetimestruct based on a minutes offset. Assumes - * the current values are valid.g - */ -void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) { - int isleap; - - /* MINUTES */ - dts->min += minutes; - while (dts->min < 0) { - dts->min += 60; - dts->hour--; - } - while (dts->min >= 60) { - dts->min -= 60; - dts->hour++; - } - - /* HOURS */ - while (dts->hour < 0) { - dts->hour += 24; - dts->day--; - } - while (dts->hour >= 24) { - dts->hour -= 24; - dts->day++; - } - - /* DAYS */ - if (dts->day < 1) { - dts->month--; - if (dts->month < 1) { - dts->year--; - dts->month = 12; - } - isleap = is_leapyear(dts->year); - dts->day += days_per_month_table[isleap][dts->month - 1]; - } else if (dts->day > 28) { - isleap = is_leapyear(dts->year); - if (dts->day > days_per_month_table[isleap][dts->month - 1]) { - dts->day -= days_per_month_table[isleap][dts->month - 1]; - dts->month++; - if (dts->month > 12) { - dts->year++; - dts->month = 1; - } - } - } -} - -/* - * Calculates the days offset from the 1970 epoch. - */ -npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { - int i, month; - npy_int64 year, days = 0; - const int *month_lengths; - - year = dts->year - 1970; - days = year * 365; - - /* Adjust for leap years */ - if (days >= 0) { - /* - * 1968 is the closest leap year before 1970. - * Exclude the current year, so add 1. - */ - year += 1; - /* Add one day for each 4 years */ - days += year / 4; - /* 1900 is the closest previous year divisible by 100 */ - year += 68; - /* Subtract one day for each 100 years */ - days -= year / 100; - /* 1600 is the closest previous year divisible by 400 */ - year += 300; - /* Add one day for each 400 years */ - days += year / 400; - } else { - /* - * 1972 is the closest later year after 1970. - * Include the current year, so subtract 2. - */ - year -= 2; - /* Subtract one day for each 4 years */ - days += year / 4; - /* 2000 is the closest later year divisible by 100 */ - year -= 28; - /* Add one day for each 100 years */ - days -= year / 100; - /* 2000 is also the closest later year divisible by 400 */ - /* Subtract one day for each 400 years */ - days += year / 400; - } - - month_lengths = days_per_month_table[is_leapyear(dts->year)]; - month = dts->month - 1; - - /* Add the months */ - for (i = 0; i < month; ++i) { - days += month_lengths[i]; - } - - /* Add the days */ - days += dts->day - 1; - - return days; -} - -/* - * Modifies '*days_' to be the day offset within the year, - * and returns the year. - */ -static npy_int64 days_to_yearsdays(npy_int64 *days_) { - const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); - /* Adjust so it's relative to the year 2000 (divisible by 400) */ - npy_int64 days = (*days_) - (365 * 30 + 7); - npy_int64 year; - - /* Break down the 400 year cycle to get the year and day within the year */ - if (days >= 0) { - year = 400 * (days / days_per_400years); - days = days % days_per_400years; - } else { - year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); - days = days % days_per_400years; - if (days < 0) { - days += days_per_400years; - } - } - - /* Work out the year/day within the 400 year cycle */ - if (days >= 366) { - year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); - days = (days - 1) % (100 * 365 + 25 - 1); - if (days >= 365) { - year += 4 * ((days + 1) / (4 * 365 + 1)); - days = (days + 1) % (4 * 365 + 1); - if (days >= 366) { - year += (days - 1) / 365; - days = (days - 1) % 365; - } - } - } - - *days_ = days; - return year + 2000; -} - -/* - * Adjusts a datetimestruct based on a seconds offset. Assumes - * the current values are valid. - */ -NPY_NO_EXPORT void add_seconds_to_datetimestruct(npy_datetimestruct *dts, - int seconds) { - int minutes; - - dts->sec += seconds; - if (dts->sec < 0) { - minutes = dts->sec / 60; - dts->sec = dts->sec % 60; - if (dts->sec < 0) { - --minutes; - dts->sec += 60; - } - add_minutes_to_datetimestruct(dts, minutes); - } else if (dts->sec >= 60) { - minutes = dts->sec / 60; - dts->sec = dts->sec % 60; - add_minutes_to_datetimestruct(dts, minutes); - } -} - -/* - * Fills in the year, month, day in 'dts' based on the days - * offset from 1970. - */ -static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { - const int *month_lengths; - int i; - - dts->year = days_to_yearsdays(&days); - month_lengths = days_per_month_table[is_leapyear(dts->year)]; - - for (i = 0; i < 12; ++i) { - if (days < month_lengths[i]) { - dts->month = i + 1; - dts->day = days + 1; - return; - } else { - days -= month_lengths[i]; - } - } -} - -/* - * Compares two npy_datetimestruct objects chronologically - */ -int cmp_npy_datetimestruct(const npy_datetimestruct *a, - const npy_datetimestruct *b) { - if (a->year > b->year) { - return 1; - } else if (a->year < b->year) { - return -1; - } - - if (a->month > b->month) { - return 1; - } else if (a->month < b->month) { - return -1; - } - - if (a->day > b->day) { - return 1; - } else if (a->day < b->day) { - return -1; - } - - if (a->hour > b->hour) { - return 1; - } else if (a->hour < b->hour) { - return -1; - } - - if (a->min > b->min) { - return 1; - } else if (a->min < b->min) { - return -1; - } - - if (a->sec > b->sec) { - return 1; - } else if (a->sec < b->sec) { - return -1; - } - - if (a->us > b->us) { - return 1; - } else if (a->us < b->us) { - return -1; - } - - if (a->ps > b->ps) { - return 1; - } else if (a->ps < b->ps) { - return -1; - } - - if (a->as > b->as) { - return 1; - } else if (a->as < b->as) { - return -1; - } - - return 0; -} -/* -* Returns the offset from utc of the timezone as a timedelta. -* The caller is responsible for ensuring that the tzinfo -* attribute exists on the datetime object. -* -* If the passed object is timezone naive, Py_None is returned. -* If extraction of the offset fails, NULL is returned. -* -* NOTE: This function is not vendored from numpy. -*/ -PyObject *extract_utc_offset(PyObject *obj) { - PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo"); - if (tmp == NULL) { - return NULL; - } - if (tmp != Py_None) { - PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); - if (offset == NULL) { - Py_DECREF(tmp); - return NULL; - } - return offset; - } - return tmp; -} - -/* - * - * Converts a Python datetime.datetime or datetime.date - * object into a NumPy npy_datetimestruct. Uses tzinfo (if present) - * to convert to UTC time. - * - * The following implementation just asks for attributes, and thus - * supports datetime duck typing. The tzinfo time zone conversion - * requires this style of access as well. - * - * Returns -1 on error, 0 on success, and 1 (with no error set) - * if obj doesn't have the needed date or datetime attributes. - */ -int convert_pydatetime_to_datetimestruct(PyObject *dtobj, - npy_datetimestruct *out) { - // Assumes that obj is a valid datetime object - PyObject *tmp; - PyObject *obj = (PyObject*)dtobj; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->month = 1; - out->day = 1; - - out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year")); - out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month")); - out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day")); - - // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use - // PyDateTime_Check here, and less verbose attribute lookups. - - /* Check for time attributes (if not there, return success as a date) */ - if (!PyObject_HasAttrString(obj, "hour") || - !PyObject_HasAttrString(obj, "minute") || - !PyObject_HasAttrString(obj, "second") || - !PyObject_HasAttrString(obj, "microsecond")) { - return 0; - } - - out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour")); - out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute")); - out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second")); - out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond")); - - if (PyObject_HasAttrString(obj, "tzinfo")) { - PyObject *offset = extract_utc_offset(obj); - /* Apply the time zone offset if datetime obj is tz-aware */ - if (offset != NULL) { - if (offset == Py_None) { - Py_DECREF(offset); - return 0; - } - PyObject *tmp_int; - int seconds_offset, minutes_offset; - /* - * The timedelta should have a function "total_seconds" - * which contains the value we want. - */ - tmp = PyObject_CallMethod(offset, "total_seconds", ""); - Py_DECREF(offset); - if (tmp == NULL) { - return -1; - } - tmp_int = PyNumber_Long(tmp); - if (tmp_int == NULL) { - Py_DECREF(tmp); - return -1; - } - seconds_offset = PyLong_AsLong(tmp_int); - if (seconds_offset == -1 && PyErr_Occurred()) { - Py_DECREF(tmp_int); - Py_DECREF(tmp); - return -1; - } - Py_DECREF(tmp_int); - Py_DECREF(tmp); - - /* Convert to a minutes offset and apply it */ - minutes_offset = seconds_offset / 60; - - add_minutes_to_datetimestruct(out, -minutes_offset); - } - } - - return 0; -} - -/* - * Port numpy#13188 https://github.com/numpy/numpy/pull/13188/ - * - * Computes the python `ret, d = divmod(d, unit)`. - * - * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch - * for subsequent calls to this command - it is able to deduce that `*d >= 0`. - */ -npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { - assert(unit > 0); - npy_int64 div = *d / unit; - npy_int64 mod = *d % unit; - if (mod < 0) { - mod += unit; - div -= 1; - } - assert(mod >= 0); - *d = mod; - return div; -} - -/* - * Converts a datetime based on the given metadata into a datetimestruct - */ -void pandas_datetime_to_datetimestruct(npy_datetime dt, - NPY_DATETIMEUNIT base, - npy_datetimestruct *out) { - npy_int64 perday; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->year = 1970; - out->month = 1; - out->day = 1; - - /* - * Note that care must be taken with the / and % operators - * for negative values. - */ - switch (base) { - case NPY_FR_Y: - out->year = 1970 + dt; - break; - - case NPY_FR_M: - out->year = 1970 + extract_unit(&dt, 12); - out->month = dt + 1; - break; - - case NPY_FR_W: - /* A week is 7 days */ - set_datetimestruct_days(dt * 7, out); - break; - - case NPY_FR_D: - set_datetimestruct_days(dt, out); - break; - - case NPY_FR_h: - perday = 24LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = dt; - break; - - case NPY_FR_m: - perday = 24LL * 60; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 60); - out->min = (int)dt; - break; - - case NPY_FR_s: - perday = 24LL * 60 * 60; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 60 * 60); - out->min = (int)extract_unit(&dt, 60); - out->sec = (int)dt; - break; - - case NPY_FR_ms: - perday = 24LL * 60 * 60 * 1000; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 60); - out->sec = (int)extract_unit(&dt, 1000LL); - out->us = (int)(dt * 1000); - break; - - case NPY_FR_us: - perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000); - out->us = (int)dt; - break; - - case NPY_FR_ns: - perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->us = (int)extract_unit(&dt, 1000LL); - out->ps = (int)(dt * 1000); - break; - - case NPY_FR_ps: - perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->us = (int)extract_unit(&dt, 1000LL); - out->ps = (int)(dt * 1000); - break; - - case NPY_FR_fs: - /* entire range is only +- 2.6 hours */ - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 60 * 60); - if (out->hour < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour += 24; - assert(out->hour >= 0); - } - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000); - out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->ps = (int)extract_unit(&dt, 1000LL); - out->as = (int)(dt * 1000); - break; - - case NPY_FR_as: - /* entire range is only +- 9.2 seconds */ - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 1000); - if (out->sec < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour = 23; - out->min = 59; - out->sec += 60; - assert(out->sec >= 0); - } - out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); - out->ps = (int)extract_unit(&dt, 1000LL * 1000); - out->as = (int)dt; - break; - - default: - PyErr_SetString(PyExc_RuntimeError, - "NumPy datetime metadata is corrupted with invalid " - "base unit"); - } -} - -/* - * Converts a timedelta from a timedeltastruct to a timedelta based - * on a metadata unit. The timedelta is assumed to be valid. - * - * Returns 0 on success, -1 on failure. - */ -void pandas_timedelta_to_timedeltastruct(npy_timedelta td, - NPY_DATETIMEUNIT base, - pandas_timedeltastruct *out) { - npy_int64 frac; - npy_int64 sfrac; - npy_int64 ifrac; - int sign; - npy_int64 per_day; - npy_int64 per_sec; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(pandas_timedeltastruct)); - - switch (base) { - case NPY_FR_ns: - - per_day = 86400000000000LL; - per_sec = 1000LL * 1000LL * 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac / (1000LL * 1000LL); - ifrac -= out->ms * 1000LL * 1000LL; - out->us = ifrac / 1000LL; - ifrac -= out->us * 1000LL; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_us: - - per_day = 86400000000LL; - per_sec = 1000LL * 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac / 1000LL; - ifrac -= out->ms * 1000LL; - out->us = ifrac / 1L; - ifrac -= out->us * 1L; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_ms: - - per_day = 86400000LL; - per_sec = 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac; - out->us = 0; - out->ns = 0; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_s: - // special case where we can simplify many expressions bc per_sec=1 - - per_day = 86400LL; - per_sec = 1L; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = 0; - out->us = 0; - out->ns = 0; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_m: - - out->days = td / 1440LL; - td -= out->days * 1440LL; - out->hrs = td / 60LL; - td -= out->hrs * 60LL; - out->min = td; - - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_h: - out->days = td / 24LL; - td -= out->days * 24LL; - out->hrs = td; - - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_D: - out->days = td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_W: - out->days = 7 * td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - default: - PyErr_SetString(PyExc_RuntimeError, - "NumPy timedelta metadata is corrupted with " - "invalid base unit"); - } - - out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; - out->microseconds = out->ms * 1000 + out->us; - out->nanoseconds = out->ns; -} - - -/* - * This function returns a pointer to the DateTimeMetaData - * contained within the provided datetime dtype. - * - * Copied near-verbatim from numpy/core/src/multiarray/datetime.c - */ -PyArray_DatetimeMetaData -get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { - return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); -} diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h deleted file mode 100644 index 3d4b59e056c4e..0000000000000 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. - -Copyright (c) 2005-2011, NumPy Developers -All rights reserved. - -This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt - -*/ - -#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ -#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ - -#ifndef NPY_NO_DEPRECATED_API -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#endif // NPY_NO_DEPRECATED_API - -#include - -typedef struct { - npy_int64 days; - npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds; -} pandas_timedeltastruct; - -extern const npy_datetimestruct _AS_MIN_DTS; -extern const npy_datetimestruct _AS_MAX_DTS; -extern const npy_datetimestruct _FS_MIN_DTS; -extern const npy_datetimestruct _FS_MAX_DTS; -extern const npy_datetimestruct _PS_MIN_DTS; -extern const npy_datetimestruct _PS_MAX_DTS; -extern const npy_datetimestruct _NS_MIN_DTS; -extern const npy_datetimestruct _NS_MAX_DTS; -extern const npy_datetimestruct _US_MIN_DTS; -extern const npy_datetimestruct _US_MAX_DTS; -extern const npy_datetimestruct _MS_MIN_DTS; -extern const npy_datetimestruct _MS_MAX_DTS; -extern const npy_datetimestruct _S_MIN_DTS; -extern const npy_datetimestruct _S_MAX_DTS; -extern const npy_datetimestruct _M_MIN_DTS; -extern const npy_datetimestruct _M_MAX_DTS; - -// stuff pandas needs -// ---------------------------------------------------------------------------- - -PyObject *extract_utc_offset(PyObject *obj); - -int convert_pydatetime_to_datetimestruct(PyObject *dtobj, - npy_datetimestruct *out); - -void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr, - npy_datetimestruct *result); - -void pandas_timedelta_to_timedeltastruct(npy_timedelta val, - NPY_DATETIMEUNIT fr, - pandas_timedeltastruct *result); - -extern const int days_per_month_table[2][12]; - -// stuff numpy-derived code needs in header -// ---------------------------------------------------------------------------- - -int is_leapyear(npy_int64 year); - -/* - * Calculates the days offset from the 1970 epoch. - */ -npy_int64 -get_datetimestruct_days(const npy_datetimestruct *dts); - - -/* - * Compares two npy_datetimestruct objects chronologically - */ -int cmp_npy_datetimestruct(const npy_datetimestruct *a, - const npy_datetimestruct *b); - - -/* - * Adjusts a datetimestruct based on a minutes offset. Assumes - * the current values are valid. - */ -void -add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes); - -/* - * This function returns the DateTimeMetaData - * contained within the provided datetime dtype. - */ -PyArray_DatetimeMetaData get_datetime_metadata_from_dtype( - PyArray_Descr *dtype); - - -#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c deleted file mode 100644 index f1f03e6467eac..0000000000000 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ /dev/null @@ -1,1150 +0,0 @@ -/* - -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. - -Written by Mark Wiebe (mwwiebe@gmail.com) -Copyright (c) 2011 by Enthought, Inc. - -Copyright (c) 2005-2011, NumPy Developers -All rights reserved. - -See NUMPY_LICENSE.txt for the license. - -This file implements string parsing and creation for NumPy datetime. - -*/ - -#define PY_SSIZE_T_CLEAN -#define NO_IMPORT - -#ifndef NPY_NO_DEPRECATED_API -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#endif // NPY_NO_DEPRECATED_API - -#include - -#include - -#include -#include -#include - -#include "np_datetime.h" -#include "np_datetime_strings.h" - - -/* - * Parses (almost) standard ISO 8601 date strings. The differences are: - * - * + Only seconds may have a decimal point, with up to 18 digits after it - * (maximum attoseconds precision). - * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate - * the date and the time. Both are treated equivalently. - * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. - * + Doesn't handle leap seconds (seconds value has 60 in these cases). - * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow - * + Accepts special values "NaT" (not a time), "Today", (current - * day according to local time) and "Now" (current time in UTC). - * + ':' separator between hours, minutes, and seconds is optional. When - * omitted, each component must be 2 digits if it appears. (GH-10041) - * - * 'str' must be a NULL-terminated string, and 'len' must be its length. - * - * 'out' gets filled with the parsed date-time. - * 'out_local' gets set to 1 if the parsed time contains timezone, - * to 0 otherwise. - * 'out_tzoffset' gets set to timezone offset by minutes - * if the parsed time was in local time, - * to 0 otherwise. The values 'now' and 'today' don't get counted - * as local, and neither do UTC +/-#### timezone offsets, because - * they aren't using the computer's local timezone offset. - * - * Returns 0 on success, -1 on failure. - */ - -typedef enum { - COMPARISON_SUCCESS, - COMPLETED_PARTIAL_MATCH, - COMPARISON_ERROR -} DatetimePartParseResult; -// This function will advance the pointer on format -// and decrement characters_remaining by n on success -// On failure will return COMPARISON_ERROR without incrementing -// If `format_requirement` is PARTIAL_MATCH, and the `format` string has -// been exhausted, then return COMPLETED_PARTIAL_MATCH. -static DatetimePartParseResult compare_format( - const char **format, - int *characters_remaining, - const char *compare_to, - int n, - const FormatRequirement format_requirement -) { - if (format_requirement == INFER_FORMAT) { - return COMPARISON_SUCCESS; - } - if (*characters_remaining < 0) { - return COMPARISON_ERROR; - } - if (format_requirement == PARTIAL_MATCH && *characters_remaining == 0) { - return COMPLETED_PARTIAL_MATCH; - } - if (*characters_remaining < n) { - // TODO(pandas-dev): PyErr to differentiate what went wrong - return COMPARISON_ERROR; - } else { - if (strncmp(*format, compare_to, n)) { - // TODO(pandas-dev): PyErr to differentiate what went wrong - return COMPARISON_ERROR; - } else { - *format += n; - *characters_remaining -= n; - return COMPARISON_SUCCESS; - } - } - return COMPARISON_SUCCESS; -} - -int parse_iso_8601_datetime(const char *str, int len, int want_exc, - npy_datetimestruct *out, - NPY_DATETIMEUNIT *out_bestunit, - int *out_local, int *out_tzoffset, - const char* format, int format_len, - FormatRequirement format_requirement) { - if (len < 0 || format_len < 0) - goto parse_error; - int year_leap = 0; - int i, numdigits; - const char *substr; - int sublen; - NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; - DatetimePartParseResult comparison; - - /* If year-month-day are separated by a valid separator, - * months/days without leading zeroes will be parsed - * (though not iso8601). If the components aren't separated, - * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are - * forbidden here (but parsed as YYMMDD elsewhere). - */ - int has_ymd_sep = 0; - char ymd_sep = '\0'; - char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '}; - int valid_ymd_sep_len = sizeof(valid_ymd_sep); - - /* hour-minute-second may or may not separated by ':'. If not, then - * each component must be 2 digits. */ - int has_hms_sep = 0; - int hour_was_2_digits = 0; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->month = 1; - out->day = 1; - - substr = str; - sublen = len; - - /* Skip leading whitespace */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - /* Leading '-' sign for negative year */ - if (*substr == '-') { - ++substr; - --sublen; - } - - if (sublen == 0) { - goto parse_error; - } - - /* PARSE THE YEAR (4 digits) */ - comparison = compare_format(&format, &format_len, "%Y", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - - out->year = 0; - if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) && - isdigit(substr[2]) && isdigit(substr[3])) { - out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') + - 10 * (substr[2] - '0') + (substr[3] - '0'); - - substr += 4; - sublen -= 4; - } - - /* Negate the year if necessary */ - if (str[0] == '-') { - out->year = -out->year; - } - /* Check whether it's a leap-year */ - year_leap = is_leapyear(out->year); - - /* Next character must be a separator, start of month, or end of string */ - if (sublen == 0) { - if (out_local != NULL) { - *out_local = 0; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_Y; - goto finish; - } - - if (!isdigit(*substr)) { - for (i = 0; i < valid_ymd_sep_len; ++i) { - if (*substr == valid_ymd_sep[i]) { - break; - } - } - if (i == valid_ymd_sep_len) { - goto parse_error; - } - has_ymd_sep = 1; - ymd_sep = valid_ymd_sep[i]; - ++substr; - --sublen; - - comparison = compare_format(&format, &format_len, &ymd_sep, 1, - format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* Cannot have trailing separator */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - } - - /* PARSE THE MONTH */ - comparison = compare_format(&format, &format_len, "%m", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - out->month = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->month = 10 * out->month + (*substr - '0'); - ++substr; - --sublen; - } else if (!has_ymd_sep) { - goto parse_error; - } - if (out->month < 1 || out->month > 12) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Month out of range in datetime string \"%s\"", str); - } - goto error; - } - - /* Next character must be the separator, start of day, or end of string */ - if (sublen == 0) { - bestunit = NPY_FR_M; - /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */ - if (!has_ymd_sep) { - goto parse_error; - } - if (format_len) { - goto parse_error; - } - if (out_local != NULL) { - *out_local = 0; - } - goto finish; - } - - if (has_ymd_sep) { - /* Must have separator, but cannot be trailing */ - if (*substr != ymd_sep || sublen == 1) { - goto parse_error; - } - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, &ymd_sep, 1, - format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - /* PARSE THE DAY */ - comparison = compare_format(&format, &format_len, "%d", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - if (!isdigit(*substr)) { - goto parse_error; - } - out->day = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->day = 10 * out->day + (*substr - '0'); - ++substr; - --sublen; - } else if (!has_ymd_sep) { - goto parse_error; - } - if (out->day < 1 || - out->day > days_per_month_table[year_leap][out->month - 1]) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Day out of range in datetime string \"%s\"", str); - } - goto error; - } - - /* Next character must be a 'T', ' ', or end of string */ - if (sublen == 0) { - if (out_local != NULL) { - *out_local = 0; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_D; - goto finish; - } - - if ((*substr != 'T' && *substr != ' ') || sublen == 1) { - goto parse_error; - } - comparison = compare_format(&format, &format_len, substr, 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - ++substr; - --sublen; - - /* PARSE THE HOURS */ - comparison = compare_format(&format, &format_len, "%H", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - if (!isdigit(*substr)) { - goto parse_error; - } - out->hour = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional */ - if (isdigit(*substr)) { - hour_was_2_digits = 1; - out->hour = 10 * out->hour + (*substr - '0'); - ++substr; - --sublen; - if (out->hour >= 24) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Hours out of range in datetime string \"%s\"", - str); - } - goto error; - } - } - - /* Next character must be a ':' or the end of the string */ - if (sublen == 0) { - if (!hour_was_2_digits) { - goto parse_error; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_h; - goto finish; - } - - if (*substr == ':') { - has_hms_sep = 1; - ++substr; - --sublen; - /* Cannot have a trailing separator */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - comparison = compare_format(&format, &format_len, ":", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } else if (!isdigit(*substr)) { - if (!hour_was_2_digits) { - goto parse_error; - } - goto parse_timezone; - } - - /* PARSE THE MINUTES */ - comparison = compare_format(&format, &format_len, "%M", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - out->min = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->min = 10 * out->min + (*substr - '0'); - ++substr; - --sublen; - if (out->min >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Minutes out of range in datetime string \"%s\"", - str); - } - goto error; - } - } else if (!has_hms_sep) { - goto parse_error; - } - - if (sublen == 0) { - bestunit = NPY_FR_m; - if (format_len) { - goto parse_error; - } - goto finish; - } - - /* If we make it through this condition block, then the next - * character is a digit. */ - if (has_hms_sep && *substr == ':') { - comparison = compare_format(&format, &format_len, ":", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - ++substr; - --sublen; - /* Cannot have a trailing ':' */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - } else if (!has_hms_sep && isdigit(*substr)) { - } else { - goto parse_timezone; - } - - /* PARSE THE SECONDS */ - comparison = compare_format(&format, &format_len, "%S", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - out->sec = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->sec = 10 * out->sec + (*substr - '0'); - ++substr; - --sublen; - if (out->sec >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Seconds out of range in datetime string \"%s\"", - str); - } - goto error; - } - } else if (!has_hms_sep) { - goto parse_error; - } - - /* Next character may be a '.' indicating fractional seconds */ - if (sublen > 0 && *substr == '.') { - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, ".", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } else { - bestunit = NPY_FR_s; - goto parse_timezone; - } - - /* PARSE THE MICROSECONDS (0 to 6 digits) */ - comparison = compare_format(&format, &format_len, "%f", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->us *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->us += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (sublen == 0 || !isdigit(*substr)) { - if (numdigits > 3) { - bestunit = NPY_FR_us; - } else { - bestunit = NPY_FR_ms; - } - goto parse_timezone; - } - - /* PARSE THE PICOSECONDS (0 to 6 digits) */ - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->ps *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->ps += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (sublen == 0 || !isdigit(*substr)) { - if (numdigits > 3) { - bestunit = NPY_FR_ps; - } else { - bestunit = NPY_FR_ns; - } - goto parse_timezone; - } - - /* PARSE THE ATTOSECONDS (0 to 6 digits) */ - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->as *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->as += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (numdigits > 3) { - bestunit = NPY_FR_as; - } else { - bestunit = NPY_FR_fs; - } - -parse_timezone: - /* trim any whitespace between time/timezone */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - if (sublen == 0) { - // Unlike NumPy, treating no time zone as naive - if (format_len > 0) { - goto parse_error; - } - goto finish; - } - - /* UTC specifier */ - if (*substr == 'Z') { - comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* "Z" should be equivalent to tz offset "+00:00" */ - if (out_local != NULL) { - *out_local = 1; - } - - if (out_tzoffset != NULL) { - *out_tzoffset = 0; - } - - if (sublen == 1) { - if (format_len > 0) { - goto parse_error; - } - goto finish; - } else { - ++substr; - --sublen; - } - } else if (*substr == '-' || *substr == '+') { - comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* Time zone offset */ - int offset_neg = 0, offset_hour = 0, offset_minute = 0; - - /* - * Since "local" means local with respect to the current - * machine, we say this is non-local. - */ - - if (*substr == '-') { - offset_neg = 1; - } - ++substr; - --sublen; - - /* The hours offset */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); - substr += 2; - sublen -= 2; - if (offset_hour >= 24) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Timezone hours offset out of range " - "in datetime string \"%s\"", - str); - } - goto error; - } - } else if (sublen >= 1 && isdigit(substr[0])) { - offset_hour = substr[0] - '0'; - ++substr; - --sublen; - } else { - goto parse_error; - } - - /* The minutes offset is optional */ - if (sublen > 0) { - /* Optional ':' */ - if (*substr == ':') { - ++substr; - --sublen; - } - - /* The minutes offset (at the end of the string) */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); - substr += 2; - sublen -= 2; - if (offset_minute >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Timezone minutes offset out of range " - "in datetime string \"%s\"", - str); - } - goto error; - } - } else if (sublen >= 1 && isdigit(substr[0])) { - offset_minute = substr[0] - '0'; - ++substr; - --sublen; - } else { - goto parse_error; - } - } - - /* Apply the time zone offset */ - if (offset_neg) { - offset_hour = -offset_hour; - offset_minute = -offset_minute; - } - if (out_local != NULL) { - *out_local = 1; - // Unlike NumPy, do not change internal value to local time - *out_tzoffset = 60 * offset_hour + offset_minute; - } - } - - /* Skip trailing whitespace */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - if ((sublen != 0) || (format_len != 0)) { - goto parse_error; - } - -finish: - if (out_bestunit != NULL) { - *out_bestunit = bestunit; - } - return 0; - -parse_error: - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Error parsing datetime string \"%s\" at position %d", str, - (int)(substr - str)); - } - return -1; - -error: - return -1; -} - -/* - * Provides a string length to use for converting datetime - * objects with the given local and unit settings. - */ -int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) { - int len = 0; - - switch (base) { - /* Generic units can only be used to represent NaT */ - /* return 4;*/ - case NPY_FR_as: - len += 3; /* "###" */ - case NPY_FR_fs: - len += 3; /* "###" */ - case NPY_FR_ps: - len += 3; /* "###" */ - case NPY_FR_ns: - len += 3; /* "###" */ - case NPY_FR_us: - len += 3; /* "###" */ - case NPY_FR_ms: - len += 4; /* ".###" */ - case NPY_FR_s: - len += 3; /* ":##" */ - case NPY_FR_m: - len += 3; /* ":##" */ - case NPY_FR_h: - len += 3; /* "T##" */ - case NPY_FR_D: - case NPY_FR_W: - len += 3; /* "-##" */ - case NPY_FR_M: - len += 3; /* "-##" */ - case NPY_FR_Y: - len += 21; /* 64-bit year */ - break; - default: - len += 3; /* handle the now defunct NPY_FR_B */ - break; - } - - if (base >= NPY_FR_h) { - if (local) { - len += 5; /* "+####" or "-####" */ - } else { - len += 1; /* "Z" */ - } - } - - len += 1; /* NULL terminator */ - - return len; -} - - -/* - * Converts an npy_datetimestruct to an (almost) ISO 8601 - * NULL-terminated string using timezone Z (UTC). If the string fits in - * the space exactly, it leaves out the NULL terminator and returns success. - * - * The differences from ISO 8601 are the 'NaT' string, and - * the number of year digits is >= 4 instead of strictly 4. - * - * 'base' restricts the output to that unit. Set 'base' to - * -1 to auto-detect a base after which all the values are zero. - * - * Returns 0 on success, -1 on failure (for example if the output - * string was too short). - */ -int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, - int utc, NPY_DATETIMEUNIT base) { - char *substr = outstr; - int sublen = outlen; - int tmplen; - - /* - * Print weeks with the same precision as days. - * - * TODO: Could print weeks with YYYY-Www format if the week - * epoch is a Monday. - */ - if (base == NPY_FR_W) { - base = NPY_FR_D; - } - -/* YEAR */ -/* - * Can't use PyOS_snprintf, because it always produces a '\0' - * character at the end, and NumPy string types are permitted - * to have data all the way to the end of the buffer. - */ -#ifdef _WIN32 - tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); -#else - tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); -#endif // _WIN32 - /* If it ran out of space or there isn't space for the NULL terminator */ - if (tmplen < 0 || tmplen > sublen) { - goto string_too_short; - } - substr += tmplen; - sublen -= tmplen; - - /* Stop if the unit is years */ - if (base == NPY_FR_Y) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } - - /* MONTH */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '-'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->month / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->month % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is months */ - if (base == NPY_FR_M) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } - - /* DAY */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '-'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->day / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->day % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is days */ - if (base == NPY_FR_D) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } - - /* HOUR */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = 'T'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->hour / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->hour % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is hours */ - if (base == NPY_FR_h) { - goto add_time_zone; - } - - /* MINUTE */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = ':'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->min / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->min % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is minutes */ - if (base == NPY_FR_m) { - goto add_time_zone; - } - - /* SECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = ':'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->sec / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->sec % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is seconds */ - if (base == NPY_FR_s) { - goto add_time_zone; - } - - /* MILLISECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '.'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->us / 100000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->us / 10000) % 10 + '0'); - if (sublen < 4) { - goto string_too_short; - } - substr[3] = (char)((dts->us / 1000) % 10 + '0'); - substr += 4; - sublen -= 4; - - /* Stop if the unit is milliseconds */ - if (base == NPY_FR_ms) { - goto add_time_zone; - } - - /* MICROSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->us / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->us / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->us % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is microseconds */ - if (base == NPY_FR_us) { - goto add_time_zone; - } - - /* NANOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->ps / 100000) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->ps / 10000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->ps / 1000) % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is nanoseconds */ - if (base == NPY_FR_ns) { - goto add_time_zone; - } - - /* PICOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->ps / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->ps / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->ps % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is picoseconds */ - if (base == NPY_FR_ps) { - goto add_time_zone; - } - - /* FEMTOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->as / 100000) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->as / 10000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->as / 1000) % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is femtoseconds */ - if (base == NPY_FR_fs) { - goto add_time_zone; - } - - /* ATTOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->as / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->as / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->as % 10 + '0'); - substr += 3; - sublen -= 3; - -add_time_zone: - /* UTC "Zulu" time */ - if (utc) { - if (sublen < 1) { - goto string_too_short; - } - substr[0] = 'Z'; - substr += 1; - sublen -= 1; - } - /* Add a NULL terminator, and return */ - if (sublen > 0) { - substr[0] = '\0'; - } - - return 0; - -string_too_short: - PyErr_Format(PyExc_RuntimeError, - "The string provided for NumPy ISO datetime formatting " - "was too short, with length %d", - outlen); - return -1; -} - - -int make_iso_8601_timedelta(pandas_timedeltastruct *tds, - char *outstr, size_t *outlen) { - *outlen = 0; - *outlen += snprintf(outstr, 60, // NOLINT - "P%" NPY_INT64_FMT - "DT%" NPY_INT32_FMT - "H%" NPY_INT32_FMT - "M%" NPY_INT32_FMT, - tds->days, tds->hrs, tds->min, tds->sec); - outstr += *outlen; - - if (tds->ns != 0) { - *outlen += snprintf(outstr, 12, // NOLINT - ".%03" NPY_INT32_FMT - "%03" NPY_INT32_FMT - "%03" NPY_INT32_FMT - "S", tds->ms, tds->us, tds->ns); - } else if (tds->us != 0) { - *outlen += snprintf(outstr, 9, // NOLINT - ".%03" NPY_INT32_FMT - "%03" NPY_INT32_FMT - "S", tds->ms, tds->us); - } else if (tds->ms != 0) { - *outlen += snprintf(outstr, 6, // NOLINT - ".%03" NPY_INT32_FMT "S", tds->ms); - } else { - *outlen += snprintf(outstr, 2, // NOLINT - "%s", "S"); - } - - return 0; -} diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h deleted file mode 100644 index a635192d70809..0000000000000 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. - -Written by Mark Wiebe (mwwiebe@gmail.com) -Copyright (c) 2011 by Enthought, Inc. - -Copyright (c) 2005-2011, NumPy Developers -All rights reserved. - -See NUMPY_LICENSE.txt for the license. - -This file implements string parsing and creation for NumPy datetime. - -*/ - -#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ -#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ - -#ifndef NPY_NO_DEPRECATED_API -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#endif // NPY_NO_DEPRECATED_API - -/* 'format_requirement' can be one of three values: - * * PARTIAL_MATCH : Only require a partial match with 'format'. - * For example, if the string is '2020-01-01 05:00:00' and - * 'format' is '%Y-%m-%d', then parse '2020-01-01'; - * * EXACT_MATCH : require an exact match with 'format'. If the - * string is '2020-01-01', then the only format which will - * be able to parse it without error is '%Y-%m-%d'; - * * INFER_FORMAT: parse without comparing 'format' (i.e. infer it). - */ -typedef enum { - PARTIAL_MATCH, - EXACT_MATCH, - INFER_FORMAT -} FormatRequirement; - -/* - * Parses (almost) standard ISO 8601 date strings. The differences are: - * - * + The date "20100312" is parsed as the year 20100312, not as - * equivalent to "2010-03-12". The '-' in the dates are not optional. - * + Only seconds may have a decimal point, with up to 18 digits after it - * (maximum attoseconds precision). - * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate - * the date and the time. Both are treated equivalently. - * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. - * + Doesn't handle leap seconds (seconds value has 60 in these cases). - * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow - * + Accepts special values "NaT" (not a time), "Today", (current - * day according to local time) and "Now" (current time in UTC). - * - * 'str' must be a NULL-terminated string, and 'len' must be its length. - * - * 'out' gets filled with the parsed date-time. - * 'out_local' gets whether returned value contains timezone. 0 for UTC, 1 for local time. - * 'out_tzoffset' gets set to timezone offset by minutes - * if the parsed time was in local time, - * to 0 otherwise. The values 'now' and 'today' don't get counted - * as local, and neither do UTC +/-#### timezone offsets, because - * they aren't using the computer's local timezone offset. - * - * Returns 0 on success, -1 on failure. - */ -int -parse_iso_8601_datetime(const char *str, int len, int want_exc, - npy_datetimestruct *out, - NPY_DATETIMEUNIT *out_bestunit, - int *out_local, - int *out_tzoffset, - const char* format, - int format_len, - FormatRequirement format_requirement); - -/* - * Provides a string length to use for converting datetime - * objects with the given local and unit settings. - */ -int -get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base); - -/* - * Converts an npy_datetimestruct to an (almost) ISO 8601 - * NULL-terminated string using timezone Z (UTC). - * - * 'base' restricts the output to that unit. Set 'base' to - * -1 to auto-detect a base after which all the values are zero. - * - * Returns 0 on success, -1 on failure (for example if the output - * string was too short). - */ -int -make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, - int utc, NPY_DATETIMEUNIT base); - -/* - * Converts an pandas_timedeltastruct to an ISO 8601 string. - * - * Mutates outlen to provide size of (non-NULL terminated) string. - * - * Currently has no error handling - */ -int make_iso_8601_timedelta(pandas_timedeltastruct *tds, char *outstr, - size_t *outlen); -#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index ac279070b629b..6db69ad302e8b 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -22,6 +22,40 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #include "datetime.h" #include "pd_datetime.h" +const npy_datetimestruct _AS_MIN_DTS = { + 1969, 12, 31, 23, 59, 50, 776627, 963145, 224193}; +const npy_datetimestruct _FS_MIN_DTS = { + 1969, 12, 31, 21, 26, 16, 627963, 145224, 193000}; +const npy_datetimestruct _PS_MIN_DTS = { + 1969, 9, 16, 5, 57, 7, 963145, 224193, 0}; +const npy_datetimestruct _NS_MIN_DTS = { + 1677, 9, 21, 0, 12, 43, 145224, 193000, 0}; +const npy_datetimestruct _US_MIN_DTS = { + -290308, 12, 21, 19, 59, 05, 224193, 0, 0}; +const npy_datetimestruct _MS_MIN_DTS = { + -292275055, 5, 16, 16, 47, 4, 193000, 0, 0}; +const npy_datetimestruct _S_MIN_DTS = { + -292277022657, 1, 27, 8, 29, 53, 0, 0, 0}; +const npy_datetimestruct _M_MIN_DTS = { + -17536621475646, 5, 4, 5, 53, 0, 0, 0, 0}; + +const npy_datetimestruct _AS_MAX_DTS = { + 1970, 1, 1, 0, 0, 9, 223372, 36854, 775807}; +const npy_datetimestruct _FS_MAX_DTS = { + 1970, 1, 1, 2, 33, 43, 372036, 854775, 807000}; +const npy_datetimestruct _PS_MAX_DTS = { + 1970, 4, 17, 18, 2, 52, 36854, 775807, 0}; +const npy_datetimestruct _NS_MAX_DTS = { + 2262, 4, 11, 23, 47, 16, 854775, 807000, 0}; +const npy_datetimestruct _US_MAX_DTS = { + 294247, 1, 10, 4, 0, 54, 775807, 0, 0}; +const npy_datetimestruct _MS_MAX_DTS = { + 292278994, 8, 17, 7, 12, 55, 807000, 0, 0}; +const npy_datetimestruct _S_MAX_DTS = { + 292277026596, 12, 4, 15, 30, 7, 0, 0, 0}; +const npy_datetimestruct _M_MAX_DTS = { + 17536621479585, 8, 30, 18, 7, 0, 0, 0, 0}; + static const int days_per_month_table[2][12] = { {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; @@ -193,6 +227,2104 @@ static npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, return ret; } +/* + * Function: scaleNanosecToUnit + * ----------------------------- + * + * Scales an integer value representing time in nanoseconds to provided unit. + * + * Mutates the provided value directly. Returns 0 on success, non-zero on error. + */ +static int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { + switch (unit) { + case NPY_FR_ns: + break; + case NPY_FR_us: + *value /= 1000LL; + break; + case NPY_FR_ms: + *value /= 1000000LL; + break; + case NPY_FR_s: + *value /= 1000000000LL; + break; + default: + return -1; + } + + return 0; +} + +/* + * Compares two npy_datetimestruct objects chronologically + */ +static int cmp_npy_datetimestruct(const npy_datetimestruct *a, + const npy_datetimestruct *b) { + if (a->year > b->year) { + return 1; + } else if (a->year < b->year) { + return -1; + } + + if (a->month > b->month) { + return 1; + } else if (a->month < b->month) { + return -1; + } + + if (a->day > b->day) { + return 1; + } else if (a->day < b->day) { + return -1; + } + + if (a->hour > b->hour) { + return 1; + } else if (a->hour < b->hour) { + return -1; + } + + if (a->min > b->min) { + return 1; + } else if (a->min < b->min) { + return -1; + } + + if (a->sec > b->sec) { + return 1; + } else if (a->sec < b->sec) { + return -1; + } + + if (a->us > b->us) { + return 1; + } else if (a->us < b->us) { + return -1; + } + + if (a->ps > b->ps) { + return 1; + } else if (a->ps < b->ps) { + return -1; + } + + if (a->as > b->as) { + return 1; + } else if (a->as < b->as) { + return -1; + } + + return 0; +} + +/* +* Returns the offset from utc of the timezone as a timedelta. +* The caller is responsible for ensuring that the tzinfo +* attribute exists on the datetime object. +* +* If the passed object is timezone naive, Py_None is returned. +* If extraction of the offset fails, NULL is returned. +* +* NOTE: This function is not vendored from numpy. +*/ +static PyObject *extract_utc_offset(PyObject *obj) { + PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo"); + if (tmp == NULL) { + return NULL; + } + if (tmp != Py_None) { + PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); + if (offset == NULL) { + Py_DECREF(tmp); + return NULL; + } + return offset; + } + return tmp; +} + +/* + * Adjusts a datetimestruct based on a minutes offset. Assumes + * the current values are valid.g + */ +void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) { + int isleap; + + /* MINUTES */ + dts->min += minutes; + while (dts->min < 0) { + dts->min += 60; + dts->hour--; + } + while (dts->min >= 60) { + dts->min -= 60; + dts->hour++; + } + + /* HOURS */ + while (dts->hour < 0) { + dts->hour += 24; + dts->day--; + } + while (dts->hour >= 24) { + dts->hour -= 24; + dts->day++; + } + + /* DAYS */ + if (dts->day < 1) { + dts->month--; + if (dts->month < 1) { + dts->year--; + dts->month = 12; + } + isleap = is_leapyear(dts->year); + dts->day += days_per_month_table[isleap][dts->month - 1]; + } else if (dts->day > 28) { + isleap = is_leapyear(dts->year); + if (dts->day > days_per_month_table[isleap][dts->month - 1]) { + dts->day -= days_per_month_table[isleap][dts->month - 1]; + dts->month++; + if (dts->month > 12) { + dts->year++; + dts->month = 1; + } + } + } +} + +/* + * + * Converts a Python datetime.datetime or datetime.date + * object into a NumPy npy_datetimestruct. Uses tzinfo (if present) + * to convert to UTC time. + * + * The following implementation just asks for attributes, and thus + * supports datetime duck typing. The tzinfo time zone conversion + * requires this style of access as well. + * + * Returns -1 on error, 0 on success, and 1 (with no error set) + * if obj doesn't have the needed date or datetime attributes. + */ +static int convert_pydatetime_to_datetimestruct(PyObject *dtobj, + npy_datetimestruct *out) { + // Assumes that obj is a valid datetime object + PyObject *tmp; + PyObject *obj = (PyObject*)dtobj; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year")); + out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month")); + out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day")); + + // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use + // PyDateTime_Check here, and less verbose attribute lookups. + + /* Check for time attributes (if not there, return success as a date) */ + if (!PyObject_HasAttrString(obj, "hour") || + !PyObject_HasAttrString(obj, "minute") || + !PyObject_HasAttrString(obj, "second") || + !PyObject_HasAttrString(obj, "microsecond")) { + return 0; + } + + out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour")); + out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute")); + out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second")); + out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond")); + + if (PyObject_HasAttrString(obj, "tzinfo")) { + PyObject *offset = extract_utc_offset(obj); + /* Apply the time zone offset if datetime obj is tz-aware */ + if (offset != NULL) { + if (offset == Py_None) { + Py_DECREF(offset); + return 0; + } + PyObject *tmp_int; + int seconds_offset, minutes_offset; + /* + * The timedelta should have a function "total_seconds" + * which contains the value we want. + */ + tmp = PyObject_CallMethod(offset, "total_seconds", ""); + Py_DECREF(offset); + if (tmp == NULL) { + return -1; + } + tmp_int = PyNumber_Long(tmp); + if (tmp_int == NULL) { + Py_DECREF(tmp); + return -1; + } + seconds_offset = PyLong_AsLong(tmp_int); + if (seconds_offset == -1 && PyErr_Occurred()) { + Py_DECREF(tmp_int); + Py_DECREF(tmp); + return -1; + } + Py_DECREF(tmp_int); + Py_DECREF(tmp); + + /* Convert to a minutes offset and apply it */ + minutes_offset = seconds_offset / 60; + + add_minutes_to_datetimestruct(out, -minutes_offset); + } + } + + return 0; +} + +/* + * Port numpy#13188 https://github.com/numpy/numpy/pull/13188/ + * + * Computes the python `ret, d = divmod(d, unit)`. + * + * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch + * for subsequent calls to this command - it is able to deduce that `*d >= 0`. + */ +static npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { + assert(unit > 0); + npy_int64 div = *d / unit; + npy_int64 mod = *d % unit; + if (mod < 0) { + mod += unit; + div -= 1; + } + assert(mod >= 0); + *d = mod; + return div; +} + +/* + * Modifies '*days_' to be the day offset within the year, + * and returns the year. + */ +static npy_int64 days_to_yearsdays(npy_int64 *days_) { + const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); + /* Adjust so it's relative to the year 2000 (divisible by 400) */ + npy_int64 days = (*days_) - (365 * 30 + 7); + npy_int64 year; + + /* Break down the 400 year cycle to get the year and day within the year */ + if (days >= 0) { + year = 400 * (days / days_per_400years); + days = days % days_per_400years; + } else { + year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); + days = days % days_per_400years; + if (days < 0) { + days += days_per_400years; + } + } + + /* Work out the year/day within the 400 year cycle */ + if (days >= 366) { + year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); + days = (days - 1) % (100 * 365 + 25 - 1); + if (days >= 365) { + year += 4 * ((days + 1) / (4 * 365 + 1)); + days = (days + 1) % (4 * 365 + 1); + if (days >= 366) { + year += (days - 1) / 365; + days = (days - 1) % 365; + } + } + } + + *days_ = days; + return year + 2000; +} + +/* + * Fills in the year, month, day in 'dts' based on the days + * offset from 1970. + */ +static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { + const int *month_lengths; + int i; + + dts->year = days_to_yearsdays(&days); + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + + for (i = 0; i < 12; ++i) { + if (days < month_lengths[i]) { + dts->month = i + 1; + dts->day = days + 1; + return; + } else { + days -= month_lengths[i]; + } + } +} + + +/* + * Converts a datetime based on the given metadata into a datetimestruct + */ +static void pandas_datetime_to_datetimestruct(npy_datetime dt, + NPY_DATETIMEUNIT base, + npy_datetimestruct *out) { + npy_int64 perday; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->year = 1970; + out->month = 1; + out->day = 1; + + /* + * Note that care must be taken with the / and % operators + * for negative values. + */ + switch (base) { + case NPY_FR_Y: + out->year = 1970 + dt; + break; + + case NPY_FR_M: + out->year = 1970 + extract_unit(&dt, 12); + out->month = dt + 1; + break; + + case NPY_FR_W: + /* A week is 7 days */ + set_datetimestruct_days(dt * 7, out); + break; + + case NPY_FR_D: + set_datetimestruct_days(dt, out); + break; + + case NPY_FR_h: + perday = 24LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = dt; + break; + + case NPY_FR_m: + perday = 24LL * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60); + out->min = (int)dt; + break; + + case NPY_FR_s: + perday = 24LL * 60 * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60 * 60); + out->min = (int)extract_unit(&dt, 60); + out->sec = (int)dt; + break; + + case NPY_FR_ms: + perday = 24LL * 60 * 60 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 60); + out->sec = (int)extract_unit(&dt, 1000LL); + out->us = (int)(dt * 1000); + break; + + case NPY_FR_us: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000); + out->us = (int)dt; + break; + + case NPY_FR_ns: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_ps: + perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_fs: + /* entire range is only +- 2.6 hours */ + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60 * 60); + if (out->hour < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour += 24; + assert(out->hour >= 0); + } + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000); + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL); + out->as = (int)(dt * 1000); + break; + + case NPY_FR_as: + /* entire range is only +- 9.2 seconds */ + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 1000); + if (out->sec < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour = 23; + out->min = 59; + out->sec += 60; + assert(out->sec >= 0); + } + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL * 1000); + out->as = (int)dt; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy datetime metadata is corrupted with invalid " + "base unit"); + } +} + +/* + * Converts a timedelta from a timedeltastruct to a timedelta based + * on a metadata unit. The timedelta is assumed to be valid. + * + * Returns 0 on success, -1 on failure. + */ +static void pandas_timedelta_to_timedeltastruct(npy_timedelta td, + NPY_DATETIMEUNIT base, + pandas_timedeltastruct *out) { + npy_int64 frac; + npy_int64 sfrac; + npy_int64 ifrac; + int sign; + npy_int64 per_day; + npy_int64 per_sec; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(pandas_timedeltastruct)); + + switch (base) { + case NPY_FR_ns: + + per_day = 86400000000000LL; + per_sec = 1000LL * 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / (1000LL * 1000LL); + ifrac -= out->ms * 1000LL * 1000LL; + out->us = ifrac / 1000LL; + ifrac -= out->us * 1000LL; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_us: + + per_day = 86400000000LL; + per_sec = 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / 1000LL; + ifrac -= out->ms * 1000LL; + out->us = ifrac / 1L; + ifrac -= out->us * 1L; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_ms: + + per_day = 86400000LL; + per_sec = 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_s: + // special case where we can simplify many expressions bc per_sec=1 + + per_day = 86400LL; + per_sec = 1L; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = 0; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_m: + + out->days = td / 1440LL; + td -= out->days * 1440LL; + out->hrs = td / 60LL; + td -= out->hrs * 60LL; + out->min = td; + + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_h: + out->days = td / 24LL; + td -= out->days * 24LL; + out->hrs = td; + + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_D: + out->days = td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_W: + out->days = 7 * td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy timedelta metadata is corrupted with " + "invalid base unit"); + } + + out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; + out->microseconds = out->ms * 1000 + out->us; + out->nanoseconds = out->ns; +} + +/* + * Provides a string length to use for converting datetime + * objects with the given local and unit settings. + */ +static int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) { + int len = 0; + + switch (base) { + /* Generic units can only be used to represent NaT */ + /* return 4;*/ + case NPY_FR_as: + len += 3; /* "###" */ + case NPY_FR_fs: + len += 3; /* "###" */ + case NPY_FR_ps: + len += 3; /* "###" */ + case NPY_FR_ns: + len += 3; /* "###" */ + case NPY_FR_us: + len += 3; /* "###" */ + case NPY_FR_ms: + len += 4; /* ".###" */ + case NPY_FR_s: + len += 3; /* ":##" */ + case NPY_FR_m: + len += 3; /* ":##" */ + case NPY_FR_h: + len += 3; /* "T##" */ + case NPY_FR_D: + case NPY_FR_W: + len += 3; /* "-##" */ + case NPY_FR_M: + len += 3; /* "-##" */ + case NPY_FR_Y: + len += 21; /* 64-bit year */ + break; + default: + len += 3; /* handle the now defunct NPY_FR_B */ + break; + } + + if (base >= NPY_FR_h) { + if (local) { + len += 5; /* "+####" or "-####" */ + } else { + len += 1; /* "Z" */ + } + } + + len += 1; /* NULL terminator */ + + return len; +} + +/* + * Converts an npy_datetimestruct to an (almost) ISO 8601 + * NULL-terminated string using timezone Z (UTC). If the string fits in + * the space exactly, it leaves out the NULL terminator and returns success. + * + * The differences from ISO 8601 are the 'NaT' string, and + * the number of year digits is >= 4 instead of strictly 4. + * + * 'base' restricts the output to that unit. Set 'base' to + * -1 to auto-detect a base after which all the values are zero. + * + * Returns 0 on success, -1 on failure (for example if the output + * string was too short). + */ +static int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + int utc, NPY_DATETIMEUNIT base) { + char *substr = outstr; + int sublen = outlen; + int tmplen; + + /* + * Print weeks with the same precision as days. + * + * TODO: Could print weeks with YYYY-Www format if the week + * epoch is a Monday. + */ + if (base == NPY_FR_W) { + base = NPY_FR_D; + } + +/* YEAR */ +/* + * Can't use PyOS_snprintf, because it always produces a '\0' + * character at the end, and NumPy string types are permitted + * to have data all the way to the end of the buffer. + */ +#ifdef _WIN32 + tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#else + tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#endif // _WIN32 + /* If it ran out of space or there isn't space for the NULL terminator */ + if (tmplen < 0 || tmplen > sublen) { + goto string_too_short; + } + substr += tmplen; + sublen -= tmplen; + + /* Stop if the unit is years */ + if (base == NPY_FR_Y) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* MONTH */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->month / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->month % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is months */ + if (base == NPY_FR_M) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* DAY */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->day / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->day % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is days */ + if (base == NPY_FR_D) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* HOUR */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = 'T'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->hour / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->hour % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is hours */ + if (base == NPY_FR_h) { + goto add_time_zone; + } + + /* MINUTE */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->min / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->min % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is minutes */ + if (base == NPY_FR_m) { + goto add_time_zone; + } + + /* SECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->sec / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->sec % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is seconds */ + if (base == NPY_FR_s) { + goto add_time_zone; + } + + /* MILLISECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '.'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 100000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->us / 10000) % 10 + '0'); + if (sublen < 4) { + goto string_too_short; + } + substr[3] = (char)((dts->us / 1000) % 10 + '0'); + substr += 4; + sublen -= 4; + + /* Stop if the unit is milliseconds */ + if (base == NPY_FR_ms) { + goto add_time_zone; + } + + /* MICROSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->us / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->us % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is microseconds */ + if (base == NPY_FR_us) { + goto add_time_zone; + } + + /* NANOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->ps / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is nanoseconds */ + if (base == NPY_FR_ns) { + goto add_time_zone; + } + + /* PICOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->ps % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is picoseconds */ + if (base == NPY_FR_ps) { + goto add_time_zone; + } + + /* FEMTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->as / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is femtoseconds */ + if (base == NPY_FR_fs) { + goto add_time_zone; + } + + /* ATTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->as % 10 + '0'); + substr += 3; + sublen -= 3; + +add_time_zone: + /* UTC "Zulu" time */ + if (utc) { + if (sublen < 1) { + goto string_too_short; + } + substr[0] = 'Z'; + substr += 1; + sublen -= 1; + } + /* Add a NULL terminator, and return */ + if (sublen > 0) { + substr[0] = '\0'; + } + + return 0; + +string_too_short: + PyErr_Format(PyExc_RuntimeError, + "The string provided for NumPy ISO datetime formatting " + "was too short, with length %d", + outlen); + return -1; +} + + +static int make_iso_8601_timedelta(pandas_timedeltastruct *tds, + char *outstr, size_t *outlen) { + *outlen = 0; + *outlen += snprintf(outstr, 60, // NOLINT + "P%" NPY_INT64_FMT + "DT%" NPY_INT32_FMT + "H%" NPY_INT32_FMT + "M%" NPY_INT32_FMT, + tds->days, tds->hrs, tds->min, tds->sec); + outstr += *outlen; + + if (tds->ns != 0) { + *outlen += snprintf(outstr, 12, // NOLINT + ".%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "S", tds->ms, tds->us, tds->ns); + } else if (tds->us != 0) { + *outlen += snprintf(outstr, 9, // NOLINT + ".%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "S", tds->ms, tds->us); + } else if (tds->ms != 0) { + *outlen += snprintf(outstr, 6, // NOLINT + ".%03" NPY_INT32_FMT "S", tds->ms); + } else { + *outlen += snprintf(outstr, 2, // NOLINT + "%s", "S"); + } + + return 0; +} + + +/* Converts the int64_t representation of a datetime to ISO; mutates len */ +static char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { + npy_datetimestruct dts; + int ret_code; + + pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + // datetime64 is always naive + ret_code = make_iso_8601_datetime(&dts, result, *len, 0, base); + if (ret_code != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +static npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { + scaleNanosecToUnit(&dt, base); + return dt; +} + +/* Convert PyDatetime To ISO C-string. mutates len */ +static char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, + size_t *len) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(obj, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + return NULL; + } + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + // Check to see if PyDateTime has a timezone. + // Don't convert to UTC if it doesn't. + int is_tz_aware = 0; + if (PyObject_HasAttrString(obj, "tzinfo")) { + PyObject *offset = extract_utc_offset(obj); + if (offset == NULL) { + PyObject_Free(result); + return NULL; + } + is_tz_aware = offset != Py_None; + Py_DECREF(offset); + } + ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base); + + if (ret != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + return NULL; + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +static npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(dt, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + // TODO(username): is setting errMsg required? + // ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + // return NULL; + } + + npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); + return NpyDateTimeToEpoch(npy_dt, base); +} + +/* + * Parses (almost) standard ISO 8601 date strings. The differences are: + * + * + Only seconds may have a decimal point, with up to 18 digits after it + * (maximum attoseconds precision). + * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate + * the date and the time. Both are treated equivalently. + * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. + * + Doesn't handle leap seconds (seconds value has 60 in these cases). + * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow + * + Accepts special values "NaT" (not a time), "Today", (current + * day according to local time) and "Now" (current time in UTC). + * + ':' separator between hours, minutes, and seconds is optional. When + * omitted, each component must be 2 digits if it appears. (GH-10041) + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets set to 1 if the parsed time contains timezone, + * to 0 otherwise. + * 'out_tzoffset' gets set to timezone offset by minutes + * if the parsed time was in local time, + * to 0 otherwise. The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * + * Returns 0 on success, -1 on failure. + */ + +typedef enum { + COMPARISON_SUCCESS, + COMPLETED_PARTIAL_MATCH, + COMPARISON_ERROR +} DatetimePartParseResult; +// This function will advance the pointer on format +// and decrement characters_remaining by n on success +// On failure will return COMPARISON_ERROR without incrementing +// If `format_requirement` is PARTIAL_MATCH, and the `format` string has +// been exhausted, then return COMPLETED_PARTIAL_MATCH. +static DatetimePartParseResult compare_format( + const char **format, + int *characters_remaining, + const char *compare_to, + int n, + const FormatRequirement format_requirement +) { + if (format_requirement == INFER_FORMAT) { + return COMPARISON_SUCCESS; + } + if (*characters_remaining < 0) { + return COMPARISON_ERROR; + } + if (format_requirement == PARTIAL_MATCH && *characters_remaining == 0) { + return COMPLETED_PARTIAL_MATCH; + } + if (*characters_remaining < n) { + // TODO(pandas-dev): PyErr to differentiate what went wrong + return COMPARISON_ERROR; + } else { + if (strncmp(*format, compare_to, n)) { + // TODO(pandas-dev): PyErr to differentiate what went wrong + return COMPARISON_ERROR; + } else { + *format += n; + *characters_remaining -= n; + return COMPARISON_SUCCESS; + } + } + return COMPARISON_SUCCESS; +} + +static int parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + NPY_DATETIMEUNIT *out_bestunit, + int *out_local, int *out_tzoffset, + const char* format, int format_len, + FormatRequirement format_requirement) { + if (len < 0 || format_len < 0) + goto parse_error; + int year_leap = 0; + int i, numdigits; + const char *substr; + int sublen; + NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; + DatetimePartParseResult comparison; + + /* If year-month-day are separated by a valid separator, + * months/days without leading zeroes will be parsed + * (though not iso8601). If the components aren't separated, + * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are + * forbidden here (but parsed as YYMMDD elsewhere). + */ + int has_ymd_sep = 0; + char ymd_sep = '\0'; + char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '}; + int valid_ymd_sep_len = sizeof(valid_ymd_sep); + + /* hour-minute-second may or may not separated by ':'. If not, then + * each component must be 2 digits. */ + int has_hms_sep = 0; + int hour_was_2_digits = 0; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + substr = str; + sublen = len; + + /* Skip leading whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + /* Leading '-' sign for negative year */ + if (*substr == '-') { + ++substr; + --sublen; + } + + if (sublen == 0) { + goto parse_error; + } + + /* PARSE THE YEAR (4 digits) */ + comparison = compare_format(&format, &format_len, "%Y", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + + out->year = 0; + if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) && + isdigit(substr[2]) && isdigit(substr[3])) { + out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') + + 10 * (substr[2] - '0') + (substr[3] - '0'); + + substr += 4; + sublen -= 4; + } + + /* Negate the year if necessary */ + if (str[0] == '-') { + out->year = -out->year; + } + /* Check whether it's a leap-year */ + year_leap = is_leapyear(out->year); + + /* Next character must be a separator, start of month, or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + if (format_len) { + goto parse_error; + } + bestunit = NPY_FR_Y; + goto finish; + } + + if (!isdigit(*substr)) { + for (i = 0; i < valid_ymd_sep_len; ++i) { + if (*substr == valid_ymd_sep[i]) { + break; + } + } + if (i == valid_ymd_sep_len) { + goto parse_error; + } + has_ymd_sep = 1; + ymd_sep = valid_ymd_sep[i]; + ++substr; + --sublen; + + comparison = compare_format(&format, &format_len, &ymd_sep, 1, + format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* Cannot have trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } + + /* PARSE THE MONTH */ + comparison = compare_format(&format, &format_len, "%m", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + out->month = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->month = 10 * out->month + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->month < 1 || out->month > 12) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Month out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be the separator, start of day, or end of string */ + if (sublen == 0) { + bestunit = NPY_FR_M; + /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */ + if (!has_ymd_sep) { + goto parse_error; + } + if (format_len) { + goto parse_error; + } + if (out_local != NULL) { + *out_local = 0; + } + goto finish; + } + + if (has_ymd_sep) { + /* Must have separator, but cannot be trailing */ + if (*substr != ymd_sep || sublen == 1) { + goto parse_error; + } + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, &ymd_sep, 1, + format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + /* PARSE THE DAY */ + comparison = compare_format(&format, &format_len, "%d", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->day = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->day = 10 * out->day + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->day < 1 || + out->day > days_per_month_table[year_leap][out->month - 1]) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Day out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be a 'T', ' ', or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + if (format_len) { + goto parse_error; + } + bestunit = NPY_FR_D; + goto finish; + } + + if ((*substr != 'T' && *substr != ' ') || sublen == 1) { + goto parse_error; + } + comparison = compare_format(&format, &format_len, substr, 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + ++substr; + --sublen; + + /* PARSE THE HOURS */ + comparison = compare_format(&format, &format_len, "%H", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->hour = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional */ + if (isdigit(*substr)) { + hour_was_2_digits = 1; + out->hour = 10 * out->hour + (*substr - '0'); + ++substr; + --sublen; + if (out->hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Hours out of range in datetime string \"%s\"", + str); + } + goto error; + } + } + + /* Next character must be a ':' or the end of the string */ + if (sublen == 0) { + if (!hour_was_2_digits) { + goto parse_error; + } + if (format_len) { + goto parse_error; + } + bestunit = NPY_FR_h; + goto finish; + } + + if (*substr == ':') { + has_hms_sep = 1; + ++substr; + --sublen; + /* Cannot have a trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + comparison = compare_format(&format, &format_len, ":", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } else if (!isdigit(*substr)) { + if (!hour_was_2_digits) { + goto parse_error; + } + goto parse_timezone; + } + + /* PARSE THE MINUTES */ + comparison = compare_format(&format, &format_len, "%M", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + out->min = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->min = 10 * out->min + (*substr - '0'); + ++substr; + --sublen; + if (out->min >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Minutes out of range in datetime string \"%s\"", + str); + } + goto error; + } + } else if (!has_hms_sep) { + goto parse_error; + } + + if (sublen == 0) { + bestunit = NPY_FR_m; + if (format_len) { + goto parse_error; + } + goto finish; + } + + /* If we make it through this condition block, then the next + * character is a digit. */ + if (has_hms_sep && *substr == ':') { + comparison = compare_format(&format, &format_len, ":", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + ++substr; + --sublen; + /* Cannot have a trailing ':' */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } else if (!has_hms_sep && isdigit(*substr)) { + } else { + goto parse_timezone; + } + + /* PARSE THE SECONDS */ + comparison = compare_format(&format, &format_len, "%S", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + out->sec = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->sec = 10 * out->sec + (*substr - '0'); + ++substr; + --sublen; + if (out->sec >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Seconds out of range in datetime string \"%s\"", + str); + } + goto error; + } + } else if (!has_hms_sep) { + goto parse_error; + } + + /* Next character may be a '.' indicating fractional seconds */ + if (sublen > 0 && *substr == '.') { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, ".", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } else { + bestunit = NPY_FR_s; + goto parse_timezone; + } + + /* PARSE THE MICROSECONDS (0 to 6 digits) */ + comparison = compare_format(&format, &format_len, "%f", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->us *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->us += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_us; + } else { + bestunit = NPY_FR_ms; + } + goto parse_timezone; + } + + /* PARSE THE PICOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->ps *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->ps += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_ps; + } else { + bestunit = NPY_FR_ns; + } + goto parse_timezone; + } + + /* PARSE THE ATTOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->as *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->as += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (numdigits > 3) { + bestunit = NPY_FR_as; + } else { + bestunit = NPY_FR_fs; + } + +parse_timezone: + /* trim any whitespace between time/timezone */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + if (sublen == 0) { + // Unlike NumPy, treating no time zone as naive + if (format_len > 0) { + goto parse_error; + } + goto finish; + } + + /* UTC specifier */ + if (*substr == 'Z') { + comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* "Z" should be equivalent to tz offset "+00:00" */ + if (out_local != NULL) { + *out_local = 1; + } + + if (out_tzoffset != NULL) { + *out_tzoffset = 0; + } + + if (sublen == 1) { + if (format_len > 0) { + goto parse_error; + } + goto finish; + } else { + ++substr; + --sublen; + } + } else if (*substr == '-' || *substr == '+') { + comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* Time zone offset */ + int offset_neg = 0, offset_hour = 0, offset_minute = 0; + + /* + * Since "local" means local with respect to the current + * machine, we say this is non-local. + */ + + if (*substr == '-') { + offset_neg = 1; + } + ++substr; + --sublen; + + /* The hours offset */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone hours offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_hour = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + + /* The minutes offset is optional */ + if (sublen > 0) { + /* Optional ':' */ + if (*substr == ':') { + ++substr; + --sublen; + } + + /* The minutes offset (at the end of the string) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_minute >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone minutes offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_minute = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + } + + /* Apply the time zone offset */ + if (offset_neg) { + offset_hour = -offset_hour; + offset_minute = -offset_minute; + } + if (out_local != NULL) { + *out_local = 1; + // Unlike NumPy, do not change internal value to local time + *out_tzoffset = 60 * offset_hour + offset_minute; + } + } + + /* Skip trailing whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + if ((sublen != 0) || (format_len != 0)) { + goto parse_error; + } + +finish: + if (out_bestunit != NULL) { + *out_bestunit = bestunit; + } + return 0; + +parse_error: + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Error parsing datetime string \"%s\" at position %d", str, + (int)(substr - str)); + } + return -1; + +error: + return -1; +} + + +/* Converts the int64_t representation of a duration to ISO; mutates len */ +static char *int64ToIsoDuration(int64_t value, size_t *len) { + pandas_timedeltastruct tds; + int ret_code; + + pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds); + + // Max theoretical length of ISO Duration with 64 bit day + // as the largest unit is 70 characters + 1 for a null terminator + char *result = PyObject_Malloc(71); + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + + ret_code = make_iso_8601_timedelta(&tds, result, len); + if (ret_code == -1) { + PyErr_SetString(PyExc_ValueError, + "Could not convert timedelta value to string"); + PyObject_Free(result); + return NULL; + } + + return result; +} + +/* + * This function returns a pointer to the DateTimeMetaData + * contained within the provided datetime dtype. + * + * Copied near-verbatim from numpy/core/src/multiarray/datetime.c + */ +static PyArray_DatetimeMetaData +get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { + return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); +} + + + static void pandas_datetime_destructor(PyObject *op) { void *ptr = PyCapsule_GetPointer(op, PandasDateTime_CAPSULE_NAME); diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index 66539e3d2f845..8dca020da51e5 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -20,8 +20,61 @@ Distributed under the terms of the BSD Simplified License. extern "C" { #endif +typedef struct { + npy_int64 days; + npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds; +} pandas_timedeltastruct; + +extern const npy_datetimestruct _AS_MIN_DTS; +extern const npy_datetimestruct _AS_MAX_DTS; +extern const npy_datetimestruct _FS_MIN_DTS; +extern const npy_datetimestruct _FS_MAX_DTS; +extern const npy_datetimestruct _PS_MIN_DTS; +extern const npy_datetimestruct _PS_MAX_DTS; +extern const npy_datetimestruct _NS_MIN_DTS; +extern const npy_datetimestruct _NS_MAX_DTS; +extern const npy_datetimestruct _US_MIN_DTS; +extern const npy_datetimestruct _US_MAX_DTS; +extern const npy_datetimestruct _MS_MIN_DTS; +extern const npy_datetimestruct _MS_MAX_DTS; +extern const npy_datetimestruct _S_MIN_DTS; +extern const npy_datetimestruct _S_MAX_DTS; +extern const npy_datetimestruct _M_MIN_DTS; +extern const npy_datetimestruct _M_MAX_DTS; + +/* 'format_requirement' can be one of three values: + * * PARTIAL_MATCH : Only require a partial match with 'format'. + * For example, if the string is '2020-01-01 05:00:00' and + * 'format' is '%Y-%m-%d', then parse '2020-01-01'; + * * EXACT_MATCH : require an exact match with 'format'. If the + * string is '2020-01-01', then the only format which will + * be able to parse it without error is '%Y-%m-%d'; + * * INFER_FORMAT: parse without comparing 'format' (i.e. infer it). + */ +typedef enum { + PARTIAL_MATCH, + EXACT_MATCH, + INFER_FORMAT +} FormatRequirement; + typedef struct { npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT, const npy_datetimestruct *); + int (*scaleNanosecToUnit)(npy_int64 *, NPY_DATETIMEUNIT); + char *(*int64ToIso)(int64_t, NPY_DATETIMEUNIT, size_t *); + npy_datetime (*NpyDateTimeToEpoch)(npy_datetime, NPY_DATETIMEUNIT); + char *(*PyDateTimeToIso)(PyObject *, NPY_DATETIMEUNIT, size_t *); + npy_datetime (*PyDateTimeToEpoch)(PyObject *, NPY_DATETIMEUNIT); + char *(*int64ToIsoDuration)(int64_t, size_t *); + void (*pandas_datetime_to_datetimestruct)(npy_datetime, NPY_DATETIMEUNIT, npy_datetimestruct *); + void (*pandas_timedelta_to_timedeltastruct)(npy_datetime, NPY_DATETIMEUNIT, pandas_timedeltastruct *); + int (*convert_pydatetime_to_datetimestruct)(PyObject *, npy_datetimestruct *); + int (*cmp_npy_datetimestruct)(const npy_datetimestruct *, const npy_datetimestruct *); + PyArray_DatetimeMetaData (*get_datetime_metadata_from_dtype)(PyArray_Descr *); + int (*parse_iso_8601_datetime)(const char*, int, int, npy_datetimestruct *, NPY_DATETIMEUNIT *, int *, int *, const char*, int, FormatRequirement); + int (*get_datetime_iso_8601_strlen)(int, NPY_DATETIMEUNIT); + int (*make_iso_8601_datetime)(npy_datetimestruct *, char *, int, int, NPY_DATETIMEUNIT); + int (*make_iso_8601_timedelta)(pandas_timedeltastruct *, char *, size_t *); + } PandasDateTime_CAPI; // The capsule name appears limited to module.attributename; see bpo-32414 @@ -37,6 +90,36 @@ extern "C" { #define npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT, npy_datetimestruct) \ PandasDateTimeAPI->npy_datetimestruct_to_datetime((NPY_DATETIMEUNIT), (npy_datetimestruct)) +#define scaleNanosecToUnit(value, unit) \ + PandasDateTimeAPI->scaleNanosecToUnit((value), (unit)) +#define int64ToIso(value, base, len) \ + PandasDateTimeAPI->int64ToIso((value), (base), (len)) +#define NpyDateTimeToEpoch(dt, base) \ + PandasDateTimeAPI->NpyDateTimeToEpoch((dt), (base)) +#define PyDateTimeToIso(obj, base, len) \ + PandasDateTimeAPI->PyDateTimeToIso((obj), (base), (len)) +#define PyDateTimeToEpoch(dt, base) \ + PandasDateTimeAPI->PyDateTimeToEpoch((dt), (base)) +#define int64ToIsoDuration(value, len) \ + PandasDateTimeAPI->int64ToIsoDuration((value), (len)) +#define pandas_datetime_to_datetimestruct(dt, base, out) \ + PandasDateTimeAPI->pandas_datetime_to_datetimestruct((dt), (base), (out)) +#define pandas_timedelta_to_timedeltastruct(td, base, out) \ + PandasDateTimeAPI->pandas_timedelta_to_timedeltastruct((td), (base), (out)) +#define convert_pydatetime_to_datetimestruct(dtobj, out) \ + PandasDateTimeAPI->convert_pydatetime_to_datetimestruct((dtobj), (out)) +#define cmp_npy_datetimestruct(a, b) \ + PandasDateTimeAPI->cmp_npy_datetimestruct((a), (b)) +#define get_datetime_metadata_from_dtype(dtype) \ + PandasDateTimeAPI->get_datetime_metadata_from_dtype((dtype)) +#define parse_iso_8601_datetime(str, len, want_exc, out, out_bestunit, out_local, out_tzoffset, format, format_len, format_requirement) \ + PandasDateTimeAPI->parse_iso_8601_datetime((str), (len), (want_exc), (out), (out_bestunit), (out_local), (out_tzoffset), (format), (format_len), (format_requirement)) +#define get_datetime_iso_8601_strlen(local, base) \ + PandasDateTimeAPI->get_datetime_iso_8601_strlen((local), (base)) +#define make_iso_8601_datetime(dts, outstr, outlen, utc, base) \ + PandasDateTimeAPI->make_iso_8601_datetime((dts), (outstr), (outlen), (utc), (base)) +#define make_iso_8601_timedelta(tds, outstr, outlen) \ + PandasDateTimeAPI->make_iso_8601_timedelta((tds), (outstr), (outlen)) #endif /* !defined(_PANDAS_DATETIME_IMPL) */ #ifdef __cplusplus diff --git a/setup.py b/setup.py index 4dba468ba827e..1f379cc3a00b2 100755 --- a/setup.py +++ b/setup.py @@ -122,8 +122,6 @@ def initialize_options(self): ujson_python = pjoin(base, "ujson", "python") ujson_lib = pjoin(base, "ujson", "lib") self._clean_exclude = [ - pjoin(dt, "np_datetime.c"), - pjoin(dt, "np_datetime_strings.c"), pjoin(parser, "tokenizer.c"), pjoin(parser, "io.c"), pjoin(ujson_python, "ujson.c"), @@ -338,7 +336,7 @@ def run(self): if os.environ.get("PANDAS_CI", "0") == "1": extra_compile_args.append("-Werror") if debugging_symbols_requested: - extra_compile_args.append("-g") + extra_compile_args.append("-g3") extra_compile_args.append("-UNDEBUG") extra_compile_args.append("-O0") @@ -436,8 +434,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): klib_include = ["pandas/_libs/src/klib"] tseries_depends = [ - "pandas/_libs/tslibs/src/datetime/np_datetime.h", - "pandas/_libs/tslibs/src/datetime/np_datetime_strings.h", + "pandas/_libs/tslibs/src/datetime/pd_datetime.h", ] ext_data = { @@ -498,7 +495,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslib": { "pyxfile": "_libs/tslib", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.base": {"pyxfile": "_libs/tslibs/base"}, "_libs.tslibs.ccalendar": {"pyxfile": "_libs/tslibs/ccalendar"}, @@ -506,26 +502,19 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.conversion": { "pyxfile": "_libs/tslibs/conversion", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.fields": { "pyxfile": "_libs/tslibs/fields", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.nattype": {"pyxfile": "_libs/tslibs/nattype"}, "_libs.tslibs.np_datetime": { "pyxfile": "_libs/tslibs/np_datetime", "depends": tseries_depends, - "sources": [ - "pandas/_libs/tslibs/src/datetime/np_datetime.c", - "pandas/_libs/tslibs/src/datetime/np_datetime_strings.c", - ], }, "_libs.tslibs.offsets": { "pyxfile": "_libs/tslibs/offsets", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.parsing": { "pyxfile": "_libs/tslibs/parsing", @@ -536,33 +525,27 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.period": { "pyxfile": "_libs/tslibs/period", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.strptime": { "pyxfile": "_libs/tslibs/strptime", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.timedeltas": { "pyxfile": "_libs/tslibs/timedeltas", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.timestamps": { "pyxfile": "_libs/tslibs/timestamps", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.timezones": {"pyxfile": "_libs/tslibs/timezones"}, "_libs.tslibs.tzconversion": { "pyxfile": "_libs/tslibs/tzconversion", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.vectorized": { "pyxfile": "_libs/tslibs/vectorized", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.testing": {"pyxfile": "_libs/testing"}, "_libs.window.aggregations": { @@ -628,26 +611,21 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas._libs.json", depends=[ "pandas/_libs/src/ujson/lib/ultrajson.h", - "pandas/_libs/src/ujson/python/date_conversions.h", + "pandas/_libs/tslibs/src/datetime/pd_datetime.h", ], sources=( [ "pandas/_libs/src/ujson/python/ujson.c", "pandas/_libs/src/ujson/python/objToJSON.c", - "pandas/_libs/src/ujson/python/date_conversions.c", "pandas/_libs/src/ujson/python/JSONtoObj.c", "pandas/_libs/src/ujson/lib/ultrajsonenc.c", "pandas/_libs/src/ujson/lib/ultrajsondec.c", ] - + [ - "pandas/_libs/tslibs/src/datetime/np_datetime.c", - "pandas/_libs/tslibs/src/datetime/np_datetime_strings.c", - ] ), include_dirs=[ "pandas/_libs/src/ujson/python", "pandas/_libs/src/ujson/lib", - "pandas/_libs/src/datetime", + "pandas/_libs/tslibs/src/datetime", numpy.get_include(), ], extra_compile_args=(extra_compile_args), From a995ee29afbe20ca12093fa79859a5e3129f616e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 20 Feb 2023 23:17:36 -0800 Subject: [PATCH 10/36] Working impl --- pandas/_libs/src/ujson/python/ujson.c | 5 -- pandas/_libs/tslibs/np_datetime.pyx | 1 + .../_libs/tslibs/src/datetime/pd_datetime.c | 49 ++++++------------ .../_libs/tslibs/src/datetime/pd_datetime.h | 50 +++++++++++++------ pandas/_libs/tslibs/vectorized.pyx | 4 ++ setup.py | 14 ++++-- 6 files changed, 64 insertions(+), 59 deletions(-) diff --git a/pandas/_libs/src/ujson/python/ujson.c b/pandas/_libs/src/ujson/python/ujson.c index 7e6deb1bb5c37..c12f88d2f9354 100644 --- a/pandas/_libs/src/ujson/python/ujson.c +++ b/pandas/_libs/src/ujson/python/ujson.c @@ -40,7 +40,6 @@ Numeric decoder derived from TCL library #include #define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY #include "numpy/arrayobject.h" -#include <../../../tslibs/src/datetime/pd_datetime.h> /* objToJSON */ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs); @@ -448,9 +447,5 @@ PyMODINIT_FUNC PyInit_json(void) { } */ - PandasDateTime_IMPORT; - if (PandasDateTimeAPI == NULL && PyErr_Occurred()) - return NULL; - return module; } diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 837d1e089cca7..114b5ea40b7b6 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -20,6 +20,7 @@ from cpython.object cimport ( ) import_datetime() +PandasDateTime_IMPORT import numpy as np diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index 6db69ad302e8b..a82b17231cafa 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -22,40 +22,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #include "datetime.h" #include "pd_datetime.h" -const npy_datetimestruct _AS_MIN_DTS = { - 1969, 12, 31, 23, 59, 50, 776627, 963145, 224193}; -const npy_datetimestruct _FS_MIN_DTS = { - 1969, 12, 31, 21, 26, 16, 627963, 145224, 193000}; -const npy_datetimestruct _PS_MIN_DTS = { - 1969, 9, 16, 5, 57, 7, 963145, 224193, 0}; -const npy_datetimestruct _NS_MIN_DTS = { - 1677, 9, 21, 0, 12, 43, 145224, 193000, 0}; -const npy_datetimestruct _US_MIN_DTS = { - -290308, 12, 21, 19, 59, 05, 224193, 0, 0}; -const npy_datetimestruct _MS_MIN_DTS = { - -292275055, 5, 16, 16, 47, 4, 193000, 0, 0}; -const npy_datetimestruct _S_MIN_DTS = { - -292277022657, 1, 27, 8, 29, 53, 0, 0, 0}; -const npy_datetimestruct _M_MIN_DTS = { - -17536621475646, 5, 4, 5, 53, 0, 0, 0, 0}; - -const npy_datetimestruct _AS_MAX_DTS = { - 1970, 1, 1, 0, 0, 9, 223372, 36854, 775807}; -const npy_datetimestruct _FS_MAX_DTS = { - 1970, 1, 1, 2, 33, 43, 372036, 854775, 807000}; -const npy_datetimestruct _PS_MAX_DTS = { - 1970, 4, 17, 18, 2, 52, 36854, 775807, 0}; -const npy_datetimestruct _NS_MAX_DTS = { - 2262, 4, 11, 23, 47, 16, 854775, 807000, 0}; -const npy_datetimestruct _US_MAX_DTS = { - 294247, 1, 10, 4, 0, 54, 775807, 0, 0}; -const npy_datetimestruct _MS_MAX_DTS = { - 292278994, 8, 17, 7, 12, 55, 807000, 0, 0}; -const npy_datetimestruct _S_MAX_DTS = { - 292277026596, 12, 4, 15, 30, 7, 0, 0, 0}; -const npy_datetimestruct _M_MAX_DTS = { - 17536621479585, 8, 30, 18, 7, 0, 0, 0, 0}; - static const int days_per_month_table[2][12] = { {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; @@ -2339,6 +2305,21 @@ static int pandas_datetime_exec(PyObject *module) { return -1; } capi->npy_datetimestruct_to_datetime = npy_datetimestruct_to_datetime; + capi->scaleNanosecToUnit = scaleNanosecToUnit; + capi->int64ToIso = int64ToIso; + capi->NpyDateTimeToEpoch = NpyDateTimeToEpoch; + capi->PyDateTimeToIso = PyDateTimeToIso; + capi->PyDateTimeToEpoch = PyDateTimeToEpoch; + capi->int64ToIsoDuration = int64ToIsoDuration; + capi->pandas_datetime_to_datetimestruct = pandas_datetime_to_datetimestruct; + capi->pandas_timedelta_to_timedeltastruct = pandas_timedelta_to_timedeltastruct; + capi->convert_pydatetime_to_datetimestruct = convert_pydatetime_to_datetimestruct; + capi->cmp_npy_datetimestruct = cmp_npy_datetimestruct; + capi->get_datetime_metadata_from_dtype = get_datetime_metadata_from_dtype; + capi->parse_iso_8601_datetime = parse_iso_8601_datetime; + capi->get_datetime_iso_8601_strlen = get_datetime_iso_8601_strlen; + capi->make_iso_8601_datetime = make_iso_8601_datetime; + capi->make_iso_8601_timedelta = make_iso_8601_timedelta; PyObject *capsule = PyCapsule_New(capi, PandasDateTime_CAPSULE_NAME, diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index 8dca020da51e5..7ff99f71f88de 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -25,22 +25,40 @@ typedef struct { npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds; } pandas_timedeltastruct; -extern const npy_datetimestruct _AS_MIN_DTS; -extern const npy_datetimestruct _AS_MAX_DTS; -extern const npy_datetimestruct _FS_MIN_DTS; -extern const npy_datetimestruct _FS_MAX_DTS; -extern const npy_datetimestruct _PS_MIN_DTS; -extern const npy_datetimestruct _PS_MAX_DTS; -extern const npy_datetimestruct _NS_MIN_DTS; -extern const npy_datetimestruct _NS_MAX_DTS; -extern const npy_datetimestruct _US_MIN_DTS; -extern const npy_datetimestruct _US_MAX_DTS; -extern const npy_datetimestruct _MS_MIN_DTS; -extern const npy_datetimestruct _MS_MAX_DTS; -extern const npy_datetimestruct _S_MIN_DTS; -extern const npy_datetimestruct _S_MAX_DTS; -extern const npy_datetimestruct _M_MIN_DTS; -extern const npy_datetimestruct _M_MAX_DTS; +const npy_datetimestruct _AS_MIN_DTS = { + 1969, 12, 31, 23, 59, 50, 776627, 963145, 224193}; +const npy_datetimestruct _FS_MIN_DTS = { + 1969, 12, 31, 21, 26, 16, 627963, 145224, 193000}; +const npy_datetimestruct _PS_MIN_DTS = { + 1969, 9, 16, 5, 57, 7, 963145, 224193, 0}; +const npy_datetimestruct _NS_MIN_DTS = { + 1677, 9, 21, 0, 12, 43, 145224, 193000, 0}; +const npy_datetimestruct _US_MIN_DTS = { + -290308, 12, 21, 19, 59, 05, 224193, 0, 0}; +const npy_datetimestruct _MS_MIN_DTS = { + -292275055, 5, 16, 16, 47, 4, 193000, 0, 0}; +const npy_datetimestruct _S_MIN_DTS = { + -292277022657, 1, 27, 8, 29, 53, 0, 0, 0}; +const npy_datetimestruct _M_MIN_DTS = { + -17536621475646, 5, 4, 5, 53, 0, 0, 0, 0}; + +const npy_datetimestruct _AS_MAX_DTS = { + 1970, 1, 1, 0, 0, 9, 223372, 36854, 775807}; +const npy_datetimestruct _FS_MAX_DTS = { + 1970, 1, 1, 2, 33, 43, 372036, 854775, 807000}; +const npy_datetimestruct _PS_MAX_DTS = { + 1970, 4, 17, 18, 2, 52, 36854, 775807, 0}; +const npy_datetimestruct _NS_MAX_DTS = { + 2262, 4, 11, 23, 47, 16, 854775, 807000, 0}; +const npy_datetimestruct _US_MAX_DTS = { + 294247, 1, 10, 4, 0, 54, 775807, 0, 0}; +const npy_datetimestruct _MS_MAX_DTS = { + 292278994, 8, 17, 7, 12, 55, 807000, 0, 0}; +const npy_datetimestruct _S_MAX_DTS = { + 292277026596, 12, 4, 15, 30, 7, 0, 0, 0}; +const npy_datetimestruct _M_MAX_DTS = { + 17536621479585, 8, 30, 18, 7, 0, 0, 0, 0}; + /* 'format_requirement' can be one of three values: * * PARTIAL_MATCH : Only require a partial match with 'format'. diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 06e09d890de69..eaa2c873c3f9f 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -31,7 +31,11 @@ from .np_datetime cimport ( NPY_FR_ns, npy_datetimestruct, pandas_datetime_to_datetimestruct, + import_pandas_datetime, ) + +import_pandas_datetime() + from .period cimport get_period_ordinal from .timestamps cimport create_timestamp_from_ts from .timezones cimport is_utc diff --git a/setup.py b/setup.py index 1f379cc3a00b2..4dc21367c1f38 100755 --- a/setup.py +++ b/setup.py @@ -433,6 +433,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): klib_include = ["pandas/_libs/src/klib"] +tseries_includes = ["pandas/_libs/tslibs/src/datetime"] tseries_depends = [ "pandas/_libs/tslibs/src/datetime/pd_datetime.h", ] @@ -502,6 +503,9 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.conversion": { "pyxfile": "_libs/tslibs/conversion", "depends": tseries_depends, + # TODO: xstrtod symbol visibility is really murky here + "include": klib_include, + "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, "_libs.tslibs.fields": { "pyxfile": "_libs/tslibs/fields", @@ -511,14 +515,16 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.np_datetime": { "pyxfile": "_libs/tslibs/np_datetime", "depends": tseries_depends, + "includes": tseries_includes, }, "_libs.tslibs.offsets": { "pyxfile": "_libs/tslibs/offsets", "depends": tseries_depends, + "includes": tseries_includes, }, "_libs.tslibs.parsing": { "pyxfile": "_libs/tslibs/parsing", - "include": klib_include, + "include": tseries_includes + klib_include, "depends": ["pandas/_libs/src/parser/tokenizer.h"], "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, @@ -536,6 +542,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): }, "_libs.tslibs.timestamps": { "pyxfile": "_libs/tslibs/timestamps", + "include": tseries_includes, "depends": tseries_depends, }, "_libs.tslibs.timezones": {"pyxfile": "_libs/tslibs/timezones"}, @@ -625,9 +632,8 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): include_dirs=[ "pandas/_libs/src/ujson/python", "pandas/_libs/src/ujson/lib", - "pandas/_libs/tslibs/src/datetime", numpy.get_include(), - ], + ] + tseries_includes, extra_compile_args=(extra_compile_args), extra_link_args=extra_link_args, define_macros=macros, @@ -644,7 +650,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas._libs.pandas_datetime", depends=["pandas._libs.tslibs.datetime.pd_datetime.h"], sources=(["pandas/_libs/tslibs/src/datetime/pd_datetime.c"]), - include_dirs=[ + include_dirs=tseries_includes + [ numpy.get_include(), ], extra_compile_args=(extra_compile_args), From 05e28ad392d6e20d29dd922f12c223aebec2a7ae Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 20 Feb 2023 23:24:00 -0800 Subject: [PATCH 11/36] styling --- .../_libs/tslibs/src/datetime/pd_datetime.c | 3651 ++++++++--------- .../_libs/tslibs/src/datetime/pd_datetime.h | 174 +- pandas/_libs/tslibs/vectorized.pyx | 2 +- setup.py | 7 +- 4 files changed, 1914 insertions(+), 1920 deletions(-) diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index a82b17231cafa..38e14c713594f 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -30,167 +30,160 @@ static const int days_per_month_table[2][12] = { * Returns 1 if the given year is a leap year, 0 otherwise. */ static int is_leapyear(npy_int64 year) { - return (year & 0x3) == 0 && /* year % 4 == 0 */ - ((year % 100) != 0 || (year % 400) == 0); + return (year & 0x3) == 0 && /* year % 4 == 0 */ + ((year % 100) != 0 || (year % 400) == 0); } /* * Calculates the days offset from the 1970 epoch. */ static npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { - int i, month; - npy_int64 year, days = 0; - const int *month_lengths; - - year = dts->year - 1970; - days = year * 365; - - /* Adjust for leap years */ - if (days >= 0) { - /* - * 1968 is the closest leap year before 1970. - * Exclude the current year, so add 1. - */ - year += 1; - /* Add one day for each 4 years */ - days += year / 4; - /* 1900 is the closest previous year divisible by 100 */ - year += 68; - /* Subtract one day for each 100 years */ - days -= year / 100; - /* 1600 is the closest previous year divisible by 400 */ - year += 300; - /* Add one day for each 400 years */ - days += year / 400; - } else { - /* - * 1972 is the closest later year after 1970. - * Include the current year, so subtract 2. - */ - year -= 2; - /* Subtract one day for each 4 years */ - days += year / 4; - /* 2000 is the closest later year divisible by 100 */ - year -= 28; - /* Add one day for each 100 years */ - days -= year / 100; - /* 2000 is also the closest later year divisible by 400 */ - /* Subtract one day for each 400 years */ - days += year / 400; - } - - month_lengths = days_per_month_table[is_leapyear(dts->year)]; - month = dts->month - 1; - - /* Add the months */ - for (i = 0; i < month; ++i) { - days += month_lengths[i]; - } - - /* Add the days */ - days += dts->day - 1; - - return days; + int i, month; + npy_int64 year, days = 0; + const int *month_lengths; + + year = dts->year - 1970; + days = year * 365; + + /* Adjust for leap years */ + if (days >= 0) { + /* + * 1968 is the closest leap year before 1970. + * Exclude the current year, so add 1. + */ + year += 1; + /* Add one day for each 4 years */ + days += year / 4; + /* 1900 is the closest previous year divisible by 100 */ + year += 68; + /* Subtract one day for each 100 years */ + days -= year / 100; + /* 1600 is the closest previous year divisible by 400 */ + year += 300; + /* Add one day for each 400 years */ + days += year / 400; + } else { + /* + * 1972 is the closest later year after 1970. + * Include the current year, so subtract 2. + */ + year -= 2; + /* Subtract one day for each 4 years */ + days += year / 4; + /* 2000 is the closest later year divisible by 100 */ + year -= 28; + /* Add one day for each 100 years */ + days -= year / 100; + /* 2000 is also the closest later year divisible by 400 */ + /* Subtract one day for each 400 years */ + days += year / 400; + } + + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + month = dts->month - 1; + + /* Add the months */ + for (i = 0; i < month; ++i) { + days += month_lengths[i]; + } + + /* Add the days */ + days += dts->day - 1; + + return days; } /* * Converts a datetime from a datetimestruct to a datetime based * on a metadata unit. The date is assumed to be valid. */ -static npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, - const npy_datetimestruct *dts) { - npy_datetime ret; - - if (base == NPY_FR_Y) { - /* Truncate to the year */ - ret = dts->year - 1970; - } else if (base == NPY_FR_M) { - /* Truncate to the month */ - ret = 12 * (dts->year - 1970) + (dts->month - 1); - } else { - /* Otherwise calculate the number of days to start */ - npy_int64 days = get_datetimestruct_days(dts); - - switch (base) { - case NPY_FR_W: - /* Truncate to weeks */ - if (days >= 0) { - ret = days / 7; - } else { - ret = (days - 6) / 7; - } - break; - case NPY_FR_D: - ret = days; - break; - case NPY_FR_h: - ret = days * 24 + dts->hour; - break; - case NPY_FR_m: - ret = (days * 24 + dts->hour) * 60 + dts->min; - break; - case NPY_FR_s: - ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; - break; - case NPY_FR_ms: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000 + - dts->us / 1000; - break; - case NPY_FR_us: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us; - break; - case NPY_FR_ns: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000 + - dts->ps / 1000; - break; - case NPY_FR_ps: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps; - break; - case NPY_FR_fs: - /* only 2.6 hours */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000 + - dts->as / 1000; - break; - case NPY_FR_as: - /* only 9.2 secs */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000000 + - dts->as; - break; - default: - /* Something got corrupted */ - PyErr_SetString( - PyExc_ValueError, - "NumPy datetime metadata with corrupt unit value"); - return -1; - } +static npy_datetime +npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, + const npy_datetimestruct *dts) { + npy_datetime ret; + + if (base == NPY_FR_Y) { + /* Truncate to the year */ + ret = dts->year - 1970; + } else if (base == NPY_FR_M) { + /* Truncate to the month */ + ret = 12 * (dts->year - 1970) + (dts->month - 1); + } else { + /* Otherwise calculate the number of days to start */ + npy_int64 days = get_datetimestruct_days(dts); + + switch (base) { + case NPY_FR_W: + /* Truncate to weeks */ + if (days >= 0) { + ret = days / 7; + } else { + ret = (days - 6) / 7; + } + break; + case NPY_FR_D: + ret = days; + break; + case NPY_FR_h: + ret = days * 24 + dts->hour; + break; + case NPY_FR_m: + ret = (days * 24 + dts->hour) * 60 + dts->min; + break; + case NPY_FR_s: + ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; + break; + case NPY_FR_ms: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * 1000 + + dts->us / 1000; + break; + case NPY_FR_us: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * + 1000000 + + dts->us; + break; + case NPY_FR_ns: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * + 1000000 + + dts->us) * + 1000 + + dts->ps / 1000; + break; + case NPY_FR_ps: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps; + break; + case NPY_FR_fs: + /* only 2.6 hours */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000 + + dts->as / 1000; + break; + case NPY_FR_as: + /* only 9.2 secs */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000000 + + dts->as; + break; + default: + /* Something got corrupted */ + PyErr_SetString(PyExc_ValueError, + "NumPy datetime metadata with corrupt unit value"); + return -1; } - return ret; + } + return ret; } /* @@ -199,114 +192,115 @@ static npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, * * Scales an integer value representing time in nanoseconds to provided unit. * - * Mutates the provided value directly. Returns 0 on success, non-zero on error. + * Mutates the provided value directly. Returns 0 on success, non-zero on + * error. */ static int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { - switch (unit) { - case NPY_FR_ns: - break; - case NPY_FR_us: - *value /= 1000LL; - break; - case NPY_FR_ms: - *value /= 1000000LL; - break; - case NPY_FR_s: - *value /= 1000000000LL; - break; - default: - return -1; - } + switch (unit) { + case NPY_FR_ns: + break; + case NPY_FR_us: + *value /= 1000LL; + break; + case NPY_FR_ms: + *value /= 1000000LL; + break; + case NPY_FR_s: + *value /= 1000000000LL; + break; + default: + return -1; + } - return 0; + return 0; } /* * Compares two npy_datetimestruct objects chronologically */ static int cmp_npy_datetimestruct(const npy_datetimestruct *a, - const npy_datetimestruct *b) { - if (a->year > b->year) { - return 1; - } else if (a->year < b->year) { - return -1; - } + const npy_datetimestruct *b) { + if (a->year > b->year) { + return 1; + } else if (a->year < b->year) { + return -1; + } - if (a->month > b->month) { - return 1; - } else if (a->month < b->month) { - return -1; - } + if (a->month > b->month) { + return 1; + } else if (a->month < b->month) { + return -1; + } - if (a->day > b->day) { - return 1; - } else if (a->day < b->day) { - return -1; - } + if (a->day > b->day) { + return 1; + } else if (a->day < b->day) { + return -1; + } - if (a->hour > b->hour) { - return 1; - } else if (a->hour < b->hour) { - return -1; - } + if (a->hour > b->hour) { + return 1; + } else if (a->hour < b->hour) { + return -1; + } - if (a->min > b->min) { - return 1; - } else if (a->min < b->min) { - return -1; - } + if (a->min > b->min) { + return 1; + } else if (a->min < b->min) { + return -1; + } - if (a->sec > b->sec) { - return 1; - } else if (a->sec < b->sec) { - return -1; - } + if (a->sec > b->sec) { + return 1; + } else if (a->sec < b->sec) { + return -1; + } - if (a->us > b->us) { - return 1; - } else if (a->us < b->us) { - return -1; - } + if (a->us > b->us) { + return 1; + } else if (a->us < b->us) { + return -1; + } - if (a->ps > b->ps) { - return 1; - } else if (a->ps < b->ps) { - return -1; - } + if (a->ps > b->ps) { + return 1; + } else if (a->ps < b->ps) { + return -1; + } - if (a->as > b->as) { - return 1; - } else if (a->as < b->as) { - return -1; - } + if (a->as > b->as) { + return 1; + } else if (a->as < b->as) { + return -1; + } - return 0; + return 0; } /* -* Returns the offset from utc of the timezone as a timedelta. -* The caller is responsible for ensuring that the tzinfo -* attribute exists on the datetime object. -* -* If the passed object is timezone naive, Py_None is returned. -* If extraction of the offset fails, NULL is returned. -* -* NOTE: This function is not vendored from numpy. -*/ + * Returns the offset from utc of the timezone as a timedelta. + * The caller is responsible for ensuring that the tzinfo + * attribute exists on the datetime object. + * + * If the passed object is timezone naive, Py_None is returned. + * If extraction of the offset fails, NULL is returned. + * + * NOTE: This function is not vendored from numpy. + */ static PyObject *extract_utc_offset(PyObject *obj) { - PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo"); - if (tmp == NULL) { - return NULL; - } - if (tmp != Py_None) { - PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); - if (offset == NULL) { - Py_DECREF(tmp); - return NULL; - } - return offset; + PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo"); + if (tmp == NULL) { + return NULL; + } + if (tmp != Py_None) { + PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); + if (offset == NULL) { + Py_DECREF(tmp); + return NULL; } - return tmp; + return offset; + } + return tmp; } /* @@ -314,49 +308,49 @@ static PyObject *extract_utc_offset(PyObject *obj) { * the current values are valid.g */ void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) { - int isleap; + int isleap; - /* MINUTES */ - dts->min += minutes; - while (dts->min < 0) { - dts->min += 60; - dts->hour--; - } - while (dts->min >= 60) { - dts->min -= 60; - dts->hour++; - } + /* MINUTES */ + dts->min += minutes; + while (dts->min < 0) { + dts->min += 60; + dts->hour--; + } + while (dts->min >= 60) { + dts->min -= 60; + dts->hour++; + } - /* HOURS */ - while (dts->hour < 0) { - dts->hour += 24; - dts->day--; - } - while (dts->hour >= 24) { - dts->hour -= 24; - dts->day++; - } + /* HOURS */ + while (dts->hour < 0) { + dts->hour += 24; + dts->day--; + } + while (dts->hour >= 24) { + dts->hour -= 24; + dts->day++; + } - /* DAYS */ - if (dts->day < 1) { - dts->month--; - if (dts->month < 1) { - dts->year--; - dts->month = 12; - } - isleap = is_leapyear(dts->year); - dts->day += days_per_month_table[isleap][dts->month - 1]; - } else if (dts->day > 28) { - isleap = is_leapyear(dts->year); - if (dts->day > days_per_month_table[isleap][dts->month - 1]) { - dts->day -= days_per_month_table[isleap][dts->month - 1]; - dts->month++; - if (dts->month > 12) { - dts->year++; - dts->month = 1; - } - } + /* DAYS */ + if (dts->day < 1) { + dts->month--; + if (dts->month < 1) { + dts->year--; + dts->month = 12; + } + isleap = is_leapyear(dts->year); + dts->day += days_per_month_table[isleap][dts->month - 1]; + } else if (dts->day > 28) { + isleap = is_leapyear(dts->year); + if (dts->day > days_per_month_table[isleap][dts->month - 1]) { + dts->day -= days_per_month_table[isleap][dts->month - 1]; + dts->month++; + if (dts->month > 12) { + dts->year++; + dts->month = 1; + } } + } } /* @@ -373,77 +367,77 @@ void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) { * if obj doesn't have the needed date or datetime attributes. */ static int convert_pydatetime_to_datetimestruct(PyObject *dtobj, - npy_datetimestruct *out) { - // Assumes that obj is a valid datetime object - PyObject *tmp; - PyObject *obj = (PyObject*)dtobj; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->month = 1; - out->day = 1; - - out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year")); - out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month")); - out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day")); - - // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use - // PyDateTime_Check here, and less verbose attribute lookups. - - /* Check for time attributes (if not there, return success as a date) */ - if (!PyObject_HasAttrString(obj, "hour") || - !PyObject_HasAttrString(obj, "minute") || - !PyObject_HasAttrString(obj, "second") || - !PyObject_HasAttrString(obj, "microsecond")) { + npy_datetimestruct *out) { + // Assumes that obj is a valid datetime object + PyObject *tmp; + PyObject *obj = (PyObject *)dtobj; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year")); + out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month")); + out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day")); + + // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use + // PyDateTime_Check here, and less verbose attribute lookups. + + /* Check for time attributes (if not there, return success as a date) */ + if (!PyObject_HasAttrString(obj, "hour") || + !PyObject_HasAttrString(obj, "minute") || + !PyObject_HasAttrString(obj, "second") || + !PyObject_HasAttrString(obj, "microsecond")) { + return 0; + } + + out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour")); + out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute")); + out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second")); + out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond")); + + if (PyObject_HasAttrString(obj, "tzinfo")) { + PyObject *offset = extract_utc_offset(obj); + /* Apply the time zone offset if datetime obj is tz-aware */ + if (offset != NULL) { + if (offset == Py_None) { + Py_DECREF(offset); return 0; - } + } + PyObject *tmp_int; + int seconds_offset, minutes_offset; + /* + * The timedelta should have a function "total_seconds" + * which contains the value we want. + */ + tmp = PyObject_CallMethod(offset, "total_seconds", ""); + Py_DECREF(offset); + if (tmp == NULL) { + return -1; + } + tmp_int = PyNumber_Long(tmp); + if (tmp_int == NULL) { + Py_DECREF(tmp); + return -1; + } + seconds_offset = PyLong_AsLong(tmp_int); + if (seconds_offset == -1 && PyErr_Occurred()) { + Py_DECREF(tmp_int); + Py_DECREF(tmp); + return -1; + } + Py_DECREF(tmp_int); + Py_DECREF(tmp); - out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour")); - out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute")); - out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second")); - out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond")); - - if (PyObject_HasAttrString(obj, "tzinfo")) { - PyObject *offset = extract_utc_offset(obj); - /* Apply the time zone offset if datetime obj is tz-aware */ - if (offset != NULL) { - if (offset == Py_None) { - Py_DECREF(offset); - return 0; - } - PyObject *tmp_int; - int seconds_offset, minutes_offset; - /* - * The timedelta should have a function "total_seconds" - * which contains the value we want. - */ - tmp = PyObject_CallMethod(offset, "total_seconds", ""); - Py_DECREF(offset); - if (tmp == NULL) { - return -1; - } - tmp_int = PyNumber_Long(tmp); - if (tmp_int == NULL) { - Py_DECREF(tmp); - return -1; - } - seconds_offset = PyLong_AsLong(tmp_int); - if (seconds_offset == -1 && PyErr_Occurred()) { - Py_DECREF(tmp_int); - Py_DECREF(tmp); - return -1; - } - Py_DECREF(tmp_int); - Py_DECREF(tmp); - - /* Convert to a minutes offset and apply it */ - minutes_offset = seconds_offset / 60; - - add_minutes_to_datetimestruct(out, -minutes_offset); - } + /* Convert to a minutes offset and apply it */ + minutes_offset = seconds_offset / 60; + + add_minutes_to_datetimestruct(out, -minutes_offset); } + } - return 0; + return 0; } /* @@ -455,16 +449,16 @@ static int convert_pydatetime_to_datetimestruct(PyObject *dtobj, * for subsequent calls to this command - it is able to deduce that `*d >= 0`. */ static npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { - assert(unit > 0); - npy_int64 div = *d / unit; - npy_int64 mod = *d % unit; - if (mod < 0) { - mod += unit; - div -= 1; - } - assert(mod >= 0); - *d = mod; - return div; + assert(unit > 0); + npy_int64 div = *d / unit; + npy_int64 mod = *d % unit; + if (mod < 0) { + mod += unit; + div -= 1; + } + assert(mod >= 0); + *d = mod; + return div; } /* @@ -472,39 +466,39 @@ static npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { * and returns the year. */ static npy_int64 days_to_yearsdays(npy_int64 *days_) { - const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); - /* Adjust so it's relative to the year 2000 (divisible by 400) */ - npy_int64 days = (*days_) - (365 * 30 + 7); - npy_int64 year; - - /* Break down the 400 year cycle to get the year and day within the year */ - if (days >= 0) { - year = 400 * (days / days_per_400years); - days = days % days_per_400years; - } else { - year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); - days = days % days_per_400years; - if (days < 0) { - days += days_per_400years; - } + const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); + /* Adjust so it's relative to the year 2000 (divisible by 400) */ + npy_int64 days = (*days_) - (365 * 30 + 7); + npy_int64 year; + + /* Break down the 400 year cycle to get the year and day within the year */ + if (days >= 0) { + year = 400 * (days / days_per_400years); + days = days % days_per_400years; + } else { + year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); + days = days % days_per_400years; + if (days < 0) { + days += days_per_400years; } + } - /* Work out the year/day within the 400 year cycle */ - if (days >= 366) { - year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); - days = (days - 1) % (100 * 365 + 25 - 1); - if (days >= 365) { - year += 4 * ((days + 1) / (4 * 365 + 1)); - days = (days + 1) % (4 * 365 + 1); - if (days >= 366) { - year += (days - 1) / 365; - days = (days - 1) % 365; - } - } + /* Work out the year/day within the 400 year cycle */ + if (days >= 366) { + year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); + days = (days - 1) % (100 * 365 + 25 - 1); + if (days >= 365) { + year += 4 * ((days + 1) / (4 * 365 + 1)); + days = (days + 1) % (4 * 365 + 1); + if (days >= 366) { + year += (days - 1) / 365; + days = (days - 1) % 365; + } } + } - *days_ = days; - return year + 2000; + *days_ = days; + return year + 2000; } /* @@ -512,170 +506,167 @@ static npy_int64 days_to_yearsdays(npy_int64 *days_) { * offset from 1970. */ static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { - const int *month_lengths; - int i; - - dts->year = days_to_yearsdays(&days); - month_lengths = days_per_month_table[is_leapyear(dts->year)]; - - for (i = 0; i < 12; ++i) { - if (days < month_lengths[i]) { - dts->month = i + 1; - dts->day = days + 1; - return; - } else { - days -= month_lengths[i]; - } + const int *month_lengths; + int i; + + dts->year = days_to_yearsdays(&days); + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + + for (i = 0; i < 12; ++i) { + if (days < month_lengths[i]) { + dts->month = i + 1; + dts->day = days + 1; + return; + } else { + days -= month_lengths[i]; } + } } - /* * Converts a datetime based on the given metadata into a datetimestruct */ static void pandas_datetime_to_datetimestruct(npy_datetime dt, - NPY_DATETIMEUNIT base, - npy_datetimestruct *out) { - npy_int64 perday; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->year = 1970; - out->month = 1; - out->day = 1; - - /* - * Note that care must be taken with the / and % operators - * for negative values. - */ - switch (base) { - case NPY_FR_Y: - out->year = 1970 + dt; - break; - - case NPY_FR_M: - out->year = 1970 + extract_unit(&dt, 12); - out->month = dt + 1; - break; - - case NPY_FR_W: - /* A week is 7 days */ - set_datetimestruct_days(dt * 7, out); - break; - - case NPY_FR_D: - set_datetimestruct_days(dt, out); - break; - - case NPY_FR_h: - perday = 24LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = dt; - break; - - case NPY_FR_m: - perday = 24LL * 60; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 60); - out->min = (int)dt; - break; - - case NPY_FR_s: - perday = 24LL * 60 * 60; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 60 * 60); - out->min = (int)extract_unit(&dt, 60); - out->sec = (int)dt; - break; - - case NPY_FR_ms: - perday = 24LL * 60 * 60 * 1000; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 60); - out->sec = (int)extract_unit(&dt, 1000LL); - out->us = (int)(dt * 1000); - break; - - case NPY_FR_us: - perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000); - out->us = (int)dt; - break; - - case NPY_FR_ns: - perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->us = (int)extract_unit(&dt, 1000LL); - out->ps = (int)(dt * 1000); - break; - - case NPY_FR_ps: - perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->us = (int)extract_unit(&dt, 1000LL); - out->ps = (int)(dt * 1000); - break; - - case NPY_FR_fs: - /* entire range is only +- 2.6 hours */ - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 60 * 60); - if (out->hour < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour += 24; - assert(out->hour >= 0); - } - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000); - out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->ps = (int)extract_unit(&dt, 1000LL); - out->as = (int)(dt * 1000); - break; - - case NPY_FR_as: - /* entire range is only +- 9.2 seconds */ - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 1000); - if (out->sec < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour = 23; - out->min = 59; - out->sec += 60; - assert(out->sec >= 0); - } - out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); - out->ps = (int)extract_unit(&dt, 1000LL * 1000); - out->as = (int)dt; - break; - - default: - PyErr_SetString(PyExc_RuntimeError, - "NumPy datetime metadata is corrupted with invalid " - "base unit"); - } + NPY_DATETIMEUNIT base, + npy_datetimestruct *out) { + npy_int64 perday; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->year = 1970; + out->month = 1; + out->day = 1; + + /* + * Note that care must be taken with the / and % operators + * for negative values. + */ + switch (base) { + case NPY_FR_Y: + out->year = 1970 + dt; + break; + + case NPY_FR_M: + out->year = 1970 + extract_unit(&dt, 12); + out->month = dt + 1; + break; + + case NPY_FR_W: + /* A week is 7 days */ + set_datetimestruct_days(dt * 7, out); + break; + + case NPY_FR_D: + set_datetimestruct_days(dt, out); + break; + + case NPY_FR_h: + perday = 24LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = dt; + break; + + case NPY_FR_m: + perday = 24LL * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60); + out->min = (int)dt; + break; + + case NPY_FR_s: + perday = 24LL * 60 * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60 * 60); + out->min = (int)extract_unit(&dt, 60); + out->sec = (int)dt; + break; + + case NPY_FR_ms: + perday = 24LL * 60 * 60 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 60); + out->sec = (int)extract_unit(&dt, 1000LL); + out->us = (int)(dt * 1000); + break; + + case NPY_FR_us: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000); + out->us = (int)dt; + break; + + case NPY_FR_ns: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_ps: + perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_fs: + /* entire range is only +- 2.6 hours */ + out->hour = + (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 60 * 60); + if (out->hour < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour += 24; + assert(out->hour >= 0); + } + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL); + out->as = (int)(dt * 1000); + break; + + case NPY_FR_as: + /* entire range is only +- 9.2 seconds */ + out->sec = + (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 1000); + if (out->sec < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour = 23; + out->min = 59; + out->sec += 60; + assert(out->sec >= 0); + } + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL * 1000); + out->as = (int)dt; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy datetime metadata is corrupted with invalid " + "base unit"); + } } /* @@ -685,363 +676,359 @@ static void pandas_datetime_to_datetimestruct(npy_datetime dt, * Returns 0 on success, -1 on failure. */ static void pandas_timedelta_to_timedeltastruct(npy_timedelta td, - NPY_DATETIMEUNIT base, - pandas_timedeltastruct *out) { - npy_int64 frac; - npy_int64 sfrac; - npy_int64 ifrac; - int sign; - npy_int64 per_day; - npy_int64 per_sec; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(pandas_timedeltastruct)); - - switch (base) { - case NPY_FR_ns: - - per_day = 86400000000000LL; - per_sec = 1000LL * 1000LL * 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } + NPY_DATETIMEUNIT base, + pandas_timedeltastruct *out) { + npy_int64 frac; + npy_int64 sfrac; + npy_int64 ifrac; + int sign; + npy_int64 per_day; + npy_int64 per_sec; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(pandas_timedeltastruct)); + + switch (base) { + case NPY_FR_ns: + + per_day = 86400000000000LL; + per_sec = 1000LL * 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } - if (sign < 0) - out->days = -out->days; + sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; - ifrac = td - (out->days * per_day + sfrac); + if (sign < 0) + out->days = -out->days; - if (ifrac != 0) { - out->ms = ifrac / (1000LL * 1000LL); - ifrac -= out->ms * 1000LL * 1000LL; - out->us = ifrac / 1000LL; - ifrac -= out->us * 1000LL; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; + ifrac = td - (out->days * per_day + sfrac); - case NPY_FR_us: - - per_day = 86400000000LL; - per_sec = 1000LL * 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } + if (ifrac != 0) { + out->ms = ifrac / (1000LL * 1000LL); + ifrac -= out->ms * 1000LL * 1000LL; + out->us = ifrac / 1000LL; + ifrac -= out->us * 1000LL; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } + case NPY_FR_us: - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } + per_day = 86400000000LL; + per_sec = 1000LL * 1000LL; - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } + if (frac < 0) { + sign = -1; - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } - if (sign < 0) - out->days = -out->days; + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } - ifrac = td - (out->days * per_day + sfrac); + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } - if (ifrac != 0) { - out->ms = ifrac / 1000LL; - ifrac -= out->ms * 1000LL; - out->us = ifrac / 1L; - ifrac -= out->us * 1L; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } - case NPY_FR_ms: - - per_day = 86400000LL; - per_sec = 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } + sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } + if (sign < 0) + out->days = -out->days; - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } + ifrac = td - (out->days * per_day + sfrac); - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } + if (ifrac != 0) { + out->ms = ifrac / 1000LL; + ifrac -= out->ms * 1000LL; + out->us = ifrac / 1L; + ifrac -= out->us * 1L; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; + case NPY_FR_ms: - if (sign < 0) - out->days = -out->days; + per_day = 86400000LL; + per_sec = 1000LL; - ifrac = td - (out->days * per_day + sfrac); + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; - if (ifrac != 0) { - out->ms = ifrac; - out->us = 0; - out->ns = 0; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; + if (frac < 0) { + sign = -1; - case NPY_FR_s: - // special case where we can simplify many expressions bc per_sec=1 - - per_day = 86400LL; - per_sec = 1L; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; + sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; - if (sign < 0) - out->days = -out->days; + if (sign < 0) + out->days = -out->days; - ifrac = td - (out->days * per_day + sfrac); + ifrac = td - (out->days * per_day + sfrac); - if (ifrac != 0) { - out->ms = 0; - out->us = 0; - out->ns = 0; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; + if (ifrac != 0) { + out->ms = ifrac; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_s: + // special case where we can simplify many expressions bc per_sec=1 + + per_day = 86400LL; + per_sec = 1L; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } - case NPY_FR_m: + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } - out->days = td / 1440LL; - td -= out->days * 1440LL; - out->hrs = td / 60LL; - td -= out->hrs * 60LL; - out->min = td; + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } - case NPY_FR_h: - out->days = td / 24LL; - td -= out->days * 24LL; - out->hrs = td; + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; + sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; - case NPY_FR_D: - out->days = td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; + if (sign < 0) + out->days = -out->days; - case NPY_FR_W: - out->days = 7 * td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; + ifrac = td - (out->days * per_day + sfrac); - default: - PyErr_SetString(PyExc_RuntimeError, - "NumPy timedelta metadata is corrupted with " - "invalid base unit"); - } + if (ifrac != 0) { + out->ms = 0; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_m: + + out->days = td / 1440LL; + td -= out->days * 1440LL; + out->hrs = td / 60LL; + td -= out->hrs * 60LL; + out->min = td; + + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_h: + out->days = td / 24LL; + td -= out->days * 24LL; + out->hrs = td; + + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_D: + out->days = td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_W: + out->days = 7 * td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy timedelta metadata is corrupted with " + "invalid base unit"); + } - out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; - out->microseconds = out->ms * 1000 + out->us; - out->nanoseconds = out->ns; + out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; + out->microseconds = out->ms * 1000 + out->us; + out->nanoseconds = out->ns; } /* @@ -1049,53 +1036,53 @@ static void pandas_timedelta_to_timedeltastruct(npy_timedelta td, * objects with the given local and unit settings. */ static int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) { - int len = 0; + int len = 0; + + switch (base) { + /* Generic units can only be used to represent NaT */ + /* return 4;*/ + case NPY_FR_as: + len += 3; /* "###" */ + case NPY_FR_fs: + len += 3; /* "###" */ + case NPY_FR_ps: + len += 3; /* "###" */ + case NPY_FR_ns: + len += 3; /* "###" */ + case NPY_FR_us: + len += 3; /* "###" */ + case NPY_FR_ms: + len += 4; /* ".###" */ + case NPY_FR_s: + len += 3; /* ":##" */ + case NPY_FR_m: + len += 3; /* ":##" */ + case NPY_FR_h: + len += 3; /* "T##" */ + case NPY_FR_D: + case NPY_FR_W: + len += 3; /* "-##" */ + case NPY_FR_M: + len += 3; /* "-##" */ + case NPY_FR_Y: + len += 21; /* 64-bit year */ + break; + default: + len += 3; /* handle the now defunct NPY_FR_B */ + break; + } - switch (base) { - /* Generic units can only be used to represent NaT */ - /* return 4;*/ - case NPY_FR_as: - len += 3; /* "###" */ - case NPY_FR_fs: - len += 3; /* "###" */ - case NPY_FR_ps: - len += 3; /* "###" */ - case NPY_FR_ns: - len += 3; /* "###" */ - case NPY_FR_us: - len += 3; /* "###" */ - case NPY_FR_ms: - len += 4; /* ".###" */ - case NPY_FR_s: - len += 3; /* ":##" */ - case NPY_FR_m: - len += 3; /* ":##" */ - case NPY_FR_h: - len += 3; /* "T##" */ - case NPY_FR_D: - case NPY_FR_W: - len += 3; /* "-##" */ - case NPY_FR_M: - len += 3; /* "-##" */ - case NPY_FR_Y: - len += 21; /* 64-bit year */ - break; - default: - len += 3; /* handle the now defunct NPY_FR_B */ - break; - } - - if (base >= NPY_FR_h) { - if (local) { - len += 5; /* "+####" or "-####" */ - } else { - len += 1; /* "Z" */ - } + if (base >= NPY_FR_h) { + if (local) { + len += 5; /* "+####" or "-####" */ + } else { + len += 1; /* "Z" */ } + } - len += 1; /* NULL terminator */ + len += 1; /* NULL terminator */ - return len; + return len; } /* @@ -1112,21 +1099,21 @@ static int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) { * Returns 0 on success, -1 on failure (for example if the output * string was too short). */ -static int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, - int utc, NPY_DATETIMEUNIT base) { - char *substr = outstr; - int sublen = outlen; - int tmplen; - - /* - * Print weeks with the same precision as days. - * - * TODO: Could print weeks with YYYY-Www format if the week - * epoch is a Monday. - */ - if (base == NPY_FR_W) { - base = NPY_FR_D; - } +static int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, + int outlen, int utc, NPY_DATETIMEUNIT base) { + char *substr = outstr; + int sublen = outlen; + int tmplen; + + /* + * Print weeks with the same precision as days. + * + * TODO: Could print weeks with YYYY-Www format if the week + * epoch is a Monday. + */ + if (base == NPY_FR_W) { + base = NPY_FR_D; + } /* YEAR */ /* @@ -1135,415 +1122,409 @@ static int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int out * to have data all the way to the end of the buffer. */ #ifdef _WIN32 - tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); + tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); #else - tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); + tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); #endif // _WIN32 - /* If it ran out of space or there isn't space for the NULL terminator */ - if (tmplen < 0 || tmplen > sublen) { - goto string_too_short; - } - substr += tmplen; - sublen -= tmplen; - - /* Stop if the unit is years */ - if (base == NPY_FR_Y) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } + /* If it ran out of space or there isn't space for the NULL terminator */ + if (tmplen < 0 || tmplen > sublen) { + goto string_too_short; + } + substr += tmplen; + sublen -= tmplen; - /* MONTH */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '-'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->month / 10) + '0'); - if (sublen < 3) { - goto string_too_short; + /* Stop if the unit is years */ + if (base == NPY_FR_Y) { + if (sublen > 0) { + *substr = '\0'; } - substr[2] = (char)((dts->month % 10) + '0'); - substr += 3; - sublen -= 3; + return 0; + } - /* Stop if the unit is months */ - if (base == NPY_FR_M) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } + /* MONTH */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->month / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->month % 10) + '0'); + substr += 3; + sublen -= 3; - /* DAY */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '-'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->day / 10) + '0'); - if (sublen < 3) { - goto string_too_short; + /* Stop if the unit is months */ + if (base == NPY_FR_M) { + if (sublen > 0) { + *substr = '\0'; } - substr[2] = (char)((dts->day % 10) + '0'); - substr += 3; - sublen -= 3; + return 0; + } - /* Stop if the unit is days */ - if (base == NPY_FR_D) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } + /* DAY */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->day / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->day % 10) + '0'); + substr += 3; + sublen -= 3; - /* HOUR */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = 'T'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->hour / 10) + '0'); - if (sublen < 3) { - goto string_too_short; + /* Stop if the unit is days */ + if (base == NPY_FR_D) { + if (sublen > 0) { + *substr = '\0'; } - substr[2] = (char)((dts->hour % 10) + '0'); - substr += 3; - sublen -= 3; + return 0; + } - /* Stop if the unit is hours */ - if (base == NPY_FR_h) { - goto add_time_zone; - } + /* HOUR */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = 'T'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->hour / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->hour % 10) + '0'); + substr += 3; + sublen -= 3; - /* MINUTE */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = ':'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->min / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->min % 10) + '0'); - substr += 3; - sublen -= 3; + /* Stop if the unit is hours */ + if (base == NPY_FR_h) { + goto add_time_zone; + } - /* Stop if the unit is minutes */ - if (base == NPY_FR_m) { - goto add_time_zone; - } + /* MINUTE */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->min / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->min % 10) + '0'); + substr += 3; + sublen -= 3; - /* SECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = ':'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->sec / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->sec % 10) + '0'); - substr += 3; - sublen -= 3; + /* Stop if the unit is minutes */ + if (base == NPY_FR_m) { + goto add_time_zone; + } - /* Stop if the unit is seconds */ - if (base == NPY_FR_s) { - goto add_time_zone; - } + /* SECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->sec / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->sec % 10) + '0'); + substr += 3; + sublen -= 3; - /* MILLISECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '.'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->us / 100000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->us / 10000) % 10 + '0'); - if (sublen < 4) { - goto string_too_short; - } - substr[3] = (char)((dts->us / 1000) % 10 + '0'); - substr += 4; - sublen -= 4; + /* Stop if the unit is seconds */ + if (base == NPY_FR_s) { + goto add_time_zone; + } - /* Stop if the unit is milliseconds */ - if (base == NPY_FR_ms) { - goto add_time_zone; - } + /* MILLISECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '.'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 100000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->us / 10000) % 10 + '0'); + if (sublen < 4) { + goto string_too_short; + } + substr[3] = (char)((dts->us / 1000) % 10 + '0'); + substr += 4; + sublen -= 4; - /* MICROSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->us / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->us / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->us % 10 + '0'); - substr += 3; - sublen -= 3; + /* Stop if the unit is milliseconds */ + if (base == NPY_FR_ms) { + goto add_time_zone; + } - /* Stop if the unit is microseconds */ - if (base == NPY_FR_us) { - goto add_time_zone; - } + /* MICROSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->us / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->us % 10 + '0'); + substr += 3; + sublen -= 3; - /* NANOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->ps / 100000) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->ps / 10000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->ps / 1000) % 10 + '0'); - substr += 3; - sublen -= 3; + /* Stop if the unit is microseconds */ + if (base == NPY_FR_us) { + goto add_time_zone; + } - /* Stop if the unit is nanoseconds */ - if (base == NPY_FR_ns) { - goto add_time_zone; - } + /* NANOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->ps / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; - /* PICOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->ps / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->ps / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->ps % 10 + '0'); - substr += 3; - sublen -= 3; + /* Stop if the unit is nanoseconds */ + if (base == NPY_FR_ns) { + goto add_time_zone; + } - /* Stop if the unit is picoseconds */ - if (base == NPY_FR_ps) { - goto add_time_zone; - } + /* PICOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->ps % 10 + '0'); + substr += 3; + sublen -= 3; - /* FEMTOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->as / 100000) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->as / 10000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->as / 1000) % 10 + '0'); - substr += 3; - sublen -= 3; + /* Stop if the unit is picoseconds */ + if (base == NPY_FR_ps) { + goto add_time_zone; + } - /* Stop if the unit is femtoseconds */ - if (base == NPY_FR_fs) { - goto add_time_zone; - } + /* FEMTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->as / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; - /* ATTOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->as / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->as / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->as % 10 + '0'); - substr += 3; - sublen -= 3; + /* Stop if the unit is femtoseconds */ + if (base == NPY_FR_fs) { + goto add_time_zone; + } + + /* ATTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->as % 10 + '0'); + substr += 3; + sublen -= 3; add_time_zone: - /* UTC "Zulu" time */ - if (utc) { - if (sublen < 1) { - goto string_too_short; - } - substr[0] = 'Z'; - substr += 1; - sublen -= 1; - } - /* Add a NULL terminator, and return */ - if (sublen > 0) { - substr[0] = '\0'; + /* UTC "Zulu" time */ + if (utc) { + if (sublen < 1) { + goto string_too_short; } + substr[0] = 'Z'; + substr += 1; + sublen -= 1; + } + /* Add a NULL terminator, and return */ + if (sublen > 0) { + substr[0] = '\0'; + } - return 0; + return 0; string_too_short: - PyErr_Format(PyExc_RuntimeError, - "The string provided for NumPy ISO datetime formatting " - "was too short, with length %d", - outlen); - return -1; + PyErr_Format(PyExc_RuntimeError, + "The string provided for NumPy ISO datetime formatting " + "was too short, with length %d", + outlen); + return -1; } - -static int make_iso_8601_timedelta(pandas_timedeltastruct *tds, - char *outstr, size_t *outlen) { +static int make_iso_8601_timedelta(pandas_timedeltastruct *tds, char *outstr, + size_t *outlen) { *outlen = 0; - *outlen += snprintf(outstr, 60, // NOLINT - "P%" NPY_INT64_FMT - "DT%" NPY_INT32_FMT - "H%" NPY_INT32_FMT - "M%" NPY_INT32_FMT, - tds->days, tds->hrs, tds->min, tds->sec); + *outlen += snprintf(outstr, 60, // NOLINT + "P%" NPY_INT64_FMT "DT%" NPY_INT32_FMT "H%" NPY_INT32_FMT + "M%" NPY_INT32_FMT, + tds->days, tds->hrs, tds->min, tds->sec); outstr += *outlen; if (tds->ns != 0) { - *outlen += snprintf(outstr, 12, // NOLINT - ".%03" NPY_INT32_FMT - "%03" NPY_INT32_FMT - "%03" NPY_INT32_FMT - "S", tds->ms, tds->us, tds->ns); + *outlen += snprintf(outstr, 12, // NOLINT + ".%03" NPY_INT32_FMT "%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT "S", + tds->ms, tds->us, tds->ns); } else if (tds->us != 0) { - *outlen += snprintf(outstr, 9, // NOLINT - ".%03" NPY_INT32_FMT - "%03" NPY_INT32_FMT - "S", tds->ms, tds->us); + *outlen += snprintf(outstr, 9, // NOLINT + ".%03" NPY_INT32_FMT "%03" NPY_INT32_FMT "S", tds->ms, + tds->us); } else if (tds->ms != 0) { - *outlen += snprintf(outstr, 6, // NOLINT + *outlen += snprintf(outstr, 6, // NOLINT ".%03" NPY_INT32_FMT "S", tds->ms); } else { - *outlen += snprintf(outstr, 2, // NOLINT + *outlen += snprintf(outstr, 2, // NOLINT "%s", "S"); } return 0; } - /* Converts the int64_t representation of a datetime to ISO; mutates len */ static char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { - npy_datetimestruct dts; - int ret_code; + npy_datetimestruct dts; + int ret_code; - pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); + pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); - *len = (size_t)get_datetime_iso_8601_strlen(0, base); - char *result = PyObject_Malloc(*len); + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); - if (result == NULL) { - PyErr_NoMemory(); - return NULL; - } - // datetime64 is always naive - ret_code = make_iso_8601_datetime(&dts, result, *len, 0, base); - if (ret_code != 0) { - PyErr_SetString(PyExc_ValueError, - "Could not convert datetime value to string"); - PyObject_Free(result); - } + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + // datetime64 is always naive + ret_code = make_iso_8601_datetime(&dts, result, *len, 0, base); + if (ret_code != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + } - // Note that get_datetime_iso_8601_strlen just gives a generic size - // for ISO string conversion, not the actual size used - *len = strlen(result); - return result; + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; } static npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { - scaleNanosecToUnit(&dt, base); - return dt; + scaleNanosecToUnit(&dt, base); + return dt; } /* Convert PyDatetime To ISO C-string. mutates len */ static char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, - size_t *len) { - npy_datetimestruct dts; - int ret; - - ret = convert_pydatetime_to_datetimestruct(obj, &dts); - if (ret != 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Could not convert PyDateTime to numpy datetime"); - } - return NULL; - } - - *len = (size_t)get_datetime_iso_8601_strlen(0, base); - char *result = PyObject_Malloc(*len); - // Check to see if PyDateTime has a timezone. - // Don't convert to UTC if it doesn't. - int is_tz_aware = 0; - if (PyObject_HasAttrString(obj, "tzinfo")) { - PyObject *offset = extract_utc_offset(obj); - if (offset == NULL) { - PyObject_Free(result); - return NULL; - } - is_tz_aware = offset != Py_None; - Py_DECREF(offset); - } - ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base); + size_t *len) { + npy_datetimestruct dts; + int ret; - if (ret != 0) { - PyErr_SetString(PyExc_ValueError, - "Could not convert datetime value to string"); - PyObject_Free(result); - return NULL; + ret = convert_pydatetime_to_datetimestruct(obj, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); } + return NULL; + } + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + // Check to see if PyDateTime has a timezone. + // Don't convert to UTC if it doesn't. + int is_tz_aware = 0; + if (PyObject_HasAttrString(obj, "tzinfo")) { + PyObject *offset = extract_utc_offset(obj); + if (offset == NULL) { + PyObject_Free(result); + return NULL; + } + is_tz_aware = offset != Py_None; + Py_DECREF(offset); + } + ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base); + + if (ret != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + return NULL; + } - // Note that get_datetime_iso_8601_strlen just gives a generic size - // for ISO string conversion, not the actual size used - *len = strlen(result); - return result; + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; } static npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) { - npy_datetimestruct dts; - int ret; - - ret = convert_pydatetime_to_datetimestruct(dt, &dts); - if (ret != 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Could not convert PyDateTime to numpy datetime"); - } - // TODO(username): is setting errMsg required? - // ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; - // return NULL; - } + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(dt, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + // TODO(username): is setting errMsg required? + // ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + // return NULL; + } - npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); - return NpyDateTimeToEpoch(npy_dt, base); + npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); + return NpyDateTimeToEpoch(npy_dt, base); } /* @@ -1576,22 +1557,19 @@ static npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) { */ typedef enum { - COMPARISON_SUCCESS, - COMPLETED_PARTIAL_MATCH, - COMPARISON_ERROR + COMPARISON_SUCCESS, + COMPLETED_PARTIAL_MATCH, + COMPARISON_ERROR } DatetimePartParseResult; // This function will advance the pointer on format // and decrement characters_remaining by n on success // On failure will return COMPARISON_ERROR without incrementing // If `format_requirement` is PARTIAL_MATCH, and the `format` string has // been exhausted, then return COMPLETED_PARTIAL_MATCH. -static DatetimePartParseResult compare_format( - const char **format, - int *characters_remaining, - const char *compare_to, - int n, - const FormatRequirement format_requirement -) { +static DatetimePartParseResult +compare_format(const char **format, int *characters_remaining, + const char *compare_to, int n, + const FormatRequirement format_requirement) { if (format_requirement == INFER_FORMAT) { return COMPARISON_SUCCESS; } @@ -1618,664 +1596,676 @@ static DatetimePartParseResult compare_format( } static int parse_iso_8601_datetime(const char *str, int len, int want_exc, - npy_datetimestruct *out, - NPY_DATETIMEUNIT *out_bestunit, - int *out_local, int *out_tzoffset, - const char* format, int format_len, - FormatRequirement format_requirement) { - if (len < 0 || format_len < 0) - goto parse_error; - int year_leap = 0; - int i, numdigits; - const char *substr; - int sublen; - NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; - DatetimePartParseResult comparison; - - /* If year-month-day are separated by a valid separator, - * months/days without leading zeroes will be parsed - * (though not iso8601). If the components aren't separated, - * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are - * forbidden here (but parsed as YYMMDD elsewhere). - */ - int has_ymd_sep = 0; - char ymd_sep = '\0'; - char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '}; - int valid_ymd_sep_len = sizeof(valid_ymd_sep); - - /* hour-minute-second may or may not separated by ':'. If not, then - * each component must be 2 digits. */ - int has_hms_sep = 0; - int hour_was_2_digits = 0; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->month = 1; - out->day = 1; - - substr = str; - sublen = len; - - /* Skip leading whitespace */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - /* Leading '-' sign for negative year */ - if (*substr == '-') { - ++substr; - --sublen; - } - - if (sublen == 0) { - goto parse_error; - } - - /* PARSE THE YEAR (4 digits) */ - comparison = compare_format(&format, &format_len, "%Y", 2, format_requirement); + npy_datetimestruct *out, + NPY_DATETIMEUNIT *out_bestunit, + int *out_local, int *out_tzoffset, + const char *format, int format_len, + FormatRequirement format_requirement) { + if (len < 0 || format_len < 0) + goto parse_error; + int year_leap = 0; + int i, numdigits; + const char *substr; + int sublen; + NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; + DatetimePartParseResult comparison; + + /* If year-month-day are separated by a valid separator, + * months/days without leading zeroes will be parsed + * (though not iso8601). If the components aren't separated, + * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are + * forbidden here (but parsed as YYMMDD elsewhere). + */ + int has_ymd_sep = 0; + char ymd_sep = '\0'; + char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '}; + int valid_ymd_sep_len = sizeof(valid_ymd_sep); + + /* hour-minute-second may or may not separated by ':'. If not, then + * each component must be 2 digits. */ + int has_hms_sep = 0; + int hour_was_2_digits = 0; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + substr = str; + sublen = len; + + /* Skip leading whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = + compare_format(&format, &format_len, " ", 1, format_requirement); if (comparison == COMPARISON_ERROR) { - goto parse_error; + goto parse_error; } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; + goto finish; } + } - out->year = 0; - if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) && - isdigit(substr[2]) && isdigit(substr[3])) { - out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') + - 10 * (substr[2] - '0') + (substr[3] - '0'); + /* Leading '-' sign for negative year */ + if (*substr == '-') { + ++substr; + --sublen; + } - substr += 4; - sublen -= 4; - } + if (sublen == 0) { + goto parse_error; + } - /* Negate the year if necessary */ - if (str[0] == '-') { - out->year = -out->year; - } - /* Check whether it's a leap-year */ - year_leap = is_leapyear(out->year); + /* PARSE THE YEAR (4 digits) */ + comparison = + compare_format(&format, &format_len, "%Y", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } - /* Next character must be a separator, start of month, or end of string */ - if (sublen == 0) { - if (out_local != NULL) { - *out_local = 0; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_Y; - goto finish; - } + out->year = 0; + if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) && + isdigit(substr[2]) && isdigit(substr[3])) { + out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') + + 10 * (substr[2] - '0') + (substr[3] - '0'); - if (!isdigit(*substr)) { - for (i = 0; i < valid_ymd_sep_len; ++i) { - if (*substr == valid_ymd_sep[i]) { - break; - } - } - if (i == valid_ymd_sep_len) { - goto parse_error; - } - has_ymd_sep = 1; - ymd_sep = valid_ymd_sep[i]; - ++substr; - --sublen; + substr += 4; + sublen -= 4; + } - comparison = compare_format(&format, &format_len, &ymd_sep, 1, - format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* Cannot have trailing separator */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - } + /* Negate the year if necessary */ + if (str[0] == '-') { + out->year = -out->year; + } + /* Check whether it's a leap-year */ + year_leap = is_leapyear(out->year); - /* PARSE THE MONTH */ - comparison = compare_format(&format, &format_len, "%m", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - out->month = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->month = 10 * out->month + (*substr - '0'); - ++substr; - --sublen; - } else if (!has_ymd_sep) { - goto parse_error; + /* Next character must be a separator, start of month, or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; } - if (out->month < 1 || out->month > 12) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Month out of range in datetime string \"%s\"", str); - } - goto error; + if (format_len) { + goto parse_error; } + bestunit = NPY_FR_Y; + goto finish; + } - /* Next character must be the separator, start of day, or end of string */ - if (sublen == 0) { - bestunit = NPY_FR_M; - /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */ - if (!has_ymd_sep) { - goto parse_error; - } - if (format_len) { - goto parse_error; - } - if (out_local != NULL) { - *out_local = 0; - } - goto finish; + if (!isdigit(*substr)) { + for (i = 0; i < valid_ymd_sep_len; ++i) { + if (*substr == valid_ymd_sep[i]) { + break; + } } - - if (has_ymd_sep) { - /* Must have separator, but cannot be trailing */ - if (*substr != ymd_sep || sublen == 1) { - goto parse_error; - } - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, &ymd_sep, 1, - format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } + if (i == valid_ymd_sep_len) { + goto parse_error; } + has_ymd_sep = 1; + ymd_sep = valid_ymd_sep[i]; + ++substr; + --sublen; - /* PARSE THE DAY */ - comparison = compare_format(&format, &format_len, "%d", 2, format_requirement); + comparison = + compare_format(&format, &format_len, &ymd_sep, 1, format_requirement); if (comparison == COMPARISON_ERROR) { - goto parse_error; + goto parse_error; } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; + goto finish; } - /* First digit required */ - if (!isdigit(*substr)) { - goto parse_error; + /* Cannot have trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; } - out->day = (*substr - '0'); + } + + /* PARSE THE MONTH */ + comparison = + compare_format(&format, &format_len, "%m", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + out->month = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->month = 10 * out->month + (*substr - '0'); ++substr; --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->day = 10 * out->day + (*substr - '0'); - ++substr; - --sublen; - } else if (!has_ymd_sep) { - goto parse_error; - } - if (out->day < 1 || - out->day > days_per_month_table[year_leap][out->month - 1]) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Day out of range in datetime string \"%s\"", str); - } - goto error; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->month < 1 || out->month > 12) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Month out of range in datetime string \"%s\"", str); } + goto error; + } - /* Next character must be a 'T', ' ', or end of string */ - if (sublen == 0) { - if (out_local != NULL) { - *out_local = 0; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_D; - goto finish; + /* Next character must be the separator, start of day, or end of string */ + if (sublen == 0) { + bestunit = NPY_FR_M; + /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */ + if (!has_ymd_sep) { + goto parse_error; + } + if (format_len) { + goto parse_error; + } + if (out_local != NULL) { + *out_local = 0; } + goto finish; + } - if ((*substr != 'T' && *substr != ' ') || sublen == 1) { - goto parse_error; + if (has_ymd_sep) { + /* Must have separator, but cannot be trailing */ + if (*substr != ymd_sep || sublen == 1) { + goto parse_error; } - comparison = compare_format(&format, &format_len, substr, 1, format_requirement); + ++substr; + --sublen; + comparison = + compare_format(&format, &format_len, &ymd_sep, 1, format_requirement); if (comparison == COMPARISON_ERROR) { - goto parse_error; + goto parse_error; } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; + goto finish; } + } + + /* PARSE THE DAY */ + comparison = + compare_format(&format, &format_len, "%d", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->day = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->day = 10 * out->day + (*substr - '0'); ++substr; --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->day < 1 || + out->day > days_per_month_table[year_leap][out->month - 1]) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Day out of range in datetime string \"%s\"", str); + } + goto error; + } - /* PARSE THE HOURS */ - comparison = compare_format(&format, &format_len, "%H", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; + /* Next character must be a 'T', ' ', or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; } - /* First digit required */ - if (!isdigit(*substr)) { - goto parse_error; + if (format_len) { + goto parse_error; } - out->hour = (*substr - '0'); + bestunit = NPY_FR_D; + goto finish; + } + + if ((*substr != 'T' && *substr != ' ') || sublen == 1) { + goto parse_error; + } + comparison = + compare_format(&format, &format_len, substr, 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + ++substr; + --sublen; + + /* PARSE THE HOURS */ + comparison = + compare_format(&format, &format_len, "%H", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->hour = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional */ + if (isdigit(*substr)) { + hour_was_2_digits = 1; + out->hour = 10 * out->hour + (*substr - '0'); ++substr; --sublen; - /* Second digit optional */ - if (isdigit(*substr)) { - hour_was_2_digits = 1; - out->hour = 10 * out->hour + (*substr - '0'); - ++substr; - --sublen; - if (out->hour >= 24) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Hours out of range in datetime string \"%s\"", - str); - } - goto error; - } + if (out->hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Hours out of range in datetime string \"%s\"", str); + } + goto error; } + } - /* Next character must be a ':' or the end of the string */ - if (sublen == 0) { - if (!hour_was_2_digits) { - goto parse_error; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_h; - goto finish; + /* Next character must be a ':' or the end of the string */ + if (sublen == 0) { + if (!hour_was_2_digits) { + goto parse_error; } - - if (*substr == ':') { - has_hms_sep = 1; - ++substr; - --sublen; - /* Cannot have a trailing separator */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - comparison = compare_format(&format, &format_len, ":", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } else if (!isdigit(*substr)) { - if (!hour_was_2_digits) { - goto parse_error; - } - goto parse_timezone; + if (format_len) { + goto parse_error; } + bestunit = NPY_FR_h; + goto finish; + } - /* PARSE THE MINUTES */ - comparison = compare_format(&format, &format_len, "%M", 2, format_requirement); + if (*substr == ':') { + has_hms_sep = 1; + ++substr; + --sublen; + /* Cannot have a trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + comparison = + compare_format(&format, &format_len, ":", 1, format_requirement); if (comparison == COMPARISON_ERROR) { - goto parse_error; + goto parse_error; } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; + goto finish; } - /* First digit required */ - out->min = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->min = 10 * out->min + (*substr - '0'); - ++substr; - --sublen; - if (out->min >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Minutes out of range in datetime string \"%s\"", - str); - } - goto error; - } - } else if (!has_hms_sep) { - goto parse_error; + } else if (!isdigit(*substr)) { + if (!hour_was_2_digits) { + goto parse_error; } + goto parse_timezone; + } - if (sublen == 0) { - bestunit = NPY_FR_m; - if (format_len) { - goto parse_error; - } - goto finish; + /* PARSE THE MINUTES */ + comparison = + compare_format(&format, &format_len, "%M", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + out->min = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->min = 10 * out->min + (*substr - '0'); + ++substr; + --sublen; + if (out->min >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Minutes out of range in datetime string \"%s\"", str); + } + goto error; } + } else if (!has_hms_sep) { + goto parse_error; + } - /* If we make it through this condition block, then the next - * character is a digit. */ - if (has_hms_sep && *substr == ':') { - comparison = compare_format(&format, &format_len, ":", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - ++substr; - --sublen; - /* Cannot have a trailing ':' */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - } else if (!has_hms_sep && isdigit(*substr)) { - } else { - goto parse_timezone; + if (sublen == 0) { + bestunit = NPY_FR_m; + if (format_len) { + goto parse_error; } + goto finish; + } - /* PARSE THE SECONDS */ - comparison = compare_format(&format, &format_len, "%S", 2, format_requirement); + /* If we make it through this condition block, then the next + * character is a digit. */ + if (has_hms_sep && *substr == ':') { + comparison = + compare_format(&format, &format_len, ":", 1, format_requirement); if (comparison == COMPARISON_ERROR) { - goto parse_error; + goto parse_error; } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; + goto finish; } - /* First digit required */ - out->sec = (*substr - '0'); ++substr; --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->sec = 10 * out->sec + (*substr - '0'); - ++substr; - --sublen; - if (out->sec >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Seconds out of range in datetime string \"%s\"", - str); - } - goto error; - } - } else if (!has_hms_sep) { - goto parse_error; + /* Cannot have a trailing ':' */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; } + } else if (!has_hms_sep && isdigit(*substr)) { + } else { + goto parse_timezone; + } - /* Next character may be a '.' indicating fractional seconds */ - if (sublen > 0 && *substr == '.') { - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, ".", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } else { - bestunit = NPY_FR_s; - goto parse_timezone; + /* PARSE THE SECONDS */ + comparison = + compare_format(&format, &format_len, "%S", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + out->sec = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->sec = 10 * out->sec + (*substr - '0'); + ++substr; + --sublen; + if (out->sec >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Seconds out of range in datetime string \"%s\"", str); + } + goto error; } + } else if (!has_hms_sep) { + goto parse_error; + } - /* PARSE THE MICROSECONDS (0 to 6 digits) */ - comparison = compare_format(&format, &format_len, "%f", 2, format_requirement); + /* Next character may be a '.' indicating fractional seconds */ + if (sublen > 0 && *substr == '.') { + ++substr; + --sublen; + comparison = + compare_format(&format, &format_len, ".", 1, format_requirement); if (comparison == COMPARISON_ERROR) { - goto parse_error; + goto parse_error; } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->us *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->us += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } + goto finish; } + } else { + bestunit = NPY_FR_s; + goto parse_timezone; + } - if (sublen == 0 || !isdigit(*substr)) { - if (numdigits > 3) { - bestunit = NPY_FR_us; - } else { - bestunit = NPY_FR_ms; - } - goto parse_timezone; - } - - /* PARSE THE PICOSECONDS (0 to 6 digits) */ - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->ps *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->ps += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } + /* PARSE THE MICROSECONDS (0 to 6 digits) */ + comparison = + compare_format(&format, &format_len, "%f", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->us *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->us += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; } + } - if (sublen == 0 || !isdigit(*substr)) { - if (numdigits > 3) { - bestunit = NPY_FR_ps; - } else { - bestunit = NPY_FR_ns; - } - goto parse_timezone; - } - - /* PARSE THE ATTOSECONDS (0 to 6 digits) */ - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->as *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->as += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } + if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_us; + } else { + bestunit = NPY_FR_ms; + } + goto parse_timezone; + } + + /* PARSE THE PICOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->ps *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->ps += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; } + } + if (sublen == 0 || !isdigit(*substr)) { if (numdigits > 3) { - bestunit = NPY_FR_as; + bestunit = NPY_FR_ps; } else { - bestunit = NPY_FR_fs; + bestunit = NPY_FR_ns; } + goto parse_timezone; + } -parse_timezone: - /* trim any whitespace between time/timezone */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } + /* PARSE THE ATTOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->as *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->as += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; } + } - if (sublen == 0) { - // Unlike NumPy, treating no time zone as naive - if (format_len > 0) { - goto parse_error; - } - goto finish; - } + if (numdigits > 3) { + bestunit = NPY_FR_as; + } else { + bestunit = NPY_FR_fs; + } - /* UTC specifier */ - if (*substr == 'Z') { - comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* "Z" should be equivalent to tz offset "+00:00" */ - if (out_local != NULL) { - *out_local = 1; - } +parse_timezone: + /* trim any whitespace between time/timezone */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = + compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } - if (out_tzoffset != NULL) { - *out_tzoffset = 0; - } + if (sublen == 0) { + // Unlike NumPy, treating no time zone as naive + if (format_len > 0) { + goto parse_error; + } + goto finish; + } - if (sublen == 1) { - if (format_len > 0) { - goto parse_error; - } - goto finish; - } else { - ++substr; - --sublen; - } - } else if (*substr == '-' || *substr == '+') { - comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* Time zone offset */ - int offset_neg = 0, offset_hour = 0, offset_minute = 0; + /* UTC specifier */ + if (*substr == 'Z') { + comparison = + compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* "Z" should be equivalent to tz offset "+00:00" */ + if (out_local != NULL) { + *out_local = 1; + } - /* - * Since "local" means local with respect to the current - * machine, we say this is non-local. - */ + if (out_tzoffset != NULL) { + *out_tzoffset = 0; + } - if (*substr == '-') { - offset_neg = 1; - } - ++substr; - --sublen; + if (sublen == 1) { + if (format_len > 0) { + goto parse_error; + } + goto finish; + } else { + ++substr; + --sublen; + } + } else if (*substr == '-' || *substr == '+') { + comparison = + compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* Time zone offset */ + int offset_neg = 0, offset_hour = 0, offset_minute = 0; - /* The hours offset */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); - substr += 2; - sublen -= 2; - if (offset_hour >= 24) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Timezone hours offset out of range " - "in datetime string \"%s\"", - str); - } - goto error; - } - } else if (sublen >= 1 && isdigit(substr[0])) { - offset_hour = substr[0] - '0'; - ++substr; - --sublen; - } else { - goto parse_error; - } + /* + * Since "local" means local with respect to the current + * machine, we say this is non-local. + */ - /* The minutes offset is optional */ - if (sublen > 0) { - /* Optional ':' */ - if (*substr == ':') { - ++substr; - --sublen; - } - - /* The minutes offset (at the end of the string) */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); - substr += 2; - sublen -= 2; - if (offset_minute >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Timezone minutes offset out of range " - "in datetime string \"%s\"", - str); - } - goto error; - } - } else if (sublen >= 1 && isdigit(substr[0])) { - offset_minute = substr[0] - '0'; - ++substr; - --sublen; - } else { - goto parse_error; - } - } + if (*substr == '-') { + offset_neg = 1; + } + ++substr; + --sublen; - /* Apply the time zone offset */ - if (offset_neg) { - offset_hour = -offset_hour; - offset_minute = -offset_minute; - } - if (out_local != NULL) { - *out_local = 1; - // Unlike NumPy, do not change internal value to local time - *out_tzoffset = 60 * offset_hour + offset_minute; + /* The hours offset */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone hours offset out of range " + "in datetime string \"%s\"", + str); } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_hour = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; } - /* Skip trailing whitespace */ - while (sublen > 0 && isspace(*substr)) { + /* The minutes offset is optional */ + if (sublen > 0) { + /* Optional ':' */ + if (*substr == ':') { ++substr; --sublen; - comparison = compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } + } + + /* The minutes offset (at the end of the string) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_minute >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone minutes offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_minute = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } } - if ((sublen != 0) || (format_len != 0)) { - goto parse_error; + /* Apply the time zone offset */ + if (offset_neg) { + offset_hour = -offset_hour; + offset_minute = -offset_minute; + } + if (out_local != NULL) { + *out_local = 1; + // Unlike NumPy, do not change internal value to local time + *out_tzoffset = 60 * offset_hour + offset_minute; } + } -finish: - if (out_bestunit != NULL) { - *out_bestunit = bestunit; + /* Skip trailing whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = + compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } - return 0; + } + + if ((sublen != 0) || (format_len != 0)) { + goto parse_error; + } + +finish: + if (out_bestunit != NULL) { + *out_bestunit = bestunit; + } + return 0; parse_error: - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Error parsing datetime string \"%s\" at position %d", str, - (int)(substr - str)); - } - return -1; + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Error parsing datetime string \"%s\" at position %d", str, + (int)(substr - str)); + } + return -1; error: - return -1; + return -1; } - /* Converts the int64_t representation of a duration to ISO; mutates len */ static char *int64ToIsoDuration(int64_t value, size_t *len) { - pandas_timedeltastruct tds; - int ret_code; + pandas_timedeltastruct tds; + int ret_code; - pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds); + pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds); - // Max theoretical length of ISO Duration with 64 bit day - // as the largest unit is 70 characters + 1 for a null terminator - char *result = PyObject_Malloc(71); - if (result == NULL) { - PyErr_NoMemory(); - return NULL; - } + // Max theoretical length of ISO Duration with 64 bit day + // as the largest unit is 70 characters + 1 for a null terminator + char *result = PyObject_Malloc(71); + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } - ret_code = make_iso_8601_timedelta(&tds, result, len); - if (ret_code == -1) { - PyErr_SetString(PyExc_ValueError, - "Could not convert timedelta value to string"); - PyObject_Free(result); - return NULL; - } + ret_code = make_iso_8601_timedelta(&tds, result, len); + if (ret_code == -1) { + PyErr_SetString(PyExc_ValueError, + "Could not convert timedelta value to string"); + PyObject_Free(result); + return NULL; + } - return result; + return result; } /* @@ -2286,13 +2276,10 @@ static char *int64ToIsoDuration(int64_t value, size_t *len) { */ static PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { - return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); + return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); } - - -static void -pandas_datetime_destructor(PyObject *op) { +static void pandas_datetime_destructor(PyObject *op) { void *ptr = PyCapsule_GetPointer(op, PandasDateTime_CAPSULE_NAME); PyMem_Free(ptr); } @@ -2312,8 +2299,10 @@ static int pandas_datetime_exec(PyObject *module) { capi->PyDateTimeToEpoch = PyDateTimeToEpoch; capi->int64ToIsoDuration = int64ToIsoDuration; capi->pandas_datetime_to_datetimestruct = pandas_datetime_to_datetimestruct; - capi->pandas_timedelta_to_timedeltastruct = pandas_timedelta_to_timedeltastruct; - capi->convert_pydatetime_to_datetimestruct = convert_pydatetime_to_datetimestruct; + capi->pandas_timedelta_to_timedeltastruct = + pandas_timedelta_to_timedeltastruct; + capi->convert_pydatetime_to_datetimestruct = + convert_pydatetime_to_datetimestruct; capi->cmp_npy_datetimestruct = cmp_npy_datetimestruct; capi->get_datetime_metadata_from_dtype = get_datetime_metadata_from_dtype; capi->parse_iso_8601_datetime = parse_iso_8601_datetime; @@ -2321,8 +2310,7 @@ static int pandas_datetime_exec(PyObject *module) { capi->make_iso_8601_datetime = make_iso_8601_datetime; capi->make_iso_8601_timedelta = make_iso_8601_timedelta; - PyObject *capsule = PyCapsule_New(capi, - PandasDateTime_CAPSULE_NAME, + PyObject *capsule = PyCapsule_New(capi, PandasDateTime_CAPSULE_NAME, pandas_datetime_destructor); if (capsule == NULL) { PyMem_Free(capi); @@ -2335,9 +2323,10 @@ static int pandas_datetime_exec(PyObject *module) { // done when cpython gh-6898 gets implemented PyObject *pandas = PyImport_ImportModule("pandas"); if (!pandas) { - PyErr_SetString(PyExc_ImportError, "pd_datetime.c could not import module pandas"); + PyErr_SetString(PyExc_ImportError, + "pd_datetime.c could not import module pandas"); Py_DECREF(capsule); - // cpython doesn't PyMem_Free(capi) here - mistake or intentional? + // cpython doesn't PyMem_Free(capi) here - mistake or intentional? return -1; } @@ -2354,16 +2343,14 @@ static PyModuleDef_Slot pandas_datetime_slots[] = { {Py_mod_exec, pandas_datetime_exec}, {0, NULL}}; static struct PyModuleDef pandas_datetimemodule = { - PyModuleDef_HEAD_INIT, - .m_name = "pandas._libs.pandas_datetime", + PyModuleDef_HEAD_INIT, + .m_name = "pandas._libs.pandas_datetime", - .m_doc = "Internal module with datetime support for other extensions", - .m_size = 0, - .m_methods = NULL, - .m_slots = pandas_datetime_slots -}; + .m_doc = "Internal module with datetime support for other extensions", + .m_size = 0, + .m_methods = NULL, + .m_slots = pandas_datetime_slots}; -PyMODINIT_FUNC -PyInit_pandas_datetime(void) { +PyMODINIT_FUNC PyInit_pandas_datetime(void) { return PyModuleDef_Init(&pandas_datetimemodule); } diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index 7ff99f71f88de..456712c01d723 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. */ -#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H -#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H_ +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H_ #ifndef NPY_NO_DEPRECATED_API #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION @@ -21,45 +21,42 @@ extern "C" { #endif typedef struct { - npy_int64 days; - npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds; + npy_int64 days; + npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds; } pandas_timedeltastruct; -const npy_datetimestruct _AS_MIN_DTS = { - 1969, 12, 31, 23, 59, 50, 776627, 963145, 224193}; -const npy_datetimestruct _FS_MIN_DTS = { - 1969, 12, 31, 21, 26, 16, 627963, 145224, 193000}; -const npy_datetimestruct _PS_MIN_DTS = { - 1969, 9, 16, 5, 57, 7, 963145, 224193, 0}; -const npy_datetimestruct _NS_MIN_DTS = { - 1677, 9, 21, 0, 12, 43, 145224, 193000, 0}; -const npy_datetimestruct _US_MIN_DTS = { - -290308, 12, 21, 19, 59, 05, 224193, 0, 0}; -const npy_datetimestruct _MS_MIN_DTS = { - -292275055, 5, 16, 16, 47, 4, 193000, 0, 0}; +const npy_datetimestruct _AS_MIN_DTS = {1969, 12, 31, 23, 59, + 50, 776627, 963145, 224193}; +const npy_datetimestruct _FS_MIN_DTS = {1969, 12, 31, 21, 26, + 16, 627963, 145224, 193000}; +const npy_datetimestruct _PS_MIN_DTS = {1969, 9, 16, 5, 57, + 7, 963145, 224193, 0}; +const npy_datetimestruct _NS_MIN_DTS = {1677, 9, 21, 0, 12, + 43, 145224, 193000, 0}; +const npy_datetimestruct _US_MIN_DTS = {-290308, 12, 21, 19, 59, + 05, 224193, 0, 0}; +const npy_datetimestruct _MS_MIN_DTS = {-292275055, 5, 16, 16, 47, + 4, 193000, 0, 0}; const npy_datetimestruct _S_MIN_DTS = { -292277022657, 1, 27, 8, 29, 53, 0, 0, 0}; const npy_datetimestruct _M_MIN_DTS = { -17536621475646, 5, 4, 5, 53, 0, 0, 0, 0}; -const npy_datetimestruct _AS_MAX_DTS = { - 1970, 1, 1, 0, 0, 9, 223372, 36854, 775807}; -const npy_datetimestruct _FS_MAX_DTS = { - 1970, 1, 1, 2, 33, 43, 372036, 854775, 807000}; -const npy_datetimestruct _PS_MAX_DTS = { - 1970, 4, 17, 18, 2, 52, 36854, 775807, 0}; -const npy_datetimestruct _NS_MAX_DTS = { - 2262, 4, 11, 23, 47, 16, 854775, 807000, 0}; -const npy_datetimestruct _US_MAX_DTS = { - 294247, 1, 10, 4, 0, 54, 775807, 0, 0}; -const npy_datetimestruct _MS_MAX_DTS = { - 292278994, 8, 17, 7, 12, 55, 807000, 0, 0}; -const npy_datetimestruct _S_MAX_DTS = { - 292277026596, 12, 4, 15, 30, 7, 0, 0, 0}; +const npy_datetimestruct _AS_MAX_DTS = {1970, 1, 1, 0, 0, + 9, 223372, 36854, 775807}; +const npy_datetimestruct _FS_MAX_DTS = {1970, 1, 1, 2, 33, + 43, 372036, 854775, 807000}; +const npy_datetimestruct _PS_MAX_DTS = {1970, 4, 17, 18, 2, + 52, 36854, 775807, 0}; +const npy_datetimestruct _NS_MAX_DTS = {2262, 4, 11, 23, 47, + 16, 854775, 807000, 0}; +const npy_datetimestruct _US_MAX_DTS = {294247, 1, 10, 4, 0, 54, 775807, 0, 0}; +const npy_datetimestruct _MS_MAX_DTS = {292278994, 8, 17, 7, 12, + 55, 807000, 0, 0}; +const npy_datetimestruct _S_MAX_DTS = {292277026596, 12, 4, 15, 30, 7, 0, 0, 0}; const npy_datetimestruct _M_MAX_DTS = { 17536621479585, 8, 30, 18, 7, 0, 0, 0, 0}; - /* 'format_requirement' can be one of three values: * * PARTIAL_MATCH : Only require a partial match with 'format'. * For example, if the string is '2020-01-01 05:00:00' and @@ -69,73 +66,82 @@ const npy_datetimestruct _M_MAX_DTS = { * be able to parse it without error is '%Y-%m-%d'; * * INFER_FORMAT: parse without comparing 'format' (i.e. infer it). */ -typedef enum { - PARTIAL_MATCH, - EXACT_MATCH, - INFER_FORMAT -} FormatRequirement; - - typedef struct { - npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT, const npy_datetimestruct *); - int (*scaleNanosecToUnit)(npy_int64 *, NPY_DATETIMEUNIT); - char *(*int64ToIso)(int64_t, NPY_DATETIMEUNIT, size_t *); - npy_datetime (*NpyDateTimeToEpoch)(npy_datetime, NPY_DATETIMEUNIT); - char *(*PyDateTimeToIso)(PyObject *, NPY_DATETIMEUNIT, size_t *); - npy_datetime (*PyDateTimeToEpoch)(PyObject *, NPY_DATETIMEUNIT); - char *(*int64ToIsoDuration)(int64_t, size_t *); - void (*pandas_datetime_to_datetimestruct)(npy_datetime, NPY_DATETIMEUNIT, npy_datetimestruct *); - void (*pandas_timedelta_to_timedeltastruct)(npy_datetime, NPY_DATETIMEUNIT, pandas_timedeltastruct *); - int (*convert_pydatetime_to_datetimestruct)(PyObject *, npy_datetimestruct *); - int (*cmp_npy_datetimestruct)(const npy_datetimestruct *, const npy_datetimestruct *); - PyArray_DatetimeMetaData (*get_datetime_metadata_from_dtype)(PyArray_Descr *); - int (*parse_iso_8601_datetime)(const char*, int, int, npy_datetimestruct *, NPY_DATETIMEUNIT *, int *, int *, const char*, int, FormatRequirement); - int (*get_datetime_iso_8601_strlen)(int, NPY_DATETIMEUNIT); - int (*make_iso_8601_datetime)(npy_datetimestruct *, char *, int, int, NPY_DATETIMEUNIT); - int (*make_iso_8601_timedelta)(pandas_timedeltastruct *, char *, size_t *); - - } PandasDateTime_CAPI; - - // The capsule name appears limited to module.attributename; see bpo-32414 - // cpython has an open PR gh-6898 to fix, but hasn't had traction for years - #define PandasDateTime_CAPSULE_NAME "pandas.pandas_datetime_CAPI" - - /* block used as part of public API */ +typedef enum { PARTIAL_MATCH, EXACT_MATCH, INFER_FORMAT } FormatRequirement; + +typedef struct { + npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT, + const npy_datetimestruct *); + int (*scaleNanosecToUnit)(npy_int64 *, NPY_DATETIMEUNIT); + char *(*int64ToIso)(int64_t, NPY_DATETIMEUNIT, size_t *); + npy_datetime (*NpyDateTimeToEpoch)(npy_datetime, NPY_DATETIMEUNIT); + char *(*PyDateTimeToIso)(PyObject *, NPY_DATETIMEUNIT, size_t *); + npy_datetime (*PyDateTimeToEpoch)(PyObject *, NPY_DATETIMEUNIT); + char *(*int64ToIsoDuration)(int64_t, size_t *); + void (*pandas_datetime_to_datetimestruct)(npy_datetime, NPY_DATETIMEUNIT, + npy_datetimestruct *); + void (*pandas_timedelta_to_timedeltastruct)(npy_datetime, NPY_DATETIMEUNIT, + pandas_timedeltastruct *); + int (*convert_pydatetime_to_datetimestruct)(PyObject *, npy_datetimestruct *); + int (*cmp_npy_datetimestruct)(const npy_datetimestruct *, + const npy_datetimestruct *); + PyArray_DatetimeMetaData (*get_datetime_metadata_from_dtype)(PyArray_Descr *); + int (*parse_iso_8601_datetime)(const char *, int, int, npy_datetimestruct *, + NPY_DATETIMEUNIT *, int *, int *, const char *, + int, FormatRequirement); + int (*get_datetime_iso_8601_strlen)(int, NPY_DATETIMEUNIT); + int (*make_iso_8601_datetime)(npy_datetimestruct *, char *, int, int, + NPY_DATETIMEUNIT); + int (*make_iso_8601_timedelta)(pandas_timedeltastruct *, char *, size_t *); +} PandasDateTime_CAPI; + +// The capsule name appears limited to module.attributename; see bpo-32414 +// cpython has an open PR gh-6898 to fix, but hasn't had traction for years +#define PandasDateTime_CAPSULE_NAME "pandas.pandas_datetime_CAPI" + +/* block used as part of public API */ #ifndef _PANDAS_DATETIME_IMPL - static PandasDateTime_CAPI *PandasDateTimeAPI = NULL; +static PandasDateTime_CAPI *PandasDateTimeAPI = NULL; - #define PandasDateTime_IMPORT \ - PandasDateTimeAPI = (PandasDateTime_CAPI *)PyCapsule_Import(PandasDateTime_CAPSULE_NAME, 0) +#define PandasDateTime_IMPORT \ + PandasDateTimeAPI = \ + (PandasDateTime_CAPI *)PyCapsule_Import(PandasDateTime_CAPSULE_NAME, 0) -#define npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT, npy_datetimestruct) \ - PandasDateTimeAPI->npy_datetimestruct_to_datetime((NPY_DATETIMEUNIT), (npy_datetimestruct)) -#define scaleNanosecToUnit(value, unit) \ +#define npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT, npy_datetimestruct) \ + PandasDateTimeAPI->npy_datetimestruct_to_datetime((NPY_DATETIMEUNIT), \ + (npy_datetimestruct)) +#define scaleNanosecToUnit(value, unit) \ PandasDateTimeAPI->scaleNanosecToUnit((value), (unit)) -#define int64ToIso(value, base, len) \ +#define int64ToIso(value, base, len) \ PandasDateTimeAPI->int64ToIso((value), (base), (len)) -#define NpyDateTimeToEpoch(dt, base) \ +#define NpyDateTimeToEpoch(dt, base) \ PandasDateTimeAPI->NpyDateTimeToEpoch((dt), (base)) -#define PyDateTimeToIso(obj, base, len) \ +#define PyDateTimeToIso(obj, base, len) \ PandasDateTimeAPI->PyDateTimeToIso((obj), (base), (len)) -#define PyDateTimeToEpoch(dt, base) \ +#define PyDateTimeToEpoch(dt, base) \ PandasDateTimeAPI->PyDateTimeToEpoch((dt), (base)) -#define int64ToIsoDuration(value, len) \ +#define int64ToIsoDuration(value, len) \ PandasDateTimeAPI->int64ToIsoDuration((value), (len)) -#define pandas_datetime_to_datetimestruct(dt, base, out) \ +#define pandas_datetime_to_datetimestruct(dt, base, out) \ PandasDateTimeAPI->pandas_datetime_to_datetimestruct((dt), (base), (out)) -#define pandas_timedelta_to_timedeltastruct(td, base, out) \ +#define pandas_timedelta_to_timedeltastruct(td, base, out) \ PandasDateTimeAPI->pandas_timedelta_to_timedeltastruct((td), (base), (out)) -#define convert_pydatetime_to_datetimestruct(dtobj, out) \ +#define convert_pydatetime_to_datetimestruct(dtobj, out) \ PandasDateTimeAPI->convert_pydatetime_to_datetimestruct((dtobj), (out)) -#define cmp_npy_datetimestruct(a, b) \ +#define cmp_npy_datetimestruct(a, b) \ PandasDateTimeAPI->cmp_npy_datetimestruct((a), (b)) -#define get_datetime_metadata_from_dtype(dtype) \ +#define get_datetime_metadata_from_dtype(dtype) \ PandasDateTimeAPI->get_datetime_metadata_from_dtype((dtype)) -#define parse_iso_8601_datetime(str, len, want_exc, out, out_bestunit, out_local, out_tzoffset, format, format_len, format_requirement) \ - PandasDateTimeAPI->parse_iso_8601_datetime((str), (len), (want_exc), (out), (out_bestunit), (out_local), (out_tzoffset), (format), (format_len), (format_requirement)) -#define get_datetime_iso_8601_strlen(local, base) \ +#define parse_iso_8601_datetime(str, len, want_exc, out, out_bestunit, \ + out_local, out_tzoffset, format, format_len, \ + format_requirement) \ + PandasDateTimeAPI->parse_iso_8601_datetime( \ + (str), (len), (want_exc), (out), (out_bestunit), (out_local), \ + (out_tzoffset), (format), (format_len), (format_requirement)) +#define get_datetime_iso_8601_strlen(local, base) \ PandasDateTimeAPI->get_datetime_iso_8601_strlen((local), (base)) -#define make_iso_8601_datetime(dts, outstr, outlen, utc, base) \ - PandasDateTimeAPI->make_iso_8601_datetime((dts), (outstr), (outlen), (utc), (base)) +#define make_iso_8601_datetime(dts, outstr, outlen, utc, base) \ + PandasDateTimeAPI->make_iso_8601_datetime((dts), (outstr), (outlen), (utc), \ + (base)) #define make_iso_8601_timedelta(tds, outstr, outlen) \ PandasDateTimeAPI->make_iso_8601_timedelta((tds), (outstr), (outlen)) #endif /* !defined(_PANDAS_DATETIME_IMPL) */ @@ -143,4 +149,4 @@ typedef enum { #ifdef __cplusplus } #endif -#endif /* !defined(PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H) */ +#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H_ diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index eaa2c873c3f9f..a406f6c0dcd75 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -29,9 +29,9 @@ from .nattype cimport ( from .np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, + import_pandas_datetime, npy_datetimestruct, pandas_datetime_to_datetimestruct, - import_pandas_datetime, ) import_pandas_datetime() diff --git a/setup.py b/setup.py index 4dc21367c1f38..12a993d5f600f 100755 --- a/setup.py +++ b/setup.py @@ -116,7 +116,6 @@ def initialize_options(self): base = pjoin("pandas", "_libs", "src") tsbase = pjoin("pandas", "_libs", "tslibs", "src") - dt = pjoin(tsbase, "datetime") util = pjoin("pandas", "util") parser = pjoin(base, "parser") ujson_python = pjoin(base, "ujson", "python") @@ -633,7 +632,8 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas/_libs/src/ujson/python", "pandas/_libs/src/ujson/lib", numpy.get_include(), - ] + tseries_includes, + ] + + tseries_includes, extra_compile_args=(extra_compile_args), extra_link_args=extra_link_args, define_macros=macros, @@ -650,7 +650,8 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas._libs.pandas_datetime", depends=["pandas._libs.tslibs.datetime.pd_datetime.h"], sources=(["pandas/_libs/tslibs/src/datetime/pd_datetime.c"]), - include_dirs=tseries_includes + [ + include_dirs=tseries_includes + + [ numpy.get_include(), ], extra_compile_args=(extra_compile_args), From d93837471b10b6d112e2760aa5da7a760e8f51e6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 20 Feb 2023 23:33:24 -0800 Subject: [PATCH 12/36] import cleanups --- pandas/__init__.py | 1 - pandas/_libs/__init__.py | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 2e5d19d52469e..4fc2ce08cae73 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -302,7 +302,6 @@ "offsets", "option_context", "options", - "pandas_datetime_CAPI", "period_range", "pivot", "pivot_table", diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py index 0005b7226d865..a409bea1941f3 100644 --- a/pandas/_libs/__init__.py +++ b/pandas/_libs/__init__.py @@ -7,11 +7,13 @@ "Timestamp", "iNaT", "Interval", - "pandas_datetime", ] -import pandas._libs.pandas_datetime as pandas_datetime +# Below import needs to happen first to ensure pandas top level +# module gets monkeypatched with the pandas_datetime_CAPI +# see pandas_datetime_exec in pd_datetime.c +import pandas._libs.pandas_datetime as pandas_datetime # noqa # isort: skip from pandas._libs.interval import Interval from pandas._libs.tslibs import ( NaT, From df687b393dc88d2b6f35f48992898f521ca01a87 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 20 Feb 2023 23:33:56 -0800 Subject: [PATCH 13/36] revert init change --- pandas/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/__init__.py b/pandas/__init__.py index 4fc2ce08cae73..1a549c09d22f7 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -183,6 +183,7 @@ __git_version__ = v.get("full-revisionid") del get_versions, v + # module level doc-string __doc__ = """ pandas - a powerful data analysis and manipulation library for Python From a26f312283283ae620f5b61d2b3f3bc1bcc761fe Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 20 Feb 2023 23:38:02 -0800 Subject: [PATCH 14/36] cleanups --- pandas/_libs/src/ujson/python/objToJSON.c | 2 +- pandas/_libs/tslibs/np_datetime.pxd | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 2e96e92d6b63f..1b8ba8f3f7e6c 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -1980,7 +1980,7 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, PandasDateTime_IMPORT; if (PandasDateTimeAPI == NULL) { return NULL; - } + } static char *kwlist[] = {"obj", "ensure_ascii", diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 72a0d6252957a..7e19eb084b59e 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -60,6 +60,10 @@ cdef extern from "src/datetime/pd_datetime.h": int64_t days int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds + void pandas_datetime_to_datetimestruct(npy_datetime val, + NPY_DATETIMEUNIT fr, + npy_datetimestruct *result) nogil + npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, npy_datetimestruct *d) nogil @@ -70,10 +74,6 @@ cdef extern from "src/datetime/pd_datetime.h": void PandasDateTime_IMPORT() - void pandas_datetime_to_datetimestruct(npy_datetime val, - NPY_DATETIMEUNIT fr, - npy_datetimestruct *result) nogil - ctypedef enum FormatRequirement: PARTIAL_MATCH EXACT_MATCH @@ -136,4 +136,3 @@ cdef int64_t convert_reso( NPY_DATETIMEUNIT to_reso, bint round_ok, ) except? -1 - From 96b6f96cf3a8c0db8050a9a923bb25eae67b11af Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 21 Feb 2023 00:00:38 -0800 Subject: [PATCH 15/36] isort fixups --- pandas/_libs/missing.pyx | 2 +- pandas/_libs/tslib.pyx | 3 +-- pandas/_libs/tslibs/conversion.pyx | 4 ++-- pandas/_libs/tslibs/dtypes.pyx | 2 +- pandas/_libs/tslibs/fields.pyx | 3 ++- pandas/_libs/tslibs/offsets.pyx | 2 +- pandas/_libs/tslibs/parsing.pyx | 2 +- pandas/_libs/tslibs/period.pyx | 2 +- pandas/_libs/tslibs/strptime.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/_libs/tslibs/tzconversion.pyx | 2 +- 12 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index c6c154d3a8e88..a94f672a5fff4 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -34,7 +34,7 @@ from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_unit, get_datetime64_value, get_timedelta64_value, - import_pandas_datetime + import_pandas_datetime, ) import_pandas_datetime() diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d33bca3555bc4..b4251d5804ee6 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -34,15 +34,14 @@ from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, check_dts_bounds, + import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, pydate_to_dt64, string_to_dts, - import_pandas_datetime, ) - import_pandas_datetime() diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 95c21fac85832..31bc7ef02319d 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -17,10 +17,10 @@ from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, datetime, + import_datetime, time, timedelta, tzinfo, - import_datetime, ) import_datetime() @@ -40,6 +40,7 @@ from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_unit, get_datetime64_value, get_implementation_bounds, + import_pandas_datetime, npy_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, @@ -47,7 +48,6 @@ from pandas._libs.tslibs.np_datetime cimport ( pydatetime_to_dt64, pydatetime_to_dtstruct, string_to_dts, - import_pandas_datetime, ) import_pandas_datetime() diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 140a305f1061c..eb24e631e0a36 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -5,7 +5,7 @@ from enum import Enum from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, get_conversion_factor, - import_pandas_datetime + import_pandas_datetime, ) import_pandas_datetime() diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 2165c4ad9357e..2c230ab40d8ad 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -43,15 +43,16 @@ from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, + import_pandas_datetime, npy_datetimestruct, pandas_datetime_to_datetimestruct, pandas_timedelta_to_timedeltastruct, pandas_timedeltastruct, - import_pandas_datetime, ) import_pandas_datetime() + @cython.wraparound(False) @cython.boundscheck(False) def build_field_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso): diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 7b9ce82c394af..c3188e21ef789 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -60,11 +60,11 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, get_unit_from_dtype, + import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, pydate_to_dtstruct, - import_pandas_datetime, ) import_pandas_datetime() diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ae94b85e92e1e..7a9ea1a058b42 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -69,9 +69,9 @@ from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, + import_pandas_datetime, npy_datetimestruct, string_to_dts, - import_pandas_datetime, ) import_pandas_datetime() diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 22ac92149189d..98caae31b1ec7 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -49,10 +49,10 @@ from pandas._libs.tslibs.np_datetime cimport ( astype_overflowsafe, check_dts_bounds, get_timedelta64_value, + import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, - import_pandas_datetime ) import_pandas_datetime() diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 2d741cb2c2662..542afa9315a60 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -57,12 +57,12 @@ from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, check_dts_bounds, + import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pydate_to_dt64, pydatetime_to_dt64, string_to_dts, - import_pandas_datetime, ) import_pandas_datetime() diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index aaf809beceb94..a1222f5d86556 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -54,11 +54,11 @@ from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_unit, get_timedelta64_value, get_unit_from_dtype, + import_pandas_datetime, npy_datetimestruct, pandas_datetime_to_datetimestruct, pandas_timedelta_to_timedeltastruct, pandas_timedeltastruct, - import_pandas_datetime, ) import_pandas_datetime() diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c320b7426506e..545669c866c21 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -88,11 +88,11 @@ from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_unit, get_datetime64_value, get_unit_from_dtype, + import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, pydatetime_to_dtstruct, - import_pandas_datetime, ) import_pandas_datetime() diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 9073a7b3e0d9d..d29b7fd6bdc89 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -34,10 +34,10 @@ from pandas._libs.tslibs.dtypes cimport ( from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, + import_pandas_datetime, npy_datetimestruct, pandas_datetime_to_datetimestruct, pydatetime_to_dt64, - import_pandas_datetime, ) import_pandas_datetime() From 7b28333a71b3c76294d1bb11c6a2cc3824800b95 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 21 Feb 2023 06:53:33 -0800 Subject: [PATCH 16/36] api test fix --- pandas/_libs/__init__.py | 2 +- pandas/_libs/tslibs/src/datetime/pd_datetime.c | 2 +- pandas/_libs/tslibs/src/datetime/pd_datetime.h | 2 +- pandas/tests/api/test_api.py | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py index a409bea1941f3..2d08dc02bc482 100644 --- a/pandas/_libs/__init__.py +++ b/pandas/_libs/__init__.py @@ -13,7 +13,7 @@ # Below import needs to happen first to ensure pandas top level # module gets monkeypatched with the pandas_datetime_CAPI # see pandas_datetime_exec in pd_datetime.c -import pandas._libs.pandas_datetime as pandas_datetime # noqa # isort: skip +import pandas._libs.pandas_datetime # noqa # isort: skip from pandas._libs.interval import Interval from pandas._libs.tslibs import ( NaT, diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index 38e14c713594f..52456104999c8 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -2330,7 +2330,7 @@ static int pandas_datetime_exec(PyObject *module) { return -1; } - if (PyModule_AddObject(pandas, "pandas_datetime_CAPI", capsule) < 0) { + if (PyModule_AddObject(pandas, "_pandas_datetime_CAPI", capsule) < 0) { Py_DECREF(capsule); // cpython doesn't PyMem_Free(capi) here - mistake or intentional? return -1; diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index 456712c01d723..b7d105e202ad2 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -96,7 +96,7 @@ typedef struct { // The capsule name appears limited to module.attributename; see bpo-32414 // cpython has an open PR gh-6898 to fix, but hasn't had traction for years -#define PandasDateTime_CAPSULE_NAME "pandas.pandas_datetime_CAPI" +#define PandasDateTime_CAPSULE_NAME "pandas._pandas_datetime_CAPI" /* block used as part of public API */ #ifndef _PANDAS_DATETIME_IMPL diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 94430e23b054a..3454a005d21e4 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -181,6 +181,7 @@ class TestPDApi(Base): "_config", "_libs", "_is_numpy_dev", + "_pandas_datetime_CAPI", "_testing", "_typing", "_version", From ccea2b399e37bb727d7f5f059f53653e342f46ca Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 21 Feb 2023 14:08:41 -0800 Subject: [PATCH 17/36] removed unneeded tokenizer add --- setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.py b/setup.py index 12a993d5f600f..eeef28241bc47 100755 --- a/setup.py +++ b/setup.py @@ -502,9 +502,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.conversion": { "pyxfile": "_libs/tslibs/conversion", "depends": tseries_depends, - # TODO: xstrtod symbol visibility is really murky here "include": klib_include, - "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, "_libs.tslibs.fields": { "pyxfile": "_libs/tslibs/fields", From 2bf72646455c4f24c0d9889ba96319fa08a49a19 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 25 Feb 2023 16:37:34 -0800 Subject: [PATCH 18/36] Hacked together parser capsule --- pandas/_libs/__init__.py | 3 +- pandas/_libs/lib.pyx | 6 +- pandas/_libs/parsers.pyx | 23 +-- pandas/_libs/pd_parser.c | 271 ++++++++++++++++++++++++++++++++ pandas/_libs/pd_parser.h | 110 +++++++++++++ pandas/_libs/src/parse_helper.h | 100 ------------ setup.py | 39 ++++- 7 files changed, 433 insertions(+), 119 deletions(-) create mode 100644 pandas/_libs/pd_parser.c create mode 100644 pandas/_libs/pd_parser.h delete mode 100644 pandas/_libs/src/parse_helper.h diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py index 2d08dc02bc482..a7224a1d02f4b 100644 --- a/pandas/_libs/__init__.py +++ b/pandas/_libs/__init__.py @@ -10,9 +10,10 @@ ] -# Below import needs to happen first to ensure pandas top level +# Below imports needs to happen first to ensure pandas top level # module gets monkeypatched with the pandas_datetime_CAPI # see pandas_datetime_exec in pd_datetime.c +import pandas._libs.pandas_parser # noqa # isort: skip import pandas._libs.pandas_datetime # noqa # isort: skip from pandas._libs.interval import Interval from pandas._libs.tslibs import ( diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 9e8dd3e4c4a77..fae6c62afd04b 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -88,9 +88,11 @@ cdef extern from "numpy/arrayobject.h": cdef extern from "numpy/ndarrayobject.h": bint PyArray_CheckScalar(obj) nogil - -cdef extern from "src/parse_helper.h": +cdef extern from "pd_parser.h": int floatify(object, float64_t *result, int *maybe_int) except -1 + void PandasParser_IMPORT() + +PandasParser_IMPORT from pandas._libs cimport util from pandas._libs.util cimport ( diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 5bddaa61d3196..766d82706394f 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -249,6 +249,16 @@ cdef extern from "parser/tokenizer.h": int seen_uint int seen_null + void COLITER_NEXT(coliter_t, const char *) nogil + +cdef extern from "pd_parser.h": + void *new_rd_source(object obj) except NULL + + int del_rd_source(void *src) + + void* buffer_rd_bytes(void *source, size_t nbytes, + size_t *bytes_read, int *status, const char *encoding_errors) + void uint_state_init(uint_state *self) int uint64_conflict(uint_state *self) @@ -291,14 +301,9 @@ cdef extern from "parser/tokenizer.h": int to_boolean(const char *item, uint8_t *val) nogil + void PandasParser_IMPORT() -cdef extern from "parser/io.h": - void *new_rd_source(object obj) except NULL - - int del_rd_source(void *src) - - void* buffer_rd_bytes(void *source, size_t nbytes, - size_t *bytes_read, int *status, const char *encoding_errors) +PandasParser_IMPORT cdef class TextReader: @@ -610,8 +615,8 @@ cdef class TextReader: ptr = new_rd_source(source) self.parser.source = ptr - self.parser.cb_io = &buffer_rd_bytes - self.parser.cb_cleanup = &del_rd_source + # self.parser.cb_io = &(PandasParser_CAPI.buffer_rd_bytes) + # self.parser.cb_cleanup = &(PandasParser_CAPI.del_rd_source) cdef _get_header(self, list prelim_header): # header is now a list of lists, so field_count should use header[0] diff --git a/pandas/_libs/pd_parser.c b/pandas/_libs/pd_parser.c new file mode 100644 index 0000000000000..d3adfe1b20c00 --- /dev/null +++ b/pandas/_libs/pd_parser.c @@ -0,0 +1,271 @@ +/* + +Copyright (c) 2023, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +*/ +#define _PANDAS_PARSER_IMPL + +#include "pd_parser.h" +#include "src/parser/io.h" +#include "src/parser/tokenizer.h" + +static int pandas_to_double(char *item, double *p_value, char sci, char decimal, + int *maybe_int) { + char *p_end = NULL; + int error = 0; + + /* Switch to precise xstrtod GH 31364 */ + *p_value = + precise_xstrtod(item, &p_end, decimal, sci, '\0', 1, &error, maybe_int); + + return (error == 0) && (!*p_end); +} + +static int pandas_floatify(PyObject *str, double *result, int *maybe_int) { + int status; + char *data; + PyObject *tmp = NULL; + const char sci = 'E'; + const char dec = '.'; + + if (PyBytes_Check(str)) { + data = PyBytes_AS_STRING(str); + } else if (PyUnicode_Check(str)) { + tmp = PyUnicode_AsUTF8String(str); + if (tmp == NULL) { + return -1; + } + data = PyBytes_AS_STRING(tmp); + } else { + PyErr_SetString(PyExc_TypeError, "Invalid object type"); + return -1; + } + + status = pandas_to_double(data, result, sci, dec, maybe_int); + + if (!status) { + /* handle inf/-inf infinity/-infinity */ + if (strlen(data) == 3) { + if (0 == strcasecmp(data, "inf")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 4) { + if (0 == strcasecmp(data, "-inf")) { + *result = -HUGE_VAL; + *maybe_int = 0; + } else if (0 == strcasecmp(data, "+inf")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 8) { + if (0 == strcasecmp(data, "infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 9) { + if (0 == strcasecmp(data, "-infinity")) { + *result = -HUGE_VAL; + *maybe_int = 0; + } else if (0 == strcasecmp(data, "+infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else { + goto parsingerror; + } + } + + Py_XDECREF(tmp); + return 0; + +parsingerror: + PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data); + Py_XDECREF(tmp); + return -1; +} + +static void *pandas_new_rd_source(PyObject *obj) { return new_rd_source(obj); } + +static int pandas_del_rd_source(void *src) { return del_rd_source(src); } + +static void *pandas_buffer_rd_bytes(void *source, size_t nbytes, + size_t *bytes_read, int *status, + const char *encoding_errors) { + return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors); +} + +static void pandas_uint_state_init(uint_state *self) { uint_state_init(self); } + +static int pandas_uint64_conflict(uint_state *self) { + return uint64_conflict(self); +} + +static void pandas_coliter_setup(coliter_t *self, parser_t *parser, int64_t i, + int64_t start) { + coliter_setup(self, parser, i, start); +} + +static parser_t *pandas_parser_new() { return parser_new(); } + +static int pandas_parser_init(parser_t *self) { return parser_init(self); } + +static void pandas_parser_free(parser_t *self) { parser_free(self); } + +static void pandas_parser_del(parser_t *self) { parser_del(self); } + +static int pandas_parser_add_skiprow(parser_t *self, int64_t row) { + return parser_add_skiprow(self, row); +} + +static int pandas_parser_set_skipfirstnrows(parser_t *self, int64_t nrows) { + return parser_set_skipfirstnrows(self, nrows); +} + +static void pandas_parser_set_default_options(parser_t *self) { + parser_set_default_options(self); +} + +static int pandas_parser_consume_rows(parser_t *self, size_t nrows) { + return parser_consume_rows(self, nrows); +} + +static int pandas_parser_trim_buffers(parser_t *self) { + return parser_trim_buffers(self); +} + +static int pandas_tokenize_all_rows(parser_t *self, + const char *encoding_errors) { + return tokenize_all_rows(self, encoding_errors); +} + +static int pandas_tokenize_nrows(parser_t *self, size_t nrows, + const char *encoding_errors) { + return tokenize_nrows(self, nrows, encoding_errors); +} + +static int64_t pandas_str_to_int64(const char *p_item, int64_t int_min, + int64_t int_max, int *error, char tsep) { + return str_to_int64(p_item, int_min, int_max, error, tsep); +} + +static uint64_t pandas_str_to_uint64(uint_state *state, const char *p_item, + int64_t int_max, uint64_t uint_max, + int *error, char tsep) { + return str_to_uint64(state, p_item, int_max, uint_max, error, tsep); +} + +static double pandas_xstrtod(const char *p, char **q, char decimal, char sci, + char tsep, int skip_trailing, int *error, + int *maybe_int) { + return xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int); +} + +static double pandas_precise_xstrtod(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) { + return precise_xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, + maybe_int); +} + +static double pandas_round_trip(const char *p, char **q, char decimal, char sci, + char tsep, int skip_trailing, int *error, + int *maybe_int) { + return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int); +} + +static int pandas_to_boolean(const char *item, uint8_t *val) { + return to_boolean(item, val); +} + +static void pandas_parser_destructor(PyObject *op) { + void *ptr = PyCapsule_GetPointer(op, PandasParser_CAPSULE_NAME); + PyMem_Free(ptr); +} + +static int pandas_parser_exec(PyObject *module) { + PandasParser_CAPI *capi = PyMem_Malloc(sizeof(PandasParser_CAPI)); + if (capi == NULL) { + PyErr_NoMemory(); + return -1; + } + + capi->to_double = pandas_to_double; + capi->floatify = pandas_floatify; + capi->new_rd_source = pandas_new_rd_source; + capi->del_rd_source = pandas_del_rd_source; + capi->buffer_rd_bytes = pandas_buffer_rd_bytes; + capi->uint_state_init = pandas_uint_state_init; + capi->uint64_conflict = pandas_uint64_conflict; + capi->coliter_setup = pandas_coliter_setup; + capi->parser_new = pandas_parser_new; + capi->parser_init = pandas_parser_init; + capi->parser_free = pandas_parser_free; + capi->parser_del = pandas_parser_del; + capi->parser_add_skiprow = pandas_parser_add_skiprow; + capi->parser_set_skipfirstnrows = pandas_parser_set_skipfirstnrows; + capi->parser_set_default_options = pandas_parser_set_default_options; + capi->parser_consume_rows = pandas_parser_consume_rows; + capi->parser_trim_buffers = pandas_parser_trim_buffers; + capi->tokenize_all_rows = pandas_tokenize_all_rows; + capi->tokenize_nrows = pandas_tokenize_nrows; + capi->str_to_int64 = pandas_str_to_int64; + capi->str_to_uint64 = pandas_str_to_uint64; + capi->xstrtod = pandas_xstrtod; + capi->precise_xstrtod = pandas_precise_xstrtod; + capi->round_trip = pandas_round_trip; + capi->to_boolean = pandas_to_boolean; + + PyObject *capsule = + PyCapsule_New(capi, PandasParser_CAPSULE_NAME, pandas_parser_destructor); + if (capsule == NULL) { + PyMem_Free(capi); + return -1; + } + + // Monkeypatch the top level pandas module to have an attribute for the + // C-API. This is required because Python capsules do not support setting + // this attribute on anything but the top level package. Ideally not + // done when cpython gh-6898 gets implemented + PyObject *pandas = PyImport_ImportModule("pandas"); + if (!pandas) { + PyErr_SetString(PyExc_ImportError, + "pd_parser.c could not import module pandas"); + Py_DECREF(capsule); + return -1; + } + + if (PyModule_AddObject(pandas, "_pandas_parser_CAPI", capsule) < 0) { + Py_DECREF(capsule); + return -1; + } + + return 0; +} + +static PyModuleDef_Slot pandas_parser_slots[] = { + {Py_mod_exec, pandas_parser_exec}, {0, NULL}}; + +static struct PyModuleDef pandas_parsermodule = { + PyModuleDef_HEAD_INIT, + .m_name = "pandas._libs.pandas_parser", + + .m_doc = "Internal module with parser support for other extensions", + .m_size = 0, + .m_methods = NULL, + .m_slots = pandas_parser_slots}; + +PyMODINIT_FUNC PyInit_pandas_parser(void) { + return PyModuleDef_Init(&pandas_parsermodule); +} diff --git a/pandas/_libs/pd_parser.h b/pandas/_libs/pd_parser.h new file mode 100644 index 0000000000000..b852dd1511127 --- /dev/null +++ b/pandas/_libs/pd_parser.h @@ -0,0 +1,110 @@ +/* + +Copyright (c) 2023, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +*/ +#ifndef PANDAS__LIBS_PD_PARSER_H_ +#define PANDAS__LIBS_PD_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "Python.h" +#include "src/parser/tokenizer.h" + +typedef struct { + int (*to_double)(char *, double *, char, char, int *); + int (*floatify)(PyObject *, double *, int *); + void *(*new_rd_source)(PyObject *); + int (*del_rd_source)(void *); + void *(*buffer_rd_bytes)(void *, size_t, size_t *, int *, const char *); + void (*uint_state_init)(uint_state *); + int (*uint64_conflict)(uint_state *); + void (*coliter_setup)(coliter_t *, parser_t *, int64_t, int64_t); + parser_t *(*parser_new)(); + int (*parser_init)(parser_t *); + void (*parser_free)(parser_t *); + void (*parser_del)(parser_t *); + int (*parser_add_skiprow)(parser_t *, int64_t); + int (*parser_set_skipfirstnrows)(parser_t *, int64_t); + void (*parser_set_default_options)(parser_t *); + int (*parser_consume_rows)(parser_t *, size_t); + int (*parser_trim_buffers)(parser_t *); + int (*tokenize_all_rows)(parser_t *, const char *); + int (*tokenize_nrows)(parser_t *, size_t, const char *); + int64_t (*str_to_int64)(const char *, int64_t, int64_t, int *, char); + uint64_t (*str_to_uint64)(uint_state *, const char *, int64_t, uint64_t, + int *, char); + double (*xstrtod)(const char *, char **, char, char, char, int, int *, int *); + double (*precise_xstrtod)(const char *, char **, char, char, char, int, int *, + int *); + double (*round_trip)(const char *, char **, char, char, char, int, int *, + int *); + int (*to_boolean)(const char *, uint8_t *); +} PandasParser_CAPI; + +#define PandasParser_CAPSULE_NAME "pandas._pandas_parser_CAPI" + +#ifndef _PANDAS_PARSER_IMPL +static PandasParser_CAPI *PandasParserAPI = NULL; + +#define PandasParser_IMPORT \ + PandasParserAPI = \ + (PandasParser_CAPI *)PyCapsule_Import(PandasParser_CAPSULE_NAME, 0) + +#define to_double(item, p_value, sci, decimal, maybe_int) \ + PandasParserAPI->to_double((item), (p_value), (sci), (decimal), (maybe_int)) +#define floatify(str, result, maybe_int) \ + PandasParserAPI->floatify((str), (result), (maybe_int)) +#define new_rd_source(obj) PandasParserAPI->new_rd_source((obj)) +#define del_rd_source(src) PandasParserAPI->del_rd_source((src)) +#define buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors) \ + PandasParserAPI->buffer_rd_bytes((source), (nbytes), (bytes_read), (status), \ + (encoding_errors)) +#define uint_state_init(self) PandasParserAPI->uint_state_init((self)) +#define uint64_conflict(self) PandasParserAPI->uint64_conflict((self)) +#define coliter_setup(self, parser, i, start) \ + PandasParserAPI->coliter_setup((self), (parser), (i), (start)) +#define parser_new PandasParserAPI->parser_new +#define parser_init(self) PandasParserAPI->parser_init((self)) +#define parser_free(self) PandasParserAPI->parser_free((self)) +#define parser_del(self) PandasParserAPI->parser_del((self)) +#define parser_add_skiprow(self, row) \ + PandasParserAPI->parser_add_skiprow((self), (row)) +#define parser_set_skipfirstnrows(self, nrows) \ + PandasParserAPI->parser_set_skipfirstnrows((self), (nrows)) +#define parser_set_default_options(self) \ + PandasParserAPI->parser_set_default_options((self)) +#define parser_consume_rows(self, nrows) \ + PandasParserAPI->parser_consume_rows((self), (nrows)) +#define parser_tokenize_all_rows(self, encoding_errors) \ + PandasParserAPI->parser_tokenize_all_rows((self), (encoding_errors)) +#define parser_tokenize_nrows(self, nrows, encoding_errors) \ + PandasParserAPI->parser_tokenize_nrows((self), (nrows), (encoding_errors)) +#define str_to_int64(p_item, int_min, int_max, error, t_sep) \ + PandasParserAPI->str_to_int64((p_item), (int_min), (int_max), (error), \ + (t_sep)) +#define str_to_uint64(state, p_item, int_max, uint_max, error, t_sep) \ + PandasParserAPI->str_to_uint64((state), (p_item), (int_max), (uint_max), \ + (error), (t_sep)) +#define xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) \ + PandasParserAPI->xstrtod((p), (q), (decimal), (sci), (tsep), \ + (skip_trailing), (error), (maybe_int)) +#define precise_xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, \ + maybe_int) \ + PandasParserAPI->precise_xstrtod((p), (q), (decimal), (sci), (tsep), \ + (skip_trailing), (error), (maybe_int)) +#define round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) \ + PandasParserAPI->round_trip((p), (q), (decimal), (sci), (tsep), \ + (skip_trailing), (error), (maybe_int)) +#define to_boolean(item, val) PandasParserAPI->to_boolean((item), (val)) +#endif /* !defined(_PANDAS_PARSER_IMPL) */ + +#ifdef __cplusplus +} +#endif +#endif // PANDAS__LIBS_PD_PARSER_H_ diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h deleted file mode 100644 index d161c4e29fe15..0000000000000 --- a/pandas/_libs/src/parse_helper.h +++ /dev/null @@ -1,100 +0,0 @@ -/* -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. -*/ - -#ifndef PANDAS__LIBS_SRC_PARSE_HELPER_H_ -#define PANDAS__LIBS_SRC_PARSE_HELPER_H_ - -#include -#include "parser/tokenizer.h" - -int to_double(char *item, double *p_value, char sci, char decimal, - int *maybe_int) { - char *p_end = NULL; - int error = 0; - - /* Switch to precise xstrtod GH 31364 */ - *p_value = precise_xstrtod(item, &p_end, decimal, sci, '\0', 1, - &error, maybe_int); - - return (error == 0) && (!*p_end); -} - -int floatify(PyObject *str, double *result, int *maybe_int) { - int status; - char *data; - PyObject *tmp = NULL; - const char sci = 'E'; - const char dec = '.'; - - if (PyBytes_Check(str)) { - data = PyBytes_AS_STRING(str); - } else if (PyUnicode_Check(str)) { - tmp = PyUnicode_AsUTF8String(str); - if (tmp == NULL) { - return -1; - } - data = PyBytes_AS_STRING(tmp); - } else { - PyErr_SetString(PyExc_TypeError, "Invalid object type"); - return -1; - } - - status = to_double(data, result, sci, dec, maybe_int); - - if (!status) { - /* handle inf/-inf infinity/-infinity */ - if (strlen(data) == 3) { - if (0 == strcasecmp(data, "inf")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else if (strlen(data) == 4) { - if (0 == strcasecmp(data, "-inf")) { - *result = -HUGE_VAL; - *maybe_int = 0; - } else if (0 == strcasecmp(data, "+inf")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else if (strlen(data) == 8) { - if (0 == strcasecmp(data, "infinity")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else if (strlen(data) == 9) { - if (0 == strcasecmp(data, "-infinity")) { - *result = -HUGE_VAL; - *maybe_int = 0; - } else if (0 == strcasecmp(data, "+infinity")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else { - goto parsingerror; - } - } - - Py_XDECREF(tmp); - return 0; - -parsingerror: - PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data); - Py_XDECREF(tmp); - return -1; -} - -#endif // PANDAS__LIBS_SRC_PARSE_HELPER_H_ diff --git a/setup.py b/setup.py index eeef28241bc47..6851e806c33f4 100755 --- a/setup.py +++ b/setup.py @@ -131,6 +131,7 @@ def initialize_options(self): pjoin(ujson_lib, "ultrajsondec.c"), pjoin(util, "move.c"), pjoin(tsbase, "datetime", "pd_datetime.c"), + pjoin("pandas", "_libs", "pd_parser.c"), ] for root, dirs, files in os.walk("pandas"): @@ -471,19 +472,15 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pyxfile": "_libs/lib", "depends": lib_depends + tseries_depends, "include": klib_include, # due to tokenizer import - "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, "_libs.missing": {"pyxfile": "_libs/missing", "depends": tseries_depends}, "_libs.parsers": { "pyxfile": "_libs/parsers", - "include": klib_include + ["pandas/_libs/src"], + "include": klib_include + ["pandas/_libs/src", "pandas/_libs"], "depends": [ "pandas/_libs/src/parser/tokenizer.h", "pandas/_libs/src/parser/io.h", - ], - "sources": [ - "pandas/_libs/src/parser/tokenizer.c", - "pandas/_libs/src/parser/io.c", + "pandas/_libs/src/pd_parser.h", ], }, "_libs.reduction": {"pyxfile": "_libs/reduction"}, @@ -646,7 +643,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): # pd_datetime pd_dt_ext = Extension( "pandas._libs.pandas_datetime", - depends=["pandas._libs.tslibs.datetime.pd_datetime.h"], + depends=["pandas/_libs/tslibs/datetime/pd_datetime.h"], sources=(["pandas/_libs/tslibs/src/datetime/pd_datetime.c"]), include_dirs=tseries_includes + [ @@ -660,6 +657,34 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): extensions.append(pd_dt_ext) +# ---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- +# pd_datetime +pd_parser_ext = Extension( + "pandas._libs.pandas_parser", + depends=["pandas/_libs/pd_parser.h"], + sources=( + [ + "pandas/_libs/src/parser/tokenizer.c", + "pandas/_libs/src/parser/io.c", + "pandas/_libs/pd_parser.c", + ] + ), + include_dirs=[ + "pandas/_libs/src", + "pandas/_libs/src/parser", + "pandas/_libs/src/klib", + ], + extra_compile_args=(extra_compile_args), + extra_link_args=extra_link_args, + define_macros=macros, +) + + +extensions.append(pd_parser_ext) + + # ---------------------------------------------------------------------- From 418910d1b1c397c29fed2a6ad506a5193998fd55 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 25 Feb 2023 17:08:43 -0800 Subject: [PATCH 19/36] Symbol cleanups --- pandas/_libs/parsers.pyx | 20 ++++++++++---------- pandas/_libs/pd_parser.h | 10 ++++++---- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 766d82706394f..ce59058e2fe65 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -289,15 +289,15 @@ cdef extern from "pd_parser.h": uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max, uint64_t uint_max, int *error, char tsep) nogil - float64_t xstrtod(const char *p, char **q, char decimal, + double xstrtod(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + double precise_xstrtod(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + double round_trip(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing, int *error, int *maybe_int) nogil - float64_t precise_xstrtod(const char *p, char **q, char decimal, - char sci, char tsep, int skip_trailing, - int *error, int *maybe_int) nogil - float64_t round_trip(const char *p, char **q, char decimal, - char sci, char tsep, int skip_trailing, - int *error, int *maybe_int) nogil int to_boolean(const char *item, uint8_t *val) nogil @@ -492,11 +492,11 @@ cdef class TextReader: if float_precision == "round_trip": # see gh-15140 - self.parser.double_converter = round_trip + self.parser.double_converter = &round_trip elif float_precision == "legacy": - self.parser.double_converter = xstrtod + self.parser.double_converter = &xstrtod elif float_precision == "high" or float_precision is None: - self.parser.double_converter = precise_xstrtod + self.parser.double_converter = &precise_xstrtod else: raise ValueError(f"Unrecognized float_precision option: " f"{float_precision}") diff --git a/pandas/_libs/pd_parser.h b/pandas/_libs/pd_parser.h index b852dd1511127..4e64ce94052d0 100644 --- a/pandas/_libs/pd_parser.h +++ b/pandas/_libs/pd_parser.h @@ -81,10 +81,12 @@ static PandasParser_CAPI *PandasParserAPI = NULL; PandasParserAPI->parser_set_default_options((self)) #define parser_consume_rows(self, nrows) \ PandasParserAPI->parser_consume_rows((self), (nrows)) -#define parser_tokenize_all_rows(self, encoding_errors) \ - PandasParserAPI->parser_tokenize_all_rows((self), (encoding_errors)) -#define parser_tokenize_nrows(self, nrows, encoding_errors) \ - PandasParserAPI->parser_tokenize_nrows((self), (nrows), (encoding_errors)) +#define parser_trim_buffers(self) \ + PandasParserAPI->parser_trim_buffers((self)) +#define tokenize_all_rows(self, encoding_errors) \ + PandasParserAPI->tokenize_all_rows((self), (encoding_errors)) +#define tokenize_nrows(self, nrows, encoding_errors) \ + PandasParserAPI->tokenize_nrows((self), (nrows), (encoding_errors)) #define str_to_int64(p_item, int_min, int_max, error, t_sep) \ PandasParserAPI->str_to_int64((p_item), (int_min), (int_max), (error), \ (t_sep)) From a4f7e1a068962d313779e0971c1b96d7d24ca9d3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 25 Feb 2023 17:35:27 -0800 Subject: [PATCH 20/36] Resolved all undefined symbols in parsers.pyx --- pandas/_libs/parsers.pyx | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index ce59058e2fe65..e8be1b9646b7e 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -229,9 +229,9 @@ cdef extern from "parser/tokenizer.h": int64_t skip_first_N_rows int64_t skipfooter # pick one, depending on whether the converter requires GIL - float64_t (*double_converter)(const char *, char **, - char, char, char, - int, int *, int *) nogil + double (*double_converter)(const char *, char **, + char, char, char, + int, int *, int *) nogil # error handling char *warn_msg @@ -305,6 +305,25 @@ cdef extern from "pd_parser.h": PandasParser_IMPORT +# When not invoked directly but rather assigned as a function, +# cdef extern'ed declarations seem to leave behind an undefined symbol +cdef double xstrtod_wrapper(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil: + return xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) + + +cdef double precise_xstrtod_wrapper(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil: + return precise_xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) + + +cdef double round_trip_wrapper(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil: + return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) + cdef class TextReader: """ @@ -492,11 +511,11 @@ cdef class TextReader: if float_precision == "round_trip": # see gh-15140 - self.parser.double_converter = &round_trip + self.parser.double_converter = round_trip_wrapper elif float_precision == "legacy": - self.parser.double_converter = &xstrtod + self.parser.double_converter = xstrtod_wrapper elif float_precision == "high" or float_precision is None: - self.parser.double_converter = &precise_xstrtod + self.parser.double_converter = precise_xstrtod_wrapper else: raise ValueError(f"Unrecognized float_precision option: " f"{float_precision}") From ad1d14986dc45fef671855a8e42e0ffa7d7325c3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 25 Feb 2023 17:43:28 -0800 Subject: [PATCH 21/36] IO callbacks restored --- pandas/_libs/parsers.pyx | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index e8be1b9646b7e..48a7999ac99e5 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -325,6 +325,15 @@ cdef double round_trip_wrapper(const char *p, char **q, char decimal, return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) +cdef void* buffer_rd_bytes_wrapper(void *source, size_t nbytes, + size_t *bytes_read, int *status, + const char *encoding_errors): + return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors) + +cdef int del_rd_source_wrapper(void *src): + return del_rd_source(src) + + cdef class TextReader: """ @@ -634,8 +643,8 @@ cdef class TextReader: ptr = new_rd_source(source) self.parser.source = ptr - # self.parser.cb_io = &(PandasParser_CAPI.buffer_rd_bytes) - # self.parser.cb_cleanup = &(PandasParser_CAPI.del_rd_source) + self.parser.cb_io = buffer_rd_bytes_wrapper + self.parser.cb_cleanup = del_rd_source_wrapper cdef _get_header(self, list prelim_header): # header is now a list of lists, so field_count should use header[0] From 58872543cbcfac24bb4988a7af37d95e3069f33f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 26 Feb 2023 10:08:01 -0800 Subject: [PATCH 22/36] Fix build and test failures --- pandas/_libs/pd_parser.h | 2 +- pandas/tests/api/test_api.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/pd_parser.h b/pandas/_libs/pd_parser.h index 4e64ce94052d0..e7da9a7ce1c68 100644 --- a/pandas/_libs/pd_parser.h +++ b/pandas/_libs/pd_parser.h @@ -13,7 +13,7 @@ Distributed under the terms of the BSD Simplified License. extern "C" { #endif -#include "Python.h" +#include #include "src/parser/tokenizer.h" typedef struct { diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 3454a005d21e4..463ed6051e910 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -182,6 +182,7 @@ class TestPDApi(Base): "_libs", "_is_numpy_dev", "_pandas_datetime_CAPI", + "_pandas_parser_CAPI", "_testing", "_typing", "_version", From 138ea0de8826221ea49614c4d022c01eff3c1aa5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 26 Feb 2023 11:12:01 -0800 Subject: [PATCH 23/36] Try relative imports for MSFT compat --- pandas/_libs/pd_parser.c | 4 ++-- pandas/_libs/pd_parser.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/pd_parser.c b/pandas/_libs/pd_parser.c index d3adfe1b20c00..171808b538f3d 100644 --- a/pandas/_libs/pd_parser.c +++ b/pandas/_libs/pd_parser.c @@ -9,8 +9,8 @@ Distributed under the terms of the BSD Simplified License. #define _PANDAS_PARSER_IMPL #include "pd_parser.h" -#include "src/parser/io.h" -#include "src/parser/tokenizer.h" +#include "io.h" +#include "tokenizer.h" static int pandas_to_double(char *item, double *p_value, char sci, char decimal, int *maybe_int) { diff --git a/pandas/_libs/pd_parser.h b/pandas/_libs/pd_parser.h index e7da9a7ce1c68..b3a463b933300 100644 --- a/pandas/_libs/pd_parser.h +++ b/pandas/_libs/pd_parser.h @@ -14,7 +14,7 @@ extern "C" { #endif #include -#include "src/parser/tokenizer.h" +#include "tokenizer.h" typedef struct { int (*to_double)(char *, double *, char, char, int *); From 679d03d4d976c9256ea8f4ba49a766971d7494d7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 26 Feb 2023 11:15:43 -0800 Subject: [PATCH 24/36] try py_ssize_t_clean macro --- pandas/_libs/pd_parser.c | 4 ++-- pandas/_libs/pd_parser.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/pd_parser.c b/pandas/_libs/pd_parser.c index 171808b538f3d..d3adfe1b20c00 100644 --- a/pandas/_libs/pd_parser.c +++ b/pandas/_libs/pd_parser.c @@ -9,8 +9,8 @@ Distributed under the terms of the BSD Simplified License. #define _PANDAS_PARSER_IMPL #include "pd_parser.h" -#include "io.h" -#include "tokenizer.h" +#include "src/parser/io.h" +#include "src/parser/tokenizer.h" static int pandas_to_double(char *item, double *p_value, char sci, char decimal, int *maybe_int) { diff --git a/pandas/_libs/pd_parser.h b/pandas/_libs/pd_parser.h index b3a463b933300..b6dc05f19ff14 100644 --- a/pandas/_libs/pd_parser.h +++ b/pandas/_libs/pd_parser.h @@ -13,8 +13,9 @@ Distributed under the terms of the BSD Simplified License. extern "C" { #endif +#define PY_SSIZE_T_CLEAN #include -#include "tokenizer.h" +#include "src/parser/tokenizer.h" typedef struct { int (*to_double)(char *, double *, char, char, int *); From 7c4e3656b10fd060f30fff8838ecd4f15e06fbe0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 26 Feb 2023 11:46:51 -0800 Subject: [PATCH 25/36] Removed double tokenizer include --- pandas/_libs/pd_parser.c | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/pd_parser.c b/pandas/_libs/pd_parser.c index d3adfe1b20c00..7a7fd2b1f0df5 100644 --- a/pandas/_libs/pd_parser.c +++ b/pandas/_libs/pd_parser.c @@ -10,7 +10,6 @@ Distributed under the terms of the BSD Simplified License. #include "pd_parser.h" #include "src/parser/io.h" -#include "src/parser/tokenizer.h" static int pandas_to_double(char *item, double *p_value, char sci, char decimal, int *maybe_int) { From 5aee18a5ef5d99508ab8ae72b433b6a489e4e30d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 26 Feb 2023 12:03:41 -0800 Subject: [PATCH 26/36] removed unneeded include path --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 6851e806c33f4..b05559f426801 100755 --- a/setup.py +++ b/setup.py @@ -673,7 +673,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): ), include_dirs=[ "pandas/_libs/src", - "pandas/_libs/src/parser", "pandas/_libs/src/klib", ], extra_compile_args=(extra_compile_args), From a0523be31bcdd8083c36b2c5ef64a0ff270b6839 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 26 Feb 2023 16:10:18 -0800 Subject: [PATCH 27/36] more cleanups --- pandas/_libs/__init__.py | 4 ++-- setup.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py index a7224a1d02f4b..2c532cda480f0 100644 --- a/pandas/_libs/__init__.py +++ b/pandas/_libs/__init__.py @@ -13,8 +13,8 @@ # Below imports needs to happen first to ensure pandas top level # module gets monkeypatched with the pandas_datetime_CAPI # see pandas_datetime_exec in pd_datetime.c -import pandas._libs.pandas_parser # noqa # isort: skip -import pandas._libs.pandas_datetime # noqa # isort: skip +import pandas._libs.pandas_parser # noqa # isort: skip # type: ignore[reportUnusedImport] +import pandas._libs.pandas_datetime # noqa # isort: skip # type: ignore[reportUnusedImport] from pandas._libs.interval import Interval from pandas._libs.tslibs import ( NaT, diff --git a/setup.py b/setup.py index b05559f426801..b1bb29ae18969 100755 --- a/setup.py +++ b/setup.py @@ -672,7 +672,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): ] ), include_dirs=[ - "pandas/_libs/src", "pandas/_libs/src/klib", ], extra_compile_args=(extra_compile_args), From 554d7018491f62c6dfa8526b09d3b9a8b81929b5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 26 Feb 2023 16:15:04 -0800 Subject: [PATCH 28/36] noexcept --- pandas/_libs/parsers.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 48a7999ac99e5..21a2e84c5d774 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -327,10 +327,10 @@ cdef double round_trip_wrapper(const char *p, char **q, char decimal, cdef void* buffer_rd_bytes_wrapper(void *source, size_t nbytes, size_t *bytes_read, int *status, - const char *encoding_errors): + const char *encoding_errors) noexcept: return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors) -cdef int del_rd_source_wrapper(void *src): +cdef int del_rd_source_wrapper(void *src) noexcept: return del_rd_source(src) From 726d93d61aa8523c5d8c84bac9341e1971ae0f87 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 26 Feb 2023 16:40:05 -0800 Subject: [PATCH 29/36] signature cleanup --- pandas/_libs/pd_parser.c | 2 +- pandas/_libs/pd_parser.h | 2 +- pandas/_libs/src/parser/tokenizer.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/pd_parser.c b/pandas/_libs/pd_parser.c index 7a7fd2b1f0df5..560e4424df600 100644 --- a/pandas/_libs/pd_parser.c +++ b/pandas/_libs/pd_parser.c @@ -116,7 +116,7 @@ static void pandas_coliter_setup(coliter_t *self, parser_t *parser, int64_t i, coliter_setup(self, parser, i, start); } -static parser_t *pandas_parser_new() { return parser_new(); } +static parser_t *pandas_parser_new(void) { return parser_new(); } static int pandas_parser_init(parser_t *self) { return parser_init(self); } diff --git a/pandas/_libs/pd_parser.h b/pandas/_libs/pd_parser.h index b6dc05f19ff14..acdc08bbad484 100644 --- a/pandas/_libs/pd_parser.h +++ b/pandas/_libs/pd_parser.h @@ -26,7 +26,7 @@ typedef struct { void (*uint_state_init)(uint_state *); int (*uint64_conflict)(uint_state *); void (*coliter_setup)(coliter_t *, parser_t *, int64_t, int64_t); - parser_t *(*parser_new)(); + parser_t *(*parser_new)(void); int (*parser_init)(parser_t *); void (*parser_free)(parser_t *); void (*parser_del)(parser_t *); diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index c337c3eaf1318..1ddd7ec7be986 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -105,7 +105,7 @@ void parser_set_default_options(parser_t *self) { self->skip_footer = 0; } -parser_t *parser_new() { return (parser_t *)calloc(1, sizeof(parser_t)); } +parser_t *parser_new(void) { return (parser_t *)calloc(1, sizeof(parser_t)); } int parser_clear_data_buffers(parser_t *self) { free_if_not_null((void *)&self->stream); From d2fe542e87a07d3703b22918439e451cb87753ec Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 27 Feb 2023 16:51:18 -0800 Subject: [PATCH 30/36] simplify parser impl --- pandas/_libs/pd_parser.c | 148 ++++++++------------------------------- 1 file changed, 28 insertions(+), 120 deletions(-) diff --git a/pandas/_libs/pd_parser.c b/pandas/_libs/pd_parser.c index 560e4424df600..15d82b59df3e8 100644 --- a/pandas/_libs/pd_parser.c +++ b/pandas/_libs/pd_parser.c @@ -11,7 +11,7 @@ Distributed under the terms of the BSD Simplified License. #include "pd_parser.h" #include "src/parser/io.h" -static int pandas_to_double(char *item, double *p_value, char sci, char decimal, +static int to_double(char *item, double *p_value, char sci, char decimal, int *maybe_int) { char *p_end = NULL; int error = 0; @@ -23,7 +23,7 @@ static int pandas_to_double(char *item, double *p_value, char sci, char decimal, return (error == 0) && (!*p_end); } -static int pandas_floatify(PyObject *str, double *result, int *maybe_int) { +static int floatify(PyObject *str, double *result, int *maybe_int) { int status; char *data; PyObject *tmp = NULL; @@ -43,7 +43,7 @@ static int pandas_floatify(PyObject *str, double *result, int *maybe_int) { return -1; } - status = pandas_to_double(data, result, sci, dec, maybe_int); + status = to_double(data, result, sci, dec, maybe_int); if (!status) { /* handle inf/-inf infinity/-infinity */ @@ -95,98 +95,6 @@ static int pandas_floatify(PyObject *str, double *result, int *maybe_int) { return -1; } -static void *pandas_new_rd_source(PyObject *obj) { return new_rd_source(obj); } - -static int pandas_del_rd_source(void *src) { return del_rd_source(src); } - -static void *pandas_buffer_rd_bytes(void *source, size_t nbytes, - size_t *bytes_read, int *status, - const char *encoding_errors) { - return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors); -} - -static void pandas_uint_state_init(uint_state *self) { uint_state_init(self); } - -static int pandas_uint64_conflict(uint_state *self) { - return uint64_conflict(self); -} - -static void pandas_coliter_setup(coliter_t *self, parser_t *parser, int64_t i, - int64_t start) { - coliter_setup(self, parser, i, start); -} - -static parser_t *pandas_parser_new(void) { return parser_new(); } - -static int pandas_parser_init(parser_t *self) { return parser_init(self); } - -static void pandas_parser_free(parser_t *self) { parser_free(self); } - -static void pandas_parser_del(parser_t *self) { parser_del(self); } - -static int pandas_parser_add_skiprow(parser_t *self, int64_t row) { - return parser_add_skiprow(self, row); -} - -static int pandas_parser_set_skipfirstnrows(parser_t *self, int64_t nrows) { - return parser_set_skipfirstnrows(self, nrows); -} - -static void pandas_parser_set_default_options(parser_t *self) { - parser_set_default_options(self); -} - -static int pandas_parser_consume_rows(parser_t *self, size_t nrows) { - return parser_consume_rows(self, nrows); -} - -static int pandas_parser_trim_buffers(parser_t *self) { - return parser_trim_buffers(self); -} - -static int pandas_tokenize_all_rows(parser_t *self, - const char *encoding_errors) { - return tokenize_all_rows(self, encoding_errors); -} - -static int pandas_tokenize_nrows(parser_t *self, size_t nrows, - const char *encoding_errors) { - return tokenize_nrows(self, nrows, encoding_errors); -} - -static int64_t pandas_str_to_int64(const char *p_item, int64_t int_min, - int64_t int_max, int *error, char tsep) { - return str_to_int64(p_item, int_min, int_max, error, tsep); -} - -static uint64_t pandas_str_to_uint64(uint_state *state, const char *p_item, - int64_t int_max, uint64_t uint_max, - int *error, char tsep) { - return str_to_uint64(state, p_item, int_max, uint_max, error, tsep); -} - -static double pandas_xstrtod(const char *p, char **q, char decimal, char sci, - char tsep, int skip_trailing, int *error, - int *maybe_int) { - return xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int); -} - -static double pandas_precise_xstrtod(const char *p, char **q, char decimal, - char sci, char tsep, int skip_trailing, - int *error, int *maybe_int) { - return precise_xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, - maybe_int); -} - -static double pandas_round_trip(const char *p, char **q, char decimal, char sci, - char tsep, int skip_trailing, int *error, - int *maybe_int) { - return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int); -} - -static int pandas_to_boolean(const char *item, uint8_t *val) { - return to_boolean(item, val); -} static void pandas_parser_destructor(PyObject *op) { void *ptr = PyCapsule_GetPointer(op, PandasParser_CAPSULE_NAME); @@ -200,31 +108,31 @@ static int pandas_parser_exec(PyObject *module) { return -1; } - capi->to_double = pandas_to_double; - capi->floatify = pandas_floatify; - capi->new_rd_source = pandas_new_rd_source; - capi->del_rd_source = pandas_del_rd_source; - capi->buffer_rd_bytes = pandas_buffer_rd_bytes; - capi->uint_state_init = pandas_uint_state_init; - capi->uint64_conflict = pandas_uint64_conflict; - capi->coliter_setup = pandas_coliter_setup; - capi->parser_new = pandas_parser_new; - capi->parser_init = pandas_parser_init; - capi->parser_free = pandas_parser_free; - capi->parser_del = pandas_parser_del; - capi->parser_add_skiprow = pandas_parser_add_skiprow; - capi->parser_set_skipfirstnrows = pandas_parser_set_skipfirstnrows; - capi->parser_set_default_options = pandas_parser_set_default_options; - capi->parser_consume_rows = pandas_parser_consume_rows; - capi->parser_trim_buffers = pandas_parser_trim_buffers; - capi->tokenize_all_rows = pandas_tokenize_all_rows; - capi->tokenize_nrows = pandas_tokenize_nrows; - capi->str_to_int64 = pandas_str_to_int64; - capi->str_to_uint64 = pandas_str_to_uint64; - capi->xstrtod = pandas_xstrtod; - capi->precise_xstrtod = pandas_precise_xstrtod; - capi->round_trip = pandas_round_trip; - capi->to_boolean = pandas_to_boolean; + capi->to_double = to_double; + capi->floatify = floatify; + capi->new_rd_source = new_rd_source; + capi->del_rd_source = del_rd_source; + capi->buffer_rd_bytes = buffer_rd_bytes; + capi->uint_state_init = uint_state_init; + capi->uint64_conflict = uint64_conflict; + capi->coliter_setup = coliter_setup; + capi->parser_new = parser_new; + capi->parser_init = parser_init; + capi->parser_free = parser_free; + capi->parser_del = parser_del; + capi->parser_add_skiprow = parser_add_skiprow; + capi->parser_set_skipfirstnrows = parser_set_skipfirstnrows; + capi->parser_set_default_options = parser_set_default_options; + capi->parser_consume_rows = parser_consume_rows; + capi->parser_trim_buffers = parser_trim_buffers; + capi->tokenize_all_rows = tokenize_all_rows; + capi->tokenize_nrows = tokenize_nrows; + capi->str_to_int64 = str_to_int64; + capi->str_to_uint64 = str_to_uint64; + capi->xstrtod = xstrtod; + capi->precise_xstrtod = precise_xstrtod; + capi->round_trip = round_trip; + capi->to_boolean = to_boolean; PyObject *capsule = PyCapsule_New(capi, PandasParser_CAPSULE_NAME, pandas_parser_destructor); From 49a2739e9f6bed18f35557170d8602fbafa09a18 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 27 Feb 2023 16:55:41 -0800 Subject: [PATCH 31/36] retain np_datetime_string license --- pandas/_libs/tslibs/src/datetime/pd_datetime.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index b7d105e202ad2..ef6a85a3855f1 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -5,6 +5,14 @@ All rights reserved. Distributed under the terms of the BSD Simplified License. +The full license is in the LICENSE file, distributed with this software. +Written by Mark Wiebe (mwwiebe@gmail.com) + +Copyright (c) 2011 by Enthought, Inc. +Copyright (c) 2005-2011, NumPy Developers + +All rights reserved. +See NUMPY_LICENSE.txt for the license. */ #ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H_ From 3981ec29375edefb0ed53e89e5d7ab79f04f0b84 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 28 Feb 2023 08:50:32 -0800 Subject: [PATCH 32/36] retained old file structure where possible --- .../tslibs/src/datetime/date_conversions.h | 39 + .../_libs/tslibs/src/datetime/np_datetime.c | 1058 ++++++++ .../_libs/tslibs/src/datetime/np_datetime.h | 119 + .../tslibs/src/datetime/np_datetime_strings.c | 1150 +++++++++ .../tslibs/src/datetime/np_datetime_strings.h | 111 + .../_libs/tslibs/src/datetime/pd_datetime.c | 2256 ----------------- .../_libs/tslibs/src/datetime/pd_datetime.h | 51 +- setup.py | 14 +- 8 files changed, 2492 insertions(+), 2306 deletions(-) create mode 100644 pandas/_libs/tslibs/src/datetime/date_conversions.h create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime.c create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime.h create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime_strings.c create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime_strings.h diff --git a/pandas/_libs/tslibs/src/datetime/date_conversions.h b/pandas/_libs/tslibs/src/datetime/date_conversions.h new file mode 100644 index 0000000000000..45ba710dd42f2 --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/date_conversions.h @@ -0,0 +1,39 @@ +/* +Copyright (c) 2020, PyData Development Team +All rights reserved. +Distributed under the terms of the BSD Simplified License. +The full license is in the LICENSE file, distributed with this software. +*/ + +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_DATE_CONVERSIONS_H_ +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_DATE_CONVERSIONS_H_ + +#define PY_SSIZE_T_CLEAN +#include +#include + +// Scales value inplace from nanosecond resolution to unit resolution +int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit); + +// Converts an int64 object representing a date to ISO format +// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z +// while base="ns" yields "2020-01-01T00:00:00.000000000Z" +// len is mutated to save the length of the returned string +char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len); + +// TODO(username): this function doesn't do a lot; should augment or +// replace with scaleNanosecToUnit +npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base); + +// Converts a Python object representing a Date / Datetime to ISO format +// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z +// while base="ns" yields "2020-01-01T00:00:00.000000000Z" +// len is mutated to save the length of the returned string +char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, size_t *len); + +// Convert a Python Date/Datetime to Unix epoch with resolution base +npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base); + +char *int64ToIsoDuration(int64_t value, size_t *len); + +#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_DATE_CONVERSIONS_H_ diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c new file mode 100644 index 0000000000000..69a1bab3618c3 --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -0,0 +1,1058 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt + +*/ + +#define NO_IMPORT + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include + +#include +#include +#include +#include "np_datetime.h" + + +const int days_per_month_table[2][12] = { + {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; + +/* + * Returns 1 if the given year is a leap year, 0 otherwise. + */ +int is_leapyear(npy_int64 year) { + return (year & 0x3) == 0 && /* year % 4 == 0 */ + ((year % 100) != 0 || (year % 400) == 0); +} + +/* + * Adjusts a datetimestruct based on a minutes offset. Assumes + * the current values are valid.g + */ +void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) { + int isleap; + + /* MINUTES */ + dts->min += minutes; + while (dts->min < 0) { + dts->min += 60; + dts->hour--; + } + while (dts->min >= 60) { + dts->min -= 60; + dts->hour++; + } + + /* HOURS */ + while (dts->hour < 0) { + dts->hour += 24; + dts->day--; + } + while (dts->hour >= 24) { + dts->hour -= 24; + dts->day++; + } + + /* DAYS */ + if (dts->day < 1) { + dts->month--; + if (dts->month < 1) { + dts->year--; + dts->month = 12; + } + isleap = is_leapyear(dts->year); + dts->day += days_per_month_table[isleap][dts->month - 1]; + } else if (dts->day > 28) { + isleap = is_leapyear(dts->year); + if (dts->day > days_per_month_table[isleap][dts->month - 1]) { + dts->day -= days_per_month_table[isleap][dts->month - 1]; + dts->month++; + if (dts->month > 12) { + dts->year++; + dts->month = 1; + } + } + } +} + +/* + * Calculates the days offset from the 1970 epoch. + */ +npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { + int i, month; + npy_int64 year, days = 0; + const int *month_lengths; + + year = dts->year - 1970; + days = year * 365; + + /* Adjust for leap years */ + if (days >= 0) { + /* + * 1968 is the closest leap year before 1970. + * Exclude the current year, so add 1. + */ + year += 1; + /* Add one day for each 4 years */ + days += year / 4; + /* 1900 is the closest previous year divisible by 100 */ + year += 68; + /* Subtract one day for each 100 years */ + days -= year / 100; + /* 1600 is the closest previous year divisible by 400 */ + year += 300; + /* Add one day for each 400 years */ + days += year / 400; + } else { + /* + * 1972 is the closest later year after 1970. + * Include the current year, so subtract 2. + */ + year -= 2; + /* Subtract one day for each 4 years */ + days += year / 4; + /* 2000 is the closest later year divisible by 100 */ + year -= 28; + /* Add one day for each 100 years */ + days -= year / 100; + /* 2000 is also the closest later year divisible by 400 */ + /* Subtract one day for each 400 years */ + days += year / 400; + } + + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + month = dts->month - 1; + + /* Add the months */ + for (i = 0; i < month; ++i) { + days += month_lengths[i]; + } + + /* Add the days */ + days += dts->day - 1; + + return days; +} + +/* + * Modifies '*days_' to be the day offset within the year, + * and returns the year. + */ +static npy_int64 days_to_yearsdays(npy_int64 *days_) { + const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); + /* Adjust so it's relative to the year 2000 (divisible by 400) */ + npy_int64 days = (*days_) - (365 * 30 + 7); + npy_int64 year; + + /* Break down the 400 year cycle to get the year and day within the year */ + if (days >= 0) { + year = 400 * (days / days_per_400years); + days = days % days_per_400years; + } else { + year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); + days = days % days_per_400years; + if (days < 0) { + days += days_per_400years; + } + } + + /* Work out the year/day within the 400 year cycle */ + if (days >= 366) { + year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); + days = (days - 1) % (100 * 365 + 25 - 1); + if (days >= 365) { + year += 4 * ((days + 1) / (4 * 365 + 1)); + days = (days + 1) % (4 * 365 + 1); + if (days >= 366) { + year += (days - 1) / 365; + days = (days - 1) % 365; + } + } + } + + *days_ = days; + return year + 2000; +} + +/* + * Adjusts a datetimestruct based on a seconds offset. Assumes + * the current values are valid. + */ +NPY_NO_EXPORT void add_seconds_to_datetimestruct(npy_datetimestruct *dts, + int seconds) { + int minutes; + + dts->sec += seconds; + if (dts->sec < 0) { + minutes = dts->sec / 60; + dts->sec = dts->sec % 60; + if (dts->sec < 0) { + --minutes; + dts->sec += 60; + } + add_minutes_to_datetimestruct(dts, minutes); + } else if (dts->sec >= 60) { + minutes = dts->sec / 60; + dts->sec = dts->sec % 60; + add_minutes_to_datetimestruct(dts, minutes); + } +} + +/* + * Fills in the year, month, day in 'dts' based on the days + * offset from 1970. + */ +static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { + const int *month_lengths; + int i; + + dts->year = days_to_yearsdays(&days); + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + + for (i = 0; i < 12; ++i) { + if (days < month_lengths[i]) { + dts->month = i + 1; + dts->day = days + 1; + return; + } else { + days -= month_lengths[i]; + } + } +} + +/* + * Compares two npy_datetimestruct objects chronologically + */ +int cmp_npy_datetimestruct(const npy_datetimestruct *a, + const npy_datetimestruct *b) { + if (a->year > b->year) { + return 1; + } else if (a->year < b->year) { + return -1; + } + + if (a->month > b->month) { + return 1; + } else if (a->month < b->month) { + return -1; + } + + if (a->day > b->day) { + return 1; + } else if (a->day < b->day) { + return -1; + } + + if (a->hour > b->hour) { + return 1; + } else if (a->hour < b->hour) { + return -1; + } + + if (a->min > b->min) { + return 1; + } else if (a->min < b->min) { + return -1; + } + + if (a->sec > b->sec) { + return 1; + } else if (a->sec < b->sec) { + return -1; + } + + if (a->us > b->us) { + return 1; + } else if (a->us < b->us) { + return -1; + } + + if (a->ps > b->ps) { + return 1; + } else if (a->ps < b->ps) { + return -1; + } + + if (a->as > b->as) { + return 1; + } else if (a->as < b->as) { + return -1; + } + + return 0; +} +/* +* Returns the offset from utc of the timezone as a timedelta. +* The caller is responsible for ensuring that the tzinfo +* attribute exists on the datetime object. +* +* If the passed object is timezone naive, Py_None is returned. +* If extraction of the offset fails, NULL is returned. +* +* NOTE: This function is not vendored from numpy. +*/ +PyObject *extract_utc_offset(PyObject *obj) { + PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo"); + if (tmp == NULL) { + return NULL; + } + if (tmp != Py_None) { + PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); + if (offset == NULL) { + Py_DECREF(tmp); + return NULL; + } + return offset; + } + return tmp; +} + +/* + * + * Converts a Python datetime.datetime or datetime.date + * object into a NumPy npy_datetimestruct. Uses tzinfo (if present) + * to convert to UTC time. + * + * The following implementation just asks for attributes, and thus + * supports datetime duck typing. The tzinfo time zone conversion + * requires this style of access as well. + * + * Returns -1 on error, 0 on success, and 1 (with no error set) + * if obj doesn't have the needed date or datetime attributes. + */ +int convert_pydatetime_to_datetimestruct(PyObject *dtobj, + npy_datetimestruct *out) { + // Assumes that obj is a valid datetime object + PyObject *tmp; + PyObject *obj = (PyObject*)dtobj; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year")); + out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month")); + out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day")); + + // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use + // PyDateTime_Check here, and less verbose attribute lookups. + + /* Check for time attributes (if not there, return success as a date) */ + if (!PyObject_HasAttrString(obj, "hour") || + !PyObject_HasAttrString(obj, "minute") || + !PyObject_HasAttrString(obj, "second") || + !PyObject_HasAttrString(obj, "microsecond")) { + return 0; + } + + out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour")); + out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute")); + out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second")); + out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond")); + + if (PyObject_HasAttrString(obj, "tzinfo")) { + PyObject *offset = extract_utc_offset(obj); + /* Apply the time zone offset if datetime obj is tz-aware */ + if (offset != NULL) { + if (offset == Py_None) { + Py_DECREF(offset); + return 0; + } + PyObject *tmp_int; + int seconds_offset, minutes_offset; + /* + * The timedelta should have a function "total_seconds" + * which contains the value we want. + */ + tmp = PyObject_CallMethod(offset, "total_seconds", ""); + Py_DECREF(offset); + if (tmp == NULL) { + return -1; + } + tmp_int = PyNumber_Long(tmp); + if (tmp_int == NULL) { + Py_DECREF(tmp); + return -1; + } + seconds_offset = PyLong_AsLong(tmp_int); + if (seconds_offset == -1 && PyErr_Occurred()) { + Py_DECREF(tmp_int); + Py_DECREF(tmp); + return -1; + } + Py_DECREF(tmp_int); + Py_DECREF(tmp); + + /* Convert to a minutes offset and apply it */ + minutes_offset = seconds_offset / 60; + + add_minutes_to_datetimestruct(out, -minutes_offset); + } + } + + return 0; +} + + +/* + * Converts a datetime from a datetimestruct to a datetime based + * on a metadata unit. The date is assumed to be valid. + */ +npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, + const npy_datetimestruct *dts) { + npy_datetime ret; + + if (base == NPY_FR_Y) { + /* Truncate to the year */ + ret = dts->year - 1970; + } else if (base == NPY_FR_M) { + /* Truncate to the month */ + ret = 12 * (dts->year - 1970) + (dts->month - 1); + } else { + /* Otherwise calculate the number of days to start */ + npy_int64 days = get_datetimestruct_days(dts); + + switch (base) { + case NPY_FR_W: + /* Truncate to weeks */ + if (days >= 0) { + ret = days / 7; + } else { + ret = (days - 6) / 7; + } + break; + case NPY_FR_D: + ret = days; + break; + case NPY_FR_h: + ret = days * 24 + dts->hour; + break; + case NPY_FR_m: + ret = (days * 24 + dts->hour) * 60 + dts->min; + break; + case NPY_FR_s: + ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; + break; + case NPY_FR_ms: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000 + + dts->us / 1000; + break; + case NPY_FR_us: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us; + break; + case NPY_FR_ns: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000 + + dts->ps / 1000; + break; + case NPY_FR_ps: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps; + break; + case NPY_FR_fs: + /* only 2.6 hours */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000 + + dts->as / 1000; + break; + case NPY_FR_as: + /* only 9.2 secs */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000000 + + dts->as; + break; + default: + /* Something got corrupted */ + PyErr_SetString( + PyExc_ValueError, + "NumPy datetime metadata with corrupt unit value"); + return -1; + } + } + return ret; +} + +/* + * Port numpy#13188 https://github.com/numpy/numpy/pull/13188/ + * + * Computes the python `ret, d = divmod(d, unit)`. + * + * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch + * for subsequent calls to this command - it is able to deduce that `*d >= 0`. + */ +npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { + assert(unit > 0); + npy_int64 div = *d / unit; + npy_int64 mod = *d % unit; + if (mod < 0) { + mod += unit; + div -= 1; + } + assert(mod >= 0); + *d = mod; + return div; +} + +/* + * Converts a datetime based on the given metadata into a datetimestruct + */ +void pandas_datetime_to_datetimestruct(npy_datetime dt, + NPY_DATETIMEUNIT base, + npy_datetimestruct *out) { + npy_int64 perday; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->year = 1970; + out->month = 1; + out->day = 1; + + /* + * Note that care must be taken with the / and % operators + * for negative values. + */ + switch (base) { + case NPY_FR_Y: + out->year = 1970 + dt; + break; + + case NPY_FR_M: + out->year = 1970 + extract_unit(&dt, 12); + out->month = dt + 1; + break; + + case NPY_FR_W: + /* A week is 7 days */ + set_datetimestruct_days(dt * 7, out); + break; + + case NPY_FR_D: + set_datetimestruct_days(dt, out); + break; + + case NPY_FR_h: + perday = 24LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = dt; + break; + + case NPY_FR_m: + perday = 24LL * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60); + out->min = (int)dt; + break; + + case NPY_FR_s: + perday = 24LL * 60 * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60 * 60); + out->min = (int)extract_unit(&dt, 60); + out->sec = (int)dt; + break; + + case NPY_FR_ms: + perday = 24LL * 60 * 60 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 60); + out->sec = (int)extract_unit(&dt, 1000LL); + out->us = (int)(dt * 1000); + break; + + case NPY_FR_us: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000); + out->us = (int)dt; + break; + + case NPY_FR_ns: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_ps: + perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_fs: + /* entire range is only +- 2.6 hours */ + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60 * 60); + if (out->hour < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour += 24; + assert(out->hour >= 0); + } + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000); + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL); + out->as = (int)(dt * 1000); + break; + + case NPY_FR_as: + /* entire range is only +- 9.2 seconds */ + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 1000); + if (out->sec < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour = 23; + out->min = 59; + out->sec += 60; + assert(out->sec >= 0); + } + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL * 1000); + out->as = (int)dt; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy datetime metadata is corrupted with invalid " + "base unit"); + } +} + +/* + * Converts a timedelta from a timedeltastruct to a timedelta based + * on a metadata unit. The timedelta is assumed to be valid. + * + * Returns 0 on success, -1 on failure. + */ +void pandas_timedelta_to_timedeltastruct(npy_timedelta td, + NPY_DATETIMEUNIT base, + pandas_timedeltastruct *out) { + npy_int64 frac; + npy_int64 sfrac; + npy_int64 ifrac; + int sign; + npy_int64 per_day; + npy_int64 per_sec; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(pandas_timedeltastruct)); + + switch (base) { + case NPY_FR_ns: + + per_day = 86400000000000LL; + per_sec = 1000LL * 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / (1000LL * 1000LL); + ifrac -= out->ms * 1000LL * 1000LL; + out->us = ifrac / 1000LL; + ifrac -= out->us * 1000LL; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_us: + + per_day = 86400000000LL; + per_sec = 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / 1000LL; + ifrac -= out->ms * 1000LL; + out->us = ifrac / 1L; + ifrac -= out->us * 1L; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_ms: + + per_day = 86400000LL; + per_sec = 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_s: + // special case where we can simplify many expressions bc per_sec=1 + + per_day = 86400LL; + per_sec = 1L; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = 0; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_m: + + out->days = td / 1440LL; + td -= out->days * 1440LL; + out->hrs = td / 60LL; + td -= out->hrs * 60LL; + out->min = td; + + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_h: + out->days = td / 24LL; + td -= out->days * 24LL; + out->hrs = td; + + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_D: + out->days = td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_W: + out->days = 7 * td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy timedelta metadata is corrupted with " + "invalid base unit"); + } + + out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; + out->microseconds = out->ms * 1000 + out->us; + out->nanoseconds = out->ns; +} + + +/* + * This function returns a pointer to the DateTimeMetaData + * contained within the provided datetime dtype. + * + * Copied near-verbatim from numpy/core/src/multiarray/datetime.c + */ +PyArray_DatetimeMetaData +get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { + return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); +} diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h new file mode 100644 index 0000000000000..68f72683ab2e4 --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h @@ -0,0 +1,119 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt + +*/ + +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include + +typedef struct { + npy_int64 days; + npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds; +} pandas_timedeltastruct; + +static const npy_datetimestruct _AS_MIN_DTS = { + 1969, 12, 31, 23, 59, 50, 776627, 963145, 224193}; +static const npy_datetimestruct _FS_MIN_DTS = { + 1969, 12, 31, 21, 26, 16, 627963, 145224, 193000}; +static const npy_datetimestruct _PS_MIN_DTS = { + 1969, 9, 16, 5, 57, 7, 963145, 224193, 0}; +static const npy_datetimestruct _NS_MIN_DTS = { + 1677, 9, 21, 0, 12, 43, 145224, 193000, 0}; +static const npy_datetimestruct _US_MIN_DTS = { + -290308, 12, 21, 19, 59, 05, 224193, 0, 0}; +static const npy_datetimestruct _MS_MIN_DTS = { + -292275055, 5, 16, 16, 47, 4, 193000, 0, 0}; +static const npy_datetimestruct _S_MIN_DTS = { + -292277022657, 1, 27, 8, 29, 53, 0, 0, 0}; +static const npy_datetimestruct _M_MIN_DTS = { + -17536621475646, 5, 4, 5, 53, 0, 0, 0, 0}; + +static const npy_datetimestruct _AS_MAX_DTS = { + 1970, 1, 1, 0, 0, 9, 223372, 36854, 775807}; +static const npy_datetimestruct _FS_MAX_DTS = { + 1970, 1, 1, 2, 33, 43, 372036, 854775, 807000}; +static const npy_datetimestruct _PS_MAX_DTS = { + 1970, 4, 17, 18, 2, 52, 36854, 775807, 0}; +static const npy_datetimestruct _NS_MAX_DTS = { + 2262, 4, 11, 23, 47, 16, 854775, 807000, 0}; +static const npy_datetimestruct _US_MAX_DTS = { + 294247, 1, 10, 4, 0, 54, 775807, 0, 0}; +static const npy_datetimestruct _MS_MAX_DTS = { + 292278994, 8, 17, 7, 12, 55, 807000, 0, 0}; +static const npy_datetimestruct _S_MAX_DTS = { + 292277026596, 12, 4, 15, 30, 7, 0, 0, 0}; +static const npy_datetimestruct _M_MAX_DTS = { + 17536621479585, 8, 30, 18, 7, 0, 0, 0, 0}; + +// stuff pandas needs +// ---------------------------------------------------------------------------- + +PyObject *extract_utc_offset(PyObject *obj); + +int convert_pydatetime_to_datetimestruct(PyObject *dtobj, + npy_datetimestruct *out); + +npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, + const npy_datetimestruct *dts); + +void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr, + npy_datetimestruct *result); + +void pandas_timedelta_to_timedeltastruct(npy_timedelta val, + NPY_DATETIMEUNIT fr, + pandas_timedeltastruct *result); + +extern const int days_per_month_table[2][12]; + +// stuff numpy-derived code needs in header +// ---------------------------------------------------------------------------- + +int is_leapyear(npy_int64 year); + +/* + * Calculates the days offset from the 1970 epoch. + */ +npy_int64 +get_datetimestruct_days(const npy_datetimestruct *dts); + + +/* + * Compares two npy_datetimestruct objects chronologically + */ +int cmp_npy_datetimestruct(const npy_datetimestruct *a, + const npy_datetimestruct *b); + + +/* + * Adjusts a datetimestruct based on a minutes offset. Assumes + * the current values are valid. + */ +void +add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes); + +/* + * This function returns the DateTimeMetaData + * contained within the provided datetime dtype. + */ +PyArray_DatetimeMetaData get_datetime_metadata_from_dtype( + PyArray_Descr *dtype); + + +#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c new file mode 100644 index 0000000000000..f1f03e6467eac --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -0,0 +1,1150 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Written by Mark Wiebe (mwwiebe@gmail.com) +Copyright (c) 2011 by Enthought, Inc. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +See NUMPY_LICENSE.txt for the license. + +This file implements string parsing and creation for NumPy datetime. + +*/ + +#define PY_SSIZE_T_CLEAN +#define NO_IMPORT + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include + +#include + +#include +#include +#include + +#include "np_datetime.h" +#include "np_datetime_strings.h" + + +/* + * Parses (almost) standard ISO 8601 date strings. The differences are: + * + * + Only seconds may have a decimal point, with up to 18 digits after it + * (maximum attoseconds precision). + * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate + * the date and the time. Both are treated equivalently. + * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. + * + Doesn't handle leap seconds (seconds value has 60 in these cases). + * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow + * + Accepts special values "NaT" (not a time), "Today", (current + * day according to local time) and "Now" (current time in UTC). + * + ':' separator between hours, minutes, and seconds is optional. When + * omitted, each component must be 2 digits if it appears. (GH-10041) + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets set to 1 if the parsed time contains timezone, + * to 0 otherwise. + * 'out_tzoffset' gets set to timezone offset by minutes + * if the parsed time was in local time, + * to 0 otherwise. The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * + * Returns 0 on success, -1 on failure. + */ + +typedef enum { + COMPARISON_SUCCESS, + COMPLETED_PARTIAL_MATCH, + COMPARISON_ERROR +} DatetimePartParseResult; +// This function will advance the pointer on format +// and decrement characters_remaining by n on success +// On failure will return COMPARISON_ERROR without incrementing +// If `format_requirement` is PARTIAL_MATCH, and the `format` string has +// been exhausted, then return COMPLETED_PARTIAL_MATCH. +static DatetimePartParseResult compare_format( + const char **format, + int *characters_remaining, + const char *compare_to, + int n, + const FormatRequirement format_requirement +) { + if (format_requirement == INFER_FORMAT) { + return COMPARISON_SUCCESS; + } + if (*characters_remaining < 0) { + return COMPARISON_ERROR; + } + if (format_requirement == PARTIAL_MATCH && *characters_remaining == 0) { + return COMPLETED_PARTIAL_MATCH; + } + if (*characters_remaining < n) { + // TODO(pandas-dev): PyErr to differentiate what went wrong + return COMPARISON_ERROR; + } else { + if (strncmp(*format, compare_to, n)) { + // TODO(pandas-dev): PyErr to differentiate what went wrong + return COMPARISON_ERROR; + } else { + *format += n; + *characters_remaining -= n; + return COMPARISON_SUCCESS; + } + } + return COMPARISON_SUCCESS; +} + +int parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + NPY_DATETIMEUNIT *out_bestunit, + int *out_local, int *out_tzoffset, + const char* format, int format_len, + FormatRequirement format_requirement) { + if (len < 0 || format_len < 0) + goto parse_error; + int year_leap = 0; + int i, numdigits; + const char *substr; + int sublen; + NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; + DatetimePartParseResult comparison; + + /* If year-month-day are separated by a valid separator, + * months/days without leading zeroes will be parsed + * (though not iso8601). If the components aren't separated, + * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are + * forbidden here (but parsed as YYMMDD elsewhere). + */ + int has_ymd_sep = 0; + char ymd_sep = '\0'; + char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '}; + int valid_ymd_sep_len = sizeof(valid_ymd_sep); + + /* hour-minute-second may or may not separated by ':'. If not, then + * each component must be 2 digits. */ + int has_hms_sep = 0; + int hour_was_2_digits = 0; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + substr = str; + sublen = len; + + /* Skip leading whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + /* Leading '-' sign for negative year */ + if (*substr == '-') { + ++substr; + --sublen; + } + + if (sublen == 0) { + goto parse_error; + } + + /* PARSE THE YEAR (4 digits) */ + comparison = compare_format(&format, &format_len, "%Y", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + + out->year = 0; + if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) && + isdigit(substr[2]) && isdigit(substr[3])) { + out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') + + 10 * (substr[2] - '0') + (substr[3] - '0'); + + substr += 4; + sublen -= 4; + } + + /* Negate the year if necessary */ + if (str[0] == '-') { + out->year = -out->year; + } + /* Check whether it's a leap-year */ + year_leap = is_leapyear(out->year); + + /* Next character must be a separator, start of month, or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + if (format_len) { + goto parse_error; + } + bestunit = NPY_FR_Y; + goto finish; + } + + if (!isdigit(*substr)) { + for (i = 0; i < valid_ymd_sep_len; ++i) { + if (*substr == valid_ymd_sep[i]) { + break; + } + } + if (i == valid_ymd_sep_len) { + goto parse_error; + } + has_ymd_sep = 1; + ymd_sep = valid_ymd_sep[i]; + ++substr; + --sublen; + + comparison = compare_format(&format, &format_len, &ymd_sep, 1, + format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* Cannot have trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } + + /* PARSE THE MONTH */ + comparison = compare_format(&format, &format_len, "%m", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + out->month = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->month = 10 * out->month + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->month < 1 || out->month > 12) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Month out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be the separator, start of day, or end of string */ + if (sublen == 0) { + bestunit = NPY_FR_M; + /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */ + if (!has_ymd_sep) { + goto parse_error; + } + if (format_len) { + goto parse_error; + } + if (out_local != NULL) { + *out_local = 0; + } + goto finish; + } + + if (has_ymd_sep) { + /* Must have separator, but cannot be trailing */ + if (*substr != ymd_sep || sublen == 1) { + goto parse_error; + } + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, &ymd_sep, 1, + format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + /* PARSE THE DAY */ + comparison = compare_format(&format, &format_len, "%d", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->day = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->day = 10 * out->day + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->day < 1 || + out->day > days_per_month_table[year_leap][out->month - 1]) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Day out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be a 'T', ' ', or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + if (format_len) { + goto parse_error; + } + bestunit = NPY_FR_D; + goto finish; + } + + if ((*substr != 'T' && *substr != ' ') || sublen == 1) { + goto parse_error; + } + comparison = compare_format(&format, &format_len, substr, 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + ++substr; + --sublen; + + /* PARSE THE HOURS */ + comparison = compare_format(&format, &format_len, "%H", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->hour = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional */ + if (isdigit(*substr)) { + hour_was_2_digits = 1; + out->hour = 10 * out->hour + (*substr - '0'); + ++substr; + --sublen; + if (out->hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Hours out of range in datetime string \"%s\"", + str); + } + goto error; + } + } + + /* Next character must be a ':' or the end of the string */ + if (sublen == 0) { + if (!hour_was_2_digits) { + goto parse_error; + } + if (format_len) { + goto parse_error; + } + bestunit = NPY_FR_h; + goto finish; + } + + if (*substr == ':') { + has_hms_sep = 1; + ++substr; + --sublen; + /* Cannot have a trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + comparison = compare_format(&format, &format_len, ":", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } else if (!isdigit(*substr)) { + if (!hour_was_2_digits) { + goto parse_error; + } + goto parse_timezone; + } + + /* PARSE THE MINUTES */ + comparison = compare_format(&format, &format_len, "%M", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + out->min = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->min = 10 * out->min + (*substr - '0'); + ++substr; + --sublen; + if (out->min >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Minutes out of range in datetime string \"%s\"", + str); + } + goto error; + } + } else if (!has_hms_sep) { + goto parse_error; + } + + if (sublen == 0) { + bestunit = NPY_FR_m; + if (format_len) { + goto parse_error; + } + goto finish; + } + + /* If we make it through this condition block, then the next + * character is a digit. */ + if (has_hms_sep && *substr == ':') { + comparison = compare_format(&format, &format_len, ":", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + ++substr; + --sublen; + /* Cannot have a trailing ':' */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } else if (!has_hms_sep && isdigit(*substr)) { + } else { + goto parse_timezone; + } + + /* PARSE THE SECONDS */ + comparison = compare_format(&format, &format_len, "%S", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + out->sec = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->sec = 10 * out->sec + (*substr - '0'); + ++substr; + --sublen; + if (out->sec >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Seconds out of range in datetime string \"%s\"", + str); + } + goto error; + } + } else if (!has_hms_sep) { + goto parse_error; + } + + /* Next character may be a '.' indicating fractional seconds */ + if (sublen > 0 && *substr == '.') { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, ".", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } else { + bestunit = NPY_FR_s; + goto parse_timezone; + } + + /* PARSE THE MICROSECONDS (0 to 6 digits) */ + comparison = compare_format(&format, &format_len, "%f", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->us *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->us += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_us; + } else { + bestunit = NPY_FR_ms; + } + goto parse_timezone; + } + + /* PARSE THE PICOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->ps *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->ps += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_ps; + } else { + bestunit = NPY_FR_ns; + } + goto parse_timezone; + } + + /* PARSE THE ATTOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->as *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->as += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (numdigits > 3) { + bestunit = NPY_FR_as; + } else { + bestunit = NPY_FR_fs; + } + +parse_timezone: + /* trim any whitespace between time/timezone */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + if (sublen == 0) { + // Unlike NumPy, treating no time zone as naive + if (format_len > 0) { + goto parse_error; + } + goto finish; + } + + /* UTC specifier */ + if (*substr == 'Z') { + comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* "Z" should be equivalent to tz offset "+00:00" */ + if (out_local != NULL) { + *out_local = 1; + } + + if (out_tzoffset != NULL) { + *out_tzoffset = 0; + } + + if (sublen == 1) { + if (format_len > 0) { + goto parse_error; + } + goto finish; + } else { + ++substr; + --sublen; + } + } else if (*substr == '-' || *substr == '+') { + comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* Time zone offset */ + int offset_neg = 0, offset_hour = 0, offset_minute = 0; + + /* + * Since "local" means local with respect to the current + * machine, we say this is non-local. + */ + + if (*substr == '-') { + offset_neg = 1; + } + ++substr; + --sublen; + + /* The hours offset */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone hours offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_hour = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + + /* The minutes offset is optional */ + if (sublen > 0) { + /* Optional ':' */ + if (*substr == ':') { + ++substr; + --sublen; + } + + /* The minutes offset (at the end of the string) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_minute >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone minutes offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_minute = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + } + + /* Apply the time zone offset */ + if (offset_neg) { + offset_hour = -offset_hour; + offset_minute = -offset_minute; + } + if (out_local != NULL) { + *out_local = 1; + // Unlike NumPy, do not change internal value to local time + *out_tzoffset = 60 * offset_hour + offset_minute; + } + } + + /* Skip trailing whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + if ((sublen != 0) || (format_len != 0)) { + goto parse_error; + } + +finish: + if (out_bestunit != NULL) { + *out_bestunit = bestunit; + } + return 0; + +parse_error: + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Error parsing datetime string \"%s\" at position %d", str, + (int)(substr - str)); + } + return -1; + +error: + return -1; +} + +/* + * Provides a string length to use for converting datetime + * objects with the given local and unit settings. + */ +int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) { + int len = 0; + + switch (base) { + /* Generic units can only be used to represent NaT */ + /* return 4;*/ + case NPY_FR_as: + len += 3; /* "###" */ + case NPY_FR_fs: + len += 3; /* "###" */ + case NPY_FR_ps: + len += 3; /* "###" */ + case NPY_FR_ns: + len += 3; /* "###" */ + case NPY_FR_us: + len += 3; /* "###" */ + case NPY_FR_ms: + len += 4; /* ".###" */ + case NPY_FR_s: + len += 3; /* ":##" */ + case NPY_FR_m: + len += 3; /* ":##" */ + case NPY_FR_h: + len += 3; /* "T##" */ + case NPY_FR_D: + case NPY_FR_W: + len += 3; /* "-##" */ + case NPY_FR_M: + len += 3; /* "-##" */ + case NPY_FR_Y: + len += 21; /* 64-bit year */ + break; + default: + len += 3; /* handle the now defunct NPY_FR_B */ + break; + } + + if (base >= NPY_FR_h) { + if (local) { + len += 5; /* "+####" or "-####" */ + } else { + len += 1; /* "Z" */ + } + } + + len += 1; /* NULL terminator */ + + return len; +} + + +/* + * Converts an npy_datetimestruct to an (almost) ISO 8601 + * NULL-terminated string using timezone Z (UTC). If the string fits in + * the space exactly, it leaves out the NULL terminator and returns success. + * + * The differences from ISO 8601 are the 'NaT' string, and + * the number of year digits is >= 4 instead of strictly 4. + * + * 'base' restricts the output to that unit. Set 'base' to + * -1 to auto-detect a base after which all the values are zero. + * + * Returns 0 on success, -1 on failure (for example if the output + * string was too short). + */ +int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + int utc, NPY_DATETIMEUNIT base) { + char *substr = outstr; + int sublen = outlen; + int tmplen; + + /* + * Print weeks with the same precision as days. + * + * TODO: Could print weeks with YYYY-Www format if the week + * epoch is a Monday. + */ + if (base == NPY_FR_W) { + base = NPY_FR_D; + } + +/* YEAR */ +/* + * Can't use PyOS_snprintf, because it always produces a '\0' + * character at the end, and NumPy string types are permitted + * to have data all the way to the end of the buffer. + */ +#ifdef _WIN32 + tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#else + tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#endif // _WIN32 + /* If it ran out of space or there isn't space for the NULL terminator */ + if (tmplen < 0 || tmplen > sublen) { + goto string_too_short; + } + substr += tmplen; + sublen -= tmplen; + + /* Stop if the unit is years */ + if (base == NPY_FR_Y) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* MONTH */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->month / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->month % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is months */ + if (base == NPY_FR_M) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* DAY */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->day / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->day % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is days */ + if (base == NPY_FR_D) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* HOUR */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = 'T'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->hour / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->hour % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is hours */ + if (base == NPY_FR_h) { + goto add_time_zone; + } + + /* MINUTE */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->min / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->min % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is minutes */ + if (base == NPY_FR_m) { + goto add_time_zone; + } + + /* SECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->sec / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->sec % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is seconds */ + if (base == NPY_FR_s) { + goto add_time_zone; + } + + /* MILLISECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '.'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 100000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->us / 10000) % 10 + '0'); + if (sublen < 4) { + goto string_too_short; + } + substr[3] = (char)((dts->us / 1000) % 10 + '0'); + substr += 4; + sublen -= 4; + + /* Stop if the unit is milliseconds */ + if (base == NPY_FR_ms) { + goto add_time_zone; + } + + /* MICROSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->us / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->us % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is microseconds */ + if (base == NPY_FR_us) { + goto add_time_zone; + } + + /* NANOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->ps / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is nanoseconds */ + if (base == NPY_FR_ns) { + goto add_time_zone; + } + + /* PICOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->ps % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is picoseconds */ + if (base == NPY_FR_ps) { + goto add_time_zone; + } + + /* FEMTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->as / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is femtoseconds */ + if (base == NPY_FR_fs) { + goto add_time_zone; + } + + /* ATTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->as % 10 + '0'); + substr += 3; + sublen -= 3; + +add_time_zone: + /* UTC "Zulu" time */ + if (utc) { + if (sublen < 1) { + goto string_too_short; + } + substr[0] = 'Z'; + substr += 1; + sublen -= 1; + } + /* Add a NULL terminator, and return */ + if (sublen > 0) { + substr[0] = '\0'; + } + + return 0; + +string_too_short: + PyErr_Format(PyExc_RuntimeError, + "The string provided for NumPy ISO datetime formatting " + "was too short, with length %d", + outlen); + return -1; +} + + +int make_iso_8601_timedelta(pandas_timedeltastruct *tds, + char *outstr, size_t *outlen) { + *outlen = 0; + *outlen += snprintf(outstr, 60, // NOLINT + "P%" NPY_INT64_FMT + "DT%" NPY_INT32_FMT + "H%" NPY_INT32_FMT + "M%" NPY_INT32_FMT, + tds->days, tds->hrs, tds->min, tds->sec); + outstr += *outlen; + + if (tds->ns != 0) { + *outlen += snprintf(outstr, 12, // NOLINT + ".%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "S", tds->ms, tds->us, tds->ns); + } else if (tds->us != 0) { + *outlen += snprintf(outstr, 9, // NOLINT + ".%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "S", tds->ms, tds->us); + } else if (tds->ms != 0) { + *outlen += snprintf(outstr, 6, // NOLINT + ".%03" NPY_INT32_FMT "S", tds->ms); + } else { + *outlen += snprintf(outstr, 2, // NOLINT + "%s", "S"); + } + + return 0; +} diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h new file mode 100644 index 0000000000000..a635192d70809 --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h @@ -0,0 +1,111 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Written by Mark Wiebe (mwwiebe@gmail.com) +Copyright (c) 2011 by Enthought, Inc. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +See NUMPY_LICENSE.txt for the license. + +This file implements string parsing and creation for NumPy datetime. + +*/ + +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +/* 'format_requirement' can be one of three values: + * * PARTIAL_MATCH : Only require a partial match with 'format'. + * For example, if the string is '2020-01-01 05:00:00' and + * 'format' is '%Y-%m-%d', then parse '2020-01-01'; + * * EXACT_MATCH : require an exact match with 'format'. If the + * string is '2020-01-01', then the only format which will + * be able to parse it without error is '%Y-%m-%d'; + * * INFER_FORMAT: parse without comparing 'format' (i.e. infer it). + */ +typedef enum { + PARTIAL_MATCH, + EXACT_MATCH, + INFER_FORMAT +} FormatRequirement; + +/* + * Parses (almost) standard ISO 8601 date strings. The differences are: + * + * + The date "20100312" is parsed as the year 20100312, not as + * equivalent to "2010-03-12". The '-' in the dates are not optional. + * + Only seconds may have a decimal point, with up to 18 digits after it + * (maximum attoseconds precision). + * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate + * the date and the time. Both are treated equivalently. + * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. + * + Doesn't handle leap seconds (seconds value has 60 in these cases). + * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow + * + Accepts special values "NaT" (not a time), "Today", (current + * day according to local time) and "Now" (current time in UTC). + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets whether returned value contains timezone. 0 for UTC, 1 for local time. + * 'out_tzoffset' gets set to timezone offset by minutes + * if the parsed time was in local time, + * to 0 otherwise. The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * + * Returns 0 on success, -1 on failure. + */ +int +parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + NPY_DATETIMEUNIT *out_bestunit, + int *out_local, + int *out_tzoffset, + const char* format, + int format_len, + FormatRequirement format_requirement); + +/* + * Provides a string length to use for converting datetime + * objects with the given local and unit settings. + */ +int +get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base); + +/* + * Converts an npy_datetimestruct to an (almost) ISO 8601 + * NULL-terminated string using timezone Z (UTC). + * + * 'base' restricts the output to that unit. Set 'base' to + * -1 to auto-detect a base after which all the values are zero. + * + * Returns 0 on success, -1 on failure (for example if the output + * string was too short). + */ +int +make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + int utc, NPY_DATETIMEUNIT base); + +/* + * Converts an pandas_timedeltastruct to an ISO 8601 string. + * + * Mutates outlen to provide size of (non-NULL terminated) string. + * + * Currently has no error handling + */ +int make_iso_8601_timedelta(pandas_timedeltastruct *tds, char *outstr, + size_t *outlen); +#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index 52456104999c8..a2a6220ed0924 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -22,2262 +22,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #include "datetime.h" #include "pd_datetime.h" -static const int days_per_month_table[2][12] = { - {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, - {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; - -/* - * Returns 1 if the given year is a leap year, 0 otherwise. - */ -static int is_leapyear(npy_int64 year) { - return (year & 0x3) == 0 && /* year % 4 == 0 */ - ((year % 100) != 0 || (year % 400) == 0); -} - -/* - * Calculates the days offset from the 1970 epoch. - */ -static npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { - int i, month; - npy_int64 year, days = 0; - const int *month_lengths; - - year = dts->year - 1970; - days = year * 365; - - /* Adjust for leap years */ - if (days >= 0) { - /* - * 1968 is the closest leap year before 1970. - * Exclude the current year, so add 1. - */ - year += 1; - /* Add one day for each 4 years */ - days += year / 4; - /* 1900 is the closest previous year divisible by 100 */ - year += 68; - /* Subtract one day for each 100 years */ - days -= year / 100; - /* 1600 is the closest previous year divisible by 400 */ - year += 300; - /* Add one day for each 400 years */ - days += year / 400; - } else { - /* - * 1972 is the closest later year after 1970. - * Include the current year, so subtract 2. - */ - year -= 2; - /* Subtract one day for each 4 years */ - days += year / 4; - /* 2000 is the closest later year divisible by 100 */ - year -= 28; - /* Add one day for each 100 years */ - days -= year / 100; - /* 2000 is also the closest later year divisible by 400 */ - /* Subtract one day for each 400 years */ - days += year / 400; - } - - month_lengths = days_per_month_table[is_leapyear(dts->year)]; - month = dts->month - 1; - - /* Add the months */ - for (i = 0; i < month; ++i) { - days += month_lengths[i]; - } - - /* Add the days */ - days += dts->day - 1; - - return days; -} - -/* - * Converts a datetime from a datetimestruct to a datetime based - * on a metadata unit. The date is assumed to be valid. - */ -static npy_datetime -npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, - const npy_datetimestruct *dts) { - npy_datetime ret; - - if (base == NPY_FR_Y) { - /* Truncate to the year */ - ret = dts->year - 1970; - } else if (base == NPY_FR_M) { - /* Truncate to the month */ - ret = 12 * (dts->year - 1970) + (dts->month - 1); - } else { - /* Otherwise calculate the number of days to start */ - npy_int64 days = get_datetimestruct_days(dts); - - switch (base) { - case NPY_FR_W: - /* Truncate to weeks */ - if (days >= 0) { - ret = days / 7; - } else { - ret = (days - 6) / 7; - } - break; - case NPY_FR_D: - ret = days; - break; - case NPY_FR_h: - ret = days * 24 + dts->hour; - break; - case NPY_FR_m: - ret = (days * 24 + dts->hour) * 60 + dts->min; - break; - case NPY_FR_s: - ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; - break; - case NPY_FR_ms: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * 1000 + - dts->us / 1000; - break; - case NPY_FR_us: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us; - break; - case NPY_FR_ns: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000 + - dts->ps / 1000; - break; - case NPY_FR_ps: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps; - break; - case NPY_FR_fs: - /* only 2.6 hours */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000 + - dts->as / 1000; - break; - case NPY_FR_as: - /* only 9.2 secs */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000000 + - dts->as; - break; - default: - /* Something got corrupted */ - PyErr_SetString(PyExc_ValueError, - "NumPy datetime metadata with corrupt unit value"); - return -1; - } - } - return ret; -} - -/* - * Function: scaleNanosecToUnit - * ----------------------------- - * - * Scales an integer value representing time in nanoseconds to provided unit. - * - * Mutates the provided value directly. Returns 0 on success, non-zero on - * error. - */ -static int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { - switch (unit) { - case NPY_FR_ns: - break; - case NPY_FR_us: - *value /= 1000LL; - break; - case NPY_FR_ms: - *value /= 1000000LL; - break; - case NPY_FR_s: - *value /= 1000000000LL; - break; - default: - return -1; - } - - return 0; -} - -/* - * Compares two npy_datetimestruct objects chronologically - */ -static int cmp_npy_datetimestruct(const npy_datetimestruct *a, - const npy_datetimestruct *b) { - if (a->year > b->year) { - return 1; - } else if (a->year < b->year) { - return -1; - } - - if (a->month > b->month) { - return 1; - } else if (a->month < b->month) { - return -1; - } - - if (a->day > b->day) { - return 1; - } else if (a->day < b->day) { - return -1; - } - - if (a->hour > b->hour) { - return 1; - } else if (a->hour < b->hour) { - return -1; - } - - if (a->min > b->min) { - return 1; - } else if (a->min < b->min) { - return -1; - } - - if (a->sec > b->sec) { - return 1; - } else if (a->sec < b->sec) { - return -1; - } - - if (a->us > b->us) { - return 1; - } else if (a->us < b->us) { - return -1; - } - - if (a->ps > b->ps) { - return 1; - } else if (a->ps < b->ps) { - return -1; - } - - if (a->as > b->as) { - return 1; - } else if (a->as < b->as) { - return -1; - } - - return 0; -} - -/* - * Returns the offset from utc of the timezone as a timedelta. - * The caller is responsible for ensuring that the tzinfo - * attribute exists on the datetime object. - * - * If the passed object is timezone naive, Py_None is returned. - * If extraction of the offset fails, NULL is returned. - * - * NOTE: This function is not vendored from numpy. - */ -static PyObject *extract_utc_offset(PyObject *obj) { - PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo"); - if (tmp == NULL) { - return NULL; - } - if (tmp != Py_None) { - PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); - if (offset == NULL) { - Py_DECREF(tmp); - return NULL; - } - return offset; - } - return tmp; -} - -/* - * Adjusts a datetimestruct based on a minutes offset. Assumes - * the current values are valid.g - */ -void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) { - int isleap; - - /* MINUTES */ - dts->min += minutes; - while (dts->min < 0) { - dts->min += 60; - dts->hour--; - } - while (dts->min >= 60) { - dts->min -= 60; - dts->hour++; - } - - /* HOURS */ - while (dts->hour < 0) { - dts->hour += 24; - dts->day--; - } - while (dts->hour >= 24) { - dts->hour -= 24; - dts->day++; - } - - /* DAYS */ - if (dts->day < 1) { - dts->month--; - if (dts->month < 1) { - dts->year--; - dts->month = 12; - } - isleap = is_leapyear(dts->year); - dts->day += days_per_month_table[isleap][dts->month - 1]; - } else if (dts->day > 28) { - isleap = is_leapyear(dts->year); - if (dts->day > days_per_month_table[isleap][dts->month - 1]) { - dts->day -= days_per_month_table[isleap][dts->month - 1]; - dts->month++; - if (dts->month > 12) { - dts->year++; - dts->month = 1; - } - } - } -} - -/* - * - * Converts a Python datetime.datetime or datetime.date - * object into a NumPy npy_datetimestruct. Uses tzinfo (if present) - * to convert to UTC time. - * - * The following implementation just asks for attributes, and thus - * supports datetime duck typing. The tzinfo time zone conversion - * requires this style of access as well. - * - * Returns -1 on error, 0 on success, and 1 (with no error set) - * if obj doesn't have the needed date or datetime attributes. - */ -static int convert_pydatetime_to_datetimestruct(PyObject *dtobj, - npy_datetimestruct *out) { - // Assumes that obj is a valid datetime object - PyObject *tmp; - PyObject *obj = (PyObject *)dtobj; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->month = 1; - out->day = 1; - - out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year")); - out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month")); - out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day")); - - // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use - // PyDateTime_Check here, and less verbose attribute lookups. - - /* Check for time attributes (if not there, return success as a date) */ - if (!PyObject_HasAttrString(obj, "hour") || - !PyObject_HasAttrString(obj, "minute") || - !PyObject_HasAttrString(obj, "second") || - !PyObject_HasAttrString(obj, "microsecond")) { - return 0; - } - - out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour")); - out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute")); - out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second")); - out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond")); - - if (PyObject_HasAttrString(obj, "tzinfo")) { - PyObject *offset = extract_utc_offset(obj); - /* Apply the time zone offset if datetime obj is tz-aware */ - if (offset != NULL) { - if (offset == Py_None) { - Py_DECREF(offset); - return 0; - } - PyObject *tmp_int; - int seconds_offset, minutes_offset; - /* - * The timedelta should have a function "total_seconds" - * which contains the value we want. - */ - tmp = PyObject_CallMethod(offset, "total_seconds", ""); - Py_DECREF(offset); - if (tmp == NULL) { - return -1; - } - tmp_int = PyNumber_Long(tmp); - if (tmp_int == NULL) { - Py_DECREF(tmp); - return -1; - } - seconds_offset = PyLong_AsLong(tmp_int); - if (seconds_offset == -1 && PyErr_Occurred()) { - Py_DECREF(tmp_int); - Py_DECREF(tmp); - return -1; - } - Py_DECREF(tmp_int); - Py_DECREF(tmp); - - /* Convert to a minutes offset and apply it */ - minutes_offset = seconds_offset / 60; - - add_minutes_to_datetimestruct(out, -minutes_offset); - } - } - - return 0; -} - -/* - * Port numpy#13188 https://github.com/numpy/numpy/pull/13188/ - * - * Computes the python `ret, d = divmod(d, unit)`. - * - * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch - * for subsequent calls to this command - it is able to deduce that `*d >= 0`. - */ -static npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { - assert(unit > 0); - npy_int64 div = *d / unit; - npy_int64 mod = *d % unit; - if (mod < 0) { - mod += unit; - div -= 1; - } - assert(mod >= 0); - *d = mod; - return div; -} - -/* - * Modifies '*days_' to be the day offset within the year, - * and returns the year. - */ -static npy_int64 days_to_yearsdays(npy_int64 *days_) { - const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); - /* Adjust so it's relative to the year 2000 (divisible by 400) */ - npy_int64 days = (*days_) - (365 * 30 + 7); - npy_int64 year; - - /* Break down the 400 year cycle to get the year and day within the year */ - if (days >= 0) { - year = 400 * (days / days_per_400years); - days = days % days_per_400years; - } else { - year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); - days = days % days_per_400years; - if (days < 0) { - days += days_per_400years; - } - } - - /* Work out the year/day within the 400 year cycle */ - if (days >= 366) { - year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); - days = (days - 1) % (100 * 365 + 25 - 1); - if (days >= 365) { - year += 4 * ((days + 1) / (4 * 365 + 1)); - days = (days + 1) % (4 * 365 + 1); - if (days >= 366) { - year += (days - 1) / 365; - days = (days - 1) % 365; - } - } - } - - *days_ = days; - return year + 2000; -} - -/* - * Fills in the year, month, day in 'dts' based on the days - * offset from 1970. - */ -static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { - const int *month_lengths; - int i; - - dts->year = days_to_yearsdays(&days); - month_lengths = days_per_month_table[is_leapyear(dts->year)]; - - for (i = 0; i < 12; ++i) { - if (days < month_lengths[i]) { - dts->month = i + 1; - dts->day = days + 1; - return; - } else { - days -= month_lengths[i]; - } - } -} - -/* - * Converts a datetime based on the given metadata into a datetimestruct - */ -static void pandas_datetime_to_datetimestruct(npy_datetime dt, - NPY_DATETIMEUNIT base, - npy_datetimestruct *out) { - npy_int64 perday; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->year = 1970; - out->month = 1; - out->day = 1; - - /* - * Note that care must be taken with the / and % operators - * for negative values. - */ - switch (base) { - case NPY_FR_Y: - out->year = 1970 + dt; - break; - - case NPY_FR_M: - out->year = 1970 + extract_unit(&dt, 12); - out->month = dt + 1; - break; - - case NPY_FR_W: - /* A week is 7 days */ - set_datetimestruct_days(dt * 7, out); - break; - - case NPY_FR_D: - set_datetimestruct_days(dt, out); - break; - - case NPY_FR_h: - perday = 24LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = dt; - break; - - case NPY_FR_m: - perday = 24LL * 60; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 60); - out->min = (int)dt; - break; - - case NPY_FR_s: - perday = 24LL * 60 * 60; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 60 * 60); - out->min = (int)extract_unit(&dt, 60); - out->sec = (int)dt; - break; - - case NPY_FR_ms: - perday = 24LL * 60 * 60 * 1000; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 60); - out->sec = (int)extract_unit(&dt, 1000LL); - out->us = (int)(dt * 1000); - break; - - case NPY_FR_us: - perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000); - out->us = (int)dt; - break; - - case NPY_FR_ns: - perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->us = (int)extract_unit(&dt, 1000LL); - out->ps = (int)(dt * 1000); - break; - - case NPY_FR_ps: - perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->us = (int)extract_unit(&dt, 1000LL); - out->ps = (int)(dt * 1000); - break; - - case NPY_FR_fs: - /* entire range is only +- 2.6 hours */ - out->hour = - (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 60 * 60); - if (out->hour < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour += 24; - assert(out->hour >= 0); - } - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000); - out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->ps = (int)extract_unit(&dt, 1000LL); - out->as = (int)(dt * 1000); - break; - - case NPY_FR_as: - /* entire range is only +- 9.2 seconds */ - out->sec = - (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 1000); - if (out->sec < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour = 23; - out->min = 59; - out->sec += 60; - assert(out->sec >= 0); - } - out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); - out->ps = (int)extract_unit(&dt, 1000LL * 1000); - out->as = (int)dt; - break; - - default: - PyErr_SetString(PyExc_RuntimeError, - "NumPy datetime metadata is corrupted with invalid " - "base unit"); - } -} - -/* - * Converts a timedelta from a timedeltastruct to a timedelta based - * on a metadata unit. The timedelta is assumed to be valid. - * - * Returns 0 on success, -1 on failure. - */ -static void pandas_timedelta_to_timedeltastruct(npy_timedelta td, - NPY_DATETIMEUNIT base, - pandas_timedeltastruct *out) { - npy_int64 frac; - npy_int64 sfrac; - npy_int64 ifrac; - int sign; - npy_int64 per_day; - npy_int64 per_sec; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(pandas_timedeltastruct)); - - switch (base) { - case NPY_FR_ns: - - per_day = 86400000000000LL; - per_sec = 1000LL * 1000LL * 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac / (1000LL * 1000LL); - ifrac -= out->ms * 1000LL * 1000LL; - out->us = ifrac / 1000LL; - ifrac -= out->us * 1000LL; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_us: - - per_day = 86400000000LL; - per_sec = 1000LL * 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac / 1000LL; - ifrac -= out->ms * 1000LL; - out->us = ifrac / 1L; - ifrac -= out->us * 1L; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_ms: - - per_day = 86400000LL; - per_sec = 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac; - out->us = 0; - out->ns = 0; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_s: - // special case where we can simplify many expressions bc per_sec=1 - - per_day = 86400LL; - per_sec = 1L; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = 0; - out->us = 0; - out->ns = 0; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_m: - - out->days = td / 1440LL; - td -= out->days * 1440LL; - out->hrs = td / 60LL; - td -= out->hrs * 60LL; - out->min = td; - - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_h: - out->days = td / 24LL; - td -= out->days * 24LL; - out->hrs = td; - - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_D: - out->days = td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_W: - out->days = 7 * td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - default: - PyErr_SetString(PyExc_RuntimeError, - "NumPy timedelta metadata is corrupted with " - "invalid base unit"); - } - - out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; - out->microseconds = out->ms * 1000 + out->us; - out->nanoseconds = out->ns; -} - -/* - * Provides a string length to use for converting datetime - * objects with the given local and unit settings. - */ -static int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) { - int len = 0; - - switch (base) { - /* Generic units can only be used to represent NaT */ - /* return 4;*/ - case NPY_FR_as: - len += 3; /* "###" */ - case NPY_FR_fs: - len += 3; /* "###" */ - case NPY_FR_ps: - len += 3; /* "###" */ - case NPY_FR_ns: - len += 3; /* "###" */ - case NPY_FR_us: - len += 3; /* "###" */ - case NPY_FR_ms: - len += 4; /* ".###" */ - case NPY_FR_s: - len += 3; /* ":##" */ - case NPY_FR_m: - len += 3; /* ":##" */ - case NPY_FR_h: - len += 3; /* "T##" */ - case NPY_FR_D: - case NPY_FR_W: - len += 3; /* "-##" */ - case NPY_FR_M: - len += 3; /* "-##" */ - case NPY_FR_Y: - len += 21; /* 64-bit year */ - break; - default: - len += 3; /* handle the now defunct NPY_FR_B */ - break; - } - - if (base >= NPY_FR_h) { - if (local) { - len += 5; /* "+####" or "-####" */ - } else { - len += 1; /* "Z" */ - } - } - - len += 1; /* NULL terminator */ - - return len; -} - -/* - * Converts an npy_datetimestruct to an (almost) ISO 8601 - * NULL-terminated string using timezone Z (UTC). If the string fits in - * the space exactly, it leaves out the NULL terminator and returns success. - * - * The differences from ISO 8601 are the 'NaT' string, and - * the number of year digits is >= 4 instead of strictly 4. - * - * 'base' restricts the output to that unit. Set 'base' to - * -1 to auto-detect a base after which all the values are zero. - * - * Returns 0 on success, -1 on failure (for example if the output - * string was too short). - */ -static int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, - int outlen, int utc, NPY_DATETIMEUNIT base) { - char *substr = outstr; - int sublen = outlen; - int tmplen; - - /* - * Print weeks with the same precision as days. - * - * TODO: Could print weeks with YYYY-Www format if the week - * epoch is a Monday. - */ - if (base == NPY_FR_W) { - base = NPY_FR_D; - } - -/* YEAR */ -/* - * Can't use PyOS_snprintf, because it always produces a '\0' - * character at the end, and NumPy string types are permitted - * to have data all the way to the end of the buffer. - */ -#ifdef _WIN32 - tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); -#else - tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); -#endif // _WIN32 - /* If it ran out of space or there isn't space for the NULL terminator */ - if (tmplen < 0 || tmplen > sublen) { - goto string_too_short; - } - substr += tmplen; - sublen -= tmplen; - - /* Stop if the unit is years */ - if (base == NPY_FR_Y) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } - - /* MONTH */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '-'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->month / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->month % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is months */ - if (base == NPY_FR_M) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } - - /* DAY */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '-'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->day / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->day % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is days */ - if (base == NPY_FR_D) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } - - /* HOUR */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = 'T'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->hour / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->hour % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is hours */ - if (base == NPY_FR_h) { - goto add_time_zone; - } - - /* MINUTE */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = ':'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->min / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->min % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is minutes */ - if (base == NPY_FR_m) { - goto add_time_zone; - } - - /* SECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = ':'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->sec / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->sec % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is seconds */ - if (base == NPY_FR_s) { - goto add_time_zone; - } - - /* MILLISECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '.'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->us / 100000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->us / 10000) % 10 + '0'); - if (sublen < 4) { - goto string_too_short; - } - substr[3] = (char)((dts->us / 1000) % 10 + '0'); - substr += 4; - sublen -= 4; - - /* Stop if the unit is milliseconds */ - if (base == NPY_FR_ms) { - goto add_time_zone; - } - - /* MICROSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->us / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->us / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->us % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is microseconds */ - if (base == NPY_FR_us) { - goto add_time_zone; - } - - /* NANOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->ps / 100000) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->ps / 10000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->ps / 1000) % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is nanoseconds */ - if (base == NPY_FR_ns) { - goto add_time_zone; - } - - /* PICOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->ps / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->ps / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->ps % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is picoseconds */ - if (base == NPY_FR_ps) { - goto add_time_zone; - } - - /* FEMTOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->as / 100000) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->as / 10000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->as / 1000) % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is femtoseconds */ - if (base == NPY_FR_fs) { - goto add_time_zone; - } - - /* ATTOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->as / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->as / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->as % 10 + '0'); - substr += 3; - sublen -= 3; - -add_time_zone: - /* UTC "Zulu" time */ - if (utc) { - if (sublen < 1) { - goto string_too_short; - } - substr[0] = 'Z'; - substr += 1; - sublen -= 1; - } - /* Add a NULL terminator, and return */ - if (sublen > 0) { - substr[0] = '\0'; - } - - return 0; - -string_too_short: - PyErr_Format(PyExc_RuntimeError, - "The string provided for NumPy ISO datetime formatting " - "was too short, with length %d", - outlen); - return -1; -} - -static int make_iso_8601_timedelta(pandas_timedeltastruct *tds, char *outstr, - size_t *outlen) { - *outlen = 0; - *outlen += snprintf(outstr, 60, // NOLINT - "P%" NPY_INT64_FMT "DT%" NPY_INT32_FMT "H%" NPY_INT32_FMT - "M%" NPY_INT32_FMT, - tds->days, tds->hrs, tds->min, tds->sec); - outstr += *outlen; - - if (tds->ns != 0) { - *outlen += snprintf(outstr, 12, // NOLINT - ".%03" NPY_INT32_FMT "%03" NPY_INT32_FMT - "%03" NPY_INT32_FMT "S", - tds->ms, tds->us, tds->ns); - } else if (tds->us != 0) { - *outlen += snprintf(outstr, 9, // NOLINT - ".%03" NPY_INT32_FMT "%03" NPY_INT32_FMT "S", tds->ms, - tds->us); - } else if (tds->ms != 0) { - *outlen += snprintf(outstr, 6, // NOLINT - ".%03" NPY_INT32_FMT "S", tds->ms); - } else { - *outlen += snprintf(outstr, 2, // NOLINT - "%s", "S"); - } - - return 0; -} - -/* Converts the int64_t representation of a datetime to ISO; mutates len */ -static char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { - npy_datetimestruct dts; - int ret_code; - - pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); - - *len = (size_t)get_datetime_iso_8601_strlen(0, base); - char *result = PyObject_Malloc(*len); - - if (result == NULL) { - PyErr_NoMemory(); - return NULL; - } - // datetime64 is always naive - ret_code = make_iso_8601_datetime(&dts, result, *len, 0, base); - if (ret_code != 0) { - PyErr_SetString(PyExc_ValueError, - "Could not convert datetime value to string"); - PyObject_Free(result); - } - - // Note that get_datetime_iso_8601_strlen just gives a generic size - // for ISO string conversion, not the actual size used - *len = strlen(result); - return result; -} - -static npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { - scaleNanosecToUnit(&dt, base); - return dt; -} - -/* Convert PyDatetime To ISO C-string. mutates len */ -static char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, - size_t *len) { - npy_datetimestruct dts; - int ret; - - ret = convert_pydatetime_to_datetimestruct(obj, &dts); - if (ret != 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Could not convert PyDateTime to numpy datetime"); - } - return NULL; - } - - *len = (size_t)get_datetime_iso_8601_strlen(0, base); - char *result = PyObject_Malloc(*len); - // Check to see if PyDateTime has a timezone. - // Don't convert to UTC if it doesn't. - int is_tz_aware = 0; - if (PyObject_HasAttrString(obj, "tzinfo")) { - PyObject *offset = extract_utc_offset(obj); - if (offset == NULL) { - PyObject_Free(result); - return NULL; - } - is_tz_aware = offset != Py_None; - Py_DECREF(offset); - } - ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base); - - if (ret != 0) { - PyErr_SetString(PyExc_ValueError, - "Could not convert datetime value to string"); - PyObject_Free(result); - return NULL; - } - - // Note that get_datetime_iso_8601_strlen just gives a generic size - // for ISO string conversion, not the actual size used - *len = strlen(result); - return result; -} - -static npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) { - npy_datetimestruct dts; - int ret; - - ret = convert_pydatetime_to_datetimestruct(dt, &dts); - if (ret != 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Could not convert PyDateTime to numpy datetime"); - } - // TODO(username): is setting errMsg required? - // ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; - // return NULL; - } - - npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); - return NpyDateTimeToEpoch(npy_dt, base); -} - -/* - * Parses (almost) standard ISO 8601 date strings. The differences are: - * - * + Only seconds may have a decimal point, with up to 18 digits after it - * (maximum attoseconds precision). - * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate - * the date and the time. Both are treated equivalently. - * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. - * + Doesn't handle leap seconds (seconds value has 60 in these cases). - * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow - * + Accepts special values "NaT" (not a time), "Today", (current - * day according to local time) and "Now" (current time in UTC). - * + ':' separator between hours, minutes, and seconds is optional. When - * omitted, each component must be 2 digits if it appears. (GH-10041) - * - * 'str' must be a NULL-terminated string, and 'len' must be its length. - * - * 'out' gets filled with the parsed date-time. - * 'out_local' gets set to 1 if the parsed time contains timezone, - * to 0 otherwise. - * 'out_tzoffset' gets set to timezone offset by minutes - * if the parsed time was in local time, - * to 0 otherwise. The values 'now' and 'today' don't get counted - * as local, and neither do UTC +/-#### timezone offsets, because - * they aren't using the computer's local timezone offset. - * - * Returns 0 on success, -1 on failure. - */ - -typedef enum { - COMPARISON_SUCCESS, - COMPLETED_PARTIAL_MATCH, - COMPARISON_ERROR -} DatetimePartParseResult; -// This function will advance the pointer on format -// and decrement characters_remaining by n on success -// On failure will return COMPARISON_ERROR without incrementing -// If `format_requirement` is PARTIAL_MATCH, and the `format` string has -// been exhausted, then return COMPLETED_PARTIAL_MATCH. -static DatetimePartParseResult -compare_format(const char **format, int *characters_remaining, - const char *compare_to, int n, - const FormatRequirement format_requirement) { - if (format_requirement == INFER_FORMAT) { - return COMPARISON_SUCCESS; - } - if (*characters_remaining < 0) { - return COMPARISON_ERROR; - } - if (format_requirement == PARTIAL_MATCH && *characters_remaining == 0) { - return COMPLETED_PARTIAL_MATCH; - } - if (*characters_remaining < n) { - // TODO(pandas-dev): PyErr to differentiate what went wrong - return COMPARISON_ERROR; - } else { - if (strncmp(*format, compare_to, n)) { - // TODO(pandas-dev): PyErr to differentiate what went wrong - return COMPARISON_ERROR; - } else { - *format += n; - *characters_remaining -= n; - return COMPARISON_SUCCESS; - } - } - return COMPARISON_SUCCESS; -} - -static int parse_iso_8601_datetime(const char *str, int len, int want_exc, - npy_datetimestruct *out, - NPY_DATETIMEUNIT *out_bestunit, - int *out_local, int *out_tzoffset, - const char *format, int format_len, - FormatRequirement format_requirement) { - if (len < 0 || format_len < 0) - goto parse_error; - int year_leap = 0; - int i, numdigits; - const char *substr; - int sublen; - NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; - DatetimePartParseResult comparison; - - /* If year-month-day are separated by a valid separator, - * months/days without leading zeroes will be parsed - * (though not iso8601). If the components aren't separated, - * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are - * forbidden here (but parsed as YYMMDD elsewhere). - */ - int has_ymd_sep = 0; - char ymd_sep = '\0'; - char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '}; - int valid_ymd_sep_len = sizeof(valid_ymd_sep); - - /* hour-minute-second may or may not separated by ':'. If not, then - * each component must be 2 digits. */ - int has_hms_sep = 0; - int hour_was_2_digits = 0; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->month = 1; - out->day = 1; - - substr = str; - sublen = len; - - /* Skip leading whitespace */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - comparison = - compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - /* Leading '-' sign for negative year */ - if (*substr == '-') { - ++substr; - --sublen; - } - - if (sublen == 0) { - goto parse_error; - } - - /* PARSE THE YEAR (4 digits) */ - comparison = - compare_format(&format, &format_len, "%Y", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - - out->year = 0; - if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) && - isdigit(substr[2]) && isdigit(substr[3])) { - out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') + - 10 * (substr[2] - '0') + (substr[3] - '0'); - - substr += 4; - sublen -= 4; - } - - /* Negate the year if necessary */ - if (str[0] == '-') { - out->year = -out->year; - } - /* Check whether it's a leap-year */ - year_leap = is_leapyear(out->year); - - /* Next character must be a separator, start of month, or end of string */ - if (sublen == 0) { - if (out_local != NULL) { - *out_local = 0; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_Y; - goto finish; - } - - if (!isdigit(*substr)) { - for (i = 0; i < valid_ymd_sep_len; ++i) { - if (*substr == valid_ymd_sep[i]) { - break; - } - } - if (i == valid_ymd_sep_len) { - goto parse_error; - } - has_ymd_sep = 1; - ymd_sep = valid_ymd_sep[i]; - ++substr; - --sublen; - - comparison = - compare_format(&format, &format_len, &ymd_sep, 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* Cannot have trailing separator */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - } - - /* PARSE THE MONTH */ - comparison = - compare_format(&format, &format_len, "%m", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - out->month = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->month = 10 * out->month + (*substr - '0'); - ++substr; - --sublen; - } else if (!has_ymd_sep) { - goto parse_error; - } - if (out->month < 1 || out->month > 12) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Month out of range in datetime string \"%s\"", str); - } - goto error; - } - - /* Next character must be the separator, start of day, or end of string */ - if (sublen == 0) { - bestunit = NPY_FR_M; - /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */ - if (!has_ymd_sep) { - goto parse_error; - } - if (format_len) { - goto parse_error; - } - if (out_local != NULL) { - *out_local = 0; - } - goto finish; - } - - if (has_ymd_sep) { - /* Must have separator, but cannot be trailing */ - if (*substr != ymd_sep || sublen == 1) { - goto parse_error; - } - ++substr; - --sublen; - comparison = - compare_format(&format, &format_len, &ymd_sep, 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - /* PARSE THE DAY */ - comparison = - compare_format(&format, &format_len, "%d", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - if (!isdigit(*substr)) { - goto parse_error; - } - out->day = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->day = 10 * out->day + (*substr - '0'); - ++substr; - --sublen; - } else if (!has_ymd_sep) { - goto parse_error; - } - if (out->day < 1 || - out->day > days_per_month_table[year_leap][out->month - 1]) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Day out of range in datetime string \"%s\"", str); - } - goto error; - } - - /* Next character must be a 'T', ' ', or end of string */ - if (sublen == 0) { - if (out_local != NULL) { - *out_local = 0; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_D; - goto finish; - } - - if ((*substr != 'T' && *substr != ' ') || sublen == 1) { - goto parse_error; - } - comparison = - compare_format(&format, &format_len, substr, 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - ++substr; - --sublen; - - /* PARSE THE HOURS */ - comparison = - compare_format(&format, &format_len, "%H", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - if (!isdigit(*substr)) { - goto parse_error; - } - out->hour = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional */ - if (isdigit(*substr)) { - hour_was_2_digits = 1; - out->hour = 10 * out->hour + (*substr - '0'); - ++substr; - --sublen; - if (out->hour >= 24) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Hours out of range in datetime string \"%s\"", str); - } - goto error; - } - } - - /* Next character must be a ':' or the end of the string */ - if (sublen == 0) { - if (!hour_was_2_digits) { - goto parse_error; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_h; - goto finish; - } - - if (*substr == ':') { - has_hms_sep = 1; - ++substr; - --sublen; - /* Cannot have a trailing separator */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - comparison = - compare_format(&format, &format_len, ":", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } else if (!isdigit(*substr)) { - if (!hour_was_2_digits) { - goto parse_error; - } - goto parse_timezone; - } - - /* PARSE THE MINUTES */ - comparison = - compare_format(&format, &format_len, "%M", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - out->min = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->min = 10 * out->min + (*substr - '0'); - ++substr; - --sublen; - if (out->min >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Minutes out of range in datetime string \"%s\"", str); - } - goto error; - } - } else if (!has_hms_sep) { - goto parse_error; - } - - if (sublen == 0) { - bestunit = NPY_FR_m; - if (format_len) { - goto parse_error; - } - goto finish; - } - - /* If we make it through this condition block, then the next - * character is a digit. */ - if (has_hms_sep && *substr == ':') { - comparison = - compare_format(&format, &format_len, ":", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - ++substr; - --sublen; - /* Cannot have a trailing ':' */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - } else if (!has_hms_sep && isdigit(*substr)) { - } else { - goto parse_timezone; - } - - /* PARSE THE SECONDS */ - comparison = - compare_format(&format, &format_len, "%S", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - out->sec = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->sec = 10 * out->sec + (*substr - '0'); - ++substr; - --sublen; - if (out->sec >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Seconds out of range in datetime string \"%s\"", str); - } - goto error; - } - } else if (!has_hms_sep) { - goto parse_error; - } - - /* Next character may be a '.' indicating fractional seconds */ - if (sublen > 0 && *substr == '.') { - ++substr; - --sublen; - comparison = - compare_format(&format, &format_len, ".", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } else { - bestunit = NPY_FR_s; - goto parse_timezone; - } - - /* PARSE THE MICROSECONDS (0 to 6 digits) */ - comparison = - compare_format(&format, &format_len, "%f", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->us *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->us += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (sublen == 0 || !isdigit(*substr)) { - if (numdigits > 3) { - bestunit = NPY_FR_us; - } else { - bestunit = NPY_FR_ms; - } - goto parse_timezone; - } - - /* PARSE THE PICOSECONDS (0 to 6 digits) */ - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->ps *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->ps += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (sublen == 0 || !isdigit(*substr)) { - if (numdigits > 3) { - bestunit = NPY_FR_ps; - } else { - bestunit = NPY_FR_ns; - } - goto parse_timezone; - } - - /* PARSE THE ATTOSECONDS (0 to 6 digits) */ - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->as *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->as += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (numdigits > 3) { - bestunit = NPY_FR_as; - } else { - bestunit = NPY_FR_fs; - } - -parse_timezone: - /* trim any whitespace between time/timezone */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - comparison = - compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - if (sublen == 0) { - // Unlike NumPy, treating no time zone as naive - if (format_len > 0) { - goto parse_error; - } - goto finish; - } - - /* UTC specifier */ - if (*substr == 'Z') { - comparison = - compare_format(&format, &format_len, "%z", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* "Z" should be equivalent to tz offset "+00:00" */ - if (out_local != NULL) { - *out_local = 1; - } - - if (out_tzoffset != NULL) { - *out_tzoffset = 0; - } - - if (sublen == 1) { - if (format_len > 0) { - goto parse_error; - } - goto finish; - } else { - ++substr; - --sublen; - } - } else if (*substr == '-' || *substr == '+') { - comparison = - compare_format(&format, &format_len, "%z", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* Time zone offset */ - int offset_neg = 0, offset_hour = 0, offset_minute = 0; - - /* - * Since "local" means local with respect to the current - * machine, we say this is non-local. - */ - - if (*substr == '-') { - offset_neg = 1; - } - ++substr; - --sublen; - - /* The hours offset */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); - substr += 2; - sublen -= 2; - if (offset_hour >= 24) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Timezone hours offset out of range " - "in datetime string \"%s\"", - str); - } - goto error; - } - } else if (sublen >= 1 && isdigit(substr[0])) { - offset_hour = substr[0] - '0'; - ++substr; - --sublen; - } else { - goto parse_error; - } - - /* The minutes offset is optional */ - if (sublen > 0) { - /* Optional ':' */ - if (*substr == ':') { - ++substr; - --sublen; - } - - /* The minutes offset (at the end of the string) */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); - substr += 2; - sublen -= 2; - if (offset_minute >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Timezone minutes offset out of range " - "in datetime string \"%s\"", - str); - } - goto error; - } - } else if (sublen >= 1 && isdigit(substr[0])) { - offset_minute = substr[0] - '0'; - ++substr; - --sublen; - } else { - goto parse_error; - } - } - - /* Apply the time zone offset */ - if (offset_neg) { - offset_hour = -offset_hour; - offset_minute = -offset_minute; - } - if (out_local != NULL) { - *out_local = 1; - // Unlike NumPy, do not change internal value to local time - *out_tzoffset = 60 * offset_hour + offset_minute; - } - } - - /* Skip trailing whitespace */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - comparison = - compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - if ((sublen != 0) || (format_len != 0)) { - goto parse_error; - } - -finish: - if (out_bestunit != NULL) { - *out_bestunit = bestunit; - } - return 0; - -parse_error: - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Error parsing datetime string \"%s\" at position %d", str, - (int)(substr - str)); - } - return -1; - -error: - return -1; -} - -/* Converts the int64_t representation of a duration to ISO; mutates len */ -static char *int64ToIsoDuration(int64_t value, size_t *len) { - pandas_timedeltastruct tds; - int ret_code; - - pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds); - - // Max theoretical length of ISO Duration with 64 bit day - // as the largest unit is 70 characters + 1 for a null terminator - char *result = PyObject_Malloc(71); - if (result == NULL) { - PyErr_NoMemory(); - return NULL; - } - - ret_code = make_iso_8601_timedelta(&tds, result, len); - if (ret_code == -1) { - PyErr_SetString(PyExc_ValueError, - "Could not convert timedelta value to string"); - PyObject_Free(result); - return NULL; - } - - return result; -} - -/* - * This function returns a pointer to the DateTimeMetaData - * contained within the provided datetime dtype. - * - * Copied near-verbatim from numpy/core/src/multiarray/datetime.c - */ -static PyArray_DatetimeMetaData -get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { - return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); -} static void pandas_datetime_destructor(PyObject *op) { void *ptr = PyCapsule_GetPointer(op, PandasDateTime_CAPSULE_NAME); diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h index ef6a85a3855f1..e80e9bbeb9e6c 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -23,59 +23,14 @@ See NUMPY_LICENSE.txt for the license. #endif // NPY_NO_DEPRECATED_API #include +#include "np_datetime.h" +#include "np_datetime_strings.h" +#include "date_conversions.h" #ifdef __cplusplus extern "C" { #endif -typedef struct { - npy_int64 days; - npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds; -} pandas_timedeltastruct; - -const npy_datetimestruct _AS_MIN_DTS = {1969, 12, 31, 23, 59, - 50, 776627, 963145, 224193}; -const npy_datetimestruct _FS_MIN_DTS = {1969, 12, 31, 21, 26, - 16, 627963, 145224, 193000}; -const npy_datetimestruct _PS_MIN_DTS = {1969, 9, 16, 5, 57, - 7, 963145, 224193, 0}; -const npy_datetimestruct _NS_MIN_DTS = {1677, 9, 21, 0, 12, - 43, 145224, 193000, 0}; -const npy_datetimestruct _US_MIN_DTS = {-290308, 12, 21, 19, 59, - 05, 224193, 0, 0}; -const npy_datetimestruct _MS_MIN_DTS = {-292275055, 5, 16, 16, 47, - 4, 193000, 0, 0}; -const npy_datetimestruct _S_MIN_DTS = { - -292277022657, 1, 27, 8, 29, 53, 0, 0, 0}; -const npy_datetimestruct _M_MIN_DTS = { - -17536621475646, 5, 4, 5, 53, 0, 0, 0, 0}; - -const npy_datetimestruct _AS_MAX_DTS = {1970, 1, 1, 0, 0, - 9, 223372, 36854, 775807}; -const npy_datetimestruct _FS_MAX_DTS = {1970, 1, 1, 2, 33, - 43, 372036, 854775, 807000}; -const npy_datetimestruct _PS_MAX_DTS = {1970, 4, 17, 18, 2, - 52, 36854, 775807, 0}; -const npy_datetimestruct _NS_MAX_DTS = {2262, 4, 11, 23, 47, - 16, 854775, 807000, 0}; -const npy_datetimestruct _US_MAX_DTS = {294247, 1, 10, 4, 0, 54, 775807, 0, 0}; -const npy_datetimestruct _MS_MAX_DTS = {292278994, 8, 17, 7, 12, - 55, 807000, 0, 0}; -const npy_datetimestruct _S_MAX_DTS = {292277026596, 12, 4, 15, 30, 7, 0, 0, 0}; -const npy_datetimestruct _M_MAX_DTS = { - 17536621479585, 8, 30, 18, 7, 0, 0, 0, 0}; - -/* 'format_requirement' can be one of three values: - * * PARTIAL_MATCH : Only require a partial match with 'format'. - * For example, if the string is '2020-01-01 05:00:00' and - * 'format' is '%Y-%m-%d', then parse '2020-01-01'; - * * EXACT_MATCH : require an exact match with 'format'. If the - * string is '2020-01-01', then the only format which will - * be able to parse it without error is '%Y-%m-%d'; - * * INFER_FORMAT: parse without comparing 'format' (i.e. infer it). - */ -typedef enum { PARTIAL_MATCH, EXACT_MATCH, INFER_FORMAT } FormatRequirement; - typedef struct { npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT, const npy_datetimestruct *); diff --git a/setup.py b/setup.py index b1bb29ae18969..1362bce0b3e18 100755 --- a/setup.py +++ b/setup.py @@ -116,17 +116,20 @@ def initialize_options(self): base = pjoin("pandas", "_libs", "src") tsbase = pjoin("pandas", "_libs", "tslibs", "src") + dt = pjoin(tsbase, "datetime") util = pjoin("pandas", "util") parser = pjoin(base, "parser") ujson_python = pjoin(base, "ujson", "python") ujson_lib = pjoin(base, "ujson", "lib") self._clean_exclude = [ + pjoin(dt, "np_datetime.c"), + pjoin(dt, "np_datetime_strings.c"), + pjoin(dt, "date_conversions.c"), pjoin(parser, "tokenizer.c"), pjoin(parser, "io.c"), pjoin(ujson_python, "ujson.c"), pjoin(ujson_python, "objToJSON.c"), pjoin(ujson_python, "JSONtoObj.c"), - pjoin(ujson_python, "date_conversions.c"), pjoin(ujson_lib, "ultrajsonenc.c"), pjoin(ujson_lib, "ultrajsondec.c"), pjoin(util, "move.c"), @@ -644,7 +647,14 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): pd_dt_ext = Extension( "pandas._libs.pandas_datetime", depends=["pandas/_libs/tslibs/datetime/pd_datetime.h"], - sources=(["pandas/_libs/tslibs/src/datetime/pd_datetime.c"]), + sources=( + [ + "pandas/_libs/tslibs/src/datetime/np_datetime.c", + "pandas/_libs/tslibs/src/datetime/np_datetime_strings.c", + "pandas/_libs/tslibs/src/datetime/date_conversions.c", + "pandas/_libs/tslibs/src/datetime/pd_datetime.c", + ] + ), include_dirs=tseries_includes + [ numpy.get_include(), From fb751008bf98f51160d1a731884920d04569945f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 28 Feb 2023 08:56:12 -0800 Subject: [PATCH 33/36] Commited C file --- .../tslibs/src/datetime/date_conversions.c | 163 ++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 pandas/_libs/tslibs/src/datetime/date_conversions.c diff --git a/pandas/_libs/tslibs/src/datetime/date_conversions.c b/pandas/_libs/tslibs/src/datetime/date_conversions.c new file mode 100644 index 0000000000000..e2d583470fa51 --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/date_conversions.c @@ -0,0 +1,163 @@ +/* +Copyright (c) 2020, PyData Development Team +All rights reserved. +Distributed under the terms of the BSD Simplified License. +The full license is in the LICENSE file, distributed with this software. +*/ + +// Conversion routines that are useful for serialization, +// but which don't interact with JSON objects directly + +#include "date_conversions.h" +#include "np_datetime.h" +#include "np_datetime_strings.h" + +/* + * Function: scaleNanosecToUnit + * ----------------------------- + * + * Scales an integer value representing time in nanoseconds to provided unit. + * + * Mutates the provided value directly. Returns 0 on success, non-zero on error. + */ +int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { + switch (unit) { + case NPY_FR_ns: + break; + case NPY_FR_us: + *value /= 1000LL; + break; + case NPY_FR_ms: + *value /= 1000000LL; + break; + case NPY_FR_s: + *value /= 1000000000LL; + break; + default: + return -1; + } + + return 0; +} + +/* Converts the int64_t representation of a datetime to ISO; mutates len */ +char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { + npy_datetimestruct dts; + int ret_code; + + pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + // datetime64 is always naive + ret_code = make_iso_8601_datetime(&dts, result, *len, 0, base); + if (ret_code != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { + scaleNanosecToUnit(&dt, base); + return dt; +} + +/* Convert PyDatetime To ISO C-string. mutates len */ +char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, + size_t *len) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(obj, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + return NULL; + } + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + // Check to see if PyDateTime has a timezone. + // Don't convert to UTC if it doesn't. + int is_tz_aware = 0; + if (PyObject_HasAttrString(obj, "tzinfo")) { + PyObject *offset = extract_utc_offset(obj); + if (offset == NULL) { + PyObject_Free(result); + return NULL; + } + is_tz_aware = offset != Py_None; + Py_DECREF(offset); + } + ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base); + + if (ret != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + return NULL; + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(dt, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + // TODO(username): is setting errMsg required? + // ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + // return NULL; + } + + npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); + return NpyDateTimeToEpoch(npy_dt, base); +} + +/* Converts the int64_t representation of a duration to ISO; mutates len */ +char *int64ToIsoDuration(int64_t value, size_t *len) { + pandas_timedeltastruct tds; + int ret_code; + + pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds); + + // Max theoretical length of ISO Duration with 64 bit day + // as the largest unit is 70 characters + 1 for a null terminator + char *result = PyObject_Malloc(71); + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + + ret_code = make_iso_8601_timedelta(&tds, result, len); + if (ret_code == -1) { + PyErr_SetString(PyExc_ValueError, + "Could not convert timedelta value to string"); + PyObject_Free(result); + return NULL; + } + + return result; +} From f51e7f43bdd4de5c2045e289e792af10a305b5ce Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 2 Mar 2023 15:23:34 -0800 Subject: [PATCH 34/36] Removed erroneous comments --- pandas/_libs/tslibs/src/datetime/pd_datetime.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c index a2a6220ed0924..73f63706f2a88 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -70,13 +70,11 @@ static int pandas_datetime_exec(PyObject *module) { PyErr_SetString(PyExc_ImportError, "pd_datetime.c could not import module pandas"); Py_DECREF(capsule); - // cpython doesn't PyMem_Free(capi) here - mistake or intentional? return -1; } if (PyModule_AddObject(pandas, "_pandas_datetime_CAPI", capsule) < 0) { Py_DECREF(capsule); - // cpython doesn't PyMem_Free(capi) here - mistake or intentional? return -1; } From f4dac4f85b08a57d8ae7749d59ec75146c03568a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 6 Mar 2023 12:11:04 -0800 Subject: [PATCH 35/36] graft pd_parser.c --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index d2b1b8cb887bc..34a3b1e5613f3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -58,3 +58,4 @@ prune pandas/tests/io/parser/data # Selectively re-add *.cxx files that were excluded above graft pandas/_libs/src graft pandas/_libs/tslibs/src +graft pandas/_libs/pd_parser.c From 12179c77c4d1553673ab2d0a0f46569802d6b1c4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 6 Mar 2023 12:39:53 -0800 Subject: [PATCH 36/36] graft -> include --- MANIFEST.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 34a3b1e5613f3..361cd8ff9ec22 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -58,4 +58,5 @@ prune pandas/tests/io/parser/data # Selectively re-add *.cxx files that were excluded above graft pandas/_libs/src graft pandas/_libs/tslibs/src -graft pandas/_libs/pd_parser.c +include pandas/_libs/pd_parser.h +include pandas/_libs/pd_parser.c