diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index 7dce3cad9d339..0adf6f722c9ce 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -36,6 +36,7 @@ from pandas._libs.tslibs.np_datetime cimport (
     dtstruct_to_dt64,
     get_datetime64_unit,
     get_datetime64_value,
+    get_unit_from_dtype,
     npy_datetime,
     npy_datetimestruct,
     pandas_datetime_to_datetimestruct,
@@ -234,7 +235,9 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True):
             result = result.copy()
         return result
 
-    unit = get_datetime64_unit(arr.flat[0])
+    if arr.dtype.kind != "M":
+        raise TypeError("ensure_datetime64ns arr must have datetime64 dtype")
+    unit = get_unit_from_dtype(arr.dtype)
     if unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
         # without raising explicitly here, we end up with a SystemError
         # built-in function ensure_datetime64ns returned a result with an error
diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd
index c2bbc4fe764fe..4ab10bc431d09 100644
--- a/pandas/_libs/tslibs/np_datetime.pxd
+++ b/pandas/_libs/tslibs/np_datetime.pxd
@@ -1,3 +1,4 @@
+cimport numpy as cnp
 from cpython.datetime cimport (
     date,
     datetime,
@@ -79,3 +80,5 @@ cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil
 cdef int _string_to_dts(str val, npy_datetimestruct* dts,
                         int* out_local, int* out_tzoffset,
                         bint want_exc) except? -1
+
+cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype)
diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi
index db0c277b73bd5..5227de4e72f44 100644
--- a/pandas/_libs/tslibs/np_datetime.pyi
+++ b/pandas/_libs/tslibs/np_datetime.pyi
@@ -1 +1,6 @@
+import numpy as np
+
 class OutOfBoundsDatetime(ValueError): ...
+
+# only exposed for testing
+def py_get_unit_from_dtype(dtype: np.dtype): ...
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
index 79a58478d630a..5f4ef84a79586 100644
--- a/pandas/_libs/tslibs/np_datetime.pyx
+++ b/pandas/_libs/tslibs/np_datetime.pyx
@@ -19,6 +19,9 @@ from cpython.object cimport (
 
 PyDateTime_IMPORT
 
+cimport numpy as cnp
+
+cnp.import_array()
 from numpy cimport int64_t
 
 from pandas._libs.tslibs.util cimport get_c_string_buf_and_size
@@ -42,6 +45,8 @@ cdef extern from "src/datetime/np_datetime.h":
 
     npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
 
+    PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype);
+
 cdef extern from "src/datetime/np_datetime_strings.h":
     int parse_iso_8601_datetime(const char *str, int len, int want_exc,
                                 npy_datetimestruct *out,
@@ -74,6 +79,22 @@ cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil:
     """
     return <NPY_DATETIMEUNIT>(<PyDatetimeScalarObject*>obj).obmeta.base
 
+
+cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype):
+    # NB: caller is responsible for ensuring this is *some* datetime64 or
+    # timedelta64 dtype, otherwise we can segfault
+    cdef:
+        cnp.PyArray_Descr* descr = <cnp.PyArray_Descr*>dtype
+        PyArray_DatetimeMetaData meta
+    meta = get_datetime_metadata_from_dtype(descr)
+    return meta.base
+
+
+def py_get_unit_from_dtype(dtype):
+    # for testing get_unit_from_dtype; adds 896 bytes to the .so file.
+    return get_unit_from_dtype(dtype)
+
+
 # ----------------------------------------------------------------------
 # Comparison
 
diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c
index 8f59f53a555d8..12e20df256293 100644
--- a/pandas/_libs/tslibs/src/datetime/np_datetime.c
+++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c
@@ -768,3 +768,15 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td,
                             "invalid base unit");
     }
 }
+
+
+/*
+ * This function returns the DateTimeMetaData
+ * contained within the provided datetime dtype.
+ *
+ * Copied near-verbatim from numpy/core/src/multiarray/datetime.c
+ */
+PyArray_DatetimeMetaData
+get_datetime_metadata_from_dtype(PyArray_Descr *dtype) {
+    return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta);
+}
diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h
index 0bbc24ed822c5..8e58be1ca8383 100644
--- a/pandas/_libs/tslibs/src/datetime/np_datetime.h
+++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h
@@ -75,5 +75,12 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a,
 void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes);
 
 
+/*
+ * This function returns the DateTimeMetaData
+ * contained within the provided datetime dtype.
+ */
+PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(
+    PyArray_Descr *dtype);
+
 
 #endif  // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_
diff --git a/pandas/tests/tslibs/test_np_datetime.py b/pandas/tests/tslibs/test_np_datetime.py
new file mode 100644
index 0000000000000..00a2f90217434
--- /dev/null
+++ b/pandas/tests/tslibs/test_np_datetime.py
@@ -0,0 +1,37 @@
+import numpy as np
+
+from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype
+
+
+def test_get_unit_from_dtype():
+    # datetime64
+    assert py_get_unit_from_dtype(np.dtype("M8[Y]")) == 0
+    assert py_get_unit_from_dtype(np.dtype("M8[M]")) == 1
+    assert py_get_unit_from_dtype(np.dtype("M8[W]")) == 2
+    # B has been deprecated and removed -> no 3
+    assert py_get_unit_from_dtype(np.dtype("M8[D]")) == 4
+    assert py_get_unit_from_dtype(np.dtype("M8[h]")) == 5
+    assert py_get_unit_from_dtype(np.dtype("M8[m]")) == 6
+    assert py_get_unit_from_dtype(np.dtype("M8[s]")) == 7
+    assert py_get_unit_from_dtype(np.dtype("M8[ms]")) == 8
+    assert py_get_unit_from_dtype(np.dtype("M8[us]")) == 9
+    assert py_get_unit_from_dtype(np.dtype("M8[ns]")) == 10
+    assert py_get_unit_from_dtype(np.dtype("M8[ps]")) == 11
+    assert py_get_unit_from_dtype(np.dtype("M8[fs]")) == 12
+    assert py_get_unit_from_dtype(np.dtype("M8[as]")) == 13
+
+    # timedelta64
+    assert py_get_unit_from_dtype(np.dtype("m8[Y]")) == 0
+    assert py_get_unit_from_dtype(np.dtype("m8[M]")) == 1
+    assert py_get_unit_from_dtype(np.dtype("m8[W]")) == 2
+    # B has been deprecated and removed -> no 3
+    assert py_get_unit_from_dtype(np.dtype("m8[D]")) == 4
+    assert py_get_unit_from_dtype(np.dtype("m8[h]")) == 5
+    assert py_get_unit_from_dtype(np.dtype("m8[m]")) == 6
+    assert py_get_unit_from_dtype(np.dtype("m8[s]")) == 7
+    assert py_get_unit_from_dtype(np.dtype("m8[ms]")) == 8
+    assert py_get_unit_from_dtype(np.dtype("m8[us]")) == 9
+    assert py_get_unit_from_dtype(np.dtype("m8[ns]")) == 10
+    assert py_get_unit_from_dtype(np.dtype("m8[ps]")) == 11
+    assert py_get_unit_from_dtype(np.dtype("m8[fs]")) == 12
+    assert py_get_unit_from_dtype(np.dtype("m8[as]")) == 13