|
1 | 1 | cimport cython
|
2 | 2 | from cpython.datetime cimport (
|
| 3 | + PyDateTime_CheckExact, |
3 | 4 | PyDateTime_DATE_GET_HOUR,
|
4 | 5 | PyDateTime_DATE_GET_MICROSECOND,
|
5 | 6 | PyDateTime_DATE_GET_MINUTE,
|
@@ -229,7 +230,13 @@ def py_td64_to_tdstruct(int64_t td64, NPY_DATETIMEUNIT unit):
|
229 | 230 |
|
230 | 231 |
|
231 | 232 | cdef inline void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts):
|
232 |
| - dts.year = PyDateTime_GET_YEAR(dt) |
| 233 | + if PyDateTime_CheckExact(dt): |
| 234 | + dts.year = PyDateTime_GET_YEAR(dt) |
| 235 | + else: |
| 236 | + # We use dt.year instead of PyDateTime_GET_YEAR because with Timestamp |
| 237 | + # we override year such that PyDateTime_GET_YEAR is incorrect. |
| 238 | + dts.year = dt.year |
| 239 | + |
233 | 240 | dts.month = PyDateTime_GET_MONTH(dt)
|
234 | 241 | dts.day = PyDateTime_GET_DAY(dt)
|
235 | 242 | dts.hour = PyDateTime_DATE_GET_HOUR(dt)
|
@@ -282,6 +289,7 @@ cpdef ndarray astype_overflowsafe(
|
282 | 289 | ndarray values,
|
283 | 290 | cnp.dtype dtype,
|
284 | 291 | bint copy=True,
|
| 292 | + bint round_ok=True, |
285 | 293 | ):
|
286 | 294 | """
|
287 | 295 | Convert an ndarray with datetime64[X] to datetime64[Y]
|
@@ -314,20 +322,24 @@ cpdef ndarray astype_overflowsafe(
|
314 | 322 | "datetime64/timedelta64 values and dtype must have a unit specified"
|
315 | 323 | )
|
316 | 324 |
|
317 |
| - if (<object>values).dtype.byteorder == ">": |
318 |
| - # GH#29684 we incorrectly get OutOfBoundsDatetime if we dont swap |
319 |
| - values = values.astype(values.dtype.newbyteorder("<")) |
320 |
| - |
321 | 325 | if from_unit == to_unit:
|
322 | 326 | # Check this before allocating result for perf, might save some memory
|
323 | 327 | if copy:
|
324 | 328 | return values.copy()
|
325 | 329 | return values
|
326 | 330 |
|
327 | 331 | elif from_unit > to_unit:
|
328 |
| - # e.g. ns -> us, so there is no risk of overflow, so we can use |
329 |
| - # numpy's astype safely. Note there _is_ risk of truncation. |
330 |
| - return values.astype(dtype) |
| 332 | + if round_ok: |
| 333 | + # e.g. ns -> us, so there is no risk of overflow, so we can use |
| 334 | + # numpy's astype safely. Note there _is_ risk of truncation. |
| 335 | + return values.astype(dtype) |
| 336 | + else: |
| 337 | + iresult2 = astype_round_check(values.view("i8"), from_unit, to_unit) |
| 338 | + return iresult2.view(dtype) |
| 339 | + |
| 340 | + if (<object>values).dtype.byteorder == ">": |
| 341 | + # GH#29684 we incorrectly get OutOfBoundsDatetime if we dont swap |
| 342 | + values = values.astype(values.dtype.newbyteorder("<")) |
331 | 343 |
|
332 | 344 | cdef:
|
333 | 345 | ndarray i8values = values.view("i8")
|
@@ -356,10 +368,11 @@ cpdef ndarray astype_overflowsafe(
|
356 | 368 | check_dts_bounds(&dts, to_unit)
|
357 | 369 | except OutOfBoundsDatetime as err:
|
358 | 370 | if is_td:
|
359 |
| - tdval = np.timedelta64(value).view(values.dtype) |
| 371 | + from_abbrev = np.datetime_data(values.dtype)[0] |
| 372 | + np_val = np.timedelta64(value, from_abbrev) |
360 | 373 | msg = (
|
361 |
| - "Cannot convert {tdval} to {dtype} without overflow" |
362 |
| - .format(tdval=str(tdval), dtype=str(dtype)) |
| 374 | + "Cannot convert {np_val} to {dtype} without overflow" |
| 375 | + .format(np_val=str(np_val), dtype=str(dtype)) |
363 | 376 | )
|
364 | 377 | raise OutOfBoundsTimedelta(msg) from err
|
365 | 378 | else:
|
@@ -453,6 +466,52 @@ cdef int op_to_op_code(op):
|
453 | 466 | return Py_GT
|
454 | 467 |
|
455 | 468 |
|
cdef ndarray astype_round_check(
    ndarray i8values,
    NPY_DATETIMEUNIT from_unit,
    NPY_DATETIMEUNIT to_unit
):
    """
    Convert int64 values from a finer unit to a coarser one without rounding.

    Intended for cases with from_unit > to_unit, e.g. ns->us.  Each element
    is divided by the conversion factor between the two units; NaT is passed
    through unchanged.

    Parameters
    ----------
    i8values : ndarray
        Values to convert, read element-wise as int64.
    from_unit : NPY_DATETIMEUNIT
        Unit the input values are expressed in.
    to_unit : NPY_DATETIMEUNIT
        Unit to convert to.

    Returns
    -------
    ndarray
        Newly allocated int64 array with the same shape as ``i8values``.

    Raises
    ------
    ValueError
        If any non-NaT element is not an exact multiple of the conversion
        factor, i.e. the conversion would truncate (e.g. 1500ns->1us).
    """
    cdef:
        Py_ssize_t i, N = i8values.size

        # equiv: iresult = np.empty((<object>i8values).shape, dtype="i8")
        ndarray iresult = cnp.PyArray_EMPTY(
            i8values.ndim, i8values.shape, cnp.NPY_INT64, 0
        )
        cnp.broadcast mi = cnp.PyArray_MultiIterNew2(iresult, i8values)

        # Note the arguments to_unit, from unit are swapped vs how they
        #  are passed when going to a higher-frequency reso.
        int64_t mult = get_conversion_factor(to_unit, from_unit)
        # new_value is typed explicitly so it is not an implicit Python object
        int64_t value, mod, new_value

    for i in range(N):
        # Analogous to: item = i8values[i]
        value = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

        if value == NPY_DATETIME_NAT:
            new_value = NPY_DATETIME_NAT
        else:
            new_value, mod = divmod(value, mult)
            if mod != 0:
                # Only reached on the error path, so the import cost is not
                #  paid per element.
                # TODO: avoid runtime import
                from pandas._libs.tslibs.dtypes import npy_unit_to_abbrev
                from_abbrev = npy_unit_to_abbrev(from_unit)
                to_abbrev = npy_unit_to_abbrev(to_unit)
                raise ValueError(
                    f"Cannot losslessly cast '{value} {from_abbrev}' to {to_abbrev}"
                )

        # Analogous to: iresult[i] = new_value
        (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = new_value

        # Advance both the input and output positions together.
        cnp.PyArray_MultiIter_NEXT(mi)

    return iresult
| 513 | + |
| 514 | + |
456 | 515 | @cython.overflowcheck(True)
|
457 | 516 | cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1:
|
458 | 517 | """
|
@@ -489,5 +548,59 @@ cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT
|
489 | 548 | return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
|
490 | 549 | elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs:
|
491 | 550 | return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
|
| 551 | + |
| 552 | + |
cdef int64_t convert_reso(
    int64_t value,
    NPY_DATETIMEUNIT from_reso,
    NPY_DATETIMEUNIT to_reso,
    bint round_ok,
) except? -1:
    """
    Re-express a single int64 value given in ``from_reso`` in ``to_reso``.

    Parameters
    ----------
    value : int64_t
    from_reso : NPY_DATETIMEUNIT
    to_reso : NPY_DATETIMEUNIT
    round_ok : bint
        Whether a lossy (rounded-down) result is acceptable when converting
        to a coarser resolution.

    Returns
    -------
    int64_t

    Raises
    ------
    ValueError
        If the conversion to a coarser resolution leaves a positive
        remainder and ``round_ok`` is False.

    Notes
    -----
    Multiplication towards a finer resolution is overflow-checked; the
    caller is responsible for re-raising as OutOfBoundsTimedelta.
    """
    cdef:
        int64_t factor, quotient, remainder, scaled

    # Same resolution: nothing to do.
    if from_reso == to_reso:
        return value

    if to_reso < from_reso:
        # Finer -> coarser, e.g. ns -> us.  Division cannot overflow, but it
        #  can discard a remainder.
        factor = get_conversion_factor(to_reso, from_reso)
        quotient, remainder = divmod(value, factor)
        if not round_ok and remainder > 0:
            raise ValueError("Cannot losslessly convert units")

        # With a positive remainder we always round down, following
        #  np.timedelta64.
        return quotient

    if (
        from_reso == NPY_FR_Y
        or from_reso == NPY_FR_M
        or to_reso == NPY_FR_Y
        or to_reso == NPY_FR_M
    ):
        # A plain multiplication is not quite right here because the number
        #  of seconds in a month/year is not fixed.
        return _convert_reso_with_dtstruct(value, from_reso, to_reso)

    # Coarser -> finer, e.g. us -> ns.  No lossy rounding is possible, but
    #  the product can overflow.
    factor = get_conversion_factor(from_reso, to_reso)
    with cython.overflowcheck(True):
        # Note: caller is responsible for re-raising as OutOfBoundsTimedelta
        scaled = value * factor

    return scaled
| 594 | + |
| 595 | + |
cdef int64_t _convert_reso_with_dtstruct(
    int64_t value,
    NPY_DATETIMEUNIT from_unit,
    NPY_DATETIMEUNIT to_unit,
) except? -1:
    """
    Convert ``value`` between units by way of an npy_datetimestruct.

    The value is unpacked from ``from_unit`` into a struct, the struct is
    checked against ``to_unit``, and the struct is then packed back into an
    integer in ``to_unit``.
    """
    cdef:
        npy_datetimestruct fields
        int64_t converted

    # Unpack the integer into struct fields according to from_unit.
    pandas_datetime_to_datetimestruct(value, from_unit, &fields)

    # Validate against the target unit before repacking.
    # NOTE(review): presumably raises when the fields are not representable
    #  in to_unit -- confirm against check_dts_bounds.
    check_dts_bounds(&fields, to_unit)

    converted = npy_datetimestruct_to_datetime(to_unit, &fields)
    return converted
0 commit comments