From ef1a5880901880f1ce1b9a0b510461aed0267996 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 16 Nov 2023 07:38:36 -0500 Subject: [PATCH 1/6] remove pandas/tests/frame UB --- .../src/vendored/numpy/datetime/np_datetime.c | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 01e11e5138a8e..169cb75d0b773 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -70,14 +70,16 @@ _Static_assert(0, "__has_builtin not detected; please try a newer compiler"); #endif #endif -#define PD_CHECK_OVERFLOW(FUNC) \ +#define PD_RAISE_FOR_OVERFLOW \ + PyGILState_STATE gstate = PyGILState_Ensure(); \ + PyErr_SetString(PyExc_OverflowError, "Overflow occurred in " __FILE__ ":"); \ + PyGILState_Release(gstate); \ + return -1; + +#define PD_CHECK_OVERFLOW(EXPR) \ do { \ - if ((FUNC) != 0) { \ - PyGILState_STATE gstate = PyGILState_Ensure(); \ - PyErr_SetString(PyExc_OverflowError, \ - "Overflow occurred in npy_datetimestruct_to_datetime"); \ - PyGILState_Release(gstate); \ - return -1; \ + if ((EXPR) != 0) { \ + PD_RAISE_FOR_OVERFLOW \ } \ } while (0) @@ -150,9 +152,10 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { int i, month; npy_int64 year, days = 0; const int *month_lengths; + int did_overflow = 0; - year = dts->year - 1970; - days = year * 365; + did_overflow |= checked_int64_sub(dts->year, 1970, &year); + did_overflow |= checked_int64_mul(year, 365, &days); /* Adjust for leap years */ if (days >= 0) { @@ -160,32 +163,32 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { * 1968 is the closest leap year before 1970. * Exclude the current year, so add 1. 
*/ - year += 1; + did_overflow |= checked_int64_add(year, 1, &year); /* Add one day for each 4 years */ - days += year / 4; + did_overflow |= checked_int64_add(days, year / 4, &days); /* 1900 is the closest previous year divisible by 100 */ - year += 68; + did_overflow |= checked_int64_add(year, 68, &year); /* Subtract one day for each 100 years */ - days -= year / 100; + did_overflow |= checked_int64_sub(days, year / 100, &days); /* 1600 is the closest previous year divisible by 400 */ - year += 300; + did_overflow |= checked_int64_add(year, 300, &year); /* Add one day for each 400 years */ - days += year / 400; + did_overflow |= checked_int64_add(days, year / 400, &days); } else { /* * 1972 is the closest later year after 1970. * Include the current year, so subtract 2. */ - year -= 2; + did_overflow |= checked_int64_sub(year, 2, &year); /* Subtract one day for each 4 years */ - days += year / 4; + did_overflow |= checked_int64_add(days, year / 4, &days); /* 2000 is the closest later year divisible by 100 */ - year -= 28; + did_overflow |= checked_int64_sub(year, 28, &year); /* Add one day for each 100 years */ - days -= year / 100; + did_overflow |= checked_int64_add(days, year / 100, &days); /* 2000 is also the closest later year divisible by 400 */ /* Subtract one day for each 400 years */ - days += year / 400; + did_overflow |= checked_int64_add(days, year / 400, &days); } month_lengths = days_per_month_table[is_leapyear(dts->year)]; @@ -193,12 +196,15 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { /* Add the months */ for (i = 0; i < month; ++i) { - days += month_lengths[i]; + did_overflow |= checked_int64_add(days, month_lengths[i], &days); } /* Add the days */ - days += dts->day - 1; + did_overflow |= checked_int64_add(days, dts->day - 1, &days); + if (did_overflow) { + PD_RAISE_FOR_OVERFLOW; + } return days; } From b2b492a2d58773e15de3af3d02ab2340de054617 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 16 Nov 2023 13:12:42 -0500 
Subject: [PATCH 2/6] updated macro with filename / line num --- pandas/_libs/src/vendored/numpy/datetime/np_datetime.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 169cb75d0b773..919b30c966b6e 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -70,9 +70,13 @@ _Static_assert(0, "__has_builtin not detected; please try a newer compiler"); #endif #endif +#define XSTR(a) STR(a) +#define STR(a) #a + #define PD_RAISE_FOR_OVERFLOW \ PyGILState_STATE gstate = PyGILState_Ensure(); \ - PyErr_SetString(PyExc_OverflowError, "Overflow occurred in " __FILE__ ":"); \ + PyErr_SetString(PyExc_OverflowError, \ + "Overflow occurred at " __FILE__ ":" XSTR(__LINE__)); \ PyGILState_Release(gstate); \ return -1; From 13cb7e5f3e56196d0ccffef4e084251586bdce16 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 17 Nov 2023 11:26:00 -0500 Subject: [PATCH 3/6] fix typo --- pandas/_libs/src/vendored/numpy/datetime/np_datetime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 919b30c966b6e..fd27dbf377862 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -189,7 +189,7 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { /* 2000 is the closest later year divisible by 100 */ did_overflow |= checked_int64_sub(year, 28, &year); /* Add one day for each 100 years */ - did_overflow |= checked_int64_add(days, year / 100, &days); + did_overflow |= checked_int64_sub(days, year / 100, &days); /* 2000 is also the closest later year divisible by 400 */ /* Subtract one day for each 400 years */ did_overflow |= checked_int64_add(days, year / 400, &days); From 
d2a1014f9f4087cef6dbd08b0eb974b8140091e6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 17 Nov 2023 12:23:34 -0500 Subject: [PATCH 4/6] stay with PD_CHECK_OVERFLOW --- .../src/vendored/numpy/datetime/np_datetime.c | 54 ++++++++----------- 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index fd27dbf377862..ac4ce5133f03b 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -70,20 +70,14 @@ _Static_assert(0, "__has_builtin not detected; please try a newer compiler"); #endif #endif -#define XSTR(a) STR(a) -#define STR(a) #a - -#define PD_RAISE_FOR_OVERFLOW \ - PyGILState_STATE gstate = PyGILState_Ensure(); \ - PyErr_SetString(PyExc_OverflowError, \ - "Overflow occurred at " __FILE__ ":" XSTR(__LINE__)); \ - PyGILState_Release(gstate); \ - return -1; - -#define PD_CHECK_OVERFLOW(EXPR) \ +#define PD_CHECK_OVERFLOW(FUNC) \ do { \ - if ((EXPR) != 0) { \ - PD_RAISE_FOR_OVERFLOW \ + if ((FUNC) != 0) { \ + PyGILState_STATE gstate = PyGILState_Ensure(); \ + PyErr_SetString(PyExc_OverflowError, \ + "Overflow occurred in npy_datetimestruct_to_datetime"); \ + PyGILState_Release(gstate); \ + return -1; \ } \ } while (0) @@ -156,10 +150,9 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { int i, month; npy_int64 year, days = 0; const int *month_lengths; - int did_overflow = 0; - did_overflow |= checked_int64_sub(dts->year, 1970, &year); - did_overflow |= checked_int64_mul(year, 365, &days); + PD_CHECK_OVERFLOW(checked_int64_sub(dts->year, 1970, &year)); + PD_CHECK_OVERFLOW(checked_int64_mul(year, 365, &days)); /* Adjust for leap years */ if (days >= 0) { @@ -167,32 +160,32 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { * 1968 is the closest leap year before 1970. * Exclude the current year, so add 1. 
*/ - did_overflow |= checked_int64_add(year, 1, &year); + PD_CHECK_OVERFLOW(checked_int64_add(year, 1, &year)); /* Add one day for each 4 years */ - did_overflow |= checked_int64_add(days, year / 4, &days); + PD_CHECK_OVERFLOW(checked_int64_add(days, year / 4, &days)); /* 1900 is the closest previous year divisible by 100 */ - did_overflow |= checked_int64_add(year, 68, &year); + PD_CHECK_OVERFLOW(checked_int64_add(year, 68, &year)); /* Subtract one day for each 100 years */ - did_overflow |= checked_int64_sub(days, year / 100, &days); + PD_CHECK_OVERFLOW(checked_int64_sub(days, year / 100, &days)); /* 1600 is the closest previous year divisible by 400 */ - did_overflow |= checked_int64_add(year, 300, &year); + PD_CHECK_OVERFLOW(checked_int64_add(year, 300, &year)); /* Add one day for each 400 years */ - did_overflow |= checked_int64_add(days, year / 400, &days); + PD_CHECK_OVERFLOW(checked_int64_add(days, year / 400, &days)); } else { /* * 1972 is the closest later year after 1970. * Include the current year, so subtract 2. 
*/ - did_overflow |= checked_int64_sub(year, 2, &year); + PD_CHECK_OVERFLOW(checked_int64_sub(year, 2, &year)); /* Subtract one day for each 4 years */ - did_overflow |= checked_int64_add(days, year / 4, &days); + PD_CHECK_OVERFLOW(checked_int64_add(days, year / 4, &days)); /* 2000 is the closest later year divisible by 100 */ - did_overflow |= checked_int64_sub(year, 28, &year); + PD_CHECK_OVERFLOW(checked_int64_sub(year, 28, &year)); /* Add one day for each 100 years */ - did_overflow |= checked_int64_sub(days, year / 100, &days); + PD_CHECK_OVERFLOW(checked_int64_sub(days, year / 100, &days)); /* 2000 is also the closest later year divisible by 400 */ /* Subtract one day for each 400 years */ - did_overflow |= checked_int64_add(days, year / 400, &days); + PD_CHECK_OVERFLOW(checked_int64_add(days, year / 400, &days)); } month_lengths = days_per_month_table[is_leapyear(dts->year)]; @@ -200,15 +193,12 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { /* Add the months */ for (i = 0; i < month; ++i) { - did_overflow |= checked_int64_add(days, month_lengths[i], &days); + PD_CHECK_OVERFLOW(checked_int64_add(days, month_lengths[i], &days)); } /* Add the days */ - did_overflow |= checked_int64_add(days, dts->day - 1, &days); + PD_CHECK_OVERFLOW(checked_int64_add(days, dts->day - 1, &days)); - if (did_overflow) { - PD_RAISE_FOR_OVERFLOW; - } return days; } From 842b7c15586e719a6a71d01f5a35b562e511bfe2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 17 Nov 2023 14:00:30 -0500 Subject: [PATCH 5/6] updates --- .../src/vendored/numpy/datetime/np_datetime.c | 18 +++++++++++++++--- pandas/tests/frame/test_constructors.py | 7 ++----- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index ac4ce5133f03b..1a9e55b8539df 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ 
b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -22,12 +22,12 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #endif // NPY_NO_DEPRECATED_API -#include <Python.h> - #include "pandas/vendored/numpy/datetime/np_datetime.h" +#include <Python.h> #include <numpy/arrayobject.h> #include <numpy/arrayscalars.h> #include <numpy/ndarraytypes.h> +#include <stdbool.h> #if defined(_WIN32) #ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS @@ -70,12 +70,15 @@ _Static_assert(0, "__has_builtin not detected; please try a newer compiler"); #endif #endif +#define XSTR(a) STR(a) +#define STR(a) #a + #define PD_CHECK_OVERFLOW(FUNC) \ do { \ if ((FUNC) != 0) { \ PyGILState_STATE gstate = PyGILState_Ensure(); \ PyErr_SetString(PyExc_OverflowError, \ - "Overflow occurred in npy_datetimestruct_to_datetime"); \ + "Overflow occurred at " __FILE__ ":" XSTR(__LINE__)); \ PyGILState_Release(gstate); \ return -1; \ } \ @@ -442,6 +445,15 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, } const int64_t days = get_datetimestruct_days(dts); + if (days == -1) { + PyGILState_STATE gstate = PyGILState_Ensure(); + bool did_error = PyErr_Occurred() ?
false : true; + PyGILState_Release(gstate); + if (did_error) { + return -1; + } + } + if (base == NPY_FR_D) { return days; } diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index bf17b61b0e3f3..a0929022d8ac0 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3198,11 +3198,8 @@ def test_from_out_of_bounds_ns_datetime( def test_out_of_s_bounds_datetime64(self, constructor): scalar = np.datetime64(np.iinfo(np.int64).max, "D") - result = constructor(scalar) - item = get1(result) - assert type(item) is np.datetime64 - dtype = tm.get_dtype(result) - assert dtype == object + with pytest.raises(OverflowError, match="Overflow occurred"): + constructor(scalar) @pytest.mark.parametrize("cls", [timedelta, np.timedelta64]) def test_from_out_of_bounds_ns_timedelta( From 2ecfea025229f91bcb15ff6240496a33caccfea0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 17 Nov 2023 13:44:55 -0800 Subject: [PATCH 6/6] test fixes --- pandas/_libs/src/vendored/numpy/datetime/np_datetime.c | 2 +- pandas/tests/frame/test_constructors.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c index 15c0a2c4e22c3..31c39d48ad487 100644 --- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -435,7 +435,7 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, const int64_t days = get_datetimestruct_days(dts); if (days == -1) { PyGILState_STATE gstate = PyGILState_Ensure(); - bool did_error = PyErr_Occurred() ? false : true; + bool did_error = PyErr_Occurred() == NULL ? 
false : true; PyGILState_Release(gstate); if (did_error) { return -1; diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a0929022d8ac0..bf17b61b0e3f3 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3198,8 +3198,11 @@ def test_from_out_of_bounds_ns_datetime( def test_out_of_s_bounds_datetime64(self, constructor): scalar = np.datetime64(np.iinfo(np.int64).max, "D") - with pytest.raises(OverflowError, match="Overflow occurred"): - constructor(scalar) + result = constructor(scalar) + item = get1(result) + assert type(item) is np.datetime64 + dtype = tm.get_dtype(result) + assert dtype == object @pytest.mark.parametrize("cls", [timedelta, np.timedelta64]) def test_from_out_of_bounds_ns_timedelta(