From ad3af962ad6baebfd234924775a103079f70801c Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 21 Dec 2022 13:24:38 +0000 Subject: [PATCH 1/2] fix: adds bounds checking because pandas now handles microsecond resolution --- db_dtypes/__init__.py | 14 +++++++++++++- tests/unit/test_date.py | 17 ++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 2b51bcd..09308b8 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -23,9 +23,11 @@ import packaging.version import pandas import pandas.api.extensions +from pandas.errors import OutOfBoundsDatetime import pyarrow import pyarrow.compute + from db_dtypes.version import __version__ from db_dtypes import core @@ -143,6 +145,7 @@ def _datetime( second = parsed.group("seconds") fraction = parsed.group("fraction") nanosecond = int(fraction.ljust(9, "0")[:9]) if fraction else 0 + return pandas.Timestamp( year=1970, month=1, @@ -263,7 +266,16 @@ def _datetime( year = int(match.group("year")) month = int(match.group("month")) day = int(match.group("day")) - return pandas.Timestamp(year=year, month=month, day=day).to_datetime64() + + dateObj = pandas.Timestamp( + year=year, + month=month, + day=day, + ) + if pandas.Timestamp.min < dateObj < pandas.Timestamp.max: + return dateObj.to_datetime64() + else: + raise OutOfBoundsDatetime("Out of bounds", scalar) else: raise TypeError("Invalid value type", scalar) diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index bbe74cb..5bd0812 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -18,6 +18,7 @@ import numpy import numpy.testing import pandas +from pandas.errors import OutOfBoundsDatetime import pandas.testing import pytest @@ -143,15 +144,25 @@ def test_date_set_slice_null(): ("2021-2-99", "day is out of range for month"), ("2021-99-1", "month must be in 1[.][.]12"), ("10000-1-1", "year 10000 is out of range"), - # Outside of min/max values pandas.Timestamp. + ], +) +def test_date_parsing_errors(value, error): + with pytest.raises(ValueError, match=error): + pandas.Series([value], dtype="dbdate") + + +@pytest.mark.parametrize( + "value, error", + [ + # Values that are outside of the min/max values allowed by pandas.Timestamp ("0001-01-01", "Out of bounds"), ("9999-12-31", "Out of bounds"), ("1677-09-21", "Out of bounds"), ("2262-04-12", "Out of bounds"), ], ) -def test_date_parsing_errors(value, error): - with pytest.raises(ValueError, match=error): +def test_date_parsing_errors_out_of_bounds(value, error): + with pytest.raises(OutOfBoundsDatetime, match=error): pandas.Series([value], dtype="dbdate") From 27f04c35eca16108fd8f9a7b804393c7917cbc69 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 3 Jan 2023 10:30:51 -0500 Subject: [PATCH 2/2] Update db_dtypes/__init__.py Co-authored-by: Tim Swast --- db_dtypes/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 09308b8..3ecefed 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -274,8 +274,9 @@ def _datetime( ) if pandas.Timestamp.min < dateObj < pandas.Timestamp.max: return dateObj.to_datetime64() - else: - raise OutOfBoundsDatetime("Out of bounds", scalar) + else: # pragma: NO COVER + # TODO(#166): Include these lines in coverage when pandas 2.0 is released. + raise OutOfBoundsDatetime("Out of bounds", scalar) # pragma: NO COVER else: raise TypeError("Invalid value type", scalar)