diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index b4a43a3..c2e91a1 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -69,29 +69,33 @@ def _datetime( cls, scalar, match_fn=re.compile( - r"\s*(?P<hour>\d+)(?::(?P<minute>\d+)(?::(?P<second>\d+(?:[.]\d+)?)?)?)?\s*$" + r"\s*(?P<hours>\d+)" + r"(?::(?P<minutes>\d+)" + r"(?::(?P<seconds>\d+)" + r"(?:\.(?P<fraction>\d*))?)?)?\s*$" ).match, ): if isinstance(scalar, datetime.time): return datetime.datetime.combine(cls._epoch, scalar) elif isinstance(scalar, str): # iso string - match = match_fn(scalar) - if not match: + parsed = match_fn(scalar) + if not parsed: raise ValueError(f"Bad time string: {repr(scalar)}") - hour = match.group("hour") - minute = match.group("minute") - second = match.group("second") - second, microsecond = divmod(float(second if second else 0), 1) + hours = parsed.group("hours") + minutes = parsed.group("minutes") + seconds = parsed.group("seconds") + fraction = parsed.group("fraction") + microseconds = int(fraction.ljust(6, "0")[:6]) if fraction else 0 return datetime.datetime( 1970, 1, 1, - int(hour), - int(minute if minute else 0), - int(second), - int(microsecond * 1_000_000), + int(hours), + int(minutes) if minutes else 0, + int(seconds) if seconds else 0, + microseconds, ) else: raise TypeError("Invalid value type", scalar) diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py new file mode 100644 index 0000000..71e704a --- /dev/null +++ b/tests/unit/test_date.py @@ -0,0 +1,62 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import pandas +import pytest + +# To register the types. +import db_dtypes # noqa + + +@pytest.mark.parametrize( + "value, expected", + [ + # Min/Max values for pandas.Timestamp. + ("1677-09-22", datetime.date(1677, 9, 22)), + ("2262-04-11", datetime.date(2262, 4, 11)), + # Typical "zero" values. + ("1900-01-01", datetime.date(1900, 1, 1)), + ("1970-01-01", datetime.date(1970, 1, 1)), + # Assorted values. + ("1993-10-31", datetime.date(1993, 10, 31)), + ("2012-02-29", datetime.date(2012, 2, 29)), + ("2021-12-17", datetime.date(2021, 12, 17)), + ("2038-01-19", datetime.date(2038, 1, 19)), + ], +) +def test_date_parsing(value, expected): + assert pandas.Series([value], dtype="date")[0] == expected + + +@pytest.mark.parametrize( + "value, error", + [ + ("thursday", "Bad date string: 'thursday'"), + ("1-2-thursday", "Bad date string: '1-2-thursday'"), + ("1-2-3-4", "Bad date string: '1-2-3-4'"), + ("1-2-3.f", "Bad date string: '1-2-3.f'"), + ("1-d-3", "Bad date string: '1-d-3'"), + ("1-3", "Bad date string: '1-3'"), + ("1", "Bad date string: '1'"), + ("", "Bad date string: ''"), + ("2021-2-99", "day is out of range for month"), + ("2021-99-1", "month must be in 1[.][.]12"), + ("10000-1-1", "year 10000 is out of range"), + ], +) +def test_date_parsing_errors(value, error): + with pytest.raises(ValueError, match=error): + pandas.Series([value], dtype="date") diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 118458e..a514c47 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -621,51 +621,3 @@ def test_date_sub(): do = pd.Series([pd.DateOffset(days=i) for i in range(4)]) expect = dates.astype("object") - do assert np.array_equal(dates - do, expect) - - -@pytest.mark.parametrize( - "value, expected", [("1", datetime.time(1)), ("1:2", datetime.time(1, 2))], -) -def test_short_time_parsing(value, 
expected): - assert _cls("time")([value])[0] == expected - - -@pytest.mark.parametrize( - "value, error", - [ - ("thursday", "Bad time string: 'thursday'"), - ("1:2:3thursday", "Bad time string: '1:2:3thursday'"), - ("1:2:3:4", "Bad time string: '1:2:3:4'"), - ("1:2:3.f", "Bad time string: '1:2:3.f'"), - ("1:d:3", "Bad time string: '1:d:3'"), - ("1:2.3", "Bad time string: '1:2.3'"), - ("", "Bad time string: ''"), - ("1:2:99", "second must be in 0[.][.]59"), - ("1:99", "minute must be in 0[.][.]59"), - ("99", "hour must be in 0[.][.]23"), - ], -) -def test_bad_time_parsing(value, error): - with pytest.raises(ValueError, match=error): - _cls("time")([value]) - - -@pytest.mark.parametrize( - "value, error", - [ - ("thursday", "Bad date string: 'thursday'"), - ("1-2-thursday", "Bad date string: '1-2-thursday'"), - ("1-2-3-4", "Bad date string: '1-2-3-4'"), - ("1-2-3.f", "Bad date string: '1-2-3.f'"), - ("1-d-3", "Bad date string: '1-d-3'"), - ("1-3", "Bad date string: '1-3'"), - ("1", "Bad date string: '1'"), - ("", "Bad date string: ''"), - ("2021-2-99", "day is out of range for month"), - ("2021-99-1", "month must be in 1[.][.]12"), - ("10000-1-1", "year 10000 is out of range"), - ], -) -def test_bad_date_parsing(value, error): - with pytest.raises(ValueError, match=error): - _cls("date")([value]) diff --git a/tests/unit/test_time.py b/tests/unit/test_time.py new file mode 100644 index 0000000..4a6adc8 --- /dev/null +++ b/tests/unit/test_time.py @@ -0,0 +1,84 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import pandas +import pytest + +# To register the types. +import db_dtypes # noqa + + +@pytest.mark.parametrize( + "value, expected", + [ + # Midnight + ("0", datetime.time(0)), + ("0:0", datetime.time(0)), + ("0:0:0", datetime.time(0)), + ("0:0:0.", datetime.time(0)), + ("0:0:0.0", datetime.time(0)), + ("0:0:0.000000", datetime.time(0)), + ("00:00:00", datetime.time(0, 0, 0)), + (" 00:00:00 ", datetime.time(0, 0, 0)), + # Short values + ("1", datetime.time(1)), + ("23", datetime.time(23)), + ("1:2", datetime.time(1, 2)), + ("23:59", datetime.time(23, 59)), + ("1:2:3", datetime.time(1, 2, 3)), + ("23:59:59", datetime.time(23, 59, 59)), + # Non-octal values. + ("08:08:08", datetime.time(8, 8, 8)), + ("09:09:09", datetime.time(9, 9, 9)), + # Fractional seconds can cause rounding problems if cast to float. See: + # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 + ("0:0:59.876543", datetime.time(0, 0, 59, 876543)), + ("01:01:01.010101", datetime.time(1, 1, 1, 10101)), + ("09:09:09.090909", datetime.time(9, 9, 9, 90909)), + ("11:11:11.111111", datetime.time(11, 11, 11, 111111)), + ("19:16:23.987654", datetime.time(19, 16, 23, 987654)), + # Microsecond precision + ("00:00:00.000001", datetime.time(0, 0, 0, 1)), + ("23:59:59.999999", datetime.time(23, 59, 59, 999_999)), + # TODO: Support nanosecond precision values without truncation. 
+ # https://github.com/googleapis/python-db-dtypes-pandas/issues/19 + ("0:0:0.000001001", datetime.time(0, 0, 0, 1)), + ("23:59:59.999999000", datetime.time(23, 59, 59, 999_999)), + ("23:59:59.999999999", datetime.time(23, 59, 59, 999_999)), + ], +) +def test_time_parsing(value, expected): + assert pandas.Series([value], dtype="time")[0] == expected + + +@pytest.mark.parametrize( + "value, error", + [ + ("thursday", "Bad time string: 'thursday'"), + ("1:2:3thursday", "Bad time string: '1:2:3thursday'"), + ("1:2:3:4", "Bad time string: '1:2:3:4'"), + ("1:2:3.f", "Bad time string: '1:2:3.f'"), + ("1:d:3", "Bad time string: '1:d:3'"), + ("1:2.3", "Bad time string: '1:2.3'"), + ("", "Bad time string: ''"), + ("1:2:99", "second must be in 0[.][.]59"), + ("1:99", "minute must be in 0[.][.]59"), + ("99", "hour must be in 0[.][.]23"), + ], +) +def test_time_parsing_errors(value, error): + with pytest.raises(ValueError, match=error): + pandas.Series([value], dtype="time")