diff --git a/bigframes/operations/datetimes.py b/bigframes/operations/datetimes.py index 032bd50390..e2439f6393 100644 --- a/bigframes/operations/datetimes.py +++ b/bigframes/operations/datetimes.py @@ -19,12 +19,18 @@ import bigframes_vendored.pandas.core.arrays.datetimelike as vendored_pandas_datetimelike import bigframes_vendored.pandas.core.indexes.accessor as vendordt +import pandas +from bigframes import dtypes from bigframes.core import log_adapter import bigframes.operations as ops import bigframes.operations.base import bigframes.series as series +_ONE_DAY = pandas.Timedelta("1d") +_ONE_SECOND = pandas.Timedelta("1s") +_ONE_MICRO = pandas.Timedelta("1us") + @log_adapter.class_logger class DatetimeMethods( @@ -80,6 +86,35 @@ def second(self) -> series.Series: def time(self) -> series.Series: return self._apply_unary_op(ops.time_op) + # Timedelta accessors + @property + def days(self) -> series.Series: + self._check_dtype(dtypes.TIMEDELTA_DTYPE) + + return self._apply_binary_op(_ONE_DAY, ops.floordiv_op) + + @property + def seconds(self) -> series.Series: + self._check_dtype(dtypes.TIMEDELTA_DTYPE) + + return self._apply_binary_op(_ONE_DAY, ops.mod_op) // _ONE_SECOND # type: ignore + + @property + def microseconds(self) -> series.Series: + self._check_dtype(dtypes.TIMEDELTA_DTYPE) + + return self._apply_binary_op(_ONE_SECOND, ops.mod_op) // _ONE_MICRO # type: ignore + + def total_seconds(self) -> series.Series: + self._check_dtype(dtypes.TIMEDELTA_DTYPE) + + return self._apply_binary_op(_ONE_SECOND, ops.div_op) + + def _check_dtype(self, target_dtype: dtypes.Dtype): + if self._dtype == target_dtype: + return + raise TypeError(f"Expect dtype: {target_dtype}, but got {self._dtype}") + @property def tz(self) -> Optional[dt.timezone]: # Assumption: pyarrow dtype diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 705439fd96..69f68ca5de 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -30,6 +30,14 @@ ] +@pytest.fixture +def timedelta_series(session): + pd_s = pd.Series(pd.to_timedelta([1.1010101, 2.2020102, 3.3030103], unit="d")) + bf_s = session.read_pandas(pd_s) + + return bf_s, pd_s + + @pytest.mark.parametrize( ("col_name",), DATE_COLUMNS, @@ -489,3 +497,39 @@ def test_timestamp_series_diff_agg(scalars_dfs, column): expected_result = pd_series.diff() assert_series_equal(actual_result, expected_result) + + +@pytest.mark.parametrize( + "access", + [ + pytest.param(lambda x: x.dt.days, id="dt.days"), + pytest.param(lambda x: x.dt.seconds, id="dt.seconds"), + pytest.param(lambda x: x.dt.microseconds, id="dt.microseconds"), + pytest.param(lambda x: x.dt.total_seconds(), id="dt.total_seconds()"), + ], +) +def test_timedelta_dt_accessors(timedelta_series, access): + bf_s, pd_s = timedelta_series + + actual_result = access(bf_s).to_pandas() + + expected_result = access(pd_s) + assert_series_equal( + actual_result, expected_result, check_dtype=False, check_index_type=False + ) + + +@pytest.mark.parametrize( + "access", + [ + pytest.param(lambda x: x.dt.days, id="dt.days"), + pytest.param(lambda x: x.dt.seconds, id="dt.seconds"), + pytest.param(lambda x: x.dt.microseconds, id="dt.microseconds"), + pytest.param(lambda x: x.dt.total_seconds(), id="dt.total_seconds()"), + ], +) +def test_timedelta_dt_accessors_on_wrong_type_raise_exception(scalars_dfs, access): + bf_df, _ = scalars_dfs + + with pytest.raises(TypeError): + access(bf_df["timestamp_col"]) diff --git a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py index a5004c93d0..e642d11ca4 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py @@ -299,6 +299,77 @@ def year(self): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + @property + def days(self): + """The numebr of days for each element + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series([pd.Timedelta("4d3m2s1us")]) + >>> s + 0 4 days 00:03:02.000001 + dtype: duration[us][pyarrow] + >>> s.dt.days + 0 4 + dtype: Int64 + """ + + @property + def seconds(self): + """Number of seconds (>= 0 and less than 1 day) for each element. + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series([pd.Timedelta("4d3m2s1us")]) + >>> s + 0 4 days 00:03:02.000001 + dtype: duration[us][pyarrow] + >>> s.dt.seconds + 0 182 + dtype: Int64 + """ + + @property + def microseconds(self): + """Number of microseconds (>= 0 and less than 1 second) for each element. + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series([pd.Timedelta("4d3m2s1us")]) + >>> s + 0 4 days 00:03:02.000001 + dtype: duration[us][pyarrow] + >>> s.dt.microseconds + 0 1 + dtype: Int64 + """ + + def total_seconds(self): + """Return total duration of each element expressed in seconds. + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series([pd.Timedelta("1d1m1s1us")]) + >>> s + 0 1 days 00:01:01.000001 + dtype: duration[us][pyarrow] + >>> s.dt.total_seconds() + 0 86461.000001 + dtype: Float64 + """ + @property def tz(self): """Return the timezone.