From 64b74ae734a5c4d9360d501ba740ff4211145961 Mon Sep 17 00:00:00 2001
From: Shenyang Cai
Date: Fri, 9 May 2025 18:13:29 +0000
Subject: [PATCH] feat: add `isocalendar()` for dt accessor

Add `Series.dt.isocalendar()`, which returns a DataFrame with `year`,
`week` and `day` columns. The columns are produced by three new unary
date ops (`iso_year_op`, `iso_week_op`, `iso_day_op`) that are registered
in the scalar op compiler and exported from `bigframes.operations`.

The system test uses a date range that crosses a year boundary
(2009-12-25 .. 2010-01-07) because that is where ISO year/week numbers
diverge from the calendar year.
---
 bigframes/core/compile/scalar_op_compiler.py  | 16 ++++++++++
 bigframes/operations/__init__.py              | 10 ++++--
 bigframes/operations/date_ops.py              | 14 ++++++++
 bigframes/operations/datetimes.py             | 13 ++++++--
 .../system/small/operations/test_datetimes.py | 15 +++++++++
 .../pandas/core/indexes/accessor.py           | 32 +++++++++++++++++++
 6 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index e2dfa38ce1..78e373121e 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -667,6 +667,22 @@ def date_op_impl(x: ibis_types.Value):
     return typing.cast(ibis_types.TimestampValue, x).date()
 
 
+@scalar_op_compiler.register_unary_op(ops.iso_day_op)
+def iso_day_op_impl(x: ibis_types.Value):
+    # Plus 1 because ISO day of week uses 1-based indexing
+    return dayofweek_op_impl(x) + 1
+
+
+@scalar_op_compiler.register_unary_op(ops.iso_week_op)
+def iso_week_op_impl(x: ibis_types.Value):
+    return typing.cast(ibis_types.TimestampValue, x).week_of_year()
+
+
+@scalar_op_compiler.register_unary_op(ops.iso_year_op)
+def iso_year_op_impl(x: ibis_types.Value):
+    return typing.cast(ibis_types.TimestampValue, x).iso_year()
+
+
 @scalar_op_compiler.register_unary_op(ops.dayofweek_op)
 def dayofweek_op_impl(x: ibis_types.Value):
     return (
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index 0f9b64b760..e5da674a8c 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -43,6 +43,9 @@
     day_op,
     dayofweek_op,
     dayofyear_op,
+    iso_day_op,
+    iso_week_op,
+    iso_year_op,
     month_op,
     quarter_op,
     year_op,
@@ -260,11 +263,14 @@
     # Date ops
     "date_diff_op",
     "day_op",
-    "month_op",
-    "year_op",
     "dayofweek_op",
     "dayofyear_op",
+    "iso_day_op",
+    "iso_week_op",
+    "iso_year_op",
+    "month_op",
     "quarter_op",
+    "year_op",
     # Time ops
     "hour_op",
     "minute_op",
diff --git a/bigframes/operations/date_ops.py b/bigframes/operations/date_ops.py
index 9bcdddb8df..0b91c86b11 100644
--- a/bigframes/operations/date_ops.py
+++ b/bigframes/operations/date_ops.py
@@ -34,6 +34,20 @@
     type_signature=op_typing.DATELIKE_ACCESSOR,
 )
 
+iso_day_op = base_ops.create_unary_op(
+    name="iso_day", type_signature=op_typing.DATELIKE_ACCESSOR
+)
+
+iso_week_op = base_ops.create_unary_op(
+    name="iso_week",
+    type_signature=op_typing.DATELIKE_ACCESSOR,
+)
+
+iso_year_op = base_ops.create_unary_op(
+    name="iso_year",
+    type_signature=op_typing.DATELIKE_ACCESSOR,
+)
+
 dayofweek_op = base_ops.create_unary_op(
     name="dayofweek",
     type_signature=op_typing.DATELIKE_ACCESSOR,
diff --git a/bigframes/operations/datetimes.py b/bigframes/operations/datetimes.py
index e2439f6393..56320e7cc6 100644
--- a/bigframes/operations/datetimes.py
+++ b/bigframes/operations/datetimes.py
@@ -21,11 +21,11 @@
 import bigframes_vendored.pandas.core.indexes.accessor as vendordt
 import pandas
 
-from bigframes import dtypes
+from bigframes import dataframe, dtypes, series
 from bigframes.core import log_adapter
+from bigframes.core.reshape import concat
 import bigframes.operations as ops
 import bigframes.operations.base
-import bigframes.series as series
 
 _ONE_DAY = pandas.Timedelta("1d")
 _ONE_SECOND = pandas.Timedelta("1s")
@@ -69,6 +69,15 @@ def year(self) -> series.Series:
     def month(self) -> series.Series:
         return self._apply_unary_op(ops.month_op)
 
+    def isocalendar(self) -> dataframe.DataFrame:
+        years = self._apply_unary_op(ops.iso_year_op)
+        weeks = self._apply_unary_op(ops.iso_week_op)
+        days = self._apply_unary_op(ops.iso_day_op)
+
+        result = concat.concat([years, weeks, days], axis=1)
+        result.columns = pandas.Index(["year", "week", "day"])
+        return result
+
     # Time accessors
     @property
     def hour(self) -> series.Series:
diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py
index 69f68ca5de..bbecf40e0b 100644
--- a/tests/system/small/operations/test_datetimes.py
+++ b/tests/system/small/operations/test_datetimes.py
@@ -229,6 +229,21 @@ def test_dt_year(scalars_dfs, col_name):
     )
 
 
+def test_dt_isocalendar(session):
+    # We don't reuse the existing scalars_dfs fixture because the ISO calendar
+    # gets tricky when a new year starts, and the `scalars_dfs` dataset does not
+    # cover this case.
+    pd_s = pd.Series(pd.date_range("2009-12-25", "2010-01-07", freq="d"))
+    bf_s = session.read_pandas(pd_s)
+
+    actual_result = bf_s.dt.isocalendar().to_pandas()
+
+    expected_result = pd_s.dt.isocalendar()
+    testing.assert_frame_equal(
+        actual_result, expected_result, check_dtype=False, check_index_type=False
+    )
+
+
 @pytest.mark.parametrize(
     ("col_name",),
     DATETIME_COL_NAMES,
diff --git a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py
index e642d11ca4..469f35f181 100644
--- a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py
+++ b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py
@@ -199,6 +199,38 @@ def month(self):
 
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def isocalendar(self):
+        """
+        Calculate year, week, and day according to the ISO 8601 standard.
+
+        **Examples:**
+            >>> import pandas as pd
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> s = bpd.Series(
+            ...     pd.date_range('2009-12-27', '2010-01-04', freq='d').to_series()
+            ... )
+            >>> s.dt.isocalendar()
+                                 year  week  day
+            2009-12-27 00:00:00  2009    52    7
+            2009-12-28 00:00:00  2009    53    1
+            2009-12-29 00:00:00  2009    53    2
+            2009-12-30 00:00:00  2009    53    3
+            2009-12-31 00:00:00  2009    53    4
+            2010-01-01 00:00:00  2009    53    5
+            2010-01-02 00:00:00  2009    53    6
+            2010-01-03 00:00:00  2009    53    7
+            2010-01-04 00:00:00  2010     1    1
+
+            [9 rows x 3 columns]
+
+
+        Returns: DataFrame
+            With columns year, week and day.
+        """
+
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     @property
     def second(self):
         """The seconds of the datetime.