diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index c95d1ca45e..7c6503a94f 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -632,6 +632,15 @@ def year_op_impl(x: ibis_types.Value):
     return typing.cast(ibis_types.TimestampValue, x).year().cast(ibis_dtypes.int64)
 
 
+@scalar_op_compiler.register_unary_op(ops.normalize_op)
+def normalize_op_impl(x: ibis_types.Value):
+    """Truncate each value to the start of its day, keeping the input type."""
+    result_type = x.type()
+    result = x.truncate("D")
+    # Cast back so the output dtype matches the source column.
+    return result.cast(result_type)
+
+
 # Parameterized ops
 @scalar_op_compiler.register_unary_op(ops.StructFieldOp, pass_op=True)
 def struct_field_op_impl(x: ibis_types.Value, op: ops.StructFieldOp):
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index b122f1fe7c..71a188aed6 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -206,6 +206,7 @@ def create_ternary_op(
 second_op = create_unary_op(name="second", type_rule=op_typing.INTEGER)
 time_op = create_unary_op(name="time", type_rule=op_typing.INTEGER)
 year_op = create_unary_op(name="year", type_rule=op_typing.INTEGER)
+normalize_op = create_unary_op(name="normalize")
 ## Trigonometry Ops
 sin_op = create_unary_op(name="sin", type_rule=op_typing.REAL_NUMERIC)
 cos_op = create_unary_op(name="cos", type_rule=op_typing.REAL_NUMERIC)
diff --git a/bigframes/operations/datetimes.py b/bigframes/operations/datetimes.py
index eb91bc0b20..1b4a2fe0e6 100644
--- a/bigframes/operations/datetimes.py
+++ b/bigframes/operations/datetimes.py
@@ -94,3 +94,6 @@ def unit(self) -> str:
 
     def strftime(self, date_format: str) -> series.Series:
         return self._apply_unary_op(ops.StrftimeOp(date_format=date_format))
+
+    def normalize(self) -> series.Series:
+        return self._apply_unary_op(ops.normalize_op)
diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py
index 854672585d..d5100e7dc2 100644
--- a/tests/system/small/operations/test_datetimes.py
+++ b/tests/system/small/operations/test_datetimes.py
@@ -266,3 +266,19 @@ def test_dt_strftime_time():
         bf_result, expected_result, check_index_type=False, check_dtype=False
     )
     assert bf_result.dtype == "string[pyarrow]"
+
+
+@pytest.mark.parametrize(
+    ("col_name",),
+    DATETIME_COL_NAMES,
+)
+@skip_legacy_pandas
+def test_dt_normalize(scalars_dfs, col_name):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df[col_name].dt.normalize().to_pandas()
+    pd_result = scalars_pandas_df[col_name].dt.normalize()
+
+    assert_series_equal(
+        pd_result.astype(scalars_df[col_name].dtype),  # normalize preserves type
+        bf_result,
+    )
diff --git a/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py
index 4f7e33909e..60ac19b818 100644
--- a/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py
+++ b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py
@@ -36,3 +36,34 @@ def strftime(self, date_format: str):
             bigframes.series.Series of formatted strings.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def normalize(self):
+        """
+        Convert times to midnight.
+
+        The time component of the date-time is converted to midnight i.e.
+        00:00:00. This is useful in cases when the time does not matter.
+        The return dtype will match the source series.
+
+        This method is available on Series with datetime values under the
+        .dt accessor.
+
+        **Examples:**
+
+            >>> import pandas as pd
+            >>> import bigframes.pandas as bpd
+            >>> s = bpd.Series(pd.date_range(
+            ...     start='2014-08-01 10:00',
+            ...     freq='h',
+            ...     periods=3,
+            ...     tz='Asia/Calcutta')) # note timezones will be converted to UTC here
+            >>> s.dt.normalize()
+            0    2014-08-01 00:00:00+00:00
+            1    2014-08-01 00:00:00+00:00
+            2    2014-08-01 00:00:00+00:00
+            dtype: timestamp[us, tz=UTC][pyarrow]
+
+        Returns:
+            bigframes.series.Series of the same dtype as the data.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)