From f5034a6b8203e73a0cf1320875a66bb84d5ead11 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 9 May 2025 17:14:20 +0000 Subject: [PATCH 1/5] [WIP] added DatetimeIndex. Docs and tests to come next --- bigframes/core/indexes/base.py | 14 ++++++--- bigframes/core/indexes/datetimes.py | 47 +++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 bigframes/core/indexes/datetimes.py diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index eac1f58eae..9f58e45d0c 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -25,6 +25,7 @@ import numpy as np import pandas +from bigframes import dtypes import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks import bigframes.core.expression as ex @@ -90,12 +91,17 @@ def __new__( block = df.DataFrame(pd_df, session=session)._block # TODO: Support more index subtypes - from bigframes.core.indexes.multi import MultiIndex - if len(block._index_columns) <= 1: - klass = cls - else: + if len(block._index_columns) > 1: + from bigframes.core.indexes.multi import MultiIndex + klass = MultiIndex + elif dtypes.is_datetime_like(block.index.dtypes[0]): + from bigframes.core.indexes.datetimes import DatetimeIndex + + klass = DatetimeIndex + else: + klass = cls result = typing.cast(Index, object.__new__(klass)) result._query_job = None diff --git a/bigframes/core/indexes/datetimes.py b/bigframes/core/indexes/datetimes.py new file mode 100644 index 0000000000..a2368ca44b --- /dev/null +++ b/bigframes/core/indexes/datetimes.py @@ -0,0 +1,47 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An index based on a single column with a datetime-like data type.""" + +from __future__ import annotations + +from bigframes.core import expression as ex +from bigframes.core.indexes.base import Index +from bigframes.operations import date_ops + + +class DatetimeIndex(Index): + @property + def year(self) -> Index: + return self._apply_unary_expr(date_ops.year_op.as_expr(ex.free_var("arg"))) + + @property + def month(self) -> Index: + return self._apply_unary_expr(date_ops.month_op.as_expr(ex.free_var("arg"))) + + @property + def day(self) -> Index: + return self._apply_unary_expr(date_ops.day_op.as_expr(ex.free_var("arg"))) + + @property + def dayofweek(self) -> Index: + return self._apply_unary_expr(date_ops.dayofweek_op.as_expr(ex.free_var("arg"))) + + @property + def day_of_week(self) -> Index: + return self.dayofweek + + @property + def weekday(self) -> Index: + return self.dayofweek From 4553b7fab4ef3d1093f2ce5e3a288c56db7750c6 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 9 May 2025 22:05:41 +0000 Subject: [PATCH 2/5] add tests and docs --- bigframes/core/indexes/__init__.py | 2 + bigframes/core/indexes/datetimes.py | 11 +- bigframes/pandas/__init__.py | 2 + docs/reference/bigframes.pandas/indexing.rst | 12 ++ tests/system/small/core/indexes/__init__.py | 13 +++ .../small/core/indexes/test_datetimes.py | 46 ++++++++ .../pandas/core/indexes/datetimes.py | 106 ++++++++++++++++++ 7 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 tests/system/small/core/indexes/__init__.py create mode 100644 tests/system/small/core/indexes/test_datetimes.py create mode 100644 third_party/bigframes_vendored/pandas/core/indexes/datetimes.py diff --git a/bigframes/core/indexes/__init__.py b/bigframes/core/indexes/__init__.py index 0a95adcd83..dfe361aa76 100644 --- a/bigframes/core/indexes/__init__.py +++ b/bigframes/core/indexes/__init__.py @@ -13,9 +13,11 @@ # limitations under the License. from bigframes.core.indexes.base import Index +from bigframes.core.indexes.datetimes import DatetimeIndex from bigframes.core.indexes.multi import MultiIndex __all__ = [ "Index", "MultiIndex", + "DatetimeIndex", ] diff --git a/bigframes/core/indexes/datetimes.py b/bigframes/core/indexes/datetimes.py index a2368ca44b..23ad8b03b4 100644 --- a/bigframes/core/indexes/datetimes.py +++ b/bigframes/core/indexes/datetimes.py @@ -16,12 +16,21 @@ from __future__ import annotations +from bigframes_vendored.pandas.core.indexes import ( + datetimes as vendored_pandas_datetime_index, +) + from bigframes.core import expression as ex from bigframes.core.indexes.base import Index from bigframes.operations import date_ops -class DatetimeIndex(Index): +class DatetimeIndex(Index, vendored_pandas_datetime_index.DatetimeIndex): + __doc__ = vendored_pandas_datetime_index.DatetimeIndex.__doc__ + + # Must be above 5000 for pandas to delegate to bigframes for binops + __pandas_priority__ = 12000 + @property def year(self) -> Index: return self._apply_unary_expr(date_ops.year_op.as_expr(ex.free_var("arg"))) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 8e1e03e024..e90f123778 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -271,6 +271,7 @@ def clean_up_by_session_id( DataFrame = bigframes.dataframe.DataFrame Index = bigframes.core.indexes.Index MultiIndex = bigframes.core.indexes.MultiIndex +DatetimeIndex = bigframes.core.indexes.DatetimeIndex Series = bigframes.series.Series __version__ = bigframes.version.__version__ @@ -357,6 +358,7 @@ def reset_session(): "DataFrame", "Index", "MultiIndex", + "DatetimeIndex", "Series", "__version__", # Other public pandas attributes diff --git a/docs/reference/bigframes.pandas/indexing.rst b/docs/reference/bigframes.pandas/indexing.rst index 2cc1acfabf..e25e8652ec 100644 --- a/docs/reference/bigframes.pandas/indexing.rst +++ b/docs/reference/bigframes.pandas/indexing.rst @@ -7,3 +7,15 @@ Index objects :members: :inherited-members: :undoc-members: + + +.. autoclass:: bigframes.core.indexes.multi.MultiIndex + :members: + :inherited-members: + :undoc-members: + + +.. autoclass:: bigframes.core.indexes.datetimes.DatetimeIndex + :members: + :inherited-members: + :undoc-members: \ No newline at end of file diff --git a/tests/system/small/core/indexes/__init__.py b/tests/system/small/core/indexes/__init__.py new file mode 100644 index 0000000000..0a2669d7a2 --- /dev/null +++ b/tests/system/small/core/indexes/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/system/small/core/indexes/test_datetimes.py b/tests/system/small/core/indexes/test_datetimes.py new file mode 100644 index 0000000000..40ce310b31 --- /dev/null +++ b/tests/system/small/core/indexes/test_datetimes.py @@ -0,0 +1,46 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pandas +import pandas.testing +import pytest + + +@pytest.fixture(scope="module") +def datetime_indexes(session): + pd_index = pandas.date_range("2024-12-25", periods=10, freq="d") + bf_index = session.read_pandas(pd_index) + + return bf_index, pd_index + + +@pytest.mark.parametrize( + "access", + [ + pytest.param(lambda x: x.year, id="year"), + pytest.param(lambda x: x.month, id="month"), + pytest.param(lambda x: x.day, id="day"), + pytest.param(lambda x: x.dayofweek, id="dayofweek"), + pytest.param(lambda x: x.day_of_week, id="day_of_week"), + pytest.param(lambda x: x.weekday, id="weekday"), + ], +) +def test_datetime_index_properties(datetime_indexes, access): + bf_index, pd_index = datetime_indexes + + actual_result = access(bf_index).to_pandas() + + expected_result = access(pd_index).astype(pandas.Int64Dtype()) + pandas.testing.assert_index_equal(actual_result, expected_result) diff --git a/third_party/bigframes_vendored/pandas/core/indexes/datetimes.py b/third_party/bigframes_vendored/pandas/core/indexes/datetimes.py new file mode 100644 index 0000000000..105a376728 --- /dev/null +++ b/third_party/bigframes_vendored/pandas/core/indexes/datetimes.py @@ -0,0 +1,106 @@ +# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/indexes/datetimes.py + +from __future__ import annotations + +from bigframes_vendored import constants +from bigframes_vendored.pandas.core.indexes import base + + +class DatetimeIndex(base.Index): + """Immutable sequence used for indexing and alignment with datetime-like values""" + + @property + def year(self) -> base.Index: + """The year of the datetime + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> idx = bpd.Index([pd.Timestamp("20250215")]) + >>> idx.year + Index([2025], dtype='Int64') + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def month(self) -> base.Index: + """The month as January=1, December=12. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> idx = bpd.Index([pd.Timestamp("20250215")]) + >>> idx.month + Index([2], dtype='Int64') + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def day(self) -> base.Index: + """The day of the datetime. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> idx = bpd.Index([pd.Timestamp("20250215")]) + >>> idx.day + Index([15], dtype='Int64') + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def day_of_week(self) -> base.Index: + """The day of the week with Monday=0, Sunday=6. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> idx = bpd.Index([pd.Timestamp("20250215")]) + >>> idx.day_of_week + Index([5], dtype='Int64') + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def dayofweek(self) -> base.Index: + """The day of the week with Monday=0, Sunday=6. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> idx = bpd.Index([pd.Timestamp("20250215")]) + >>> idx.dayofweek + Index([5], dtype='Int64') + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def weekday(self) -> base.Index: + """The day of the week with Monday=0, Sunday=6. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> idx = bpd.Index([pd.Timestamp("20250215")]) + >>> idx.weekday + Index([5], dtype='Int64') + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 4b1bed8a0199b684c5601d89b1f4c5d3083ae20c Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 9 May 2025 22:16:35 +0000 Subject: [PATCH 3/5] fix mypy --- bigframes/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 9f58e45d0c..01051a7a54 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -95,7 +95,7 @@ def __new__( if len(block._index_columns) > 1: from bigframes.core.indexes.multi import MultiIndex - klass = MultiIndex + klass: type[Index] = MultiIndex # type hint to make mypy happy elif dtypes.is_datetime_like(block.index.dtypes[0]): from bigframes.core.indexes.datetimes import DatetimeIndex From 54ab4691c318d7092e201b278564ecc2e20285f0 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 9 May 2025 22:17:57 +0000 Subject: [PATCH 4/5] fix lint --- bigframes/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 01051a7a54..7a01a5e1ab 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -95,7 +95,7 @@ def __new__( if len(block._index_columns) > 1: from bigframes.core.indexes.multi import MultiIndex - klass: type[Index] = MultiIndex # type hint to make mypy happy + klass: type[Index] = MultiIndex # type hint to make mypy happy elif dtypes.is_datetime_like(block.index.dtypes[0]): from bigframes.core.indexes.datetimes import DatetimeIndex From dee93ca078f05526a13216134d2a7d519574b167 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 9 May 2025 23:32:03 +0000 Subject: [PATCH 5/5] handle empty index scenario --- bigframes/core/indexes/base.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 7a01a5e1ab..9b4b5e4290 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -96,7 +96,7 @@ def __new__( from bigframes.core.indexes.multi import MultiIndex klass: type[Index] = MultiIndex # type hint to make mypy happy - elif dtypes.is_datetime_like(block.index.dtypes[0]): + elif _should_create_datetime_index(block): from bigframes.core.indexes.datetimes import DatetimeIndex klass = DatetimeIndex @@ -561,3 +561,10 @@ def to_numpy(self, dtype=None, *, allow_large_results=None, **kwargs) -> np.ndar def __len__(self): return self.shape[0] + + +def _should_create_datetime_index(block: blocks.Block) -> bool: + if len(block.index.dtypes) != 1: + return False + + return dtypes.is_datetime_like(block.index.dtypes[0])