def _should_create_datetime_index(block: blocks.Block) -> bool:
    """Report whether an index built from ``block`` should be a DatetimeIndex.

    Args:
        block: The block whose index dtypes are inspected.

    Returns:
        ``False`` unless the block exposes exactly one index dtype; otherwise
        the result of ``dtypes.is_datetime_like`` for that single dtype.
    """
    index_dtypes = block.index.dtypes
    # Only a single-column index can qualify; short-circuit everything else.
    return len(index_dtypes) == 1 and dtypes.is_datetime_like(index_dtypes[0])
def _expr_on_index_values(op):
    """Build the unary expression applying ``op`` to the free variable "arg"."""
    return op.as_expr(ex.free_var("arg"))


class DatetimeIndex(Index, vendored_pandas_datetime_index.DatetimeIndex):
    __doc__ = vendored_pandas_datetime_index.DatetimeIndex.__doc__

    # pandas only hands binary operations over to bigframes when this value
    # is greater than 5000.
    __pandas_priority__ = 12000

    # NOTE: the accessors below intentionally carry no docstrings of their
    # own; their user-facing documentation lives on the vendored base class.

    @property
    def year(self) -> Index:
        year_expr = _expr_on_index_values(date_ops.year_op)
        return self._apply_unary_expr(year_expr)

    @property
    def month(self) -> Index:
        month_expr = _expr_on_index_values(date_ops.month_op)
        return self._apply_unary_expr(month_expr)

    @property
    def day(self) -> Index:
        day_expr = _expr_on_index_values(date_ops.day_op)
        return self._apply_unary_expr(day_expr)

    @property
    def dayofweek(self) -> Index:
        dayofweek_expr = _expr_on_index_values(date_ops.dayofweek_op)
        return self._apply_unary_expr(dayofweek_expr)

    # ``day_of_week`` and ``weekday`` are alternate spellings that simply
    # forward to ``dayofweek``.
    @property
    def day_of_week(self) -> Index:
        return self.dayofweek

    @property
    def weekday(self) -> Index:
        return self.dayofweek
a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -271,6 +271,7 @@ def clean_up_by_session_id( DataFrame = bigframes.dataframe.DataFrame Index = bigframes.core.indexes.Index MultiIndex = bigframes.core.indexes.MultiIndex +DatetimeIndex = bigframes.core.indexes.DatetimeIndex Series = bigframes.series.Series __version__ = bigframes.version.__version__ @@ -357,6 +358,7 @@ def reset_session(): "DataFrame", "Index", "MultiIndex", + "DatetimeIndex", "Series", "__version__", # Other public pandas attributes diff --git a/docs/reference/bigframes.pandas/indexing.rst b/docs/reference/bigframes.pandas/indexing.rst index 2cc1acfabf..e25e8652ec 100644 --- a/docs/reference/bigframes.pandas/indexing.rst +++ b/docs/reference/bigframes.pandas/indexing.rst @@ -7,3 +7,15 @@ Index objects :members: :inherited-members: :undoc-members: + + +.. autoclass:: bigframes.core.indexes.multi.MultiIndex + :members: + :inherited-members: + :undoc-members: + + +.. autoclass:: bigframes.core.indexes.datetimes.DatetimeIndex + :members: + :inherited-members: + :undoc-members: \ No newline at end of file diff --git a/tests/system/small/core/indexes/__init__.py b/tests/system/small/core/indexes/__init__.py new file mode 100644 index 0000000000..0a2669d7a2 --- /dev/null +++ b/tests/system/small/core/indexes/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@pytest.fixture(scope="module")
def datetime_indexes(session):
    """Provide a (bigframes index, pandas index) pair over the same dates.

    The pandas index holds 10 consecutive calendar days starting on
    2024-12-25, so the values cross a month and year boundary. The bigframes
    counterpart is produced by loading that pandas index through
    ``session.read_pandas``.
    """
    # "D" is the documented pandas alias for calendar-day frequency; the
    # lowercase spelling is deprecated in newer pandas releases.
    pd_index = pandas.date_range("2024-12-25", periods=10, freq="D")
    bf_index = session.read_pandas(pd_index)

    return bf_index, pd_index


@pytest.mark.parametrize(
    "access",
    [
        pytest.param(lambda x: x.year, id="year"),
        pytest.param(lambda x: x.month, id="month"),
        pytest.param(lambda x: x.day, id="day"),
        pytest.param(lambda x: x.dayofweek, id="dayofweek"),
        pytest.param(lambda x: x.day_of_week, id="day_of_week"),
        pytest.param(lambda x: x.weekday, id="weekday"),
    ],
)
def test_datetime_index_properties(datetime_indexes, access):
    """Check each datetime accessor against the pandas result.

    ``access`` reads one property from an index. It is applied to both the
    bigframes and the pandas index, and the two results must be equal.
    """
    bf_index, pd_index = datetime_indexes

    actual_result = access(bf_index).to_pandas()

    # The pandas result is cast to nullable Int64 before the comparison so the
    # dtypes of the two indexes line up.
    expected_result = access(pd_index).astype(pandas.Int64Dtype())
    pandas.testing.assert_index_equal(actual_result, expected_result)
class DatetimeIndex(base.Index):
    """Immutable sequence used for indexing and alignment with datetime-like values."""

    @property
    def year(self) -> base.Index:
        """The year of the datetime.

        **Examples:**

            >>> import bigframes.pandas as bpd
            >>> import pandas as pd
            >>> bpd.options.display.progress_bar = None

            >>> idx = bpd.Index([pd.Timestamp("20250215")])
            >>> idx.year
            Index([2025], dtype='Int64')

        Returns:
            Index: The year of each element.
        """
        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

    @property
    def month(self) -> base.Index:
        """The month as January=1, December=12.

        **Examples:**

            >>> import bigframes.pandas as bpd
            >>> import pandas as pd
            >>> bpd.options.display.progress_bar = None

            >>> idx = bpd.Index([pd.Timestamp("20250215")])
            >>> idx.month
            Index([2], dtype='Int64')

        Returns:
            Index: The month number of each element.
        """
        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

    @property
    def day(self) -> base.Index:
        """The day of the datetime.

        **Examples:**

            >>> import bigframes.pandas as bpd
            >>> import pandas as pd
            >>> bpd.options.display.progress_bar = None

            >>> idx = bpd.Index([pd.Timestamp("20250215")])
            >>> idx.day
            Index([15], dtype='Int64')

        Returns:
            Index: The day of the month of each element.
        """
        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

    @property
    def day_of_week(self) -> base.Index:
        """The day of the week with Monday=0, Sunday=6.

        **Examples:**

            >>> import bigframes.pandas as bpd
            >>> import pandas as pd
            >>> bpd.options.display.progress_bar = None

            >>> idx = bpd.Index([pd.Timestamp("20250215")])
            >>> idx.day_of_week
            Index([5], dtype='Int64')

        Returns:
            Index: The day-of-week number of each element.
        """
        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

    @property
    def dayofweek(self) -> base.Index:
        """The day of the week with Monday=0, Sunday=6.

        **Examples:**

            >>> import bigframes.pandas as bpd
            >>> import pandas as pd
            >>> bpd.options.display.progress_bar = None

            >>> idx = bpd.Index([pd.Timestamp("20250215")])
            >>> idx.dayofweek
            Index([5], dtype='Int64')

        Returns:
            Index: The day-of-week number of each element.
        """
        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

    @property
    def weekday(self) -> base.Index:
        """The day of the week with Monday=0, Sunday=6.

        **Examples:**

            >>> import bigframes.pandas as bpd
            >>> import pandas as pd
            >>> bpd.options.display.progress_bar = None

            >>> idx = bpd.Index([pd.Timestamp("20250215")])
            >>> idx.weekday
            Index([5], dtype='Int64')

        Returns:
            Index: The day-of-week number of each element.
        """
        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)