Skip to content

Commit 6d8f3af

Browse files
feat: Allow DataFrame binary ops to align on either axis and with loc… (#544)
1 parent f8821fe commit 6d8f3af

File tree

4 files changed

+151
-23
lines changed

4 files changed

+151
-23
lines changed

bigframes/core/convert.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
from __future__ import annotations
15+
16+
import pandas as pd
17+
18+
import bigframes.core.indexes as index
19+
import bigframes.series as series
20+
21+
22+
def to_bf_series(obj, default_index: index.Index) -> series.Series:
    """Coerce ``obj`` into a bigframes ``Series``.

    Args:
        obj: A bigframes or pandas Series, a bigframes or pandas Index, or
            any other object that ``pd.api.types.is_list_like`` accepts.
        default_index: Forwarded as the second positional argument of
            ``series.Series`` for Index and list-like inputs (presumably the
            index of the result -- confirm against the Series constructor).
            Not used when ``obj`` is already a Series of either flavour.

    Returns:
        ``obj`` itself when it is already a bigframes Series, otherwise a
        new bigframes Series built from it.

    Raises:
        TypeError: If ``obj`` is neither a Series, an Index, nor list-like.
    """
    if isinstance(obj, series.Series):
        return obj
    if isinstance(obj, pd.Series):
        # A pandas Series is wrapped as-is; default_index is not passed.
        return series.Series(obj)

    needs_default_index = isinstance(
        obj, (index.Index, pd.Index)
    ) or pd.api.types.is_list_like(obj)
    if not needs_default_index:
        raise TypeError(f"Cannot interpret {obj} as series.")
    return series.Series(obj, default_index)
35+
36+
37+
def to_pd_series(obj, default_index: pd.Index) -> pd.Series:
    """Coerce ``obj`` into a pandas ``Series``.

    Args:
        obj: A bigframes or pandas Series, a bigframes or pandas Index, or
            any other object that ``pd.api.types.is_list_like`` accepts.
        default_index: Index given to the resulting pandas Series for Index
            and list-like inputs. Not used when ``obj`` is already a Series
            of either flavour.

    Returns:
        ``obj`` itself when it is a pandas Series, ``obj.to_pandas()`` when
        it is a bigframes Series, otherwise a new pandas Series built from
        the values of ``obj``.

    Raises:
        TypeError: If ``obj`` is neither a Series, an Index, nor list-like.
    """
    if isinstance(obj, series.Series):
        return obj.to_pandas()
    if isinstance(obj, pd.Series):
        return obj

    if isinstance(obj, index.Index):
        # A bigframes Index is converted via to_pandas() before wrapping.
        values = obj.to_pandas()
    elif isinstance(obj, pd.Index) or pd.api.types.is_list_like(obj):
        values = obj
    else:
        raise TypeError(f"Cannot interpret {obj} as series.")
    return pd.Series(values, default_index)

bigframes/dataframe.py

Lines changed: 70 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
from bigframes.core import log_adapter
5151
import bigframes.core.block_transforms as block_ops
5252
import bigframes.core.blocks as blocks
53+
import bigframes.core.convert
5354
import bigframes.core.expression as ex
5455
import bigframes.core.groupby as groupby
5556
import bigframes.core.guid
@@ -663,22 +664,20 @@ def _apply_binop(
663664
how: str = "outer",
664665
reverse: bool = False,
665666
):
666-
if isinstance(other, (float, int)):
667+
if isinstance(other, (float, int, bool)):
667668
return self._apply_scalar_binop(other, op, reverse=reverse)
668-
elif isinstance(other, indexes.Index):
669-
return self._apply_series_binop(
670-
other.to_series(index=self.index),
671-
op,
672-
axis=axis,
673-
how=how,
674-
reverse=reverse,
675-
)
676-
elif isinstance(other, bigframes.series.Series):
677-
return self._apply_series_binop(
678-
other, op, axis=axis, how=how, reverse=reverse
679-
)
680669
elif isinstance(other, DataFrame):
681670
return self._apply_dataframe_binop(other, op, how=how, reverse=reverse)
671+
elif isinstance(other, pandas.DataFrame):
672+
return self._apply_dataframe_binop(
673+
DataFrame(other), op, how=how, reverse=reverse
674+
)
675+
elif utils.get_axis_number(axis) == 0:
676+
bf_series = bigframes.core.convert.to_bf_series(other, self.index)
677+
return self._apply_series_binop_axis_0(bf_series, op, how, reverse)
678+
elif utils.get_axis_number(axis) == 1:
679+
pd_series = bigframes.core.convert.to_pd_series(other, self.columns)
680+
return self._apply_series_binop_axis_1(pd_series, op, how, reverse)
682681
raise NotImplementedError(
683682
f"binary operation is not implemented on the second operand of type {type(other).__name__}."
684683
f"{constants.FEEDBACK_LINK}"
@@ -700,22 +699,13 @@ def _apply_scalar_binop(
700699
block = block.drop_columns([column_id])
701700
return DataFrame(block)
702701

703-
def _apply_series_binop(
702+
def _apply_series_binop_axis_0(
704703
self,
705704
other: bigframes.series.Series,
706705
op: ops.BinaryOp,
707-
axis: str | int = "columns",
708706
how: str = "outer",
709707
reverse: bool = False,
710708
) -> DataFrame:
711-
if axis not in ("columns", "index", 0, 1):
712-
raise ValueError(f"Invalid input: axis {axis}.")
713-
714-
if axis in ("columns", 1):
715-
raise NotImplementedError(
716-
f"Row Series operations haven't been supported. {constants.FEEDBACK_LINK}"
717-
)
718-
719709
block, (get_column_left, get_column_right) = self._block.join(
720710
other._block, how=how
721711
)
@@ -738,6 +728,63 @@ def _apply_series_binop(
738728
block = block.with_index_labels(self.index.names)
739729
return DataFrame(block)
740730

731+
def _apply_series_binop_axis_1(
    self,
    other: pandas.Series,
    op: ops.BinaryOp,
    how: str = "outer",
    reverse: bool = False,
) -> DataFrame:
    """Apply ``op`` between each column and the matching entry of ``other``.

    The index of ``other`` is aligned against this frame's column labels.
    Each aligned pair becomes one projected expression on the block: the
    column on one side and the entry of ``other`` as a constant on the
    other. A side with no match for a label contributes ``ex.const(None)``.

    Args:
        other: pandas Series whose index is matched to ``self.columns``.
        op: Binary operation used to build each per-column expression.
        how: Join type passed to ``Index.join`` when the labels differ.
        reverse: If true, the entry of ``other`` is the first operand and
            the column the second; otherwise the column comes first.

    Returns:
        A DataFrame holding only the result columns, labelled with the
        aligned column labels.

    Raises:
        ValueError: If the labels differ and either side is non-unique, or
            if an aligned position is missing on both sides.
    """
    own_labels = self.columns
    other_labels = other.index

    # Somewhat different alignment than df-df so separate codepath for now.
    if own_labels.equals(other_labels):
        columns = own_labels
        left_positions = range(len(columns))
        right_positions = range(len(columns))
    else:
        if not own_labels.is_unique or not other_labels.is_unique:
            raise ValueError("Cannot align non-unique indices")
        columns, lcol_indexer, rcol_indexer = own_labels.join(
            other_labels, how=how, return_indexers=True
        )
        # A None indexer is replaced by the identity mapping over the result.
        left_positions = (
            range(len(columns)) if lcol_indexer is None else lcol_indexer
        )
        right_positions = (
            range(len(columns)) if rcol_indexer is None else rcol_indexer
        )

    value_column_ids = self._block.value_columns
    block = self._block
    result_ids = []
    for left_pos, right_pos in zip(left_positions, right_positions):
        # -1 indices indicate missing
        has_left = left_pos >= 0
        has_right = right_pos >= 0
        if not (has_left or has_right):
            # Should not be possible
            raise ValueError("No right or left index.")

        column_operand = (
            value_column_ids[left_pos] if has_left else ex.const(None)
        )
        scalar_operand = (
            ex.const(other.iloc[right_pos]) if has_right else ex.const(None)
        )
        if reverse:
            expr = op.as_expr(scalar_operand, column_operand)
        else:
            expr = op.as_expr(column_operand, scalar_operand)

        block, new_col_id = block.project_expr(expr)
        result_ids.append(new_col_id)

    # Keep only the freshly projected columns, then label them.
    result_block = block.select_columns(result_ids)
    return DataFrame(result_block.with_column_labels(columns))
787+
741788
def _apply_dataframe_binop(
742789
self,
743790
other: DataFrame,

bigframes/typing.py

Whitespace-only changes.

tests/system/small/test_dataframe.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
import bigframes
2929
import bigframes._config.display_options as display_options
30+
import bigframes.core.indexes as bf_indexes
3031
import bigframes.dataframe as dataframe
3132
import bigframes.series as series
3233
from tests.system.utils import (
@@ -2074,6 +2075,37 @@ def test_series_binop_axis_index(
20742075
assert_pandas_df_equal(bf_result, pd_result)
20752076

20762077

2078+
@skip_legacy_pandas
@pytest.mark.parametrize(
    "input",
    [
        pytest.param((1000, 2000, 3000), id="tuple"),
        pytest.param(pd.Index([1000, 2000, 3000]), id="pd_index"),
        pytest.param(bf_indexes.Index([1000, 2000, 3000]), id="bf_index"),
        pytest.param(
            pd.Series((1000, 2000), index=["int64_too", "float64_col"]),
            id="pd_series",
        ),
        pytest.param(
            series.Series((1000, 2000), index=["int64_too", "float64_col"]),
            id="bf_series",
        ),
    ],
)
def test_listlike_binop_axis_1(scalars_dfs, input):
    """``DataFrame.add(..., axis=1)`` with list-like operands matches pandas.

    The same operand is added along axis 1 to three numeric columns of the
    bigframes frame and of the pandas frame. Operands that expose
    ``to_pandas`` are converted before the pandas computation. The results
    are compared with dtype checking disabled.
    """
    bf_df, pd_df = scalars_dfs
    selected_columns = ["int64_col", "float64_col", "int64_too"]

    bf_result = bf_df[selected_columns].add(input, axis=1).to_pandas()

    # pandas receives the pandas form of the operand where one exists.
    pd_operand = input.to_pandas() if hasattr(input, "to_pandas") else input
    pd_result = pd_df[selected_columns].add(pd_operand, axis=1)

    assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
2108+
20772109
@pytest.mark.parametrize(
20782110
("left_labels", "right_labels"),
20792111
[

0 commit comments

Comments
 (0)