fix: Product operation produces float result for all input types (#501)

TrevorBergeron · web-flow · commit 6873b30b691a · 2024-03-26T20:42:16.000Z
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea.
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
diff --git a/bigframes/core/compile/aggregate_compiler.py b/bigframes/core/compile/aggregate_compiler.py
@@ -190,7 +190,7 @@ def _(
         .else_(magnitude * pow(-1, negative_count_parity))
         .end()
     )
-    return float_result.cast(column.type())  # type: ignore
+    return float_result
 
 
 @compile_unary_agg.register
diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py
@@ -139,10 +139,7 @@ class ProductOp(UnaryAggregateOp):
     name: ClassVar[str] = "product"
 
     def output_type(self, *input_types: dtypes.ExpressionType):
-        if pd.api.types.is_bool_dtype(input_types[0]):
-            return dtypes.INT_DTYPE
-        else:
-            return input_types[0]
+        return dtypes.FLOAT_DTYPE
 
 
 @dataclasses.dataclass(frozen=True)
diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py
@@ -228,8 +228,7 @@ def test_dataframe_groupby_multi_sum(
         (lambda x: x.cumsum(numeric_only=True)),
         (lambda x: x.cummax(numeric_only=True)),
         (lambda x: x.cummin(numeric_only=True)),
-        # pandas 2.2 uses floating point for cumulative product even for
-        # integer inputs.
+        # Pre-pandas 2.2 doesn't always produce float.
         (lambda x: x.cumprod().astype("Float64")),
         (lambda x: x.shift(periods=2)),
     ],
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
@@ -1481,7 +1481,7 @@ def test_groupby_prod(scalars_dfs):
     bf_series = scalars_df[col_name].groupby(scalars_df["int64_col"]).prod()
     pd_series = (
         scalars_pandas_df[col_name].groupby(scalars_pandas_df["int64_col"]).prod()
-    )
+    ).astype(pd.Float64Dtype())
     # TODO(swast): Update groupby to use index based on group by key(s).
     bf_result = bf_series.to_pandas()
     assert_series_equal(
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -4416,10 +4416,10 @@ def cumprod(self) -> DataFrame:
             [3 rows x 2 columns]
 
             >>> df.cumprod()
-                A	B
-            0	3	1
-            1	3	2
-            2	6	6
+                 A    B
+            0  3.0  1.0
+            1  3.0  2.0
+            2  6.0  6.0
             <BLANKLINE>
             [3 rows x 2 columns]
 

Original file line number	Diff line number	Diff line change
`@@ -190,7 +190,7 @@ def _(`
`190`	`190`	`.else_(magnitude * pow(-1, negative_count_parity))`
`191`	`191`	`.end()`
`192`	`192`	`)`
`193`		`- return float_result.cast(column.type()) # type: ignore`
	`193`	`+ return float_result`
`194`	`194`
`195`	`195`
`196`	`196`	`@compile_unary_agg.register`