feat: Add DataFrame.round method (#1742)

TrevorBergeron · web-flow · commit 3ea6043be702 · 2025-05-16T16:13:11.000-05:00
* feat: Add DataFrame.round method

* docstring full typename

* restrict round test to pandas 2.x

* add type hint
diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
@@ -1826,6 +1826,13 @@ def fillna_op(
 
 @scalar_op_compiler.register_binary_op(ops.round_op)
 def round_op(x: ibis_types.Value, y: ibis_types.Value):
+    if x.type().is_integer():
+        # bq produces float64, but pandas returns int
+        return (
+            typing.cast(ibis_types.NumericValue, x)
+            .round(digits=typing.cast(ibis_types.IntegerValue, y))
+            .cast(ibis_dtypes.int64)
+        )
     return typing.cast(ibis_types.NumericValue, x).round(
         digits=typing.cast(ibis_types.IntegerValue, y)
     )
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
@@ -27,6 +27,7 @@
 from typing import (
     Callable,
     Dict,
+    Hashable,
     Iterable,
     List,
     Literal,
@@ -3608,6 +3609,47 @@ def _groupby_series(
     def abs(self) -> DataFrame:
         return self._apply_unary_op(ops.abs_op)
 
+    def round(self, decimals: Union[int, dict[Hashable, int]] = 0) -> DataFrame:
+        is_mapping = utils.is_dict_like(decimals)
+        if not (is_mapping or isinstance(decimals, int)):
+            raise TypeError("'decimals' must be either a dict-like or integer.")
+        block = self._block
+        exprs = []
+        for label, col_id, dtype in zip(
+            block.column_labels, block.value_columns, block.dtypes
+        ):
+            if dtype in set(bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE) - {
+                bigframes.dtypes.BOOL_DTYPE
+            }:
+                if is_mapping:
+                    if label in decimals:  # type: ignore
+                        exprs.append(
+                            ops.round_op.as_expr(
+                                col_id,
+                                ex.const(
+                                    decimals[label], dtype=bigframes.dtypes.INT_DTYPE  # type: ignore
+                                ),
+                            )
+                        )
+                    else:
+                        exprs.append(ex.deref(col_id))
+                else:
+                    exprs.append(
+                        ops.round_op.as_expr(
+                            col_id,
+                            ex.const(
+                                typing.cast(int, decimals),
+                                dtype=bigframes.dtypes.INT_DTYPE,
+                            ),
+                        )
+                    )
+            else:
+                exprs.append(ex.deref(col_id))
+
+        return DataFrame(
+            block.project_exprs(exprs, labels=block.column_labels, drop=True)
+        )
+
     def isna(self) -> DataFrame:
         return self._apply_unary_op(ops.isnull_op)
 
diff --git a/bigframes/operations/numeric_ops.py b/bigframes/operations/numeric_ops.py
@@ -289,7 +289,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
 )
 
 round_op = base_ops.create_binary_op(
-    name="round", type_signature=op_typing.BINARY_REAL_NUMERIC
+    name="round", type_signature=op_typing.BINARY_NUMERIC
 )
 
 unsafe_pow_op = base_ops.create_binary_op(
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
@@ -1635,6 +1635,25 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how):
     )
 
 
+@pytest.mark.parametrize(
+    ("decimals",),
+    [
+        (2,),
+        ({"float64_col": 0, "bool_col": 1, "int64_too": -3},),
+        ({},),
+    ],
+)
+def test_dataframe_round(scalars_dfs, decimals):
+    if pd.__version__.startswith("1."):
+        pytest.skip("Rounding doesn't work as expected in pandas 1.x")
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df.round(decimals).to_pandas()
+    pd_result = scalars_pandas_df.round(decimals)
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
 def test_get_dtypes(scalars_df_default_index):
     dtypes = scalars_df_default_index.dtypes
     dtypes_dict: Dict[str, bigframes.dtypes.Dtype] = {
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -4788,6 +4788,83 @@ def merge(
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def round(self, decimals):
+        """
+        Round a DataFrame to a variable number of decimal places.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)],
+            ...                   columns=['dogs', 'cats'])
+            >>> df
+               dogs  cats
+            0  0.21  0.32
+            1  0.01  0.67
+            2  0.66  0.03
+            3  0.21  0.18
+            <BLANKLINE>
+            [4 rows x 2 columns]
+
+            By providing an integer each column is rounded to the same number
+            of decimal places
+
+            >>> df.round(1)
+                dogs  cats
+            0   0.2   0.3
+            1   0.0   0.7
+            2   0.7   0.0
+            3   0.2   0.2
+            <BLANKLINE>
+            [4 rows x 2 columns]
+
+            With a dict, the number of places for specific columns can be
+            specified with the column names as key and the number of decimal
+            places as value
+
+            >>> df.round({'dogs': 1, 'cats': 0})
+                dogs  cats
+            0   0.2   0.0
+            1   0.0   1.0
+            2   0.7   0.0
+            3   0.2   0.0
+            <BLANKLINE>
+            [4 rows x 2 columns]
+
+            Using a Series, the number of places for specific columns can be
+            specified with the column names as index and the number of
+            decimal places as value
+
+            >>> decimals = pd.Series([0, 1], index=['cats', 'dogs'])
+            >>> df.round(decimals)
+                dogs  cats
+            0   0.2   0.0
+            1   0.0   1.0
+            2   0.7   0.0
+            3   0.2   0.0
+            <BLANKLINE>
+            [4 rows x 2 columns]
+
+        Args:
+            decimals (int, dict, Series):
+                Number of decimal places to round each column to. If an int is
+                given, round each column to the same number of places.
+                Otherwise dict and Series round to variable numbers of places.
+                Column names should be in the keys if `decimals` is a
+                dict-like, or in the index if `decimals` is a Series. Any
+                columns not included in `decimals` will be left as is. Elements
+                of `decimals` which are not columns of the input will be
+                ignored.
+
+        Returns:
+            bigframes.pandas.DataFrame:
+                A DataFrame with the affected columns rounded to the specified
+                number of decimal places.
+
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def apply(self, func, *, axis=0, args=(), **kwargs):
         """Apply a function along an axis of the DataFrame.
 

Original file line number	Diff line number	Diff line change
`@@ -289,7 +289,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT`
`289`	`289`	`)`
`290`	`290`
`291`	`291`	`round_op = base_ops.create_binary_op(`
`292`		`- name="round", type_signature=op_typing.BINARY_REAL_NUMERIC`
	`292`	`+ name="round", type_signature=op_typing.BINARY_NUMERIC`
`293`	`293`	`)`
`294`	`294`
`295`	`295`	`unsafe_pow_op = base_ops.create_binary_op(`