diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index a1cf72be97..6576276b11 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -1826,6 +1826,13 @@ def fillna_op(
 
 @scalar_op_compiler.register_binary_op(ops.round_op)
 def round_op(x: ibis_types.Value, y: ibis_types.Value):
+    if x.type().is_integer():
+        # bq produces float64, but pandas returns int
+        return (
+            typing.cast(ibis_types.NumericValue, x)
+            .round(digits=typing.cast(ibis_types.IntegerValue, y))
+            .cast(ibis_dtypes.int64)
+        )
     return typing.cast(ibis_types.NumericValue, x).round(
         digits=typing.cast(ibis_types.IntegerValue, y)
     )
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index a98733b48a..cba635062f 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -27,6 +27,7 @@
 from typing import (
     Callable,
     Dict,
+    Hashable,
     Iterable,
     List,
     Literal,
@@ -3608,6 +3609,41 @@ def _groupby_series(
     def abs(self) -> DataFrame:
         return self._apply_unary_op(ops.abs_op)
 
+    def round(self, decimals: Union[int, dict[Hashable, int]] = 0) -> DataFrame:
+        is_mapping = utils.is_dict_like(decimals)
+        if not (is_mapping or isinstance(decimals, int)):
+            raise TypeError("'decimals' must be either a dict-like or integer.")
+        block = self._block
+        # Loop-invariant set of dtypes to round; booleans are deliberately excluded.
+        roundable_dtypes = set(bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE) - {
+            bigframes.dtypes.BOOL_DTYPE
+        }
+        exprs = []
+        for label, col_id, dtype in zip(
+            block.column_labels, block.value_columns, block.dtypes
+        ):
+            if dtype not in roundable_dtypes:
+                # Columns of any other dtype pass through unchanged.
+                exprs.append(ex.deref(col_id))
+                continue
+            if not is_mapping:
+                digits = typing.cast(int, decimals)
+            elif label in decimals:  # type: ignore
+                digits = decimals[label]  # type: ignore
+            else:
+                # Columns missing from the mapping are left as is.
+                exprs.append(ex.deref(col_id))
+                continue
+            exprs.append(
+                ops.round_op.as_expr(
+                    col_id, ex.const(digits, dtype=bigframes.dtypes.INT_DTYPE)
+                )
+            )
+
+        return DataFrame(
+            block.project_exprs(exprs, labels=block.column_labels, drop=True)
+        )
+
     def isna(self) -> DataFrame:
         return self._apply_unary_op(ops.isnull_op)
 
diff --git a/bigframes/operations/numeric_ops.py b/bigframes/operations/numeric_ops.py
index 9d6749a169..b9820cd0ea 100644
--- a/bigframes/operations/numeric_ops.py
+++ b/bigframes/operations/numeric_ops.py
@@ -289,7 +289,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
 )
 
 round_op = base_ops.create_binary_op(
-    name="round", type_signature=op_typing.BINARY_REAL_NUMERIC
+    name="round", type_signature=op_typing.BINARY_NUMERIC
 )
 
 unsafe_pow_op = base_ops.create_binary_op(
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 00c11d073e..582d164540 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -1635,6 +1635,25 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how):
     )
 
 
+@pytest.mark.parametrize(
+    ("decimals",),
+    [
+        (2,),
+        ({"float64_col": 0, "bool_col": 1, "int64_too": -3},),
+        ({},),
+    ],
+)
+def test_dataframe_round(scalars_dfs, decimals):
+    if pd.__version__.startswith("1."):
+        pytest.skip("Rounding doesn't work as expected in pandas 1.x")
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df.round(decimals).to_pandas()
+    pd_result = scalars_pandas_df.round(decimals)
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
 def test_get_dtypes(scalars_df_default_index):
     dtypes = scalars_df_default_index.dtypes
     dtypes_dict: Dict[str, bigframes.dtypes.Dtype] = {
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 5bbf72b421..9bb25cb5a4 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -4788,6 +4788,84 @@ def merge(
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def round(self, decimals):
+        """
+        Round a DataFrame to a variable number of decimal places.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)],
+            ...                   columns=['dogs', 'cats'])
+            >>> df
+               dogs  cats
+            0  0.21  0.32
+            1  0.01  0.67
+            2  0.66  0.03
+            3  0.21  0.18
+            <BLANKLINE>
+            [4 rows x 2 columns]
+
+        By providing an integer each column is rounded to the same number
+        of decimal places
+
+            >>> df.round(1)
+               dogs  cats
+            0   0.2   0.3
+            1   0.0   0.7
+            2   0.7   0.0
+            3   0.2   0.2
+            <BLANKLINE>
+            [4 rows x 2 columns]
+
+        With a dict, the number of places for specific columns can be
+        specified with the column names as key and the number of decimal
+        places as value
+
+            >>> df.round({'dogs': 1, 'cats': 0})
+               dogs  cats
+            0   0.2   0.0
+            1   0.0   1.0
+            2   0.7   0.0
+            3   0.2   0.0
+            <BLANKLINE>
+            [4 rows x 2 columns]
+
+        Using a Series, the number of places for specific columns can be
+        specified with the column names as index and the number of
+        decimal places as value
+
+            >>> import pandas as pd
+            >>> decimals = pd.Series([0, 1], index=['cats', 'dogs'])
+            >>> df.round(decimals)
+               dogs  cats
+            0   0.2   0.0
+            1   0.0   1.0
+            2   0.7   0.0
+            3   0.2   0.0
+            <BLANKLINE>
+            [4 rows x 2 columns]
+
+        Args:
+            decimals (int, dict, Series):
+                Number of decimal places to round each column to. If an int is
+                given, round each column to the same number of places.
+                Otherwise dict and Series round to variable numbers of places.
+                Column names should be in the keys if `decimals` is a
+                dict-like, or in the index if `decimals` is a Series. Any
+                columns not included in `decimals` will be left as is. Elements
+                of `decimals` which are not columns of the input will be
+                ignored.
+
+        Returns:
+            bigframes.pandas.DataFrame:
+                A DataFrame with the affected columns rounded to the specified
+                number of decimal places.
+
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def apply(self, func, *, axis=0, args=(), **kwargs):
         """Apply a function along an axis of the DataFrame.
 