Skip to content

feat: Add DataFrame.round method #1742

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 4 commits were merged on May 16, 2025.
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1826,6 +1826,13 @@ def fillna_op(

@scalar_op_compiler.register_binary_op(ops.round_op)
def round_op(x: ibis_types.Value, y: ibis_types.Value):
    """Compile the binary round op: round ``x`` to ``y`` digits.

    Args:
        x: The value to round; treated as an ibis numeric value.
        y: The number of digits; treated as an ibis integer value.

    Returns:
        The rounded ibis expression. When ``x`` has an integer type the
        rounded result is cast to int64.
    """
    input_is_integer = x.type().is_integer()
    number = typing.cast(ibis_types.NumericValue, x)
    digits = typing.cast(ibis_types.IntegerValue, y)
    rounded = number.round(digits=digits)
    if input_is_integer:
        # bq produces float64, but pandas returns int
        return rounded.cast(ibis_dtypes.int64)
    return rounded
Expand Down
42 changes: 42 additions & 0 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from typing import (
Callable,
Dict,
Hashable,
Iterable,
List,
Literal,
Expand Down Expand Up @@ -3608,6 +3609,47 @@ def _groupby_series(
def abs(self) -> DataFrame:
    """Return the result of applying ``ops.abs_op`` as a unary op to this frame."""
    unary_op = ops.abs_op
    return self._apply_unary_op(unary_op)

def round(self, decimals: Union[int, dict[Hashable, int]] = 0) -> DataFrame:
    """Round the numeric columns of this DataFrame.

    Args:
        decimals:
            Either a single integer, applied to every roundable column, or a
            dict-like keyed by column label. With a dict-like, a roundable
            column whose label is not a key is passed through unchanged.

    Returns:
        DataFrame: A new DataFrame with the same column labels. Columns
        whose dtype is not in ``NUMERIC_BIGFRAMES_TYPES_PERMISSIVE``, and
        boolean columns, are passed through unchanged.

    Raises:
        TypeError: If ``decimals`` is neither dict-like nor an ``int``.
    """
    is_mapping = utils.is_dict_like(decimals)
    if not (is_mapping or isinstance(decimals, int)):
        raise TypeError("'decimals' must be either a dict-like or integer.")

    block = self._block
    # Loop-invariant, so build it once rather than once per column.
    # Booleans are removed so that bool columns are never rounded.
    roundable_dtypes = set(bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE) - {
        bigframes.dtypes.BOOL_DTYPE
    }

    exprs = []
    for label, col_id, dtype in zip(
        block.column_labels, block.value_columns, block.dtypes
    ):
        if dtype not in roundable_dtypes:
            exprs.append(ex.deref(col_id))
            continue

        if is_mapping:
            if label not in decimals:  # type: ignore
                # No entry for this column: leave it as is.
                exprs.append(ex.deref(col_id))
                continue
            digits = decimals[label]  # type: ignore
        else:
            digits = typing.cast(int, decimals)

        exprs.append(
            ops.round_op.as_expr(
                col_id,
                ex.const(digits, dtype=bigframes.dtypes.INT_DTYPE),
            )
        )

    return DataFrame(
        block.project_exprs(exprs, labels=block.column_labels, drop=True)
    )

def isna(self) -> DataFrame:
    """Return the result of applying ``ops.isnull_op`` as a unary op to this frame."""
    unary_op = ops.isnull_op
    return self._apply_unary_op(unary_op)

Expand Down
2 changes: 1 addition & 1 deletion bigframes/operations/numeric_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
)

# Binary "round" op.
# NOTE(review): the flattened diff left both the old
# (op_typing.BINARY_REAL_NUMERIC) and the new (op_typing.BINARY_NUMERIC)
# keyword lines in this call, which is a repeated-keyword SyntaxError. Only
# the post-change signature is kept; presumably it widens the op to accept
# integer operands too — confirm against op_typing.
round_op = base_ops.create_binary_op(
    name="round", type_signature=op_typing.BINARY_NUMERIC
)

unsafe_pow_op = base_ops.create_binary_op(
Expand Down
19 changes: 19 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1635,6 +1635,25 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how):
)


@pytest.mark.parametrize(
    "decimals",
    [
        2,
        {"float64_col": 0, "bool_col": 1, "int64_too": -3},
        {},
    ],
)
def test_dataframe_round(scalars_dfs, decimals):
    """Check that DataFrame.round matches pandas for int and dict ``decimals``."""
    if pd.__version__.startswith("1."):
        pytest.skip("Rounding doesn't work as expected in pandas 1.x")

    bf_df, pandas_df = scalars_dfs

    actual = bf_df.round(decimals).to_pandas()
    expected = pandas_df.round(decimals)

    assert_pandas_df_equal(actual, expected)


def test_get_dtypes(scalars_df_default_index):
dtypes = scalars_df_default_index.dtypes
dtypes_dict: Dict[str, bigframes.dtypes.Dtype] = {
Expand Down
77 changes: 77 additions & 0 deletions third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4788,6 +4788,83 @@ def merge(
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def round(self, decimals=0):
    """
    Round a DataFrame to a variable number of decimal places.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import pandas as pd
        >>> bpd.options.display.progress_bar = None
        >>> df = bpd.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)],
        ...                    columns=['dogs', 'cats'])
        >>> df
           dogs  cats
        0  0.21  0.32
        1  0.01  0.67
        2  0.66  0.03
        3  0.21  0.18
        <BLANKLINE>
        [4 rows x 2 columns]

    By providing an integer each column is rounded to the same number
    of decimal places

        >>> df.round(1)
           dogs  cats
        0   0.2   0.3
        1   0.0   0.7
        2   0.7   0.0
        3   0.2   0.2
        <BLANKLINE>
        [4 rows x 2 columns]

    With a dict, the number of places for specific columns can be
    specified with the column names as key and the number of decimal
    places as value

        >>> df.round({'dogs': 1, 'cats': 0})
           dogs  cats
        0   0.2   0.0
        1   0.0   1.0
        2   0.7   0.0
        3   0.2   0.0
        <BLANKLINE>
        [4 rows x 2 columns]

    Using a Series, the number of places for specific columns can be
    specified with the column names as index and the number of
    decimal places as value

        >>> decimals = pd.Series([0, 1], index=['cats', 'dogs'])
        >>> df.round(decimals)
           dogs  cats
        0   0.2   0.0
        1   0.0   1.0
        2   0.7   0.0
        3   0.2   0.0
        <BLANKLINE>
        [4 rows x 2 columns]

    Args:
        decimals (int, dict, Series):
            Number of decimal places to round each column to. If an int is
            given, round each column to the same number of places.
            Otherwise dict and Series round to variable numbers of places.
            Column names should be in the keys if `decimals` is a
            dict-like, or in the index if `decimals` is a Series. Any
            columns not included in `decimals` will be left as is. Elements
            of `decimals` which are not columns of the input will be
            ignored. Defaults to 0.

    Returns:
        bigframes.pandas.DataFrame:
            A DataFrame with the affected columns rounded to the specified
            number of decimal places.

    """
    # NOTE(review): PR discussion suggests the Series accepted for `decimals`
    # may be either a pandas Series or a bigframes Series — confirm against
    # the concrete implementation before documenting it more specifically.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def apply(self, func, *, axis=0, args=(), **kwargs):
"""Apply a function along an axis of the DataFrame.

Expand Down