Skip to content

Commit 3ea6043

Browse files
feat: Add DataFrame.round method (#1742)
* feat: Add DataFrame.round method
* docstring full typename
* restrict round test to pandas 2.x
* add type hint
1 parent 2858b1e commit 3ea6043

File tree

5 files changed

+146
-1
lines changed

5 files changed

+146
-1
lines changed

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,6 +1826,13 @@ def fillna_op(
18261826

18271827
@scalar_op_compiler.register_binary_op(ops.round_op)
def round_op(x: ibis_types.Value, y: ibis_types.Value):
    """Compile ``round_op``: round ``x`` to ``y`` digits.

    Integer inputs are cast back to int64 after rounding; every other input
    is returned as produced by the ibis ``round`` call.
    """
    digits = typing.cast(ibis_types.IntegerValue, y)
    rounded = typing.cast(ibis_types.NumericValue, x).round(digits=digits)
    if x.type().is_integer():
        # bq produces float64, but pandas returns int
        return rounded.cast(ibis_dtypes.int64)
    return rounded

bigframes/dataframe.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from typing import (
2828
Callable,
2929
Dict,
30+
Hashable,
3031
Iterable,
3132
List,
3233
Literal,
@@ -3608,6 +3609,47 @@ def _groupby_series(
36083609
def abs(self) -> DataFrame:
    # Delegates to the shared unary-op helper with ops.abs_op.
    result = self._apply_unary_op(ops.abs_op)
    return result
36103611

3612+
def round(self, decimals: Union[int, dict[Hashable, int]] = 0) -> DataFrame:
    # Rounds every numeric (non-boolean) column; all other columns pass
    # through untouched. `decimals` is either one integer applied to all
    # roundable columns, or a dict-like of per-column digit counts, in which
    # case columns whose label is missing from it are left as is.
    # Raises TypeError when `decimals` is neither dict-like nor an integer.
    is_mapping = utils.is_dict_like(decimals)
    # bool subclasses int, so a bare isinstance check would silently treat
    # True/False as 1/0 decimal places; pandas rejects booleans here too.
    is_integer = isinstance(decimals, int) and not isinstance(decimals, bool)
    if not (is_mapping or is_integer):
        raise TypeError("'decimals' must be either a dict-like or integer.")

    block = self._block
    # Loop-invariant: build the set of roundable dtypes once, not per column.
    roundable_dtypes = set(bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE) - {
        bigframes.dtypes.BOOL_DTYPE
    }

    exprs = []
    for label, col_id, dtype in zip(
        block.column_labels, block.value_columns, block.dtypes
    ):
        if dtype not in roundable_dtypes:
            # Non-numeric and boolean columns are never rounded.
            exprs.append(ex.deref(col_id))
            continue
        if is_mapping:
            if label not in decimals:  # type: ignore
                # No digit count supplied for this column: keep it unchanged.
                exprs.append(ex.deref(col_id))
                continue
            digits = decimals[label]  # type: ignore
        else:
            digits = typing.cast(int, decimals)
        exprs.append(
            ops.round_op.as_expr(
                col_id, ex.const(digits, dtype=bigframes.dtypes.INT_DTYPE)
            )
        )

    return DataFrame(
        block.project_exprs(exprs, labels=block.column_labels, drop=True)
    )
3652+
36113653
def isna(self) -> DataFrame:
    # Delegates to the shared unary-op helper with ops.isnull_op.
    result = self._apply_unary_op(ops.isnull_op)
    return result
36133655

bigframes/operations/numeric_ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
289289
)
290290

291291
round_op = base_ops.create_binary_op(
292-
name="round", type_signature=op_typing.BINARY_REAL_NUMERIC
292+
name="round", type_signature=op_typing.BINARY_NUMERIC
293293
)
294294

295295
unsafe_pow_op = base_ops.create_binary_op(

tests/system/small/test_dataframe.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1635,6 +1635,25 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how):
16351635
)
16361636

16371637

1638+
@pytest.mark.parametrize(
    "decimals",
    [
        2,
        {"float64_col": 0, "bool_col": 1, "int64_too": -3},
        {},
    ],
)
def test_dataframe_round(scalars_dfs, decimals):
    """DataFrame.round should agree with pandas for int, dict and empty-dict input."""
    if pd.__version__.startswith("1."):
        pytest.skip("Rounding doesn't work as expected in pandas 1.x")

    bf_df, pandas_df = scalars_dfs

    expected = pandas_df.round(decimals)
    actual = bf_df.round(decimals).to_pandas()

    assert_pandas_df_equal(actual, expected)
1655+
1656+
16381657
def test_get_dtypes(scalars_df_default_index):
16391658
dtypes = scalars_df_default_index.dtypes
16401659
dtypes_dict: Dict[str, bigframes.dtypes.Dtype] = {

third_party/bigframes_vendored/pandas/core/frame.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4788,6 +4788,83 @@ def merge(
47884788
"""
47894789
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
47904790

4791+
def round(self, decimals=0):
    """
    Round a DataFrame to a variable number of decimal places.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import pandas as pd
        >>> bpd.options.display.progress_bar = None
        >>> df = bpd.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)],
        ...                    columns=['dogs', 'cats'])
        >>> df
           dogs  cats
        0  0.21  0.32
        1  0.01  0.67
        2  0.66  0.03
        3  0.21  0.18
        <BLANKLINE>
        [4 rows x 2 columns]

    By providing an integer each column is rounded to the same number
    of decimal places

        >>> df.round(1)
           dogs  cats
        0   0.2   0.3
        1   0.0   0.7
        2   0.7   0.0
        3   0.2   0.2
        <BLANKLINE>
        [4 rows x 2 columns]

    With a dict, the number of places for specific columns can be
    specified with the column names as key and the number of decimal
    places as value

        >>> df.round({'dogs': 1, 'cats': 0})
           dogs  cats
        0   0.2   0.0
        1   0.0   1.0
        2   0.7   0.0
        3   0.2   0.0
        <BLANKLINE>
        [4 rows x 2 columns]

    Using a Series, the number of places for specific columns can be
    specified with the column names as index and the number of
    decimal places as value

        >>> decimals = pd.Series([0, 1], index=['cats', 'dogs'])
        >>> df.round(decimals)
           dogs  cats
        0   0.2   0.0
        1   0.0   1.0
        2   0.7   0.0
        3   0.2   0.0
        <BLANKLINE>
        [4 rows x 2 columns]

    Args:
        decimals (int, dict, Series, default 0):
            Number of decimal places to round each column to. If an int is
            given, round each column to the same number of places.
            Otherwise dict and Series round to variable numbers of places.
            Column names should be in the keys if `decimals` is a
            dict-like, or in the index if `decimals` is a Series. Any
            columns not included in `decimals` will be left as is. Elements
            of `decimals` which are not columns of the input will be
            ignored.

    Returns:
        bigframes.pandas.DataFrame:
            A DataFrame with the affected columns rounded to the specified
            number of decimal places.

    """
    # Documentation-only stub: always raises; the concrete DataFrame class
    # provides the implementation.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
4867+
47914868
def apply(self, func, *, axis=0, args=(), **kwargs):
47924869
"""Apply a function along an axis of the DataFrame.
47934870

0 commit comments

Comments
 (0)