Skip to content

Commit d451aef

Browse files
fix: Fix clip int series with float bounds (#1739)
1 parent 133ac6b commit d451aef

File tree

4 files changed

+22
-24
lines changed

4 files changed

+22
-24
lines changed

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1935,34 +1935,18 @@ def clip_op(
19351935
if isinstance(lower, ibis_types.NullScalar) and (
19361936
not isinstance(upper, ibis_types.NullScalar)
19371937
):
1938-
return (
1939-
ibis_api.case() # type: ignore
1940-
.when(upper.isnull() | (original > upper), upper)
1941-
.else_(original)
1942-
.end()
1943-
)
1938+
return ibis_api.least(original, upper)
19441939
elif (not isinstance(lower, ibis_types.NullScalar)) and isinstance(
19451940
upper, ibis_types.NullScalar
19461941
):
1947-
return (
1948-
ibis_api.case() # type: ignore
1949-
.when(lower.isnull() | (original < lower), lower)
1950-
.else_(original)
1951-
.end()
1952-
)
1942+
return ibis_api.greatest(original, lower)
19531943
elif isinstance(lower, ibis_types.NullScalar) and (
19541944
isinstance(upper, ibis_types.NullScalar)
19551945
):
19561946
return original
19571947
else:
19581948
# Note: pandas returns the same result when the lower and upper bounds are swapped. This implementation does not; it requires that lower_bound < upper_bound.
1959-
return (
1960-
ibis_api.case() # type: ignore
1961-
.when(lower.isnull() | (original < lower), lower)
1962-
.when(upper.isnull() | (original > upper), upper)
1963-
.else_(original)
1964-
.end()
1965-
)
1949+
return ibis_api.greatest(ibis_api.least(original, upper), lower)
19661950

19671951

19681952
# N-ary Operations

bigframes/operations/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,9 @@ def _align(
245245
)
246246
return (typing.cast(ex.DerefOp, values[0]), values[1], block)
247247

248-
def _align3(self, other1: series.Series | scalars.Scalar, other2: series.Series | scalars.Scalar, how="left") -> tuple[ex.DerefOp, AlignedExprT, AlignedExprT, blocks.Block]: # type: ignore
248+
def _align3(self, other1: series.Series | scalars.Scalar, other2: series.Series | scalars.Scalar, how="left", cast_scalars: bool = True) -> tuple[ex.DerefOp, AlignedExprT, AlignedExprT, blocks.Block]: # type: ignore
249249
"""Aligns the series value with 2 other scalars or series objects. Returns new values and joined tabled expression."""
250-
values, index = self._align_n([other1, other2], how)
250+
values, index = self._align_n([other1, other2], how, cast_scalars=cast_scalars)
251251
return (
252252
typing.cast(ex.DerefOp, values[0]),
253253
values[1],
@@ -260,7 +260,7 @@ def _align_n(
260260
others: typing.Sequence[typing.Union[series.Series, scalars.Scalar]],
261261
how="outer",
262262
ignore_self=False,
263-
cast_scalars: bool = True,
263+
cast_scalars: bool = False,
264264
) -> tuple[
265265
typing.Sequence[Union[ex.ScalarConstantExpression, ex.DerefOp]],
266266
blocks.Block,

bigframes/series.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,14 +1353,17 @@ def where(self, cond, other=None):
13531353
)
13541354
return Series(block.select_column(result_id).with_column_labels([self.name]))
13551355

1356-
def clip(self, lower, upper):
1356+
def clip(self, lower=None, upper=None):
13571357
if lower is None and upper is None:
13581358
return self
13591359
if lower is None:
13601360
return self._apply_binary_op(upper, ops.minimum_op, alignment="left")
13611361
if upper is None:
13621362
return self._apply_binary_op(lower, ops.maximum_op, alignment="left")
1363-
value_id, lower_id, upper_id, block = self._align3(lower, upper)
1363+
# special rule to coerce scalar string args to date
1364+
value_id, lower_id, upper_id, block = self._align3(
1365+
lower, upper, cast_scalars=(bigframes.dtypes.is_date_like(self.dtype))
1366+
)
13641367
block, result_id = block.project_expr(
13651368
ops.clip_op.as_expr(value_id, lower_id, upper_id),
13661369
)

tests/system/small/test_series.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3001,6 +3001,17 @@ def test_clip(scalars_df_index, scalars_pandas_df_index, ordered):
30013001
assert_series_equal(bf_result, pd_result, ignore_order=not ordered)
30023002

30033003

3004+
def test_clip_int_with_float_bounds(scalars_df_index, scalars_pandas_df_index):
3005+
col_bf = scalars_df_index["int64_too"]
3006+
bf_result = col_bf.clip(-100, 3.14151593).to_pandas()
3007+
3008+
col_pd = scalars_pandas_df_index["int64_too"]
3009+
# pandas cannot clip a nullable Int64 series with float bounds, so cast to int64 before clipping
3010+
pd_result = col_pd.astype("int64").clip(-100, 3.14151593).astype("Float64")
3011+
3012+
assert_series_equal(bf_result, pd_result)
3013+
3014+
30043015
def test_clip_filtered_two_sided(scalars_df_index, scalars_pandas_df_index):
30053016
col_bf = scalars_df_index["int64_col"].iloc[::2]
30063017
lower_bf = scalars_df_index["int64_too"].iloc[2:] - 1

0 commit comments

Comments
 (0)