diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 6576276b11..228c866e1a 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1935,34 +1935,18 @@ def clip_op( if isinstance(lower, ibis_types.NullScalar) and ( not isinstance(upper, ibis_types.NullScalar) ): - return ( - ibis_api.case() # type: ignore - .when(upper.isnull() | (original > upper), upper) - .else_(original) - .end() - ) + return ibis_api.least(original, upper) elif (not isinstance(lower, ibis_types.NullScalar)) and isinstance( upper, ibis_types.NullScalar ): - return ( - ibis_api.case() # type: ignore - .when(lower.isnull() | (original < lower), lower) - .else_(original) - .end() - ) + return ibis_api.greatest(original, lower) elif isinstance(lower, ibis_types.NullScalar) and ( isinstance(upper, ibis_types.NullScalar) ): return original else: # Note: Pandas has unchanged behavior when upper bound and lower bound are flipped. This implementation requires that lower_bound < upper_bound - return ( - ibis_api.case() # type: ignore - .when(lower.isnull() | (original < lower), lower) - .when(upper.isnull() | (original > upper), upper) - .else_(original) - .end() - ) + return ibis_api.greatest(ibis_api.least(original, upper), lower) # N-ary Operations diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index 8d70596b7d..c316d28321 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -245,9 +245,9 @@ def _align( ) return (typing.cast(ex.DerefOp, values[0]), values[1], block) - def _align3(self, other1: series.Series | scalars.Scalar, other2: series.Series | scalars.Scalar, how="left") -> tuple[ex.DerefOp, AlignedExprT, AlignedExprT, blocks.Block]: # type: ignore + def _align3(self, other1: series.Series | scalars.Scalar, other2: series.Series | scalars.Scalar, how="left", cast_scalars: bool = True) -> tuple[ex.DerefOp, AlignedExprT, AlignedExprT, blocks.Block]: # type: ignore """Aligns the series value with 2 other scalars or series objects. Returns new values and joined tabled expression.""" - values, index = self._align_n([other1, other2], how) + values, index = self._align_n([other1, other2], how, cast_scalars=cast_scalars) return ( typing.cast(ex.DerefOp, values[0]), values[1], @@ -260,7 +260,7 @@ def _align_n( others: typing.Sequence[typing.Union[series.Series, scalars.Scalar]], how="outer", ignore_self=False, - cast_scalars: bool = True, + cast_scalars: bool = False, ) -> tuple[ typing.Sequence[Union[ex.ScalarConstantExpression, ex.DerefOp]], blocks.Block, diff --git a/bigframes/series.py b/bigframes/series.py index 2c387734d3..626cf2fc76 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1353,14 +1353,17 @@ def where(self, cond, other=None): ) return Series(block.select_column(result_id).with_column_labels([self.name])) - def clip(self, lower, upper): + def clip(self, lower=None, upper=None): if lower is None and upper is None: return self if lower is None: return self._apply_binary_op(upper, ops.minimum_op, alignment="left") if upper is None: return self._apply_binary_op(lower, ops.maximum_op, alignment="left") - value_id, lower_id, upper_id, block = self._align3(lower, upper) + # special rule to coerce scalar string args to date + value_id, lower_id, upper_id, block = self._align3( + lower, upper, cast_scalars=(bigframes.dtypes.is_date_like(self.dtype)) + ) block, result_id = block.project_expr( ops.clip_op.as_expr(value_id, lower_id, upper_id), ) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 7972fbe1e9..286a480d18 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -3001,6 +3001,17 @@ def test_clip(scalars_df_index, scalars_pandas_df_index, ordered): assert_series_equal(bf_result, pd_result, ignore_order=not ordered) +def test_clip_int_with_float_bounds(scalars_df_index, scalars_pandas_df_index): + col_bf = scalars_df_index["int64_too"] + bf_result = col_bf.clip(-100, 3.14151593).to_pandas() + + col_pd = scalars_pandas_df_index["int64_too"] + # pandas doesn't work with Int64 and clip with floats + pd_result = col_pd.astype("int64").clip(-100, 3.14151593).astype("Float64") + + assert_series_equal(bf_result, pd_result) + + def test_clip_filtered_two_sided(scalars_df_index, scalars_pandas_df_index): col_bf = scalars_df_index["int64_col"].iloc[::2] lower_bf = scalars_df_index["int64_too"].iloc[2:] - 1