diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py index 35d0c5d4d4..5af6fbd56e 100644 --- a/bigframes/core/array_value.py +++ b/bigframes/core/array_value.py @@ -401,37 +401,10 @@ def aggregate( ) ) - def project_window_op( - self, - column_name: str, - op: agg_ops.UnaryWindowOp, - window_spec: WindowSpec, - *, - never_skip_nulls=False, - skip_reproject_unsafe: bool = False, - ) -> Tuple[ArrayValue, str]: - """ - Creates a new expression based on this expression with unary operation applied to one column. - column_name: the id of the input column present in the expression - op: the windowable operator to apply to the input column - window_spec: a specification of the window over which to apply the operator - output_name: the id to assign to the output of the operator, by default will replace input col if distinct output id not provided - never_skip_nulls: will disable null skipping for operators that would otherwise do so - skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection - """ - - return self.project_window_expr( - agg_expressions.UnaryAggregation(op, ex.deref(column_name)), - window_spec, - never_skip_nulls, - skip_reproject_unsafe, - ) - def project_window_expr( self, expression: agg_expressions.Aggregation, window: WindowSpec, - never_skip_nulls=False, skip_reproject_unsafe: bool = False, ): output_name = self._gen_namespaced_uid() @@ -442,7 +415,6 @@ def project_window_expr( expression=expression, window_spec=window, output_name=ids.ColumnId(output_name), - never_skip_nulls=never_skip_nulls, skip_reproject_unsafe=skip_reproject_unsafe, ) ), diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index bd478800eb..466dbfce72 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -1090,7 +1090,6 @@ def multi_apply_window_op( window_spec: windows.WindowSpec, *, skip_null_groups: bool = False, - never_skip_nulls: bool = False, ) -> typing.Tuple[Block, typing.Sequence[str]]: block = self result_ids = [] @@ -1103,7 +1102,6 @@ def multi_apply_window_op( skip_reproject_unsafe=(i + 1) < len(columns), result_label=label, skip_null_groups=skip_null_groups, - never_skip_nulls=never_skip_nulls, ) result_ids.append(result_id) return block, result_ids @@ -1184,7 +1182,6 @@ def apply_window_op( result_label: Label = None, skip_null_groups: bool = False, skip_reproject_unsafe: bool = False, - never_skip_nulls: bool = False, ) -> typing.Tuple[Block, str]: agg_expr = agg_expressions.UnaryAggregation(op, ex.deref(column)) return self.apply_analytic( @@ -1192,7 +1189,6 @@ def apply_window_op( window_spec, result_label, skip_reproject_unsafe=skip_reproject_unsafe, - never_skip_nulls=never_skip_nulls, skip_null_groups=skip_null_groups, ) @@ -1203,7 +1199,6 @@ def apply_analytic( result_label: Label, *, skip_reproject_unsafe: bool = False, - never_skip_nulls: bool = False, skip_null_groups: bool = False, ) -> typing.Tuple[Block, str]: block = self @@ -1214,7 +1209,6 @@ def apply_analytic( agg_expr, window, skip_reproject_unsafe=skip_reproject_unsafe, - never_skip_nulls=never_skip_nulls, ) block = Block( expr, diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 91d72d96b2..f8be331d59 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -394,8 +394,6 @@ def project_window_op( expression: ex_types.Aggregation, window_spec: WindowSpec, output_name: str, - *, - never_skip_nulls=False, ) -> UnorderedIR: """ Creates a new expression based on this expression with unary operation applied to one column. @@ -403,7 +401,6 @@ def project_window_op( op: the windowable operator to apply to the input column window_spec: a specification of the window over which to apply the operator output_name: the id to assign to the output of the operator - never_skip_nulls: will disable null skipping for operators that would otherwise do so """ # Cannot nest analytic expressions, so reproject to cte first if needed. # Also ibis cannot window literals, so need to reproject those (even though this is legal in googlesql) @@ -425,7 +422,6 @@ def project_window_op( expression, window_spec, output_name, - never_skip_nulls=never_skip_nulls, ) if expression.op.order_independent and window_spec.is_unbounded: @@ -437,9 +433,6 @@ def project_window_op( expression, window_spec ) clauses: list[tuple[ex.Expression, ex.Expression]] = [] - if expression.op.skips_nulls and not never_skip_nulls: - for input in expression.inputs: - clauses.append((ops.isnull_op.as_expr(input), ex.const(None))) if window_spec.min_periods and len(expression.inputs) > 0: if not expression.op.nulls_count_for_min_values: is_observation = ops.notnull_op.as_expr() diff --git a/bigframes/core/compile/ibis_compiler/ibis_compiler.py b/bigframes/core/compile/ibis_compiler/ibis_compiler.py index 17bfc6ef55..b46c66f879 100644 --- a/bigframes/core/compile/ibis_compiler/ibis_compiler.py +++ b/bigframes/core/compile/ibis_compiler/ibis_compiler.py @@ -269,7 +269,6 @@ def compile_window(node: nodes.WindowOpNode, child: compiled.UnorderedIR): node.expression, node.window_spec, node.output_name.sql, - never_skip_nulls=node.never_skip_nulls, ) return result diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py index f822a6a83f..1c9b0d802d 100644 --- a/bigframes/core/compile/polars/compiler.py +++ b/bigframes/core/compile/polars/compiler.py @@ -16,7 +16,6 @@ import dataclasses import functools import itertools -import operator from typing import cast, Literal, Optional, Sequence, Tuple, Type, TYPE_CHECKING import pandas as pd @@ -868,26 +867,6 @@ def compile_window(self, node: nodes.WindowOpNode): df, node.expression, node.window_spec, node.output_name.sql ) result = pl.concat([df, window_result], how="horizontal") - - # Probably easier just to pull this out as a rewriter - if ( - node.expression.op.skips_nulls - and not node.never_skip_nulls - and node.expression.column_references - ): - nullity_expr = functools.reduce( - operator.or_, - ( - pl.col(column.sql).is_null() - for column in node.expression.column_references - ), - ) - result = result.with_columns( - pl.when(nullity_expr) - .then(None) - .otherwise(pl.col(node.output_name.sql)) - .alias(node.output_name.sql) - ) return result def _calc_row_analytic_func( diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py index 2f99b74176..276751d6e3 100644 --- a/bigframes/core/compile/sqlglot/compiler.py +++ b/bigframes/core/compile/sqlglot/compiler.py @@ -324,12 +324,8 @@ def compile_window(node: nodes.WindowOpNode, child: ir.SQLGlotIR) -> ir.SQLGlotI ) clauses: list[tuple[sge.Expression, sge.Expression]] = [] - if node.expression.op.skips_nulls and not node.never_skip_nulls: - for column in inputs: - clauses.append((sge.Is(this=column, expression=sge.Null()), sge.Null())) - if window_spec.min_periods and len(inputs) > 0: - if node.expression.op.skips_nulls: + if not node.expression.op.nulls_count_for_min_values: # Most operations do not count NULL values towards min_periods not_null_columns = [ sge.Not(this=sge.Is(this=column, expression=sge.Null())) diff --git a/bigframes/core/groupby/dataframe_group_by.py b/bigframes/core/groupby/dataframe_group_by.py index 149971249f..21f0d7f426 100644 --- a/bigframes/core/groupby/dataframe_group_by.py +++ b/bigframes/core/groupby/dataframe_group_by.py @@ -38,6 +38,7 @@ import bigframes.core.window_spec as window_specs import bigframes.dataframe as df import bigframes.dtypes as dtypes +import bigframes.operations import bigframes.operations.aggregations as agg_ops import bigframes.series as series @@ -747,14 +748,26 @@ def _apply_window_op( window_spec = window or window_specs.cumulative_rows( grouping_keys=tuple(self._by_col_ids) ) - columns, _ = self._aggregated_columns(numeric_only=numeric_only) + columns, labels = self._aggregated_columns(numeric_only=numeric_only) block, result_ids = self._block.multi_apply_window_op( columns, op, window_spec=window_spec, ) - result = df.DataFrame(block.select_columns(result_ids)) - return result + block = block.project_exprs( + tuple( + bigframes.operations.where_op.as_expr( + r_col, + bigframes.operations.notnull_op.as_expr(og_col), + ex.const(None), + ) + for og_col, r_col in zip(columns, result_ids) + ), + labels=labels, + drop=True, + ) + + return df.DataFrame(block) def _resolve_label(self, label: blocks.Label) -> str: """Resolve label to column id.""" diff --git a/bigframes/core/groupby/series_group_by.py b/bigframes/core/groupby/series_group_by.py index b09b63dcfe..27c55f7a6a 100644 --- a/bigframes/core/groupby/series_group_by.py +++ b/bigframes/core/groupby/series_group_by.py @@ -37,6 +37,7 @@ import bigframes.core.window_spec as window_specs import bigframes.dataframe as df import bigframes.dtypes +import bigframes.operations import bigframes.operations.aggregations as agg_ops import bigframes.series as series @@ -339,7 +340,6 @@ def cumcount(self, *args, **kwargs) -> series.Series: self._apply_window_op( agg_ops.SizeUnaryOp(), discard_name=True, - never_skip_nulls=True, ) - 1 ) @@ -426,7 +426,6 @@ def _apply_window_op( op: agg_ops.UnaryWindowOp, discard_name=False, window: typing.Optional[window_specs.WindowSpec] = None, - never_skip_nulls: bool = False, ) -> series.Series: """Apply window op to groupby. Defaults to grouped cumulative window.""" window_spec = window or window_specs.cumulative_rows( @@ -439,6 +438,15 @@ def _apply_window_op( op, result_label=label, window_spec=window_spec, - never_skip_nulls=never_skip_nulls, ) + if op.skips_nulls: + block, result_id = block.project_expr( + bigframes.operations.where_op.as_expr( + result_id, + bigframes.operations.notnull_op.as_expr(self._value_column), + ex.const(None), + ), + label, + ) + return series.Series(block.select_column(result_id)) diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 8d1759afd7..e1631c435d 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -1394,7 +1394,6 @@ class WindowOpNode(UnaryNode, AdditiveNode): expression: agg_expressions.Aggregation window_spec: window.WindowSpec output_name: identifiers.ColumnId - never_skip_nulls: bool = False skip_reproject_unsafe: bool = False def _validate(self): diff --git a/bigframes/core/rewrite/timedeltas.py b/bigframes/core/rewrite/timedeltas.py index 91c6ab83c6..5c7a85ee1b 100644 --- a/bigframes/core/rewrite/timedeltas.py +++ b/bigframes/core/rewrite/timedeltas.py @@ -67,7 +67,6 @@ def rewrite_timedelta_expressions(root: nodes.BigFrameNode) -> nodes.BigFrameNod _rewrite_aggregation(root.expression, root.schema), root.window_spec, root.output_name, - root.never_skip_nulls, root.skip_reproject_unsafe, ) @@ -112,6 +111,8 @@ def _rewrite_expressions(expr: ex.Expression, schema: schema.ArraySchema) -> _Ty def _rewrite_scalar_constant_expr(expr: ex.ScalarConstantExpression) -> _TypedExpr: + if expr.value is None: + return _TypedExpr(ex.const(None, expr.dtype), expr.dtype) if expr.dtype == dtypes.TIMEDELTA_DTYPE: int_repr = utils.timedelta_to_micros(expr.value) # type: ignore return _TypedExpr(ex.const(int_repr, expr.dtype), expr.dtype) diff --git a/bigframes/core/window/rolling.py b/bigframes/core/window/rolling.py index 1f3466874f..8b21f4166c 100644 --- a/bigframes/core/window/rolling.py +++ b/bigframes/core/window/rolling.py @@ -102,7 +102,6 @@ def _aggregate_block(self, op: agg_ops.UnaryAggregateOp) -> blocks.Block: op, self._window_spec, skip_null_groups=self._drop_null_groups, - never_skip_nulls=True, ) if self._window_spec.grouping_keys: diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 4b41006547..173aa48db8 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -4159,7 +4159,22 @@ def _apply_window_op( op, window_spec=window_spec, ) - return DataFrame(block.select_columns(result_ids)) + if op.skips_nulls: + block = block.project_exprs( + tuple( + bigframes.operations.where_op.as_expr( + r_col, + bigframes.operations.notnull_op.as_expr(og_col), + ex.const(None), + ) + for og_col, r_col in zip(self._block.value_columns, result_ids) + ), + labels=self._block.column_labels, + drop=True, + ) + else: + block = block.select_columns(result_ids) + return DataFrame(block) @validations.requires_ordering() def sample( diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index 1160ab2c8e..289d3bf00a 100644 --- a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -183,6 +183,10 @@ def output_type(self, *input_types: dtypes.ExpressionType): class SizeUnaryOp(UnaryAggregateOp): name: ClassVar[str] = "size" + @property + def skips_nulls(self): + return False + def output_type(self, *input_types: dtypes.ExpressionType): return dtypes.INT_DTYPE diff --git a/bigframes/series.py b/bigframes/series.py index c11cc48394..f2d4d98c14 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1780,7 +1780,11 @@ def _apply_window_op( block, result_id = block.apply_window_op( self._value_column, op, window_spec=window_spec, result_label=self.name ) - return Series(block.select_column(result_id)) + result = Series(block.select_column(result_id)) + if op.skips_nulls: + return result.where(self.notna(), None) + else: + return result def value_counts( self, diff --git a/tests/system/small/engines/test_windowing.py b/tests/system/small/engines/test_windowing.py index a34d7b8f38..510a2de3ba 100644 --- a/tests/system/small/engines/test_windowing.py +++ b/tests/system/small/engines/test_windowing.py @@ -43,12 +43,10 @@ def test_engines_with_offsets( assert_equivalence_execution(result.node, REFERENCE_ENGINE, engine) -@pytest.mark.parametrize("never_skip_nulls", [True, False]) @pytest.mark.parametrize("agg_op", [agg_ops.sum_op, agg_ops.count_op]) def test_engines_with_rows_window( scalars_array_value: array_value.ArrayValue, bigquery_client: bigquery.Client, - never_skip_nulls, agg_op, ): window = window_spec.WindowSpec( @@ -61,7 +59,6 @@ def test_engines_with_rows_window( ), window_spec=window, output_name=identifiers.ColumnId("agg_int64"), - never_skip_nulls=never_skip_nulls, skip_reproject_unsafe=False, ) diff --git a/tests/system/small/session/test_read_gbq_query.py b/tests/system/small/session/test_read_gbq_query.py index c1408febca..bb9026dc70 100644 --- a/tests/system/small/session/test_read_gbq_query.py +++ b/tests/system/small/session/test_read_gbq_query.py @@ -36,9 +36,9 @@ def test_read_gbq_query_w_allow_large_results(session: bigframes.Session): allow_large_results=False, ) assert df_false.shape == (1, 1) - roots_false = df_false._get_block().expr.node.roots - assert any(isinstance(node, nodes.ReadLocalNode) for node in roots_false) - assert not any(isinstance(node, nodes.ReadTableNode) for node in roots_false) + nodes_false = df_false._get_block().expr.node.unique_nodes() + assert any(isinstance(node, nodes.ReadLocalNode) for node in nodes_false) + assert not any(isinstance(node, nodes.ReadTableNode) for node in nodes_false) # Large results allowed should wrap a table. df_true = session.read_gbq( @@ -47,8 +47,8 @@ def test_read_gbq_query_w_allow_large_results(session: bigframes.Session): allow_large_results=True, ) assert df_true.shape == (1, 1) - roots_true = df_true._get_block().expr.node.roots - assert any(isinstance(node, nodes.ReadTableNode) for node in roots_true) + nodes_true = df_true._get_block().expr.node.unique_nodes() + assert any(isinstance(node, nodes.ReadTableNode) for node in nodes_true) def test_read_gbq_query_w_columns(session: bigframes.Session): diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 4df257423f..6c681596f5 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -1031,7 +1031,8 @@ def test_series_int_int_operators_scalar( bf_result = maybe_reversed_op(scalars_df["int64_col"], other_scalar).to_pandas() pd_result = maybe_reversed_op(scalars_pandas_df["int64_col"], other_scalar) - assert_series_equal(pd_result, bf_result) + # don't check dtype, as pandas is a bit unstable here across versions, esp floordiv + assert_series_equal(pd_result, bf_result, check_dtype=False) def test_series_pow_scalar(scalars_dfs): diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql index 83bd288e73..829e5a8836 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql @@ -5,11 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `bool_col` IS NULL - THEN NULL - ELSE COALESCE(LOGICAL_AND(`bool_col`) OVER (), TRUE) - END AS `bfcol_1` + COALESCE(LOGICAL_AND(`bool_col`) OVER (), TRUE) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql index dc2471148b..23357817c1 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql @@ -6,11 +6,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `bool_col` IS NULL - THEN NULL - ELSE COALESCE(LOGICAL_AND(`bool_col`) OVER (PARTITION BY `string_col`), TRUE) - END AS `bfcol_2` + COALESCE(LOGICAL_AND(`bool_col`) OVER (PARTITION BY `string_col`), TRUE) AS `bfcol_2` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/window_out.sql index 970349a4f5..337f0ff963 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/window_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/window_out.sql @@ -5,11 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `bool_col` IS NULL - THEN NULL - ELSE COALESCE(LOGICAL_OR(`bool_col`) OVER (), FALSE) - END AS `bfcol_1` + COALESCE(LOGICAL_OR(`bool_col`) OVER (), FALSE) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_out.sql index f179808b57..ea15243d90 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE WHEN `int64_col` IS NULL THEN NULL ELSE ANY_VALUE(`int64_col`) OVER () END AS `bfcol_1` + ANY_VALUE(`int64_col`) OVER () AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_partition_out.sql index e1b3da8a9a..e722318fbc 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_partition_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_partition_out.sql @@ -6,11 +6,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `int64_col` IS NULL - THEN NULL - ELSE ANY_VALUE(`int64_col`) OVER (PARTITION BY `string_col`) - END AS `bfcol_2` + ANY_VALUE(`int64_col`) OVER (PARTITION BY `string_col`) AS `bfcol_2` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first/out.sql index 40c9e6ddd8..b053178f58 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first/out.sql @@ -5,14 +5,10 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `int64_col` IS NULL - THEN NULL - ELSE FIRST_VALUE(`int64_col`) OVER ( - ORDER BY `int64_col` DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) - END AS `bfcol_1` + FIRST_VALUE(`int64_col`) OVER ( + ORDER BY `int64_col` DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last/out.sql index ebeaa0e338..61e90ee612 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last/out.sql @@ -5,14 +5,10 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `int64_col` IS NULL - THEN NULL - ELSE LAST_VALUE(`int64_col`) OVER ( - ORDER BY `int64_col` DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) - END AS `bfcol_1` + LAST_VALUE(`int64_col`) OVER ( + ORDER BY `int64_col` DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_out.sql index a234601b6a..f55201418a 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE WHEN `int64_col` IS NULL THEN NULL ELSE MAX(`int64_col`) OVER () END AS `bfcol_1` + MAX(`int64_col`) OVER () AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_partition_out.sql index f918500788..ac9b2df84e 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_partition_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_partition_out.sql @@ -6,11 +6,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `int64_col` IS NULL - THEN NULL - ELSE MAX(`int64_col`) OVER (PARTITION BY `string_col`) - END AS `bfcol_2` + MAX(`int64_col`) OVER (PARTITION BY `string_col`) AS `bfcol_2` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_out.sql index 73bec9ccce..fdb59809c3 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE WHEN `int64_col` IS NULL THEN NULL ELSE AVG(`int64_col`) OVER () END AS `bfcol_1` + AVG(`int64_col`) OVER () AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_partition_out.sql index d0a8345e12..d96121e54d 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_partition_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_partition_out.sql @@ -6,11 +6,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `int64_col` IS NULL - THEN NULL - ELSE AVG(`int64_col`) OVER (PARTITION BY `string_col`) - END AS `bfcol_2` + AVG(`int64_col`) OVER (PARTITION BY `string_col`) AS `bfcol_2` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_out.sql index 1d9db63491..cbda2b7d58 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE WHEN `int64_col` IS NULL THEN NULL ELSE MIN(`int64_col`) OVER () END AS `bfcol_1` + MIN(`int64_col`) OVER () AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_partition_out.sql index 8040f43ca5..d601832950 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_partition_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_partition_out.sql @@ -6,11 +6,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `int64_col` IS NULL - THEN NULL - ELSE MIN(`int64_col`) OVER (PARTITION BY `string_col`) - END AS `bfcol_2` + MIN(`int64_col`) OVER (PARTITION BY `string_col`) AS `bfcol_2` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_pop_var/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_pop_var/window_out.sql index eae3db0048..430da33e3c 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_pop_var/window_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_pop_var/window_out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE WHEN `int64_col` IS NULL THEN NULL ELSE VAR_POP(`int64_col`) OVER () END AS `bfcol_1` + VAR_POP(`int64_col`) OVER () AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_std/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_std/window_out.sql index cb39f6dbc8..80e0cf5bc6 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_std/window_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_std/window_out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE WHEN `int64_col` IS NULL THEN NULL ELSE STDDEV(`int64_col`) OVER () END AS `bfcol_1` + STDDEV(`int64_col`) OVER () AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_out.sql index 401436019e..47426abcbd 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_out.sql @@ -5,11 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `int64_col` IS NULL - THEN NULL - ELSE COALESCE(SUM(`int64_col`) OVER (), 0) - END AS `bfcol_1` + COALESCE(SUM(`int64_col`) OVER (), 0) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_partition_out.sql index f8ada8a5d4..fd1bd4f630 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_partition_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_partition_out.sql @@ -6,11 +6,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `int64_col` IS NULL - THEN NULL - ELSE COALESCE(SUM(`int64_col`) OVER (PARTITION BY `string_col`), 0) - END AS `bfcol_2` + COALESCE(SUM(`int64_col`) OVER (PARTITION BY `string_col`), 0) AS `bfcol_2` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_var/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_var/window_out.sql index d300251447..e9d6c1cb93 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_var/window_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_var/window_out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE WHEN `int64_col` IS NULL THEN NULL ELSE VARIANCE(`int64_col`) OVER () END AS `bfcol_1` + VARIANCE(`int64_col`) OVER () AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/test_series_polars.py b/tests/unit/test_series_polars.py index 6f729b0df0..e98db92b93 100644 --- a/tests/unit/test_series_polars.py +++ b/tests/unit/test_series_polars.py @@ -1036,7 +1036,8 @@ def test_series_int_int_operators_scalar( bf_result = maybe_reversed_op(scalars_df["int64_col"], other_scalar).to_pandas() pd_result = maybe_reversed_op(scalars_pandas_df["int64_col"], other_scalar) - assert_series_equal(pd_result, bf_result) + # don't check dtype, as pandas is a bit unstable here across versions, esp floordiv + assert_series_equal(pd_result, bf_result, check_dtype=False) def test_series_pow_scalar(scalars_dfs):