Skip to content

Commit dd1cf01

Browse files
authored
Support IGNORE NULLS for LEAD window function (#9419)
* IGNORE NULLS support for LEAD * fix * fix * fix
1 parent 1a4dc00 commit dd1cf01

File tree

2 files changed

+87
-12
lines changed

2 files changed

+87
-12
lines changed

datafusion/physical-expr/src/window/lead_lag.rs

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ impl BuiltInWindowFunctionExpr for WindowShift {
121121
default_value: self.default_value.clone(),
122122
ignore_nulls: self.ignore_nulls,
123123
non_null_offsets: VecDeque::new(),
124+
curr_valid_idx: 0,
124125
}))
125126
}
126127

@@ -143,6 +144,8 @@ pub(crate) struct WindowShiftEvaluator {
143144
ignore_nulls: bool,
144145
// VecDeque contains offset values that between non-null entries
145146
non_null_offsets: VecDeque<usize>,
147+
// Current idx pointing to non null value
148+
curr_valid_idx: i64,
146149
}
147150

148151
impl WindowShiftEvaluator {
@@ -270,10 +273,22 @@ impl PartitionEvaluator for WindowShiftEvaluator {
270273
self.non_null_offsets[end_idx] += 1;
271274
}
272275
} else if self.ignore_nulls && !self.is_lag() {
273-
// IGNORE NULLS and LEAD mode.
274-
return Err(exec_datafusion_err!(
275-
"IGNORE NULLS mode for LEAD is not supported for BoundedWindowAggExec"
276-
));
276+
if self.non_null_offsets.is_empty() {
277+
self.non_null_offsets
278+
.extend(array.nulls().unwrap().valid_indices().collect::<Vec<_>>());
279+
}
280+
if range.start == 0 && array.is_valid(0) {
281+
self.curr_valid_idx = -self.shift_offset - 1;
282+
}
283+
let idx0 = self.non_null_offsets.get(self.curr_valid_idx as usize);
284+
if idx0.is_some() && range.start == *idx0.unwrap() {
285+
self.curr_valid_idx += 1;
286+
}
287+
idx = self
288+
.non_null_offsets
289+
.get((self.curr_valid_idx - self.shift_offset - 1) as usize)
290+
.map(|v| *v as i64)
291+
.unwrap_or(-1);
277292
}
278293

279294
// Set the default value if

datafusion/sqllogictest/test_files/window.slt

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4169,32 +4169,92 @@ select lag(a, 2, null) ignore nulls over (order by id desc) as x1,
41694169
sum(id) over (order by id desc ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as sum_id
41704170
from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x')
41714171

4172+
# LEAD window function IGNORE/RESPECT NULLS support with ascending order and default offset 1
4173+
query TTTTTT
4174+
select lead(a) ignore nulls over (order by id) as x,
4175+
lead(a, 1, null) ignore nulls over (order by id) as x1,
4176+
lead(a, 1, 'def') ignore nulls over (order by id) as x2,
4177+
lead(a) respect nulls over (order by id) as x3,
4178+
lead(a, 1, null) respect nulls over (order by id) as x4,
4179+
lead(a, 1, 'def') respect nulls over (order by id) as x5
4180+
from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x')
4181+
----
4182+
b b b b b b
4183+
x x x NULL NULL NULL
4184+
x x x x x x
4185+
NULL NULL def NULL NULL def
4186+
4187+
# LEAD window function IGNORE/RESPECT NULLS support with descending order and default offset 1
4188+
query TTTTTT
4189+
select lead(a) ignore nulls over (order by id desc) as x,
4190+
lead(a, 1, null) ignore nulls over (order by id desc) as x1,
4191+
lead(a, 1, 'def') ignore nulls over (order by id desc) as x2,
4192+
lead(a) respect nulls over (order by id desc) as x3,
4193+
lead(a, 1, null) respect nulls over (order by id desc) as x4,
4194+
lead(a, 1, 'def') respect nulls over (order by id desc) as x5
4195+
from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x')
4196+
----
4197+
b b b NULL NULL NULL
4198+
b b b b b b
4199+
NULL NULL def NULL NULL NULL
4200+
NULL NULL def NULL NULL def
4201+
4202+
# LEAD window function IGNORE/RESPECT NULLS support with ascending order and nondefault offset
4203+
query TTTT
4204+
select lead(a, 2, null) ignore nulls over (order by id) as x1,
4205+
lead(a, 2, 'def') ignore nulls over (order by id) as x2,
4206+
lead(a, 2, null) respect nulls over (order by id) as x4,
4207+
lead(a, 2, 'def') respect nulls over (order by id) as x5
4208+
from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x')
4209+
----
4210+
x x NULL NULL
4211+
NULL def x x
4212+
NULL def NULL def
4213+
NULL def NULL def
4214+
41724215
# LEAD window function IGNORE/RESPECT NULLS support with descending order and nondefault offset
4173-
statement error Execution error: IGNORE NULLS mode for LEAD is not supported for BoundedWindowAggExec
4216+
query TTTT
41744217
select lead(a, 2, null) ignore nulls over (order by id desc) as x1,
41754218
lead(a, 2, 'def') ignore nulls over (order by id desc) as x2,
41764219
lead(a, 2, null) respect nulls over (order by id desc) as x4,
41774220
lead(a, 2, 'def') respect nulls over (order by id desc) as x5
41784221
from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x')
4222+
----
4223+
NULL def b b
4224+
NULL def NULL NULL
4225+
NULL def NULL def
4226+
NULL def NULL def
4227+
4228+
# LEAD window function IGNORE/RESPECT NULLS support with descending order and nondefault offset.
4229+
# To trigger WindowAggExec, we added a sum window function with all of the ranges.
4230+
statement error Execution error: IGNORE NULLS mode for LAG and LEAD is not supported for WindowAggExec
4231+
select lead(a, 2, null) ignore nulls over (order by id desc) as x1,
4232+
lead(a, 2, 'def') ignore nulls over (order by id desc) as x2,
4233+
lead(a, 2, null) respect nulls over (order by id desc) as x4,
4234+
lead(a, 2, 'def') respect nulls over (order by id desc) as x5,
4235+
sum(id) over (order by id desc ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as sum_id
4236+
from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x')
41794237

41804238
statement ok
41814239
set datafusion.execution.batch_size = 1000;
41824240

4183-
query I
4184-
SELECT LAG(c1, 2) IGNORE NULLS OVER()
4241+
query II
4242+
SELECT LAG(c1, 2) IGNORE NULLS OVER(),
4243+
LEAD(c1, 2) IGNORE NULLS OVER()
41854244
FROM null_cases
41864245
ORDER BY c2
41874246
LIMIT 5;
41884247
----
4189-
78
4190-
63
4191-
3
4192-
24
4193-
14
4248+
78 50
4249+
63 38
4250+
3 53
4251+
24 31
4252+
14 94
41944253

41954254
# result should be same with above, when lag algorithm work with pruned data.
41964255
# decreasing batch size, causes data to be produced in smaller chunks at the source.
41974256
# Hence sliding window algorithm is used during calculations.
4257+
# LEAD function to be added, there is some bug with incoming data array when LEAD is called
41984258
statement ok
41994259
set datafusion.execution.batch_size = 1;
42004260

0 commit comments

Comments
 (0)