18
18
//! [`FilterNullJoinKeys`] adds `IS NOT NULL` filters to join inputs whose join keys are nullable
19
19
20
20
use crate :: optimizer:: ApplyOrder ;
21
+ use crate :: push_down_filter:: on_lr_is_preserved;
21
22
use crate :: { OptimizerConfig , OptimizerRule } ;
22
23
use datafusion_common:: tree_node:: Transformed ;
23
24
use datafusion_common:: Result ;
24
25
use datafusion_expr:: utils:: conjunction;
25
- use datafusion_expr:: {
26
- logical_plan:: Filter , logical_plan:: JoinType , Expr , ExprSchemable , LogicalPlan ,
27
- } ;
26
+ use datafusion_expr:: { logical_plan:: Filter , Expr , ExprSchemable , LogicalPlan } ;
28
27
use std:: sync:: Arc ;
29
28
30
- /// The FilterNullJoinKeys rule will identify inner joins with equi-join conditions
31
- /// where the join key is nullable on one side and non-nullable on the other side
32
- /// and then insert an `IsNotNull` filter on the nullable side since null values
29
+ /// The FilterNullJoinKeys rule will identify joins with equi-join conditions
30
+ /// where the join key is nullable and then insert an `IsNotNull` filter on the nullable side (only for sides the join type allows to be filtered) since null values
33
31
/// can never match.
34
32
#[ derive( Default ) ]
35
33
pub struct FilterNullJoinKeys { }
@@ -51,21 +49,23 @@ impl OptimizerRule for FilterNullJoinKeys {
51
49
if !config. options ( ) . optimizer . filter_null_join_keys {
52
50
return Ok ( Transformed :: no ( plan) ) ;
53
51
}
54
-
55
52
match plan {
56
- LogicalPlan :: Join ( mut join) if join. join_type == JoinType :: Inner => {
53
+ LogicalPlan :: Join ( mut join) if !join. on . is_empty ( ) => {
54
+ let ( left_preserved, right_preserved) =
55
+ on_lr_is_preserved ( join. join_type ) ;
56
+
57
57
let left_schema = join. left . schema ( ) ;
58
58
let right_schema = join. right . schema ( ) ;
59
59
60
60
let mut left_filters = vec ! [ ] ;
61
61
let mut right_filters = vec ! [ ] ;
62
62
63
63
for ( l, r) in & join. on {
64
- if l. nullable ( left_schema) ? {
64
+ if left_preserved && l. nullable ( left_schema) ? {
65
65
left_filters. push ( l. clone ( ) ) ;
66
66
}
67
67
68
- if r. nullable ( right_schema) ? {
68
+ if right_preserved && r. nullable ( right_schema) ? {
69
69
right_filters. push ( r. clone ( ) ) ;
70
70
}
71
71
}
@@ -109,7 +109,7 @@ mod tests {
109
109
use arrow:: datatypes:: { DataType , Field , Schema } ;
110
110
use datafusion_common:: Column ;
111
111
use datafusion_expr:: logical_plan:: table_scan;
112
- use datafusion_expr:: { col, lit, LogicalPlanBuilder } ;
112
+ use datafusion_expr:: { col, lit, JoinType , LogicalPlanBuilder } ;
113
113
114
114
fn assert_optimized_plan_equal ( plan : LogicalPlan , expected : & str ) -> Result < ( ) > {
115
115
assert_optimized_plan_eq ( Arc :: new ( FilterNullJoinKeys { } ) , plan, expected)
@@ -118,18 +118,41 @@ mod tests {
118
118
#[ test]
119
119
fn left_nullable ( ) -> Result < ( ) > {
120
120
let ( t1, t2) = test_tables ( ) ?;
121
- let plan = build_plan ( t1, t2, "t1.optional_id" , "t2.id" ) ?;
121
+ let plan = build_plan ( t1, t2, "t1.optional_id" , "t2.id" , JoinType :: Inner ) ?;
122
122
let expected = "Inner Join: t1.optional_id = t2.id\
123
123
\n Filter: t1.optional_id IS NOT NULL\
124
124
\n TableScan: t1\
125
125
\n TableScan: t2";
126
126
assert_optimized_plan_equal ( plan, expected)
127
127
}
128
128
129
+ #[ test]
130
+ fn left_nullable_left_join ( ) -> Result < ( ) > {
131
+ let ( t1, t2) = test_tables ( ) ?;
132
+ let plan = build_plan ( t1, t2, "t1.optional_id" , "t2.id" , JoinType :: Left ) ?;
133
+ let expected = "Left Join: t1.optional_id = t2.id\
134
+ \n TableScan: t1\
135
+ \n TableScan: t2";
136
+ assert_optimized_plan_equal ( plan, expected)
137
+ }
138
+
139
+ #[ test]
140
+ fn left_nullable_left_join_reordered ( ) -> Result < ( ) > {
141
+ let ( t_left, t_right) = test_tables ( ) ?;
142
+ // Note: order of tables is reversed
143
+ let plan =
144
+ build_plan ( t_right, t_left, "t2.id" , "t1.optional_id" , JoinType :: Left ) ?;
145
+ let expected = "Left Join: t2.id = t1.optional_id\
146
+ \n TableScan: t2\
147
+ \n Filter: t1.optional_id IS NOT NULL\
148
+ \n TableScan: t1";
149
+ assert_optimized_plan_equal ( plan, expected)
150
+ }
151
+
129
152
#[ test]
130
153
fn left_nullable_on_condition_reversed ( ) -> Result < ( ) > {
131
154
let ( t1, t2) = test_tables ( ) ?;
132
- let plan = build_plan ( t1, t2, "t2.id" , "t1.optional_id" ) ?;
155
+ let plan = build_plan ( t1, t2, "t2.id" , "t1.optional_id" , JoinType :: Inner ) ?;
133
156
let expected = "Inner Join: t1.optional_id = t2.id\
134
157
\n Filter: t1.optional_id IS NOT NULL\
135
158
\n TableScan: t1\
@@ -140,7 +163,7 @@ mod tests {
140
163
#[ test]
141
164
fn nested_join_multiple_filter_expr ( ) -> Result < ( ) > {
142
165
let ( t1, t2) = test_tables ( ) ?;
143
- let plan = build_plan ( t1, t2, "t1.optional_id" , "t2.id" ) ?;
166
+ let plan = build_plan ( t1, t2, "t1.optional_id" , "t2.id" , JoinType :: Inner ) ?;
144
167
let schema = Schema :: new ( vec ! [
145
168
Field :: new( "id" , DataType :: UInt32 , false ) ,
146
169
Field :: new( "t1_id" , DataType :: UInt32 , true ) ,
@@ -244,11 +267,12 @@ mod tests {
244
267
right_table : LogicalPlan ,
245
268
left_key : & str ,
246
269
right_key : & str ,
270
+ join_type : JoinType ,
247
271
) -> Result < LogicalPlan > {
248
272
LogicalPlanBuilder :: from ( left_table)
249
273
. join (
250
274
right_table,
251
- JoinType :: Inner ,
275
+ join_type ,
252
276
(
253
277
vec ! [ Column :: from_qualified_name( left_key) ] ,
254
278
vec ! [ Column :: from_qualified_name( right_key) ] ,
0 commit comments