Skip to content

Commit a44258b

Browse files
committed
refactor: convert InSubquery to predicate
Signed-off-by: Kould <[email protected]>
1 parent 0b90559 commit a44258b

File tree

7 files changed

+82
-73
lines changed

7 files changed

+82
-73
lines changed

src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ impl Binder {
143143
} else {
144144
None
145145
};
146-
let (table_index, source_table_index) = self.metadata.write().add_table(
146+
let (table_index, _) = self.metadata.write().add_table(
147147
CATALOG_DEFAULT.to_string(),
148148
"system".to_string(),
149149
table.clone(),
@@ -154,14 +154,8 @@ impl Binder {
154154
None,
155155
);
156156

157-
let (s_expr, mut bind_context) = self.bind_base_table(
158-
bind_context,
159-
"system",
160-
table_index,
161-
source_table_index,
162-
None,
163-
sample,
164-
)?;
157+
let (s_expr, mut bind_context) =
158+
self.bind_base_table(bind_context, "system", table_index, None, None, sample)?;
165159
if let Some(alias) = alias {
166160
bind_context.apply_table_alias(alias, &self.name_resolution_ctx)?;
167161
}

src/query/sql/src/planner/binder/column_binding.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ pub struct ColumnBinding {
2929
/// Table index of this `ColumnBinding` in current context
3030
pub table_index: Option<IndexType>,
3131
/// Source table index of this `ColumnBinding` in current context
32+
///
33+
/// Example:
34+
/// `SELECT n.* FROM notes AS n WHERE n.type = 'D' AND n.id IN (SELECT m.id FROM notes AS m WHERE m.commit_id = 7)`
35+
/// In this case, the table index and column index of `n.id` and `m.id` are inconsistent,
36+
/// but logically they both belong to the `notes` table.
37+
/// Therefore, the source table index is used to allow `n` and `m` to determine whether they are both `notes` in this case.
3238
pub source_table_index: Option<IndexType>,
3339
/// Column name of this `ColumnBinding` in current context
3440
pub column_name: String,

src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs

Lines changed: 30 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ use crate::plans::CastExpr;
3737
use crate::plans::ComparisonOp;
3838
use crate::plans::ConstantExpr;
3939
use crate::plans::EvalScalar;
40-
use crate::plans::Exchange;
4140
use crate::plans::Filter;
4241
use crate::plans::FunctionCall;
4342
use crate::plans::Join;
@@ -274,10 +273,10 @@ impl SubqueryRewriter {
274273
from_count_func: false,
275274
};
276275
let (s_expr, result) = if prop.outer_columns.is_empty() {
277-
if let Some((scalar_expr, s_expr)) =
278-
self.try_eliminate_in_subquery(s_expr, &subquery)?
276+
if let Some(scalar_expr) =
277+
self.try_eliminate_in_subquery(s_expr, &subquery, is_conjunctive_predicate)?
279278
{
280-
return Ok((scalar_expr, s_expr));
279+
return Ok((scalar_expr, s_expr.clone()));
281280
}
282281
self.try_rewrite_uncorrelated_subquery(
283282
s_expr,
@@ -465,22 +464,23 @@ impl SubqueryRewriter {
465464
&self,
466465
left: &SExpr,
467466
subquery: &SubqueryExpr,
468-
) -> Result<Option<(ScalarExpr, SExpr)>> {
467+
is_conjunctive_predicate: bool,
468+
) -> Result<Option<ScalarExpr>> {
469469
#[derive(Debug)]
470470
struct SubqueryBodyVisitor {
471471
replacer: BindingReplacer,
472472
failed: bool,
473-
new_s_expr: Option<SExpr>,
473+
predicates: Vec<ScalarExpr>,
474474
}
475475

476476
impl SubqueryBodyVisitor {
477-
fn new(new_column_bindings: HashMap<IndexType, ColumnBinding>, expr: SExpr) -> Self {
477+
fn new(new_column_bindings: HashMap<IndexType, ColumnBinding>) -> Self {
478478
Self {
479479
replacer: BindingReplacer {
480480
new_column_bindings,
481481
},
482482
failed: false,
483-
new_s_expr: Some(expr),
483+
predicates: Vec::new(),
484484
}
485485
}
486486

@@ -491,43 +491,15 @@ impl SubqueryRewriter {
491491
if self.failed {
492492
return Ok(());
493493
}
494-
if !matches!(
495-
s_expr.plan(),
496-
RelOperator::Scan(_)
497-
| RelOperator::Filter(_)
498-
| RelOperator::EvalScalar(_)
499-
| RelOperator::Sort(_)
500-
| RelOperator::Exchange(_)
501-
) {
502-
self.failed = true;
503-
return Ok(());
504-
}
505494
match s_expr.plan() {
506-
RelOperator::Scan(_) | RelOperator::Sort(_) => (),
507-
RelOperator::EvalScalar(eval_scalar) => {
508-
let mut eval_scalar = eval_scalar.clone();
509-
510-
for scalar_item in eval_scalar.items.iter_mut() {
511-
self.replacer.visit(&mut scalar_item.scalar)?;
512-
}
513-
self.build_s_expr(RelOperator::EvalScalar(eval_scalar));
514-
}
495+
RelOperator::Scan(_) | RelOperator::Sort(_) | RelOperator::EvalScalar(_) => (),
515496
RelOperator::Filter(filter) => {
516497
let mut filter = filter.clone();
517498

518499
for scalar_expr in filter.predicates.iter_mut() {
519500
self.replacer.visit(scalar_expr)?;
520501
}
521-
self.build_s_expr(RelOperator::Filter(filter));
522-
}
523-
RelOperator::Exchange(exchange) => {
524-
let mut exchange = exchange.clone();
525-
if let Exchange::Hash(scalar_exprs) = &mut exchange {
526-
for scalar_expr in scalar_exprs {
527-
self.replacer.visit(scalar_expr)?;
528-
}
529-
}
530-
self.build_s_expr(RelOperator::Exchange(exchange));
502+
self.predicates.append(&mut filter.predicates);
531503
}
532504
_ => {
533505
self.failed = true;
@@ -537,15 +509,23 @@ impl SubqueryRewriter {
537509
Ok(())
538510
}
539511

540-
fn build_s_expr(&mut self, op: RelOperator) {
541-
self.new_s_expr = Some(match self.new_s_expr.take() {
542-
None => SExpr::create_leaf(Arc::new(op)),
543-
Some(child_expr) => SExpr::create_unary(Arc::new(op), Arc::new(child_expr)),
544-
});
545-
}
546-
547-
fn result(self) -> Option<SExpr> {
548-
(!self.failed).then_some(self.new_s_expr).flatten()
512+
fn result(self) -> Option<ScalarExpr> {
513+
(!self.failed).then(|| {
514+
self.predicates.into_iter().fold(
515+
ScalarExpr::ConstantExpr(ConstantExpr {
516+
span: None,
517+
value: Scalar::Boolean(true),
518+
}),
519+
|acc, expr| {
520+
ScalarExpr::FunctionCall(FunctionCall {
521+
span: None,
522+
func_name: "and".to_string(),
523+
params: vec![],
524+
arguments: vec![acc, expr],
525+
})
526+
},
527+
)
528+
})
549529
}
550530
}
551531

@@ -570,7 +550,7 @@ impl SubqueryRewriter {
570550
return Ok(None);
571551
};
572552
// `xx in null` != `where xx = null`
573-
if left_column.column.data_type.is_nullable() {
553+
if left_column.column.data_type.is_nullable() || !is_conjunctive_predicate {
574554
return Ok(None);
575555
}
576556
let (Some(left_table_index), Some(right_table_index)) = (
@@ -641,17 +621,9 @@ impl SubqueryRewriter {
641621
new_column_bindings
642622
};
643623
// restore ColumnBinding of same table in Subquery to table in Left
644-
let mut body_visitor = SubqueryBodyVisitor::new(new_column_bindings, left.clone());
624+
let mut body_visitor = SubqueryBodyVisitor::new(new_column_bindings);
645625
body_visitor.visit(subquery.subquery.as_ref())?;
646-
Ok(body_visitor.result().map(|s_expr| {
647-
(
648-
ScalarExpr::ConstantExpr(ConstantExpr {
649-
span: None,
650-
value: Scalar::Boolean(true),
651-
}),
652-
s_expr,
653-
)
654-
}))
626+
Ok(body_visitor.result())
655627
}
656628

657629
fn try_rewrite_uncorrelated_subquery(

src/query/sql/tests/optimizer/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@
1414

1515
mod filter;
1616
mod histogram;
17+
mod subquery;
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use databend_common_exception::Result;
16+
17+
#[test]
18+
fn test_eliminate_simple_in_subquery() -> Result<()> {
19+
// TODO: How to build a SExpr and Metadata better
20+
21+
Ok(())
22+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
mod eliminate_in_subquery_test;

tests/sqllogictests/suites/mode/standalone/explain/explain.test

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,18 +1330,18 @@ statement ok
13301330
drop table t2;
13311331

13321332
statement ok
1333-
create OR REPLACE table t1(a int not null, b int, c varchar(20));
1333+
create OR REPLACE table t1(a int, b int not null, c varchar(20));
13341334

13351335
statement ok
1336-
create OR REPLACE table t2(a int not null, b int, c varchar(20));
1336+
create OR REPLACE table t2(a int, b int not null, c varchar(20));
13371337

13381338
# eliminate subquery
13391339
query T
1340-
explain select t3.* from t1 as t3 where t3.c = 'D' and t3.a in (select t4.a from t1 as t4 where t4.b = 7);
1340+
explain select t3.* from t1 as t3 where t3.c = 'D' and t3.b in (select t4.b from t1 as t4 where t4.a = 7);
13411341
----
13421342
Filter
13431343
├── output columns: [t3.a (#0), t3.b (#1), t3.c (#2)]
1344-
├── filters: [is_true(t1.b (#1) = 7), is_true(t3.c (#2) = 'D')]
1344+
├── filters: [is_true(t1.a (#0) = 7), is_true(t3.c (#2) = 'D')]
13451345
├── estimated rows: 0.00
13461346
└── TableScan
13471347
├── table: default.default.t1
@@ -1350,10 +1350,9 @@ Filter
13501350
├── read size: 0
13511351
├── partitions total: 0
13521352
├── partitions scanned: 0
1353-
├── push downs: [filters: [and_filters(t1.b (#1) = 7, t1.c (#2) = 'D')], limit: NONE]
1353+
├── push downs: [filters: [and_filters(t1.a (#0) = 7, t1.c (#2) = 'D')], limit: NONE]
13541354
└── estimated rows: 0.00
13551355

1356-
13571356
# scalar subquery and sort plan contains count() agg function.
13581357
query T
13591358
explain select * from t2 where c > (select c from t1 where t1.a = t2.a group by c order by count(a));

0 commit comments

Comments
 (0)