Skip to content

Commit de2bdae

Browse files
authored
Merge pull request #9612 from sundy-li/group_by_ast_rewrite
chore(query): add DistinctToGroupBy
2 parents f962bbc + ce44722 commit de2bdae

File tree

6 files changed

+187
-3
lines changed

6 files changed

+187
-3
lines changed

src/query/ast/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub use visitors::walk_expr;
2828
pub use visitors::walk_expr_mut;
2929
pub use visitors::walk_query;
3030
pub use visitors::walk_query_mut;
31+
pub use visitors::walk_statement_mut;
3132
pub use visitors::Visitor;
3233
pub use visitors::VisitorMut;
3334

src/query/ast/src/visitors/visitor_mut.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,9 @@ pub trait VisitorMut: Sized {
344344
walk_query_mut(self, query);
345345
}
346346

347-
fn visit_explain(&mut self, _kind: &mut ExplainKind, _query: &mut Statement<'_>) {}
347+
/// Visits an `EXPLAIN` statement.
///
/// The default implementation ignores the explain kind and forwards the
/// wrapped statement to `walk_statement_mut`, passing this visitor along.
fn visit_explain(&mut self, _kind: &mut ExplainKind, stmt: &mut Statement<'_>) {
348+
walk_statement_mut(self, stmt);
349+
}
348350

349351
fn visit_copy(&mut self, _copy: &mut CopyStmt<'_>) {}
350352

src/query/sql/src/planner/planner.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,19 @@
1414

1515
use std::sync::Arc;
1616

17+
use common_ast::ast::Statement;
1718
use common_ast::parser::parse_sql;
1819
use common_ast::parser::token::Token;
1920
use common_ast::parser::token::TokenKind;
2021
use common_ast::parser::token::Tokenizer;
22+
use common_ast::walk_statement_mut;
2123
use common_ast::Backtrace;
2224
use common_catalog::catalog::CatalogManager;
2325
use common_catalog::table_context::TableContext;
2426
use common_exception::Result;
2527
use parking_lot::RwLock;
2628

29+
use super::semantic::DistinctToGroupBy;
2730
use crate::optimizer::optimize;
2831
use crate::optimizer::OptimizerConfig;
2932
use crate::optimizer::OptimizerContext;
@@ -76,8 +79,8 @@ impl Planner {
7679
let res = async {
7780
// Step 2: Parse the SQL.
7881
let backtrace = Backtrace::new();
79-
let (stmt, format) = parse_sql(&tokens, sql_dialect, &backtrace)?;
80-
82+
let (mut stmt, format) = parse_sql(&tokens, sql_dialect, &backtrace)?;
83+
replace_stmt(&mut stmt);
8184
// Step 3: Bind AST with catalog, and generate a pure logical SExpr
8285
let metadata = Arc::new(RwLock::new(Metadata::default()));
8386
let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?;
@@ -126,3 +129,10 @@ impl Planner {
126129
}
127130
}
128131
}
132+
133+
fn replace_stmt(stmt: &mut Statement) {
134+
let mut visitors = vec![DistinctToGroupBy::default()];
135+
for v in visitors.iter_mut() {
136+
walk_statement_mut(v, stmt)
137+
}
138+
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// Copyright 2022 Datafuse Labs.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use common_ast::ast::Expr;
16+
use common_ast::ast::Identifier;
17+
use common_ast::ast::Query;
18+
use common_ast::ast::SelectStmt;
19+
use common_ast::ast::SetExpr;
20+
use common_ast::ast::TableReference;
21+
use common_ast::VisitorMut;
22+
23+
#[derive(Debug, Clone, Default)]
24+
pub struct DistinctToGroupBy {}
25+
26+
impl VisitorMut for DistinctToGroupBy {
27+
fn visit_select_stmt(&mut self, stmt: &mut SelectStmt<'_>) {
28+
let SelectStmt {
29+
select_list,
30+
from,
31+
selection,
32+
group_by,
33+
having,
34+
..
35+
} = stmt;
36+
37+
if group_by.is_empty() && select_list.len() == 1 && from.len() == 1 {
38+
if let common_ast::ast::SelectTarget::AliasedExpr {
39+
expr:
40+
box Expr::FunctionCall {
41+
span,
42+
distinct,
43+
name,
44+
args,
45+
..
46+
},
47+
alias,
48+
} = &select_list[0]
49+
{
50+
if ((name.name.to_ascii_lowercase() == "count" && *distinct)
51+
|| name.name.to_ascii_lowercase() == "count_distinct")
52+
&& args.iter().all(|arg| !matches!(arg, Expr::Literal { .. }))
53+
{
54+
let tmp_token = span[0].clone();
55+
let subquery = Query {
56+
span: &[],
57+
with: None,
58+
body: SetExpr::Select(Box::new(SelectStmt {
59+
span: &[],
60+
distinct: false,
61+
select_list: vec![],
62+
from: from.clone(),
63+
selection: None,
64+
group_by: args.clone(),
65+
having: None,
66+
})),
67+
order_by: vec![],
68+
limit: vec![],
69+
offset: None,
70+
ignore_result: false,
71+
};
72+
73+
let new_stmt = SelectStmt {
74+
span: &[],
75+
distinct: false,
76+
select_list: vec![common_ast::ast::SelectTarget::AliasedExpr {
77+
expr: Box::new(Expr::FunctionCall {
78+
span: &[],
79+
distinct: false,
80+
name: Identifier {
81+
name: "count".to_string(),
82+
quote: None,
83+
span: tmp_token.clone(),
84+
},
85+
args: vec![],
86+
params: vec![],
87+
}),
88+
alias: alias.clone(),
89+
}],
90+
from: vec![TableReference::Subquery {
91+
span: &[],
92+
subquery: Box::new(subquery),
93+
alias: None,
94+
}],
95+
selection: selection.clone(),
96+
group_by: vec![],
97+
having: having.clone(),
98+
};
99+
100+
*stmt = new_stmt;
101+
}
102+
}
103+
}
104+
}
105+
}

src/query/sql/src/planner/semantic/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
mod distinct_to_groupby;
1516
mod grouping_check;
1617
mod lowering;
1718
mod name_resolution;
1819
mod type_check;
1920

21+
pub use distinct_to_groupby::DistinctToGroupBy;
2022
pub use grouping_check::GroupingChecker;
2123
pub use name_resolution::compare_table_name;
2224
pub use name_resolution::normalize_identifier;

tests/sqllogictests/suites/mode/standalone/explain/explain.test

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,70 @@ HashJoin
768768
├── push downs: [filters: [], limit: NONE]
769769
└── estimated rows: 5.00
770770

771+
query T
772+
explain select count(distinct a) from t1;
773+
----
774+
EvalScalar
775+
├── expressions: [count() (#3)]
776+
├── estimated rows: 1.00
777+
└── AggregateFinal
778+
├── group by: []
779+
├── aggregate functions: [count()]
780+
├── estimated rows: 1.00
781+
└── AggregatePartial
782+
├── group by: []
783+
├── aggregate functions: [count()]
784+
├── estimated rows: 1.00
785+
└── AggregateFinal
786+
├── group by: [a]
787+
├── aggregate functions: []
788+
├── estimated rows: 1.00
789+
└── AggregatePartial
790+
├── group by: [a]
791+
├── aggregate functions: []
792+
├── estimated rows: 1.00
793+
└── TableScan
794+
├── table: default.default.t1
795+
├── read rows: 1
796+
├── read bytes: 31
797+
├── partitions total: 1
798+
├── partitions scanned: 1
799+
├── push downs: [filters: [], limit: NONE]
800+
├── output columns: [0]
801+
└── estimated rows: 1.00
802+
803+
query T
804+
explain select count_distinct(a) from t1;
805+
----
806+
EvalScalar
807+
├── expressions: [count() (#3)]
808+
├── estimated rows: 1.00
809+
└── AggregateFinal
810+
├── group by: []
811+
├── aggregate functions: [count()]
812+
├── estimated rows: 1.00
813+
└── AggregatePartial
814+
├── group by: []
815+
├── aggregate functions: [count()]
816+
├── estimated rows: 1.00
817+
└── AggregateFinal
818+
├── group by: [a]
819+
├── aggregate functions: []
820+
├── estimated rows: 1.00
821+
└── AggregatePartial
822+
├── group by: [a]
823+
├── aggregate functions: []
824+
├── estimated rows: 1.00
825+
└── TableScan
826+
├── table: default.default.t1
827+
├── read rows: 1
828+
├── read bytes: 31
829+
├── partitions total: 1
830+
├── partitions scanned: 1
831+
├── push downs: [filters: [], limit: NONE]
832+
├── output columns: [0]
833+
└── estimated rows: 1.00
834+
771835
statement ok
772836
drop table t1
773837

0 commit comments

Comments
 (0)