Skip to content

Commit 462b9b7

Browse files
committed
fix: allow group by same expr in Aggregate
1 parent 48c72c6 commit 462b9b7

File tree

5 files changed

+84
-16
lines changed

5 files changed

+84
-16
lines changed

datafusion/common/src/dfschema.rs

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -68,14 +68,7 @@ impl DFSchema {
6868

6969
for field in &fields {
7070
if let Some(qualifier) = field.qualifier() {
71-
if !qualified_names.insert((qualifier, field.name())) {
72-
return Err(DataFusionError::SchemaError(
73-
SchemaError::DuplicateQualifiedField {
74-
qualifier: Box::new(qualifier.clone()),
75-
name: field.name().to_string(),
76-
},
77-
));
78-
}
71+
qualified_names.insert((qualifier, field.name()));
7972
} else if !unqualified_names.insert(field.name()) {
8073
return Err(DataFusionError::SchemaError(
8174
SchemaError::DuplicateUnqualifiedField {
@@ -859,10 +852,7 @@ mod tests {
859852
let left = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
860853
let right = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
861854
let join = left.join(&right);
862-
assert_eq!(
863-
join.unwrap_err().to_string(),
864-
"Schema error: Schema contains duplicate qualified field name t1.c0",
865-
);
855+
assert!(join.err().is_none());
866856
Ok(())
867857
}
868858

datafusion/core/tests/sqllogictests/test_files/aggregate.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ LOCATION '../../testing/data/csv/aggregate_test_100.csv'
4141
#######
4242

4343
# https://github.com/apache/arrow-datafusion/issues/3353
44-
statement error Aggregations require unique expression names
44+
statement error DataFusion error: Schema error: Schema contains duplicate unqualified field name "APPROXDISTINCT\(aggregate_test_100\.c9\)"
4545
SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100
4646

4747
# csv_query_approx_percentile_cont_with_weight
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
19+
statement ok
20+
CREATE TABLE tab0(col0 INTEGER, col1 INTEGER, col2 INTEGER)
21+
22+
statement ok
23+
CREATE TABLE tab1(col0 INTEGER, col1 INTEGER, col2 INTEGER)
24+
25+
statement ok
26+
CREATE TABLE tab2(col0 INTEGER, col1 INTEGER, col2 INTEGER)
27+
28+
statement ok
29+
INSERT INTO tab0 VALUES(83,0,38)
30+
31+
statement ok
32+
INSERT INTO tab0 VALUES(26,0,79)
33+
34+
statement ok
35+
INSERT INTO tab0 VALUES(43,81,24)
36+
37+
statement ok
38+
INSERT INTO tab1 VALUES(22,6,8)
39+
40+
statement ok
41+
INSERT INTO tab1 VALUES(28,57,45)
42+
43+
statement ok
44+
INSERT INTO tab1 VALUES(82,44,71)
45+
46+
statement ok
47+
INSERT INTO tab2 VALUES(15,61,87)
48+
49+
statement ok
50+
INSERT INTO tab2 VALUES(91,59,79)
51+
52+
statement ok
53+
INSERT INTO tab2 VALUES(92,41,58)
54+
55+
# group by same column
56+
query I
57+
SELECT 38 FROM tab0 AS cor0 GROUP BY cor0.col1, cor0.col1;
58+
----
59+
38
60+
38

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
// under the License.
1717

1818
///! Logical plan types
19-
use crate::logical_plan::builder::validate_unique_names;
2019
use crate::logical_plan::display::{GraphvizVisitor, IndentVisitor};
2120
use crate::logical_plan::extension::UserDefinedLogicalNode;
2221
use crate::logical_plan::statement::{DmlStatement, Statement};
@@ -1669,7 +1668,6 @@ impl Aggregate {
16691668
let group_expr = enumerate_grouping_sets(group_expr)?;
16701669
let grouping_expr: Vec<Expr> = grouping_set_to_exprlist(group_expr.as_slice())?;
16711670
let all_expr = grouping_expr.iter().chain(aggr_expr.iter());
1672-
validate_unique_names("Aggregations", all_expr.clone())?;
16731671
let schema = DFSchema::new_with_metadata(
16741672
exprlist_to_fields(all_expr, &input)?,
16751673
input.schema().metadata().clone(),

datafusion/optimizer/src/eliminate_duplicated_expr.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use crate::{OptimizerConfig, OptimizerRule};
2020
use datafusion_common::Result;
2121
use datafusion_expr::expr::Sort as ExprSort;
2222
use datafusion_expr::logical_plan::LogicalPlan;
23-
use datafusion_expr::{Expr, Sort};
23+
use datafusion_expr::{Aggregate, Expr, Sort};
2424
use hashbrown::HashSet;
2525

2626
/// Optimization rule that eliminate duplicated expr.
@@ -74,6 +74,26 @@ impl OptimizerRule for EliminateDuplicatedExpr {
7474
})))
7575
}
7676
}
77+
LogicalPlan::Aggregate(agg) => {
78+
// dedup agg.groupby and keep order
79+
let mut dedup_expr = Vec::new();
80+
let mut dedup_set = HashSet::new();
81+
agg.group_expr.iter().for_each(|expr| {
82+
if !dedup_set.contains(expr) {
83+
dedup_expr.push(expr.clone());
84+
dedup_set.insert(expr);
85+
}
86+
});
87+
if dedup_expr.len() == agg.group_expr.len() {
88+
Ok(None)
89+
} else {
90+
Ok(Some(LogicalPlan::Aggregate(Aggregate::try_new(
91+
agg.input.clone(),
92+
dedup_expr,
93+
agg.aggr_expr.clone(),
94+
)?)))
95+
}
96+
}
7797
_ => Ok(None),
7898
}
7999
}

0 commit comments

Comments
 (0)