diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 1686a5d0dfe0..23833e8e13cc 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -68,14 +68,7 @@ impl DFSchema { for field in &fields { if let Some(qualifier) = field.qualifier() { - if !qualified_names.insert((qualifier, field.name())) { - return Err(DataFusionError::SchemaError( - SchemaError::DuplicateQualifiedField { - qualifier: Box::new(qualifier.clone()), - name: field.name().to_string(), - }, - )); - } + qualified_names.insert((qualifier, field.name())); } else if !unqualified_names.insert(field.name()) { return Err(DataFusionError::SchemaError( SchemaError::DuplicateUnqualifiedField { @@ -859,10 +852,7 @@ mod tests { let left = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; let right = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; let join = left.join(&right); - assert_eq!( - join.unwrap_err().to_string(), - "Schema error: Schema contains duplicate qualified field name t1.c0", - ); + assert!(join.err().is_none()); Ok(()) } diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt index 9e122d3a26e5..9998e766f783 100644 --- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt +++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt @@ -41,7 +41,7 @@ LOCATION '../../testing/data/csv/aggregate_test_100.csv' ####### # https://github.com/apache/arrow-datafusion/issues/3353 -statement error Aggregations require unique expression names +statement error DataFusion error: Schema error: Schema contains duplicate unqualified field name "APPROXDISTINCT\(aggregate_test_100\.c9\)" SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100 # csv_query_approx_percentile_cont_with_weight diff --git a/datafusion/core/tests/sqllogictests/test_files/groupby.slt 
b/datafusion/core/tests/sqllogictests/test_files/groupby.slt new file mode 100644 index 000000000000..bd081048f87d --- /dev/null +++ b/datafusion/core/tests/sqllogictests/test_files/groupby.slt @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +statement ok +CREATE TABLE tab0(col0 INTEGER, col1 INTEGER, col2 INTEGER) + +statement ok +CREATE TABLE tab1(col0 INTEGER, col1 INTEGER, col2 INTEGER) + +statement ok +CREATE TABLE tab2(col0 INTEGER, col1 INTEGER, col2 INTEGER) + +statement ok +INSERT INTO tab0 VALUES(83,0,38) + +statement ok +INSERT INTO tab0 VALUES(26,0,79) + +statement ok +INSERT INTO tab0 VALUES(43,81,24) + +statement ok +INSERT INTO tab1 VALUES(22,6,8) + +statement ok +INSERT INTO tab1 VALUES(28,57,45) + +statement ok +INSERT INTO tab1 VALUES(82,44,71) + +statement ok +INSERT INTO tab2 VALUES(15,61,87) + +statement ok +INSERT INTO tab2 VALUES(91,59,79) + +statement ok +INSERT INTO tab2 VALUES(92,41,58) + +# group by same column +query I +SELECT 38 FROM tab0 AS cor0 GROUP BY cor0.col1, cor0.col1; +---- +38 +38 diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 03ee55baa52e..2524b12caece 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -16,7 +16,6 @@ // under the License. ///! 
Logical plan types -use crate::logical_plan::builder::validate_unique_names; use crate::logical_plan::display::{GraphvizVisitor, IndentVisitor}; use crate::logical_plan::extension::UserDefinedLogicalNode; use crate::logical_plan::statement::{DmlStatement, Statement}; @@ -1669,7 +1668,6 @@ impl Aggregate { let group_expr = enumerate_grouping_sets(group_expr)?; let grouping_expr: Vec<Expr> = grouping_set_to_exprlist(group_expr.as_slice())?; let all_expr = grouping_expr.iter().chain(aggr_expr.iter()); - validate_unique_names("Aggregations", all_expr.clone())?; let schema = DFSchema::new_with_metadata( exprlist_to_fields(all_expr, &input)?, input.schema().metadata().clone(), diff --git a/datafusion/optimizer/src/eliminate_duplicated_expr.rs b/datafusion/optimizer/src/eliminate_duplicated_expr.rs index 15f3d8e1d851..de05717a72e2 100644 --- a/datafusion/optimizer/src/eliminate_duplicated_expr.rs +++ b/datafusion/optimizer/src/eliminate_duplicated_expr.rs @@ -20,7 +20,7 @@ use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::Result; use datafusion_expr::expr::Sort as ExprSort; use datafusion_expr::logical_plan::LogicalPlan; -use datafusion_expr::{Expr, Sort}; +use datafusion_expr::{Aggregate, Expr, Sort}; use hashbrown::HashSet; /// Optimization rule that eliminate duplicated expr. @@ -74,6 +74,26 @@ impl OptimizerRule for EliminateDuplicatedExpr { }))) } } + LogicalPlan::Aggregate(agg) => { + // dedup agg.groupby and keep order + let mut dedup_expr = Vec::new(); + let mut dedup_set = HashSet::new(); + agg.group_expr.iter().for_each(|expr| { + if !dedup_set.contains(expr) { + dedup_expr.push(expr.clone()); + dedup_set.insert(expr); + } + }); + if dedup_expr.len() == agg.group_expr.len() { + Ok(None) + } else { + Ok(Some(LogicalPlan::Aggregate(Aggregate::try_new( + agg.input.clone(), + dedup_expr, + agg.aggr_expr.clone(), + )?))) + } + } _ => Ok(None), } }