Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1651,14 +1651,11 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter(
== NullTreatment::IgnoreNulls;

let (agg_expr, filter, order_bys) = {
let order_bys = match order_by {
Some(exprs) => create_physical_sort_exprs(
exprs,
logical_input_schema,
execution_props,
)?,
None => vec![],
};
let order_bys = create_physical_sort_exprs(
order_by,
logical_input_schema,
execution_props,
)?;

let agg_expr =
AggregateExprBuilder::new(func.to_owned(), physical_args.to_vec())
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/execution/logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ async fn count_only_nulls() -> Result<()> {
args: vec![input_col_ref],
distinct: false,
filter: None,
order_by: None,
order_by: vec![],
null_treatment: None,
},
})],
Expand Down
37 changes: 17 additions & 20 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::expr_fn::binary_expr;
use crate::function::WindowFunctionSimplification;
use crate::logical_plan::Subquery;
use crate::Volatility;
use crate::{udaf, ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};
use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};

use arrow::datatypes::{DataType, Field, FieldRef};
use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable};
Expand Down Expand Up @@ -994,7 +994,7 @@ pub struct AggregateFunctionParams {
/// Optional filter
pub filter: Option<Box<Expr>>,
/// Optional ordering
pub order_by: Option<Vec<Sort>>,
pub order_by: Vec<Sort>,
pub null_treatment: Option<NullTreatment>,
}

Expand All @@ -1005,7 +1005,7 @@ impl AggregateFunction {
args: Vec<Expr>,
distinct: bool,
filter: Option<Box<Expr>>,
order_by: Option<Vec<Sort>>,
order_by: Vec<Sort>,
null_treatment: Option<NullTreatment>,
) -> Self {
Self {
Expand Down Expand Up @@ -1175,26 +1175,26 @@ impl Exists {

/// User Defined Aggregate Function
///
/// See [`udaf::AggregateUDF`] for more information.
/// See [`crate::udaf::AggregateUDF`] for more information.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct AggregateUDF {
/// The function
pub fun: Arc<udaf::AggregateUDF>,
pub fun: Arc<crate::AggregateUDF>,
/// List of expressions to feed to the functions as arguments
pub args: Vec<Expr>,
/// Optional filter
pub filter: Option<Box<Expr>>,
/// Optional ORDER BY applied prior to aggregating
pub order_by: Option<Vec<Expr>>,
pub order_by: Vec<Sort>,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand why the `Option` was removed (that's the purpose of this PR). I don't yet understand why `Expr` was replaced with `Sort`, though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a mistake during the batch replacement process, but since this struct has never been used in the project, it did not cause any exceptions. I'll change it back.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And we can open another issue to discuss whether it is more reasonable to change it to the Sort type, because other order_by fields are defined as the Sort type. Or we can directly delete this unused struct.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's unused, let's delete it, preferably in a separate PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. Should it be before or after this PR?

}

impl AggregateUDF {
/// Create a new AggregateUDF expression
pub fn new(
fun: Arc<udaf::AggregateUDF>,
fun: Arc<crate::AggregateUDF>,
args: Vec<Expr>,
filter: Option<Box<Expr>>,
order_by: Option<Vec<Expr>>,
order_by: Vec<Sort>,
) -> Self {
Self {
fun,
Expand Down Expand Up @@ -2303,18 +2303,15 @@ impl NormalizeEq for Expr {
(None, None) => true,
_ => false,
}
&& match (self_order_by, other_order_by) {
(Some(self_order_by), Some(other_order_by)) => self_order_by
.iter()
.zip(other_order_by.iter())
.all(|(a, b)| {
a.asc == b.asc
&& a.nulls_first == b.nulls_first
&& a.expr.normalize_eq(&b.expr)
}),
(None, None) => true,
_ => false,
}
&& self_order_by
.iter()
.zip(other_order_by.iter())
.all(|(a, b)| {
a.asc == b.asc
&& a.nulls_first == b.nulls_first
&& a.expr.normalize_eq(&b.expr)
})
&& self_order_by.len() == other_order_by.len()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The length check was missing, thanks for adding it.
Coincidentally, I was recently reflecting on precisely this -- how error-prone `Iterator::zip` is to use.

}
(Expr::WindowFunction(left), Expr::WindowFunction(other)) => {
let WindowFunction {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -821,7 +821,7 @@ impl ExprFuncBuilder {

let fun_expr = match fun {
ExprFuncKind::Aggregate(mut udaf) => {
udaf.params.order_by = order_by;
udaf.params.order_by = order_by.unwrap_or_default();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a follow-up, the builder's `order_by` field can be changed to non-optional as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this type is private and the builder function `order_by` already takes a `Vec` without an `Option`, could we just do this in this PR as well? It's about a 2-line change.

udaf.params.filter = filter.map(Box::new);
udaf.params.distinct = distinct;
udaf.params.null_treatment = null_treatment;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ pub struct RawAggregateExpr {
pub args: Vec<Expr>,
pub distinct: bool,
pub filter: Option<Box<Expr>>,
pub order_by: Option<Vec<SortExpr>>,
pub order_by: Vec<SortExpr>,
pub null_treatment: Option<NullTreatment>,
}

Expand Down
10 changes: 5 additions & 5 deletions datafusion/expr/src/test/function_stub.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub fn sum(expr: Expr) -> Expr {
vec![expr],
false,
None,
None,
vec![],
None,
))
}
Expand All @@ -73,7 +73,7 @@ pub fn count(expr: Expr) -> Expr {
vec![expr],
false,
None,
None,
vec![],
None,
))
}
Expand All @@ -86,7 +86,7 @@ pub fn avg(expr: Expr) -> Expr {
vec![expr],
false,
None,
None,
vec![],
None,
))
}
Expand Down Expand Up @@ -282,7 +282,7 @@ pub fn min(expr: Expr) -> Expr {
vec![expr],
false,
None,
None,
vec![],
None,
))
}
Expand Down Expand Up @@ -363,7 +363,7 @@ pub fn max(expr: Expr) -> Expr {
vec![expr],
false,
None,
None,
vec![],
None,
))
}
Expand Down
15 changes: 8 additions & 7 deletions datafusion/expr/src/udaf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ impl AggregateUDF {
args,
false,
None,
None,
vec![],
None,
))
}
Expand Down Expand Up @@ -394,7 +394,7 @@ where
/// fn get_doc() -> &'static Documentation {
/// &DOCUMENTATION
/// }
///
///
/// /// Implement the AggregateUDFImpl trait for GeoMeanUdf
/// impl AggregateUDFImpl for GeoMeanUdf {
/// fn as_any(&self) -> &dyn Any { self }
Expand All @@ -415,7 +415,7 @@ where
/// ])
/// }
/// fn documentation(&self) -> Option<&Documentation> {
/// Some(get_doc())
/// Some(get_doc())
/// }
/// }
///
Expand Down Expand Up @@ -474,7 +474,7 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
schema_name.write_fmt(format_args!(" FILTER (WHERE {filter})"))?;
};

if let Some(order_by) = order_by {
if !order_by.is_empty() {
let clause = match self.is_ordered_set_aggregate() {
true => "WITHIN GROUP",
false => "ORDER BY",
Expand Down Expand Up @@ -519,7 +519,7 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
schema_name.write_fmt(format_args!(" FILTER (WHERE {filter})"))?;
};

if let Some(order_by) = order_by {
if !order_by.is_empty() {
schema_name.write_fmt(format_args!(
" ORDER BY [{}]",
schema_name_from_sorts(order_by)?
Expand Down Expand Up @@ -608,10 +608,11 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
if let Some(fe) = filter {
display_name.write_fmt(format_args!(" FILTER (WHERE {fe})"))?;
}
if let Some(ob) = order_by {
if !order_by.is_empty() {
display_name.write_fmt(format_args!(
" ORDER BY [{}]",
ob.iter()
order_by
.iter()
.map(|o| format!("{o}"))
.collect::<Vec<String>>()
.join(", ")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ pub fn approx_percentile_cont(
args,
false,
None,
Some(vec![order_by]),
vec![order_by],
None,
))
}
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-aggregate/src/count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ pub fn count_distinct(expr: Expr) -> Expr {
vec![expr],
true,
None,
None,
vec![],
None,
))
}
Expand Down
8 changes: 4 additions & 4 deletions datafusion/functions-aggregate/src/first_last.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ create_func!(FirstValue, first_value_udaf);
create_func!(LastValue, last_value_udaf);

/// Returns the first value in a group of values.
pub fn first_value(expression: Expr, order_by: Option<Vec<SortExpr>>) -> Expr {
if let Some(order_by) = order_by {
pub fn first_value(expression: Expr, order_by: Vec<SortExpr>) -> Expr {
if !order_by.is_empty() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition seems redundant now.

first_value_udaf()
.call(vec![expression])
.order_by(order_by)
Expand All @@ -69,8 +69,8 @@ pub fn first_value(expression: Expr, order_by: Option<Vec<SortExpr>>) -> Expr {
}

/// Returns the last value in a group of values.
pub fn last_value(expression: Expr, order_by: Option<Vec<SortExpr>>) -> Expr {
if let Some(order_by) = order_by {
pub fn last_value(expression: Expr, order_by: Vec<SortExpr>) -> Expr {
if !order_by.is_empty() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition seems redundant now.

last_value_udaf()
.call(vec![expression])
.order_by(order_by)
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions-aggregate/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ macro_rules! make_udaf_expr {
vec![$($arg),*],
false,
None,
None,
vec![],
None,
))
}
Expand All @@ -52,7 +52,7 @@ macro_rules! make_udaf_expr_and_func {
args,
false,
None,
None,
vec![],
None,
))
}
Expand Down
14 changes: 7 additions & 7 deletions datafusion/optimizer/src/analyzer/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1597,7 +1597,7 @@ mod test {
vec![lit(10i64)],
false,
None,
None,
vec![],
None,
));
let plan = LogicalPlan::Projection(Projection::try_new(vec![udaf], empty)?);
Expand Down Expand Up @@ -1632,7 +1632,7 @@ mod test {
vec![lit("10")],
false,
None,
None,
vec![],
None,
));

Expand All @@ -1651,7 +1651,7 @@ mod test {
vec![lit(12f64)],
false,
None,
None,
vec![],
None,
));
let plan = LogicalPlan::Projection(Projection::try_new(vec![agg_expr], empty)?);
Expand All @@ -1670,7 +1670,7 @@ mod test {
vec![cast(col("a"), DataType::Float64)],
false,
None,
None,
vec![],
None,
));
let plan = LogicalPlan::Projection(Projection::try_new(vec![agg_expr], empty)?);
Expand All @@ -1692,7 +1692,7 @@ mod test {
vec![lit("1")],
false,
None,
None,
vec![],
None,
));
let err = Projection::try_new(vec![agg_expr], empty)
Expand Down Expand Up @@ -1727,7 +1727,7 @@ mod test {
let empty = empty_with_type(DataType::Int64);
let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?);
assert_analyzed_plan_eq!(
plan,
plan,
@r"
Projection: a IN ([CAST(Int32(1) AS Int64), CAST(Int8(4) AS Int64), Int64(8)])
EmptyRelation
Expand All @@ -1744,7 +1744,7 @@ mod test {
}));
let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?);
assert_analyzed_plan_eq!(
plan,
plan,
@r"
Projection: CAST(a AS Decimal128(24, 4)) IN ([CAST(Int32(1) AS Decimal128(24, 4)), CAST(Int8(4) AS Decimal128(24, 4)), CAST(Int64(8) AS Decimal128(24, 4))])
EmptyRelation
Expand Down
2 changes: 1 addition & 1 deletion datafusion/optimizer/src/common_subexpr_eliminate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ mod test {
vec![inner],
false,
None,
None,
vec![],
None,
))
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4324,7 +4324,7 @@ mod tests {
vec![],
false,
None,
None,
vec![],
None,
));

Expand All @@ -4338,7 +4338,7 @@ mod tests {
vec![],
false,
None,
None,
vec![],
None,
));

Expand Down
Loading