Skip to content

Commit 1400069

Browse files
migrate approx_percentile_cont, approx_distinct, and approx_median to UDAF
Ref: approx_distinct apache/datafusion#10851
Ref: approx_median apache/datafusion#10840
Ref: approx_percentile_cont and _with_weight apache/datafusion#10917
1 parent f7bd619 commit 1400069

File tree

2 files changed

+58
-12
lines changed

2 files changed

+58
-12
lines changed

python/datafusion/functions.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,9 +1211,9 @@ def flatten(array: Expr) -> Expr:
12111211

12121212

12131213
# aggregate functions
1214-
def approx_distinct(arg: Expr) -> Expr:
1214+
def approx_distinct(expression: Expr) -> Expr:
12151215
"""Returns the approximate number of distinct values."""
1216-
return Expr(f.approx_distinct(arg.expr, distinct=True))
1216+
return Expr(f.approx_distinct(expression.expr))
12171217

12181218

12191219
def approx_median(arg: Expr, distinct: bool = False) -> Expr:
@@ -1222,20 +1222,22 @@ def approx_median(arg: Expr, distinct: bool = False) -> Expr:
12221222

12231223

12241224
def approx_percentile_cont(
1225-
expr: Expr,
1225+
expression: Expr,
12261226
percentile: Expr,
1227-
num_centroids: int | None = None,
1227+
# num_centroids: int | None = None,
12281228
distinct: bool = False,
12291229
) -> Expr:
12301230
"""Returns the value that is approximately at a given percentile of ``expr``."""
1231+
# TODO: enable num_centroids
1232+
num_centroids = None
12311233
if num_centroids is None:
12321234
return Expr(
12331235
f.approx_percentile_cont(expr.expr, percentile.expr, distinct=distinct)
12341236
)
12351237

12361238
return Expr(
12371239
f.approx_percentile_cont(
1238-
expr.expr, percentile.expr, num_centroids, distinct=distinct
1240+
expr.expr, percentile.expr, distinct=distinct
12391241
)
12401242
)
12411243

src/functions.rs

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,57 @@ use datafusion_expr::{
3636
lit, Expr, WindowFunctionDefinition,
3737
};
3838

39+
#[pyfunction]
40+
pub fn approx_distinct(expression: PyExpr) -> PyExpr {
41+
functions_aggregate::expr_fn::approx_distinct::approx_distinct(expression.expr).into()
42+
}
43+
44+
#[pyfunction]
45+
pub fn approx_median(expression: PyExpr, distinct: bool) -> PyResult<PyExpr> {
46+
// TODO: better builder pattern
47+
let expr = functions_aggregate::expr_fn::approx_median(expression.expr);
48+
if distinct {
49+
Ok(expr.distinct().build()?.into())
50+
} else {
51+
Ok(expr.into())
52+
}
53+
}
54+
55+
#[pyfunction]
56+
pub fn approx_percentile_cont(
57+
expression: PyExpr,
58+
percentile: PyExpr,
59+
distinct: bool,
60+
) -> PyResult<PyExpr> {
61+
// TODO: better builder pattern
62+
let expr =
63+
functions_aggregate::expr_fn::approx_percentile_cont(expression.expr, percentile.expr);
64+
if distinct {
65+
Ok(expr.distinct().build()?.into())
66+
} else {
67+
Ok(expr.into())
68+
}
69+
}
70+
71+
#[pyfunction]
72+
pub fn approx_percentile_cont_with_weight(
73+
expression: PyExpr,
74+
weight: PyExpr,
75+
percentile: PyExpr,
76+
distinct: bool,
77+
) -> PyResult<PyExpr> {
78+
let expr = functions_aggregate::expr_fn::approx_percentile_cont_with_weight(
79+
expression.expr,
80+
weight.expr,
81+
percentile.expr,
82+
);
83+
if distinct {
84+
Ok(expr.distinct().build()?.into())
85+
} else {
86+
Ok(expr.into())
87+
}
88+
}
89+
3990
#[pyfunction]
4091
pub fn sum(args: PyExpr) -> PyExpr {
4192
functions_aggregate::expr_fn::sum(args.expr).into()
@@ -697,13 +748,6 @@ array_fn!(list_resize, array_resize, array size value);
697748
array_fn!(flatten, array);
698749
array_fn!(range, start stop step);
699750

700-
aggregate_function!(approx_distinct, ApproxDistinct);
701-
aggregate_function!(approx_median, ApproxMedian);
702-
aggregate_function!(approx_percentile_cont, ApproxPercentileCont);
703-
aggregate_function!(
704-
approx_percentile_cont_with_weight,
705-
ApproxPercentileContWithWeight
706-
);
707751
aggregate_function!(array_agg, ArrayAgg);
708752
aggregate_function!(avg, Avg);
709753
aggregate_function!(corr, Correlation);

0 commit comments

Comments
 (0)