Skip to content

Commit 10cac80

Browse files
committed
Mirror upstream elastic#137511 as single snapshot commit for AI review
BASE=3017e334274a7292997b0fea77f90d2c73b58eba HEAD=71c1691e34614a43230da1905611f4fa9dfeec01 Branch=main
1 parent 3017e33 commit 10cac80

File tree

8 files changed

+513
-27
lines changed

8 files changed

+513
-27
lines changed

docs/changelog/137511.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 137511
2+
summary: Fixes esql class cast bug in STATS at planning level
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3463,3 +3463,89 @@ employees:long | job_positions:keyword
34633463
15 | Tech Lead
34643464
;
34653465

3466+
fixClassCastBugWithCountDistinct
3467+
required_capability: fix_stats_classcast_exception
3468+
3469+
from airports
3470+
| rename scalerank AS x
3471+
| stats a = count(x), b = count(x) + count(x), c = count_distinct(x)
3472+
;
3473+
3474+
a:long | b:long | c:long
3475+
891 | 1782 | 8
3476+
;
3477+
3478+
fixClassCastBugWithValuesFn
3479+
required_capability: fix_stats_classcast_exception
3480+
3481+
ROW x = [1,2,3]
3482+
| STATS a = MV_COUNT(VALUES(x)), b = VALUES(x), c = SUM(x)
3483+
;
3484+
3485+
a:integer | b:integer | c:long
3486+
3 | [1, 2, 3] | 6
3487+
;
3488+
3489+
fixClassCastBugWithSeveralCountDistincts
3490+
required_capability: fix_stats_classcast_exception
3491+
3492+
ROW x = 1
3493+
| STATS a = 2*COUNT_DISTINCT(x), b = COUNT_DISTINCT(x), c = MAX(x)
3494+
;
3495+
3496+
a:long | b:long | c:integer
3497+
2 | 1 | 1
3498+
;
3499+
3500+
fixClassCastBugWithSeveralCounts
3501+
required_capability: inline_stats
3502+
required_capability: fix_stats_classcast_exception
3503+
3504+
FROM sample_data, sample_data_str
3505+
| EVAL one_ip = client_ip::ip
3506+
| INLINE STATS count1=count(client_ip::ip), count2=count(one_ip)
3507+
| KEEP count1, count2
3508+
| LIMIT 3
3509+
;
3510+
3511+
count1:long |count2:long
3512+
14 |14
3513+
14 |14
3514+
14 |14
3515+
;
3516+
3517+
fixClassCastBugWithMedianPlusCountDistinct
3518+
required_capability: fix_stats_classcast_exception
3519+
3520+
FROM sample_data_ts_long
3521+
| EVAL sym1 = 0, sym5 = 1
3522+
| STATS sym2 = median(sym5) + 0, sym3 = median(sym5), sym4 = count_distinct(sym1)
3523+
;
3524+
3525+
sym2:double |sym3:double | sym4:long
3526+
1.0 | 1.0 | 1
3527+
;
3528+
3529+
fixClassCastBug6
3530+
required_capability: fix_stats_classcast_exception
3531+
3532+
from airports
3533+
| rename scalerank AS x
3534+
| stats a = count(x), b = count(x) + count(x), c = count_distinct(x, 10), d = count_distinct(x, 10 + 1 - 1)
3535+
;
3536+
3537+
a:long | b:long | c:long | d:long
3538+
891 | 1782 | 8 | 8
3539+
;
3540+
3541+
fixClassCastBugWithSurrogateExpressions
3542+
required_capability: fix_stats_classcast_exception
3543+
3544+
from airports
3545+
| rename scalerank AS x
3546+
| stats a = median(x), b = percentile(x, 50), c = count_distinct(x)
3547+
;
3548+
3549+
a:double | b:double | c:long
3550+
6.0 | 6.0 | 8
3551+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1579,6 +1579,13 @@ public enum Cap {
15791579
*/
15801580
PUSHING_DOWN_EVAL_WITH_SCORE,
15811581

1582+
/**
1583+
* Fix for ClassCastException in STATS
1584+
* https://github.com/elastic/elasticsearch/issues/133992
1585+
* https://github.com/elastic/elasticsearch/issues/136598
1586+
*/
1587+
FIX_STATS_CLASSCAST_EXCEPTION,
1588+
15821589
/**
15831590
* Fix attribute equality to respect the name id of the attribute.
15841591
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.elasticsearch.xpack.esql.optimizer.rules.logical.CombineLimitTopN;
2020
import org.elasticsearch.xpack.esql.optimizer.rules.logical.CombineProjections;
2121
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ConstantFolding;
22+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggs;
2223
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ExtractAggregateCommonFilter;
2324
import org.elasticsearch.xpack.esql.optimizer.rules.logical.FoldNull;
2425
import org.elasticsearch.xpack.esql.optimizer.rules.logical.HoistRemoteEnrichLimit;
@@ -178,6 +179,8 @@ protected static Batch<LogicalPlan> operators() {
178179
new SplitInWithFoldableValue(),
179180
new PropagateEvalFoldables(),
180181
new ConstantFolding(),
182+
// then extract nested aggs top-level
183+
new DeduplicateAggs(),
181184
new PartiallyFoldCase(),
182185
// boolean
183186
new BooleanSimplification(),
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.optimizer.rules.logical;
9+
10+
import org.elasticsearch.common.util.Maps;
11+
import org.elasticsearch.xpack.esql.core.expression.Alias;
12+
import org.elasticsearch.xpack.esql.core.expression.AttributeMap;
13+
import org.elasticsearch.xpack.esql.core.expression.Expression;
14+
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
15+
import org.elasticsearch.xpack.esql.core.tree.Source;
16+
import org.elasticsearch.xpack.esql.core.util.Holder;
17+
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
18+
import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
19+
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
20+
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
21+
import org.elasticsearch.xpack.esql.plan.logical.Project;
22+
23+
import java.util.ArrayList;
24+
import java.util.List;
25+
import java.util.Map;
26+
27+
/**
28+
* This rule handles duplicate aggregate functions to avoid duplicate compute
29+
* stats a = min(x), b = min(x), c = count(*), d = count() by g
30+
* becomes
31+
* stats a = min(x), c = count(*) by g | eval b = a, d = c | keep a, b, c, d, g
32+
*/
33+
public final class DeduplicateAggs extends OptimizerRules.OptimizerRule<Aggregate> implements OptimizerRules.CoordinatorOnly {
34+
public DeduplicateAggs() {
35+
super(OptimizerRules.TransformDirection.UP);
36+
}
37+
38+
@Override
39+
protected LogicalPlan rule(Aggregate aggregate) {
40+
// an alias map for evaluatable grouping functions
41+
AttributeMap.Builder<Expression> aliasesBuilder = AttributeMap.builder();
42+
aggregate.forEachExpressionUp(Alias.class, a -> {
43+
if (a.child() instanceof GroupingFunction.NonEvaluatableGroupingFunction == false) {
44+
aliasesBuilder.put(a.toAttribute(), a.child());
45+
}
46+
});
47+
var aliases = aliasesBuilder.build();
48+
49+
// break down each aggregate into AggregateFunction and/or grouping key
50+
// preserve the projection at the end
51+
List<? extends NamedExpression> aggs = aggregate.aggregates();
52+
53+
// root/naked aggs
54+
Map<AggregateFunction, Alias> rootAggs = Maps.newLinkedHashMapWithExpectedSize(aggs.size());
55+
List<NamedExpression> newProjections = new ArrayList<>();
56+
// track the aggregate aggs (including grouping which is not an AggregateFunction)
57+
List<NamedExpression> newAggs = new ArrayList<>();
58+
59+
Holder<Boolean> changed = new Holder<>(false);
60+
61+
for (NamedExpression agg : aggs) {
62+
if (agg instanceof Alias as) {
63+
// use intermediate variable to mark child as final for lambda use
64+
Expression child = as.child();
65+
66+
// common case - handle duplicates
67+
if (child instanceof AggregateFunction af) {
68+
// canonical representation, with resolved aliases
69+
AggregateFunction canonical = (AggregateFunction) af.transformUp(e -> aliases.resolve(e, e));
70+
71+
Alias found = rootAggs.get(canonical);
72+
// aggregate is new
73+
if (found == null) {
74+
rootAggs.put(canonical, as);
75+
newAggs.add(as);
76+
newProjections.add(as.toAttribute());
77+
}
78+
// agg already exists - preserve the current alias but point it to the existing agg
79+
// thus don't add it to the list of aggs as we don't want duplicated compute
80+
else {
81+
changed.set(true);
82+
newProjections.add(as.replaceChild(found.toAttribute()));
83+
}
84+
}
85+
}
86+
// not an alias (e.g. grouping field)
87+
else {
88+
newAggs.add(agg);
89+
newProjections.add(agg.toAttribute());
90+
}
91+
}
92+
93+
LogicalPlan plan = aggregate;
94+
if (changed.get()) {
95+
Source source = aggregate.source();
96+
plan = aggregate.with(aggregate.child(), aggregate.groupings(), newAggs);
97+
// preserve initial projection
98+
plan = new Project(source, plan, newProjections);
99+
}
100+
101+
return plan;
102+
}
103+
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,12 @@ protected LogicalPlan rule(Aggregate aggregate) {
109109
else {
110110
changed.set(true);
111111
Expression aggExpression = child.transformUp(AggregateFunction.class, af -> {
112-
AggregateFunction canonical = (AggregateFunction) af.canonical();
112+
// canonical representation, with resolved aliases
113+
AggregateFunction canonical = (AggregateFunction) af.canonical().transformUp(e -> aliases.resolve(e, e));
113114
Alias alias = rootAggs.get(canonical);
114115
if (alias == null) {
115-
// create synthetic alias ove the found agg function
116-
alias = new Alias(af.source(), syntheticName(canonical, child, counter[0]++), canonical, null, true);
116+
// create synthetic alias over the found agg function
117+
alias = new Alias(af.source(), syntheticName(canonical, child, counter[0]++), af.canonical(), null, true);
117118
// and remember it to remove duplicates
118119
rootAggs.put(canonical, alias);
119120
// add it to the list of aggregates and continue

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -533,30 +533,6 @@ public void testAggsWithOverridingInputAndGrouping() throws Exception {
533533
);
534534
}
535535

536-
/**
537-
* <pre>{@code
538-
* Project[[s{r}#4 AS d, s{r}#4, last_name{f}#21, first_name{f}#18]]
539-
* \_Limit[1000[INTEGER]]
540-
* \_Aggregate[[last_name{f}#21, first_name{f}#18],[SUM(salary{f}#22) AS s, last_name{f}#21, first_name{f}#18]]
541-
* \_EsRelation[test][_meta_field{f}#23, emp_no{f}#17, first_name{f}#18, ..]
542-
* }</pre>
543-
*/
544-
public void testCombineProjectionWithDuplicateAggregation() {
545-
var plan = plan("""
546-
from test
547-
| stats s = sum(salary), d = sum(salary), c = sum(salary) by last_name, first_name
548-
| keep d, s, last_name, first_name
549-
""");
550-
551-
var project = as(plan, Project.class);
552-
assertThat(Expressions.names(project.projections()), contains("d", "s", "last_name", "first_name"));
553-
var limit = as(project.child(), Limit.class);
554-
var agg = as(limit.child(), Aggregate.class);
555-
assertThat(Expressions.names(agg.aggregates()), contains("s", "last_name", "first_name"));
556-
assertThat(Alias.unwrap(agg.aggregates().get(0)), instanceOf(Sum.class));
557-
assertThat(Expressions.names(agg.groupings()), contains("last_name", "first_name"));
558-
}
559-
560536
/**
561537
* <pre>{@code
562538
* Limit[1000[INTEGER]]

0 commit comments

Comments
 (0)