Skip to content

Commit b6fd751

Browse files
committed
test: demonstrate offset not applied correctly with limit pushdown on multiple input streams
1 parent 9bc39a0 commit b6fd751

File tree

1 file changed

+133
-0
lines changed

1 file changed

+133
-0
lines changed

datafusion/sqllogictest/test_files/order.slt

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,3 +1196,136 @@ physical_plan
11961196
02)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
11971197
03)----SortExec: TopK(fetch=1), expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false]
11981198
04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], has_header=true
1199+
1200+
1201+
####################
1202+
# Test issue: TBD
1203+
####################
1204+
1205+
# all results
1206+
query II
1207+
SELECT b, sum(a) FROM ordered_table GROUP BY b order by b desc;
1208+
----
1209+
3 25
1210+
2 25
1211+
1 0
1212+
0 0
1213+
1214+
# limit only
1215+
query II
1216+
SELECT b, sum(a) FROM ordered_table GROUP BY b order by b desc LIMIT 3;
1217+
----
1218+
3 25
1219+
2 25
1220+
1 0
1221+
1222+
# offset only
1223+
query II
1224+
SELECT b, sum(a) FROM ordered_table GROUP BY b order by b desc OFFSET 1;
1225+
----
1226+
2 25
1227+
1 0
1228+
0 0
1229+
1230+
# TODO: fix this to properly apply offset
1231+
# offset + limit
1232+
query II
1233+
SELECT b, sum(a) FROM ordered_table GROUP BY b order by b desc OFFSET 1 LIMIT 2;
1234+
----
1235+
3 25
1236+
2 25
1237+
1 0
1238+
1239+
# TODO: fix this to not remove the skip=1 during the limit pushdown
1240+
# Applying offset & limit when there are multiple streams from a GROUP BY
1241+
query TT
1242+
EXPLAIN SELECT b, sum(a) FROM ordered_table GROUP BY b order by b desc OFFSET 1 LIMIT 2;
1243+
----
1244+
logical_plan
1245+
01)Limit: skip=1, fetch=2
1246+
02)--Sort: ordered_table.b DESC NULLS FIRST, fetch=3
1247+
03)----Aggregate: groupBy=[[ordered_table.b]], aggr=[[sum(CAST(ordered_table.a AS Int64))]]
1248+
04)------TableScan: ordered_table projection=[a, b]
1249+
physical_plan
1250+
01)SortPreservingMergeExec: [b@0 DESC], fetch=3
1251+
02)--SortExec: TopK(fetch=3), expr=[b@0 DESC], preserve_partitioning=[true]
1252+
03)----AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[sum(ordered_table.a)]
1253+
04)------CoalesceBatchesExec: target_batch_size=8192
1254+
05)--------RepartitionExec: partitioning=Hash([b@0], 2), input_partitions=2
1255+
06)----------AggregateExec: mode=Partial, gby=[b@1 as b], aggr=[sum(ordered_table.a)]
1256+
07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
1257+
08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], has_header=true
1258+
1259+
# TODO: fix this to not remove the skip=4 during the limit pushdown
1260+
# Applying offset & limit when there are multiple streams from a UNION
1261+
query TT
1262+
explain select * FROM (
1263+
select c FROM ordered_table
1264+
UNION ALL
1265+
select d FROM ordered_table
1266+
) order by 1 desc LIMIT 10 OFFSET 4;
1267+
----
1268+
logical_plan
1269+
01)Limit: skip=4, fetch=10
1270+
02)--Sort: ordered_table.c DESC NULLS FIRST, fetch=14
1271+
03)----Union
1272+
04)------Projection: CAST(ordered_table.c AS Int64) AS c
1273+
05)--------TableScan: ordered_table projection=[c]
1274+
06)------Projection: CAST(ordered_table.d AS Int64) AS c
1275+
07)--------TableScan: ordered_table projection=[d]
1276+
physical_plan
1277+
01)SortPreservingMergeExec: [c@0 DESC], fetch=14
1278+
02)--UnionExec
1279+
03)----SortExec: TopK(fetch=14), expr=[c@0 DESC], preserve_partitioning=[true]
1280+
04)------ProjectionExec: expr=[CAST(c@0 AS Int64) as c]
1281+
05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
1282+
06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true
1283+
07)----SortExec: TopK(fetch=14), expr=[c@0 DESC], preserve_partitioning=[true]
1284+
08)------ProjectionExec: expr=[CAST(d@0 AS Int64) as c]
1285+
09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
1286+
10)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[d], has_header=true
1287+
1288+
# Applying LIMIT & OFFSET to subquery.
1289+
query III
1290+
select t1.b, c, c2 FROM (
1291+
select b, c FROM ordered_table ORDER BY b desc, c desc OFFSET 1 LIMIT 4
1292+
) as t1 INNER JOIN (
1293+
select b, c as c2 FROM ordered_table ORDER BY b desc, d desc OFFSET 1 LIMIT 4
1294+
) as t2
1295+
ON t1.b = t2.b
1296+
ORDER BY t1.b desc, c desc, c2 desc;
1297+
----
1298+
3 98 96
1299+
3 98 89
1300+
3 98 82
1301+
3 98 79
1302+
3 97 96
1303+
3 97 89
1304+
3 97 82
1305+
3 97 79
1306+
3 96 96
1307+
3 96 89
1308+
3 96 82
1309+
3 96 79
1310+
3 95 96
1311+
3 95 89
1312+
3 95 82
1313+
3 95 79
1314+
1315+
# TODO: fix this; it does not work correctly.
1316+
# Apply OFFSET & LIMIT to both parent and child (subquery).
1317+
query III
1318+
select t1.b, c, c2 FROM (
1319+
select b, c FROM ordered_table ORDER BY b desc, c desc OFFSET 1 LIMIT 4
1320+
) as t1 INNER JOIN (
1321+
select b, c as c2 FROM ordered_table ORDER BY b desc, d desc OFFSET 1 LIMIT 4
1322+
) as t2
1323+
ON t1.b = t2.b
1324+
ORDER BY t1.b desc, c desc, c2 desc
1325+
OFFSET 3 LIMIT 2;
1326+
----
1327+
3 99 96
1328+
3 99 89
1329+
3 99 87
1330+
3 99 82
1331+
3 99 79

0 commit comments

Comments
 (0)