-
Notifications
You must be signed in to change notification settings - Fork 1.8k
fix: window unparsing #17367
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: window unparsing #17367
Changes from all commits
518a820
48fa677
4b5f21f
e05b31b
e9d4c20
783c73a
4447886
3f027c4
c3e5a24
357d05f
4f7077c
4a33b0f
7d27b4c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -207,6 +207,13 @@ pub trait Dialect: Send + Sync { | |
| Ok(None) | ||
| } | ||
|
|
||
| /// Allows the dialect to support the QUALIFY clause | ||
| /// | ||
| /// Some dialects, like Postgres, do not support the QUALIFY clause | ||
| fn supports_qualify(&self) -> bool { | ||
| true | ||
| } | ||
|
|
||
| /// Allows the dialect to override logic of formatting datetime with tz into string. | ||
| fn timestamp_with_tz_to_string(&self, dt: DateTime<Tz>, _unit: TimeUnit) -> String { | ||
| dt.to_string() | ||
|
|
@@ -274,6 +281,14 @@ impl Dialect for DefaultDialect { | |
| pub struct PostgreSqlDialect {} | ||
|
|
||
| impl Dialect for PostgreSqlDialect { | ||
| fn supports_qualify(&self) -> bool { | ||
| false | ||
| } | ||
|
|
||
| fn requires_derived_table_alias(&self) -> bool { | ||
| true | ||
| } | ||
|
Comment on lines
+288
to
+290
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the relation of this fix?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's another bug. postgres must has an alias for derived table.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, please make sure to explicitly mention this in the PR body so reviewers have context on the change |
||
|
|
||
| fn identifier_quote_style(&self, _: &str) -> Option<char> { | ||
| Some('"') | ||
| } | ||
|
|
@@ -424,6 +439,10 @@ impl Dialect for DuckDBDialect { | |
| pub struct MySqlDialect {} | ||
|
|
||
| impl Dialect for MySqlDialect { | ||
| fn supports_qualify(&self) -> bool { | ||
| false | ||
| } | ||
|
|
||
| fn identifier_quote_style(&self, _: &str) -> Option<char> { | ||
| Some('`') | ||
| } | ||
|
|
@@ -485,6 +504,10 @@ impl Dialect for MySqlDialect { | |
| pub struct SqliteDialect {} | ||
|
|
||
| impl Dialect for SqliteDialect { | ||
| fn supports_qualify(&self) -> bool { | ||
| false | ||
| } | ||
|
|
||
| fn identifier_quote_style(&self, _: &str) -> Option<char> { | ||
| Some('`') | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,12 +21,14 @@ use datafusion_common::{ | |
| assert_contains, Column, DFSchema, DFSchemaRef, DataFusionError, Result, | ||
| TableReference, | ||
| }; | ||
| use datafusion_expr::expr::{WindowFunction, WindowFunctionParams}; | ||
| use datafusion_expr::test::function_stub::{ | ||
| count_udaf, max_udaf, min_udaf, sum, sum_udaf, | ||
| }; | ||
| use datafusion_expr::{ | ||
| cast, col, lit, table_scan, wildcard, EmptyRelation, Expr, Extension, LogicalPlan, | ||
| LogicalPlanBuilder, Union, UserDefinedLogicalNode, UserDefinedLogicalNodeCore, | ||
| WindowFrame, WindowFunctionDefinition, | ||
| }; | ||
| use datafusion_functions::unicode; | ||
| use datafusion_functions_aggregate::grouping::grouping_udaf; | ||
|
|
@@ -2521,6 +2523,90 @@ fn test_unparse_left_semi_join_with_table_scan_projection() -> Result<()> { | |
| Ok(()) | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_unparse_window() -> Result<()> { | ||
| // SubqueryAlias: t | ||
| // Projection: t.k, t.v, rank() PARTITION BY [t.k] ORDER BY [t.v ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS r | ||
| // Filter: rank() PARTITION BY [t.k] ORDER BY [t.v ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW = UInt64(1) | ||
| // WindowAggr: windowExpr=[[rank() PARTITION BY [t.k] ORDER BY [t.v ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] | ||
| // TableScan: t projection=[k, v] | ||
|
|
||
| let schema = Schema::new(vec![ | ||
| Field::new("k", DataType::Int32, false), | ||
| Field::new("v", DataType::Int32, false), | ||
| ]); | ||
| let window_expr = Expr::WindowFunction(Box::new(WindowFunction { | ||
| fun: WindowFunctionDefinition::WindowUDF(rank_udwf()), | ||
| params: WindowFunctionParams { | ||
| args: vec![], | ||
| partition_by: vec![col("k")], | ||
| order_by: vec![col("v").sort(true, true)], | ||
| window_frame: WindowFrame::new(None), | ||
| null_treatment: None, | ||
| distinct: false, | ||
| filter: None, | ||
| }, | ||
| })); | ||
| let table = table_scan(Some("test"), &schema, Some(vec![0, 1]))?.build()?; | ||
| let plan = LogicalPlanBuilder::window_plan(table, vec![window_expr.clone()])?; | ||
|
|
||
| let name = plan.schema().fields().last().unwrap().name().clone(); | ||
| let plan = LogicalPlanBuilder::from(plan) | ||
| .filter(col(name.clone()).eq(lit(1i64)))? | ||
| .project(vec![col("k"), col("v"), col(name)])? | ||
| .build()?; | ||
|
|
||
| let unparser = Unparser::new(&UnparserPostgreSqlDialect {}); | ||
| let sql = unparser.plan_to_sql(&plan)?; | ||
| assert_snapshot!( | ||
| sql, | ||
| @r#"SELECT "test"."k", "test"."v", "rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" FROM (SELECT "test"."k" AS "k", "test"."v" AS "v", rank() OVER (PARTITION BY "test"."k" ORDER BY "test"."v" ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" FROM "test") AS "test" WHERE ("rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" = 1)"# | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These column names seem really chunky 🤔 Is it possible to also use the cases provided in the original issue, e.g. select k, v, r
from (
select *, rank() over(partition by k order by v) as r
from t
) t
where r = 1and select *, rank() over(partition by k order by v) as r
from t
qualify r = 1;And see if the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. column name is chunky. but the test is essential. because postgres and datafusion have different default column names for window function. I added a default alias to make sure unparsed sql can be queried in postgres. |
||
| ); | ||
|
|
||
| let unparser = Unparser::new(&UnparserMySqlDialect {}); | ||
| let sql = unparser.plan_to_sql(&plan)?; | ||
| assert_snapshot!( | ||
| sql, | ||
| @r#"SELECT `test`.`k`, `test`.`v`, `rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` FROM (SELECT `test`.`k` AS `k`, `test`.`v` AS `v`, rank() OVER (PARTITION BY `test`.`k` ORDER BY `test`.`v` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` FROM `test`) AS `test` WHERE (`rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` = 1)"# | ||
| ); | ||
|
|
||
| let unparser = Unparser::new(&SqliteDialect {}); | ||
| let sql = unparser.plan_to_sql(&plan)?; | ||
| assert_snapshot!( | ||
| sql, | ||
| @r#"SELECT `test`.`k`, `test`.`v`, `rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` FROM (SELECT `test`.`k` AS `k`, `test`.`v` AS `v`, rank() OVER (PARTITION BY `test`.`k` ORDER BY `test`.`v` ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` FROM `test`) AS `test` WHERE (`rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` = 1)"# | ||
| ); | ||
|
|
||
| let unparser = Unparser::new(&DefaultDialect {}); | ||
| let sql = unparser.plan_to_sql(&plan)?; | ||
| assert_snapshot!( | ||
| sql, | ||
| @r#"SELECT test.k, test.v, rank() OVER (PARTITION BY test.k ORDER BY test.v ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM test QUALIFY (rank() OVER (PARTITION BY test.k ORDER BY test.v ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) = 1)"# | ||
| ); | ||
|
|
||
| // without table qualifier | ||
| let table = table_scan(Some("test"), &schema, Some(vec![0, 1]))?.build()?; | ||
| let table = LogicalPlanBuilder::from(table) | ||
| .project(vec![col("k").alias("k"), col("v").alias("v")])? | ||
| .build()?; | ||
| let plan = LogicalPlanBuilder::window_plan(table, vec![window_expr])?; | ||
|
|
||
| let name = plan.schema().fields().last().unwrap().name().clone(); | ||
| let plan = LogicalPlanBuilder::from(plan) | ||
| .filter(col(name.clone()).eq(lit(1i64)))? | ||
| .project(vec![col("k"), col("v"), col(name)])? | ||
| .build()?; | ||
|
|
||
| let unparser = Unparser::new(&UnparserPostgreSqlDialect {}); | ||
| let sql = unparser.plan_to_sql(&plan)?; | ||
| assert_snapshot!( | ||
| sql, | ||
| @r#"SELECT "k", "v", "rank() PARTITION BY [k] ORDER BY [v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" FROM (SELECT "k" AS "k", "v" AS "v", rank() OVER (PARTITION BY "k" ORDER BY "v" ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "rank() PARTITION BY [k] ORDER BY [v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" FROM (SELECT "test"."k" AS "k", "test"."v" AS "v" FROM "test") AS "derived_projection") AS "__qualify_subquery" WHERE ("rank() PARTITION BY [k] ORDER BY [v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" = 1)"# | ||
| ); | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_like_filter() { | ||
| let statement = generate_round_trip_statement( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Like PostgreSQL, MySQL and SQLite also do not support the
Qualifykeyword, so they should also havesupports_qualifyreturningfalse.