From d0901ecfc83beb6ed7cf3c5434a5ee687c45174e Mon Sep 17 00:00:00 2001 From: Kanaru Sato Date: Tue, 9 May 2023 01:38:03 +0900 Subject: [PATCH 01/18] add dialect for mysql with no-escape mode --- Cargo.toml | 1 + examples/cli.rs | 1 + src/dialect/mod.rs | 1 + src/dialect/mysql.rs | 11 ++++++++--- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a1126d278..a8cf56800 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ serde = { version = "1.0", features = ["derive"], optional = true } # https://github.com/rust-lang/cargo/issues/1596 serde_json = { version = "1.0", optional = true } sqlparser_derive = { version = "0.1.1", path = "derive", optional = true } +duplicate = "1.0.0" [dev-dependencies] simple_logger = "4.0" diff --git a/examples/cli.rs b/examples/cli.rs index a320a00bc..fd8feb4a1 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -42,6 +42,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--postgres" => Box::new(PostgreSqlDialect {}), "--ms" => Box::new(MsSqlDialect {}), "--mysql" => Box::new(MySqlDialect {}), + "--mysql-no-escape" => Box::new(MySqlNoEscapeDialect {}), "--snowflake" => Box::new(SnowflakeDialect {}), "--hive" => Box::new(HiveDialect {}), "--redshift" => Box::new(RedshiftSqlDialect {}), diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 5744ae65e..cea77dee0 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -35,6 +35,7 @@ pub use self::generic::GenericDialect; pub use self::hive::HiveDialect; pub use self::mssql::MsSqlDialect; pub use self::mysql::MySqlDialect; +pub use self::mysql::MySqlNoEscapeDialect; pub use self::postgresql::PostgreSqlDialect; pub use self::redshift::RedshiftSqlDialect; pub use self::snowflake::SnowflakeDialect; diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index ceab34810..18410c013 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -11,16 +11,21 @@ // limitations under the License. 
use crate::dialect::Dialect; +use duplicate::duplicate_item; /// [MySQL](https://www.mysql.com/) #[derive(Debug)] pub struct MySqlDialect {} -impl Dialect for MySqlDialect { +#[derive(Debug)] +pub struct MySqlNoEscapeDialect {} + +#[duplicate_item(name; [MySqlDialect]; [MySqlNoEscapeDialect])] +impl Dialect for name { fn is_identifier_start(&self, ch: char) -> bool { // See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html. - // Identifiers which begin with a digit are recognized while tokenizing numbers, - // so they can be distinguished from exponent numeric literals. + // We don't yet support identifiers beginning with numbers, as that + // makes it hard to distinguish numeric literals. ch.is_alphabetic() || ch == '_' || ch == '$' From 4ccecb723bbc3dd04c84a35ffa5ea5e19b506de6 Mon Sep 17 00:00:00 2001 From: Kanaru Sato Date: Tue, 9 May 2023 01:54:19 +0900 Subject: [PATCH 02/18] add and mod cases related to escape --- src/test_utils.rs | 12 ++++ tests/sqlparser_common.rs | 44 ++++++++++-- tests/sqlparser_mysql.rs | 146 +++++++++++++++++++++++++++++++++++--- 3 files changed, 188 insertions(+), 14 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index d01bbbab9..dcdb41b15 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -166,6 +166,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(HiveDialect {}), Box::new(RedshiftSqlDialect {}), Box::new(MySqlDialect {}), + Box::new(MySqlNoEscapeDialect {}), Box::new(BigQueryDialect {}), Box::new(SQLiteDialect {}), ], @@ -173,6 +174,17 @@ pub fn all_dialects() -> TestedDialects { } } +pub fn all_dialects_other_than_MySqlNoEscape() -> TestedDialects { + let mut all_dialects = all_dialects(); + let index_of_MySqlNoEscape = all_dialects + .dialects + .iter() + .position(|dialect| dialect.is::()) + .unwrap(); + all_dialects.dialects.remove(index_of_MySqlNoEscape); + return all_dialects; +} + pub fn assert_eq_vec(expected: &[&str], actual: &[T]) { assert_eq!( expected, diff --git 
a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 16fd623dd..5c4bee3c3 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -25,13 +25,14 @@ use sqlparser::ast::TableFactor::Pivot; use sqlparser::ast::*; use sqlparser::dialect::{ AnsiDialect, BigQueryDialect, ClickHouseDialect, GenericDialect, HiveDialect, MsSqlDialect, - MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect, SnowflakeDialect, + MySqlDialect, MySqlNoEscapeDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect, + SnowflakeDialect, }; use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::{Parser, ParserError, ParserOptions}; use test_utils::{ - all_dialects, assert_eq_vec, expr_from_projection, join, number, only, table, table_alias, - TestedDialects, + all_dialects, all_dialects_other_than_MySqlNoEscape, assert_eq_vec, expr_from_projection, join, + number, only, table, table_alias, TestedDialects, }; #[macro_use] @@ -945,6 +946,7 @@ fn parse_exponent_in_select() -> Result<(), ParserError> { // Box::new(HiveDialect {}), Box::new(MsSqlDialect {}), Box::new(MySqlDialect {}), + Box::new(MySqlNoEscapeDialect {}), Box::new(PostgreSqlDialect {}), Box::new(RedshiftSqlDialect {}), Box::new(SnowflakeDialect {}), @@ -999,11 +1001,13 @@ fn parse_select_with_date_column_name() { } #[test] -fn parse_escaped_single_quote_string_predicate() { +fn parse_escaped_single_quote_string_predicate_with_escape() { use self::BinaryOperator::*; let sql = "SELECT id, fname, lname FROM customer \ WHERE salary <> 'Jim''s salary'"; - let ast = verified_only_select(sql); + + let ast = verified_only_select_with_dialects_other_than_MySqlNoEscape(sql); + assert_eq!( Some(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("salary"))), @@ -1016,6 +1020,30 @@ fn parse_escaped_single_quote_string_predicate() { ); } +#[test] +fn parse_escaped_single_quote_string_predicate_with_no_escape() { + use self::BinaryOperator::*; + let sql = "SELECT id, fname, lname FROM 
customer \ + WHERE salary <> 'Jim''s salary'"; + + let ast = TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_only_select(sql); + + assert_eq!( + Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("salary"))), + op: NotEq, + right: Box::new(Expr::Value(Value::SingleQuotedString( + "Jim''s salary".to_string() + ))), + }), + ast.selection, + ); +} + #[test] fn parse_number() { let expr = verified_expr("1.0"); @@ -2300,6 +2328,7 @@ fn parse_create_table_hive_array() { Box::new(PostgreSqlDialect {}), Box::new(HiveDialect {}), Box::new(MySqlDialect {}), + Box::new(MySqlNoEscapeDialect {}), ], options: None, }; @@ -6212,6 +6241,10 @@ fn verified_only_select(query: &str) -> Select { all_dialects().verified_only_select(query) } +fn verified_only_select_with_dialects_other_than_MySqlNoEscape(query: &str) -> Select { + all_dialects_other_than_MySqlNoEscape().verified_only_select(query) +} + fn verified_expr(query: &str) -> Expr { all_dialects().verified_expr(query) } @@ -6875,6 +6908,7 @@ fn parse_non_latin_identifiers() { Box::new(MsSqlDialect {}), Box::new(RedshiftSqlDialect {}), Box::new(MySqlDialect {}), + Box::new(MySqlNoEscapeDialect {}), ], options: None, }; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 1c479bb18..4cd5c4d8f 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -17,7 +17,7 @@ use sqlparser::ast::Expr; use sqlparser::ast::Value; use sqlparser::ast::*; -use sqlparser::dialect::{GenericDialect, MySqlDialect}; +use sqlparser::dialect::{GenericDialect, MySqlDialect, MySqlNoEscapeDialect}; use sqlparser::tokenizer::Token; use test_utils::*; @@ -438,10 +438,14 @@ fn parse_quote_identifiers() { } #[test] -fn parse_quote_identifiers_2() { +fn parse_escaped_quote_identifiers_with_escape() { let sql = "SELECT `quoted `` identifier`"; assert_eq!( - mysql().verified_stmt(sql), + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: 
None, + } + .verified_stmt(sql), Statement::Query(Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { @@ -472,10 +476,52 @@ fn parse_quote_identifiers_2() { } #[test] -fn parse_quote_identifiers_3() { +fn parse_escaped_quote_identifiers_with_no_escape() { + let sql = "SELECT `quoted `` identifier`"; + assert_eq!( + TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "quoted `` identifier".into(), + quote_style: Some('`'), + }))], + into: None, + from: vec![], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + qualify: None + }))), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + locks: vec![], + })) + ); +} + +#[test] +fn parse_escaped_backticks_with_escape() { let sql = "SELECT ```quoted identifier```"; assert_eq!( - mysql().verified_stmt(sql), + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: None, + } + .verified_stmt(sql), Statement::Query(Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { @@ -505,6 +551,44 @@ fn parse_quote_identifiers_3() { ); } +#[test] +fn parse_escaped_backticks_with_no_escape() { + let sql = "SELECT ```quoted identifier```"; + assert_eq!( + TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "``quoted identifier``".into(), + quote_style: Some('`'), + }))], + into: None, + from: vec![], + lateral_views: vec![], + 
selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + qualify: None + }))), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + locks: vec![], + })) + ); +} + #[test] fn parse_unterminated_escape() { let sql = r#"SELECT 'I\'m not fine\'"#; @@ -517,9 +601,13 @@ fn parse_unterminated_escape() { } #[test] -fn parse_escaped_string() { +fn parse_escaped_string_with_escape() { fn assert_mysql_query_value(sql: &str, quoted: &str) { - let stmt = mysql().one_statement_parses_to(sql, ""); + let stmt = TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: None, + } + .one_statement_parses_to(sql, ""); match stmt { Statement::Query(query) => match *query.body { @@ -548,6 +636,42 @@ fn parse_escaped_string() { assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} a "); } +#[test] +fn parse_escaped_string_with_no_escape() { + fn assert_mysql_query_value(sql: &str, quoted: &str) { + let stmt = TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .one_statement_parses_to(sql, ""); + + match stmt { + Statement::Query(query) => match *query.body { + SetExpr::Select(value) => { + let expr = expr_from_projection(only(&value.projection)); + assert_eq!( + *expr, + Expr::Value(Value::SingleQuotedString(quoted.to_string())) + ); + } + _ => unreachable!(), + }, + _ => unreachable!(), + }; + } + let sql = r#"SELECT 'I\'m fine'"#; + assert_mysql_query_value(sql, r#"I\'m fine"#); + + let sql = r#"SELECT 'I''m fine'"#; + assert_mysql_query_value(sql, r#"I''m fine"#); + + let sql = r#"SELECT 'I\"m fine'"#; + assert_mysql_query_value(sql, r#"I\"m fine"#); + + let sql = r#"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"#; + assert_mysql_query_value(sql, r#"Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ "#); +} + #[test] fn parse_create_table_with_minimum_display_width() { let sql = "CREATE TABLE foo (bar_tinyint TINYINT(3), bar_smallint 
SMALLINT(5), bar_mediumint MEDIUMINT(6), bar_int INT(11), bar_bigint BIGINT(20))"; @@ -1349,14 +1473,18 @@ fn parse_create_table_with_fulltext_definition_should_not_accept_constraint_name fn mysql() -> TestedDialects { TestedDialects { - dialects: vec![Box::new(MySqlDialect {})], + dialects: vec![Box::new(MySqlDialect {}), Box::new(MySqlNoEscapeDialect {})], options: None, } } fn mysql_and_generic() -> TestedDialects { TestedDialects { - dialects: vec![Box::new(MySqlDialect {}), Box::new(GenericDialect {})], + dialects: vec![ + Box::new(MySqlDialect {}), + Box::new(MySqlNoEscapeDialect {}), + Box::new(GenericDialect {}), + ], options: None, } } From 57654e704a4358b3852cbd8e44e6d69ca1ad540c Mon Sep 17 00:00:00 2001 From: Kanaru Sato Date: Tue, 9 May 2023 03:37:06 +0900 Subject: [PATCH 03/18] mod dialect_from_str() --- src/dialect/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index cea77dee0..a7abfc695 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -155,6 +155,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef) -> Option Some(Box::new(GenericDialect)), "mysql" => Some(Box::new(MySqlDialect {})), + "mysql-no-escape" => Some(Box::new(MySqlNoEscapeDialect {})), "postgresql" | "postgres" => Some(Box::new(PostgreSqlDialect {})), "hive" => Some(Box::new(HiveDialect {})), "sqlite" => Some(Box::new(SQLiteDialect {})), @@ -202,6 +203,8 @@ mod tests { assert!(parse_dialect("generic").is::()); assert!(parse_dialect("mysql").is::()); assert!(parse_dialect("MySql").is::()); + assert!(parse_dialect("mysql-no-escape").is::()); + assert!(parse_dialect("MySql-No-Escape").is::()); assert!(parse_dialect("postgresql").is::()); assert!(parse_dialect("postgres").is::()); assert!(parse_dialect("hive").is::()); From af1cc78d2610b0ce6a832e9e3a55d1a542158869 Mon Sep 17 00:00:00 2001 From: Kanaru Sato Date: Tue, 9 May 2023 03:37:42 +0900 Subject: [PATCH 04/18] mod parse logic --- src/ast/value.rs | 20 +++++++++--- 
src/parser.rs | 30 ++++++++++-------- src/tokenizer.rs | 81 ++++++++++++++++++++++++++++++++++-------------- 3 files changed, 90 insertions(+), 41 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index 95ea978d0..df940e1df 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -184,11 +184,21 @@ pub struct EscapeQuotedString<'a> { impl<'a> fmt::Display for EscapeQuotedString<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for c in self.string.chars() { - if c == self.quote { - write!(f, "{q}{q}", q = self.quote)?; - } else { - write!(f, "{c}")?; + let mut peekable_chars = self.string.chars().peekable(); + while let Some(&ch) = peekable_chars.peek() { + let quote = self.quote; + match ch { + char if char == quote => { + write!(f, "{char}{char}", char = self.quote)?; + peekable_chars.next(); + if peekable_chars.peek().map(|c| *c == quote).unwrap_or(false) { + peekable_chars.next(); + } + } + _ => { + write!(f, "{ch}")?; + peekable_chars.next(); + } } } Ok(()) diff --git a/src/parser.rs b/src/parser.rs index 82cbe9d12..f84efbd07 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -742,7 +742,7 @@ impl<'a> Parser<'a> { } Keyword::ARRAY_AGG => self.parse_array_agg_expr(), Keyword::NOT => self.parse_not(), - Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { + Keyword::MATCH if dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) => { self.parse_match_against() } // Here `w` is a word, check if it's a part of a multi-part @@ -3566,7 +3566,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(Some(ColumnOption::Check(expr))) } else if self.parse_keyword(Keyword::AUTO_INCREMENT) - && dialect_of!(self is MySqlDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) { // Support AUTO_INCREMENT for MySQL Ok(Some(ColumnOption::DialectSpecific(vec![ @@ -3580,7 +3580,7 @@ impl<'a> Parser<'a> { Token::make_keyword("AUTOINCREMENT"), ]))) } else if 
self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) - && dialect_of!(self is MySqlDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) { let expr = self.parse_expr()?; Ok(Some(ColumnOption::OnUpdate(expr))) @@ -3716,7 +3716,7 @@ impl<'a> Parser<'a> { } Token::Word(w) if (w.keyword == Keyword::INDEX || w.keyword == Keyword::KEY) - && dialect_of!(self is GenericDialect | MySqlDialect) => + && dialect_of!(self is GenericDialect | MySqlDialect | MySqlNoEscapeDialect) => { let display_as_key = w.keyword == Keyword::KEY; @@ -3741,7 +3741,7 @@ impl<'a> Parser<'a> { } Token::Word(w) if (w.keyword == Keyword::FULLTEXT || w.keyword == Keyword::SPATIAL) - && dialect_of!(self is GenericDialect | MySqlDialect) => + && dialect_of!(self is GenericDialect | MySqlDialect | MySqlNoEscapeDialect) => { if let Some(name) = name { return self.expected( @@ -3900,7 +3900,7 @@ impl<'a> Parser<'a> { cascade, } } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) - && dialect_of!(self is MySqlDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) { AlterTableOperation::DropPrimaryKey } else { @@ -4995,7 +4995,7 @@ impl<'a> Parser<'a> { offset = Some(self.parse_offset()?) } - if dialect_of!(self is GenericDialect | MySqlDialect) + if dialect_of!(self is GenericDialect | MySqlDialect | MySqlNoEscapeDialect) && limit.is_some() && offset.is_none() && self.consume_token(&Token::Comma) @@ -5088,7 +5088,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; SetExpr::Query(Box::new(subquery)) } else if self.parse_keyword(Keyword::VALUES) { - let is_mysql = dialect_of!(self is MySqlDialect); + let is_mysql = dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect); SetExpr::Values(self.parse_values(is_mysql)?) 
} else if self.parse_keyword(Keyword::TABLE) { SetExpr::Table(Box::new(self.parse_as_table()?)) @@ -5365,7 +5365,7 @@ impl<'a> Parser<'a> { }; if variable.to_string().eq_ignore_ascii_case("NAMES") - && dialect_of!(self is MySqlDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) { if self.parse_keyword(Keyword::DEFAULT) { return Ok(Statement::SetNamesDefault {}); @@ -5458,7 +5458,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::COLLATION) { Ok(self.parse_show_collation()?) } else if self.parse_keyword(Keyword::VARIABLES) - && dialect_of!(self is MySqlDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) { // TODO: Support GLOBAL|SESSION Ok(Statement::ShowVariables { @@ -6133,7 +6133,7 @@ impl<'a> Parser<'a> { // Hive lets you put table here regardless let table = self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name()?; - let is_mysql = dialect_of!(self is MySqlDialect); + let is_mysql = dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect); let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; let partitioned = if self.parse_keyword(Keyword::PARTITION) { @@ -6762,7 +6762,7 @@ impl<'a> Parser<'a> { "INSERT in MATCHED merge clause".to_string(), )); } - let is_mysql = dialect_of!(self is MySqlDialect); + let is_mysql = dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect); let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; self.expect_keyword(Keyword::VALUES)?; let values = self.parse_values(is_mysql)?; @@ -7333,7 +7333,11 @@ mod tests { } let dialect = TestedDialects { - dialects: vec![Box::new(GenericDialect {}), Box::new(MySqlDialect {})], + dialects: vec![ + Box::new(GenericDialect {}), + Box::new(MySqlDialect {}), + Box::new(MySqlNoEscapeDialect {}), + ], options: None, }; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a550c4f5d..b711569d7 100644 --- a/src/tokenizer.rs 
+++ b/src/tokenizer.rs @@ -36,7 +36,7 @@ use sqlparser_derive::{Visit, VisitMut}; use crate::ast::DollarQuotedString; use crate::dialect::{BigQueryDialect, GenericDialect, SnowflakeDialect}; -use crate::dialect::{Dialect, MySqlDialect}; +use crate::dialect::{Dialect, MySqlDialect, MySqlNoEscapeDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; /// SQL Token enumeration @@ -636,7 +636,7 @@ impl<'a> Tokenizer<'a> { let error_loc = chars.location(); chars.next(); // consume the opening quote let quote_end = Word::matching_end_quote(quote_start); - let (s, last_char) = parse_quoted_ident(chars, quote_end); + let (s, last_char) = self.parse_quoted_ident(chars, quote_end); if last_char == Some(quote_end) { Ok(Some(Token::make_word(&s, Some(quote_start)))) @@ -705,7 +705,9 @@ impl<'a> Tokenizer<'a> { // mysql dialect supports identifiers that start with a numeric prefix, // as long as they aren't an exponent number. - if dialect_of!(self is MySqlDialect) && exponent_part.is_empty() { + if dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect) + && exponent_part.is_empty() + { let word = peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch)); @@ -1112,6 +1114,10 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume if chars.peek().map(|c| *c == quote_style).unwrap_or(false) { s.push(ch); + if dialect_of!(self is MySqlNoEscapeDialect) { + // In no-escape mode, the given query has to be saved completely + s.push(ch); + } chars.next(); } else { return Ok(s); @@ -1120,7 +1126,7 @@ impl<'a> Tokenizer<'a> { '\\' => { // consume chars.next(); - // slash escaping is specific to MySQL dialect + // slash escaping is specific to MySQL dialect. 
if dialect_of!(self is MySqlDialect) { if let Some(next) = chars.peek() { // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences @@ -1137,6 +1143,13 @@ impl<'a> Tokenizer<'a> { s.push(n); chars.next(); // consume next } + } else if dialect_of!(self is MySqlNoEscapeDialect) { + // In no-escape mode, the given query has to be saved completely including backslashes. + if let Some(next) = chars.peek() { + s.push(ch); + s.push(*next); + chars.next(); // consume next + } } else { s.push(ch); } @@ -1183,6 +1196,29 @@ impl<'a> Tokenizer<'a> { } } + fn parse_quoted_ident(&self, chars: &mut State, quote_end: char) -> (String, Option) { + let mut last_char = None; + let mut s = String::new(); + while let Some(ch) = chars.next() { + if ch == quote_end { + if chars.peek() == Some("e_end) { + chars.next(); + s.push(ch); + if dialect_of!(self is MySqlNoEscapeDialect) { + // In no-escape mode, the given query has to be saved completely + s.push(ch); + } + } else { + last_char = Some(quote_end); + break; + } + } else { + s.push(ch); + } + } + (s, last_char) + } + #[allow(clippy::unnecessary_wraps)] fn consume_and_return( &self, @@ -1210,25 +1246,6 @@ fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool s } -fn parse_quoted_ident(chars: &mut State, quote_end: char) -> (String, Option) { - let mut last_char = None; - let mut s = String::new(); - while let Some(ch) = chars.next() { - if ch == quote_end { - if chars.peek() == Some("e_end) { - chars.next(); - s.push(ch); - } else { - last_char = Some(quote_end); - break; - } - } else { - s.push(ch); - } - } - (s, last_char) -} - #[cfg(test)] mod tests { use super::*; @@ -1870,6 +1887,24 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_quoted_identifier_with_no_escape() { + let sql = r#" "a "" b" "a """ "c """"" "#; + let dialect = MySqlNoEscapeDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, sql); + let tokens = 
tokenizer.tokenize().unwrap(); + let expected = vec![ + Token::Whitespace(Whitespace::Space), + Token::DoubleQuotedString(String::from(r#"a "" b"#)), + Token::Whitespace(Whitespace::Space), + Token::DoubleQuotedString(String::from(r#"a """#)), + Token::Whitespace(Whitespace::Space), + Token::DoubleQuotedString(String::from(r#"c """""#)), + Token::Whitespace(Whitespace::Space), + ]; + compare(expected, tokens); + } + #[test] fn tokenize_with_location() { let sql = "SELECT a,\n b"; From 0e467a1cc92567efbaa9a6bff0b795ed2fb15893 Mon Sep 17 00:00:00 2001 From: Kanaru Sato Date: Tue, 9 May 2023 11:47:06 +0900 Subject: [PATCH 05/18] mod spell --- src/test_utils.rs | 6 +++--- tests/sqlparser_common.rs | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index dcdb41b15..99e5eaed5 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -174,14 +174,14 @@ pub fn all_dialects() -> TestedDialects { } } -pub fn all_dialects_other_than_MySqlNoEscape() -> TestedDialects { +pub fn all_dialects_other_than_mysqlnoescape() -> TestedDialects { let mut all_dialects = all_dialects(); - let index_of_MySqlNoEscape = all_dialects + let index_of_mysqlnoescape = all_dialects .dialects .iter() .position(|dialect| dialect.is::()) .unwrap(); - all_dialects.dialects.remove(index_of_MySqlNoEscape); + all_dialects.dialects.remove(index_of_mysqlnoescape); return all_dialects; } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5c4bee3c3..be751d681 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -31,7 +31,7 @@ use sqlparser::dialect::{ use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::{Parser, ParserError, ParserOptions}; use test_utils::{ - all_dialects, all_dialects_other_than_MySqlNoEscape, assert_eq_vec, expr_from_projection, join, + all_dialects, all_dialects_other_than_mysqlnoescape, assert_eq_vec, expr_from_projection, join, number, only, table, table_alias, 
TestedDialects, }; @@ -1006,7 +1006,7 @@ fn parse_escaped_single_quote_string_predicate_with_escape() { let sql = "SELECT id, fname, lname FROM customer \ WHERE salary <> 'Jim''s salary'"; - let ast = verified_only_select_with_dialects_other_than_MySqlNoEscape(sql); + let ast = verified_only_select_with_dialects_other_than_mysqlnoescape(sql); assert_eq!( Some(Expr::BinaryOp { @@ -6241,8 +6241,8 @@ fn verified_only_select(query: &str) -> Select { all_dialects().verified_only_select(query) } -fn verified_only_select_with_dialects_other_than_MySqlNoEscape(query: &str) -> Select { - all_dialects_other_than_MySqlNoEscape().verified_only_select(query) +fn verified_only_select_with_dialects_other_than_mysqlnoescape(query: &str) -> Select { + all_dialects_other_than_mysqlnoescape().verified_only_select(query) } fn verified_expr(query: &str) -> Expr { From ba313e865665a2f9012d24d1dbd654b55adff6ff Mon Sep 17 00:00:00 2001 From: Kanaru Sato Date: Tue, 9 May 2023 12:39:27 +0900 Subject: [PATCH 06/18] cargo clippy --- src/test_utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index 99e5eaed5..724c0bdf4 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -182,7 +182,7 @@ pub fn all_dialects_other_than_mysqlnoescape() -> TestedDialects { .position(|dialect| dialect.is::()) .unwrap(); all_dialects.dialects.remove(index_of_mysqlnoescape); - return all_dialects; + all_dialects } pub fn assert_eq_vec(expected: &[&str], actual: &[T]) { From 598d9e97ada700d1cf57b2ea17b383365dd2b542 Mon Sep 17 00:00:00 2001 From: Kanaru Sato Date: Wed, 10 May 2023 12:59:38 +0900 Subject: [PATCH 07/18] rm duplicate crate --- Cargo.toml | 1 - src/dialect/mysql.rs | 31 ++++++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a8cf56800..a1126d278 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,6 @@ serde = { version = "1.0", features = ["derive"], optional = true } # 
https://github.com/rust-lang/cargo/issues/1596 serde_json = { version = "1.0", optional = true } sqlparser_derive = { version = "0.1.1", path = "derive", optional = true } -duplicate = "1.0.0" [dev-dependencies] simple_logger = "4.0" diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 18410c013..b7c20a462 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -11,21 +11,42 @@ // limitations under the License. use crate::dialect::Dialect; -use duplicate::duplicate_item; /// [MySQL](https://www.mysql.com/) #[derive(Debug)] pub struct MySqlDialect {} +impl Dialect for MySqlDialect { + fn is_identifier_start(&self, ch: char) -> bool { + // See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html. + // Identifiers which begin with a digit are recognized while tokenizing numbers, + // so they can be distinguished from exponent numeric literals. + ch.is_alphabetic() + || ch == '_' + || ch == '$' + || ch == '@' + || ('\u{0080}'..='\u{ffff}').contains(&ch) + } + + fn is_identifier_part(&self, ch: char) -> bool { + self.is_identifier_start(ch) || ch.is_ascii_digit() + } + + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '`' + } +} + +/// [MySQL](https://www.mysql.com/) +/// You should use it if you don't want to escape queries when both parsing and serializing them. #[derive(Debug)] pub struct MySqlNoEscapeDialect {} -#[duplicate_item(name; [MySqlDialect]; [MySqlNoEscapeDialect])] -impl Dialect for name { +impl Dialect for MySqlNoEscapeDialect { fn is_identifier_start(&self, ch: char) -> bool { // See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html. - // We don't yet support identifiers beginning with numbers, as that - // makes it hard to distinguish numeric literals. + // Identifiers which begin with a digit are recognized while tokenizing numbers, + // so they can be distinguished from exponent numeric literals. 
ch.is_alphabetic() || ch == '_' || ch == '$' From 790804ebc81de07658d685ca3a443946261c5131 Mon Sep 17 00:00:00 2001 From: canalun Date: Sat, 17 Jun 2023 15:27:30 +0900 Subject: [PATCH 08/18] mod test --- src/dialect/mysql.rs | 18 ++++++++++++++++++ tests/sqlparser_common.rs | 4 ++-- tests/sqlparser_mysql.rs | 2 ++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 5b2910716..6b32adb97 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -86,4 +86,22 @@ impl Dialect for MySqlNoEscapeDialect { fn is_delimited_identifier_start(&self, ch: char) -> bool { ch == '`' } + + fn parse_infix( + &self, + parser: &mut crate::parser::Parser, + expr: &crate::ast::Expr, + _precedence: u8, + ) -> Option> { + // Parse DIV as an operator + if parser.parse_keyword(Keyword::DIV) { + Some(Ok(Expr::BinaryOp { + left: Box::new(expr.clone()), + op: BinaryOperator::MyIntegerDivide, + right: Box::new(parser.parse_expr().unwrap()), + })) + } else { + None + } + } } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 4a40134c5..1c45a5d97 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -24,8 +24,8 @@ use sqlparser::ast::TableFactor::Pivot; use sqlparser::ast::*; use sqlparser::dialect::{ AnsiDialect, BigQueryDialect, ClickHouseDialect, DuckDbDialect, GenericDialect, HiveDialect, - MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect, MySqlNoEscapeDialect, - SnowflakeDialect, + MsSqlDialect, MySqlDialect, MySqlNoEscapeDialect, PostgreSqlDialect, RedshiftSqlDialect, + SQLiteDialect, SnowflakeDialect, }; use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::{Parser, ParserError, ParserOptions}; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 05065e34f..c1ff1eccc 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -503,6 +503,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { distribute_by: 
vec![], sort_by: vec![], having: None, + named_window: vec![], qualify: None }))), order_by: vec![], @@ -580,6 +581,7 @@ fn parse_escaped_backticks_with_no_escape() { distribute_by: vec![], sort_by: vec![], having: None, + named_window: vec![], qualify: None }))), order_by: vec![], From 8d7f5be73838b09a8629bab4d8a1f99c58084d71 Mon Sep 17 00:00:00 2001 From: canalun Date: Sat, 17 Jun 2023 18:21:30 +0900 Subject: [PATCH 09/18] mod logic of Display for Double/SingleQuotedString and add test --- src/ast/value.rs | 19 ++++++++++++--- tests/sqlparser_mysql.rs | 51 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index df940e1df..9995dfcab 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -68,7 +68,7 @@ impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), - Value::DoubleQuotedString(v) => write!(f, "\"{v}\""), + Value::DoubleQuotedString(v) => write!(f, "\"{}\"", escape_double_quote_string(v)), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), Value::DollarQuotedString(v) => write!(f, "{v}"), Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), @@ -184,15 +184,23 @@ pub struct EscapeQuotedString<'a> { impl<'a> fmt::Display for EscapeQuotedString<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let quote = self.quote; + let mut previous_char = char::default(); let mut peekable_chars = self.string.chars().peekable(); while let Some(&ch) = peekable_chars.peek() { - let quote = self.quote; match ch { char if char == quote => { - write!(f, "{char}{char}", char = self.quote)?; + if previous_char == '\\' { + write!(f, "{char}")?; + peekable_chars.next(); + continue; + } peekable_chars.next(); if peekable_chars.peek().map(|c| *c == quote).unwrap_or(false) { + write!(f, "{char}{char}")?; 
peekable_chars.next(); + } else { + write!(f, "{char}{char}")?; } } _ => { @@ -200,6 +208,7 @@ impl<'a> fmt::Display for EscapeQuotedString<'a> { peekable_chars.next(); } } + previous_char = ch; } Ok(()) } @@ -213,6 +222,10 @@ pub fn escape_single_quote_string(s: &str) -> EscapeQuotedString<'_> { escape_quoted_string(s, '\'') } +pub fn escape_double_quote_string(s: &str) -> EscapeQuotedString<'_> { + escape_quoted_string(s, '\"') +} + pub struct EscapeEscapedStringLiteral<'a>(&'a str); impl<'a> fmt::Display for EscapeEscapedStringLiteral<'a> { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index c1ff1eccc..f08a72040 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -676,6 +676,57 @@ fn parse_escaped_string_with_no_escape() { assert_mysql_query_value(sql, r#"Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ "#); } +#[test] +fn check_roundtrip_of_escaped_string() { + TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_stmt(r#"SELECT 'I\'m fine'"#); + TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_stmt(r#"SELECT 'I''m fine'"#); + TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_stmt(r#"SELECT 'I\\\'m fine'"#); + TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_stmt(r#"SELECT 'I\\\'m fine'"#); + + TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_stmt(r#"SELECT "I\"m fine""#); + TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_stmt(r#"SELECT "I""m fine""#); + TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_stmt(r#"SELECT "I\\\"m fine""#); + TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_stmt(r#"SELECT "I\\\"m 
fine""#); + + TestedDialects { + dialects: vec![Box::new(MySqlNoEscapeDialect {})], + options: None, + } + .verified_stmt(r#"SELECT "I'm ''fine''""#); +} + #[test] fn parse_create_table_with_minimum_display_width() { let sql = "CREATE TABLE foo (bar_tinyint TINYINT(3), bar_smallint SMALLINT(5), bar_mediumint MEDIUMINT(6), bar_int INT(11), bar_bigint BIGINT(20))"; From 07fd54596f077389e9e871aadb4c390d1be83f68 Mon Sep 17 00:00:00 2001 From: canalun Date: Tue, 6 Jun 2023 01:26:35 +0900 Subject: [PATCH 10/18] update: add no-escape field to parserOptions and tokenizerOptions that is newly defined --- src/ast/visitor.rs | 2 +- src/parser.rs | 8 +++- src/tokenizer.rs | 73 ++++++++++++++++++++---------------- tests/sqlparser_common.rs | 1 + tests/sqlparser_snowflake.rs | 4 +- 5 files changed, 51 insertions(+), 37 deletions(-) diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 81343220a..e857087e7 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -632,7 +632,7 @@ mod tests { fn do_visit(sql: &str) -> Vec { let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); + let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let s = Parser::new(&dialect) .with_tokens(tokens) diff --git a/src/parser.rs b/src/parser.rs index ec74e2555..2f1dd4aa5 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -198,6 +198,7 @@ const DEFAULT_REMAINING_DEPTH: usize = 50; #[derive(Debug, Default, Clone, PartialEq, Eq)] pub struct ParserOptions { pub trailing_commas: bool, + pub no_escape: bool, } pub struct Parser<'a> { @@ -207,7 +208,7 @@ pub struct Parser<'a> { /// The current dialect to use dialect: &'a dyn Dialect, /// Additional options that allow you to mix & match behavior otherwise - /// constrained to certain dialects (e.g. trailing commas) + /// constrained to certain dialects (e.g. trailing commas) and/or format of parse (e.g. 
no escape) options: ParserOptions, /// ensure the stack does not overflow by limiting recursion depth recursion_counter: RecursionCounter, @@ -317,7 +318,10 @@ impl<'a> Parser<'a> { /// See example on [`Parser::new()`] for an example pub fn try_with_sql(self, sql: &str) -> Result { debug!("Parsing sql '{}'...", sql); - let mut tokenizer = Tokenizer::new(self.dialect, sql); + let tokenizer_options = TokenizerOptions { + no_escape: self.options.no_escape, + }; + let mut tokenizer = Tokenizer::new(self.dialect, sql, &tokenizer_options); let tokens = tokenizer.tokenize()?; Ok(self.with_tokens(tokens)) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 16102103f..3be97b7c9 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -447,12 +447,21 @@ impl<'a> State<'a> { pub struct Tokenizer<'a> { dialect: &'a dyn Dialect, query: &'a str, + options: &'a TokenizerOptions, +} + +pub struct TokenizerOptions { + pub no_escape: bool, } impl<'a> Tokenizer<'a> { /// Create a new SQL tokenizer for the specified SQL statement - pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self { - Self { dialect, query } + pub fn new(dialect: &'a dyn Dialect, query: &'a str, options: &'a TokenizerOptions) -> Self { + Self { + dialect, + query, + options, + } } /// Tokenize the statement and produce a vector of tokens @@ -1276,7 +1285,7 @@ mod tests { fn tokenize_select_1() { let sql = String::from("SELECT 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1292,7 +1301,7 @@ mod tests { fn tokenize_select_float() { let sql = String::from("SELECT .1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected 
= vec![ @@ -1308,7 +1317,7 @@ mod tests { fn tokenize_select_exponent() { let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1343,7 +1352,7 @@ mod tests { fn tokenize_scalar_function() { let sql = String::from("SELECT sqrt(1)"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1362,7 +1371,7 @@ mod tests { fn tokenize_string_string_concat() { let sql = String::from("SELECT 'a' || 'b'"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1381,7 +1390,7 @@ mod tests { fn tokenize_bitwise_op() { let sql = String::from("SELECT one | two ^ three"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1405,7 +1414,7 @@ mod tests { let sql = String::from("SELECT true XOR true, false XOR false, true XOR false, false XOR true"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1445,7 +1454,7 @@ mod tests { fn tokenize_simple_select() { let sql = String::from("SELECT * FROM customer WHERE id = 1 LIMIT 5"); let dialect = 
GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1477,7 +1486,7 @@ mod tests { fn tokenize_explain_select() { let sql = String::from("EXPLAIN SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1507,7 +1516,7 @@ mod tests { fn tokenize_explain_analyze_select() { let sql = String::from("EXPLAIN ANALYZE SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1539,7 +1548,7 @@ mod tests { fn tokenize_string_predicate() { let sql = String::from("SELECT * FROM customer WHERE salary != 'Not Provided'"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1568,7 +1577,7 @@ mod tests { let sql = String::from("\n💝مصطفىh"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ @@ -1584,7 +1593,7 @@ mod tests { let sql = String::from("'foo\r\nbar\nbaz'"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { 
no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())]; compare(expected, tokens); @@ -1595,7 +1604,7 @@ mod tests { let sql = String::from("select 'foo"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); assert_eq!( tokenizer.tokenize(), Err(TokenizerError { @@ -1611,7 +1620,7 @@ mod tests { let sql = String::from("SELECT \"なにか\" FROM Y WHERE \"なにか\" = 'test;"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); assert_eq!( tokenizer.tokenize(), Err(TokenizerError { @@ -1627,7 +1636,7 @@ mod tests { let sql = String::from("\n\nSELECT * FROM table\t💝مصطفىh"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ @@ -1651,7 +1660,7 @@ mod tests { fn tokenize_right_arrow() { let sql = String::from("FUNCTION(key=>value)"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::make_word("FUNCTION", None), @@ -1668,7 +1677,7 @@ mod tests { fn tokenize_is_null() { let sql = String::from("a IS NULL"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1687,7 +1696,7 @@ mod tests { let sql = String::from("0--this 
is a comment\n1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), @@ -1705,7 +1714,7 @@ mod tests { let sql = String::from("--this is a comment"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), @@ -1719,7 +1728,7 @@ mod tests { let sql = String::from("0/*multi-line\n* /comment*/1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), @@ -1736,7 +1745,7 @@ mod tests { let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), @@ -1753,7 +1762,7 @@ mod tests { let sql = String::from("\n/** Comment **/\n"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Newline), @@ -1768,7 +1777,7 @@ mod tests { let sql = String::from(" \u{2003}\n"); let dialect = GenericDialect {}; - let mut tokenizer = 
Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), @@ -1783,7 +1792,7 @@ mod tests { let sql = String::from("\"foo"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); assert_eq!( tokenizer.tokenize(), Err(TokenizerError { @@ -1799,7 +1808,7 @@ mod tests { let sql = String::from("line1\nline2\rline3\r\nline4\r"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::make_word("line1", None), @@ -1818,7 +1827,7 @@ mod tests { fn tokenize_mssql_top() { let sql = "SELECT TOP 5 [bar] FROM foo"; let dialect = MsSqlDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); + let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1840,7 +1849,7 @@ mod tests { fn tokenize_pg_regex_match() { let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'"; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); + let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1879,7 +1888,7 @@ mod tests { fn tokenize_quoted_identifier() { let sql = r#" "a "" b" "a """ "c """"" "#; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); + let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); let tokens = 
tokenizer.tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), @@ -1915,7 +1924,7 @@ mod tests { fn tokenize_with_location() { let sql = "SELECT a,\n b"; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); + let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize_with_location().unwrap(); let expected = vec![ TokenWithLocation::new(Token::make_keyword("SELECT"), 1, 1), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1c45a5d97..007923070 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7101,6 +7101,7 @@ fn parse_trailing_comma() { dialects: vec![Box::new(GenericDialect {})], options: Some(ParserOptions { trailing_commas: true, + no_escape: false, }), }; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 9a54c89cf..4649a559e 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -55,7 +55,7 @@ fn test_snowflake_create_transient_table() { fn test_snowflake_single_line_tokenize() { let sql = "CREATE TABLE# this is a comment \ntable_1"; let dialect = SnowflakeDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); + let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -72,7 +72,7 @@ fn test_snowflake_single_line_tokenize() { assert_eq!(expected, tokens); let sql = "CREATE TABLE // this is a comment \ntable_1"; - let mut tokenizer = Tokenizer::new(&dialect, sql); + let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ From dc858c427db3354f627f0ce2b4558c23b5e30cc4 Mon Sep 17 00:00:00 2001 From: canalun Date: Sun, 18 Jun 2023 13:52:22 +0900 Subject: [PATCH 11/18] rm no-escape-dialect and adopt no-escape option --- examples/cli.rs | 
1 - src/dialect/mod.rs | 4 -- src/dialect/mysql.rs | 44 ------------------ src/parser.rs | 38 +++++++--------- src/test_utils.rs | 16 +------ src/tokenizer.rs | 64 +++++++++++++------------- tests/sqlparser_common.rs | 56 +++++++++++------------ tests/sqlparser_mysql.rs | 95 ++++++++++++++++++++++++++------------- 8 files changed, 141 insertions(+), 177 deletions(-) diff --git a/examples/cli.rs b/examples/cli.rs index 1d6170c2d..8af6246a0 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -42,7 +42,6 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--postgres" => Box::new(PostgreSqlDialect {}), "--ms" => Box::new(MsSqlDialect {}), "--mysql" => Box::new(MySqlDialect {}), - "--mysql-no-escape" => Box::new(MySqlNoEscapeDialect {}), "--snowflake" => Box::new(SnowflakeDialect {}), "--hive" => Box::new(HiveDialect {}), "--redshift" => Box::new(RedshiftSqlDialect {}), diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 37fd8f26f..48357501c 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -37,7 +37,6 @@ pub use self::generic::GenericDialect; pub use self::hive::HiveDialect; pub use self::mssql::MsSqlDialect; pub use self::mysql::MySqlDialect; -pub use self::mysql::MySqlNoEscapeDialect; pub use self::postgresql::PostgreSqlDialect; pub use self::redshift::RedshiftSqlDialect; pub use self::snowflake::SnowflakeDialect; @@ -157,7 +156,6 @@ pub fn dialect_from_str(dialect_name: impl AsRef) -> Option Some(Box::new(GenericDialect)), "mysql" => Some(Box::new(MySqlDialect {})), - "mysql-no-escape" => Some(Box::new(MySqlNoEscapeDialect {})), "postgresql" | "postgres" => Some(Box::new(PostgreSqlDialect {})), "hive" => Some(Box::new(HiveDialect {})), "sqlite" => Some(Box::new(SQLiteDialect {})), @@ -206,8 +204,6 @@ mod tests { assert!(parse_dialect("generic").is::()); assert!(parse_dialect("mysql").is::()); assert!(parse_dialect("MySql").is::()); - assert!(parse_dialect("mysql-no-escape").is::()); - 
assert!(parse_dialect("MySql-No-Escape").is::()); assert!(parse_dialect("postgresql").is::()); assert!(parse_dialect("postgres").is::()); assert!(parse_dialect("hive").is::()); diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 6b32adb97..0f914ed02 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -61,47 +61,3 @@ impl Dialect for MySqlDialect { } } } - -/// [MySQL](https://www.mysql.com/) -/// You should use it if you don't want to escape queries when both parsing and serializing them. -#[derive(Debug)] -pub struct MySqlNoEscapeDialect {} - -impl Dialect for MySqlNoEscapeDialect { - fn is_identifier_start(&self, ch: char) -> bool { - // See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html. - // Identifiers which begin with a digit are recognized while tokenizing numbers, - // so they can be distinguished from exponent numeric literals. - ch.is_alphabetic() - || ch == '_' - || ch == '$' - || ch == '@' - || ('\u{0080}'..='\u{ffff}').contains(&ch) - } - - fn is_identifier_part(&self, ch: char) -> bool { - self.is_identifier_start(ch) || ch.is_ascii_digit() - } - - fn is_delimited_identifier_start(&self, ch: char) -> bool { - ch == '`' - } - - fn parse_infix( - &self, - parser: &mut crate::parser::Parser, - expr: &crate::ast::Expr, - _precedence: u8, - ) -> Option> { - // Parse DIV as an operator - if parser.parse_keyword(Keyword::DIV) { - Some(Ok(Expr::BinaryOp { - left: Box::new(expr.clone()), - op: BinaryOperator::MyIntegerDivide, - right: Box::new(parser.parse_expr().unwrap()), - })) - } else { - None - } - } -} diff --git a/src/parser.rs b/src/parser.rs index 2f1dd4aa5..72d072daa 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -278,7 +278,7 @@ impl<'a> Parser<'a> { /// # fn main() -> Result<(), ParserError> { /// let dialect = GenericDialect{}; /// let result = Parser::new(&dialect) - /// .with_options(ParserOptions { trailing_commas: true }) + /// .with_options(ParserOptions { trailing_commas: true, no_escape: false }) /// 
.try_with_sql("SELECT a, b, COUNT(*), FROM foo GROUP BY a, b,")? /// .parse_statements(); /// assert!(matches!(result, Ok(_))); @@ -748,7 +748,7 @@ impl<'a> Parser<'a> { } Keyword::ARRAY_AGG => self.parse_array_agg_expr(), Keyword::NOT => self.parse_not(), - Keyword::MATCH if dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) => { + Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { self.parse_match_against() } // Here `w` is a word, check if it's a part of a multi-part @@ -3566,7 +3566,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(Some(ColumnOption::Check(expr))) } else if self.parse_keyword(Keyword::AUTO_INCREMENT) - && dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | GenericDialect) { // Support AUTO_INCREMENT for MySQL Ok(Some(ColumnOption::DialectSpecific(vec![ @@ -3580,7 +3580,7 @@ impl<'a> Parser<'a> { Token::make_keyword("AUTOINCREMENT"), ]))) } else if self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) - && dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | GenericDialect) { let expr = self.parse_expr()?; Ok(Some(ColumnOption::OnUpdate(expr))) @@ -3716,7 +3716,7 @@ impl<'a> Parser<'a> { } Token::Word(w) if (w.keyword == Keyword::INDEX || w.keyword == Keyword::KEY) - && dialect_of!(self is GenericDialect | MySqlDialect | MySqlNoEscapeDialect) => + && dialect_of!(self is GenericDialect | MySqlDialect) => { let display_as_key = w.keyword == Keyword::KEY; @@ -3741,7 +3741,7 @@ impl<'a> Parser<'a> { } Token::Word(w) if (w.keyword == Keyword::FULLTEXT || w.keyword == Keyword::SPATIAL) - && dialect_of!(self is GenericDialect | MySqlDialect | MySqlNoEscapeDialect) => + && dialect_of!(self is GenericDialect | MySqlDialect) => { if let Some(name) = name { return self.expected( @@ -3900,7 +3900,7 @@ impl<'a> Parser<'a> { cascade, } } else if 
self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) - && dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | GenericDialect) { AlterTableOperation::DropPrimaryKey } else { @@ -5081,7 +5081,7 @@ impl<'a> Parser<'a> { offset = Some(self.parse_offset()?) } - if dialect_of!(self is GenericDialect | MySqlDialect | MySqlNoEscapeDialect) + if dialect_of!(self is GenericDialect | MySqlDialect) && limit.is_some() && offset.is_none() && self.consume_token(&Token::Comma) @@ -5174,7 +5174,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; SetExpr::Query(Box::new(subquery)) } else if self.parse_keyword(Keyword::VALUES) { - let is_mysql = dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect); + let is_mysql = dialect_of!(self is MySqlDialect); SetExpr::Values(self.parse_values(is_mysql)?) } else if self.parse_keyword(Keyword::TABLE) { SetExpr::Table(Box::new(self.parse_as_table()?)) @@ -5458,7 +5458,7 @@ impl<'a> Parser<'a> { }; if variable.to_string().eq_ignore_ascii_case("NAMES") - && dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | GenericDialect) { if self.parse_keyword(Keyword::DEFAULT) { return Ok(Statement::SetNamesDefault {}); @@ -5551,7 +5551,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::COLLATION) { Ok(self.parse_show_collation()?) 
} else if self.parse_keyword(Keyword::VARIABLES) - && dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | GenericDialect) { // TODO: Support GLOBAL|SESSION Ok(Statement::ShowVariables { @@ -6226,7 +6226,7 @@ impl<'a> Parser<'a> { // Hive lets you put table here regardless let table = self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name()?; - let is_mysql = dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect); + let is_mysql = dialect_of!(self is MySqlDialect); let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; let partitioned = if self.parse_keyword(Keyword::PARTITION) { @@ -6873,7 +6873,7 @@ impl<'a> Parser<'a> { "INSERT in MATCHED merge clause".to_string(), )); } - let is_mysql = dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect); + let is_mysql = dialect_of!(self is MySqlDialect); let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; self.expect_keyword(Keyword::VALUES)?; let values = self.parse_values(is_mysql)?; @@ -7119,7 +7119,7 @@ mod tests { #[test] fn test_prev_index() { let sql = "SELECT version"; - all_dialects().run_parser_method(sql, |parser| { + all_dialects(None).run_parser_method(sql, |parser| { assert_eq!(parser.peek_token(), Token::make_keyword("SELECT")); assert_eq!(parser.next_token(), Token::make_keyword("SELECT")); parser.prev_token(); @@ -7474,7 +7474,7 @@ mod tests { // The expected name should be identical as the input name, that's why I don't receive both macro_rules! test_parse_schema_name { ($input:expr, $expected_name:expr $(,)?) 
=> {{ - all_dialects().run_parser_method(&*$input, |parser| { + all_dialects(None).run_parser_method(&*$input, |parser| { let schema_name = parser.parse_schema_name().unwrap(); // Validate that the structure is the same as expected assert_eq!(schema_name, $expected_name); @@ -7517,11 +7517,7 @@ mod tests { } let dialect = TestedDialects { - dialects: vec![ - Box::new(GenericDialect {}), - Box::new(MySqlDialect {}), - Box::new(MySqlNoEscapeDialect {}), - ], + dialects: vec![Box::new(GenericDialect {}), Box::new(MySqlDialect {})], options: None, }; @@ -7690,7 +7686,7 @@ mod tests { fn test_parse_multipart_identifier_negative() { macro_rules! test_parse_multipart_identifier_error { ($input:expr, $expected_err:expr $(,)?) => {{ - all_dialects().run_parser_method(&*$input, |parser| { + all_dialects(None).run_parser_method(&*$input, |parser| { let actual_err = parser.parse_multipart_identifier().unwrap_err(); assert_eq!(actual_err.to_string(), $expected_err); }); diff --git a/src/test_utils.rs b/src/test_utils.rs index aa0bcfa7c..8ff099455 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -155,7 +155,7 @@ impl TestedDialects { } } -pub fn all_dialects() -> TestedDialects { +pub fn all_dialects(options: Option) -> TestedDialects { TestedDialects { dialects: vec![ Box::new(GenericDialect {}), @@ -166,26 +166,14 @@ pub fn all_dialects() -> TestedDialects { Box::new(HiveDialect {}), Box::new(RedshiftSqlDialect {}), Box::new(MySqlDialect {}), - Box::new(MySqlNoEscapeDialect {}), Box::new(BigQueryDialect {}), Box::new(SQLiteDialect {}), Box::new(DuckDbDialect {}), ], - options: None, + options, } } -pub fn all_dialects_other_than_mysqlnoescape() -> TestedDialects { - let mut all_dialects = all_dialects(); - let index_of_mysqlnoescape = all_dialects - .dialects - .iter() - .position(|dialect| dialect.is::()) - .unwrap(); - all_dialects.dialects.remove(index_of_mysqlnoescape); - all_dialects -} - pub fn assert_eq_vec(expected: &[&str], actual: &[T]) { assert_eq!( 
expected, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3be97b7c9..f28c233b9 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -36,7 +36,7 @@ use sqlparser_derive::{Visit, VisitMut}; use crate::ast::DollarQuotedString; use crate::dialect::{BigQueryDialect, DuckDbDialect, GenericDialect, SnowflakeDialect}; -use crate::dialect::{Dialect, MySqlDialect, MySqlNoEscapeDialect}; +use crate::dialect::{Dialect, MySqlDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; /// SQL Token enumeration @@ -717,9 +717,7 @@ impl<'a> Tokenizer<'a> { // mysql dialect supports identifiers that start with a numeric prefix, // as long as they aren't an exponent number. - if dialect_of!(self is MySqlDialect | MySqlNoEscapeDialect) - && exponent_part.is_empty() - { + if dialect_of!(self is MySqlDialect) && exponent_part.is_empty() { let word = peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch)); @@ -1129,7 +1127,7 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume if chars.peek().map(|c| *c == quote_style).unwrap_or(false) { s.push(ch); - if dialect_of!(self is MySqlNoEscapeDialect) { + if self.options.no_escape { // In no-escape mode, the given query has to be saved completely s.push(ch); } @@ -1143,27 +1141,29 @@ impl<'a> Tokenizer<'a> { chars.next(); // slash escaping is specific to MySQL dialect. if dialect_of!(self is MySqlDialect) { - if let Some(next) = chars.peek() { - // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences - let n = match next { - '\'' | '\"' | '\\' | '%' | '_' => *next, - '0' => '\0', - 'b' => '\u{8}', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'Z' => '\u{1a}', - _ => *next, - }; - s.push(n); - chars.next(); // consume next - } - } else if dialect_of!(self is MySqlNoEscapeDialect) { - // In no-escape mode, the given query has to be saved completely including backslashes. 
- if let Some(next) = chars.peek() { - s.push(ch); - s.push(*next); - chars.next(); // consume next + if self.options.no_escape { + // In no-escape mode, the given query has to be saved completely including backslashes. + if let Some(next) = chars.peek() { + s.push(ch); + s.push(*next); + chars.next(); // consume next + } + } else { + if let Some(next) = chars.peek() { + // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences + let n = match next { + '\'' | '\"' | '\\' | '%' | '_' => *next, + '0' => '\0', + 'b' => '\u{8}', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'Z' => '\u{1a}', + _ => *next, + }; + s.push(n); + chars.next(); // consume next + } } } else { s.push(ch); @@ -1219,7 +1219,7 @@ impl<'a> Tokenizer<'a> { if chars.peek() == Some("e_end) { chars.next(); s.push(ch); - if dialect_of!(self is MySqlNoEscapeDialect) { + if self.options.no_escape { // In no-escape mode, the given query has to be saved completely s.push(ch); } @@ -1905,16 +1905,16 @@ mod tests { #[test] fn tokenize_quoted_identifier_with_no_escape() { let sql = r#" "a "" b" "a """ "c """"" "#; - let dialect = MySqlNoEscapeDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); + let dialect = GenericDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: true }); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), - Token::DoubleQuotedString(String::from(r#"a "" b"#)), + Token::make_word(r#"a "" b"#, Some('"')), Token::Whitespace(Whitespace::Space), - Token::DoubleQuotedString(String::from(r#"a """#)), + Token::make_word(r#"a """#, Some('"')), Token::Whitespace(Whitespace::Space), - Token::DoubleQuotedString(String::from(r#"c """""#)), + Token::make_word(r#"c """""#, Some('"')), Token::Whitespace(Whitespace::Space), ]; compare(expected, tokens); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 007923070..79f88ade6 100644 --- 
a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -24,14 +24,14 @@ use sqlparser::ast::TableFactor::Pivot; use sqlparser::ast::*; use sqlparser::dialect::{ AnsiDialect, BigQueryDialect, ClickHouseDialect, DuckDbDialect, GenericDialect, HiveDialect, - MsSqlDialect, MySqlDialect, MySqlNoEscapeDialect, PostgreSqlDialect, RedshiftSqlDialect, - SQLiteDialect, SnowflakeDialect, + MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect, + SnowflakeDialect, }; use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::{Parser, ParserError, ParserOptions}; use test_utils::{ - all_dialects, all_dialects_other_than_mysqlnoescape, assert_eq_vec, expr_from_projection, join, - number, only, table, table_alias, TestedDialects, + all_dialects, assert_eq_vec, expr_from_projection, join, number, only, table, table_alias, + TestedDialects, }; #[macro_use] @@ -373,14 +373,14 @@ fn parse_select_with_table_alias() { #[test] fn parse_invalid_table_name() { - let ast = all_dialects() + let ast = all_dialects(None) .run_parser_method("db.public..customer", |parser| parser.parse_object_name()); assert!(ast.is_err()); } #[test] fn parse_no_table_name() { - let ast = all_dialects().run_parser_method("", |parser| parser.parse_object_name()); + let ast = all_dialects(None).run_parser_method("", |parser| parser.parse_object_name()); assert!(ast.is_err()); } @@ -898,7 +898,7 @@ fn parse_invalid_infix_not() { fn parse_collate() { let sql = "SELECT name COLLATE \"de_DE\" FROM customer"; assert_matches!( - only(&all_dialects().verified_only_select(sql).projection), + only(&all_dialects(None).verified_only_select(sql).projection), SelectItem::UnnamedExpr(Expr::Collate { .. 
}) ); } @@ -907,7 +907,7 @@ fn parse_collate() { fn parse_collate_after_parens() { let sql = "SELECT (name) COLLATE \"de_DE\" FROM customer"; assert_matches!( - only(&all_dialects().verified_only_select(sql).projection), + only(&all_dialects(None).verified_only_select(sql).projection), SelectItem::UnnamedExpr(Expr::Collate { .. }) ); } @@ -950,7 +950,6 @@ fn parse_exponent_in_select() -> Result<(), ParserError> { // Box::new(HiveDialect {}), Box::new(MsSqlDialect {}), Box::new(MySqlDialect {}), - Box::new(MySqlNoEscapeDialect {}), Box::new(PostgreSqlDialect {}), Box::new(RedshiftSqlDialect {}), Box::new(SnowflakeDialect {}), @@ -1010,7 +1009,7 @@ fn parse_escaped_single_quote_string_predicate_with_escape() { let sql = "SELECT id, fname, lname FROM customer \ WHERE salary <> 'Jim''s salary'"; - let ast = verified_only_select_with_dialects_other_than_mysqlnoescape(sql); + let ast = verified_only_select(sql); assert_eq!( Some(Expr::BinaryOp { @@ -1031,8 +1030,11 @@ fn parse_escaped_single_quote_string_predicate_with_no_escape() { WHERE salary <> 'Jim''s salary'"; let ast = TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: true, + no_escape: true, + }), } .verified_only_select(sql); @@ -1415,7 +1417,7 @@ fn parse_string_agg() { /// selects all dialects but PostgreSQL pub fn all_dialects_but_pg() -> TestedDialects { TestedDialects { - dialects: all_dialects() + dialects: all_dialects(None) .dialects .into_iter() .filter(|x| !x.is::()) @@ -1428,8 +1430,8 @@ pub fn all_dialects_but_pg() -> TestedDialects { fn parse_bitwise_ops() { let bitwise_ops = &[ ("^", BinaryOperator::BitwiseXor, all_dialects_but_pg()), - ("|", BinaryOperator::BitwiseOr, all_dialects()), - ("&", BinaryOperator::BitwiseAnd, all_dialects()), + ("|", BinaryOperator::BitwiseOr, all_dialects(None)), + ("&", BinaryOperator::BitwiseAnd, all_dialects(None)), ]; for (str_op, op, 
dialects) in bitwise_ops { @@ -2360,7 +2362,6 @@ fn parse_create_table_hive_array() { Box::new(PostgreSqlDialect {}), Box::new(HiveDialect {}), Box::new(MySqlDialect {}), - Box::new(MySqlNoEscapeDialect {}), ], options: None, }; @@ -2660,7 +2661,7 @@ fn parse_create_table_clone() { #[test] fn parse_create_table_trailing_comma() { let sql = "CREATE TABLE foo (bar int,)"; - all_dialects().one_statement_parses_to(sql, "CREATE TABLE foo (bar INT)"); + all_dialects(None).one_statement_parses_to(sql, "CREATE TABLE foo (bar INT)"); } #[test] @@ -4675,7 +4676,7 @@ fn parse_ctes() { #[test] fn parse_cte_renamed_columns() { let sql = "WITH cte (col1, col2) AS (SELECT foo, bar FROM baz) SELECT * FROM cte"; - let query = all_dialects().verified_query(sql); + let query = all_dialects(None).verified_query(sql); assert_eq!( vec![Ident::new("col1"), Ident::new("col2")], query @@ -6389,31 +6390,27 @@ fn all_keywords_sorted() { } fn parse_sql_statements(sql: &str) -> Result, ParserError> { - all_dialects().parse_sql_statements(sql) + all_dialects(None).parse_sql_statements(sql) } fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement { - all_dialects().one_statement_parses_to(sql, canonical) + all_dialects(None).one_statement_parses_to(sql, canonical) } fn verified_stmt(query: &str) -> Statement { - all_dialects().verified_stmt(query) + all_dialects(None).verified_stmt(query) } fn verified_query(query: &str) -> Query { - all_dialects().verified_query(query) + all_dialects(None).verified_query(query) } fn verified_only_select(query: &str) -> Select { - all_dialects().verified_only_select(query) -} - -fn verified_only_select_with_dialects_other_than_mysqlnoescape(query: &str) -> Select { - all_dialects_other_than_mysqlnoescape().verified_only_select(query) + all_dialects(None).verified_only_select(query) } fn verified_expr(query: &str) -> Expr { - all_dialects().verified_expr(query) + all_dialects(None).verified_expr(query) } #[test] @@ -6713,7 +6710,7 @@ fn 
parse_cache_table() { let sql = "SELECT a, b, c FROM foo"; let cache_table_name = "cache_table_name"; let table_flag = "flag"; - let query = all_dialects().verified_query(sql); + let query = all_dialects(None).verified_query(sql); assert_eq!( verified_stmt(format!("CACHE TABLE '{cache_table_name}'").as_str()), @@ -7082,7 +7079,6 @@ fn parse_non_latin_identifiers() { Box::new(MsSqlDialect {}), Box::new(RedshiftSqlDialect {}), Box::new(MySqlDialect {}), - Box::new(MySqlNoEscapeDialect {}), ], options: None, }; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index f08a72040..51d1c6801 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -17,7 +17,8 @@ use sqlparser::ast::Expr; use sqlparser::ast::Value; use sqlparser::ast::*; -use sqlparser::dialect::{GenericDialect, MySqlDialect, MySqlNoEscapeDialect}; +use sqlparser::dialect::{GenericDialect, MySqlDialect}; +use sqlparser::parser::ParserOptions; use sqlparser::tokenizer::Token; use test_utils::*; @@ -481,8 +482,11 @@ fn parse_escaped_quote_identifiers_with_no_escape() { let sql = "SELECT `quoted `` identifier`"; assert_eq!( TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true + }), } .verified_stmt(sql), Statement::Query(Box::new(Query { @@ -559,8 +563,11 @@ fn parse_escaped_backticks_with_no_escape() { let sql = "SELECT ```quoted identifier```"; assert_eq!( TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true + }), } .verified_stmt(sql), Statement::Query(Box::new(Query { @@ -644,8 +651,11 @@ fn parse_escaped_string_with_escape() { fn parse_escaped_string_with_no_escape() { fn assert_mysql_query_value(sql: &str, quoted: &str) { let stmt = TestedDialects { - dialects: 
vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true, + }), } .one_statement_parses_to(sql, ""); @@ -679,50 +689,77 @@ fn parse_escaped_string_with_no_escape() { #[test] fn check_roundtrip_of_escaped_string() { TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true, + }), } .verified_stmt(r#"SELECT 'I\'m fine'"#); TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true, + }), } .verified_stmt(r#"SELECT 'I''m fine'"#); TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true, + }), } .verified_stmt(r#"SELECT 'I\\\'m fine'"#); TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true, + }), } .verified_stmt(r#"SELECT 'I\\\'m fine'"#); TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true, + }), } .verified_stmt(r#"SELECT "I\"m fine""#); TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true, + }), } .verified_stmt(r#"SELECT "I""m fine""#); TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: 
vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true, + }), } .verified_stmt(r#"SELECT "I\\\"m fine""#); TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true, + }), } .verified_stmt(r#"SELECT "I\\\"m fine""#); TestedDialects { - dialects: vec![Box::new(MySqlNoEscapeDialect {})], - options: None, + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + no_escape: true, + }), } .verified_stmt(r#"SELECT "I'm ''fine''""#); } @@ -1537,18 +1574,14 @@ fn parse_create_table_with_fulltext_definition_should_not_accept_constraint_name fn mysql() -> TestedDialects { TestedDialects { - dialects: vec![Box::new(MySqlDialect {}), Box::new(MySqlNoEscapeDialect {})], + dialects: vec![Box::new(MySqlDialect {})], options: None, } } fn mysql_and_generic() -> TestedDialects { TestedDialects { - dialects: vec![ - Box::new(MySqlDialect {}), - Box::new(MySqlNoEscapeDialect {}), - Box::new(GenericDialect {}), - ], + dialects: vec![Box::new(MySqlDialect {}), Box::new(GenericDialect {})], options: None, } } From 2e6502e40b9b777e0ee590e5aa172a3c1bc2a42d Mon Sep 17 00:00:00 2001 From: canalun Date: Sun, 18 Jun 2023 18:03:21 +0900 Subject: [PATCH 12/18] mod cargo clippy --- src/tokenizer.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f28c233b9..a3df02c4e 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1141,15 +1141,13 @@ impl<'a> Tokenizer<'a> { chars.next(); // slash escaping is specific to MySQL dialect. if dialect_of!(self is MySqlDialect) { - if self.options.no_escape { - // In no-escape mode, the given query has to be saved completely including backslashes. 
- if let Some(next) = chars.peek() { + if let Some(next) = chars.peek() { + if self.options.no_escape { + // In no-escape mode, the given query has to be saved completely including backslashes. s.push(ch); s.push(*next); chars.next(); // consume next - } - } else { - if let Some(next) = chars.peek() { + } else { // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences let n = match next { '\'' | '\"' | '\\' | '%' | '_' => *next, From 29cb1e20f618b45ea9e9d9418723f8800bcaef8d Mon Sep 17 00:00:00 2001 From: canalun Date: Sat, 24 Jun 2023 23:38:44 +0900 Subject: [PATCH 13/18] add comment to ToknizerOptions --- src/tokenizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a3df02c4e..0fed3f11f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -451,7 +451,7 @@ pub struct Tokenizer<'a> { } pub struct TokenizerOptions { - pub no_escape: bool, + pub no_escape: bool, // true when no-escape mode is enabled by the user } impl<'a> Tokenizer<'a> { From 5573ab6af6f91a9caf711b162410fa82c1e5f3da Mon Sep 17 00:00:00 2001 From: canalun Date: Sun, 25 Jun 2023 00:22:42 +0900 Subject: [PATCH 14/18] impl and use TokenizerOptions:default() and with_no_escape() --- src/ast/visitor.rs | 3 +- src/parser.rs | 4 +- src/tokenizer.rs | 105 ++++++++++++++++++++++++----------- tests/sqlparser_snowflake.rs | 6 +- 4 files changed, 81 insertions(+), 37 deletions(-) diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index e857087e7..96e65f604 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -632,7 +632,8 @@ mod tests { fn do_visit(sql: &str) -> Vec { let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let s = Parser::new(&dialect) 
.with_tokens(tokens) diff --git a/src/parser.rs b/src/parser.rs index 72d072daa..aaca17abb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -318,9 +318,7 @@ impl<'a> Parser<'a> { /// See example on [`Parser::new()`] for an example pub fn try_with_sql(self, sql: &str) -> Result { debug!("Parsing sql '{}'...", sql); - let tokenizer_options = TokenizerOptions { - no_escape: self.options.no_escape, - }; + let tokenizer_options = TokenizerOptions::default().with_no_escape(self.options.no_escape); let mut tokenizer = Tokenizer::new(self.dialect, sql, &tokenizer_options); let tokens = tokenizer.tokenize()?; Ok(self.with_tokens(tokens)) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 0fed3f11f..28acd170d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -454,6 +454,18 @@ pub struct TokenizerOptions { pub no_escape: bool, // true when no-escape mode is enabled by the user } +impl Default for TokenizerOptions { + fn default() -> Self { + Self { no_escape: false } + } +} + +impl TokenizerOptions { + pub fn with_no_escape(self, no_escape: bool) -> Self { + Self { no_escape } + } +} + impl<'a> Tokenizer<'a> { /// Create a new SQL tokenizer for the specified SQL statement pub fn new(dialect: &'a dyn Dialect, query: &'a str, options: &'a TokenizerOptions) -> Self { @@ -1283,7 +1295,8 @@ mod tests { fn tokenize_select_1() { let sql = String::from("SELECT 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1299,7 +1312,8 @@ mod tests { fn tokenize_select_float() { let sql = String::from("SELECT .1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let 
mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1315,7 +1329,8 @@ mod tests { fn tokenize_select_exponent() { let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1350,7 +1365,8 @@ mod tests { fn tokenize_scalar_function() { let sql = String::from("SELECT sqrt(1)"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1369,7 +1385,8 @@ mod tests { fn tokenize_string_string_concat() { let sql = String::from("SELECT 'a' || 'b'"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1388,7 +1405,8 @@ mod tests { fn tokenize_bitwise_op() { let sql = String::from("SELECT one | two ^ three"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1412,7 +1430,8 @@ mod tests { let sql = String::from("SELECT true XOR true, false XOR false, true XOR false, 
false XOR true"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1452,7 +1471,8 @@ mod tests { fn tokenize_simple_select() { let sql = String::from("SELECT * FROM customer WHERE id = 1 LIMIT 5"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1484,7 +1504,8 @@ mod tests { fn tokenize_explain_select() { let sql = String::from("EXPLAIN SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1514,7 +1535,8 @@ mod tests { fn tokenize_explain_analyze_select() { let sql = String::from("EXPLAIN ANALYZE SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1546,7 +1568,8 @@ mod tests { fn tokenize_string_predicate() { let sql = String::from("SELECT * FROM customer WHERE salary != 'Not Provided'"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); 
+ let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1575,7 +1598,8 @@ mod tests { let sql = String::from("\n💝مصطفىh"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ @@ -1591,7 +1615,8 @@ mod tests { let sql = String::from("'foo\r\nbar\nbaz'"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())]; compare(expected, tokens); @@ -1602,7 +1627,8 @@ mod tests { let sql = String::from("select 'foo"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); assert_eq!( tokenizer.tokenize(), Err(TokenizerError { @@ -1618,7 +1644,8 @@ mod tests { let sql = String::from("SELECT \"なにか\" FROM Y WHERE \"なにか\" = 'test;"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); assert_eq!( tokenizer.tokenize(), Err(TokenizerError { @@ -1634,7 +1661,8 @@ mod tests { let sql = String::from("\n\nSELECT * FROM 
table\t💝مصطفىh"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ @@ -1658,7 +1686,8 @@ mod tests { fn tokenize_right_arrow() { let sql = String::from("FUNCTION(key=>value)"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::make_word("FUNCTION", None), @@ -1675,7 +1704,8 @@ mod tests { fn tokenize_is_null() { let sql = String::from("a IS NULL"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1694,7 +1724,8 @@ mod tests { let sql = String::from("0--this is a comment\n1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), @@ -1712,7 +1743,8 @@ mod tests { let sql = String::from("--this is a comment"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer 
= Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), @@ -1726,7 +1758,8 @@ mod tests { let sql = String::from("0/*multi-line\n* /comment*/1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), @@ -1743,7 +1776,8 @@ mod tests { let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), @@ -1760,7 +1794,8 @@ mod tests { let sql = String::from("\n/** Comment **/\n"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Newline), @@ -1775,7 +1810,8 @@ mod tests { let sql = String::from(" \u{2003}\n"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), @@ 
-1790,7 +1826,8 @@ mod tests { let sql = String::from("\"foo"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); assert_eq!( tokenizer.tokenize(), Err(TokenizerError { @@ -1806,7 +1843,8 @@ mod tests { let sql = String::from("line1\nline2\rline3\r\nline4\r"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::make_word("line1", None), @@ -1825,7 +1863,8 @@ mod tests { fn tokenize_mssql_top() { let sql = "SELECT TOP 5 [bar] FROM foo"; let dialect = MsSqlDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1847,7 +1886,8 @@ mod tests { fn tokenize_pg_regex_match() { let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'"; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1886,7 +1926,8 @@ mod tests { fn tokenize_quoted_identifier() { let sql = r#" "a "" b" "a """ "c """"" "#; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); + let 
option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), @@ -1904,7 +1945,8 @@ mod tests { fn tokenize_quoted_identifier_with_no_escape() { let sql = r#" "a "" b" "a """ "c """"" "#; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: true }); + let option = TokenizerOptions::default().with_no_escape(true); + let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), @@ -1922,7 +1964,8 @@ mod tests { fn tokenize_with_location() { let sql = "SELECT a,\n b"; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize_with_location().unwrap(); let expected = vec![ TokenWithLocation::new(Token::make_keyword("SELECT"), 1, 1), diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 4649a559e..63c09a00b 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -55,7 +55,8 @@ fn test_snowflake_create_transient_table() { fn test_snowflake_single_line_tokenize() { let sql = "CREATE TABLE# this is a comment \ntable_1"; let dialect = SnowflakeDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -72,7 +73,8 @@ fn test_snowflake_single_line_tokenize() { assert_eq!(expected, tokens); let sql = "CREATE TABLE // this is a comment \ntable_1"; - 
let mut tokenizer = Tokenizer::new(&dialect, sql, &TokenizerOptions { no_escape: false }); + let option = TokenizerOptions::default().with_no_escape(false); + let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ From 0267fb5ac9d07e3559c6e35b9562db67f1501c9b Mon Sep 17 00:00:00 2001 From: canalun Date: Sat, 1 Jul 2023 23:12:16 +0900 Subject: [PATCH 15/18] mod lint --- src/ast/visitor.rs | 3 +- src/parser.rs | 2 +- src/tokenizer.rs | 68 +++++++++++++++++++----------------- tests/sqlparser_snowflake.rs | 4 +-- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 96e65f604..cf156885f 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -590,6 +590,7 @@ mod tests { use crate::dialect::GenericDialect; use crate::parser::Parser; use crate::tokenizer::Tokenizer; + use crate::tokenizer::TokenizerOptions; #[derive(Default)] struct TestVisitor { @@ -632,7 +633,7 @@ mod tests { fn do_visit(sql: &str) -> Vec { let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let s = Parser::new(&dialect) diff --git a/src/parser.rs b/src/parser.rs index aaca17abb..a6d260446 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -318,7 +318,7 @@ impl<'a> Parser<'a> { /// See example on [`Parser::new()`] for an example pub fn try_with_sql(self, sql: &str) -> Result { debug!("Parsing sql '{}'...", sql); - let tokenizer_options = TokenizerOptions::default().with_no_escape(self.options.no_escape); + let tokenizer_options = TokenizerOptions::new().with_no_escape(self.options.no_escape); let mut tokenizer = Tokenizer::new(self.dialect, sql, &tokenizer_options); let tokens = tokenizer.tokenize()?; Ok(self.with_tokens(tokens)) diff --git a/src/tokenizer.rs 
b/src/tokenizer.rs index 28acd170d..1478a470e 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -456,11 +456,15 @@ pub struct TokenizerOptions { impl Default for TokenizerOptions { fn default() -> Self { - Self { no_escape: false } + Self::new() } } impl TokenizerOptions { + pub fn new() -> Self { + Self { no_escape: false } + } + pub fn with_no_escape(self, no_escape: bool) -> Self { Self { no_escape } } @@ -1295,7 +1299,7 @@ mod tests { fn tokenize_select_1() { let sql = String::from("SELECT 1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1312,7 +1316,7 @@ mod tests { fn tokenize_select_float() { let sql = String::from("SELECT .1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1329,7 +1333,7 @@ mod tests { fn tokenize_select_exponent() { let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1365,7 +1369,7 @@ mod tests { fn tokenize_scalar_function() { let sql = String::from("SELECT sqrt(1)"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1385,7 +1389,7 @@ mod tests { fn tokenize_string_string_concat() { let 
sql = String::from("SELECT 'a' || 'b'"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1405,7 +1409,7 @@ mod tests { fn tokenize_bitwise_op() { let sql = String::from("SELECT one | two ^ three"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1430,7 +1434,7 @@ mod tests { let sql = String::from("SELECT true XOR true, false XOR false, true XOR false, false XOR true"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1471,7 +1475,7 @@ mod tests { fn tokenize_simple_select() { let sql = String::from("SELECT * FROM customer WHERE id = 1 LIMIT 5"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1504,7 +1508,7 @@ mod tests { fn tokenize_explain_select() { let sql = String::from("EXPLAIN SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1535,7 +1539,7 @@ mod tests { fn tokenize_explain_analyze_select() { let sql = String::from("EXPLAIN ANALYZE 
SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1568,7 +1572,7 @@ mod tests { fn tokenize_string_predicate() { let sql = String::from("SELECT * FROM customer WHERE salary != 'Not Provided'"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1598,7 +1602,7 @@ mod tests { let sql = String::from("\n💝مصطفىh"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); // println!("tokens: {:#?}", tokens); @@ -1615,7 +1619,7 @@ mod tests { let sql = String::from("'foo\r\nbar\nbaz'"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())]; @@ -1627,7 +1631,7 @@ mod tests { let sql = String::from("select 'foo"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); assert_eq!( tokenizer.tokenize(), @@ -1644,7 +1648,7 @@ mod tests { let sql = String::from("SELECT \"なにか\" FROM Y WHERE \"なにか\" = 'test;"); let dialect = GenericDialect {}; - let option 
= TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); assert_eq!( tokenizer.tokenize(), @@ -1661,7 +1665,7 @@ mod tests { let sql = String::from("\n\nSELECT * FROM table\t💝مصطفىh"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); // println!("tokens: {:#?}", tokens); @@ -1686,7 +1690,7 @@ mod tests { fn tokenize_right_arrow() { let sql = String::from("FUNCTION(key=>value)"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1704,7 +1708,7 @@ mod tests { fn tokenize_is_null() { let sql = String::from("a IS NULL"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -1724,7 +1728,7 @@ mod tests { let sql = String::from("0--this is a comment\n1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1743,7 +1747,7 @@ mod tests { let sql = String::from("--this is a comment"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = 
Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![Token::Whitespace(Whitespace::SingleLineComment { @@ -1758,7 +1762,7 @@ mod tests { let sql = String::from("0/*multi-line\n* /comment*/1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1776,7 +1780,7 @@ mod tests { let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1794,7 +1798,7 @@ mod tests { let sql = String::from("\n/** Comment **/\n"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1810,7 +1814,7 @@ mod tests { let sql = String::from(" \u{2003}\n"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1826,7 +1830,7 @@ mod tests { let sql = String::from("\"foo"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); assert_eq!( tokenizer.tokenize(), @@ 
-1843,7 +1847,7 @@ mod tests { let sql = String::from("line1\nline2\rline3\r\nline4\r"); let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1863,7 +1867,7 @@ mod tests { fn tokenize_mssql_top() { let sql = "SELECT TOP 5 [bar] FROM foo"; let dialect = MsSqlDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1886,7 +1890,7 @@ mod tests { fn tokenize_pg_regex_match() { let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'"; let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1926,7 +1930,7 @@ mod tests { fn tokenize_quoted_identifier() { let sql = r#" "a "" b" "a """ "c """"" "#; let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ -1945,7 +1949,7 @@ mod tests { fn tokenize_quoted_identifier_with_no_escape() { let sql = r#" "a "" b" "a """ "c """"" "#; let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(true); + let option = TokenizerOptions::new().with_no_escape(true); let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ @@ 
-1964,7 +1968,7 @@ mod tests { fn tokenize_with_location() { let sql = "SELECT a,\n b"; let dialect = GenericDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize_with_location().unwrap(); let expected = vec![ diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 63c09a00b..93029de0f 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -55,7 +55,7 @@ fn test_snowflake_create_transient_table() { fn test_snowflake_single_line_tokenize() { let sql = "CREATE TABLE# this is a comment \ntable_1"; let dialect = SnowflakeDialect {}; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); @@ -73,7 +73,7 @@ fn test_snowflake_single_line_tokenize() { assert_eq!(expected, tokens); let sql = "CREATE TABLE // this is a comment \ntable_1"; - let option = TokenizerOptions::default().with_no_escape(false); + let option = TokenizerOptions::new().with_no_escape(false); let mut tokenizer = Tokenizer::new(&dialect, sql, &option); let tokens = tokenizer.tokenize().unwrap(); From bdeafa7403f7025e51a2f8bc878f155fd2feafe5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 17 Jul 2023 15:32:07 -0400 Subject: [PATCH 16/18] Simplify setting Tokenizer options, add docs and comments --- src/ast/value.rs | 19 ++++ src/parser.rs | 68 ++++++++++-- src/tokenizer.rs | 210 ++++++++++++++++------------------- tests/sqlparser_common.rs | 14 +-- tests/sqlparser_mysql.rs | 59 +++------- tests/sqlparser_snowflake.rs | 8 +- 6 files changed, 192 insertions(+), 186 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index 9995dfcab..70a23d40d 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ 
-182,8 +182,27 @@ pub struct EscapeQuotedString<'a> { quote: char, } + impl<'a> fmt::Display for EscapeQuotedString<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + // EscapeQuotedString doesn't know which mode of escape was + // chosen by the user. So this code must correctly display + // strings without knowing if the strings are already escaped + // or not. + // + // If the quote symbol in the string is repeated twice, OR, if + // the quote symbol is after backslash, display all the chars + // without any escape. However, if the quote symbol is used + // just between usual chars, `fmt()` should display it twice. + // + // The following table has examples + // + // | original query | mode | AST Node | serialized | + // | ------------- | --------- | -------------------------------------------------- | ------------ | + // | `"A""B""A"` | no-escape | `DoubleQuotedString(String::from("A\"\"B\"\"A"))` | `"A""B""A"` | + // | `"A""B""A"` | default | `DoubleQuotedString(String::from("A\"B\"A"))` | `"A""B""A"` | + // | `"A\"B\"A"` | no-escape | `DoubleQuotedString(String::from("A\\\"B\\\"A"))` | `"A\"B\"A"` | + // | `"A\"B\"A"` | default | `DoubleQuotedString(String::from("A\"B\"A"))` | `"A""B""A"` | let quote = self.quote; let mut previous_char = char::default(); let mut peekable_chars = self.string.chars().peekable(); diff --git a/src/parser.rs b/src/parser.rs index a6d260446..a3eb221ef 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -195,10 +195,52 @@ impl std::error::Error for ParserError {} // By default, allow expressions up to this deep before erroring const DEFAULT_REMAINING_DEPTH: usize = 50; -#[derive(Debug, Default, Clone, PartialEq, Eq)] +/// Options that control how the [`Parser`] parses SQL text +#[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserOptions { pub trailing_commas: bool, - pub no_escape: bool, + /// Controls how literal values are unescaped. See + /// [`Tokenizer::with_unescape`] for more details.
+ pub unescape: bool, +} + +impl Default for ParserOptions { + fn default() -> Self { + Self { + trailing_commas: false, + unescape: true, + } + } +} + +impl ParserOptions { + /// Create a new [`ParserOptions`] + pub fn new() -> Self { + Default::default() + } + + /// Set if trailing commas are allowed. + /// + /// If this option is `false` (the default), the following SQL will + /// not parse. If the option is `true`, the SQL will parse. + /// + /// ```sql + /// SELECT + /// foo, + /// bar, + /// FROM baz + /// ``` + pub fn with_trailing_commas(mut self, trailing_commas: bool) -> Self { + self.trailing_commas = trailing_commas; + self + } + + /// Set if literal values are unescaped. Defaults to true. See + /// [`Tokenizer::with_unescape`] for more details. + pub fn with_unescape(mut self, unescape: bool) -> Self { + self.unescape = unescape; + self + } } pub struct Parser<'a> { @@ -207,8 +249,9 @@ pub struct Parser<'a> { index: usize, /// The current dialect to use dialect: &'a dyn Dialect, - /// Additional options that allow you to mix & match behavior otherwise - /// constrained to certain dialects (e.g. trailing commas) and/or format of parse (e.g. no escape) + /// Additional options that allow you to mix & match behavior + /// otherwise constrained to certain dialects (e.g. trailing + /// commas) and/or format of parse (e.g. unescaping) options: ParserOptions, /// ensure the stack does not overflow by limiting recursion depth recursion_counter: RecursionCounter, @@ -268,17 +311,20 @@ impl<'a> Parser<'a> { /// Specify additional parser options /// /// - /// [`Parser`] supports additional options ([`ParserOptions`]) that allow you to - /// mix & match behavior otherwise constrained to certain dialects (e.g. trailing - /// commas). + /// [`Parser`] supports additional options ([`ParserOptions`]) + /// that allow you to mix & match behavior otherwise constrained + /// to certain dialects (e.g. trailing commas). 
/// /// Example: /// ``` /// # use sqlparser::{parser::{Parser, ParserError, ParserOptions}, dialect::GenericDialect}; /// # fn main() -> Result<(), ParserError> { /// let dialect = GenericDialect{}; + /// let options = ParserOptions::new() + /// .with_trailing_commas(true) + /// .with_unescape(false); /// let result = Parser::new(&dialect) - /// .with_options(ParserOptions { trailing_commas: true, no_escape: false }) + /// .with_options(options) /// .try_with_sql("SELECT a, b, COUNT(*), FROM foo GROUP BY a, b,")? /// .parse_statements(); /// assert!(matches!(result, Ok(_))); @@ -318,9 +364,9 @@ impl<'a> Parser<'a> { /// See example on [`Parser::new()`] for an example pub fn try_with_sql(self, sql: &str) -> Result { debug!("Parsing sql '{}'...", sql); - let tokenizer_options = TokenizerOptions::new().with_no_escape(self.options.no_escape); - let mut tokenizer = Tokenizer::new(self.dialect, sql, &tokenizer_options); - let tokens = tokenizer.tokenize()?; + let tokens = Tokenizer::new(self.dialect, sql) + .with_unescape(self.options.unescape) + .tokenize()?; Ok(self.with_tokens(tokens)) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1478a470e..6d91e77ad 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -447,39 +447,71 @@ impl<'a> State<'a> { pub struct Tokenizer<'a> { dialect: &'a dyn Dialect, query: &'a str, - options: &'a TokenizerOptions, -} - -pub struct TokenizerOptions { - pub no_escape: bool, // true when no-escape mode is enabled by the user -} - -impl Default for TokenizerOptions { - fn default() -> Self { - Self::new() - } -} - -impl TokenizerOptions { - pub fn new() -> Self { - Self { no_escape: false } - } - - pub fn with_no_escape(self, no_escape: bool) -> Self { - Self { no_escape } - } + /// If true (the default), the tokenizer will un-escape literal + /// SQL strings See [`Tokenizer::with_unescape`] for more details. 
+ unescape: bool, } impl<'a> Tokenizer<'a> { /// Create a new SQL tokenizer for the specified SQL statement - pub fn new(dialect: &'a dyn Dialect, query: &'a str, options: &'a TokenizerOptions) -> Self { + /// + /// ``` + /// # use sqlparser::tokenizer::{Token, Whitespace, Tokenizer}; + /// # use sqlparser::dialect::GenericDialect; + /// # let dialect = GenericDialect{}; + /// let query = r#"SELECT 'foo'"#; + /// + /// // Parsing the query + /// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap(); + /// + /// assert_eq!(tokens, vec![ + /// Token::make_word("SELECT", None), + /// Token::Whitespace(Whitespace::Space), + /// Token::SingleQuotedString("foo".to_string()), + /// ]); + pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self { Self { dialect, query, - options, + unescape: true, } } + /// Set unescape mode + /// + /// When true (default) the tokenizer unescapes literal values + /// (for example, `""` in SQL is unescaped to the literal `"`). + /// + /// When false, the tokenizer provides the raw strings as provided + /// in the query. 
This can be helpful for programs that wish to + /// recover the *exact* original query text without normalizing + /// the escaping + /// + /// # Example + /// + /// ``` + /// # use sqlparser::tokenizer::{Token, Tokenizer}; + /// # use sqlparser::dialect::GenericDialect; + /// # let dialect = GenericDialect{}; + /// let query = r#""Foo "" Bar""#; + /// let unescaped = Token::make_word(r#"Foo " Bar"#, Some('"')); + /// let original = Token::make_word(r#"Foo "" Bar"#, Some('"')); + /// + /// // Parsing with unescaping (default) + /// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap(); + /// assert_eq!(tokens, vec![unescaped]); + /// + /// // Parsing with unescape = false + /// let tokens = Tokenizer::new(&dialect, &query) + /// .with_unescape(false) + /// .tokenize().unwrap(); + /// assert_eq!(tokens, vec![original]); + /// ``` + pub fn with_unescape(mut self, unescape: bool) -> Self { + self.unescape = unescape; + self + } + /// Tokenize the statement and produce a vector of tokens pub fn tokenize(&mut self) -> Result, TokenizerError> { let twl = self.tokenize_with_location()?; @@ -1143,7 +1175,7 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume if chars.peek().map(|c| *c == quote_style).unwrap_or(false) { s.push(ch); - if self.options.no_escape { + if !self.unescape { // In no-escape mode, the given query has to be saved completely s.push(ch); } @@ -1158,7 +1190,7 @@ impl<'a> Tokenizer<'a> { // slash escaping is specific to MySQL dialect. if dialect_of!(self is MySqlDialect) { if let Some(next) = chars.peek() { - if self.options.no_escape { + if !self.unescape { // In no-escape mode, the given query has to be saved completely including backslashes. 
s.push(ch); s.push(*next); @@ -1233,7 +1265,7 @@ impl<'a> Tokenizer<'a> { if chars.peek() == Some("e_end) { chars.next(); s.push(ch); - if self.options.no_escape { + if !self.unescape { // In no-escape mode, the given query has to be saved completely s.push(ch); } @@ -1299,9 +1331,7 @@ mod tests { fn tokenize_select_1() { let sql = String::from("SELECT 1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1316,9 +1346,7 @@ mod tests { fn tokenize_select_float() { let sql = String::from("SELECT .1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1333,9 +1361,7 @@ mod tests { fn tokenize_select_exponent() { let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1369,9 +1395,7 @@ mod tests { fn tokenize_scalar_function() { let sql = String::from("SELECT sqrt(1)"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ 
Token::make_keyword("SELECT"), @@ -1389,9 +1413,7 @@ mod tests { fn tokenize_string_string_concat() { let sql = String::from("SELECT 'a' || 'b'"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1409,9 +1431,7 @@ mod tests { fn tokenize_bitwise_op() { let sql = String::from("SELECT one | two ^ three"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1434,9 +1454,7 @@ mod tests { let sql = String::from("SELECT true XOR true, false XOR false, true XOR false, false XOR true"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1475,9 +1493,7 @@ mod tests { fn tokenize_simple_select() { let sql = String::from("SELECT * FROM customer WHERE id = 1 LIMIT 5"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1508,9 +1524,7 @@ mod tests { fn tokenize_explain_select() { let sql = String::from("EXPLAIN SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let option 
= TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("EXPLAIN"), @@ -1539,9 +1553,7 @@ mod tests { fn tokenize_explain_analyze_select() { let sql = String::from("EXPLAIN ANALYZE SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("EXPLAIN"), @@ -1572,9 +1584,7 @@ mod tests { fn tokenize_string_predicate() { let sql = String::from("SELECT * FROM customer WHERE salary != 'Not Provided'"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1602,9 +1612,7 @@ mod tests { let sql = String::from("\n💝مصطفىh"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ Token::Whitespace(Whitespace::Newline), @@ -1619,9 +1627,7 @@ mod tests { let sql = String::from("'foo\r\nbar\nbaz'"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, 
&sql).tokenize().unwrap(); let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())]; compare(expected, tokens); } @@ -1631,8 +1637,7 @@ mod tests { let sql = String::from("select 'foo"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); + let mut tokenizer = Tokenizer::new(&dialect, &sql); assert_eq!( tokenizer.tokenize(), Err(TokenizerError { @@ -1648,8 +1653,7 @@ mod tests { let sql = String::from("SELECT \"なにか\" FROM Y WHERE \"なにか\" = 'test;"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); + let mut tokenizer = Tokenizer::new(&dialect, &sql); assert_eq!( tokenizer.tokenize(), Err(TokenizerError { @@ -1665,9 +1669,7 @@ mod tests { let sql = String::from("\n\nSELECT * FROM table\t💝مصطفىh"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ Token::Whitespace(Whitespace::Newline), @@ -1690,9 +1692,7 @@ mod tests { fn tokenize_right_arrow() { let sql = String::from("FUNCTION(key=>value)"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("FUNCTION", None), Token::LParen, @@ -1708,9 +1708,7 @@ mod tests { fn tokenize_is_null() { let sql = String::from("a IS NULL"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = 
Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("a", None), @@ -1728,9 +1726,7 @@ mod tests { let sql = String::from("0--this is a comment\n1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::SingleLineComment { @@ -1747,9 +1743,7 @@ mod tests { let sql = String::from("--this is a comment"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), comment: "this is a comment".to_string(), @@ -1762,9 +1756,7 @@ mod tests { let sql = String::from("0/*multi-line\n* /comment*/1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::MultiLineComment( @@ -1780,9 +1772,7 @@ mod tests { let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, 
&sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::MultiLineComment( @@ -1798,9 +1788,7 @@ mod tests { let sql = String::from("\n/** Comment **/\n"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())), @@ -1814,9 +1802,7 @@ mod tests { let sql = String::from(" \u{2003}\n"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space), @@ -1830,8 +1816,7 @@ mod tests { let sql = String::from("\"foo"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); + let mut tokenizer = Tokenizer::new(&dialect, &sql); assert_eq!( tokenizer.tokenize(), Err(TokenizerError { @@ -1847,9 +1832,7 @@ mod tests { let sql = String::from("line1\nline2\rline3\r\nline4\r"); let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, &sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("line1", None), Token::Whitespace(Whitespace::Newline), @@ -1867,9 +1850,7 @@ mod tests { fn tokenize_mssql_top() { let sql = "SELECT TOP 5 [bar] FROM foo"; let dialect = MsSqlDialect {}; - let 
option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), @@ -1890,9 +1871,7 @@ mod tests { fn tokenize_pg_regex_match() { let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'"; let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), @@ -1930,9 +1909,7 @@ mod tests { fn tokenize_quoted_identifier() { let sql = r#" "a "" b" "a """ "c """"" "#; let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), Token::make_word(r#"a " b"#, Some('"')), @@ -1949,9 +1926,10 @@ mod tests { fn tokenize_quoted_identifier_with_no_escape() { let sql = r#" "a "" b" "a """ "c """"" "#; let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(true); - let mut tokenizer = Tokenizer::new(&dialect, sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql) + .with_unescape(false) + .tokenize() + .unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), Token::make_word(r#"a "" b"#, Some('"')), @@ -1968,9 +1946,9 @@ mod tests { fn tokenize_with_location() { let sql = "SELECT a,\n b"; let dialect = GenericDialect {}; - let option = 
TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, sql, &option); - let tokens = tokenizer.tokenize_with_location().unwrap(); + let tokens = Tokenizer::new(&dialect, sql) + .tokenize_with_location() + .unwrap(); let expected = vec![ TokenWithLocation::new(Token::make_keyword("SELECT"), 1, 1), TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 1, 7), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 79f88ade6..aec6b0982 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1031,10 +1031,11 @@ fn parse_escaped_single_quote_string_predicate_with_no_escape() { let ast = TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - trailing_commas: true, - no_escape: true, - }), + options: Some( + ParserOptions::new() + .with_trailing_commas(true) + .with_unescape(false), + ), } .verified_only_select(sql); @@ -7095,10 +7096,7 @@ fn parse_non_latin_identifiers() { fn parse_trailing_comma() { let trailing_commas = TestedDialects { dialects: vec![Box::new(GenericDialect {})], - options: Some(ParserOptions { - trailing_commas: true, - no_escape: false, - }), + options: Some(ParserOptions::new().with_trailing_commas(true)), }; trailing_commas.one_statement_parses_to( diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 51d1c6801..3d3747776 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -485,7 +485,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { dialects: vec![Box::new(MySqlDialect {})], options: Some(ParserOptions { trailing_commas: false, - no_escape: true + unescape: false, }), } .verified_stmt(sql), @@ -564,10 +564,7 @@ fn parse_escaped_backticks_with_no_escape() { assert_eq!( TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - trailing_commas: false, - no_escape: true - }), + options: Some(ParserOptions::new().with_unescape(false)), } 
.verified_stmt(sql), Statement::Query(Box::new(Query { @@ -652,10 +649,7 @@ fn parse_escaped_string_with_no_escape() { fn assert_mysql_query_value(sql: &str, quoted: &str) { let stmt = TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - trailing_commas: false, - no_escape: true, - }), + options: Some(ParserOptions::new().with_unescape(false)), } .one_statement_parses_to(sql, ""); @@ -688,78 +682,53 @@ fn parse_escaped_string_with_no_escape() { #[test] fn check_roundtrip_of_escaped_string() { + let options = Some(ParserOptions::new().with_unescape(false)); + TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - trailing_commas: false, - no_escape: true, - }), + options: options.clone(), } .verified_stmt(r#"SELECT 'I\'m fine'"#); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - trailing_commas: false, - no_escape: true, - }), + options: options.clone(), } .verified_stmt(r#"SELECT 'I''m fine'"#); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - trailing_commas: false, - no_escape: true, - }), + options: options.clone(), } .verified_stmt(r#"SELECT 'I\\\'m fine'"#); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - trailing_commas: false, - no_escape: true, - }), + options: options.clone(), } .verified_stmt(r#"SELECT 'I\\\'m fine'"#); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - trailing_commas: false, - no_escape: true, - }), + options: options.clone(), } .verified_stmt(r#"SELECT "I\"m fine""#); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - trailing_commas: false, - no_escape: true, - }), + options: options.clone(), } .verified_stmt(r#"SELECT "I""m fine""#); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - 
trailing_commas: false, - no_escape: true, - }), + options: options.clone(), } .verified_stmt(r#"SELECT "I\\\"m fine""#); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - trailing_commas: false, - no_escape: true, - }), + options: options.clone(), } .verified_stmt(r#"SELECT "I\\\"m fine""#); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions { - trailing_commas: false, - no_escape: true, - }), + options, } .verified_stmt(r#"SELECT "I'm ''fine''""#); } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 93029de0f..43ebb8b11 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -55,9 +55,7 @@ fn test_snowflake_create_transient_table() { fn test_snowflake_single_line_tokenize() { let sql = "CREATE TABLE# this is a comment \ntable_1"; let dialect = SnowflakeDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("CREATE"), @@ -73,9 +71,7 @@ fn test_snowflake_single_line_tokenize() { assert_eq!(expected, tokens); let sql = "CREATE TABLE // this is a comment \ntable_1"; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("CREATE"), From 67aed8500aeb92ede539ea784b8ab3709f8b5fc1 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 19 Jul 2023 16:10:45 -0400 Subject: [PATCH 17/18] Fix compile errors --- src/ast/value.rs | 1 - src/ast/visitor.rs | 5 +---- tests/sqlparser_common.rs | 12 ++++++------ 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/ast/value.rs 
b/src/ast/value.rs index 42978eb62..9c18a325c 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -185,7 +185,6 @@ pub struct EscapeQuotedString<'a> { quote: char, } - impl<'a> fmt::Display for EscapeQuotedString<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // EscapeQuotedString doesn't know which mode of escape was diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index cf156885f..8aa038db9 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -590,7 +590,6 @@ mod tests { use crate::dialect::GenericDialect; use crate::parser::Parser; use crate::tokenizer::Tokenizer; - use crate::tokenizer::TokenizerOptions; #[derive(Default)] struct TestVisitor { @@ -633,9 +632,7 @@ mod tests { fn do_visit(sql: &str) -> Vec { let dialect = GenericDialect {}; - let option = TokenizerOptions::new().with_no_escape(false); - let mut tokenizer = Tokenizer::new(&dialect, sql, &option); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let s = Parser::new(&dialect) .with_tokens(tokens) .parse_statement() diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 4e05c3f6c..1e962968c 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1154,15 +1154,15 @@ fn pg_and_generic() -> TestedDialects { fn parse_json_ops_without_colon() { use self::JsonOperator; let binary_ops = &[ - ("->", JsonOperator::Arrow, all_dialects()), - ("->>", JsonOperator::LongArrow, all_dialects()), + ("->", JsonOperator::Arrow, all_dialects(None)), + ("->>", JsonOperator::LongArrow, all_dialects(None)), ("#>", JsonOperator::HashArrow, pg_and_generic()), ("#>>", JsonOperator::HashLongArrow, pg_and_generic()), - ("@>", JsonOperator::AtArrow, all_dialects()), - ("<@", JsonOperator::ArrowAt, all_dialects()), + ("@>", JsonOperator::AtArrow, all_dialects(None)), + ("<@", JsonOperator::ArrowAt, all_dialects(None)), ("#-", JsonOperator::HashMinus, pg_and_generic()), - ("@?", JsonOperator::AtQuestion, 
all_dialects()), - ("@@", JsonOperator::AtAt, all_dialects()), + ("@?", JsonOperator::AtQuestion, all_dialects(None)), + ("@@", JsonOperator::AtAt, all_dialects(None)), ]; for (str_op, op, dialects) in binary_ops { From 17b27d9282c1a5944ad72257d2bfb15eac10c3be Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 19 Jul 2023 16:15:29 -0400 Subject: [PATCH 18/18] Revert inconsistent argument to all_dialects --- src/parser.rs | 6 +++--- src/test_utils.rs | 4 ++-- tests/sqlparser_common.rs | 44 +++++++++++++++++++-------------------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 8dcd17839..790ba8fbb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -7295,7 +7295,7 @@ mod tests { #[test] fn test_prev_index() { let sql = "SELECT version"; - all_dialects(None).run_parser_method(sql, |parser| { + all_dialects().run_parser_method(sql, |parser| { assert_eq!(parser.peek_token(), Token::make_keyword("SELECT")); assert_eq!(parser.next_token(), Token::make_keyword("SELECT")); parser.prev_token(); @@ -7650,7 +7650,7 @@ mod tests { // The expected name should be identical as the input name, that's why I don't receive both macro_rules! test_parse_schema_name { ($input:expr, $expected_name:expr $(,)?) => {{ - all_dialects(None).run_parser_method(&*$input, |parser| { + all_dialects().run_parser_method(&*$input, |parser| { let schema_name = parser.parse_schema_name().unwrap(); // Validate that the structure is the same as expected assert_eq!(schema_name, $expected_name); @@ -7862,7 +7862,7 @@ mod tests { fn test_parse_multipart_identifier_negative() { macro_rules! test_parse_multipart_identifier_error { ($input:expr, $expected_err:expr $(,)?) 
=> {{ - all_dialects(None).run_parser_method(&*$input, |parser| { + all_dialects().run_parser_method(&*$input, |parser| { let actual_err = parser.parse_multipart_identifier().unwrap_err(); assert_eq!(actual_err.to_string(), $expected_err); }); diff --git a/src/test_utils.rs b/src/test_utils.rs index 380907e70..47fb00d5d 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -155,7 +155,7 @@ impl TestedDialects { } } -pub fn all_dialects(options: Option) -> TestedDialects { +pub fn all_dialects() -> TestedDialects { TestedDialects { dialects: vec![ Box::new(GenericDialect {}), @@ -170,7 +170,7 @@ pub fn all_dialects(options: Option) -> TestedDialects { Box::new(SQLiteDialect {}), Box::new(DuckDbDialect {}), ], - options, + options: None, } } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1e962968c..356926e13 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -373,14 +373,14 @@ fn parse_select_with_table_alias() { #[test] fn parse_invalid_table_name() { - let ast = all_dialects(None) + let ast = all_dialects() .run_parser_method("db.public..customer", |parser| parser.parse_object_name()); assert!(ast.is_err()); } #[test] fn parse_no_table_name() { - let ast = all_dialects(None).run_parser_method("", |parser| parser.parse_object_name()); + let ast = all_dialects().run_parser_method("", |parser| parser.parse_object_name()); assert!(ast.is_err()); } @@ -898,7 +898,7 @@ fn parse_invalid_infix_not() { fn parse_collate() { let sql = "SELECT name COLLATE \"de_DE\" FROM customer"; assert_matches!( - only(&all_dialects(None).verified_only_select(sql).projection), + only(&all_dialects().verified_only_select(sql).projection), SelectItem::UnnamedExpr(Expr::Collate { .. 
}) ); } @@ -907,7 +907,7 @@ fn parse_collate() { fn parse_collate_after_parens() { let sql = "SELECT (name) COLLATE \"de_DE\" FROM customer"; assert_matches!( - only(&all_dialects(None).verified_only_select(sql).projection), + only(&all_dialects().verified_only_select(sql).projection), SelectItem::UnnamedExpr(Expr::Collate { .. }) ); } @@ -1154,15 +1154,15 @@ fn pg_and_generic() -> TestedDialects { fn parse_json_ops_without_colon() { use self::JsonOperator; let binary_ops = &[ - ("->", JsonOperator::Arrow, all_dialects(None)), - ("->>", JsonOperator::LongArrow, all_dialects(None)), + ("->", JsonOperator::Arrow, all_dialects()), + ("->>", JsonOperator::LongArrow, all_dialects()), ("#>", JsonOperator::HashArrow, pg_and_generic()), ("#>>", JsonOperator::HashLongArrow, pg_and_generic()), - ("@>", JsonOperator::AtArrow, all_dialects(None)), - ("<@", JsonOperator::ArrowAt, all_dialects(None)), + ("@>", JsonOperator::AtArrow, all_dialects()), + ("<@", JsonOperator::ArrowAt, all_dialects()), ("#-", JsonOperator::HashMinus, pg_and_generic()), - ("@?", JsonOperator::AtQuestion, all_dialects(None)), - ("@@", JsonOperator::AtAt, all_dialects(None)), + ("@?", JsonOperator::AtQuestion, all_dialects()), + ("@@", JsonOperator::AtAt, all_dialects()), ]; for (str_op, op, dialects) in binary_ops { @@ -1473,7 +1473,7 @@ fn parse_string_agg() { /// selects all dialects but PostgreSQL pub fn all_dialects_but_pg() -> TestedDialects { TestedDialects { - dialects: all_dialects(None) + dialects: all_dialects() .dialects .into_iter() .filter(|x| !x.is::()) @@ -1486,8 +1486,8 @@ pub fn all_dialects_but_pg() -> TestedDialects { fn parse_bitwise_ops() { let bitwise_ops = &[ ("^", BinaryOperator::BitwiseXor, all_dialects_but_pg()), - ("|", BinaryOperator::BitwiseOr, all_dialects(None)), - ("&", BinaryOperator::BitwiseAnd, all_dialects(None)), + ("|", BinaryOperator::BitwiseOr, all_dialects()), + ("&", BinaryOperator::BitwiseAnd, all_dialects()), ]; for (str_op, op, dialects) in bitwise_ops { @@ 
-2716,7 +2716,7 @@ fn parse_create_table_clone() { #[test] fn parse_create_table_trailing_comma() { let sql = "CREATE TABLE foo (bar int,)"; - all_dialects(None).one_statement_parses_to(sql, "CREATE TABLE foo (bar INT)"); + all_dialects().one_statement_parses_to(sql, "CREATE TABLE foo (bar INT)"); } #[test] @@ -4875,7 +4875,7 @@ fn parse_ctes() { #[test] fn parse_cte_renamed_columns() { let sql = "WITH cte (col1, col2) AS (SELECT foo, bar FROM baz) SELECT * FROM cte"; - let query = all_dialects(None).verified_query(sql); + let query = all_dialects().verified_query(sql); assert_eq!( vec![Ident::new("col1"), Ident::new("col2")], query @@ -6589,27 +6589,27 @@ fn all_keywords_sorted() { } fn parse_sql_statements(sql: &str) -> Result, ParserError> { - all_dialects(None).parse_sql_statements(sql) + all_dialects().parse_sql_statements(sql) } fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement { - all_dialects(None).one_statement_parses_to(sql, canonical) + all_dialects().one_statement_parses_to(sql, canonical) } fn verified_stmt(query: &str) -> Statement { - all_dialects(None).verified_stmt(query) + all_dialects().verified_stmt(query) } fn verified_query(query: &str) -> Query { - all_dialects(None).verified_query(query) + all_dialects().verified_query(query) } fn verified_only_select(query: &str) -> Select { - all_dialects(None).verified_only_select(query) + all_dialects().verified_only_select(query) } fn verified_expr(query: &str) -> Expr { - all_dialects(None).verified_expr(query) + all_dialects().verified_expr(query) } #[test] @@ -6909,7 +6909,7 @@ fn parse_cache_table() { let sql = "SELECT a, b, c FROM foo"; let cache_table_name = "cache_table_name"; let table_flag = "flag"; - let query = all_dialects(None).verified_query(sql); + let query = all_dialects().verified_query(sql); assert_eq!( verified_stmt(format!("CACHE TABLE '{cache_table_name}'").as_str()),