diff --git a/src/ast/value.rs b/src/ast/value.rs index 491553cac..9c18a325c 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -71,7 +71,7 @@ impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), - Value::DoubleQuotedString(v) => write!(f, "\"{v}\""), + Value::DoubleQuotedString(v) => write!(f, "\"{}\"", escape_double_quote_string(v)), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), Value::DollarQuotedString(v) => write!(f, "{v}"), Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), @@ -187,12 +187,49 @@ pub struct EscapeQuotedString<'a> { impl<'a> fmt::Display for EscapeQuotedString<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for c in self.string.chars() { - if c == self.quote { - write!(f, "{q}{q}", q = self.quote)?; - } else { - write!(f, "{c}")?; + // EscapeQuotedString doesn't know which mode of escape was + // chosen by the user. So this code must correctly display + // strings without knowing if the strings are already escaped + // or not. + // + // If the quote symbol in the string is repeated twice, OR, if + // the quote symbol is after backslash, display all the chars + // without any escape. However, if the quote symbol is used + // just between usual chars, `fmt()` should display it twice.
+ // + // The following table has examples + // + // | original query | mode | AST Node | serialized | + // | ------------- | --------- | -------------------------------------------------- | ------------ | + // | `"A""B""A"` | no-escape | `DoubleQuotedString(String::from("A\"\"B\"\"A"))` | `"A""B""A"` | + // | `"A""B""A"` | default | `DoubleQuotedString(String::from("A\"B\"A"))` | `"A""B""A"` | + // | `"A\"B\"A"` | no-escape | `DoubleQuotedString(String::from("A\\\"B\\\"A"))` | `"A\"B\"A"` | + // | `"A\"B\"A"` | default | `DoubleQuotedString(String::from("A\"B\"A"))` | `"A""B""A"` | + let quote = self.quote; + let mut previous_char = char::default(); + let mut peekable_chars = self.string.chars().peekable(); + while let Some(&ch) = peekable_chars.peek() { + match ch { + char if char == quote => { + if previous_char == '\\' { + write!(f, "{char}")?; + peekable_chars.next(); + continue; + } + peekable_chars.next(); + if peekable_chars.peek().map(|c| *c == quote).unwrap_or(false) { + write!(f, "{char}{char}")?; + peekable_chars.next(); + } else { + write!(f, "{char}{char}")?; + } + } + _ => { + write!(f, "{ch}")?; + peekable_chars.next(); + } } + previous_char = ch; } Ok(()) } @@ -206,6 +243,10 @@ pub fn escape_single_quote_string(s: &str) -> EscapeQuotedString<'_> { escape_quoted_string(s, '\'') } +pub fn escape_double_quote_string(s: &str) -> EscapeQuotedString<'_> { + escape_quoted_string(s, '\"') +} + pub struct EscapeEscapedStringLiteral<'a>(&'a str); impl<'a> fmt::Display for EscapeEscapedStringLiteral<'a> { diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 81343220a..8aa038db9 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -632,8 +632,7 @@ mod tests { fn do_visit(sql: &str) -> Vec { let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let s = Parser::new(&dialect) .with_tokens(tokens) 
.parse_statement() diff --git a/src/parser.rs b/src/parser.rs index 4d331ce07..790ba8fbb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -195,9 +195,52 @@ impl std::error::Error for ParserError {} // By default, allow expressions up to this deep before erroring const DEFAULT_REMAINING_DEPTH: usize = 50; -#[derive(Debug, Default, Clone, PartialEq, Eq)] +/// Options that control how the [`Parser`] parses SQL text +#[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserOptions { pub trailing_commas: bool, + /// Controls how literal values are unescaped. See + /// [`Tokenizer::with_unescape`] for more details. + pub unescape: bool, +} + +impl Default for ParserOptions { + fn default() -> Self { + Self { + trailing_commas: false, + unescape: true, + } + } +} + +impl ParserOptions { + /// Create a new [`ParserOptions`] + pub fn new() -> Self { + Default::default() + } + + /// Set if trailing commas are allowed. + /// + /// If this option is `false` (the default), the following SQL will + /// not parse. If the option is `true`, the SQL will parse. + /// + /// ```sql + /// SELECT + /// foo, + /// bar, + /// FROM baz + /// ``` + pub fn with_trailing_commas(mut self, trailing_commas: bool) -> Self { + self.trailing_commas = trailing_commas; + self + } + + /// Set if literal values are unescaped. Defaults to true. See + /// [`Tokenizer::with_unescape`] for more details. + pub fn with_unescape(mut self, unescape: bool) -> Self { + self.unescape = unescape; + self + } } pub struct Parser<'a> { @@ -206,8 +249,9 @@ pub struct Parser<'a> { index: usize, /// The current dialect to use dialect: &'a dyn Dialect, - /// Additional options that allow you to mix & match behavior otherwise - /// constrained to certain dialects (e.g. trailing commas) + /// Additional options that allow you to mix & match behavior + /// otherwise constrained to certain dialects (e.g. trailing + /// commas) and/or format of parse (e.g. 
unescaping) options: ParserOptions, /// ensure the stack does not overflow by limiting recursion depth recursion_counter: RecursionCounter, @@ -267,17 +311,20 @@ impl<'a> Parser<'a> { /// Specify additional parser options /// /// - /// [`Parser`] supports additional options ([`ParserOptions`]) that allow you to - /// mix & match behavior otherwise constrained to certain dialects (e.g. trailing - /// commas). + /// [`Parser`] supports additional options ([`ParserOptions`]) + /// that allow you to mix & match behavior otherwise constrained + /// to certain dialects (e.g. trailing commas). /// /// Example: /// ``` /// # use sqlparser::{parser::{Parser, ParserError, ParserOptions}, dialect::GenericDialect}; /// # fn main() -> Result<(), ParserError> { /// let dialect = GenericDialect{}; + /// let options = ParserOptions::new() + /// .with_trailing_commas(true) + /// .with_unescape(false); /// let result = Parser::new(&dialect) - /// .with_options(ParserOptions { trailing_commas: true }) + /// .with_options(options) /// .try_with_sql("SELECT a, b, COUNT(*), FROM foo GROUP BY a, b,")? 
/// .parse_statements(); /// assert!(matches!(result, Ok(_))); @@ -317,8 +364,9 @@ impl<'a> Parser<'a> { /// See example on [`Parser::new()`] for an example pub fn try_with_sql(self, sql: &str) -> Result { debug!("Parsing sql '{}'...", sql); - let mut tokenizer = Tokenizer::new(self.dialect, sql); - let tokens = tokenizer.tokenize()?; + let tokens = Tokenizer::new(self.dialect, sql) + .with_unescape(self.options.unescape) + .tokenize()?; Ok(self.with_tokens(tokens)) } @@ -3654,7 +3702,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(Some(ColumnOption::Check(expr))) } else if self.parse_keyword(Keyword::AUTO_INCREMENT) - && dialect_of!(self is MySqlDialect | GenericDialect) + && dialect_of!(self is MySqlDialect | GenericDialect) { // Support AUTO_INCREMENT for MySQL Ok(Some(ColumnOption::DialectSpecific(vec![ diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 6a135b439..83e9f317e 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -455,12 +455,69 @@ impl<'a> State<'a> { pub struct Tokenizer<'a> { dialect: &'a dyn Dialect, query: &'a str, + /// If true (the default), the tokenizer will un-escape literal + /// SQL strings See [`Tokenizer::with_unescape`] for more details. 
+ unescape: bool, } impl<'a> Tokenizer<'a> { /// Create a new SQL tokenizer for the specified SQL statement + /// + /// ``` + /// # use sqlparser::tokenizer::{Token, Whitespace, Tokenizer}; + /// # use sqlparser::dialect::GenericDialect; + /// # let dialect = GenericDialect{}; + /// let query = r#"SELECT 'foo'"#; + /// + /// // Parsing the query + /// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap(); + /// + /// assert_eq!(tokens, vec![ + /// Token::make_word("SELECT", None), + /// Token::Whitespace(Whitespace::Space), + /// Token::SingleQuotedString("foo".to_string()), + /// ]); pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self { - Self { dialect, query } + Self { + dialect, + query, + unescape: true, + } + } + + /// Set unescape mode + /// + /// When true (default) the tokenizer unescapes literal values + /// (for example, `""` in SQL is unescaped to the literal `"`). + /// + /// When false, the tokenizer provides the raw strings as provided + /// in the query. 
This can be helpful for programs that wish to + /// recover the *exact* original query text without normalizing + /// the escaping + /// + /// # Example + /// + /// ``` + /// # use sqlparser::tokenizer::{Token, Tokenizer}; + /// # use sqlparser::dialect::GenericDialect; + /// # let dialect = GenericDialect{}; + /// let query = r#""Foo "" Bar""#; + /// let unescaped = Token::make_word(r#"Foo " Bar"#, Some('"')); + /// let original = Token::make_word(r#"Foo "" Bar"#, Some('"')); + /// + /// // Parsing with unescaping (default) + /// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap(); + /// assert_eq!(tokens, vec![unescaped]); + /// + /// // Parsing with unescape = false + /// let tokens = Tokenizer::new(&dialect, &query) + /// .with_unescape(false) + /// .tokenize().unwrap(); + /// assert_eq!(tokens, vec![original]); + /// ``` + pub fn with_unescape(mut self, unescape: bool) -> Self { + self.unescape = unescape; + self } /// Tokenize the statement and produce a vector of tokens @@ -650,7 +707,7 @@ impl<'a> Tokenizer<'a> { let error_loc = chars.location(); chars.next(); // consume the opening quote let quote_end = Word::matching_end_quote(quote_start); - let (s, last_char) = parse_quoted_ident(chars, quote_end); + let (s, last_char) = self.parse_quoted_ident(chars, quote_end); if last_char == Some(quote_end) { Ok(Some(Token::make_word(&s, Some(quote_start)))) @@ -1168,6 +1225,10 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume if chars.peek().map(|c| *c == quote_style).unwrap_or(false) { s.push(ch); + if !self.unescape { + // In no-escape mode, the given query has to be saved completely + s.push(ch); + } chars.next(); } else { return Ok(s); @@ -1176,22 +1237,29 @@ impl<'a> Tokenizer<'a> { '\\' => { // consume chars.next(); - // slash escaping is specific to MySQL dialect + // slash escaping is specific to MySQL dialect. 
if dialect_of!(self is MySqlDialect) { if let Some(next) = chars.peek() { - // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences - let n = match next { - '\'' | '\"' | '\\' | '%' | '_' => *next, - '0' => '\0', - 'b' => '\u{8}', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'Z' => '\u{1a}', - _ => *next, - }; - s.push(n); - chars.next(); // consume next + if !self.unescape { + // In no-escape mode, the given query has to be saved completely including backslashes. + s.push(ch); + s.push(*next); + chars.next(); // consume next + } else { + // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences + let n = match next { + '\'' | '\"' | '\\' | '%' | '_' => *next, + '0' => '\0', + 'b' => '\u{8}', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'Z' => '\u{1a}', + _ => *next, + }; + s.push(n); + chars.next(); // consume next + } } } else { s.push(ch); @@ -1239,6 +1307,29 @@ impl<'a> Tokenizer<'a> { } } + fn parse_quoted_ident(&self, chars: &mut State, quote_end: char) -> (String, Option) { + let mut last_char = None; + let mut s = String::new(); + while let Some(ch) = chars.next() { + if ch == quote_end { + if chars.peek() == Some("e_end) { + chars.next(); + s.push(ch); + if !self.unescape { + // In no-escape mode, the given query has to be saved completely + s.push(ch); + } + } else { + last_char = Some(quote_end); + break; + } + } else { + s.push(ch); + } + } + (s, last_char) + } + #[allow(clippy::unnecessary_wraps)] fn consume_and_return( &self, @@ -1266,25 +1357,6 @@ fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool s } -fn parse_quoted_ident(chars: &mut State, quote_end: char) -> (String, Option) { - let mut last_char = None; - let mut s = String::new(); - while let Some(ch) = chars.next() { - if ch == quote_end { - if chars.peek() == Some("e_end) { - chars.next(); - s.push(ch); - } else { - last_char = Some(quote_end); - break; - } - } else { - s.push(ch); - 
} - } - (s, last_char) -} - #[cfg(test)] mod tests { use super::*; @@ -1309,8 +1381,7 @@ mod tests { fn tokenize_select_1() { let sql = String::from("SELECT 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1325,8 +1396,7 @@ mod tests { fn tokenize_select_float() { let sql = String::from("SELECT .1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1341,8 +1411,7 @@ mod tests { fn tokenize_select_exponent() { let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1376,8 +1445,7 @@ mod tests { fn tokenize_scalar_function() { let sql = String::from("SELECT sqrt(1)"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1395,8 +1463,7 @@ mod tests { fn tokenize_string_string_concat() { let sql = String::from("SELECT 'a' || 'b'"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1414,8 +1481,7 @@ mod tests { fn tokenize_bitwise_op() { let sql = String::from("SELECT one | two ^ 
three"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1438,8 +1504,7 @@ mod tests { let sql = String::from("SELECT true XOR true, false XOR false, true XOR false, false XOR true"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1478,8 +1543,7 @@ mod tests { fn tokenize_simple_select() { let sql = String::from("SELECT * FROM customer WHERE id = 1 LIMIT 5"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1510,8 +1574,7 @@ mod tests { fn tokenize_explain_select() { let sql = String::from("EXPLAIN SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("EXPLAIN"), @@ -1540,8 +1603,7 @@ mod tests { fn tokenize_explain_analyze_select() { let sql = String::from("EXPLAIN ANALYZE SELECT * FROM customer WHERE id = 1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("EXPLAIN"), @@ -1572,8 +1634,7 @@ mod tests { fn tokenize_string_predicate() { let sql = String::from("SELECT * FROM customer WHERE salary != 'Not Provided'"); let dialect = 
GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), @@ -1601,8 +1662,7 @@ mod tests { let sql = String::from("\n💝مصطفىh"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ Token::Whitespace(Whitespace::Newline), @@ -1617,8 +1677,7 @@ mod tests { let sql = String::from("'foo\r\nbar\nbaz'"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())]; compare(expected, tokens); } @@ -1660,8 +1719,7 @@ mod tests { let sql = String::from("\n\nSELECT * FROM table\t💝مصطفىh"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ Token::Whitespace(Whitespace::Newline), @@ -1684,8 +1742,7 @@ mod tests { fn tokenize_right_arrow() { let sql = String::from("FUNCTION(key=>value)"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("FUNCTION", None), Token::LParen, @@ -1701,8 +1758,7 @@ mod tests { fn tokenize_is_null() { let sql = String::from("a IS NULL"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + 
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("a", None), @@ -1720,8 +1776,7 @@ mod tests { let sql = String::from("0--this is a comment\n1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::SingleLineComment { @@ -1738,8 +1793,7 @@ mod tests { let sql = String::from("--this is a comment"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), comment: "this is a comment".to_string(), @@ -1752,8 +1806,7 @@ mod tests { let sql = String::from("0/*multi-line\n* /comment*/1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::MultiLineComment( @@ -1769,8 +1822,7 @@ mod tests { let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::MultiLineComment( @@ -1786,8 +1838,7 @@ mod tests { let sql = String::from("\n/** Comment **/\n"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = 
Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())), @@ -1801,8 +1852,7 @@ mod tests { let sql = String::from(" \u{2003}\n"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::Space), @@ -1832,8 +1882,7 @@ mod tests { let sql = String::from("line1\nline2\rline3\r\nline4\r"); let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("line1", None), Token::Whitespace(Whitespace::Newline), @@ -1851,8 +1900,7 @@ mod tests { fn tokenize_mssql_top() { let sql = "SELECT TOP 5 [bar] FROM foo"; let dialect = MsSqlDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), @@ -1873,8 +1921,7 @@ mod tests { fn tokenize_pg_regex_match() { let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'"; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), @@ -1912,8 +1959,7 @@ mod tests { fn tokenize_quoted_identifier() { let sql = r#" "a "" b" "a """ "c """"" "#; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); 
+ let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::Whitespace(Whitespace::Space), Token::make_word(r#"a " b"#, Some('"')), @@ -1926,12 +1972,33 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_quoted_identifier_with_no_escape() { + let sql = r#" "a "" b" "a """ "c """"" "#; + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, sql) + .with_unescape(false) + .tokenize() + .unwrap(); + let expected = vec![ + Token::Whitespace(Whitespace::Space), + Token::make_word(r#"a "" b"#, Some('"')), + Token::Whitespace(Whitespace::Space), + Token::make_word(r#"a """#, Some('"')), + Token::Whitespace(Whitespace::Space), + Token::make_word(r#"c """""#, Some('"')), + Token::Whitespace(Whitespace::Space), + ]; + compare(expected, tokens); + } + #[test] fn tokenize_with_location() { let sql = "SELECT a,\n b"; let dialect = GenericDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize_with_location().unwrap(); + let tokens = Tokenizer::new(&dialect, sql) + .tokenize_with_location() + .unwrap(); let expected = vec![ TokenWithLocation::new(Token::make_keyword("SELECT"), 1, 1), TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 1, 7), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 08451ac27..356926e13 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1004,11 +1004,13 @@ fn parse_select_with_date_column_name() { } #[test] -fn parse_escaped_single_quote_string_predicate() { +fn parse_escaped_single_quote_string_predicate_with_escape() { use self::BinaryOperator::*; let sql = "SELECT id, fname, lname FROM customer \ WHERE salary <> 'Jim''s salary'"; + let ast = verified_only_select(sql); + assert_eq!( Some(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("salary"))), @@ -1021,6 +1023,34 @@ fn parse_escaped_single_quote_string_predicate() { ); } +#[test] +fn 
parse_escaped_single_quote_string_predicate_with_no_escape() { + use self::BinaryOperator::*; + let sql = "SELECT id, fname, lname FROM customer \ + WHERE salary <> 'Jim''s salary'"; + + let ast = TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: Some( + ParserOptions::new() + .with_trailing_commas(true) + .with_unescape(false), + ), + } + .verified_only_select(sql); + + assert_eq!( + Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("salary"))), + op: NotEq, + right: Box::new(Expr::Value(Value::SingleQuotedString( + "Jim''s salary".to_string() + ))), + }), + ast.selection, + ); +} + #[test] fn parse_number() { let expr = verified_expr("1.0"); @@ -7264,9 +7294,7 @@ fn parse_non_latin_identifiers() { fn parse_trailing_comma() { let trailing_commas = TestedDialects { dialects: vec![Box::new(GenericDialect {})], - options: Some(ParserOptions { - trailing_commas: true, - }), + options: Some(ParserOptions::new().with_trailing_commas(true)), }; trailing_commas.one_statement_parses_to( diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 12d2cc733..ae95de2ea 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -18,6 +18,7 @@ use sqlparser::ast::Expr; use sqlparser::ast::Value; use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, MySqlDialect}; +use sqlparser::parser::ParserOptions; use sqlparser::tokenizer::Token; use test_utils::*; @@ -432,10 +433,14 @@ fn parse_quote_identifiers() { } #[test] -fn parse_quote_identifiers_2() { +fn parse_escaped_quote_identifiers_with_escape() { let sql = "SELECT `quoted `` identifier`"; assert_eq!( - mysql().verified_stmt(sql), + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: None, + } + .verified_stmt(sql), Statement::Query(Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { @@ -467,10 +472,56 @@ fn parse_quote_identifiers_2() { } #[test] -fn parse_quote_identifiers_3() { +fn 
parse_escaped_quote_identifiers_with_no_escape() { + let sql = "SELECT `quoted `` identifier`"; + assert_eq!( + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions { + trailing_commas: false, + unescape: false, + }), + } + .verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "quoted `` identifier".into(), + quote_style: Some('`'), + }))], + into: None, + from: vec![], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None + }))), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + locks: vec![], + })) + ); +} + +#[test] +fn parse_escaped_backticks_with_escape() { let sql = "SELECT ```quoted identifier```"; assert_eq!( - mysql().verified_stmt(sql), + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: None, + } + .verified_stmt(sql), Statement::Query(Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { @@ -501,6 +552,45 @@ fn parse_quote_identifiers_3() { ); } +#[test] +fn parse_escaped_backticks_with_no_escape() { + let sql = "SELECT ```quoted identifier```"; + assert_eq!( + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions::new().with_unescape(false)), + } + .verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "``quoted identifier``".into(), + quote_style: Some('`'), + }))], + into: None, + from: vec![], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, 
+ named_window: vec![], + qualify: None + }))), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + locks: vec![], + })) + ); +} + #[test] fn parse_unterminated_escape() { let sql = r#"SELECT 'I\'m not fine\'"#; @@ -513,9 +603,13 @@ fn parse_unterminated_escape() { } #[test] -fn parse_escaped_string() { +fn parse_escaped_string_with_escape() { fn assert_mysql_query_value(sql: &str, quoted: &str) { - let stmt = mysql().one_statement_parses_to(sql, ""); + let stmt = TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: None, + } + .one_statement_parses_to(sql, ""); match stmt { Statement::Query(query) => match *query.body { @@ -544,6 +638,95 @@ fn parse_escaped_string() { assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} a "); } +#[test] +fn parse_escaped_string_with_no_escape() { + fn assert_mysql_query_value(sql: &str, quoted: &str) { + let stmt = TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: Some(ParserOptions::new().with_unescape(false)), + } + .one_statement_parses_to(sql, ""); + + match stmt { + Statement::Query(query) => match *query.body { + SetExpr::Select(value) => { + let expr = expr_from_projection(only(&value.projection)); + assert_eq!( + *expr, + Expr::Value(Value::SingleQuotedString(quoted.to_string())) + ); + } + _ => unreachable!(), + }, + _ => unreachable!(), + }; + } + let sql = r#"SELECT 'I\'m fine'"#; + assert_mysql_query_value(sql, r#"I\'m fine"#); + + let sql = r#"SELECT 'I''m fine'"#; + assert_mysql_query_value(sql, r#"I''m fine"#); + + let sql = r#"SELECT 'I\"m fine'"#; + assert_mysql_query_value(sql, r#"I\"m fine"#); + + let sql = r#"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"#; + assert_mysql_query_value(sql, r#"Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ "#); +} + +#[test] +fn check_roundtrip_of_escaped_string() { + let options = Some(ParserOptions::new().with_unescape(false)); + + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + 
options: options.clone(), + } + .verified_stmt(r#"SELECT 'I\'m fine'"#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT 'I''m fine'"#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT 'I\\\'m fine'"#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT 'I\\\'m fine'"#); + + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT "I\"m fine""#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT "I""m fine""#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT "I\\\"m fine""#); + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options: options.clone(), + } + .verified_stmt(r#"SELECT "I\\\"m fine""#); + + TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + options, + } + .verified_stmt(r#"SELECT "I'm ''fine''""#); +} + #[test] fn parse_create_table_with_minimum_display_width() { let sql = "CREATE TABLE foo (bar_tinyint TINYINT(3), bar_smallint SMALLINT(5), bar_mediumint MEDIUMINT(6), bar_int INT(11), bar_bigint BIGINT(20))"; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 9a54c89cf..43ebb8b11 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -55,8 +55,7 @@ fn test_snowflake_create_transient_table() { fn test_snowflake_single_line_tokenize() { let sql = "CREATE TABLE# this is a comment \ntable_1"; let dialect = SnowflakeDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("CREATE"), @@ -72,8 
+71,7 @@ fn test_snowflake_single_line_tokenize() { assert_eq!(expected, tokens); let sql = "CREATE TABLE // this is a comment \ntable_1"; - let mut tokenizer = Tokenizer::new(&dialect, sql); - let tokens = tokenizer.tokenize().unwrap(); + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("CREATE"),