Skip to content

Commit 2b0e464

Browse files
committed
squash-merge: expand semicolon support
1 parent 930b0d9 commit 2b0e464

File tree

7 files changed

+842
-140
lines changed

7 files changed

+842
-140
lines changed

src/dialect/mod.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,8 +1028,14 @@ pub trait Dialect: Debug + Any {
10281028
/// Returns true if the specified keyword should be parsed as a table factor alias.
10291029
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
10301030
/// to enable looking ahead if needed.
1031-
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
1032-
explicit || self.is_table_alias(kw, parser)
1031+
///
1032+
/// When the dialect supports statements without semicolon delimiter, actual keywords aren't parsed as aliases.
1033+
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
1034+
if self.supports_statements_without_semicolon_delimiter() {
1035+
kw == &Keyword::NoKeyword
1036+
} else {
1037+
explicit || self.is_table_alias(kw, _parser)
1038+
}
10331039
}
10341040

10351041
/// Returns true if this dialect supports querying historical table data
@@ -1122,6 +1128,11 @@ pub trait Dialect: Debug + Any {
11221128
) -> bool {
11231129
false
11241130
}
1131+
1132+
/// Returns true if the dialect supports parsing statements without a semicolon delimiter.
///
/// The default implementation returns `false`; a dialect opts in by overriding this method.
1133+
fn supports_statements_without_semicolon_delimiter(&self) -> bool {
1134+
false
1135+
}
11251136
}
11261137

11271138
/// This represents the operators for which precedence must be defined

src/dialect/mssql.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ impl Dialect for MsSqlDialect {
6767
}
6868

6969
// NOTE(review): this hunk flips the `CONNECT BY` flag for this dialect from `true`
// (removed line) to `false` (added line) inside a commit titled "expand semicolon
// support" — confirm the change is intentional.
fn supports_connect_by(&self) -> bool {
70-
true
70+
false
7171
}
7272

7373
fn supports_eq_alias_assignment(&self) -> bool {
@@ -123,6 +123,10 @@ impl Dialect for MsSqlDialect {
123123
true
124124
}
125125

126+
/// Reports that this dialect supports parsing statements without a semicolon delimiter.
fn supports_statements_without_semicolon_delimiter(&self) -> bool {
127+
true
128+
}
129+
126130
/// See <https://learn.microsoft.com/en-us/sql/relational-databases/security/authentication-access/server-level-roles>
127131
fn get_reserved_grantees_types(&self) -> &[GranteesType] {
128132
&[GranteesType::Public]
@@ -286,6 +290,9 @@ impl MsSqlDialect {
286290
) -> Result<Vec<Statement>, ParserError> {
287291
let mut stmts = Vec::new();
288292
loop {
293+
while let Token::SemiColon = parser.peek_token_ref().token {
294+
parser.advance_token();
295+
}
289296
if let Token::EOF = parser.peek_token_ref().token {
290297
break;
291298
}

src/keywords.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,6 +1072,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
10721072
Keyword::ANTI,
10731073
Keyword::SEMI,
10741074
Keyword::RETURNING,
1075+
Keyword::RETURN,
10751076
Keyword::ASOF,
10761077
Keyword::MATCH_CONDITION,
10771078
// for MSSQL-specific OUTER APPLY (seems reserved in most dialects)
@@ -1097,6 +1098,11 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
10971098
Keyword::TABLESAMPLE,
10981099
Keyword::FROM,
10991100
Keyword::OPEN,
1101+
Keyword::INSERT,
1102+
Keyword::UPDATE,
1103+
Keyword::DELETE,
1104+
Keyword::EXEC,
1105+
Keyword::EXECUTE,
11001106
];
11011107

11021108
/// Can't be used as a column alias, so that `SELECT <expr> alias`
@@ -1126,6 +1132,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
11261132
Keyword::CLUSTER,
11271133
Keyword::DISTRIBUTE,
11281134
Keyword::RETURNING,
1135+
Keyword::RETURN,
11291136
// Reserved only as a column alias in the `SELECT` clause
11301137
Keyword::FROM,
11311138
Keyword::INTO,
@@ -1140,6 +1147,7 @@ pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[
11401147
Keyword::LIMIT,
11411148
Keyword::HAVING,
11421149
Keyword::WHERE,
1150+
Keyword::RETURN,
11431151
];
11441152

11451153
/// Global list of reserved keywords that cannot be parsed as identifiers
@@ -1150,4 +1158,5 @@ pub const RESERVED_FOR_IDENTIFIER: &[Keyword] = &[
11501158
Keyword::INTERVAL,
11511159
Keyword::STRUCT,
11521160
Keyword::TRIM,
1161+
Keyword::RETURN,
11531162
];

src/parser/mod.rs

Lines changed: 71 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,22 @@ impl ParserOptions {
265265
self.unescape = unescape;
266266
self
267267
}
268+
269+
/// Set if semicolon statement delimiters are required.
270+
///
271+
/// If this option is `true`, the following SQL will not parse. If the option is `false`, the SQL will parse.
272+
///
273+
/// ```sql
274+
/// SELECT 1
275+
/// SELECT 2
276+
/// ```
277+
pub fn with_require_semicolon_stmt_delimiter(
278+
mut self,
279+
require_semicolon_stmt_delimiter: bool,
280+
) -> Self {
281+
self.require_semicolon_stmt_delimiter = require_semicolon_stmt_delimiter;
282+
self
283+
}
268284
}
269285

270286
#[derive(Copy, Clone)]
@@ -355,7 +371,11 @@ impl<'a> Parser<'a> {
355371
state: ParserState::Normal,
356372
dialect,
357373
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
358-
options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
374+
options: ParserOptions::new()
375+
.with_trailing_commas(dialect.supports_trailing_commas())
376+
.with_require_semicolon_stmt_delimiter(
377+
!dialect.supports_statements_without_semicolon_delimiter(),
378+
),
359379
}
360380
}
361381

@@ -478,10 +498,10 @@ impl<'a> Parser<'a> {
478498
match self.peek_token().token {
479499
Token::EOF => break,
480500

481-
// end of statement
482-
Token::Word(word) => {
483-
if expecting_statement_delimiter && word.keyword == Keyword::END {
484-
break;
501+
// don't expect a semicolon statement delimiter after a newline when not otherwise required
502+
Token::Whitespace(Whitespace::Newline) => {
503+
if !self.options.require_semicolon_stmt_delimiter {
504+
expecting_statement_delimiter = false;
485505
}
486506

487507
if expecting_statement_delimiter && word.keyword == Keyword::GO {
@@ -496,9 +516,9 @@ impl<'a> Parser<'a> {
496516
}
497517

498518
let statement = self.parse_statement()?;
499-
// Treat batch delimiter as an end of statement, so no additional statement delimiter expected here
500-
expecting_statement_delimiter = !matches!(statement, Statement::Go(_));
501519
stmts.push(statement);
520+
// Treat batch delimiter as an end of statement, so no additional statement delimiter expected here
521+
expecting_statement_delimiter = !matches!(statement, Statement::Go(_)) && self.options.require_semicolon_stmt_delimiter;
502522
}
503523
Ok(stmts)
504524
}
@@ -4559,6 +4579,18 @@ impl<'a> Parser<'a> {
45594579
return Ok(vec![]);
45604580
}
45614581

4582+
if end_token == Token::SemiColon
4583+
&& self
4584+
.dialect
4585+
.supports_statements_without_semicolon_delimiter()
4586+
{
4587+
if let Token::Word(ref kw) = self.peek_token().token {
4588+
if kw.keyword != Keyword::NoKeyword {
4589+
return Ok(vec![]);
4590+
}
4591+
}
4592+
}
4593+
45624594
if self.options.trailing_commas && self.peek_tokens() == [Token::Comma, end_token] {
45634595
let _ = self.consume_token(&Token::Comma);
45644596
return Ok(vec![]);
@@ -4576,6 +4608,9 @@ impl<'a> Parser<'a> {
45764608
) -> Result<Vec<Statement>, ParserError> {
45774609
let mut values = vec![];
45784610
loop {
4611+
// ignore empty statements (between successive statement delimiters)
4612+
while self.consume_token(&Token::SemiColon) {}
4613+
45794614
match &self.peek_nth_token_ref(0).token {
45804615
Token::EOF => break,
45814616
Token::Word(w) => {
@@ -4587,7 +4622,13 @@ impl<'a> Parser<'a> {
45874622
}
45884623

45894624
values.push(self.parse_statement()?);
4590-
self.expect_token(&Token::SemiColon)?;
4625+
4626+
if self.options.require_semicolon_stmt_delimiter {
4627+
self.expect_token(&Token::SemiColon)?;
4628+
}
4629+
4630+
// ignore empty statements (between successive statement delimiters)
4631+
while self.consume_token(&Token::SemiColon) {}
45914632
}
45924633
Ok(values)
45934634
}
@@ -16413,7 +16454,28 @@ impl<'a> Parser<'a> {
1641316454

1641416455
/// Parse [Statement::Return]
1641516456
fn parse_return(&mut self) -> Result<Statement, ParserError> {
16416-
match self.maybe_parse(|p| p.parse_expr())? {
16457+
let rs = self.maybe_parse(|p| {
16458+
let expr = p.parse_expr()?;
16459+
16460+
match &expr {
16461+
Expr::Value(_)
16462+
| Expr::Function(_)
16463+
| Expr::UnaryOp { .. }
16464+
| Expr::BinaryOp { .. }
16465+
| Expr::Case { .. }
16466+
| Expr::Cast { .. }
16467+
| Expr::Convert { .. }
16468+
| Expr::Subquery(_) => Ok(expr),
16469+
// todo: how to restrict to variables?
16470+
Expr::Identifier(id) if id.value.starts_with('@') => Ok(expr),
16471+
_ => parser_err!(
16472+
"Non-returnable expression found following RETURN",
16473+
p.peek_token().span.start
16474+
),
16475+
}
16476+
})?;
16477+
16478+
match rs {
1641716479
Some(expr) => Ok(Statement::Return(ReturnStatement {
1641816480
value: Some(ReturnStatementValue::Expr(expr)),
1641916481
})),

src/test_utils.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#[cfg(not(feature = "std"))]
2626
use alloc::{
2727
boxed::Box,
28+
format,
2829
string::{String, ToString},
2930
vec,
3031
vec::Vec,
@@ -193,6 +194,37 @@ impl TestedDialects {
193194
statements
194195
}
195196

197+
/// The same as [`statements_parse_to`] but it will strip semicolons from the SQL text.
198+
pub fn statements_without_semicolons_parse_to(
199+
&self,
200+
sql: &str,
201+
canonical: &str,
202+
) -> Vec<Statement> {
203+
let sql_without_semicolons = sql
204+
.replace("; ", " ")
205+
.replace(" ;", " ")
206+
.replace(";\n", "\n")
207+
.replace("\n;", "\n")
208+
.replace(";", " ");
209+
let statements = self
210+
.parse_sql_statements(&sql_without_semicolons)
211+
.expect(&sql_without_semicolons);
212+
if !canonical.is_empty() && sql != canonical {
213+
assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements);
214+
} else {
215+
assert_eq!(
216+
sql,
217+
statements
218+
.iter()
219+
// note: account for format_statement_list manually inserted semicolons
220+
.map(|s| s.to_string().trim_end_matches(";").to_string())
221+
.collect::<Vec<_>>()
222+
.join("; ")
223+
);
224+
}
225+
statements
226+
}
227+
196228
/// Ensures that `sql` parses as an [`Expr`], and that
197229
/// re-serializing the parse result produces canonical
198230
pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr {
@@ -325,6 +357,43 @@ where
325357
all_dialects_where(|d| !except(d))
326358
}
327359

360+
/// Returns all dialects that don't support statements without semicolon delimiters.
361+
/// (i.e. dialects that require semicolon delimiters.)
362+
pub fn all_dialects_requiring_semicolon_statement_delimiter() -> TestedDialects {
363+
let tested_dialects =
364+
all_dialects_except(|d| d.supports_statements_without_semicolon_delimiter());
365+
assert_ne!(tested_dialects.dialects.len(), 0);
366+
tested_dialects
367+
}
368+
369+
/// Returns all dialects that do support statements without semicolon delimiters.
370+
/// (i.e. dialects not requiring semicolon delimiters.)
371+
pub fn all_dialects_not_requiring_semicolon_statement_delimiter() -> TestedDialects {
372+
let tested_dialects =
373+
all_dialects_where(|d| d.supports_statements_without_semicolon_delimiter());
374+
assert_ne!(tested_dialects.dialects.len(), 0);
375+
tested_dialects
376+
}
377+
378+
/// Asserts an error for `parse_sql_statements`:
379+
/// - "end of statement" for dialects that require semicolon delimiters
380+
/// - "an SQL statement" for dialects that don't require semicolon delimiters.
381+
pub fn assert_err_parse_statements(sql: &str, found: &str) {
382+
assert_eq!(
383+
ParserError::ParserError(format!("Expected: end of statement, found: {}", found)),
384+
all_dialects_requiring_semicolon_statement_delimiter()
385+
.parse_sql_statements(sql)
386+
.unwrap_err()
387+
);
388+
389+
assert_eq!(
390+
ParserError::ParserError(format!("Expected: an SQL statement, found: {}", found)),
391+
all_dialects_not_requiring_semicolon_statement_delimiter()
392+
.parse_sql_statements(sql)
393+
.unwrap_err()
394+
);
395+
}
396+
328397
pub fn assert_eq_vec<T: ToString>(expected: &[&str], actual: &[T]) {
329398
assert_eq!(
330399
expected,

0 commit comments

Comments
 (0)