Skip to content

Commit 2316432

Browse files
committed
syntax: fix bug when parsing ((?x))
This fixes yet another bug with our handling of (?flags) directives in the regex. This time, we try to be a bit more principled and specifically treat a (?flags) directive as a valid empty sub-expression. While this means we could remove the errors introduced by previous fixes for things like `(?i)+`, we retain them for now since such patterns are a bit weird, even though the equivalent `((?i))+` is now allowed. We should probably allow `(?i)+` in the future for consistency's sake. Fixes #527
1 parent 7b1599f commit 2316432

File tree

3 files changed

+22
-5
lines changed

3 files changed

+22
-5
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ Performance improvements:
1010

1111
Bug fixes:
1212

13+
* [BUG #527](https://github.com/rust-lang/regex/issues/527):
14+
Fix a bug where the parser would panic on patterns like `((?x))`.
1315
* [BUG #555](https://github.com/rust-lang/regex/issues/555):
1416
Fix a bug where the parser would panic on patterns like `(?m){1,1}`.
1517
* [BUG #557](https://github.com/rust-lang/regex/issues/557):

regex-syntax/src/hir/translate.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -240,11 +240,6 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
240240
type Err = Error;
241241

242242
fn finish(self) -> Result<Hir> {
243-
if self.trans().stack.borrow().is_empty() {
244-
// This can happen if the Ast given consists of a single set of
245-
// flags. e.g., `(?i)`. /shrug
246-
return Ok(Hir::empty());
247-
}
248243
// ... otherwise, we should have exactly one HIR on the stack.
249244
assert_eq!(self.trans().stack.borrow().len(), 1);
250245
Ok(self.pop().unwrap().unwrap_expr())
@@ -287,6 +282,16 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
287282
}
288283
Ast::Flags(ref x) => {
289284
self.set_flags(&x.flags);
285+
// Flags in the AST are generally considered directives and
286+
// not actual sub-expressions. However, they can be used in
287+
// the concrete syntax like `((?i))`, and we need some kind of
288+
// indication of an expression there, and Empty is the correct
289+
// choice.
290+
//
291+
// There can also be things like `(?i)+`, but we rule those out
292+
// in the parser. In the future, we might allow them for
293+
// consistency's sake.
294+
self.push(HirFrame::Expr(Hir::empty()));
290295
}
291296
Ast::Literal(ref x) => {
292297
self.push(HirFrame::Expr(self.hir_literal(x)?));
@@ -1547,6 +1552,10 @@ mod tests {
15471552
hir_group_name(2, "foo", hir_lit("b")),
15481553
hir_group(3, hir_lit("c")),
15491554
]));
1555+
assert_eq!(t("()"), hir_group(1, Hir::empty()));
1556+
assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
1557+
assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
1558+
assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
15501559
}
15511560

15521561
#[test]

tests/regression.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ fn regression_invalid_repetition_expr() {
2020
assert!(regex_new!("(?m){1,1}").is_err());
2121
}
2222

23+
// See: https://github.com/rust-lang/regex/issues/527
24+
#[test]
25+
fn regression_invalid_flags_expression() {
26+
assert!(regex_new!("(((?x)))").is_ok());
27+
}
28+
2329
// See: https://github.com/rust-lang/regex/issues/75
2430
mat!(regression_unsorted_binary_search_1, r"(?i)[a_]+", "A_", Some((0, 2)));
2531
mat!(regression_unsorted_binary_search_2, r"(?i)[A_]+", "a_", Some((0, 2)));

0 commit comments

Comments
 (0)