From 3570070ce803501a5e86f1c38759180a42e7d62a Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Thu, 15 May 2025 12:26:55 +0000 Subject: [PATCH] Parse optionals and repeats without regexes Rather than parsing optionals and repeats fully in the recursive descent style, we were using regular expressions to do part of the matching and parsing. That's fine for what it is, but as we think about extending the grammar language surrounding repeats further, it might be more straightforward for this to be parsed in the more usual way. So let's do that. Doing this also results in better and more targeted errors when parsing malformed syntax. We had been supporting a space between an expression and the optional and repeat sigils `?`, `*`, and `+` (but not between an expression and the `{a..b}` ranged repeat syntax). In making this change, we drop this support and adjust the affected productions. We were only using this in a handful of places, and the clarity of the productions seem the same or better by removing these spaces. We verified that, setting aside the removal of these spaces, the rendered output of the Reference is byte identical before and after this change. --- mdbook-spec/src/grammar/parser.rs | 79 +++++++++++++++++++++---------- src/items/generics.md | 2 +- src/patterns.md | 4 +- 3 files changed, 57 insertions(+), 28 deletions(-) diff --git a/mdbook-spec/src/grammar/parser.rs b/mdbook-spec/src/grammar/parser.rs index 60ca740f6..7a92f4772 100644 --- a/mdbook-spec/src/grammar/parser.rs +++ b/mdbook-spec/src/grammar/parser.rs @@ -214,7 +214,7 @@ impl Parser<'_> { return Ok(None); }; - let mut kind = if self.take_str("U+") { + let kind = if self.take_str("U+") { self.parse_unicode()? } else if self.input[self.index..] .chars() @@ -246,30 +246,13 @@ impl Parser<'_> { } else { return Ok(None); }; - - static REPEAT_RE: LazyLock = - LazyLock::new(|| Regex::new(r"^ ?(\*\?|\+\?|\?|\*|\+)").unwrap()); - static RANGE_RE: LazyLock = - LazyLock::new(|| Regex::new(r"^\{([0-9]+)?\.\.([0-9]+)?\}").unwrap()); - if let Some(cap) = self.take_re(&REPEAT_RE) { - kind = match &cap[1] { - "?" => ExpressionKind::Optional(box_kind(kind)), - "*" => ExpressionKind::Repeat(box_kind(kind)), - "*?" => ExpressionKind::RepeatNonGreedy(box_kind(kind)), - "+" => ExpressionKind::RepeatPlus(box_kind(kind)), - "+?" => ExpressionKind::RepeatPlusNonGreedy(box_kind(kind)), - s => panic!("unexpected `{s}`"), - }; - } else if let Some(cap) = self.take_re(&RANGE_RE) { - let a = cap.get(1).map(|m| m.as_str().parse::().unwrap()); - let b = cap.get(2).map(|m| m.as_str().parse::().unwrap()); - match (a, b) { - (Some(a), Some(b)) if b < a => bail!(self, "range {a}..{b} is malformed"), - _ => {} - } - kind = ExpressionKind::RepeatRange(box_kind(kind), a, b); - } - + let kind = match self.peek() { + Some(b'?') => self.parse_optional(kind)?, + Some(b'*') => self.parse_repeat(kind)?, + Some(b'+') => self.parse_repeat_plus(kind)?, + Some(b'{') => self.parse_repeat_range(kind)?, + _ => kind, + }; let suffix = self.parse_suffix()?; let footnote = self.parse_footnote()?; @@ -370,6 +353,52 @@ impl Parser<'_> { } } + /// Parse `?` after expression. + fn parse_optional(&mut self, kind: ExpressionKind) -> Result { + self.expect("?", "expected `?`")?; + Ok(ExpressionKind::Optional(box_kind(kind))) + } + + /// Parse `*` | `*?` after expression. + fn parse_repeat(&mut self, kind: ExpressionKind) -> Result { + self.expect("*", "expected `*`")?; + Ok(if self.take_str("?") { + ExpressionKind::RepeatNonGreedy(box_kind(kind)) + } else { + ExpressionKind::Repeat(box_kind(kind)) + }) + } + + /// Parse `+` | `+?` after expression. + fn parse_repeat_plus(&mut self, kind: ExpressionKind) -> Result { + self.expect("+", "expected `+`")?; + Ok(if self.take_str("?") { + ExpressionKind::RepeatPlusNonGreedy(box_kind(kind)) + } else { + ExpressionKind::RepeatPlus(box_kind(kind)) + }) + } + + /// Parse `{a..}` | `{..b}` | `{a..b}` after expression. + fn parse_repeat_range(&mut self, kind: ExpressionKind) -> Result { + self.expect("{", "expected `{`")?; + let a = self.take_while(&|x| x.is_ascii_digit()); + let Ok(a) = (!a.is_empty()).then(|| a.parse::()).transpose() else { + bail!(self, "malformed range start"); + }; + self.expect("..", "expected `..`")?; + let b = self.take_while(&|x| x.is_ascii_digit()); + let Ok(b) = (!b.is_empty()).then(|| b.parse::()).transpose() else { + bail!(self, "malformed range end"); + }; + match (a, b) { + (Some(a), Some(b)) if b < a => bail!(self, "range {a}..{b} is malformed"), + _ => {} + } + self.expect("}", "expected `}`")?; + Ok(ExpressionKind::RepeatRange(box_kind(kind), a, b)) + } + fn parse_suffix(&mut self) -> Result> { if !self.take_str(" _") { return Ok(None); diff --git a/src/items/generics.md b/src/items/generics.md index 33491587d..3a2e98a0a 100644 --- a/src/items/generics.md +++ b/src/items/generics.md @@ -232,7 +232,7 @@ r[items.generics.where] r[items.generics.where.syntax] ```grammar,items -WhereClause -> `where` ( WhereClauseItem `,` )* WhereClauseItem ? +WhereClause -> `where` ( WhereClauseItem `,` )* WhereClauseItem? WhereClauseItem -> LifetimeWhereClauseItem diff --git a/src/patterns.md b/src/patterns.md index 2d9ddb5ac..e3a5c18df 100644 --- a/src/patterns.md +++ b/src/patterns.md @@ -181,7 +181,7 @@ r[patterns.ident] r[patterns.ident.syntax] ```grammar,patterns -IdentifierPattern -> `ref`? `mut`? IDENTIFIER (`@` PatternNoTopAlt ) ? +IdentifierPattern -> `ref`? `mut`? IDENTIFIER ( `@` PatternNoTopAlt )? ``` r[patterns.ident.intro] @@ -704,7 +704,7 @@ r[patterns.struct.syntax] ```grammar,patterns StructPattern -> PathInExpression `{` - StructPatternElements ? + StructPatternElements? `}` StructPatternElements ->