diff --git a/book.toml b/book.toml index 348cad88e..5f95aba86 100644 --- a/book.toml +++ b/book.toml @@ -12,6 +12,7 @@ smart-punctuation = true [output.html.search.chapter] "test-summary.md" = { enable = false } +"grammar.md" = { enable = false } [output.html.redirect] "/expressions/enum-variant-expr.html" = "struct-expr.html" diff --git a/docs/authoring.md b/docs/authoring.md index 73ca9dbed..a465309c8 100644 --- a/docs/authoring.md +++ b/docs/authoring.md @@ -214,3 +214,7 @@ r[foo.bar.edition2021] > [!EDITION-2021] > Describe what changed in 2021. ``` + +## Grammar + +See [Grammar](grammar.md) for details on how to write grammar rules. diff --git a/docs/grammar.md b/docs/grammar.md new file mode 100644 index 000000000..c99257df8 --- /dev/null +++ b/docs/grammar.md @@ -0,0 +1,122 @@ +# Grammar + +The Reference grammar is written in markdown code blocks using a modified BNF-like syntax (with a blend of regex and other arbitrary things). The `mdbook-spec` extension parses these rules and converts them to a renderable format, including railroad diagrams. + +The code block should have a lang string with the word "grammar", a comma, and the category of the grammar, like this: + +~~~ +```grammar,items +ProductionName -> SomeExpression +``` +~~~ + +The category is used to group similar productions on the grammar summary page in the appendix. + +## Grammar syntax + +The syntax for the grammar itself is pretty close to what is described in the [Notation chapter](../src/notation.md), though there are some rendering differences. + +A "root" production, marked with `@root`, is one that is not used in any other production. + +The syntax for the grammar itself (written in itself, hopefully that's not too confusing) is: + +``` +Grammar -> Production+ + +BACKTICK -> U+0060 + +LF -> U+000A + +Production -> `@root`? Name ` ->` Expression + +Name -> <Alphanumeric or `_`>+ + +Expression -> Sequence (` `* `|` ` `* Sequence)* + +Sequence -> (` `* AdornedExpr)+ + +AdornedExpr -> ExprRepeat Suffix? Footnote? 
+ +Suffix -> ` _` <not underscore unless in backtick>* `_` + +Footnote -> `[^` ~[`]` LF]+ `]` + +ExprRepeat -> + Expr1 `?` + | Expr1 `*?` + | Expr1 `*` + | Expr1 `+?` + | Expr1 `+` + | Expr1 `{` Range? `..` Range? `}` + +Range -> [`0`-`9`]+ + +Expr1 -> + Unicode + | NonTerminal + | Break + | Terminal + | Charset + | Prose + | Group + | NegativeExpression + +Unicode -> `U+` [`A`-`Z` `0`-`9`]{4..4} + +NonTerminal -> Name + +Break -> LF ` `+ + +Terminal -> BACKTICK ~[LF]+ BACKTICK + +Charset -> `[` (` `* Characters)+ ` `* `]` + +Characters -> + CharacterRange + | CharacterTerminal + | CharacterName + +CharacterRange -> BACKTICK <any char> BACKTICK `-` BACKTICK <any char> BACKTICK + +CharacterTerminal -> Terminal + +CharacterName -> Name + +Prose -> `<` ~[`>` LF]+ `>` + +Group -> `(` ` `* Expression ` `* `)` + +NegativeExpression -> `~` ( Charset | Terminal | NonTerminal ) +``` + +The general format is a series of productions separated by blank lines. The expressions are: + +| Expression | Example | Description | +|------------|---------|-------------| +| Unicode | U+0060 | A single Unicode character. | +| NonTerminal | FunctionParameters | A reference to another production by name. | +| Break | | This is used internally by the renderer to detect line breaks and indentation. | +| Terminal | \`example\` | This is a sequence of exact characters, surrounded by backticks. | +| Charset | [ \`A\`-\`Z\` \`0\`-\`9\` \`_\` ] | A choice from a set of characters, space separated. There are three different forms. | +| CharacterRange | [ \`A\`-\`Z\` ] | A range of characters; each character should be in backticks. | +| CharacterTerminal | [ \`x\` ] | A single character, surrounded by backticks. | +| CharacterName | [ LF ] | A nonterminal, referring to another production. | +| Prose | \<any ASCII character except CR\> | This is an English description of what should be matched, surrounded in angle brackets. | +| Group | (\`,\` Parameter)+ | This groups an expression for the purpose of precedence, such as applying a repetition operator to a sequence of other expressions. |
+| NegativeExpression | ~[\` \` LF] | Matches anything except the given Charset, Terminal, or NonTerminal. | +| Sequence | \`fn\` Name Parameters | A sequence of expressions, where they must match in order. | +| Alternation | Expr1 \| Expr2 | Matches only one of the given expressions, separated by the vertical pipe character. | +| Suffix | \_except \[LazyBooleanExpression\]\_ | This adds a suffix to the previous expression to provide an additional English description to it, rendered in subscript. This can have limited markdown, but try to avoid anything except basics like links. | +| Footnote | \[^extern-safe\] | This adds a footnote, which can supply some extra information that may be helpful to the user. The footnote itself should be defined outside of the code block like a normal markdown footnote. | +| Optional | Expr? | The preceding expression is optional. | +| Repeat | Expr* | The preceding expression is repeated 0 or more times. | +| Repeat (non-greedy) | Expr*? | The preceding expression is repeated 0 or more times without being greedy. | +| RepeatPlus | Expr+ | The preceding expression is repeated 1 or more times. | +| RepeatPlus (non-greedy) | Expr+? | The preceding expression is repeated 1 or more times without being greedy. | +| RepeatRange | Expr{2..4} | The preceding expression is repeated a number of times within the specified range. Either bound can be omitted, which works just like Rust ranges. | + +## Automatic linking + +The plugin automatically adds markdown link definitions for all the production names on every page. If you want to link directly to a production name, all you need to do is surround it in square brackets, like `[ArrayExpression]`. + +In some cases there might be name collisions with the automatic linking of rule names. In that case, disambiguate with the `grammar-` prefix, such as `[Type][grammar-Type]`. You can also do that if you just feel like being more explicit. 
diff --git a/mdbook-spec/Cargo.lock b/mdbook-spec/Cargo.lock index c983d9842..1dea1df7b 100644 --- a/mdbook-spec/Cargo.lock +++ b/mdbook-spec/Cargo.lock @@ -412,6 +412,7 @@ dependencies = [ "once_cell", "pathdiff", "pulldown-cmark", + "railroad", "regex", "semver", "serde_json", @@ -569,6 +570,15 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "railroad" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ecedffc46c1b2cb04f4b80e094eae6b3f3f470a9635f1f396dd5206428f6b58" +dependencies = [ + "unicode-width", +] + [[package]] name = "regex" version = "1.11.1" @@ -780,6 +790,12 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "utf8parse" version = "0.2.2" diff --git a/mdbook-spec/Cargo.toml b/mdbook-spec/Cargo.toml index 4422573a8..8bd02e444 100644 --- a/mdbook-spec/Cargo.toml +++ b/mdbook-spec/Cargo.toml @@ -5,6 +5,7 @@ edition = "2024" license = "MIT OR Apache-2.0" description = "An mdBook preprocessor to help with the Rust specification." repository = "https://github.com/rust-lang/spec/" +default-run = "mdbook-spec" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -15,6 +16,7 @@ once_cell = "1.19.0" pathdiff = "0.2.1" # Try to keep in sync with mdbook. pulldown-cmark = { version = "0.10.3", default-features = false } +railroad = { version = "0.3.2", default-features = false } regex = "1.9.4" semver = "1.0.21" serde_json = "1.0.113" diff --git a/mdbook-spec/src/grammar.rs b/mdbook-spec/src/grammar.rs new file mode 100644 index 000000000..c6a850c6e --- /dev/null +++ b/mdbook-spec/src/grammar.rs @@ -0,0 +1,403 @@ +//! 
Support for rendering the grammar. + +use crate::{Diagnostics, warn_or_err}; +use mdbook::book::{Book, BookItem, Chapter}; +use regex::{Captures, Regex}; +use std::collections::{HashMap, HashSet}; +use std::fmt::Write; +use std::path::PathBuf; +use std::sync::LazyLock; + +mod parser; +mod render_markdown; +mod render_railroad; + +#[derive(Debug, Default)] +pub struct Grammar { + pub productions: HashMap, + /// The order that the production names were discovered. + pub name_order: Vec, +} + +#[derive(Debug)] +pub struct Production { + name: String, + /// Category is from the markdown lang string, and defines how it is + /// grouped and organized on the summary page. + category: String, + expression: Expression, + /// The path to the chapter where this is defined. + path: PathBuf, + is_root: bool, +} + +#[derive(Clone, Debug)] +struct Expression { + kind: ExpressionKind, + /// Suffix is the `_foo_` part that is shown as a subscript. + suffix: Option, + /// A footnote is a markdown footnote link. + footnote: Option, +} + +#[derive(Clone, Debug)] +enum ExpressionKind { + /// `( A B C )` + Grouped(Box), + /// `A | B | C` + Alt(Vec), + /// `A B C` + Sequence(Vec), + /// `A?` + Optional(Box), + /// `A*` + Repeat(Box), + /// `A*?` + RepeatNonGreedy(Box), + /// `A+` + RepeatPlus(Box), + /// `A+?` + RepeatPlusNonGreedy(Box), + /// `A{2..4}` + RepeatRange(Box, Option, Option), + /// `NonTerminal` + Nt(String), + /// `` `string` `` + Terminal(String), + /// `` + Prose(String), + /// An LF followed by the given number of spaces. + /// + /// Used by the renderer to help format and structure the grammar. 
+ Break(usize), + /// ``[`A`-`Z` `_` LF]`` + Charset(Vec), + /// ``~[` ` LF]`` + NegExpression(Box), + /// `U+0060` + Unicode(String), +} + +#[derive(Clone, Debug)] +enum Characters { + /// `LF` + Named(String), + /// `` `_` `` + Terminal(String), + /// `` `A`-`Z` `` + Range(char, char), +} + +impl Grammar { + fn visit_nt(&self, callback: &mut dyn FnMut(&str)) { + for p in self.productions.values() { + p.expression.visit_nt(callback); + } + } +} + +impl Expression { + fn new_kind(kind: ExpressionKind) -> Self { + Self { + kind, + suffix: None, + footnote: None, + } + } + + fn visit_nt(&self, callback: &mut dyn FnMut(&str)) { + match &self.kind { + ExpressionKind::Grouped(e) + | ExpressionKind::Optional(e) + | ExpressionKind::Repeat(e) + | ExpressionKind::RepeatNonGreedy(e) + | ExpressionKind::RepeatPlus(e) + | ExpressionKind::RepeatPlusNonGreedy(e) + | ExpressionKind::RepeatRange(e, _, _) + | ExpressionKind::NegExpression(e) => { + e.visit_nt(callback); + } + ExpressionKind::Alt(es) | ExpressionKind::Sequence(es) => { + for e in es { + e.visit_nt(callback); + } + } + ExpressionKind::Nt(nt) => { + callback(&nt); + } + ExpressionKind::Terminal(_) + | ExpressionKind::Prose(_) + | ExpressionKind::Break(_) + | ExpressionKind::Unicode(_) => {} + ExpressionKind::Charset(set) => { + for ch in set { + match ch { + Characters::Named(s) => callback(s), + Characters::Terminal(_) | Characters::Range(_, _) => {} + } + } + } + } + } + + fn is_break(&self) -> bool { + matches!(self.kind, ExpressionKind::Break(_)) + } +} + +static GRAMMAR_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?ms)^```grammar,([^\n]+)\n(.*?)^```").unwrap()); +static NAMES_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?m)^(?:@root )?([A-Za-z0-9_]+)(?: \([^)]+\))? ->").unwrap()); + +/// Loads the [`Grammar`] from the book. 
+pub fn load_grammar(book: &Book, diag: &mut Diagnostics) -> Grammar { + let mut grammar = Grammar::default(); + for item in book.iter() { + let BookItem::Chapter(ch) = item else { + continue; + }; + if ch.is_draft_chapter() { + continue; + } + let path = ch.path.as_ref().unwrap().to_owned(); + for cap in GRAMMAR_RE.captures_iter(&ch.content) { + let category = &cap[1]; + let input = &cap[2]; + if let Err(e) = parser::parse_grammar(input, &mut grammar, category, &path) { + warn_or_err!(diag, "failed to parse grammar in {path:?}: {e}"); + } + } + } + check_undefined_nt(&grammar, diag); + check_unexpected_roots(&grammar, diag); + grammar +} + +/// Checks for nonterminals that are used but not defined. +fn check_undefined_nt(grammar: &Grammar, diag: &mut Diagnostics) { + grammar.visit_nt(&mut |nt| { + if !grammar.productions.contains_key(nt) { + warn_or_err!(diag, "non-terminal `{nt}` is used but not defined"); + } + }); +} + +/// This checks that all the grammar roots are what we expect. +/// +/// This is intended to help catch any unexpected misspellings, orphaned +/// productions, or general mistakes. +fn check_unexpected_roots(grammar: &Grammar, diag: &mut Diagnostics) { + let mut set: HashSet<_> = grammar.name_order.iter().map(|s| s.as_str()).collect(); + grammar.visit_nt(&mut |nt| { + set.remove(nt); + }); + let expected: HashSet<_> = grammar + .productions + .values() + .filter_map(|p| p.is_root.then(|| p.name.as_str())) + .collect(); + if set != expected { + let new: Vec<_> = set.difference(&expected).collect(); + let removed: Vec<_> = expected.difference(&set).collect(); + if !new.is_empty() { + warn_or_err!( + diag, + "New grammar production detected that is not used in any other\n\ + production. If this is expected, mark the production with\n\ + `@root`. 
If not, make sure it is spelled correctly and used in\n\ + another production.\n\ + \n\ + The new names are: {new:?}\n" + ); + } else if !removed.is_empty() { + warn_or_err!( + diag, + "Old grammar production root seems to have been removed.\n\ + If this is expected, remove `@root` from the production.\n\ + \n\ + The removed names are: {removed:?}\n" + ); + } else { + unreachable!("unexpected"); + } + } +} + +/// Replaces the text grammar in the given chapter with the rendered version. +pub fn insert_grammar(grammar: &Grammar, chapter: &Chapter, diag: &mut Diagnostics) -> String { + let link_map = make_relative_link_map(grammar, chapter); + + let mut content = GRAMMAR_RE + .replace_all(&chapter.content, |cap: &Captures<'_>| { + let names: Vec<_> = NAMES_RE + .captures_iter(&cap[2]) + .map(|cap| cap.get(1).unwrap().as_str()) + .collect(); + let for_lexer = &cap[1] == "lexer"; + render_names(grammar, &names, &link_map, for_lexer, chapter, diag) + }) + .to_string(); + + // Make all production names easily linkable. + let is_summary = is_summary(chapter); + for (name, path) in &link_map { + let id = render_markdown::markdown_id(name, is_summary); + if is_summary { + // On the summary page, link to the production on the summary page. + writeln!(content, "[{name}]: #{id}").unwrap(); + } else { + // This includes two variants, one for convenience (like + // `[ArrayExpression]`), and one with the `grammar-` prefix to + // disambiguate links that have the same name as a rule (rules + // take precedence). + writeln!( + content, + "[{name}]: {path}#{id}\n\ + [grammar-{name}]: {path}#{id}" + ) + .unwrap(); + } + } + content +} + +/// Creates a map of production name -> relative link path. 
+fn make_relative_link_map(grammar: &Grammar, chapter: &Chapter) -> HashMap { + let current_path = chapter.path.as_ref().unwrap().parent().unwrap(); + grammar + .productions + .values() + .map(|p| { + let relative = pathdiff::diff_paths(&p.path, current_path).unwrap(); + // Adjust paths for Windows. + let relative = relative.display().to_string().replace('\\', "/"); + (p.name.clone(), relative) + }) + .collect() +} + +/// Helper to take a list of production names and to render all of those to a +/// mixture of markdown and HTML. +fn render_names( + grammar: &Grammar, + names: &[&str], + link_map: &HashMap, + for_lexer: bool, + chapter: &Chapter, + diag: &mut Diagnostics, +) -> String { + let for_summary = is_summary(chapter); + let mut output = String::new(); + output.push_str( + "
\n\ + \n", + ); + if for_lexer { + output.push_str("**Lexer**\n"); + } else { + output.push_str("**Syntax**\n"); + } + output.push_str("
\n"); + + // Convert the link map to add the id. + let updated_link_map = |get_id: fn(&str, bool) -> String| -> HashMap { + link_map + .iter() + .map(|(name, path)| { + let id = get_id(name, for_summary); + let path = if for_summary { + format!("#{id}") + } else { + format!("{path}#{id}") + }; + (name.clone(), path) + }) + .collect() + }; + + let markdown_link_map = updated_link_map(render_markdown::markdown_id); + // Modify the link map so that it contains the exact destination needed to + // link to the railroad productions, and to accommodate the summary + // chapter. + let railroad_link_map = updated_link_map(render_railroad::railroad_id); + + if let Err(e) = grammar.render_markdown( + &names, + &markdown_link_map, + &railroad_link_map, + &mut output, + for_summary, + ) { + warn_or_err!( + diag, + "grammar failed in chapter {:?}: {e}", + chapter.source_path.as_ref().unwrap() + ); + } + + output.push_str( + "\n\ + \n\ +
\n\ +
\n\ + \n", + ); + + if let Err(e) = grammar.render_railroad( + &names, + &railroad_link_map, + &markdown_link_map, + &mut output, + for_summary, + ) { + warn_or_err!( + diag, + "grammar failed in chapter {:?}: {e}", + chapter.source_path.as_ref().unwrap() + ); + } + + output.push_str("
\n"); + + output +} + +pub fn is_summary(chapter: &Chapter) -> bool { + chapter.name == "Grammar summary" +} + +/// Inserts the summary of all grammar rules into the grammar summary chapter. +pub fn insert_summary(grammar: &Grammar, chapter: &Chapter, diag: &mut Diagnostics) -> String { + let link_map = make_relative_link_map(grammar, chapter); + let mut seen = HashSet::new(); + let categories: Vec<_> = grammar + .name_order + .iter() + .map(|name| &grammar.productions[name].category) + .filter(|cat| seen.insert(*cat)) + .collect(); + let mut grammar_summary = String::new(); + for category in categories { + let mut chars = category.chars(); + let cap = chars.next().unwrap().to_uppercase().collect::() + chars.as_str(); + write!(grammar_summary, "\n## {cap} summary\n\n").unwrap(); + let names: Vec<_> = grammar + .name_order + .iter() + .filter(|name| grammar.productions[*name].category == *category) + .map(|s| s.as_str()) + .collect(); + let for_lexer = category == "lexer"; + let s = render_names(grammar, &names, &link_map, for_lexer, chapter, diag); + grammar_summary.push_str(&s); + } + + chapter + .content + .replace("{{ grammar-summary }}", &grammar_summary) +} diff --git a/mdbook-spec/src/grammar/parser.rs b/mdbook-spec/src/grammar/parser.rs new file mode 100644 index 000000000..60ca740f6 --- /dev/null +++ b/mdbook-spec/src/grammar/parser.rs @@ -0,0 +1,453 @@ +//! A parser of the EBNF-like grammar. 
+ +use super::{Characters, Expression, ExpressionKind, Grammar, Production}; +use regex::{Captures, Regex}; +use std::fmt; +use std::fmt::Display; +use std::path::Path; +use std::sync::LazyLock; + +struct Parser<'a> { + input: &'a str, + index: usize, +} + +pub struct Error { + message: String, + line: String, + lineno: usize, + col: usize, +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> { + let lineno = format!("{}", self.lineno); + let space = " ".repeat(lineno.len() + 1); + let col = " ".repeat(self.col); + let line = &self.line; + let message = &self.message; + write!(f, "\n{space}|\n{lineno} | {line}\n{space}|{col}^ {message}") + } +} + +macro_rules! bail { + ($parser:expr, $($arg:tt)*) => {{ + let mut msg = String::new(); + fmt::write(&mut msg, format_args!($($arg)*)).unwrap(); + return Err($parser.error(msg)); + }}; +} + +type Result = std::result::Result; + +pub fn parse_grammar( + input: &str, + grammar: &mut Grammar, + category: &str, + path: &Path, +) -> Result<()> { + let mut parser = Parser { input, index: 0 }; + loop { + let p = parser.parse_production(category, path)?; + grammar.name_order.push(p.name.clone()); + if let Some(dupe) = grammar.productions.insert(p.name.clone(), p) { + bail!(parser, "duplicate production {} in grammar", dupe.name); + } + parser.take_while(&|ch| ch == '\n'); + if parser.eof() { + break; + } + } + Ok(()) +} + +impl Parser<'_> { + fn take_while(&mut self, f: &dyn Fn(char) -> bool) -> &str { + let mut upper = 0; + let i = self.index; + let mut ci = self.input[i..].chars(); + while let Some(ch) = ci.next() { + if !f(ch) { + break; + } + upper += ch.len_utf8(); + } + self.index += upper; + &self.input[i..i + upper] + } + + /// If the input matches the given regex, it is returned and the head is moved forward. + /// + /// Note that regexes must start with `^`. 
+ fn take_re(&mut self, re: &Regex) -> Option> { + if let Some(cap) = re.captures(&self.input[self.index..]) { + self.index += cap[0].len(); + Some(cap) + } else { + None + } + } + + /// Returns whether or not the given string is next, and advances the head if it is. + fn take_str(&mut self, s: &str) -> bool { + if self.input[self.index..].starts_with(s) { + self.index += s.len(); + true + } else { + false + } + } + + /// Returns the next byte, or None if eof. + fn peek(&mut self) -> Option { + if self.index >= self.input.len() { + None + } else { + Some(self.input.as_bytes()[self.index]) + } + } + + fn eof(&mut self) -> bool { + self.index >= self.input.len() + } + + /// Expects the next input to be the given string, and advances the head. + fn expect(&mut self, s: &str, err: &str) -> Result<()> { + if !self.input[self.index..].starts_with(s) { + bail!(self, "{err}"); + }; + self.index += s.len(); + Ok(()) + } + + fn error(&mut self, message: String) -> Error { + let (line, lineno, col) = translate_position(self.input, self.index); + Error { + message, + line: line.to_string(), + lineno, + col, + } + } + + /// Advances zero or more spaces. + fn space0(&mut self) -> &str { + self.take_while(&|ch| ch == ' ') + } + + fn parse_production(&mut self, category: &str, path: &Path) -> Result { + let is_root = self.parse_is_root(); + self.space0(); + let name = self + .parse_name() + .ok_or_else(|| self.error("expected production name".to_string()))?; + self.expect(" ->", "expected -> arrow")?; + let Some(expression) = self.parse_expression()? 
else { + bail!(self, "expected an expression"); + }; + Ok(Production { + name, + category: category.to_string(), + expression, + path: path.to_owned(), + is_root, + }) + } + + fn parse_is_root(&mut self) -> bool { + self.take_str("@root") + } + + fn parse_name(&mut self) -> Option { + let name = self.take_while(&|c: char| c.is_alphanumeric() || c == '_'); + if name.is_empty() { + None + } else { + Some(name.to_string()) + } + } + + fn parse_expression(&mut self) -> Result> { + static ALT_RE: LazyLock = LazyLock::new(|| Regex::new(r"^ *\| *").unwrap()); + + let mut es = Vec::new(); + loop { + let Some(e) = self.parse_seq()? else { break }; + es.push(e); + if self.take_re(&ALT_RE).is_none() { + break; + } + } + match es.len() { + 0 => Ok(None), + 1 => Ok(Some(es.pop().unwrap())), + _ => Ok(Some(Expression { + kind: ExpressionKind::Alt(es), + suffix: None, + footnote: None, + })), + } + } + + fn parse_seq(&mut self) -> Result> { + let mut es = Vec::new(); + loop { + self.space0(); + let Some(e) = self.parse_expr1()? else { + break; + }; + es.push(e); + } + match es.len() { + 0 => Ok(None), + 1 => Ok(Some(es.pop().unwrap())), + _ => Ok(Some(Expression { + kind: ExpressionKind::Sequence(es), + suffix: None, + footnote: None, + })), + } + } + + fn parse_expr1(&mut self) -> Result> { + let Some(next) = self.peek() else { + return Ok(None); + }; + + let mut kind = if self.take_str("U+") { + self.parse_unicode()? + } else if self.input[self.index..] + .chars() + .next() + .map(|ch| ch.is_alphanumeric()) + .unwrap_or(false) + { + self.parse_nonterminal() + .expect("first char already checked") + } else if self.take_str("\n") { + if self.eof() || self.take_str("\n") { + return Ok(None); + } + let space = self.take_while(&|ch| ch == ' '); + if space.len() == 0 { + bail!(self, "expected indentation on next line"); + } + ExpressionKind::Break(space.len()) + } else if next == b'`' { + self.parse_terminal()? + } else if next == b'[' { + self.parse_charset()? 
+ } else if next == b'<' { + self.parse_prose()? + } else if next == b'(' { + self.parse_grouped()? + } else if next == b'~' { + self.parse_neg_expression()? + } else { + return Ok(None); + }; + + static REPEAT_RE: LazyLock = + LazyLock::new(|| Regex::new(r"^ ?(\*\?|\+\?|\?|\*|\+)").unwrap()); + static RANGE_RE: LazyLock = + LazyLock::new(|| Regex::new(r"^\{([0-9]+)?\.\.([0-9]+)?\}").unwrap()); + if let Some(cap) = self.take_re(&REPEAT_RE) { + kind = match &cap[1] { + "?" => ExpressionKind::Optional(box_kind(kind)), + "*" => ExpressionKind::Repeat(box_kind(kind)), + "*?" => ExpressionKind::RepeatNonGreedy(box_kind(kind)), + "+" => ExpressionKind::RepeatPlus(box_kind(kind)), + "+?" => ExpressionKind::RepeatPlusNonGreedy(box_kind(kind)), + s => panic!("unexpected `{s}`"), + }; + } else if let Some(cap) = self.take_re(&RANGE_RE) { + let a = cap.get(1).map(|m| m.as_str().parse::().unwrap()); + let b = cap.get(2).map(|m| m.as_str().parse::().unwrap()); + match (a, b) { + (Some(a), Some(b)) if b < a => bail!(self, "range {a}..{b} is malformed"), + _ => {} + } + kind = ExpressionKind::RepeatRange(box_kind(kind), a, b); + } + + let suffix = self.parse_suffix()?; + let footnote = self.parse_footnote()?; + + Ok(Some(Expression { + kind, + suffix, + footnote, + })) + } + + fn parse_nonterminal(&mut self) -> Option { + let nt = self.parse_name()?; + Some(ExpressionKind::Nt(nt)) + } + + fn parse_terminal(&mut self) -> Result { + static TERMINAL_RE: LazyLock = + LazyLock::new(|| Regex::new(r"^`([^`\n]+)`").unwrap()); + match self.take_re(&TERMINAL_RE) { + Some(cap) => Ok(ExpressionKind::Terminal(cap[1].to_string())), + None => bail!(self, "unterminated terminal, expected closing backtick"), + } + } + + fn parse_charset(&mut self) -> Result { + self.expect("[", "expected opening [")?; + let mut characters = Vec::new(); + loop { + self.space0(); + let Some(ch) = self.parse_characters() else { + break; + }; + characters.push(ch); + } + if characters.is_empty() { + bail!(self, 
"expected at least one character in character group"); + } + self.space0(); + self.expect("]", "expected closing ]")?; + Ok(ExpressionKind::Charset(characters)) + } + + fn parse_characters(&mut self) -> Option { + static RANGE_RE: LazyLock = LazyLock::new(|| Regex::new(r"^`(.)`-`(.)`").unwrap()); + static TERMINAL_RE: LazyLock = LazyLock::new(|| Regex::new("^`([^`\n]+)`").unwrap()); + if let Some(cap) = self.take_re(&RANGE_RE) { + let a = cap[1].chars().next().unwrap(); + let b = cap[2].chars().next().unwrap(); + Some(Characters::Range(a, b)) + } else if let Some(cap) = self.take_re(&TERMINAL_RE) { + Some(Characters::Terminal(cap[1].to_string())) + } else { + let name = self.parse_name()?; + Some(Characters::Named(name)) + } + } + + fn parse_prose(&mut self) -> Result { + static PROSE_RE: LazyLock = LazyLock::new(|| Regex::new(r"^<([^>\n]+)>").unwrap()); + match self.take_re(&PROSE_RE) { + Some(cap) => Ok(ExpressionKind::Prose(cap[1].to_string())), + None => bail!(self, "unterminated prose, expected closing `>`"), + } + } + + fn parse_grouped(&mut self) -> Result { + self.expect("(", "expected opening `(`")?; + self.space0(); + let Some(e) = self.parse_expression()? 
else { + bail!(self, "expected expression in parenthesized group"); + }; + self.space0(); + self.expect(")", "expected closing `)`")?; + Ok(ExpressionKind::Grouped(Box::new(e))) + } + + fn parse_neg_expression(&mut self) -> Result { + self.expect("~", "expected ~")?; + let Some(next) = self.peek() else { + bail!(self, "expected expression after ~"); + }; + let kind = match next { + b'[' => self.parse_charset()?, + b'`' => self.parse_terminal()?, + _ => self.parse_nonterminal().ok_or_else(|| { + self.error("expected a charset, terminal, or name after ~ negation".to_string()) + })?, + }; + Ok(ExpressionKind::NegExpression(box_kind(kind))) + } + + fn parse_unicode(&mut self) -> Result { + static UNICODE_RE: LazyLock = LazyLock::new(|| Regex::new(r"^[A-Z0-9]{4}").unwrap()); + + match self.take_re(&UNICODE_RE) { + Some(s) => Ok(ExpressionKind::Unicode(s[0].to_string())), + None => bail!(self, "expected 4 hexadecimal uppercase digits after U+"), + } + } + + fn parse_suffix(&mut self) -> Result> { + if !self.take_str(" _") { + return Ok(None); + } + let mut in_backtick = false; + let start = self.index; + loop { + let Some(next) = self.peek() else { + bail!(self, "failed to find end of _ suffixed text"); + }; + self.index += 1; + match next { + b'\n' => bail!(self, "failed to find end of _ suffixed text"), + b'`' => in_backtick = !in_backtick, + b'_' if !in_backtick => { + if self + .peek() + .map(|b| matches!(b, b'\n' | b' ')) + .unwrap_or(true) + { + break; + } + } + _ => {} + } + } + Ok(Some(self.input[start..self.index - 1].to_string())) + } + + fn parse_footnote(&mut self) -> Result> { + static FOOTNOTE_RE: LazyLock = + LazyLock::new(|| Regex::new(r"^([^\]\n]+)]").unwrap()); + if !self.take_str("[^") { + return Ok(None); + } + match self.take_re(&FOOTNOTE_RE) { + Some(cap) => Ok(Some(cap[1].to_string())), + None => bail!(self, "unterminated footnote, expected closing `]`"), + } + } +} + +fn box_kind(kind: ExpressionKind) -> Box { + Box::new(Expression { + kind, + 
suffix: None, + footnote: None, + }) +} + +/// Helper to translate a byte index to a `(line, line_no, col_no)` (1-based). +fn translate_position(input: &str, index: usize) -> (&str, usize, usize) { + if input.is_empty() { + return ("", 0, 0); + } + let index = index.min(input.len()); + + let mut line_start = 0; + let mut line_number = 0; + for line in input.lines() { + let line_end = line_start + line.len(); + if index >= line_start && index <= line_end { + let column_number = index - line_start + 1; + return (line, line_number + 1, column_number); + } + line_start = line_end + 1; + line_number += 1; + } + ("", line_number + 1, 0) +} + +#[test] +fn translate_tests() { + assert_eq!(translate_position("", 0), ("", 0, 0)); + assert_eq!(translate_position("test", 0), ("test", 1, 1)); + assert_eq!(translate_position("test", 3), ("test", 1, 4)); + assert_eq!(translate_position("test", 4), ("test", 1, 5)); + assert_eq!(translate_position("test\ntest2", 4), ("test", 1, 5)); + assert_eq!(translate_position("test\ntest2", 5), ("test2", 2, 1)); + assert_eq!(translate_position("test\ntest2\n", 11), ("", 3, 0)); +} diff --git a/mdbook-spec/src/grammar/render_markdown.rs b/mdbook-spec/src/grammar/render_markdown.rs new file mode 100644 index 000000000..36de0cef8 --- /dev/null +++ b/mdbook-spec/src/grammar/render_markdown.rs @@ -0,0 +1,237 @@ +//! Renders the grammar to markdown. 
+ +use super::{Characters, Expression, ExpressionKind, Production}; +use crate::grammar::Grammar; +use anyhow::bail; +use regex::Regex; +use std::borrow::Cow; +use std::collections::HashMap; +use std::fmt::Write; +use std::sync::LazyLock; + +impl Grammar { + pub fn render_markdown( + &self, + names: &[&str], + link_map: &HashMap, + rr_link_map: &HashMap, + output: &mut String, + for_summary: bool, + ) -> anyhow::Result<()> { + let mut iter = names.into_iter().peekable(); + while let Some(name) = iter.next() { + let Some(prod) = self.productions.get(*name) else { + bail!("could not find grammar production named `{name}`"); + }; + prod.render_markdown(link_map, rr_link_map, output, for_summary); + if iter.peek().is_some() { + output.push_str("\n"); + } + } + Ok(()) + } +} + +/// The HTML id for the production. +pub fn markdown_id(name: &str, for_summary: bool) -> String { + if for_summary { + format!("grammar-summary-{}", name) + } else { + format!("grammar-{}", name) + } +} + +impl Production { + fn render_markdown( + &self, + link_map: &HashMap, + rr_link_map: &HashMap, + output: &mut String, + for_summary: bool, + ) { + let dest = rr_link_map + .get(&self.name) + .map(|path| path.to_string()) + .unwrap_or_else(|| format!("missing")); + write!( + output, + "\ + [{name}]({dest})\ + → ", + id = markdown_id(&self.name, for_summary), + name = self.name, + ) + .unwrap(); + self.expression.render_markdown(link_map, output); + output.push('\n'); + } +} + +impl Expression { + /// Returns the last [`ExpressionKind`] of this expression. 
+ fn last(&self) -> &ExpressionKind { + match &self.kind { + ExpressionKind::Alt(es) | ExpressionKind::Sequence(es) => es.last().unwrap().last(), + ExpressionKind::Grouped(_) + | ExpressionKind::Optional(_) + | ExpressionKind::Repeat(_) + | ExpressionKind::RepeatNonGreedy(_) + | ExpressionKind::RepeatPlus(_) + | ExpressionKind::RepeatPlusNonGreedy(_) + | ExpressionKind::RepeatRange(_, _, _) + | ExpressionKind::Nt(_) + | ExpressionKind::Terminal(_) + | ExpressionKind::Prose(_) + | ExpressionKind::Break(_) + | ExpressionKind::Charset(_) + | ExpressionKind::NegExpression(_) + | ExpressionKind::Unicode(_) => &self.kind, + } + } + + fn render_markdown(&self, link_map: &HashMap, output: &mut String) { + match &self.kind { + ExpressionKind::Grouped(e) => { + output.push_str("( "); + e.render_markdown(link_map, output); + if !matches!(e.last(), ExpressionKind::Break(_)) { + output.push(' '); + } + output.push(')'); + } + ExpressionKind::Alt(es) => { + let mut iter = es.iter().peekable(); + while let Some(e) = iter.next() { + e.render_markdown(link_map, output); + if iter.peek().is_some() { + if !matches!(e.last(), ExpressionKind::Break(_)) { + output.push(' '); + } + output.push_str("| "); + } + } + } + ExpressionKind::Sequence(es) => { + let mut iter = es.iter().peekable(); + while let Some(e) = iter.next() { + e.render_markdown(link_map, output); + if iter.peek().is_some() && !matches!(e.last(), ExpressionKind::Break(_)) { + output.push(' '); + } + } + } + ExpressionKind::Optional(e) => { + e.render_markdown(link_map, output); + output.push_str("?"); + } + ExpressionKind::Repeat(e) => { + e.render_markdown(link_map, output); + output.push_str("\\*"); + } + ExpressionKind::RepeatNonGreedy(e) => { + e.render_markdown(link_map, output); + output.push_str("\\* (non-greedy)"); + } + ExpressionKind::RepeatPlus(e) => { + e.render_markdown(link_map, output); + output.push_str("+"); + } + ExpressionKind::RepeatPlusNonGreedy(e) => { + e.render_markdown(link_map, output); + 
output.push_str("+ (non-greedy)"); + } + ExpressionKind::RepeatRange(e, a, b) => { + e.render_markdown(link_map, output); + write!( + output, + "{}..{}", + a.map(|v| v.to_string()).unwrap_or_default(), + b.map(|v| v.to_string()).unwrap_or_default(), + ) + .unwrap(); + } + ExpressionKind::Nt(nt) => { + let dest = link_map.get(nt).map_or("missing", |d| d.as_str()); + write!(output, "[{nt}]({dest})").unwrap(); + } + ExpressionKind::Terminal(t) => { + write!( + output, + "{}", + markdown_escape(t) + ) + .unwrap(); + } + ExpressionKind::Prose(s) => { + write!(output, "\\<{s}\\>").unwrap(); + } + ExpressionKind::Break(indent) => { + output.push_str("\\\n"); + output.push_str(&" ".repeat(*indent)); + } + ExpressionKind::Charset(set) => charset_render_markdown(set, link_map, output), + ExpressionKind::NegExpression(e) => { + output.push('~'); + e.render_markdown(link_map, output); + } + ExpressionKind::Unicode(s) => { + output.push_str("U+"); + output.push_str(s); + } + } + if let Some(suffix) = &self.suffix { + write!(output, "{suffix}").unwrap(); + } + if let Some(footnote) = &self.footnote { + // The ZeroWidthSpace is to avoid conflicts with markdown link references. 
+ write!(output, "​[^{footnote}]").unwrap(); + } + } +} + +fn charset_render_markdown( + set: &[Characters], + link_map: &HashMap, + output: &mut String, +) { + output.push_str("\\["); + let mut iter = set.iter().peekable(); + while let Some(chars) = iter.next() { + chars.render_markdown(link_map, output); + if iter.peek().is_some() { + output.push(' '); + } + } + output.push(']'); +} + +impl Characters { + fn render_markdown(&self, link_map: &HashMap, output: &mut String) { + match self { + Characters::Named(s) => { + let dest = link_map.get(s).map_or("missing", |d| d.as_str()); + write!(output, "[{s}]({dest})").unwrap(); + } + Characters::Terminal(s) => write!( + output, + "{}", + markdown_escape(s) + ) + .unwrap(), + Characters::Range(a, b) => write!( + output, + "{a}\ + -{b}" + ) + .unwrap(), + } + } +} + +/// Escapes characters that markdown would otherwise interpret. +fn markdown_escape(s: &str) -> Cow<'_, str> { + static ESC_RE: LazyLock = LazyLock::new(|| Regex::new(r#"[\\`_*\[\](){}'"]"#).unwrap()); + ESC_RE.replace_all(s, r"\$0") +} diff --git a/mdbook-spec/src/grammar/render_railroad.rs b/mdbook-spec/src/grammar/render_railroad.rs new file mode 100644 index 000000000..5974fc9fd --- /dev/null +++ b/mdbook-spec/src/grammar/render_railroad.rs @@ -0,0 +1,314 @@ +//! Converts a [`Grammar`] to an SVG railroad diagram. 
+ +use super::{Characters, Expression, ExpressionKind, Production}; +use crate::grammar::Grammar; +use anyhow::bail; +use railroad::*; +use regex::Regex; +use std::collections::HashMap; +use std::fmt::Write; +use std::sync::LazyLock; + +impl Grammar { + pub fn render_railroad( + &self, + names: &[&str], + link_map: &HashMap, + md_link_map: &HashMap, + output: &mut String, + for_summary: bool, + ) -> anyhow::Result<()> { + for name in names { + let prod = match self.productions.get(*name) { + Some(p) => p, + None => bail!("could not find grammar production named `{name}`"), + }; + prod.render_railroad(link_map, md_link_map, output, for_summary); + } + Ok(()) + } +} + +/// The HTML id for the production. +pub fn railroad_id(name: &str, for_summary: bool) -> String { + if for_summary { + format!("railroad-summary-{}", name) + } else { + format!("railroad-{}", name) + } +} + +impl Production { + fn render_railroad( + &self, + link_map: &HashMap, + md_link_map: &HashMap, + output: &mut String, + for_summary: bool, + ) { + let mut dia = self.make_diagram(false, link_map, md_link_map); + // If the diagram is very wide, try stacking it to reduce the width. + // This 900 is somewhat arbitrary based on looking at productions that + // looked too squished. If your diagram is still too squished, + // consider adding more rules to shorten it. + if dia.width() > 900 { + dia = self.make_diagram(true, link_map, md_link_map); + } + writeln!( + output, + "
{dia}
", + width = dia.width(), + id = railroad_id(&self.name, for_summary), + ) + .unwrap(); + } + + fn make_diagram( + &self, + stack: bool, + link_map: &HashMap, + md_link_map: &HashMap, + ) -> Diagram> { + let n = self.expression.render_railroad(stack, link_map); + let dest = md_link_map + .get(&self.name) + .map(|path| path.to_string()) + .unwrap_or_else(|| format!("missing")); + let seq: Sequence> = + Sequence::new(vec![Box::new(SimpleStart), n.unwrap(), Box::new(SimpleEnd)]); + let vert = VerticalGrid::>::new(vec![ + Box::new(Link::new(Comment::new(self.name.clone()), dest)), + Box::new(seq), + ]); + + Diagram::new(Box::new(vert)) + } +} + +impl Expression { + fn render_railroad( + &self, + stack: bool, + link_map: &HashMap, + ) -> Option> { + let mut state; + let mut state_ref = &self.kind; + let n: Box = 'l: loop { + state_ref = 'cont: { + break 'l match state_ref { + // Render grouped nodes and `e{1..1}` repeats directly. + ExpressionKind::Grouped(e) + | ExpressionKind::RepeatRange(e, Some(1), Some(1)) => { + e.render_railroad(stack, link_map)? + } + ExpressionKind::Alt(es) => { + let choices: Vec<_> = es + .iter() + .map(|e| e.render_railroad(stack, link_map)) + .filter_map(|n| n) + .collect(); + Box::new(Choice::>::new(choices)) + } + ExpressionKind::Sequence(es) => { + let es: Vec<_> = es.iter().collect(); + let make_seq = |es: &[&Expression]| { + let seq: Vec<_> = es + .iter() + .map(|e| e.render_railroad(stack, link_map)) + .filter_map(|n| n) + .collect(); + let seq: Sequence> = Sequence::new(seq); + Box::new(seq) + }; + + // If `stack` is true, split the sequence on Breaks and + // stack them vertically. + if stack { + // First, trim a Break from the front and back. + let es = if matches!( + es.first(), + Some(e) if e.is_break() + ) { + &es[1..] + } else { + &es[..] + }; + let es = if matches!( + es.last(), + Some(e) if e.is_break() + ) { + &es[..es.len() - 1] + } else { + &es[..] 
+ }; + + let mut breaks: Vec<_> = + es.split(|e| e.is_break()).map(|es| make_seq(es)).collect(); + // If there aren't any breaks, don't bother stacking. + if breaks.len() == 1 { + breaks.pop().unwrap() + } else { + Box::new(Stack::new(breaks)) + } + } else { + make_seq(&es) + } + } + // Treat `e?` and `e{..1}` / `e{0..1}` equally. + ExpressionKind::Optional(e) + | ExpressionKind::RepeatRange(e, None | Some(0), Some(1)) => { + let n = e.render_railroad(stack, link_map)?; + Box::new(Optional::new(n)) + } + // Treat `e*` and `e{..}` / `e{0..}` equally. + ExpressionKind::Repeat(e) + | ExpressionKind::RepeatRange(e, None | Some(0), None) => { + let n = e.render_railroad(stack, link_map)?; + Box::new(Optional::new(Repeat::new(n, railroad::Empty))) + } + ExpressionKind::RepeatNonGreedy(e) => { + let n = e.render_railroad(stack, link_map)?; + let r = Box::new(Optional::new(Repeat::new(n, railroad::Empty))); + let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string())); + Box::new(lbox) + } + // Treat `e+` and `e{1..}` equally. + ExpressionKind::RepeatPlus(e) + | ExpressionKind::RepeatRange(e, Some(1), None) => { + let n = e.render_railroad(stack, link_map)?; + Box::new(Repeat::new(n, railroad::Empty)) + } + ExpressionKind::RepeatPlusNonGreedy(e) => { + let n = e.render_railroad(stack, link_map)?; + let r = Repeat::new(n, railroad::Empty); + let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string())); + Box::new(lbox) + } + // For `e{a..0}` render an empty node. + ExpressionKind::RepeatRange(_, _, Some(0)) => Box::new(railroad::Empty), + // Treat `e{..b}` / `e{0..b}` as `(e{1..b})?`. + ExpressionKind::RepeatRange(e, None | Some(0), Some(b @ 2..)) => { + state = ExpressionKind::Optional(Box::new(Expression::new_kind( + ExpressionKind::RepeatRange(e.clone(), Some(1), Some(*b)), + ))); + break 'cont &state; + } + // Render `e{1..b}` directly. 
+ ExpressionKind::RepeatRange(e, Some(1), Some(b @ 2..)) => { + let n = e.render_railroad(stack, link_map)?; + let cmt = format!("at most {b} more times", b = b - 1); + let r = Repeat::new(n, Comment::new(cmt)); + Box::new(r) + } + // Treat `e{a..}` as `e{a-1..a-1} e{1..}` and `e{a..b}` as + // `e{a-1..a-1} e{1..b-(a-1)}`, and treat `e{x..x}` for some + // `x` as a sequence of `e` nodes of length `x`. + ExpressionKind::RepeatRange(e, Some(a @ 2..), b) => { + let mut es = Vec::::new(); + for _ in 0..(a - 1) { + es.push(*e.clone()); + } + es.push(Expression::new_kind(ExpressionKind::RepeatRange( + e.clone(), + Some(1), + b.map(|x| x - (a - 1)), + ))); + state = ExpressionKind::Sequence(es); + break 'cont &state; + } + ExpressionKind::Nt(nt) => node_for_nt(link_map, nt), + ExpressionKind::Terminal(t) => Box::new(Terminal::new(t.clone())), + ExpressionKind::Prose(s) => Box::new(Terminal::new(s.clone())), + ExpressionKind::Break(_) => return None, + ExpressionKind::Charset(set) => { + let ns: Vec<_> = set.iter().map(|c| c.render_railroad(link_map)).collect(); + Box::new(Choice::>::new(ns)) + } + ExpressionKind::NegExpression(e) => { + let n = e.render_railroad(stack, link_map)?; + let ch = node_for_nt(link_map, "CHAR"); + Box::new(Except::new(Box::new(ch), n)) + } + ExpressionKind::Unicode(s) => Box::new(Terminal::new(format!("U+{}", s))), + }; + } + }; + if let Some(suffix) = &self.suffix { + let suffix = strip_markdown(suffix); + let lbox = LabeledBox::new(n, Comment::new(suffix)); + return Some(Box::new(lbox)); + } + // Note: Footnotes aren't supported. They could be added as a comment + // on a vertical stack or a LabeledBox or something like that, but I + // don't feel like bothering. 
+ Some(n) + } +} + +impl Characters { + fn render_railroad(&self, link_map: &HashMap) -> Box { + match self { + Characters::Named(s) => node_for_nt(link_map, s), + Characters::Terminal(s) => Box::new(Terminal::new(s.clone())), + Characters::Range(a, b) => Box::new(Terminal::new(format!("{a}-{b}"))), + } + } +} + +fn node_for_nt(link_map: &HashMap, name: &str) -> Box { + let dest = link_map + .get(name) + .map(|path| path.to_string()) + .unwrap_or_else(|| format!("missing")); + let n = NonTerminal::new(name.to_string()); + Box::new(Link::new(n, dest)) +} + +/// Removes some markdown so it can be rendered as text. +fn strip_markdown(s: &str) -> String { + // Right now this just removes markdown linkifiers, but more can be added if needed. + static LINK_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?s)\[([^\]]+)\](?:\[[^\]]*\]|\([^)]*\))?").unwrap()); + LINK_RE.replace_all(s, "$1").to_string() +} + +struct Except { + inner: LabeledBox, Box>, +} + +impl Except { + fn new(inner: Box, label: Box) -> Self { + let grid = Box::new(VerticalGrid::new(vec![ + Box::new(Comment::new("⚠️ with the exception of".to_owned())) as Box, + label, + ])) as Box; + let mut this = Self { + inner: LabeledBox::new(inner, grid), + }; + this.inner + .attr("class".to_owned()) + .or_default() + .push_str(" exceptbox"); + this + } +} + +impl Node for Except { + fn entry_height(&self) -> i64 { + self.inner.entry_height() + } + + fn height(&self) -> i64 { + self.inner.height() + } + + fn width(&self) -> i64 { + self.inner.width() + } + + fn draw(&self, x: i64, y: i64, h_dir: svg::HDir) -> svg::Element { + self.inner.draw(x, y, h_dir) + } +} diff --git a/mdbook-spec/src/lib.rs b/mdbook-spec/src/lib.rs index a36e441d3..0f14819fc 100644 --- a/mdbook-spec/src/lib.rs +++ b/mdbook-spec/src/lib.rs @@ -14,6 +14,7 @@ use std::io; use std::ops::Range; use std::path::PathBuf; +pub mod grammar; mod rules; mod std_links; mod test_links; @@ -26,7 +27,7 @@ static ADMONITION_RE: Lazy = Lazy::new(|| { /// A 
primitive regex to find link reference definitions. static MD_LINK_REFERENCE_DEFINITION: Lazy = - Lazy::new(|| Regex::new(r"(?m)^\[(?