diff --git a/src/librustdoc/html/length_limit.rs b/src/librustdoc/html/length_limit.rs new file mode 100644 index 0000000000000..bbdc91c8d2ec8 --- /dev/null +++ b/src/librustdoc/html/length_limit.rs @@ -0,0 +1,119 @@ +//! See [`HtmlWithLimit`]. + +use std::fmt::Write; +use std::ops::ControlFlow; + +use crate::html::escape::Escape; + +/// A buffer that allows generating HTML with a length limit. +/// +/// This buffer ensures that: +/// +/// * all tags are closed, +/// * tags are closed in the reverse order of when they were opened (i.e., the correct HTML order), +/// * no tags are left empty (e.g., `<em></em>`) due to the length limit being reached, +/// * all text is escaped. +#[derive(Debug)] +pub(super) struct HtmlWithLimit { + buf: String, + len: usize, + limit: usize, + /// A list of tags that have been requested to be opened via [`Self::open_tag()`] + /// but have not actually been pushed to `buf` yet. This ensures that tags are not + /// left empty (e.g., `<em></em>`) due to the length limit being reached. + queued_tags: Vec<&'static str>, + /// A list of all tags that have been opened but not yet closed. + unclosed_tags: Vec<&'static str>, +} + +impl HtmlWithLimit { + /// Create a new buffer, with a limit of `length_limit`. + pub(super) fn new(length_limit: usize) -> Self { + let buf = if length_limit > 1000 { + // If the length limit is really large, don't preallocate tons of memory. + String::new() + } else { + // The length limit is actually a good heuristic for initial allocation size. + // Measurements showed that using it as the initial capacity ended up using less memory + // than `String::new`. + // See https://github.com/rust-lang/rust/pull/88173#discussion_r692531631 for more. + String::with_capacity(length_limit) + }; + Self { + buf, + len: 0, + limit: length_limit, + unclosed_tags: Vec::new(), + queued_tags: Vec::new(), + } + } + + /// Finish using the buffer and get the written output. + /// This function will close all unclosed tags for you. 
+    pub(super) fn finish(mut self) -> String {
+        self.close_all_tags();
+        self.buf
+    }
+
+    /// Write some plain text to the buffer, escaping as needed.
+    ///
+    /// Any queued tags are written out first, so a tag only reaches the output
+    /// together with some text. The limit is measured on the unescaped `text`
+    /// (its `len()`), not on the escaped output.
+    ///
+    /// This function skips writing the text if the length limit was reached
+    /// and returns [`ControlFlow::Break`].
+    pub(super) fn push(&mut self, text: &str) -> ControlFlow<(), ()> {
+        if self.len + text.len() > self.limit {
+            return ControlFlow::BREAK;
+        }
+
+        self.flush_queue();
+        write!(self.buf, "{}", Escape(text)).unwrap();
+        self.len += text.len();
+
+        ControlFlow::CONTINUE
+    }
+
+    /// Open an HTML tag.
+    ///
+    /// The tag is only queued here; it is written to the buffer by the next
+    /// successful [`Self::push()`].
+    ///
+    /// **Note:** HTML attributes have not yet been implemented.
+    /// This function will panic if called with a non-alphabetic `tag_name`.
+    pub(super) fn open_tag(&mut self, tag_name: &'static str) {
+        assert!(
+            tag_name.chars().all(|c| ('a'..='z').contains(&c)),
+            "tag_name contained non-alphabetic chars: {:?}",
+            tag_name
+        );
+        self.queued_tags.push(tag_name);
+    }
+
+    /// Close the most recently opened HTML tag.
+    ///
+    /// If that tag is still queued (no text has been pushed since it was opened),
+    /// it is dropped from the queue and nothing is written.
+    pub(super) fn close_tag(&mut self) {
+        // The most recently opened tag may never have been flushed, e.g. because no
+        // text followed it or because the length limit was reached. Discard it rather
+        // than closing an enclosing tag in its place.
+        if self.queued_tags.pop().is_some() {
+            return;
+        }
+
+        match self.unclosed_tags.pop() {
+            // Close the most recently opened tag.
+            Some(tag_name) => write!(self.buf, "</{}>", tag_name).unwrap(),
+            // `close_tag()` may be called with no tags left to close
+            // (more closes than opens); this is deliberately a no-op.
+            None => {}
+        }
+    }
+
+    /// Write all queued tags and add them to the `unclosed_tags` list.
+    fn flush_queue(&mut self) {
+        for tag_name in self.queued_tags.drain(..) {
+            write!(self.buf, "<{}>", tag_name).unwrap();
+
+            self.unclosed_tags.push(tag_name);
+        }
+    }
+
+    /// Close all unclosed tags.
+    fn close_all_tags(&mut self) {
+        while !self.unclosed_tags.is_empty() {
+            self.close_tag();
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/src/librustdoc/html/length_limit/tests.rs b/src/librustdoc/html/length_limit/tests.rs
new file mode 100644
index 0000000000000..2d02b8a16da67
--- /dev/null
+++ b/src/librustdoc/html/length_limit/tests.rs
@@ -0,0 +1,120 @@
+use super::*;
+
+#[test]
+fn empty() {
+    assert_eq!(HtmlWithLimit::new(0).finish(), "");
+    assert_eq!(HtmlWithLimit::new(60).finish(), "");
+}
+
+#[test]
+fn basic() {
+    let mut buf = HtmlWithLimit::new(60);
+    buf.push("Hello ");
+    buf.open_tag("em");
+    buf.push("world");
+    buf.close_tag();
+    buf.push("!");
+    assert_eq!(buf.finish(), "Hello <em>world</em>!");
+}
+
+#[test]
+fn no_tags() {
+    let mut buf = HtmlWithLimit::new(60);
+    buf.push("Hello");
+    buf.push(" world!");
+    assert_eq!(buf.finish(), "Hello world!");
+}
+
+#[test]
+fn limit_0() {
+    let mut buf = HtmlWithLimit::new(0);
+    buf.push("Hello ");
+    buf.open_tag("em");
+    buf.push("world");
+    buf.close_tag();
+    buf.push("!");
+    assert_eq!(buf.finish(), "");
+}
+
+#[test]
+fn exactly_limit() {
+    let mut buf = HtmlWithLimit::new(12);
+    buf.push("Hello ");
+    buf.open_tag("em");
+    buf.push("world");
+    buf.close_tag();
+    buf.push("!");
+    assert_eq!(buf.finish(), "Hello <em>world</em>!");
+}
+
+#[test]
+fn multiple_nested_tags() {
+    let mut buf = HtmlWithLimit::new(60);
+    buf.open_tag("p");
+    buf.push("This is a ");
+    buf.open_tag("em");
+    buf.push("paragraph");
+    buf.open_tag("strong");
+    buf.push("!");
+    buf.close_tag();
+    buf.close_tag();
+    buf.close_tag();
+    assert_eq!(buf.finish(), "<p>This is a <em>paragraph<strong>!</strong></em></p>");
+}
+
+#[test]
+fn forgot_to_close_tags() {
+    let mut buf = HtmlWithLimit::new(60);
+    buf.open_tag("p");
+    buf.push("This is a ");
+    buf.open_tag("em");
+    buf.push("paragraph");
+    buf.open_tag("strong");
+    buf.push("!");
+    assert_eq!(buf.finish(), "<p>This is a <em>paragraph<strong>!</strong></em></p>");
+}
+
+#[test]
+fn past_the_limit() {
+    let mut buf = HtmlWithLimit::new(20);
+    buf.open_tag("p");
+    (0..10).try_for_each(|n| {
+        buf.open_tag("strong");
+        buf.push("word#")?;
+        buf.push(&n.to_string())?;
+        buf.close_tag();
+        ControlFlow::CONTINUE
+    });
+    buf.close_tag();
+    assert_eq!(
+        buf.finish(),
+        "<p>\
+             <strong>word#0</strong>\
+             <strong>word#1</strong>\
+             <strong>word#2</strong>\
+             </p>"
+    );
+}
+
+#[test]
+fn quickly_past_the_limit() {
+    let mut buf = HtmlWithLimit::new(6);
+    buf.open_tag("p");
+    buf.push("Hello");
+    buf.push(" World");
+    // intentionally not closing <p> before finishing
+    assert_eq!(buf.finish(), "<p>Hello</p>");
+}
+
+#[test]
+fn close_too_many() {
+    let mut buf = HtmlWithLimit::new(60);
+    buf.open_tag("p");
+    buf.push("Hello");
+    buf.close_tag();
+    // This call does not panic because there are valid cases
+    // where `close_tag()` is called with no tags left to close.
+    // So `close_tag()` does nothing in this case.
+    buf.close_tag();
+    assert_eq!(buf.finish(), "<p>Hello</p>");
+}
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 472323daf3017..b2ca134998188 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -23,12 +23,13 @@ use rustc_hir::HirId; use rustc_middle::ty::TyCtxt; use rustc_span::edition::Edition; use rustc_span::Span; + use std::borrow::Cow; use std::cell::RefCell; use std::collections::VecDeque; use std::default::Default; use std::fmt::Write; -use std::ops::Range; +use std::ops::{ControlFlow, Range}; use std::str; use crate::clean::RenderedLink; @@ -36,6 +37,7 @@ use crate::doctest; use crate::html::escape::Escape; use crate::html::format::Buffer; use crate::html::highlight; +use crate::html::length_limit::HtmlWithLimit; use crate::html::toc::TocBuilder; use pulldown_cmark::{ @@ -1081,15 +1083,6 @@ fn markdown_summary_with_limit( return (String::new(), false); } - let mut s = String::with_capacity(md.len() * 3 / 2); - let mut text_length = 0; - let mut stopped_early = false; - - fn push(s: &mut String, text_length: &mut usize, text: &str) { - write!(s, "{}", Escape(text)).unwrap(); - *text_length += text.len(); - } - let mut replacer = |broken_link: BrokenLink<'_>| { if let Some(link) = link_names.iter().find(|link| &*link.original_text == broken_link.reference) @@ -1101,56 +1094,48 @@ fn markdown_summary_with_limit( }; let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut replacer)); - let p = LinkReplacer::new(p, link_names); + let mut p = LinkReplacer::new(p, link_names); - 'outer: for event in p { + let mut buf = HtmlWithLimit::new(length_limit); + let mut stopped_early = false; + p.try_for_each(|event| { match &event { Event::Text(text) => { - for word in text.split_inclusive(char::is_whitespace) { - if text_length + word.len() >= length_limit { - stopped_early = true; - break 'outer; - } - - push(&mut s, &mut text_length, word); + let r = + text.split_inclusive(char::is_whitespace).try_for_each(|word| buf.push(word)); + if
r.is_break() { + stopped_early = true; } + return r; } Event::Code(code) => { - if text_length + code.len() >= length_limit { + buf.open_tag("code"); + let r = buf.push(code); + if r.is_break() { stopped_early = true; - break; + } else { + buf.close_tag(); } - - s.push_str("");
- push(&mut s, &mut text_length, code);
- s.push_str("
");
+ return r;
}
Event::Start(tag) => match tag {
- Tag::Emphasis => s.push_str(""),
- Tag::Strong => s.push_str(""),
- Tag::CodeBlock(..) => break,
+ Tag::Emphasis => buf.open_tag("em"),
+ Tag::Strong => buf.open_tag("strong"),
+ Tag::CodeBlock(..) => return ControlFlow::BREAK,
_ => {}
},
Event::End(tag) => match tag {
- Tag::Emphasis => s.push_str(""),
- Tag::Strong => s.push_str(""),
- Tag::Paragraph => break,
- Tag::Heading(..) => break,
+ Tag::Emphasis | Tag::Strong => buf.close_tag(),
+ Tag::Paragraph | Tag::Heading(..) => return ControlFlow::BREAK,
_ => {}
},
- Event::HardBreak | Event::SoftBreak => {
- if text_length + 1 >= length_limit {
- stopped_early = true;
- break;
- }
-
- push(&mut s, &mut text_length, " ");
- }
+ Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
_ => {}
- }
- }
+ };
+ ControlFlow::CONTINUE
+ });
- (s, stopped_early)
+ (buf.finish(), stopped_early)
}
/// Renders a shortened first paragraph of the given Markdown as a subset of Markdown,
diff --git a/src/librustdoc/html/markdown/tests.rs b/src/librustdoc/html/markdown/tests.rs
index 1e4bdc2d15199..eca75ef013aae 100644
--- a/src/librustdoc/html/markdown/tests.rs
+++ b/src/librustdoc/html/markdown/tests.rs
@@ -225,6 +225,7 @@ fn test_short_markdown_summary() {
assert_eq!(output, expect, "original: {}", input);
}
+ t("", "");
t("hello [Rust](https://www.rust-lang.org) :)", "hello Rust :)");
t("*italic*", "italic");
t("**bold**", "bold");
@@ -264,6 +265,7 @@ fn test_plain_text_summary() {
assert_eq!(output, expect, "original: {}", input);
}
+ t("", "");
t("hello [Rust](https://www.rust-lang.org) :)", "hello Rust :)");
t("**bold**", "bold");
t("Multi-line\nsummary", "Multi-line summary");
diff --git a/src/librustdoc/html/mod.rs b/src/librustdoc/html/mod.rs
index 60ebdf5690d0d..109b0a356db5f 100644
--- a/src/librustdoc/html/mod.rs
+++ b/src/librustdoc/html/mod.rs
@@ -2,6 +2,7 @@ crate mod escape;
crate mod format;
crate mod highlight;
crate mod layout;
+mod length_limit;
// used by the error-index generator, so it needs to be public
pub mod markdown;
crate mod render;
diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs
index e02d92b11b844..ab94e0d568353 100644
--- a/src/librustdoc/lib.rs
+++ b/src/librustdoc/lib.rs
@@ -5,6 +5,7 @@
#![feature(rustc_private)]
#![feature(array_methods)]
#![feature(box_patterns)]
+#![feature(control_flow_enum)]
#![feature(in_band_lifetimes)]
#![feature(nll)]
#![feature(test)]