
Commit 8c93798 (parent: ee10d99)

rustdoc: check code block syntax in early pass

10 files changed (+476, -73 lines)

src/librustdoc/html/highlight.rs (+55, -37)

@@ -25,40 +25,51 @@ pub fn render_with_highlighting(
     tooltip: Option<(&str, &str)>,
 ) -> String {
     debug!("highlighting: ================\n{}\n==============", src);
-    let sess = parse::ParseSess::new(FilePathMapping::empty());
-    let fm = sess.source_map().new_source_file(FileName::Custom("stdin".to_string()),
-                                               src.to_string());
-
     let mut out = Vec::new();
     if let Some((tooltip, class)) = tooltip {
         write!(out, "<div class='information'><div class='tooltip {}'>ⓘ<span \
                      class='tooltiptext'>{}</span></div></div>",
                class, tooltip).unwrap();
     }
-    write_header(class, &mut out).unwrap();
-
-    let lexer = match lexer::StringReader::new_without_err(&sess, fm, None, "Output from rustc:") {
-        Ok(l) => l,
-        Err(_) => {
-            let first_line = src.lines().next().unwrap_or_else(|| "");
-            let mut err = sess.span_diagnostic
-                .struct_warn(&format!("Invalid doc comment starting with: `{}`\n\
-                                       (Ignoring this codeblock)",
-                                      first_line));
-            err.emit();
-            return String::new();
+
+    let sess = parse::ParseSess::new(FilePathMapping::empty());
+    let fm = sess.source_map().new_source_file(
+        FileName::Custom(String::from("rustdoc-highlighting")),
+        src.to_owned(),
+    );
+    let highlight_result =
+        lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| {
+            let mut classifier = Classifier::new(lexer, sess.source_map());
+
+            let mut highlighted_source = vec![];
+            if classifier.write_source(&mut highlighted_source).is_err() {
+                Err(classifier.lexer.buffer_fatal_errors())
+            } else {
+                Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
+            }
+        });
+
+    match highlight_result {
+        Ok(highlighted_source) => {
+            write_header(class, &mut out).unwrap();
+            write!(out, "{}", highlighted_source).unwrap();
+            if let Some(extension) = extension {
+                write!(out, "{}", extension).unwrap();
+            }
+            write_footer(&mut out).unwrap();
         }
-    };
-    let mut classifier = Classifier::new(lexer, sess.source_map());
-    if classifier.write_source(&mut out).is_err() {
-        classifier.lexer.emit_fatal_errors();
-        return format!("<pre>{}</pre>", src);
-    }
+        Err(errors) => {
+            // If errors are encountered while trying to highlight, cancel the errors and just emit
+            // the unhighlighted source. The errors will have already been reported in the
+            // `check-code-block-syntax` pass.
+            for mut error in errors {
+                error.cancel();
+            }

-    if let Some(extension) = extension {
-        write!(out, "{}", extension).unwrap();
+            write!(out, "<pre><code>{}</code></pre>", src).unwrap();
+        }
     }
-    write_footer(&mut out).unwrap();
+
     String::from_utf8_lossy(&out[..]).into_owned()
 }

@@ -151,6 +162,17 @@ impl<U: Write> Writer for U {
     }
 }

+enum HighlightError {
+    LexError,
+    IoError(io::Error),
+}
+
+impl From<io::Error> for HighlightError {
+    fn from(err: io::Error) -> Self {
+        HighlightError::IoError(err)
+    }
+}
+
 impl<'a> Classifier<'a> {
     fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
         Classifier {

@@ -162,17 +184,11 @@ impl<'a> Classifier<'a> {
         }
     }

-    /// Gets the next token out of the lexer, emitting fatal errors if lexing fails.
-    fn try_next_token(&mut self) -> io::Result<TokenAndSpan> {
+    /// Gets the next token out of the lexer.
+    fn try_next_token(&mut self) -> Result<TokenAndSpan, HighlightError> {
         match self.lexer.try_next_token() {
             Ok(tas) => Ok(tas),
-            Err(_) => {
-                let mut err = self.lexer.sess.span_diagnostic
-                    .struct_warn("Backing out of syntax highlighting");
-                err.note("You probably did not intend to render this as a rust code-block");
-                err.emit();
-                Err(io::Error::new(io::ErrorKind::Other, ""))
-            }
+            Err(_) => Err(HighlightError::LexError),
         }
     }

@@ -185,7 +201,7 @@ impl<'a> Classifier<'a> {
     /// source.
     fn write_source<W: Writer>(&mut self,
                                out: &mut W)
-                               -> io::Result<()> {
+                               -> Result<(), HighlightError> {
         loop {
             let next = self.try_next_token()?;
             if next.tok == token::Eof {

@@ -202,7 +218,7 @@ impl<'a> Classifier<'a> {
     fn write_token<W: Writer>(&mut self,
                               out: &mut W,
                               tas: TokenAndSpan)
-                              -> io::Result<()> {
+                              -> Result<(), HighlightError> {
         let klass = match tas.tok {
             token::Shebang(s) => {
                 out.string(Escape(&s.as_str()), Class::None)?;

@@ -341,7 +357,9 @@ impl<'a> Classifier<'a> {

         // Anything that didn't return above is the simple case where we the
         // class just spans a single token, so we can use the `string` method.
-        out.string(Escape(&self.snip(tas.sp)), klass)
+        out.string(Escape(&self.snip(tas.sp)), klass)?;
+
+        Ok(())
     }

     // Helper function to get a snippet from the source_map.
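The new control flow above is: try to lex and highlight the whole block, and if that fails, cancel the buffered diagnostics and emit the source unhighlighted instead of dropping it. Below is a minimal, self-contained sketch of that shape; the render helper and the plain Result<String, Vec<String>> input are stand-ins I introduced in place of rustdoc's StringReader/Classifier pipeline, not code from the commit.

// Sketch only: `render` and its Result-based input stand in for the
// lexer/classifier machinery above, to show the highlight-or-fall-back shape.
use std::fmt::Write;

fn render(src: &str, highlight_result: Result<String, Vec<String>>) -> String {
    let mut out = String::new();
    match highlight_result {
        Ok(highlighted_source) => {
            // Highlighting worked: emit it inside the usual wrapper.
            write!(out, "<pre class=\"rust\">{}</pre>", highlighted_source).unwrap();
        }
        Err(errors) => {
            // Highlighting failed. The real code cancels the buffered diagnostics
            // (they were already reported by the check-code-block-syntax pass)
            // and emits the source unhighlighted rather than returning nothing.
            let _ = errors;
            write!(out, "<pre><code>{}</code></pre>", src).unwrap();
        }
    }
    out
}

fn main() {
    let broken = "let x = ;";
    println!("{}", render(broken, Err(vec![String::from("expected expression")])));
}

The design point carried over from the diff is that the error branch still produces <pre><code>...</code></pre> output, so a mis-tagged block keeps its contents in the rendered docs.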

src/librustdoc/html/markdown.rs (+109)

@@ -919,6 +919,115 @@ pub fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> {
     links
 }

+#[derive(Debug)]
+crate struct RustCodeBlock {
+    /// The range in the markdown that the code block occupies. Note that this includes the fences
+    /// for fenced code blocks.
+    pub range: Range<usize>,
+    /// The range in the markdown that the code within the code block occupies.
+    pub code: Range<usize>,
+    pub is_fenced: bool,
+    pub syntax: Option<String>,
+}
+
+/// Returns a range of bytes for each code block in the markdown that is tagged as `rust` or
+/// untagged (and assumed to be rust).
+crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {
+    let mut code_blocks = vec![];
+
+    if md.is_empty() {
+        return code_blocks;
+    }
+
+    let mut opts = Options::empty();
+    opts.insert(OPTION_ENABLE_TABLES);
+    opts.insert(OPTION_ENABLE_FOOTNOTES);
+    let mut p = Parser::new_ext(md, opts);
+
+    let mut code_block_start = 0;
+    let mut code_start = 0;
+    let mut is_fenced = false;
+    let mut previous_offset = 0;
+    let mut in_rust_code_block = false;
+    while let Some(event) = p.next() {
+        let offset = p.get_offset();
+
+        match event {
+            Event::Start(Tag::CodeBlock(syntax)) => {
+                let lang_string = if syntax.is_empty() {
+                    LangString::all_false()
+                } else {
+                    LangString::parse(&*syntax, ErrorCodes::Yes)
+                };
+
+                if lang_string.rust {
+                    in_rust_code_block = true;
+
+                    code_start = offset;
+                    code_block_start = match md[previous_offset..offset].find("```") {
+                        Some(fence_idx) => {
+                            is_fenced = true;
+                            previous_offset + fence_idx
+                        }
+                        None => offset,
+                    };
+                }
+            }
+            Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block => {
+                in_rust_code_block = false;
+
+                let code_block_end = if is_fenced {
+                    let fence_str = &md[previous_offset..offset]
+                        .chars()
+                        .rev()
+                        .collect::<String>();
+                    fence_str
+                        .find("```")
+                        .map(|fence_idx| offset - fence_idx)
+                        .unwrap_or_else(|| offset)
+                } else if md
+                    .as_bytes()
+                    .get(offset)
+                    .map(|b| *b == b'\n')
+                    .unwrap_or_default()
+                {
+                    offset - 1
+                } else {
+                    offset
+                };
+
+                let code_end = if is_fenced {
+                    previous_offset
+                } else {
+                    code_block_end
+                };
+
+                code_blocks.push(RustCodeBlock {
+                    is_fenced,
+                    range: Range {
+                        start: code_block_start,
+                        end: code_block_end,
+                    },
+                    code: Range {
+                        start: code_start,
+                        end: code_end,
+                    },
+                    syntax: if !syntax.is_empty() {
+                        Some(syntax.into_owned())
+                    } else {
+                        None
+                    },
+                });
+            }
+            _ => (),
+        }
+
+        previous_offset = offset;
+    }
+
+    code_blocks
+}
+
 #[derive(Clone, Default, Debug)]
 pub struct IdMap {
     map: FxHashMap<String, usize>,
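To make the two ranges concrete: range covers the whole block including its fences, while code covers only the text between them. The sketch below computes those byte offsets by hand for one fenced block; it is illustrative only (the hard-coded string and manual find calls are mine) and does not go through the pulldown-cmark events that rust_code_blocks actually walks.

// Illustrative only: hand-rolled offsets for a single fenced block, showing
// what RustCodeBlock's `range` (fences included) and `code` (fences excluded)
// describe. The real implementation derives these from parser events.
fn main() {
    let md = "Some docs.\n\n```rust\nlet x = 1;\n```\n";

    let range_start = md.find("```").unwrap();                         // opening fence
    let code_start = md[range_start..].find('\n').unwrap() + range_start + 1;
    let code_end = md[code_start..].find("```").unwrap() + code_start;  // closing fence
    let range_end = code_end + 3;                                       // include the fence itself

    println!("range = {}..{}", range_start, range_end);  // prints: range = 12..34
    println!("code  = {:?}", &md[code_start..code_end]); // prints: code  = "let x = 1;\n"
}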

src/librustdoc/lib.rs (+1)

@@ -3,6 +3,7 @@
        html_root_url = "https://doc.rust-lang.org/nightly/",
        html_playground_url = "https://play.rust-lang.org/")]

+#![feature(bind_by_move_pattern_guards)]
 #![feature(rustc_private)]
 #![feature(box_patterns)]
 #![feature(box_syntax)]
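The new feature gate appears to be needed for the Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block arm added in markdown.rs, which binds syntax by move while also carrying an if guard; at the time, that combination was only accepted behind bind_by_move_pattern_guards. A standalone example of the gated pattern (my own, not from the commit):

// Minimal example of a by-move binding combined with a pattern guard, the
// shape that `bind_by_move_pattern_guards` gates. The feature has since been
// stabilized, so this compiles on current stable without any attribute.
fn main() {
    let event: Option<String> = Some(String::from("rust"));
    let in_rust_code_block = true;

    match event {
        // `syntax` is moved out of `event`, and the arm also has a guard.
        Some(syntax) if in_rust_code_block => println!("code block tagged `{}`", syntax),
        _ => println!("not a Rust code block"),
    }
}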
src/librustdoc/passes/check_code_block_syntax.rs (new file, +109)

@@ -0,0 +1,109 @@
+use errors::Applicability;
+use syntax::parse::lexer::{TokenAndSpan, StringReader as Lexer};
+use syntax::parse::{ParseSess, token};
+use syntax::source_map::FilePathMapping;
+use syntax_pos::FileName;
+
+use clean;
+use core::DocContext;
+use fold::DocFolder;
+use html::markdown::{self, RustCodeBlock};
+use passes::Pass;
+
+pub const CHECK_CODE_BLOCK_SYNTAX: Pass =
+    Pass::early("check-code-block-syntax", check_code_block_syntax,
+                "validates syntax inside Rust code blocks");
+
+pub fn check_code_block_syntax(krate: clean::Crate, cx: &DocContext) -> clean::Crate {
+    SyntaxChecker { cx }.fold_crate(krate)
+}
+
+struct SyntaxChecker<'a, 'tcx: 'a, 'rcx: 'a> {
+    cx: &'a DocContext<'a, 'tcx, 'rcx>,
+}
+
+impl<'a, 'tcx, 'rcx> SyntaxChecker<'a, 'tcx, 'rcx> {
+    fn check_rust_syntax(&self, item: &clean::Item, dox: &str, code_block: RustCodeBlock) {
+        let sess = ParseSess::new(FilePathMapping::empty());
+        let source_file = sess.source_map().new_source_file(
+            FileName::Custom(String::from("doctest")),
+            dox[code_block.code].to_owned(),
+        );
+
+        let errors = Lexer::new_or_buffered_errs(&sess, source_file, None).and_then(|mut lexer| {
+            while let Ok(TokenAndSpan { tok, .. }) = lexer.try_next_token() {
+                if tok == token::Eof {
+                    break;
+                }
+            }
+
+            let errors = lexer.buffer_fatal_errors();
+
+            if !errors.is_empty() {
+                Err(errors)
+            } else {
+                Ok(())
+            }
+        });
+
+        if let Err(errors) = errors {
+            let mut diag = if let Some(sp) =
+                super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs)
+            {
+                let mut diag = self
+                    .cx
+                    .sess()
+                    .struct_span_warn(sp, "could not parse code block as Rust code");
+
+                for mut err in errors {
+                    diag.note(&format!("error from rustc: {}", err.message()));
+                    err.cancel();
+                }
+
+                if code_block.syntax.is_none() && code_block.is_fenced {
+                    let sp = sp.from_inner_byte_pos(0, 3);
+                    diag.span_suggestion_with_applicability(
+                        sp,
+                        "mark blocks that do not contain Rust code as text",
+                        String::from("```text"),
+                        Applicability::MachineApplicable,
+                    );
+                }
+
+                diag
+            } else {
+                // We couldn't calculate the span of the markdown block that had the error, so our
+                // diagnostics are going to be a bit lacking.
+                let mut diag = self.cx.sess().struct_span_warn(
+                    super::span_of_attrs(&item.attrs),
+                    "doc comment contains an invalid Rust code block",
+                );
+
+                for mut err in errors {
+                    // Don't bother reporting the error, because we can't show where it happened.
+                    err.cancel();
+                }
+
+                if code_block.syntax.is_none() && code_block.is_fenced {
+                    diag.help("mark blocks that do not contain Rust code as text: ```text");
+                }
+
+                diag
+            };
+
+            diag.emit();
+        }
+    }
+}
+
+impl<'a, 'tcx, 'rcx> DocFolder for SyntaxChecker<'a, 'tcx, 'rcx> {
+    fn fold_item(&mut self, item: clean::Item) -> Option<clean::Item> {
+        if let Some(dox) = &item.attrs.collapsed_doc_value() {
+            for code_block in markdown::rust_code_blocks(&dox) {
+                self.check_rust_syntax(&item, &dox, code_block);
+            }
+        }
+
+        self.fold_item_recur(item)
+    }
+}
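As a concrete illustration of what the pass reports, consider the hypothetical doc comment below (it is not taken from the commit's tests). The untagged fence is treated as Rust by rust_code_blocks; assuming the \sum line fails to lex as Rust, the pass should emit the "could not parse code block as Rust code" warning and, because the block is fenced and untagged, the machine-applicable suggestion to rewrite the opening fence as ```text. The explicitly tagged block is not treated as Rust and is skipped.

/// This fence has no language tag, so rustdoc assumes it is Rust. It does not
/// lex as Rust (the `\` is not a valid token start), so the new pass should
/// warn on the block and suggest changing the opening fence to ```text:
///
/// ```
/// \sum_{i} x_i
/// ```
///
/// An explicitly tagged block is not checked as Rust and is left alone:
///
/// ```text
/// \sum_{i} x_i
/// ```
pub fn sum(xs: &[i32]) -> i32 {
    xs.iter().sum()
}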
