generalize markdown to source span calculation

euclio · euclio · commit ee10d99b9a44 · 2019-01-14T20:29:39.000-05:00
diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs
@@ -451,17 +451,9 @@ pub fn span_of_attrs(attrs: &Attributes) -> syntax_pos::Span {
 
 /// Reports a resolution failure diagnostic.
 ///
-/// Ideally we can report the diagnostic with the actual span in the source where the link failure
-/// occurred. However, there's a mismatch between the span in the source code and the span in the
-/// markdown, so we have to do a bit of work to figure out the correspondence.
-///
-/// It's not too hard to find the span for sugared doc comments (`///` and `/**`), because the
-/// source will match the markdown exactly, excluding the comment markers. However, it's much more
-/// difficult to calculate the spans for unsugared docs, because we have to deal with escaping and
-/// other source features. So, we attempt to find the exact source span of the resolution failure
-/// in sugared docs, but use the span of the documentation attributes themselves for unsugared
-/// docs. Because this span might be overly large, we display the markdown line containing the
-/// failure as a note.
+/// If we cannot find the exact source span of the resolution failure, we use the span of the
+/// documentation attributes themselves. This is a little heavy-handed, so we display the markdown
+/// line containing the failure as a note as well.
 fn resolution_failure(
     cx: &DocContext,
     attrs: &Attributes,
@@ -473,54 +465,7 @@ fn resolution_failure(
     let msg = format!("`[{}]` cannot be resolved, ignoring it...", path_str);
 
     let mut diag = if let Some(link_range) = link_range {
-        let src = cx.sess().source_map().span_to_snippet(sp);
-        let is_all_sugared_doc = attrs.doc_strings.iter().all(|frag| match frag {
-            DocFragment::SugaredDoc(..) => true,
-            _ => false,
-        });
-
-        if let (Ok(src), true) = (src, is_all_sugared_doc) {
-            // The number of markdown lines up to and including the resolution failure.
-            let num_lines = dox[..link_range.start].lines().count();
-
-            // We use `split_terminator('\n')` instead of `lines()` when counting bytes to ensure
-            // that DOS-style line endings do not cause the spans to be calculated incorrectly.
-            let mut src_lines = src.split_terminator('\n');
-            let mut md_lines = dox.split_terminator('\n').take(num_lines).peekable();
-
-            // The number of bytes from the start of the source span to the resolution failure that
-            // are *not* part of the markdown, like comment markers.
-            let mut extra_src_bytes = 0;
-
-            while let Some(md_line) = md_lines.next() {
-                loop {
-                    let source_line = src_lines
-                        .next()
-                        .expect("could not find markdown line in source");
-
-                    match source_line.find(md_line) {
-                        Some(offset) => {
-                            extra_src_bytes += if md_lines.peek().is_some() {
-                                source_line.len() - md_line.len()
-                            } else {
-                                offset
-                            };
-                            break;
-                        }
-                        None => {
-                            // Since this is a source line that doesn't include a markdown line,
-                            // we have to count the newline that we split from earlier.
-                            extra_src_bytes += source_line.len() + 1;
-                        }
-                    }
-                }
-            }
-
-            let sp = sp.from_inner_byte_pos(
-                link_range.start + extra_src_bytes,
-                link_range.end + extra_src_bytes,
-            );
-
+        if let Some(sp) = super::source_span_for_markdown_range(cx, dox, &link_range, attrs) {
             let mut diag = cx.tcx.struct_span_lint_node(
                 lint::builtin::INTRA_DOC_LINK_RESOLUTION_FAILURE,
                 NodeId::from_u32(0),
diff --git a/src/librustdoc/passes/mod.rs b/src/librustdoc/passes/mod.rs
@@ -8,6 +8,8 @@ use rustc::util::nodemap::DefIdSet;
 use std::mem;
 use std::fmt;
 use syntax::ast::NodeId;
+use syntax_pos::Span;
+use std::ops::Range;
 
 use clean::{self, GetDefId, Item};
 use core::{DocContext, DocAccessLevels};
@@ -396,3 +398,84 @@ pub fn look_for_tests<'a, 'tcx: 'a, 'rcx: 'a>(
         }
     }
 }
+
+/// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code.
+///
+/// This method will return `None` if we cannot construct a span from the source map or if the
+/// attributes are not all sugared doc comments. It's difficult to calculate the correct span in
+/// that case due to escaping and other source features.
+crate fn source_span_for_markdown_range(
+    cx: &DocContext,
+    markdown: &str,
+    md_range: &Range<usize>,
+    attrs: &clean::Attributes,
+) -> Option<Span> {
+    let is_all_sugared_doc = attrs.doc_strings.iter().all(|frag| match frag {
+        clean::DocFragment::SugaredDoc(..) => true,
+        _ => false,
+    });
+
+    if !is_all_sugared_doc {
+        return None;
+    }
+
+    let snippet = cx
+        .sess()
+        .source_map()
+        .span_to_snippet(span_of_attrs(attrs))
+        .ok()?;
+
+    let starting_line = markdown[..md_range.start].lines().count() - 1;
+    let ending_line = markdown[..md_range.end].lines().count() - 1;
+
+    // We use `split_terminator('\n')` instead of `lines()` when counting bytes so that we only
+    // we can treat CRLF and LF line endings the same way.
+    let mut src_lines = snippet.split_terminator('\n');
+    let md_lines = markdown.split_terminator('\n');
+
+    // The number of bytes from the source span to the markdown span that are not part
+    // of the markdown, like comment markers.
+    let mut start_bytes = 0;
+    let mut end_bytes = 0;
+
+    'outer: for (line_no, md_line) in md_lines.enumerate() {
+        loop {
+            let source_line = src_lines.next().expect("could not find markdown in source");
+            match source_line.find(md_line) {
+                Some(offset) => {
+                    if line_no == starting_line {
+                        start_bytes += offset;
+
+                        if starting_line == ending_line {
+                            break 'outer;
+                        }
+                    } else if line_no == ending_line {
+                        end_bytes += offset;
+                        break 'outer;
+                    } else if line_no < starting_line {
+                        start_bytes += source_line.len() - md_line.len();
+                    } else {
+                        end_bytes += source_line.len() - md_line.len();
+                    }
+                    break;
+                }
+                None => {
+                    // Since this is a source line that doesn't include a markdown line,
+                    // we have to count the newline that we split from earlier.
+                    if line_no <= starting_line {
+                        start_bytes += source_line.len() + 1;
+                    } else {
+                        end_bytes += source_line.len() + 1;
+                    }
+                }
+            }
+        }
+    }
+
+    let sp = span_of_attrs(attrs).from_inner_byte_pos(
+        md_range.start + start_bytes,
+        md_range.end + start_bytes + end_bytes,
+    );
+
+    Some(sp)
+}