Auto merge of #80550 - bugadani:markdown-refactor, r=jyn514

Cleanup markdown span handling, take 2

This PR includes the cleanups made in #80244 except for the removal of `locate()`.

While the biggest conceptual part in #80244 was the removal of `locate()`, it introduced a diagnostic regression.

Additional cleanup:
 - Use `RefCell` to avoid building two separate vectors for the links

Work to do:
- [ ] Decide if `locate()` can be simplified by assuming `s` is always in `md`
- [ ] Should probably add some tests that still provide the undesired diagnostics causing #80381

cc `@jyn514` This is the best I can do without patching Pulldown to provide multiple ranges for reference-style links. Also, since `locate` is probably more efficient than `rfind` (at least it's constant time), I decided to not check the link type and just cover every &str as it was before.
This commit is contained in:
bors 2021-01-02 15:31:53 +00:00
commit fd85ca02f6
2 changed files with 107 additions and 99 deletions

View File

@ -25,6 +25,7 @@ use rustc_session::lint;
use rustc_span::edition::Edition; use rustc_span::edition::Edition;
use rustc_span::Span; use rustc_span::Span;
use std::borrow::Cow; use std::borrow::Cow;
use std::cell::RefCell;
use std::collections::VecDeque; use std::collections::VecDeque;
use std::default::Default; use std::default::Default;
use std::fmt::Write; use std::fmt::Write;
@ -414,11 +415,13 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
} }
} }
type SpannedEvent<'a> = (Event<'a>, Range<usize>);
/// Make headings links with anchor IDs and build up TOC. /// Make headings links with anchor IDs and build up TOC.
struct HeadingLinks<'a, 'b, 'ids, I> { struct HeadingLinks<'a, 'b, 'ids, I> {
inner: I, inner: I,
toc: Option<&'b mut TocBuilder>, toc: Option<&'b mut TocBuilder>,
buf: VecDeque<Event<'a>>, buf: VecDeque<SpannedEvent<'a>>,
id_map: &'ids mut IdMap, id_map: &'ids mut IdMap,
} }
@ -428,8 +431,10 @@ impl<'a, 'b, 'ids, I> HeadingLinks<'a, 'b, 'ids, I> {
} }
} }
impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a, 'b, 'ids, I> { impl<'a, 'b, 'ids, I: Iterator<Item = SpannedEvent<'a>>> Iterator
type Item = Event<'a>; for HeadingLinks<'a, 'b, 'ids, I>
{
type Item = SpannedEvent<'a>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if let Some(e) = self.buf.pop_front() { if let Some(e) = self.buf.pop_front() {
@ -437,31 +442,29 @@ impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a,
} }
let event = self.inner.next(); let event = self.inner.next();
if let Some(Event::Start(Tag::Heading(level))) = event { if let Some((Event::Start(Tag::Heading(level)), _)) = event {
let mut id = String::new(); let mut id = String::new();
for event in &mut self.inner { for event in &mut self.inner {
match &event { match &event.0 {
Event::End(Tag::Heading(..)) => break, Event::End(Tag::Heading(..)) => break,
Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {}
Event::Text(text) | Event::Code(text) => { Event::Text(text) | Event::Code(text) => {
id.extend(text.chars().filter_map(slugify)); id.extend(text.chars().filter_map(slugify));
self.buf.push_back(event);
} }
_ => {} _ => self.buf.push_back(event),
}
match event {
Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {}
event => self.buf.push_back(event),
} }
} }
let id = self.id_map.derive(id); let id = self.id_map.derive(id);
if let Some(ref mut builder) = self.toc { if let Some(ref mut builder) = self.toc {
let mut html_header = String::new(); let mut html_header = String::new();
html::push_html(&mut html_header, self.buf.iter().cloned()); html::push_html(&mut html_header, self.buf.iter().map(|(ev, _)| ev.clone()));
let sec = builder.push(level as u32, html_header, id.clone()); let sec = builder.push(level as u32, html_header, id.clone());
self.buf.push_front(Event::Html(format!("{} ", sec).into())); self.buf.push_front((Event::Html(format!("{} ", sec).into()), 0..0));
} }
self.buf.push_back(Event::Html(format!("</a></h{}>", level).into())); self.buf.push_back((Event::Html(format!("</a></h{}>", level).into()), 0..0));
let start_tags = format!( let start_tags = format!(
"<h{level} id=\"{id}\" class=\"section-header\">\ "<h{level} id=\"{id}\" class=\"section-header\">\
@ -469,7 +472,7 @@ impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a,
id = id, id = id,
level = level level = level
); );
return Some(Event::Html(start_tags.into())); return Some((Event::Html(start_tags.into()), 0..0));
} }
event event
} }
@ -555,23 +558,23 @@ impl<'a, I> Footnotes<'a, I> {
} }
} }
impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> { impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for Footnotes<'a, I> {
type Item = Event<'a>; type Item = SpannedEvent<'a>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
loop { loop {
match self.inner.next() { match self.inner.next() {
Some(Event::FootnoteReference(ref reference)) => { Some((Event::FootnoteReference(ref reference), range)) => {
let entry = self.get_entry(&reference); let entry = self.get_entry(&reference);
let reference = format!( let reference = format!(
"<sup id=\"fnref{0}\"><a href=\"#fn{0}\">{0}</a></sup>", "<sup id=\"fnref{0}\"><a href=\"#fn{0}\">{0}</a></sup>",
(*entry).1 (*entry).1
); );
return Some(Event::Html(reference.into())); return Some((Event::Html(reference.into()), range));
} }
Some(Event::Start(Tag::FootnoteDefinition(def))) => { Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => {
let mut content = Vec::new(); let mut content = Vec::new();
for event in &mut self.inner { for (event, _) in &mut self.inner {
if let Event::End(Tag::FootnoteDefinition(..)) = event { if let Event::End(Tag::FootnoteDefinition(..)) = event {
break; break;
} }
@ -602,7 +605,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {
ret.push_str("</li>"); ret.push_str("</li>");
} }
ret.push_str("</ol></div>"); ret.push_str("</ol></div>");
return Some(Event::Html(ret.into())); return Some((Event::Html(ret.into()), 0..0));
} else { } else {
return None; return None;
} }
@ -912,13 +915,14 @@ impl Markdown<'_> {
}; };
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut replacer)); let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut replacer));
let p = p.into_offset_iter();
let mut s = String::with_capacity(md.len() * 3 / 2); let mut s = String::with_capacity(md.len() * 3 / 2);
let p = HeadingLinks::new(p, None, &mut ids); let p = HeadingLinks::new(p, None, &mut ids);
let p = LinkReplacer::new(p, links);
let p = CodeBlocks::new(p, codes, edition, playground);
let p = Footnotes::new(p); let p = Footnotes::new(p);
let p = LinkReplacer::new(p.map(|(ev, _)| ev), links);
let p = CodeBlocks::new(p, codes, edition, playground);
html::push_html(&mut s, p); html::push_html(&mut s, p);
s s
@ -929,7 +933,7 @@ impl MarkdownWithToc<'_> {
crate fn into_string(self) -> String { crate fn into_string(self) -> String {
let MarkdownWithToc(md, mut ids, codes, edition, playground) = self; let MarkdownWithToc(md, mut ids, codes, edition, playground) = self;
let p = Parser::new_ext(md, opts()); let p = Parser::new_ext(md, opts()).into_offset_iter();
let mut s = String::with_capacity(md.len() * 3 / 2); let mut s = String::with_capacity(md.len() * 3 / 2);
@ -937,8 +941,8 @@ impl MarkdownWithToc<'_> {
{ {
let p = HeadingLinks::new(p, Some(&mut toc), &mut ids); let p = HeadingLinks::new(p, Some(&mut toc), &mut ids);
let p = CodeBlocks::new(p, codes, edition, playground);
let p = Footnotes::new(p); let p = Footnotes::new(p);
let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
html::push_html(&mut s, p); html::push_html(&mut s, p);
} }
@ -954,19 +958,19 @@ impl MarkdownHtml<'_> {
if md.is_empty() { if md.is_empty() {
return String::new(); return String::new();
} }
let p = Parser::new_ext(md, opts()); let p = Parser::new_ext(md, opts()).into_offset_iter();
// Treat inline HTML as plain text. // Treat inline HTML as plain text.
let p = p.map(|event| match event { let p = p.map(|event| match event.0 {
Event::Html(text) => Event::Text(text), Event::Html(text) => (Event::Text(text), event.1),
_ => event, _ => event,
}); });
let mut s = String::with_capacity(md.len() * 3 / 2); let mut s = String::with_capacity(md.len() * 3 / 2);
let p = HeadingLinks::new(p, None, &mut ids); let p = HeadingLinks::new(p, None, &mut ids);
let p = CodeBlocks::new(p, codes, edition, playground);
let p = Footnotes::new(p); let p = Footnotes::new(p);
let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
html::push_html(&mut s, p); html::push_html(&mut s, p);
s s
@ -1119,56 +1123,65 @@ crate fn plain_text_summary(md: &str) -> String {
s s
} }
crate fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> { crate fn markdown_links(md: &str) -> Vec<(String, Range<usize>)> {
if md.is_empty() { if md.is_empty() {
return vec![]; return vec![];
} }
let mut links = vec![]; let links = RefCell::new(vec![]);
let mut shortcut_links = vec![];
{ // FIXME: remove this function once pulldown_cmark can provide spans for link definitions.
let locate = |s: &str| unsafe { let locate = |s: &str, fallback: Range<usize>| unsafe {
let s_start = s.as_ptr(); let s_start = s.as_ptr();
let s_end = s_start.add(s.len()); let s_end = s_start.add(s.len());
let md_start = md.as_ptr(); let md_start = md.as_ptr();
let md_end = md_start.add(md.len()); let md_end = md_start.add(md.len());
if md_start <= s_start && s_end <= md_end { if md_start <= s_start && s_end <= md_end {
let start = s_start.offset_from(md_start) as usize; let start = s_start.offset_from(md_start) as usize;
let end = s_end.offset_from(md_start) as usize; let end = s_end.offset_from(md_start) as usize;
Some(start..end) start..end
} else { } else {
None fallback
} }
}; };
let mut push = |link: BrokenLink<'_>| { let span_for_link = |link: &CowStr<'_>, span: Range<usize>| {
// FIXME: use `link.span` instead of `locate` // For diagnostics, we want to underline the link's definition but `span` will point at
// (doing it now includes the `[]` as well as the text) // where the link is used. This is a problem for reference-style links, where the definition
shortcut_links.push((link.reference.to_owned(), locate(link.reference))); // is separate from the usage.
None match link {
}; // `Borrowed` variant means the string (the link's destination) may come directly from
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)); // the markdown text and we can locate the original link destination.
// NOTE: LinkReplacer also provides `Borrowed` but possibly from other sources,
// so `locate()` can fall back to use `span`.
CowStr::Borrowed(s) => locate(s, span),
// There's no need to thread an IdMap through to here because // For anything else, we can only use the provided range.
// the IDs generated aren't going to be emitted anywhere. CowStr::Boxed(_) | CowStr::Inlined(_) => span,
let mut ids = IdMap::new(); }
let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids)); };
for ev in iter { let mut push = |link: BrokenLink<'_>| {
if let Event::Start(Tag::Link(_, dest, _)) = ev { let span = span_for_link(&CowStr::Borrowed(link.reference), link.span);
debug!("found link: {}", dest); links.borrow_mut().push((link.reference.to_owned(), span));
links.push(match dest { None
CowStr::Borrowed(s) => (s.to_owned(), locate(s)), };
s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), None), let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)).into_offset_iter();
});
} // There's no need to thread an IdMap through to here because
// the IDs generated aren't going to be emitted anywhere.
let mut ids = IdMap::new();
let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids));
for ev in iter {
if let Event::Start(Tag::Link(_, dest, _)) = ev.0 {
debug!("found link: {}", dest);
let span = span_for_link(&dest, ev.1);
links.borrow_mut().push((dest.into_string(), span));
} }
} }
links.append(&mut shortcut_links); links.into_inner()
links
} }
#[derive(Debug)] #[derive(Debug)]

View File

@ -245,7 +245,7 @@ struct DiagnosticInfo<'a> {
item: &'a Item, item: &'a Item,
dox: &'a str, dox: &'a str,
ori_link: &'a str, ori_link: &'a str,
link_range: Option<Range<usize>>, link_range: Range<usize>,
} }
#[derive(Clone, Debug, Hash)] #[derive(Clone, Debug, Hash)]
@ -960,7 +960,7 @@ impl LinkCollector<'_, '_> {
parent_node: Option<DefId>, parent_node: Option<DefId>,
krate: CrateNum, krate: CrateNum,
ori_link: String, ori_link: String,
link_range: Option<Range<usize>>, link_range: Range<usize>,
) -> Option<ItemLink> { ) -> Option<ItemLink> {
trace!("considering link '{}'", ori_link); trace!("considering link '{}'", ori_link);
@ -1606,7 +1606,7 @@ fn report_diagnostic(
msg: &str, msg: &str,
item: &Item, item: &Item,
dox: &str, dox: &str,
link_range: &Option<Range<usize>>, link_range: &Range<usize>,
decorate: impl FnOnce(&mut DiagnosticBuilder<'_>, Option<rustc_span::Span>), decorate: impl FnOnce(&mut DiagnosticBuilder<'_>, Option<rustc_span::Span>),
) { ) {
let hir_id = match cx.as_local_hir_id(item.def_id) { let hir_id = match cx.as_local_hir_id(item.def_id) {
@ -1624,31 +1624,27 @@ fn report_diagnostic(
cx.tcx.struct_span_lint_hir(lint, hir_id, sp, |lint| { cx.tcx.struct_span_lint_hir(lint, hir_id, sp, |lint| {
let mut diag = lint.build(msg); let mut diag = lint.build(msg);
let span = link_range let span = super::source_span_for_markdown_range(cx, dox, link_range, attrs);
.as_ref()
.and_then(|range| super::source_span_for_markdown_range(cx, dox, range, attrs));
if let Some(link_range) = link_range { if let Some(sp) = span {
if let Some(sp) = span { diag.set_span(sp);
diag.set_span(sp); } else {
} else { // blah blah blah\nblah\nblah [blah] blah blah\nblah blah
// blah blah blah\nblah\nblah [blah] blah blah\nblah blah // ^ ~~~~
// ^ ~~~~ // | link_range
// | link_range // last_new_line_offset
// last_new_line_offset let last_new_line_offset = dox[..link_range.start].rfind('\n').map_or(0, |n| n + 1);
let last_new_line_offset = dox[..link_range.start].rfind('\n').map_or(0, |n| n + 1); let line = dox[last_new_line_offset..].lines().next().unwrap_or("");
let line = dox[last_new_line_offset..].lines().next().unwrap_or("");
// Print the line containing the `link_range` and manually mark it with '^'s. // Print the line containing the `link_range` and manually mark it with '^'s.
diag.note(&format!( diag.note(&format!(
"the link appears in this line:\n\n{line}\n\ "the link appears in this line:\n\n{line}\n\
{indicator: <before$}{indicator:^<found$}", {indicator: <before$}{indicator:^<found$}",
line = line, line = line,
indicator = "", indicator = "",
before = link_range.start - last_new_line_offset, before = link_range.start - last_new_line_offset,
found = link_range.len(), found = link_range.len(),
)); ));
}
} }
decorate(&mut diag, span); decorate(&mut diag, span);
@ -1668,7 +1664,7 @@ fn resolution_failure(
path_str: &str, path_str: &str,
disambiguator: Option<Disambiguator>, disambiguator: Option<Disambiguator>,
dox: &str, dox: &str,
link_range: Option<Range<usize>>, link_range: Range<usize>,
kinds: SmallVec<[ResolutionFailure<'_>; 3]>, kinds: SmallVec<[ResolutionFailure<'_>; 3]>,
) { ) {
let tcx = collector.cx.tcx; let tcx = collector.cx.tcx;
@ -1892,7 +1888,7 @@ fn anchor_failure(
item: &Item, item: &Item,
path_str: &str, path_str: &str,
dox: &str, dox: &str,
link_range: Option<Range<usize>>, link_range: Range<usize>,
failure: AnchorFailure, failure: AnchorFailure,
) { ) {
let msg = match failure { let msg = match failure {
@ -1917,7 +1913,7 @@ fn ambiguity_error(
item: &Item, item: &Item,
path_str: &str, path_str: &str,
dox: &str, dox: &str,
link_range: Option<Range<usize>>, link_range: Range<usize>,
candidates: Vec<Res>, candidates: Vec<Res>,
) { ) {
let mut msg = format!("`{}` is ", path_str); let mut msg = format!("`{}` is ", path_str);
@ -1966,13 +1962,12 @@ fn suggest_disambiguator(
path_str: &str, path_str: &str,
dox: &str, dox: &str,
sp: Option<rustc_span::Span>, sp: Option<rustc_span::Span>,
link_range: &Option<Range<usize>>, link_range: &Range<usize>,
) { ) {
let suggestion = disambiguator.suggestion(); let suggestion = disambiguator.suggestion();
let help = format!("to link to the {}, {}", disambiguator.descr(), suggestion.descr()); let help = format!("to link to the {}, {}", disambiguator.descr(), suggestion.descr());
if let Some(sp) = sp { if let Some(sp) = sp {
let link_range = link_range.as_ref().expect("must have a link range if we have a span");
let msg = if dox.bytes().nth(link_range.start) == Some(b'`') { let msg = if dox.bytes().nth(link_range.start) == Some(b'`') {
format!("`{}`", suggestion.as_help(path_str)) format!("`{}`", suggestion.as_help(path_str))
} else { } else {
@ -1991,7 +1986,7 @@ fn privacy_error(
item: &Item, item: &Item,
path_str: &str, path_str: &str,
dox: &str, dox: &str,
link_range: Option<Range<usize>>, link_range: Range<usize>,
) { ) {
let sym; let sym;
let item_name = match item.name { let item_name = match item.name {