From 08f7b0a422587dbf192a85ce04fcbec07f8023de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20Buga?= Date: Thu, 31 Dec 2020 10:07:19 +0100 Subject: [PATCH 1/5] Use ranges returned by pulldown Co-authored-by: Joshua Nelson --- src/librustdoc/html/markdown.rs | 66 +++++++++++++++++---------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 20cf48915c3..7a922f570da 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -414,11 +414,13 @@ impl<'a, I: Iterator>> Iterator for LinkReplacer<'a, I> { } } +type SpannedEvent<'a> = (Event<'a>, Range); + /// Make headings links with anchor IDs and build up TOC. struct HeadingLinks<'a, 'b, 'ids, I> { inner: I, toc: Option<&'b mut TocBuilder>, - buf: VecDeque>, + buf: VecDeque>, id_map: &'ids mut IdMap, } @@ -428,8 +430,10 @@ impl<'a, 'b, 'ids, I> HeadingLinks<'a, 'b, 'ids, I> { } } -impl<'a, 'b, 'ids, I: Iterator>> Iterator for HeadingLinks<'a, 'b, 'ids, I> { - type Item = Event<'a>; +impl<'a, 'b, 'ids, I: Iterator>> Iterator + for HeadingLinks<'a, 'b, 'ids, I> +{ + type Item = SpannedEvent<'a>; fn next(&mut self) -> Option { if let Some(e) = self.buf.pop_front() { @@ -437,31 +441,29 @@ impl<'a, 'b, 'ids, I: Iterator>> Iterator for HeadingLinks<'a, } let event = self.inner.next(); - if let Some(Event::Start(Tag::Heading(level))) = event { + if let Some((Event::Start(Tag::Heading(level)), _)) = event { let mut id = String::new(); for event in &mut self.inner { - match &event { + match &event.0 { Event::End(Tag::Heading(..)) => break, + Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {} Event::Text(text) | Event::Code(text) => { id.extend(text.chars().filter_map(slugify)); + self.buf.push_back(event); } - _ => {} - } - match event { - Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {} - event => self.buf.push_back(event), + _ => self.buf.push_back(event), } } let id = self.id_map.derive(id); if let Some(ref mut builder) = self.toc { let mut html_header = String::new(); - html::push_html(&mut html_header, self.buf.iter().cloned()); + html::push_html(&mut html_header, self.buf.iter().map(|(ev, _)| ev.clone())); let sec = builder.push(level as u32, html_header, id.clone()); - self.buf.push_front(Event::Html(format!("{} ", sec).into())); + self.buf.push_front((Event::Html(format!("{} ", sec).into()), 0..0)); } - self.buf.push_back(Event::Html(format!("", level).into())); + self.buf.push_back((Event::Html(format!("", level).into()), 0..0)); let start_tags = format!( "\ @@ -469,7 +471,7 @@ impl<'a, 'b, 'ids, I: Iterator>> Iterator for HeadingLinks<'a, id = id, level = level ); - return Some(Event::Html(start_tags.into())); + return Some((Event::Html(start_tags.into()), 0..0)); } event } @@ -560,23 +562,23 @@ impl<'a, I> Footnotes<'a, I> { } } -impl<'a, I: Iterator>> Iterator for Footnotes<'a, I> { - type Item = Event<'a>; +impl<'a, I: Iterator>> Iterator for Footnotes<'a, I> { + type Item = SpannedEvent<'a>; fn next(&mut self) -> Option { loop { match self.inner.next() { - Some(Event::FootnoteReference(ref reference)) => { + Some((Event::FootnoteReference(ref reference), range)) => { let entry = self.get_entry(&reference); let reference = format!( "{0}", (*entry).1 ); - return Some(Event::Html(reference.into())); + return Some((Event::Html(reference.into()), range)); } - Some(Event::Start(Tag::FootnoteDefinition(def))) => { + Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => { let mut content = Vec::new(); - for event in &mut self.inner { + for (event, _) in &mut self.inner { if let Event::End(Tag::FootnoteDefinition(..)) = event { break; } @@ -607,7 +609,7 @@ impl<'a, I: Iterator>> Iterator for Footnotes<'a, I> { ret.push_str(""); } ret.push_str(""); - return Some(Event::Html(ret.into())); + return Some((Event::Html(ret.into()), 0..0)); } else { return None; } @@ -917,13 +919,14 @@ impl Markdown<'_> { }; let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut replacer)); + let p = p.into_offset_iter(); let mut s = String::with_capacity(md.len() * 3 / 2); let p = HeadingLinks::new(p, None, &mut ids); - let p = LinkReplacer::new(p, links); - let p = CodeBlocks::new(p, codes, edition, playground); let p = Footnotes::new(p); + let p = LinkReplacer::new(p.map(|(ev, _)| ev), links); + let p = CodeBlocks::new(p, codes, edition, playground); html::push_html(&mut s, p); s @@ -934,7 +937,7 @@ impl MarkdownWithToc<'_> { crate fn into_string(self) -> String { let MarkdownWithToc(md, mut ids, codes, edition, playground) = self; - let p = Parser::new_ext(md, opts()); + let p = Parser::new_ext(md, opts()).into_offset_iter(); let mut s = String::with_capacity(md.len() * 3 / 2); @@ -942,8 +945,8 @@ impl MarkdownWithToc<'_> { { let p = HeadingLinks::new(p, Some(&mut toc), &mut ids); - let p = CodeBlocks::new(p, codes, edition, playground); let p = Footnotes::new(p); + let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground); html::push_html(&mut s, p); } @@ -959,19 +962,19 @@ impl MarkdownHtml<'_> { if md.is_empty() { return String::new(); } - let p = Parser::new_ext(md, opts()); + let p = Parser::new_ext(md, opts()).into_offset_iter(); // Treat inline HTML as plain text. - let p = p.map(|event| match event { - Event::Html(text) => Event::Text(text), + let p = p.map(|event| match event.0 { + Event::Html(text) => (Event::Text(text), event.1), _ => event, }); let mut s = String::with_capacity(md.len() * 3 / 2); let p = HeadingLinks::new(p, None, &mut ids); - let p = CodeBlocks::new(p, codes, edition, playground); let p = Footnotes::new(p); + let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground); html::push_html(&mut s, p); s @@ -1153,7 +1156,8 @@ crate fn markdown_links(md: &str) -> Vec<(String, Option>)> { shortcut_links.push((link.reference.to_owned(), locate(link.reference))); None }; - let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)); + let p = + Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)).into_offset_iter(); // There's no need to thread an IdMap through to here because // the IDs generated aren't going to be emitted anywhere. @@ -1161,7 +1165,7 @@ crate fn markdown_links(md: &str) -> Vec<(String, Option>)> { let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids)); for ev in iter { - if let Event::Start(Tag::Link(_, dest, _)) = ev { + if let Event::Start(Tag::Link(_, dest, _)) = ev.0 { debug!("found link: {}", dest); links.push(match dest { CowStr::Borrowed(s) => (s.to_owned(), locate(s)), From 2b70da1661995dc3817e07f5230d192b658db0d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20Buga?= Date: Thu, 31 Dec 2020 10:07:57 +0100 Subject: [PATCH 2/5] Remove unnecessary scope Co-authored-by: Joshua Nelson --- src/librustdoc/html/markdown.rs | 65 ++++++++++++++++----------------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 7a922f570da..cabe4b49e98 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -1135,43 +1135,40 @@ crate fn markdown_links(md: &str) -> Vec<(String, Option>)> { let mut links = vec![]; let mut shortcut_links = vec![]; - { - let locate = |s: &str| unsafe { - let s_start = s.as_ptr(); - let s_end = s_start.add(s.len()); - let md_start = md.as_ptr(); - let md_end = md_start.add(md.len()); - if md_start <= s_start && s_end <= md_end { - let start = s_start.offset_from(md_start) as usize; - let end = s_end.offset_from(md_start) as usize; - Some(start..end) - } else { - None - } - }; - - let mut push = |link: BrokenLink<'_>| { - // FIXME: use `link.span` instead of `locate` - // (doing it now includes the `[]` as well as the text) - shortcut_links.push((link.reference.to_owned(), locate(link.reference))); + let locate = |s: &str| unsafe { + let s_start = s.as_ptr(); + let s_end = s_start.add(s.len()); + let md_start = md.as_ptr(); + let md_end = md_start.add(md.len()); + if md_start <= s_start && s_end <= md_end { + let start = s_start.offset_from(md_start) as usize; + let end = s_end.offset_from(md_start) as usize; + Some(start..end) + } else { None - }; - let p = - Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)).into_offset_iter(); + } + }; - // There's no need to thread an IdMap through to here because - // the IDs generated aren't going to be emitted anywhere. - let mut ids = IdMap::new(); - let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids)); + let mut push = |link: BrokenLink<'_>| { + // FIXME: use `link.span` instead of `locate` + // (doing it now includes the `[]` as well as the text) + shortcut_links.push((link.reference.to_owned(), locate(link.reference))); + None + }; + let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)).into_offset_iter(); - for ev in iter { - if let Event::Start(Tag::Link(_, dest, _)) = ev.0 { - debug!("found link: {}", dest); - links.push(match dest { - CowStr::Borrowed(s) => (s.to_owned(), locate(s)), - s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), None), - }); - } + // There's no need to thread an IdMap through to here because + // the IDs generated aren't going to be emitted anywhere. + let mut ids = IdMap::new(); + let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids)); + + for ev in iter { + if let Event::Start(Tag::Link(_, dest, _)) = ev.0 { + debug!("found link: {}", dest); + links.push(match dest { + CowStr::Borrowed(s) => (s.to_owned(), locate(s)), + s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), None), + }); } } From cb4317de008408945bb873155d67439579b12b6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20Buga?= Date: Thu, 31 Dec 2020 10:11:50 +0100 Subject: [PATCH 3/5] Always provide a range Co-authored-by: Joshua Nelson --- src/librustdoc/html/markdown.rs | 14 ++--- .../passes/collect_intra_doc_links.rs | 57 +++++++++---------- 2 files changed, 33 insertions(+), 38 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index cabe4b49e98..b642f27c0b4 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -1127,7 +1127,7 @@ crate fn plain_text_summary(md: &str) -> String { s } -crate fn markdown_links(md: &str) -> Vec<(String, Option>)> { +crate fn markdown_links(md: &str) -> Vec<(String, Range)> { if md.is_empty() { return vec![]; } @@ -1135,7 +1135,7 @@ crate fn markdown_links(md: &str) -> Vec<(String, Option>)> { let mut links = vec![]; let mut shortcut_links = vec![]; - let locate = |s: &str| unsafe { + let locate = |s: &str, fallback: Range| unsafe { let s_start = s.as_ptr(); let s_end = s_start.add(s.len()); let md_start = md.as_ptr(); @@ -1143,16 +1143,16 @@ crate fn markdown_links(md: &str) -> Vec<(String, Option>)> { if md_start <= s_start && s_end <= md_end { let start = s_start.offset_from(md_start) as usize; let end = s_end.offset_from(md_start) as usize; - Some(start..end) + start..end } else { - None + fallback } }; let mut push = |link: BrokenLink<'_>| { // FIXME: use `link.span` instead of `locate` // (doing it now includes the `[]` as well as the text) - shortcut_links.push((link.reference.to_owned(), locate(link.reference))); + shortcut_links.push((link.reference.to_owned(), locate(link.reference, link.span))); None }; let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)).into_offset_iter(); @@ -1166,8 +1166,8 @@ crate fn markdown_links(md: &str) -> Vec<(String, Option>)> { if let Event::Start(Tag::Link(_, dest, _)) = ev.0 { debug!("found link: {}", dest); links.push(match dest { - CowStr::Borrowed(s) => (s.to_owned(), locate(s)), - s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), None), + CowStr::Borrowed(s) => (s.to_owned(), locate(s, ev.1)), + s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), ev.1), }); } } diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs index 63cb02af3bc..3fc102f2fd2 100644 --- a/src/librustdoc/passes/collect_intra_doc_links.rs +++ b/src/librustdoc/passes/collect_intra_doc_links.rs @@ -245,7 +245,7 @@ struct DiagnosticInfo<'a> { item: &'a Item, dox: &'a str, ori_link: &'a str, - link_range: Option>, + link_range: Range, } #[derive(Clone, Debug, Hash)] @@ -982,7 +982,7 @@ impl LinkCollector<'_, '_> { parent_node: Option, krate: CrateNum, ori_link: String, - link_range: Option>, + link_range: Range, ) -> Option { trace!("considering link '{}'", ori_link); @@ -1628,7 +1628,7 @@ fn report_diagnostic( msg: &str, item: &Item, dox: &str, - link_range: &Option>, + link_range: &Range, decorate: impl FnOnce(&mut DiagnosticBuilder<'_>, Option), ) { let hir_id = match cx.as_local_hir_id(item.def_id) { @@ -1646,31 +1646,27 @@ fn report_diagnostic( cx.tcx.struct_span_lint_hir(lint, hir_id, sp, |lint| { let mut diag = lint.build(msg); - let span = link_range - .as_ref() - .and_then(|range| super::source_span_for_markdown_range(cx, dox, range, attrs)); + let span = super::source_span_for_markdown_range(cx, dox, link_range, attrs); - if let Some(link_range) = link_range { - if let Some(sp) = span { - diag.set_span(sp); - } else { - // blah blah blah\nblah\nblah [blah] blah blah\nblah blah - // ^ ~~~~ - // | link_range - // last_new_line_offset - let last_new_line_offset = dox[..link_range.start].rfind('\n').map_or(0, |n| n + 1); - let line = dox[last_new_line_offset..].lines().next().unwrap_or(""); + if let Some(sp) = span { + diag.set_span(sp); + } else { + // blah blah blah\nblah\nblah [blah] blah blah\nblah blah + // ^ ~~~~ + // | link_range + // last_new_line_offset + let last_new_line_offset = dox[..link_range.start].rfind('\n').map_or(0, |n| n + 1); + let line = dox[last_new_line_offset..].lines().next().unwrap_or(""); - // Print the line containing the `link_range` and manually mark it with '^'s. - diag.note(&format!( - "the link appears in this line:\n\n{line}\n\ + // Print the line containing the `link_range` and manually mark it with '^'s. + diag.note(&format!( + "the link appears in this line:\n\n{line}\n\ {indicator: , dox: &str, - link_range: Option>, + link_range: Range, kinds: SmallVec<[ResolutionFailure<'_>; 3]>, ) { let tcx = collector.cx.tcx; @@ -1914,7 +1910,7 @@ fn anchor_failure( item: &Item, path_str: &str, dox: &str, - link_range: Option>, + link_range: Range, failure: AnchorFailure, ) { let msg = match failure { @@ -1939,7 +1935,7 @@ fn ambiguity_error( item: &Item, path_str: &str, dox: &str, - link_range: Option>, + link_range: Range, candidates: Vec, ) { let mut msg = format!("`{}` is ", path_str); @@ -1988,13 +1984,12 @@ fn suggest_disambiguator( path_str: &str, dox: &str, sp: Option, - link_range: &Option>, + link_range: &Range, ) { let suggestion = disambiguator.suggestion(); let help = format!("to link to the {}, {}", disambiguator.descr(), suggestion.descr()); if let Some(sp) = sp { - let link_range = link_range.as_ref().expect("must have a link range if we have a span"); let msg = if dox.bytes().nth(link_range.start) == Some(b'`') { format!("`{}`", suggestion.as_help(path_str)) } else { @@ -2013,7 +2008,7 @@ fn privacy_error( item: &Item, path_str: &str, dox: &str, - link_range: Option>, + link_range: Range, ) { let sym; let item_name = match item.name { From 854b9d172798ff33472a0ffe2d76cd4605ceeada Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20Buga?= Date: Thu, 31 Dec 2020 10:26:24 +0100 Subject: [PATCH 4/5] Collect links into a single vector --- src/librustdoc/html/markdown.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index b642f27c0b4..f0dd552f82c 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -25,6 +25,7 @@ use rustc_session::lint; use rustc_span::edition::Edition; use rustc_span::Span; use std::borrow::Cow; +use std::cell::RefCell; use std::collections::VecDeque; use std::default::Default; use std::fmt::Write; @@ -1132,8 +1133,7 @@ crate fn markdown_links(md: &str) -> Vec<(String, Range)> { return vec![]; } - let mut links = vec![]; - let mut shortcut_links = vec![]; + let links = RefCell::new(vec![]); let locate = |s: &str, fallback: Range| unsafe { let s_start = s.as_ptr(); @@ -1152,7 +1152,7 @@ crate fn markdown_links(md: &str) -> Vec<(String, Range)> { let mut push = |link: BrokenLink<'_>| { // FIXME: use `link.span` instead of `locate` // (doing it now includes the `[]` as well as the text) - shortcut_links.push((link.reference.to_owned(), locate(link.reference, link.span))); + links.borrow_mut().push((link.reference.to_owned(), locate(link.reference, link.span))); None }; let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)).into_offset_iter(); @@ -1165,16 +1165,14 @@ crate fn markdown_links(md: &str) -> Vec<(String, Range)> { for ev in iter { if let Event::Start(Tag::Link(_, dest, _)) = ev.0 { debug!("found link: {}", dest); - links.push(match dest { + links.borrow_mut().push(match dest { CowStr::Borrowed(s) => (s.to_owned(), locate(s, ev.1)), s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), ev.1), }); } } - links.append(&mut shortcut_links); - - links + links.into_inner() } #[derive(Debug)] From f07354333f254fc7d3f20c939b29ba93143b8671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20Buga?= Date: Thu, 31 Dec 2020 10:38:12 +0100 Subject: [PATCH 5/5] Only use locate for borrowed strings --- src/librustdoc/html/markdown.rs | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index f0dd552f82c..465aeecbd3e 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -1135,6 +1135,7 @@ crate fn markdown_links(md: &str) -> Vec<(String, Range)> { let links = RefCell::new(vec![]); + // FIXME: remove this function once pulldown_cmark can provide spans for link definitions. let locate = |s: &str, fallback: Range| unsafe { let s_start = s.as_ptr(); let s_end = s_start.add(s.len()); @@ -1149,10 +1150,25 @@ crate fn markdown_links(md: &str) -> Vec<(String, Range)> { } }; + let span_for_link = |link: &CowStr<'_>, span: Range| { + // For diagnostics, we want to underline the link's definition but `span` will point at + // where the link is used. This is a problem for reference-style links, where the definition + // is separate from the usage. + match link { + // `Borrowed` variant means the string (the link's destination) may come directly from + // the markdown text and we can locate the original link destination. + // NOTE: LinkReplacer also provides `Borrowed` but possibly from other sources, + // so `locate()` can fall back to use `span`. + CowStr::Borrowed(s) => locate(s, span), + + // For anything else, we can only use the provided range. + CowStr::Boxed(_) | CowStr::Inlined(_) => span, + } + }; + let mut push = |link: BrokenLink<'_>| { - // FIXME: use `link.span` instead of `locate` - // (doing it now includes the `[]` as well as the text) - links.borrow_mut().push((link.reference.to_owned(), locate(link.reference, link.span))); + let span = span_for_link(&CowStr::Borrowed(link.reference), link.span); + links.borrow_mut().push((link.reference.to_owned(), span)); None }; let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)).into_offset_iter(); @@ -1165,10 +1181,8 @@ crate fn markdown_links(md: &str) -> Vec<(String, Range)> { for ev in iter { if let Event::Start(Tag::Link(_, dest, _)) = ev.0 { debug!("found link: {}", dest); - links.borrow_mut().push(match dest { - CowStr::Borrowed(s) => (s.to_owned(), locate(s, ev.1)), - s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), ev.1), - }); + let span = span_for_link(&dest, ev.1); + links.borrow_mut().push((dest.into_string(), span)); } }