From 861b8921c08e3cbe2ff8176679cc0cb3216bb2e3 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 9 Oct 2020 14:48:45 +0200 Subject: [PATCH 1/3] Clean up rustdoc HTML tags check pass --- src/librustdoc/passes/html_tags.rs | 128 ++++++++++++++++------------- 1 file changed, 71 insertions(+), 57 deletions(-) diff --git a/src/librustdoc/passes/html_tags.rs b/src/librustdoc/passes/html_tags.rs index ae4eac89b45..872543d918c 100644 --- a/src/librustdoc/passes/html_tags.rs +++ b/src/librustdoc/passes/html_tags.rs @@ -4,6 +4,8 @@ use crate::core::DocContext; use crate::fold::DocFolder; use crate::html::markdown::opts; use core::ops::Range; +use std::iter::Peekable; +use std::str::CharIndices; use pulldown_cmark::{Event, Parser}; use rustc_feature::UnstableFeatures; use rustc_session::lint; @@ -75,7 +77,73 @@ fn drop_tag( } } -fn extract_tag( +fn extract_html_tag( + tags: &mut Vec<(String, Range)>, + text: &str, + range: &Range, + start_pos: usize, + iter: &mut Peekable>, + f: &impl Fn(&str, &Range), +) { + let mut tag_name = String::new(); + let mut is_closing = false; + let mut prev_pos = start_pos; + + loop { + let (pos, c) = match iter.peek() { + Some((pos, c)) => (*pos, *c), + // In case we reached the of the doc comment, we want to check that it's an + // unclosed HTML tag. For example "/// (prev_pos, '\0'), + }; + prev_pos = pos; + // Checking if this is a closing tag (like `` for ``). + if c == '/' && tag_name.is_empty() { + is_closing = true; + } else if c.is_ascii_alphanumeric() { + tag_name.push(c); + } else { + if !tag_name.is_empty() { + let mut r = + Range { start: range.start + start_pos, end: range.start + pos }; + if c == '>' { + // In case we have a tag without attribute, we can consider the span to + // refer to it fully. + r.end += 1; + } + if is_closing { + // In case we have "" or even "". + if c != '>' { + if !c.is_whitespace() { + // It seems like it's not a valid HTML tag. + break; + } + let mut found = false; + for (new_pos, c) in text[pos..].char_indices() { + if !c.is_whitespace() { + if c == '>' { + r.end = range.start + new_pos + 1; + found = true; + } + break; + } + } + if !found { + break; + } + } + drop_tag(tags, tag_name, r, f); + } else { + tags.push((tag_name, r)); + } + } + break; + } + iter.next(); + } +} + +fn extract_tags( tags: &mut Vec<(String, Range)>, text: &str, range: Range, @@ -85,61 +153,7 @@ fn extract_tag( while let Some((start_pos, c)) = iter.next() { if c == '<' { - let mut tag_name = String::new(); - let mut is_closing = false; - let mut prev_pos = start_pos; - loop { - let (pos, c) = match iter.peek() { - Some((pos, c)) => (*pos, *c), - // In case we reached the of the doc comment, we want to check that it's an - // unclosed HTML tag. For example "/// (prev_pos, '\0'), - }; - prev_pos = pos; - // Checking if this is a closing tag (like `` for ``). - if c == '/' && tag_name.is_empty() { - is_closing = true; - } else if c.is_ascii_alphanumeric() { - tag_name.push(c); - } else { - if !tag_name.is_empty() { - let mut r = - Range { start: range.start + start_pos, end: range.start + pos }; - if c == '>' { - // In case we have a tag without attribute, we can consider the span to - // refer to it fully. - r.end += 1; - } - if is_closing { - // In case we have "" or even "". - if c != '>' { - if !c.is_whitespace() { - // It seems like it's not a valid HTML tag. - break; - } - let mut found = false; - for (new_pos, c) in text[pos..].char_indices() { - if !c.is_whitespace() { - if c == '>' { - r.end = range.start + new_pos + 1; - found = true; - } - break; - } - } - if !found { - break; - } - } - drop_tag(tags, tag_name, r, f); - } else { - tags.push((tag_name, r)); - } - } - break; - } - iter.next(); - } + extract_html_tag(tags, text, &range, start_pos, &mut iter, f); } } } @@ -172,7 +186,7 @@ impl<'a, 'tcx> DocFolder for InvalidHtmlTagsLinter<'a, 'tcx> { for (event, range) in p { match event { - Event::Html(text) => extract_tag(&mut tags, &text, range, &report_diag), + Event::Html(text) => extract_tags(&mut tags, &text, range, &report_diag), _ => {} } } From 0009cbaabd6f2ba986e7631905612aa83d5970f8 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 9 Oct 2020 15:04:22 +0200 Subject: [PATCH 2/3] Add check for HTML comments --- src/librustdoc/passes/html_tags.rs | 34 +++++++++++++++++--- src/test/rustdoc-ui/invalid-html-tags.rs | 10 ++++++ src/test/rustdoc-ui/invalid-html-tags.stderr | 8 ++++- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/src/librustdoc/passes/html_tags.rs b/src/librustdoc/passes/html_tags.rs index 872543d918c..8bb71cef1c7 100644 --- a/src/librustdoc/passes/html_tags.rs +++ b/src/librustdoc/passes/html_tags.rs @@ -4,11 +4,11 @@ use crate::core::DocContext; use crate::fold::DocFolder; use crate::html::markdown::opts; use core::ops::Range; -use std::iter::Peekable; -use std::str::CharIndices; use pulldown_cmark::{Event, Parser}; use rustc_feature::UnstableFeatures; use rustc_session::lint; +use std::iter::Peekable; +use std::str::CharIndices; pub const CHECK_INVALID_HTML_TAGS: Pass = Pass { name: "check-invalid-html-tags", @@ -104,8 +104,7 @@ fn extract_html_tag( tag_name.push(c); } else { if !tag_name.is_empty() { - let mut r = - Range { start: range.start + start_pos, end: range.start + pos }; + let mut r = Range { start: range.start + start_pos, end: range.start + pos }; if c == '>' { // In case we have a tag without attribute, we can consider the span to // refer to it fully. @@ -143,6 +142,27 @@ fn extract_html_tag( } } +fn extract_html_comment( + text: &str, + range: &Range, + start_pos: usize, + iter: &mut Peekable>, + f: &impl Fn(&str, &Range), +) { + // We first skip the "!--" part. + let mut iter = iter.skip(3); + while let Some((pos, c)) = iter.next() { + if c == '-' && text[pos..].starts_with("-->") { + // All good, we can leave! + return; + } + } + f( + "Unclosed HTML comment", + &Range { start: range.start + start_pos, end: range.start + start_pos + 3 }, + ); +} + fn extract_tags( tags: &mut Vec<(String, Range)>, text: &str, @@ -153,7 +173,11 @@ fn extract_tags( while let Some((start_pos, c)) = iter.next() { if c == '<' { - extract_html_tag(tags, text, &range, start_pos, &mut iter, f); + if text[start_pos..].starts_with(" +/// +/// +/// +pub fn g() {} + +/// $DIR/invalid-html-tags.rs:83:5 + | +LL | /// ") { - // All good, we can leave! - return; - } - } - f( - "Unclosed HTML comment", - &Range { start: range.start + start_pos, end: range.start + start_pos + 3 }, - ); -} - fn extract_tags( tags: &mut Vec<(String, Range)>, text: &str, range: Range, + is_in_comment: &mut Option>, f: &impl Fn(&str, &Range), ) { let mut iter = text.char_indices().peekable(); while let Some((start_pos, c)) = iter.next() { - if c == '<' { + if is_in_comment.is_some() { + if text[start_pos..].starts_with("-->") { + *is_in_comment = None; + } + } else if c == '<' { if text[start_pos..].starts_with(" pub fn h() {} + +/// $DIR/invalid-html-tags.rs:83:5 + --> $DIR/invalid-html-tags.rs:87:5 | LL | ///