Fix FP with DOC_MARKDOWN and reference links

This commit is contained in:
mcarton 2016-04-11 23:22:30 +02:00
parent 9c4ae9295d
commit 6c0a486e8b
2 changed files with 93 additions and 19 deletions

View File

@ -84,7 +84,32 @@ fn collect_doc(attrs: &[ast::Attribute]) -> (Cow<str>, Option<Span>) {
/// Lints the documentation attached to an item.
///
/// The doc text and its optional span are gathered from `attrs` by `collect_doc`; the text is
/// then handed to `check_doc`. When `collect_doc` yields no span, `default_span` is used instead.
pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute], default_span: Span) {
    let (text, attr_span) = collect_doc(attrs);
    check_doc(cx, valid_idents, &text, attr_span.unwrap_or(default_span));
}
macro_rules! jump_to {
    // Internal rule: evaluates to the position of the upcoming item of the peekable
    // `(position, char)` iterator `$chars` without consuming it, i.e. the first byte of the
    // next character. Evaluates to `$len` when the iterator is exhausted.
    (@next_char, $chars: expr, $len: expr) => {{
        match $chars.peek() {
            Some(&(pos, _)) => pos,
            None => $len,
        }
    }};
    // Consumes items up to and including the next `$c`, then evaluates to the position of the
    // character that follows it (or `$len` at the end of input).
    // If no `$c` is left, gives up by returning from the *enclosing function*.
    ($chars: expr, $c: expr, $len: expr) => {{
        if !$chars.any(|(_, c)| c == $c) {
            return;
        }
        jump_to!(@next_char, $chars, $len)
    }};
}
#[allow(while_let_loop)] // #362
pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], doc: &str, span: Span) {
// In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
// There really is no markdown specification that would disambiguate this properly. This is
// what GitHub and Rustdoc do:
@ -96,19 +121,68 @@ pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [a
// (_baz_) → (<em>baz</em>)
// foo _ bar _ baz → foo _ bar _ baz
let mut in_ticks = false;
for word in doc.split_whitespace() {
let ticks = word.bytes().filter(|&b| b == b'`').count();
if ticks == 2 { // likely to be “`foo`”
continue;
} else if ticks % 2 == 1 {
in_ticks = !in_ticks;
continue; // lets assume no one will ever write something like “`foo`_bar”
/// Returns `true` for characters that may be part of a word: any alphanumeric character,
/// plus `:` and `_`.
fn is_word_char(c: char) -> bool {
    c.is_alphanumeric() || c == ':' || c == '_'
}
if !in_ticks {
check_word(cx, valid_idents, word, span);
let len = doc.len();
let mut chars = doc.char_indices().peekable();
let mut current_word_begin = 0;
loop {
match chars.next() {
Some((_, c)) => {
match c {
c if c.is_whitespace() => {
current_word_begin = jump_to!(@next_char, chars, len);
}
'`' => {
current_word_begin = jump_to!(chars, '`', len);
},
'[' => {
let end = jump_to!(chars, ']', len);
let link_text = &doc[current_word_begin+1..end];
match chars.peek() {
Some(&(_, c)) => {
// Trying to parse a link. Let's ignore the link.
// FIXME: how does markdown handle such links?
// https://en.wikipedia.org/w/index.php?title=)
match c {
'(' => { // inline link
current_word_begin = jump_to!(chars, ')', len);
check_doc(cx, valid_idents, link_text, span);
}
'[' => { // reference link
current_word_begin = jump_to!(chars, ']', len);
check_doc(cx, valid_idents, link_text, span);
}
':' => { // reference link
current_word_begin = jump_to!(chars, '\n', len);
}
_ => continue,
}
}
None => return,
}
}
_ => {
let end = match chars.find(|&(_, c)| !is_word_char(c)) {
Some((end, _)) => end,
None => len,
};
check_word(cx, valid_idents, &doc[current_word_begin..end], span);
current_word_begin = jump_to!(@next_char, chars, len);
}
}
}
None => break,
}
}
}
@ -136,11 +210,6 @@ fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span
s != "_" && !s.contains("\\_") && s.contains('_')
}
// Something with a `/` might be a link, dont warn (see #823):
if word.contains('/') {
return;
}
// Trim punctuation as in `some comment (see foo::bar).`
// ^^
// Or even as in `_foo bar_` which is emphasized.

View File

@ -41,14 +41,19 @@ fn test_emphasis() {
fn test_units() {
}
/// This test has [a link with underscores][chunked-example] inside it. See #823.
/// See also [the issue tracker](https://github.com/Manishearth/rust-clippy/search?q=doc_markdown&type=Issues).
/// This test has [a link_with_underscores][chunked-example] inside it. See #823.
/// See also [the issue tracker](https://github.com/Manishearth/rust-clippy/search?q=doc_markdown&type=Issues). And here is another [inline link][inline_link].
///
/// [chunked-example]: http://en.wikipedia.org/wiki/Chunked_transfer_encoding#Example
/// [chunked-example]: https://en.wikipedia.org/wiki/Chunked_transfer_encoding#Example
/// [inline_link]: https://foobar
/// The `main` function is the entry point of the program. Here it only calls the `foo_bar` and
/// `multiline_ticks` functions.
///
/// expression of the type `_ <bit_op> m <cmp_op> c` (where `<bit_op>`
/// is one of {`&`, '|'} and `<cmp_op>` is one of {`!=`, `>=`, `>` ,
fn main() {
//~^ ERROR: you should put `link_with_underscores` between ticks
foo_bar();
multiline_ticks();
test_emphasis();