From 01386e649265a71d9d91cf9ec138d6f046dc438b Mon Sep 17 00:00:00 2001 From: Oliver Middleton Date: Wed, 29 Jun 2016 22:58:57 +0100 Subject: [PATCH] Reject invalid urls in linkchecker For example root-relative links will now be rejected. Also remove some exceptions which have since been fixed and fix a typo in the broken redirect handling. --- src/tools/linkchecker/main.rs | 43 ++++++++++++++++------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 4b74833eaf7..80c37d55975 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -138,22 +138,6 @@ fn check(cache: &mut Cache, return None; } - if file.ends_with("std/sys/ext/index.html") { - return None; - } - - if let Some(file) = file.to_str() { - // FIXME(#31948) - if file.contains("ParseFloatError") { - return None; - } - // weird reexports, but this module is on its way out, so chalk it up to - // "rustdoc weirdness" and move on from there - if file.contains("scoped_tls") { - return None; - } - } - let mut parser = UrlParser::new(); parser.base_url(base); @@ -170,12 +154,24 @@ fn check(cache: &mut Cache, // Search for anything that's the regex 'href[ ]*=[ ]*".*?"' with_attrs_in_source(&contents, " href", |url, i| { + // Ignore external URLs + if url.starts_with("http:") || url.starts_with("https:") || + url.starts_with("javascript:") || url.starts_with("ftp:") || + url.starts_with("irc:") || url.starts_with("data:") { + return; + } // Once we've plucked out the URL, parse it using our base url and - // then try to extract a file path. If either of these fail then we - // just keep going. + // then try to extract a file path. let (parsed_url, path) = match url_to_file_path(&parser, url) { Some((url, path)) => (url, PathBuf::from(path)), - None => return, + None => { + *errors = true; + println!("{}:{}: invalid link - {}", + pretty_file.display(), + i + 1, + url); + return; + } }; // Alright, if we've found a file name then this file had better @@ -197,10 +193,11 @@ fn check(cache: &mut Cache, Ok(res) => res, Err(LoadError::IOError(err)) => panic!(format!("{}", err)), Err(LoadError::BrokenRedirect(target, _)) => { - print!("{}:{}: broken redirect to {}", - pretty_file.display(), - i + 1, - target.display()); + *errors = true; + println!("{}:{}: broken redirect to {}", + pretty_file.display(), + i + 1, + target.display()); return; } Err(LoadError::IsRedirect) => unreachable!(),