rustbuild: Add a link checker for documentation
Add a script, run as part of the build, which verifies that `href` links in the generated documentation are correct. We're always getting a steady stream of "fix a broken link" PRs and issue reports, so we should probably just nip them all in the bud.
This commit is contained in:
parent
f7b7535fd7
commit
defd1b3392
@ -73,7 +73,8 @@ class RustBuild:
|
||||
|
||||
if self.rustc().startswith(self.bin_root()) and \
|
||||
(not os.path.exists(self.rustc()) or self.rustc_out_of_date()):
|
||||
shutil.rmtree(self.bin_root())
|
||||
if os.path.exists(self.bin_root()):
|
||||
shutil.rmtree(self.bin_root())
|
||||
filename = "rust-std-nightly-" + self.build + ".tar.gz"
|
||||
url = "https://static.rust-lang.org/dist/" + self.snap_rustc_date()
|
||||
tarball = os.path.join(rustc_cache, filename)
|
||||
|
21
src/bootstrap/build/check.rs
Normal file
21
src/bootstrap/build/check.rs
Normal file
@ -0,0 +1,21 @@
|
||||
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use std::process::Command;
|
||||
|
||||
use build::{Build, Compiler};
|
||||
|
||||
pub fn linkcheck(build: &Build, stage: u32, host: &str) {
|
||||
println!("Linkcheck stage{} ({})", stage, host);
|
||||
let compiler = Compiler::new(stage, host);
|
||||
let linkchecker = build.tool(&compiler, "linkchecker");
|
||||
build.run(Command::new(&linkchecker)
|
||||
.arg(build.out.join(host).join("doc")));
|
||||
}
|
@ -30,6 +30,7 @@ macro_rules! t {
|
||||
|
||||
mod cc;
|
||||
mod channel;
|
||||
mod check;
|
||||
mod clean;
|
||||
mod compile;
|
||||
mod config;
|
||||
@ -171,6 +172,9 @@ impl Build {
|
||||
Rustc { stage } => {
|
||||
compile::assemble_rustc(self, stage, target.target);
|
||||
}
|
||||
ToolLinkchecker { stage } => {
|
||||
compile::tool(self, stage, target.target, "linkchecker");
|
||||
}
|
||||
ToolRustbook { stage } => {
|
||||
compile::tool(self, stage, target.target, "rustbook");
|
||||
}
|
||||
@ -195,6 +199,10 @@ impl Build {
|
||||
doc::rustc(self, stage, target.target, &doc_out);
|
||||
}
|
||||
|
||||
CheckLinkcheck { stage } => {
|
||||
check::linkcheck(self, stage, target.target);
|
||||
}
|
||||
|
||||
Doc { .. } | // pseudo-steps
|
||||
Check { .. } => {}
|
||||
}
|
||||
|
@ -46,6 +46,7 @@ macro_rules! targets {
|
||||
}),
|
||||
|
||||
// Various tools that we can build as part of the build.
|
||||
(tool_linkchecker, ToolLinkchecker { stage: u32 }),
|
||||
(tool_rustbook, ToolRustbook { stage: u32 }),
|
||||
|
||||
// Steps for long-running native builds. Ideally these wouldn't
|
||||
@ -71,6 +72,7 @@ macro_rules! targets {
|
||||
// Steps for running tests. The 'check' target is just a pseudo
|
||||
// target to depend on a bunch of others.
|
||||
(check, Check { stage: u32, compiler: Compiler<'a> }),
|
||||
(check_linkcheck, CheckLinkcheck { stage: u32 }),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -200,6 +202,8 @@ fn add_steps<'a>(build: &'a Build,
|
||||
}
|
||||
|
||||
targets!(add_step);
|
||||
|
||||
panic!("unknown step: {}", step);
|
||||
}
|
||||
}
|
||||
|
||||
@ -273,7 +277,15 @@ impl<'a> Step<'a> {
|
||||
self.doc_std(stage)]
|
||||
}
|
||||
Source::Check { stage, compiler: _ } => {
|
||||
vec![]
|
||||
vec![self.check_linkcheck(stage)]
|
||||
}
|
||||
Source::CheckLinkcheck { stage } => {
|
||||
vec![self.tool_linkchecker(stage), self.doc(stage)]
|
||||
}
|
||||
|
||||
Source::ToolLinkchecker { stage } => {
|
||||
vec![self.libstd(stage, self.compiler(stage))]
|
||||
}
|
||||
Source::ToolRustbook { stage } => {
|
||||
vec![self.librustc(stage, self.compiler(stage))]
|
||||
}
|
||||
|
64
src/tools/linkchecker/Cargo.lock
generated
Normal file
64
src/tools/linkchecker/Cargo.lock
generated
Normal file
@ -0,0 +1,64 @@
|
||||
[root]
|
||||
name = "linkchecker"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"url 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "matches"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-serialize"
|
||||
version = "0.3.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bidi"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "0.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-serialize 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"uuid 0.1.18 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "0.1.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-serialize 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
11
src/tools/linkchecker/Cargo.toml
Normal file
11
src/tools/linkchecker/Cargo.toml
Normal file
@ -0,0 +1,11 @@
|
||||
[package]
|
||||
name = "linkchecker"
|
||||
version = "0.1.0"
|
||||
authors = ["Alex Crichton <alex@alexcrichton.com>"]
|
||||
|
||||
[dependencies]
|
||||
url = "0.5"
|
||||
|
||||
[[bin]]
|
||||
name = "linkchecker"
|
||||
path = "main.rs"
|
161
src/tools/linkchecker/main.rs
Normal file
161
src/tools/linkchecker/main.rs
Normal file
@ -0,0 +1,161 @@
|
||||
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! Script to check the validity of `href` links in our HTML documentation.
|
||||
//!
|
||||
//! In the past we've been quite error prone to writing in broken links as most
|
||||
//! of them are manually rather than automatically added. As files move over
|
||||
//! time or apis change old links become stale or broken. The purpose of this
|
||||
//! script is to check all relative links in our documentation to make sure they
|
||||
//! actually point to a valid place.
|
||||
//!
|
||||
//! Currently this doesn't actually do any HTML parsing or anything fancy like
|
||||
//! that, it just has a simple "regex" to search for `href` tags. These values
|
||||
//! are then translated to file URLs if possible and then the destination is
|
||||
//! asserted to exist.
|
||||
//!
|
||||
//! A few whitelisted exceptions are allowed as there are known bugs in rustdoc,
|
||||
//! but this should catch the majority of "broken link" cases.
|
||||
|
||||
extern crate url;
|
||||
|
||||
use std::env;
|
||||
use std::fs::File;
|
||||
use std::io::prelude::*;
|
||||
use std::path::Path;
|
||||
|
||||
use url::{Url, UrlParser};
|
||||
|
||||
/// Unwraps a `Result`: evaluates to the `Ok` payload, or panics with the
/// source text of the failing expression followed by the error it produced.
macro_rules! t {
    ($e:expr) => {
        match $e {
            Ok(value) => value,
            Err(err) => panic!("{} failed with {}", stringify!($e), err),
        }
    };
}
|
||||
|
||||
fn main() {
|
||||
let docs = env::args().nth(1).unwrap();
|
||||
let docs = env::current_dir().unwrap().join(docs);
|
||||
let mut url = Url::from_file_path(&docs).unwrap();
|
||||
let mut errors = false;
|
||||
walk(&docs, &docs, &mut url, &mut errors);
|
||||
if errors {
|
||||
panic!("found some broken links");
|
||||
}
|
||||
}
|
||||
|
||||
fn walk(root: &Path, dir: &Path, url: &mut Url, errors: &mut bool) {
|
||||
for entry in t!(dir.read_dir()).map(|e| t!(e)) {
|
||||
let path = entry.path();
|
||||
let kind = t!(entry.file_type());
|
||||
url.path_mut().unwrap().push(entry.file_name().into_string().unwrap());
|
||||
if kind.is_dir() {
|
||||
walk(root, &path, url, errors);
|
||||
} else {
|
||||
check(root, &path, url, errors);
|
||||
}
|
||||
url.path_mut().unwrap().pop();
|
||||
}
|
||||
}
|
||||
|
||||
fn check(root: &Path, file: &Path, base: &Url, errors: &mut bool) {
|
||||
// ignore js files as they are not prone to errors as the rest of the
|
||||
// documentation is and they otherwise bring up false positives.
|
||||
if file.extension().and_then(|s| s.to_str()) == Some("js") {
|
||||
return
|
||||
}
|
||||
|
||||
let pretty_file = file.strip_prefix(root).unwrap_or(file);
|
||||
|
||||
// Unfortunately we're not 100% full of valid links today to we need a few
|
||||
// whitelists to get this past `make check` today.
|
||||
if let Some(path) = pretty_file.to_str() {
|
||||
// FIXME(#32129)
|
||||
if path == "std/string/struct.String.html" {
|
||||
return
|
||||
}
|
||||
// FIXME(#32130)
|
||||
if path.contains("btree_set/struct.BTreeSet.html") ||
|
||||
path == "collections/struct.BTreeSet.html" {
|
||||
return
|
||||
}
|
||||
// FIXME(#31948)
|
||||
if path.contains("ParseFloatError") {
|
||||
return
|
||||
}
|
||||
|
||||
// currently
|
||||
if path == "std/sys/ext/index.html" {
|
||||
return
|
||||
}
|
||||
|
||||
// weird reexports, but this module is on its way out, so chalk it up to
|
||||
// "rustdoc weirdness" and move on from there
|
||||
if path.contains("scoped_tls") {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
let mut parser = UrlParser::new();
|
||||
parser.base_url(base);
|
||||
let mut contents = String::new();
|
||||
if t!(File::open(file)).read_to_string(&mut contents).is_err() {
|
||||
return
|
||||
}
|
||||
|
||||
for (i, mut line) in contents.lines().enumerate() {
|
||||
// Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
|
||||
while let Some(j) = line.find(" href") {
|
||||
let rest = &line[j + 5..];
|
||||
line = rest;
|
||||
let pos_equals = match rest.find("=") {
|
||||
Some(i) => i,
|
||||
None => continue,
|
||||
};
|
||||
if rest[..pos_equals].trim_left_matches(" ") != "" {
|
||||
continue
|
||||
}
|
||||
let rest = &rest[pos_equals + 1..];
|
||||
let pos_quote = match rest.find("\"").or_else(|| rest.find("'")) {
|
||||
Some(i) => i,
|
||||
None => continue,
|
||||
};
|
||||
if rest[..pos_quote].trim_left_matches(" ") != "" {
|
||||
continue
|
||||
}
|
||||
let rest = &rest[pos_quote + 1..];
|
||||
let url = match rest.find("\"").or_else(|| rest.find("'")) {
|
||||
Some(i) => &rest[..i],
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// Once we've plucked out the URL, parse it using our base url and
|
||||
// then try to extract a file path. If either if these fail then we
|
||||
// just keep going.
|
||||
let parsed_url = match parser.parse(url) {
|
||||
Ok(url) => url,
|
||||
Err(..) => continue,
|
||||
};
|
||||
let path = match parsed_url.to_file_path() {
|
||||
Ok(path) => path,
|
||||
Err(..) => continue,
|
||||
};
|
||||
|
||||
// Alright, if we've found a file name then this file had better
|
||||
// exist! If it doesn't then we register and print an error.
|
||||
if !path.exists() {
|
||||
*errors = true;
|
||||
print!("{}:{}: broken link - ", pretty_file.display(), i + 1);
|
||||
let pretty_path = path.strip_prefix(root).unwrap_or(&path);
|
||||
println!("{}", pretty_path.display());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user