From 5817351048b7c817720f696dd6a0f7005bd1a7a4 Mon Sep 17 00:00:00 2001
From: Zack Weinberg <zackw@panix.com>
Date: Mon, 13 Feb 2017 12:33:35 -0500
Subject: [PATCH 1/2] tidy: exempt URLs from the line length restriction

The length of a URL is usually not under our control, and Markdown
provides no way to split a URL in the middle.  Therefore, comment
lines consisting _solely_ of a URL (possibly with a Markdown link
label in front) should be exempt from the line-length restriction.

Inline hyperlink destinations ( `[foo](http://...)` notation ) are
_not_ exempt, because it is my arrogant opinion that long lines of
that type make the source text illegible.

The patch adds dependencies on the `regex` and `lazy_static` crates
to the tidy utility.  This _appears_ to Just Work, but if you would
rather not have that dependency I am willing to provide a hand-written
parser instead.
---
 src/tools/tidy/Cargo.toml   |  2 ++
 src/tools/tidy/src/main.rs  |  3 +++
 src/tools/tidy/src/style.rs | 33 +++++++++++++++++++++++++++++++--
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/src/tools/tidy/Cargo.toml b/src/tools/tidy/Cargo.toml
index e900bd47fb7..39986d59289 100644
--- a/src/tools/tidy/Cargo.toml
+++ b/src/tools/tidy/Cargo.toml
@@ -4,3 +4,5 @@ version = "0.1.0"
 authors = ["Alex Crichton <alex@alexcrichton.com>"]
 
 [dependencies]
+regex = "*"
+lazy_static = "*"
diff --git a/src/tools/tidy/src/main.rs b/src/tools/tidy/src/main.rs
index 9962c6ec9af..bbd6c8e87c2 100644
--- a/src/tools/tidy/src/main.rs
+++ b/src/tools/tidy/src/main.rs
@@ -14,6 +14,9 @@
 //! etc. This is run by default on `make check` and as part of the auto
 //! builders.
 
+extern crate regex;
+#[macro_use] extern crate lazy_static;
+
 use std::fs;
 use std::path::{PathBuf, Path};
 use std::env;
diff --git a/src/tools/tidy/src/style.rs b/src/tools/tidy/src/style.rs
index c722eb690b8..91c5edfd75a 100644
--- a/src/tools/tidy/src/style.rs
+++ b/src/tools/tidy/src/style.rs
@@ -26,6 +26,8 @@ use std::fs::File;
 use std::io::prelude::*;
 use std::path::Path;
 
+use regex::Regex;
+
 const COLS: usize = 100;
 const LICENSE: &'static str = "\
 Copyright <year> The Rust Project Developers. See the COPYRIGHT
@@ -38,6 +40,32 @@ http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 option. This file may not be copied, modified, or distributed
 except according to those terms.";
 
+/// True if LINE is allowed to be longer than the normal limit.
+///
+/// Currently there is only one exception: if the line is within a
+/// comment, and its entire text is one URL (possibly with a Markdown
+/// link label in front), then it's allowed to be overlength.  This is
+/// because Markdown offers no way to split a line in the middle of a
+/// URL, and the length of URLs for external references is beyond our
+/// control.
+fn long_line_is_ok(line: &str) -> bool {
+    lazy_static! {
+        static ref URL_RE: Regex = Regex::new(
+            // This regexp uses the CommonMark definition of link
+            // label.  It thinks any sequence of nonwhitespace
+            // characters beginning with "http://" or "https://" is a
+            // URL.  Add more schemas as necessary.
+            r"^\s*//[!/]?\s+(?:\[(?:[^\]\\]|\\.){1,999}\]:\s+)?https?://\S+$"
+        ).unwrap();
+    }
+
+    if URL_RE.is_match(line) {
+        return true;
+    }
+
+    false
+}
+
 pub fn check(path: &Path, bad: &mut bool) {
     let mut contents = String::new();
     super::walk(path, &mut super::filter_dirs, &mut |file| {
@@ -61,8 +89,9 @@ pub fn check(path: &Path, bad: &mut bool) {
                 println!("{}:{}: {}", file.display(), i + 1, msg);
                 *bad = true;
             };
-            if line.chars().count() > COLS && !skip_length {
-                err(&format!("line longer than {} chars", COLS));
+            if !skip_length && line.chars().count() > COLS
+                && !long_line_is_ok(line) {
+                    err(&format!("line longer than {} chars", COLS));
             }
             if line.contains("\t") && !skip_tab {
                 err("tab character");

From ff4758c2a0dffef264fe73b90668bd04b1b2fa89 Mon Sep 17 00:00:00 2001
From: Zack Weinberg <zackw@panix.com>
Date: Mon, 13 Feb 2017 15:44:51 -0500
Subject: [PATCH 2/2] Replace regex-based parser for URL lines with open-coded
 one.

---
 src/tools/tidy/Cargo.toml   |  2 --
 src/tools/tidy/src/main.rs  |  3 --
 src/tools/tidy/src/style.rs | 66 ++++++++++++++++++++++++++-----------
 3 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/src/tools/tidy/Cargo.toml b/src/tools/tidy/Cargo.toml
index 39986d59289..e900bd47fb7 100644
--- a/src/tools/tidy/Cargo.toml
+++ b/src/tools/tidy/Cargo.toml
@@ -4,5 +4,3 @@ version = "0.1.0"
 authors = ["Alex Crichton <alex@alexcrichton.com>"]
 
 [dependencies]
-regex = "*"
-lazy_static = "*"
diff --git a/src/tools/tidy/src/main.rs b/src/tools/tidy/src/main.rs
index bbd6c8e87c2..9962c6ec9af 100644
--- a/src/tools/tidy/src/main.rs
+++ b/src/tools/tidy/src/main.rs
@@ -14,9 +14,6 @@
 //! etc. This is run by default on `make check` and as part of the auto
 //! builders.
 
-extern crate regex;
-#[macro_use] extern crate lazy_static;
-
 use std::fs;
 use std::path::{PathBuf, Path};
 use std::env;
diff --git a/src/tools/tidy/src/style.rs b/src/tools/tidy/src/style.rs
index 91c5edfd75a..2233f8c3529 100644
--- a/src/tools/tidy/src/style.rs
+++ b/src/tools/tidy/src/style.rs
@@ -26,8 +26,6 @@ use std::fs::File;
 use std::io::prelude::*;
 use std::path::Path;
 
-use regex::Regex;
-
 const COLS: usize = 100;
 const LICENSE: &'static str = "\
 Copyright <year> The Rust Project Developers. See the COPYRIGHT
@@ -40,26 +38,54 @@ http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 option. This file may not be copied, modified, or distributed
 except according to those terms.";
 
-/// True if LINE is allowed to be longer than the normal limit.
-///
-/// Currently there is only one exception: if the line is within a
-/// comment, and its entire text is one URL (possibly with a Markdown
-/// link label in front), then it's allowed to be overlength.  This is
-/// because Markdown offers no way to split a line in the middle of a
-/// URL, and the length of URLs for external references is beyond our
-/// control.
-fn long_line_is_ok(line: &str) -> bool {
-    lazy_static! {
-        static ref URL_RE: Regex = Regex::new(
-            // This regexp uses the CommonMark definition of link
-            // label.  It thinks any sequence of nonwhitespace
-            // characters beginning with "http://" or "https://" is a
-            // URL.  Add more schemas as necessary.
-            r"^\s*//[!/]?\s+(?:\[(?:[^\]\\]|\\.){1,999}\]:\s+)?https?://\S+$"
-        ).unwrap();
+/// Parser states for line_is_url.
+#[derive(PartialEq)]
+#[allow(non_camel_case_types)]
+enum LIUState { EXP_COMMENT_START,
+                EXP_LINK_LABEL_OR_URL,
+                EXP_URL,
+                EXP_END }
+
+/// True if LINE appears to be a line comment containing an URL,
+/// possibly with a Markdown link label in front, and nothing else.
+/// The Markdown link label, if present, may not contain whitespace.
+/// Lines of this form are allowed to be overlength, because Markdown
+/// offers no way to split a line in the middle of a URL, and the lengths
+/// of URLs to external references are beyond our control.
+fn line_is_url(line: &str) -> bool {
+    use self::LIUState::*;
+    let mut state: LIUState = EXP_COMMENT_START;
+
+    for tok in line.split_whitespace() {
+        match (state, tok) {
+            (EXP_COMMENT_START, "//") => state = EXP_LINK_LABEL_OR_URL,
+            (EXP_COMMENT_START, "///") => state = EXP_LINK_LABEL_OR_URL,
+            (EXP_COMMENT_START, "//!") => state = EXP_LINK_LABEL_OR_URL,
+
+            (EXP_LINK_LABEL_OR_URL, w)
+                if w.len() >= 4 && w.starts_with("[") && w.ends_with("]:")
+                => state = EXP_URL,
+
+            (EXP_LINK_LABEL_OR_URL, w)
+                if w.starts_with("http://") || w.starts_with("https://")
+                => state = EXP_END,
+
+            (EXP_URL, w)
+                if w.starts_with("http://") || w.starts_with("https://")
+                => state = EXP_END,
+
+            (_, _) => return false,
+        }
     }
 
-    if URL_RE.is_match(line) {
+    state == EXP_END
+}
+
+/// True if LINE is allowed to be longer than the normal limit.
+/// Currently there is only one exception, for long URLs, but more
+/// may be added in the future.
+fn long_line_is_ok(line: &str) -> bool {
+    if line_is_url(line) {
         return true;
     }