From 21755b58c900cd5d14422ebd980fe11390e021fe Mon Sep 17 00:00:00 2001
From: Vadim Petrochenkov <vadim.petrochenkov@gmail.com>
Date: Fri, 29 May 2020 21:51:46 +0300
Subject: [PATCH] rustc_lexer: Optimize shebang detection slightly

---
 src/librustc_lexer/src/lib.rs                 | 37 +++++++++----------
 .../ui/parser/shebang/shebang-doc-comment.rs  |  6 +++
 .../parser/shebang/shebang-doc-comment.stderr |  8 ++++
 3 files changed, 32 insertions(+), 19 deletions(-)
 create mode 100644 src/test/ui/parser/shebang/shebang-doc-comment.rs
 create mode 100644 src/test/ui/parser/shebang/shebang-doc-comment.stderr
diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs
index fe6785de009..c2139d07f37 100644
--- a/src/librustc_lexer/src/lib.rs
+++ b/src/librustc_lexer/src/lib.rs
@@ -238,26 +238,25 @@ pub enum Base {
 /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
 /// but shebang isn't a part of rust syntax.
 pub fn strip_shebang(input: &str) -> Option<usize> {
-    let first_line = input.lines().next()?;
-    // A shebang is intentionally loosely defined as `#! [non whitespace]` on the first line.
-    let could_be_shebang =
-        first_line.starts_with("#!") && first_line[2..].contains(|c| !is_whitespace(c));
-    if !could_be_shebang {
-        return None;
-    }
-    let non_whitespace_tokens = tokenize(input).map(|tok| tok.kind).filter(|tok|
-        !matches!(tok, TokenKind::LineComment | TokenKind::BlockComment { .. } | TokenKind::Whitespace)
-    );
-    let prefix = [TokenKind::Pound, TokenKind::Not, TokenKind::OpenBracket];
-    let starts_with_attribute = non_whitespace_tokens.take(3).eq(prefix.iter().copied());
-    if starts_with_attribute {
-        // If the file starts with #![ then it's definitely not a shebang -- it couldn't be
-        // a rust program since a Rust program can't start with `[`
-        None
-    } else {
-        // It's a #!... and there isn't a `[` in sight, must be a shebang
-        Some(first_line.len())
+    // Shebang must start with `#!` literally, without any preceding whitespace.
+    if input.starts_with("#!") {
+        let input_tail = &input[2..];
+        // Shebang must have something non-whitespace after `#!` on the first line.
+        let first_line_tail = input_tail.lines().next()?;
+        if first_line_tail.contains(|c| !is_whitespace(c)) {
+            // This looks like a shebang, but if the next token after whitespace and comments
+            // is `[`, the input may be valid Rust code (`#![...]`), so treat it as Rust code.
+            // Doc comments are skipped too, being lexed as `TokenKind::(Line,Block)Comment`.
+            let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).filter(|tok|
+                !matches!(tok, TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment { .. })
+            ).next();
+            if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
+                // No other choice than to consider this a shebang.
+                return Some(2 + first_line_tail.len());
+            }
+        }
     }
+    None
 }
 
 /// Parses the first token from the provided input string.
diff --git a/src/test/ui/parser/shebang/shebang-doc-comment.rs b/src/test/ui/parser/shebang/shebang-doc-comment.rs
new file mode 100644
index 00000000000..7dbb9eebc75
--- /dev/null
+++ b/src/test/ui/parser/shebang/shebang-doc-comment.rs
@@ -0,0 +1,6 @@
+#!///bin/bash
+[allow(unused_variables)]
+//~^^ ERROR expected `[`, found doc comment
+
+// A doc comment is treated as whitespace (a regular comment) during shebang detection.
+// Even if it weren't, the result would still be an error, just a different one.
diff --git a/src/test/ui/parser/shebang/shebang-doc-comment.stderr b/src/test/ui/parser/shebang/shebang-doc-comment.stderr
new file mode 100644
index 00000000000..f524f556837
--- /dev/null
+++ b/src/test/ui/parser/shebang/shebang-doc-comment.stderr
@@ -0,0 +1,8 @@
+error: expected `[`, found doc comment `///bin/bash`
+  --> $DIR/shebang-doc-comment.rs:1:3
+   |
+LL | #!///bin/bash
+   |   ^^^^^^^^^^^ expected `[`
+
+error: aborting due to previous error
+