From 21755b58c900cd5d14422ebd980fe11390e021fe Mon Sep 17 00:00:00 2001
From: Vadim Petrochenkov
Date: Fri, 29 May 2020 21:51:46 +0300
Subject: [PATCH] rustc_lexer: Optimize shebang detection slightly

---
 src/librustc_lexer/src/lib.rs                 | 37 +++++++++----------
 .../ui/parser/shebang/shebang-doc-comment.rs  |  6 +++
 .../parser/shebang/shebang-doc-comment.stderr |  8 ++++
 3 files changed, 32 insertions(+), 19 deletions(-)
 create mode 100644 src/test/ui/parser/shebang/shebang-doc-comment.rs
 create mode 100644 src/test/ui/parser/shebang/shebang-doc-comment.stderr

diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs
index fe6785de009..c2139d07f37 100644
--- a/src/librustc_lexer/src/lib.rs
+++ b/src/librustc_lexer/src/lib.rs
@@ -238,26 +238,25 @@ pub enum Base {
 /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
 /// but shebang isn't a part of rust syntax.
 pub fn strip_shebang(input: &str) -> Option<usize> {
-    let first_line = input.lines().next()?;
-    // A shebang is intentionally loosely defined as `#! [non whitespace]` on the first line.
-    let could_be_shebang =
-        first_line.starts_with("#!") && first_line[2..].contains(|c| !is_whitespace(c));
-    if !could_be_shebang {
-        return None;
-    }
-    let non_whitespace_tokens = tokenize(input).map(|tok| tok.kind).filter(|tok|
-        !matches!(tok, TokenKind::LineComment | TokenKind::BlockComment { .. } | TokenKind::Whitespace)
-    );
-    let prefix = [TokenKind::Pound, TokenKind::Not, TokenKind::OpenBracket];
-    let starts_with_attribute = non_whitespace_tokens.take(3).eq(prefix.iter().copied());
-    if starts_with_attribute {
-        // If the file starts with #![ then it's definitely not a shebang -- it couldn't be
-        // a rust program since a Rust program can't start with `[`
-        None
-    } else {
-        // It's a #!... and there isn't a `[` in sight, must be a shebang
-        Some(first_line.len())
+    // Shebang must start with `#!` literally, without any preceding whitespace.
+    if input.starts_with("#!") {
+        let input_tail = &input[2..];
+        // Shebang must have something non-whitespace after `#!` on the first line.
+        let first_line_tail = input_tail.lines().next()?;
+        if first_line_tail.contains(|c| !is_whitespace(c)) {
+            // Ok, this is a shebang but if the next non-whitespace token is `[` or maybe
+            // a doc comment (due to `TokenKind::(Line,Block)Comment` ambiguity at lexer level),
+            // then it may be valid Rust code, so consider it Rust code.
+            let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).filter(|tok|
+                !matches!(tok, TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment { .. })
+            ).next();
+            if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
+                // No other choice than to consider this a shebang.
+                return Some(2 + first_line_tail.len());
+            }
+        }
     }
+    None
 }
 
 /// Parses the first token from the provided input string.
diff --git a/src/test/ui/parser/shebang/shebang-doc-comment.rs b/src/test/ui/parser/shebang/shebang-doc-comment.rs
new file mode 100644
index 00000000000..7dbb9eebc75
--- /dev/null
+++ b/src/test/ui/parser/shebang/shebang-doc-comment.rs
@@ -0,0 +1,6 @@
+#!///bin/bash
+[allow(unused_variables)]
+//~^^ ERROR expected `[`, found doc comment
+
+// Doc comment is misinterpreted as a whitespace (regular comment) during shebang detection.
+// Even if it wasn't, it would still result in an error, just a different one.
diff --git a/src/test/ui/parser/shebang/shebang-doc-comment.stderr b/src/test/ui/parser/shebang/shebang-doc-comment.stderr
new file mode 100644
index 00000000000..f524f556837
--- /dev/null
+++ b/src/test/ui/parser/shebang/shebang-doc-comment.stderr
@@ -0,0 +1,8 @@
+error: expected `[`, found doc comment `///bin/bash`
+  --> $DIR/shebang-doc-comment.rs:1:3
+   |
+LL | #!///bin/bash
+   |   ^^^^^^^^^^^ expected `[`
+
+error: aborting due to previous error
+