From f6d18db402cfebcf5c1bdc8e730f3923b3bb0421 Mon Sep 17 00:00:00 2001 From: Sasha Date: Fri, 28 Aug 2020 23:04:42 +0200 Subject: [PATCH] Use string literal directly when available in format The previous implementation used the `Parser::parse_expr` function in order to extract the format expression. If the first comma following the format expression was mistakenly replaced with a dot, then the next format expression was eaten by the function, because it looked like a syntactically valid expression, which resulted in incorrectly spanned error messages. The way the format expression is extracted is changed: we first look at the first available token in the first argument supplied to the `format!` macro call. If it is a string literal, then it is promoted as a format expression immediately, otherwise we fall back to the original `parse_expr`-related method. This allows us to ensure that the parser won't consume too many tokens when a typo is made. A test has been added to ensure that the issue is properly fixed. 
--- compiler/rustc_builtin_macros/src/format.rs | 21 +++++++++++- compiler/rustc_parse/src/parser/expr.rs | 2 +- src/test/ui/fmt/incorrect-first-separator.rs | 22 +++++++++++++ .../ui/fmt/incorrect-first-separator.stderr | 32 +++++++++++++++++++ 4 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 src/test/ui/fmt/incorrect-first-separator.rs create mode 100644 src/test/ui/fmt/incorrect-first-separator.stderr diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs index 373277f525d..48506148ed9 100644 --- a/compiler/rustc_builtin_macros/src/format.rs +++ b/compiler/rustc_builtin_macros/src/format.rs @@ -135,7 +135,26 @@ fn parse_args<'a>( return Err(ecx.struct_span_err(sp, "requires at least a format string argument")); } - let fmtstr = p.parse_expr()?; + let first_token = &p.token; + let fmtstr = match first_token.kind { + token::TokenKind::Literal(token::Lit { + kind: token::LitKind::Str | token::LitKind::StrRaw(_), + .. + }) => { + // If the first token is a string literal, then a format expression + // is constructed from it. + // + // This allows us to properly handle cases when the first comma + // after the format string is mistakenly replaced with any operator, + // which cause the expression parser to eat too much tokens. + p.parse_literal_maybe_minus()? + } + _ => { + // Otherwise, we fall back to the expression parser. + p.parse_expr()? + } + }; + let mut first = true; let mut named = false; diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index f022c628fe2..69d13b5cf53 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1480,7 +1480,7 @@ impl<'a> Parser<'a> { /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`). /// Keep this in sync with `Token::can_begin_literal_maybe_minus`. 
- pub(super) fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P> { + pub fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P> { maybe_whole_expr!(self); let lo = self.token.span; diff --git a/src/test/ui/fmt/incorrect-first-separator.rs b/src/test/ui/fmt/incorrect-first-separator.rs new file mode 100644 index 00000000000..0b097fdfab8 --- /dev/null +++ b/src/test/ui/fmt/incorrect-first-separator.rs @@ -0,0 +1,22 @@ +// Allows to track issue #75492: +// https://github.com/rust-lang/rust/issues/75492 + +use std::iter; + +fn main() { + format!("A number: {}". iter::once(42).next().unwrap()); + //~^ ERROR expected token: `,` + + // Other kind of types are also checked: + + format!("A number: {}" / iter::once(42).next().unwrap()); + //~^ ERROR expected token: `,` + + format!("A number: {}"; iter::once(42).next().unwrap()); + //~^ ERROR expected token: `,` + + // Note: this character is an COMBINING COMMA BELOW unicode char + format!("A number: {}" ̦ iter::once(42).next().unwrap()); + //~^ ERROR expected token: `,` + //~^^ ERROR unknown start of token: \u{326} +} diff --git a/src/test/ui/fmt/incorrect-first-separator.stderr b/src/test/ui/fmt/incorrect-first-separator.stderr new file mode 100644 index 00000000000..60d2a82855e --- /dev/null +++ b/src/test/ui/fmt/incorrect-first-separator.stderr @@ -0,0 +1,32 @@ +error: unknown start of token: \u{326} + --> $DIR/incorrect-first-separator.rs:19:28 + | +LL | format!("A number: {}" ̦ iter::once(42).next().unwrap()); + | ^ + +error: expected token: `,` + --> $DIR/incorrect-first-separator.rs:7:27 + | +LL | format!("A number: {}". 
iter::once(42).next().unwrap()); + | ^ expected `,` + +error: expected token: `,` + --> $DIR/incorrect-first-separator.rs:12:28 + | +LL | format!("A number: {}" / iter::once(42).next().unwrap()); + | ^ expected `,` + +error: expected token: `,` + --> $DIR/incorrect-first-separator.rs:15:27 + | +LL | format!("A number: {}"; iter::once(42).next().unwrap()); + | ^ expected `,` + +error: expected token: `,` + --> $DIR/incorrect-first-separator.rs:19:30 + | +LL | format!("A number: {}" ̦ iter::once(42).next().unwrap()); + | ^^^^ expected `,` + +error: aborting due to 5 previous errors +