Merge #1049

1049: Add better restrictions around semicolons in statements r=CohenArthur a=CohenArthur When parsing macro invocations, rustc does not actually consume the statement's trailing semicolon. Let's take the following example: ```rust macro_rules! one_stmt { ($s:stmt) => {}; } macro_rules! one_or_more_stmt { ($($s:stmt)*) => {}; } one_stmt!(let a = 1); one_stmt!(let b = 2;); // error one_or_more_stmt!(;); // valid one_or_more_stmt!(let a = 15;); // valid, two statements! one_or_more_stmt!(let a = 15 let b = 13); // valid, two statements again ``` A semicolon can count as a valid empty statement, but cannot be part of a statement (in macro invocations). This commit adds more restrictions that allow the parser to not always expect a semicolon token after the statement. Furthermore, this fixes a test that was previously accepted by the compiler but not by rustc. Fixes #1046 Co-authored-by: Arthur Cohen <arthur.cohen@embecosm.com>
2022-03-23 09:28:09 +00:00 · 2022-03-23 09:28:09 +00:00 · eef1ee2638
parent b9720caa10 ef56381862
commit eef1ee2638
5 changed files with 53 additions and 34 deletions
--- a/gcc/rust/expand/rust-macro-expand.cc
+++ b/gcc/rust/expand/rust-macro-expand.cc
@ -477,9 +477,12 @@ MacroExpander::match_fragment (Parser<MacroInvocLexer> &parser,
      parser.parse_visibility ();
      break;

-    case AST::MacroFragSpec::STMT:
-      parser.parse_stmt (/* allow_no_semi */ true);
-      break;
+      case AST::MacroFragSpec::STMT: {
+	auto restrictions = ParseRestrictions ();
+	restrictions.consume_semi = false;
+	parser.parse_stmt (restrictions);
+	break;
+      }

    case AST::MacroFragSpec::LIFETIME:
      parser.parse_lifetime_params ();
@ -887,11 +890,14 @@ transcribe_many_trait_impl_items (Parser<MacroInvocLexer> &parser,
 static std::vector<AST::SingleASTNode>
 transcribe_many_stmts (Parser<MacroInvocLexer> &parser, TokenId &delimiter)
 {
+  auto restrictions = ParseRestrictions ();
+  restrictions.consume_semi = false;
+
  // FIXME: This is invalid! It needs to also handle cases where the macro
  // transcriber is an expression, but since the macro call is followed by
  // a semicolon, it's a valid ExprStmt
-  return parse_many (parser, delimiter, [&parser] () {
-    auto stmt = parser.parse_stmt (/* allow_no_semi */ true);
+  return parse_many (parser, delimiter, [&parser, restrictions] () {
+    auto stmt = parser.parse_stmt (restrictions);
    return AST::SingleASTNode (std::move (stmt));
  });
 }
--- a/gcc/rust/parse/rust-parse-impl.h
+++ b/gcc/rust/parse/rust-parse-impl.h
@ -6100,7 +6100,7 @@ Parser<ManagedTokenSource>::parse_named_function_param (
 // Parses a statement (will further disambiguate any statement).
 template <typename ManagedTokenSource>
 std::unique_ptr<AST::Stmt>
-Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
+Parser<ManagedTokenSource>::parse_stmt (ParseRestrictions restrictions)
 {
  // quick exit for empty statement
  // FIXME: Can we have empty statements without semicolons? Just nothing?
@ -6125,7 +6125,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
    {
    case LET:
      // let statement
-      return parse_let_stmt (std::move (outer_attrs), allow_no_semi);
+      return parse_let_stmt (std::move (outer_attrs), restrictions);
    case PUB:
    case MOD:
    case EXTERN_TOK:
@ -6180,7 +6180,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
      // TODO: find out how to disable gcc "implicit fallthrough" warning
    default:
      // fallback: expression statement
-      return parse_expr_stmt (std::move (outer_attrs), allow_no_semi);
+      return parse_expr_stmt (std::move (outer_attrs), restrictions);
      break;
    }
 }
@ -6189,7 +6189,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
 template <typename ManagedTokenSource>
 std::unique_ptr<AST::LetStmt>
 Parser<ManagedTokenSource>::parse_let_stmt (AST::AttrVec outer_attrs,
-					    bool allow_no_semi)
+					    ParseRestrictions restrictions)
 {
  Location locus = lexer.peek_token ()->get_locus ();
  skip_token (LET);
@ -6244,13 +6244,9 @@ Parser<ManagedTokenSource>::parse_let_stmt (AST::AttrVec outer_attrs,
 	}
    }

-  if (!maybe_skip_token (SEMICOLON) && !allow_no_semi)
-    {
-      // skip after somewhere
+  if (restrictions.consume_semi)
+    if (!skip_token (SEMICOLON))
      return nullptr;
-      /* TODO: how wise is it to ditch a mostly-valid let statement just
-       * because a semicolon is missing? */
-    }

  return std::unique_ptr<AST::LetStmt> (
    new AST::LetStmt (std::move (pattern), std::move (expr), std::move (type),
@ -7085,7 +7081,7 @@ Parser<ManagedTokenSource>::parse_method ()
 template <typename ManagedTokenSource>
 std::unique_ptr<AST::ExprStmt>
 Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
-					     bool allow_no_semi)
+					     ParseRestrictions restrictions)
 {
  /* potential thoughts - define new virtual method "has_block()" on expr. parse
   * expr and then determine whether semicolon is needed as a result of this
@ -7125,7 +7121,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
 	else
 	  {
 	    return parse_expr_stmt_without_block (std::move (outer_attrs),
-						  allow_no_semi);
+						  restrictions);
 	  }
      }
      case UNSAFE: {
@ -7139,7 +7135,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
 	else
 	  {
 	    return parse_expr_stmt_without_block (std::move (outer_attrs),
-						  allow_no_semi);
+						  restrictions);
 	  }
      }
    default:
@ -7148,7 +7144,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
       * initial tokens in order to prevent more syntactical errors at parse
       * time. */
      return parse_expr_stmt_without_block (std::move (outer_attrs),
-					    allow_no_semi);
+					    restrictions);
    }
 }

@ -7264,7 +7260,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt_with_block (
 template <typename ManagedTokenSource>
 std::unique_ptr<AST::ExprStmtWithoutBlock>
 Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
-  AST::AttrVec outer_attrs, bool allow_no_semi)
+  AST::AttrVec outer_attrs, ParseRestrictions restrictions)
 {
  /* TODO: maybe move more logic for expr without block in here for better error
   * handling */
@ -7273,7 +7269,6 @@ Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
  std::unique_ptr<AST::ExprWithoutBlock> expr = nullptr;
  Location locus = lexer.peek_token ()->get_locus ();

-  auto restrictions = ParseRestrictions ();
  restrictions.expr_can_be_stmt = true;

  expr = parse_expr_without_block (std::move (outer_attrs), restrictions);
@ -7288,12 +7283,9 @@ Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
      return nullptr;
    }

-  // skip semicolon at end that is required
-  if (!maybe_skip_token (SEMICOLON) && !allow_no_semi)
-    {
-      // skip somewhere?
+  if (restrictions.consume_semi)
+    if (!skip_token (SEMICOLON))
      return nullptr;
-    }

  return std::unique_ptr<AST::ExprStmtWithoutBlock> (
    new AST::ExprStmtWithoutBlock (std::move (expr), locus));
--- a/gcc/rust/parse/rust-parse.h
+++ b/gcc/rust/parse/rust-parse.h
@ -81,6 +81,7 @@ struct ParseRestrictions
  bool entered_from_unary = false;
  bool expr_can_be_null = false;
  bool expr_can_be_stmt = false;
+  bool consume_semi = true;
 };

 // Parser implementation for gccrs.
@ -129,11 +130,9 @@ public:
   *    | LetStatement
   *    | ExpressionStatement
   *    | MacroInvocationSemi
-   *
-   * @param allow_no_semi Allow the parser to not parse a semicolon after
-   * 		the statement without erroring out
   */
-  std::unique_ptr<AST::Stmt> parse_stmt (bool allow_no_semi = false);
+  std::unique_ptr<AST::Stmt> parse_stmt (ParseRestrictions restrictions
+					 = ParseRestrictions ());
  std::unique_ptr<AST::Type> parse_type ();
  std::unique_ptr<AST::ExternalItem> parse_external_item ();
  std::unique_ptr<AST::TraitItem> parse_trait_item ();
@ -616,14 +615,17 @@ private:
   * 		semicolon to follow it
   */
  std::unique_ptr<AST::LetStmt> parse_let_stmt (AST::AttrVec outer_attrs,
-						bool allow_no_semi = false);
+						ParseRestrictions restrictions
+						= ParseRestrictions ());
  std::unique_ptr<AST::ExprStmt> parse_expr_stmt (AST::AttrVec outer_attrs,
-						  bool allow_no_semi = false);
+						  ParseRestrictions restrictions
+						  = ParseRestrictions ());
  std::unique_ptr<AST::ExprStmtWithBlock>
  parse_expr_stmt_with_block (AST::AttrVec outer_attrs);
  std::unique_ptr<AST::ExprStmtWithoutBlock>
  parse_expr_stmt_without_block (AST::AttrVec outer_attrs,
-				 bool allow_no_semi = false);
+				 ParseRestrictions restrictions
+				 = ParseRestrictions ());
  ExprOrStmt parse_stmt_or_expr_without_block ();
  ExprOrStmt parse_stmt_or_expr_with_block (AST::AttrVec outer_attrs);
  ExprOrStmt parse_macro_invocation_maybe_semi (AST::AttrVec outer_attrs);
--- a/gcc/testsuite/rust/compile/macro18.rs
+++ b/gcc/testsuite/rust/compile/macro18.rs
@ -7,7 +7,7 @@ macro_rules! take_stmt {
 }

 fn main() -> i32 {
-    take_stmt!(let complete = 15;);
+    take_stmt!(let complete = 15;); // { dg-error "Failed to match any rule within macro" }
    take_stmt!(let lacking = 14);

    0
--- a/gcc/testsuite/rust/compile/macro32.rs
+++ b/gcc/testsuite/rust/compile/macro32.rs
@ -0,0 +1,19 @@
+macro_rules! s {
+    ($s:stmt) => {{}};
+}
+
+macro_rules! multi_s {
+    ($($s:stmt)+) => {{}};
+}
+
+fn main() -> i32 {
+    s!(let a = 15);
+    s!(;); // Empty statement
+    s!(let a = 15;); // { dg-error "Failed to match any rule within macro" }
+    multi_s!(let a = 15;);
+    // ^ this actually gets parsed as two statements - one LetStmt and one
+    // empty statement. This is the same behavior as rustc, which you can
+    // see using a count!() macro
+
+    32
+}