1049: Add better restrictions around semicolons in statements r=CohenArthur a=CohenArthur

When parsing macro invocations, rustc does not actually consume the
statement's trailing semicolon.

Let's take the following example:
```rust
macro_rules! one_stmt {
    ($s:stmt) => {};
}

macro_rules! one_or_more_stmt {
    ($($s:stmt)*) => {};
}

one_stmt!(let a = 1);
one_stmt!(let b = 2;); // error

one_or_more_stmt!(;); // valid
one_or_more_stmt!(let a = 15;); // valid, two statements!
one_or_more_stmt!(let a = 15 let b = 13); // valid, two statements again
```

A semicolon can count as a valid empty statement, but cannot be part of
a statement (in macro invocations). This commit adds more restrictions
that allow the parser to not always expect a semicolon token after the
statement. Furthermore, this fixes a test that was previously accepted
by the compiler but not by rustc.

Fixes #1046 

Co-authored-by: Arthur Cohen <arthur.cohen@embecosm.com>
This commit is contained in:
bors[bot] 2022-03-23 09:28:09 +00:00 committed by GitHub
commit eef1ee2638
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 53 additions and 34 deletions

View File

@ -477,9 +477,12 @@ MacroExpander::match_fragment (Parser<MacroInvocLexer> &parser,
parser.parse_visibility ();
break;
case AST::MacroFragSpec::STMT:
parser.parse_stmt (/* allow_no_semi */ true);
break;
case AST::MacroFragSpec::STMT: {
auto restrictions = ParseRestrictions ();
restrictions.consume_semi = false;
parser.parse_stmt (restrictions);
break;
}
case AST::MacroFragSpec::LIFETIME:
parser.parse_lifetime_params ();
@ -887,11 +890,14 @@ transcribe_many_trait_impl_items (Parser<MacroInvocLexer> &parser,
static std::vector<AST::SingleASTNode>
transcribe_many_stmts (Parser<MacroInvocLexer> &parser, TokenId &delimiter)
{
auto restrictions = ParseRestrictions ();
restrictions.consume_semi = false;
// FIXME: This is invalid! It needs to also handle cases where the macro
// transcriber is an expression, but since the macro call is followed by
// a semicolon, it's a valid ExprStmt
return parse_many (parser, delimiter, [&parser] () {
auto stmt = parser.parse_stmt (/* allow_no_semi */ true);
return parse_many (parser, delimiter, [&parser, restrictions] () {
auto stmt = parser.parse_stmt (restrictions);
return AST::SingleASTNode (std::move (stmt));
});
}

View File

@ -6100,7 +6100,7 @@ Parser<ManagedTokenSource>::parse_named_function_param (
// Parses a statement (will further disambiguate any statement).
template <typename ManagedTokenSource>
std::unique_ptr<AST::Stmt>
Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
Parser<ManagedTokenSource>::parse_stmt (ParseRestrictions restrictions)
{
// quick exit for empty statement
// FIXME: Can we have empty statements without semicolons? Just nothing?
@ -6125,7 +6125,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
{
case LET:
// let statement
return parse_let_stmt (std::move (outer_attrs), allow_no_semi);
return parse_let_stmt (std::move (outer_attrs), restrictions);
case PUB:
case MOD:
case EXTERN_TOK:
@ -6180,7 +6180,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
// TODO: find out how to disable gcc "implicit fallthrough" warning
default:
// fallback: expression statement
return parse_expr_stmt (std::move (outer_attrs), allow_no_semi);
return parse_expr_stmt (std::move (outer_attrs), restrictions);
break;
}
}
@ -6189,7 +6189,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
template <typename ManagedTokenSource>
std::unique_ptr<AST::LetStmt>
Parser<ManagedTokenSource>::parse_let_stmt (AST::AttrVec outer_attrs,
bool allow_no_semi)
ParseRestrictions restrictions)
{
Location locus = lexer.peek_token ()->get_locus ();
skip_token (LET);
@ -6244,13 +6244,9 @@ Parser<ManagedTokenSource>::parse_let_stmt (AST::AttrVec outer_attrs,
}
}
if (!maybe_skip_token (SEMICOLON) && !allow_no_semi)
{
// skip after somewhere
if (restrictions.consume_semi)
if (!skip_token (SEMICOLON))
return nullptr;
/* TODO: how wise is it to ditch a mostly-valid let statement just
* because a semicolon is missing? */
}
return std::unique_ptr<AST::LetStmt> (
new AST::LetStmt (std::move (pattern), std::move (expr), std::move (type),
@ -7085,7 +7081,7 @@ Parser<ManagedTokenSource>::parse_method ()
template <typename ManagedTokenSource>
std::unique_ptr<AST::ExprStmt>
Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
bool allow_no_semi)
ParseRestrictions restrictions)
{
/* potential thoughts - define new virtual method "has_block()" on expr. parse
* expr and then determine whether semicolon is needed as a result of this
@ -7125,7 +7121,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
else
{
return parse_expr_stmt_without_block (std::move (outer_attrs),
allow_no_semi);
restrictions);
}
}
case UNSAFE: {
@ -7139,7 +7135,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
else
{
return parse_expr_stmt_without_block (std::move (outer_attrs),
allow_no_semi);
restrictions);
}
}
default:
@ -7148,7 +7144,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
* initial tokens in order to prevent more syntactical errors at parse
* time. */
return parse_expr_stmt_without_block (std::move (outer_attrs),
allow_no_semi);
restrictions);
}
}
@ -7264,7 +7260,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt_with_block (
template <typename ManagedTokenSource>
std::unique_ptr<AST::ExprStmtWithoutBlock>
Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
AST::AttrVec outer_attrs, bool allow_no_semi)
AST::AttrVec outer_attrs, ParseRestrictions restrictions)
{
/* TODO: maybe move more logic for expr without block in here for better error
* handling */
@ -7273,7 +7269,6 @@ Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
std::unique_ptr<AST::ExprWithoutBlock> expr = nullptr;
Location locus = lexer.peek_token ()->get_locus ();
auto restrictions = ParseRestrictions ();
restrictions.expr_can_be_stmt = true;
expr = parse_expr_without_block (std::move (outer_attrs), restrictions);
@ -7288,12 +7283,9 @@ Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
return nullptr;
}
// skip semicolon at end that is required
if (!maybe_skip_token (SEMICOLON) && !allow_no_semi)
{
// skip somewhere?
if (restrictions.consume_semi)
if (!skip_token (SEMICOLON))
return nullptr;
}
return std::unique_ptr<AST::ExprStmtWithoutBlock> (
new AST::ExprStmtWithoutBlock (std::move (expr), locus));

View File

@ -81,6 +81,7 @@ struct ParseRestrictions
bool entered_from_unary = false;
bool expr_can_be_null = false;
bool expr_can_be_stmt = false;
bool consume_semi = true;
};
// Parser implementation for gccrs.
@ -129,11 +130,9 @@ public:
* | LetStatement
* | ExpressionStatement
* | MacroInvocationSemi
*
* @param allow_no_semi Allow the parser to not parse a semicolon after
* the statement without erroring out
*/
std::unique_ptr<AST::Stmt> parse_stmt (bool allow_no_semi = false);
std::unique_ptr<AST::Stmt> parse_stmt (ParseRestrictions restrictions
= ParseRestrictions ());
std::unique_ptr<AST::Type> parse_type ();
std::unique_ptr<AST::ExternalItem> parse_external_item ();
std::unique_ptr<AST::TraitItem> parse_trait_item ();
@ -616,14 +615,17 @@ private:
* semicolon to follow it
*/
std::unique_ptr<AST::LetStmt> parse_let_stmt (AST::AttrVec outer_attrs,
bool allow_no_semi = false);
ParseRestrictions restrictions
= ParseRestrictions ());
std::unique_ptr<AST::ExprStmt> parse_expr_stmt (AST::AttrVec outer_attrs,
bool allow_no_semi = false);
ParseRestrictions restrictions
= ParseRestrictions ());
std::unique_ptr<AST::ExprStmtWithBlock>
parse_expr_stmt_with_block (AST::AttrVec outer_attrs);
std::unique_ptr<AST::ExprStmtWithoutBlock>
parse_expr_stmt_without_block (AST::AttrVec outer_attrs,
bool allow_no_semi = false);
ParseRestrictions restrictions
= ParseRestrictions ());
ExprOrStmt parse_stmt_or_expr_without_block ();
ExprOrStmt parse_stmt_or_expr_with_block (AST::AttrVec outer_attrs);
ExprOrStmt parse_macro_invocation_maybe_semi (AST::AttrVec outer_attrs);

View File

@ -7,7 +7,7 @@ macro_rules! take_stmt {
}
fn main() -> i32 {
take_stmt!(let complete = 15;);
take_stmt!(let complete = 15;); // { dg-error "Failed to match any rule within macro" }
take_stmt!(let lacking = 14);
0

View File

@ -0,0 +1,19 @@
macro_rules! s {
($s:stmt) => {{}};
}
macro_rules! multi_s {
($($s:stmt)+) => {{}};
}
fn main() -> i32 {
s!(let a = 15);
s!(;); // Empty statement
s!(let a = 15;); // { dg-error "Failed to match any rule within macro" }
multi_s!(let a = 15;);
// ^ this actually gets parsed as two statements - one LetStmt and one
// empty statement. This is the same behavior as rustc, which you can
// see using a count!() macro
32
}