1044: Restrict follow-up tokens on `expr` and `stmt` r=CohenArthur a=CohenArthur

This adds a base for respecting the [Macro Follow-Set Ambiguity specification](https://doc.rust-lang.org/reference/macro-ambiguity.html).

If the design is validated, adding more restrictions on other fragment specifiers should not be difficult

Addresses #947 

Co-authored-by: Arthur Cohen <arthur.cohen@embecosm.com>
This commit is contained in:
bors[bot] 2022-03-23 09:00:05 +00:00 committed by GitHub
commit b9720caa10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 336 additions and 100 deletions

View File

@ -76,45 +76,6 @@ get_string_in_delims (std::string str_input, DelimType delim_type)
gcc_unreachable ();
}
// Converts a frag spec enum item to a string form.
std::string
frag_spec_to_str (MacroFragSpec frag_spec)
{
switch (frag_spec)
{
case BLOCK:
return "block";
case EXPR:
return "expr";
case IDENT:
return "ident";
case ITEM:
return "item";
case LIFETIME:
return "lifetime";
case LITERAL:
return "literal";
case META:
return "meta";
case PAT:
return "pat";
case PATH:
return "path";
case STMT:
return "stmt";
case TT:
return "tt";
case TY:
return "ty";
case VIS:
return "vis";
case INVALID:
return "INVALID_FRAG_SPEC";
default:
return "ERROR_MARK_STRING - unknown frag spec";
}
}
enum AttrMode
{
OUTER,
@ -2396,7 +2357,7 @@ LifetimeParam::as_string () const
std::string
MacroMatchFragment::as_string () const
{
return "$" + ident + ": " + frag_spec_to_str (frag_spec);
return "$" + ident + ": " + frag_spec.as_string ();
}
std::string

View File

@ -28,60 +28,128 @@ namespace AST {
// Decls as definitions moved to rust-ast.h
class MacroItem;
enum MacroFragSpec
class MacroFragSpec
{
BLOCK,
EXPR,
IDENT,
ITEM,
LIFETIME,
LITERAL,
META,
PAT,
PATH,
STMT,
TT,
TY,
VIS,
INVALID // not really a specifier, but used to mark invalid one passed in
};
public:
enum Kind
{
BLOCK,
EXPR,
IDENT,
ITEM,
LIFETIME,
LITERAL,
META,
PAT,
PATH,
STMT,
TT,
TY,
VIS,
INVALID // not really a specifier, but used to mark invalid one passed in
};
inline MacroFragSpec
get_frag_spec_from_str (std::string str)
{
if (str == "block")
return BLOCK;
else if (str == "expr")
return EXPR;
else if (str == "ident")
return IDENT;
else if (str == "item")
return ITEM;
else if (str == "lifetime")
return LIFETIME;
else if (str == "literal")
return LITERAL;
else if (str == "meta")
return META;
else if (str == "pat")
return PAT;
else if (str == "path")
return PATH;
else if (str == "stmt")
return STMT;
else if (str == "tt")
return TT;
else if (str == "ty")
return TY;
else if (str == "vis")
return VIS;
else
{
// error_at("invalid string '%s' used as fragment specifier",
// str->c_str());
return INVALID;
}
}
MacroFragSpec (Kind kind) : kind (kind) {}
static MacroFragSpec get_frag_spec_from_str (const std::string &str)
{
if (str == "block")
return MacroFragSpec (BLOCK);
else if (str == "expr")
return MacroFragSpec (EXPR);
else if (str == "ident")
return MacroFragSpec (IDENT);
else if (str == "item")
return MacroFragSpec (ITEM);
else if (str == "lifetime")
return MacroFragSpec (LIFETIME);
else if (str == "literal")
return MacroFragSpec (LITERAL);
else if (str == "meta")
return MacroFragSpec (META);
else if (str == "pat" || str == "pat_param")
return MacroFragSpec (PAT);
else if (str == "path")
return MacroFragSpec (PATH);
else if (str == "stmt")
return MacroFragSpec (STMT);
else if (str == "tt")
return MacroFragSpec (TT);
else if (str == "ty")
return MacroFragSpec (TY);
else if (str == "vis")
return MacroFragSpec (VIS);
else
{
// error_at("invalid string '%s' used as fragment specifier",
// str->c_str()));
return MacroFragSpec (INVALID);
}
}
Kind get_kind () const { return kind; }
bool is_error () const { return kind == Kind::INVALID; }
// Converts a frag spec enum item to a string form.
std::string as_string () const
{
switch (kind)
{
case BLOCK:
return "block";
case EXPR:
return "expr";
case IDENT:
return "ident";
case ITEM:
return "item";
case LIFETIME:
return "lifetime";
case LITERAL:
return "literal";
case META:
return "meta";
case PAT:
return "pat";
case PATH:
return "path";
case STMT:
return "stmt";
case TT:
return "tt";
case TY:
return "ty";
case VIS:
return "vis";
case INVALID:
return "INVALID_FRAG_SPEC";
default:
return "ERROR_MARK_STRING - unknown frag spec";
}
}
bool has_follow_set_restrictions ()
{
switch (kind)
{
case EXPR:
case STMT:
// FIXME: Add the following cases once we can handle them properly
// in `is_match_compatible()`
// case PAT:
// // case PAT_PARAM: FIXME: Doesn't <metavar>:pat_param exist?
// case PATH:
// case TY:
// case VIS:
return true;
default:
return false;
}
}
private:
Kind kind;
};
// A macro match that has an identifier and fragment spec
class MacroMatchFragment : public MacroMatch
@ -96,12 +164,17 @@ public:
{}
// Returns whether macro match fragment is in an error state.
bool is_error () const { return frag_spec == INVALID; }
bool is_error () const
{
return frag_spec.get_kind () == MacroFragSpec::INVALID;
}
// Creates an error state macro match fragment.
static MacroMatchFragment create_error (Location locus)
{
return MacroMatchFragment (std::string (""), INVALID, locus);
return MacroMatchFragment (std::string (""),
MacroFragSpec (MacroFragSpec::Kind::INVALID),
locus);
}
std::string as_string () const override;

View File

@ -439,7 +439,7 @@ bool
MacroExpander::match_fragment (Parser<MacroInvocLexer> &parser,
AST::MacroMatchFragment &fragment)
{
switch (fragment.get_frag_spec ())
switch (fragment.get_frag_spec ().get_kind ())
{
case AST::MacroFragSpec::EXPR:
parser.parse_expr ();

View File

@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see
#include "rust-diagnostics.h"
#include "util/rust-make-unique.h"
#include <algorithm>
namespace Rust {
// Left binding powers of operations.
@ -1767,6 +1768,13 @@ Parser<ManagedTokenSource>::parse_macro_matcher ()
return AST::MacroMatcher::create_error (t->get_locus ());
}
if (matches.size () > 0)
{
auto &last_match = matches.back ();
if (!is_match_compatible (*last_match, *match))
return AST::MacroMatcher::create_error (match->get_match_locus ());
}
matches.push_back (std::move (match));
// DEBUG
@ -1955,8 +1963,9 @@ Parser<ManagedTokenSource>::parse_macro_match_fragment ()
if (t == nullptr)
return nullptr;
AST::MacroFragSpec frag = AST::get_frag_spec_from_str (t->get_str ());
if (frag == AST::INVALID)
AST::MacroFragSpec frag
= AST::MacroFragSpec::get_frag_spec_from_str (t->get_str ());
if (frag.is_error ())
{
Error error (t->get_locus (),
"invalid fragment specifier %qs in fragment macro match",

View File

@ -92,4 +92,145 @@ extract_module_path (const AST::AttrVec &inner_attrs,
return path;
}
static bool
peculiar_fragment_match_compatible (AST::MacroMatchFragment &last_match,
AST::MacroMatch &match)
{
static std::unordered_map<AST::MacroFragSpec::Kind, std::vector<TokenId>>
follow_set = {
{AST::MacroFragSpec::EXPR, {MATCH_ARROW, COMMA, SEMICOLON}},
{AST::MacroFragSpec::STMT, {MATCH_ARROW, COMMA, SEMICOLON}},
};
Location error_locus = match.get_match_locus ();
// There are two behaviors to handle here: If the follow-up match is a token,
// we want to check if it is allowed.
// If it is a fragment, repetition or matcher then we know that it will be
// an error.
// For repetitions and matchers we want to extract a proper location to report
// the error.
switch (match.get_macro_match_type ())
{
case AST::MacroMatch::Tok: {
auto tok = static_cast<AST::Token *> (&match);
auto &allowed_toks
= follow_set[last_match.get_frag_spec ().get_kind ()];
auto is_valid = std::find (allowed_toks.begin (), allowed_toks.end (),
tok->get_id ())
!= allowed_toks.end ();
if (!is_valid)
// FIXME: Add hint about allowed fragments
rust_error_at (tok->get_match_locus (),
"token %<%s%> is not allowed after %<%s%> fragment",
tok->get_str ().c_str (),
last_match.get_frag_spec ().as_string ().c_str ());
return is_valid;
}
break;
case AST::MacroMatch::Repetition: {
auto repetition = static_cast<AST::MacroMatchRepetition *> (&match);
auto &matches = repetition->get_matches ();
if (!matches.empty ())
error_locus = matches.front ()->get_match_locus ();
break;
}
case AST::MacroMatch::Matcher: {
auto matcher = static_cast<AST::MacroMatcher *> (&match);
auto &matches = matcher->get_matches ();
if (!matches.empty ())
error_locus = matches.front ()->get_match_locus ();
break;
}
default:
break;
}
rust_error_at (error_locus, "fragment not allowed after %<%s%> fragment",
last_match.get_frag_spec ().as_string ().c_str ());
return false;
}
/**
* Avoid UB by calling .front() and .back() on empty containers...
*/
template <typename T>
static T *
get_back_ptr (std::vector<std::unique_ptr<T>> &values)
{
if (values.empty ())
return nullptr;
return values.back ().get ();
}
template <typename T>
static T *
get_front_ptr (std::vector<std::unique_ptr<T>> &values)
{
if (values.empty ())
return nullptr;
return values.front ().get ();
}
bool
is_match_compatible (AST::MacroMatch &last_match, AST::MacroMatch &match)
{
AST::MacroMatch *new_last = nullptr;
// We want to "extract" the concerning matches. In cases such as matchers and
// repetitions, we actually store multiple matchers, but are only concerned
// about the follow-set ambiguities of certain elements.
// There are some cases where we can short-circuit the algorithm: There will
// never be restrictions on token literals, or on certain fragments which do
// not have a set of follow-restrictions.
switch (last_match.get_macro_match_type ())
{
// This is our main stop condition: When we are finally looking at the
// last match (or its actual last component), and it is a fragment, it
// may contain some follow up restrictions.
case AST::MacroMatch::Fragment: {
auto fragment = static_cast<AST::MacroMatchFragment *> (&last_match);
if (fragment->get_frag_spec ().has_follow_set_restrictions ())
return peculiar_fragment_match_compatible (*fragment, match);
else
return true;
}
case AST::MacroMatch::Repetition: {
// A repetition on the left hand side means we want to make sure the
// last match of the repetition is compatible with the new match
auto repetition
= static_cast<AST::MacroMatchRepetition *> (&last_match);
new_last = get_back_ptr (repetition->get_matches ());
// If there are no matches in the matcher, then it can be followed by
// anything
if (!new_last)
return true;
break;
}
case AST::MacroMatch::Matcher: {
// Likewise for another matcher
auto matcher = static_cast<AST::MacroMatcher *> (&last_match);
new_last = get_back_ptr (matcher->get_matches ());
// If there are no matches in the matcher, then it can be followed by
// anything
if (!new_last)
return true;
break;
}
case AST::MacroMatch::Tok:
return true;
}
rust_assert (new_last);
// We check recursively until we find a terminating condition
// FIXME: Does expansion depth/limit matter here?
return is_match_compatible (*new_last, match);
}
} // namespace Rust

View File

@ -702,6 +702,18 @@ private:
std::string
extract_module_path (const AST::AttrVec &inner_attrs,
const AST::AttrVec &outer_attrs, const std::string &name);
/**
* Check if a MacroMatch is allowed to follow the last parsed MacroMatch.
*
* @param last_match Last matcher parsed before the current match
* @param match Current matcher to check
*
* @return true if the follow-up is valid, false otherwise
*/
bool
is_match_compatible (AST::MacroMatch &last_match,
AST::MacroMatch &current_match);
} // namespace Rust
// as now template, include implementations of all methods

View File

@ -0,0 +1,8 @@
macro_rules! m {
($a:expr tok) => {
// { dg-error "token .tok. is not allowed after .expr. fragment" "" { target *-*-* } .-1 }
// { dg-error "required first macro rule in macro rules definition could not be parsed" "" { target *-*-* } .-2 }
// { dg-error "failed to parse item in crate" "" { target *-*-* } .-3 }
$a
};
}

View File

@ -0,0 +1,8 @@
macro_rules! m {
($a:expr $(tok $es:expr)*) => {
// { dg-error "fragment not allowed after .expr. fragment" "" { target *-*-* } .-1 }
// { dg-error "required first macro rule in macro rules definition could not be parsed" "" { target *-*-* } .-2 }
// { dg-error "failed to parse item in crate" "" { target *-*-* } .-3 }
$a
};
}

View File

@ -0,0 +1,8 @@
macro_rules! m {
($($es:expr)* tok) => {
// { dg-error "token .tok. is not allowed after .expr. fragment" "" { target *-*-* } .-1 }
// { dg-error "required first macro rule in macro rules definition could not be parsed" "" { target *-*-* } .-2 }
// { dg-error "failed to parse item in crate" "" { target *-*-* } .-3 }
$a
};
}

View File

@ -0,0 +1,8 @@
macro_rules! m {
($e:expr $f:expr) => {
// { dg-error "fragment not allowed after .expr. fragment" "" { target *-*-* } .-1 }
// { dg-error "required first macro rule in macro rules definition could not be parsed" "" { target *-*-* } .-2 }
// { dg-error "failed to parse item in crate" "" { target *-*-* } .-3 }
$e
};
}

View File

@ -0,0 +1,8 @@
macro_rules! m {
($($e:expr)* $($f:expr)*) => {
// { dg-error "fragment not allowed after .expr. fragment" "" { target *-*-* } .-1 }
// { dg-error "required first macro rule in macro rules definition could not be parsed" "" { target *-*-* } .-2 }
// { dg-error "failed to parse item in crate" "" { target *-*-* } .-3 }
$e
};
}

View File

@ -1,6 +1,6 @@
macro_rules! add {
($e:expr big_tok $($es:expr) big_tok *) => {
$e + add!($($es) big_tok *)
($e:expr , $($es:expr) , *) => {
$e + add!($($es) , *)
};
($e:expr) => {
$e
@ -8,7 +8,7 @@ macro_rules! add {
}
fn main() -> i32 {
let a = add!(15 big_tok 2 big_tok 9); // 26
let a = add!(15, 2, 9); // 26
a - 26
}