1043: implement include_bytes! and include_str! macros r=CohenArthur a=dafaust

Implement the include_bytes! and include_str! builtin macros.

Addresses:  #927 

1064: Handle :tt fragments properly r=CohenArthur a=CohenArthur

:tt fragments stand for token trees, and are composed of either a token,
or a delimited token tree, which is a token tree surrounded by
delimiters (parentheses, curly brackets or square brackets).

This should allow us to handle a lot more macros, including extremely
powerful macro patterns such as TT munchers.


Co-authored-by: David Faust <david.faust@oracle.com>
Co-authored-by: Arthur Cohen <arthur.cohen@embecosm.com>
This commit is contained in:
bors[bot] 2022-03-24 16:54:54 +00:00 committed by GitHub
commit 0fa882160d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 300 additions and 4 deletions

View File

@ -20,6 +20,9 @@
#include "rust-diagnostics.h"
#include "rust-expr.h"
#include "rust-session-manager.h"
#include "rust-macro-invoc-lexer.h"
#include "rust-lex.h"
#include "rust-parse.h"
namespace Rust {
namespace {
@ -30,6 +33,107 @@ make_string (Location locus, std::string value)
new AST::LiteralExpr (value, AST::Literal::STRING,
PrimitiveCoreType::CORETYPE_STR, {}, locus));
}
/* Parse a single string literal from the given delimited token tree,
   and return the LiteralExpr for it.  Allow for an optional trailing comma,
   but otherwise enforce that these are the only tokens.

   INVOC_TOKEN_TREE is the delimited argument list of the macro invocation
   and INVOC_LOCUS is the location at which diagnostics are reported.
   Returns nullptr, after emitting an error at INVOC_LOCUS, when the
   arguments are not exactly one string literal.  */
std::unique_ptr<AST::LiteralExpr>
parse_single_string_literal (AST::DelimTokenTree &invoc_token_tree,
			     Location invoc_locus)
{
  MacroInvocLexer lex (invoc_token_tree.to_token_stream ());
  Parser<MacroInvocLexer> parser (std::move (lex));

  /* Pick the opening and closing delimiter tokens that wrap the
     arguments.  Curly brackets are the default.  */
  TokenId first_token_id = LEFT_CURLY;
  TokenId last_token_id = TokenId::RIGHT_CURLY;
  switch (invoc_token_tree.get_delim_type ())
    {
    case AST::DelimType::PARENS:
      first_token_id = LEFT_PAREN;
      last_token_id = TokenId::RIGHT_PAREN;
      break;
    case AST::DelimType::CURLY:
      break;
    case AST::DelimType::SQUARE:
      first_token_id = LEFT_SQUARE;
      last_token_id = TokenId::RIGHT_SQUARE;
      break;
    }

  /* Consume the opening delimiter outside of the assertion, so the token
     is still skipped should rust_assert be configured not to evaluate its
     operand.  */
  bool skipped_open_delim = parser.skip_token (first_token_id);
  rust_assert (skipped_open_delim);

  std::unique_ptr<AST::LiteralExpr> lit_expr = nullptr;

  if (parser.peek_current_token ()->get_id () == STRING_LITERAL)
    {
      lit_expr = parser.parse_literal_expr ();
      parser.maybe_skip_token (COMMA);

      /* Anything other than the closing delimiter here means that more
	 than one argument was given.  */
      if (parser.peek_current_token ()->get_id () != last_token_id)
	{
	  lit_expr = nullptr;
	  rust_error_at (invoc_locus, "macro takes 1 argument");
	}
    }
  else if (parser.peek_current_token ()->get_id () == last_token_id)
    /* Empty argument list.  */
    rust_error_at (invoc_locus, "macro takes 1 argument");
  else
    rust_error_at (invoc_locus, "argument must be a string literal");

  parser.skip_token (last_token_id);

  return lit_expr;
}
/* Resolve PATH against the directory of the source file that contains
   LOCUS and return the combined path.  When that file name carries no
   directory component, the current directory ('.') is used instead.  */
std::string
source_relative_path (std::string path, Location locus)
{
  std::string including_file
    = Session::get_instance ().linemap->location_file (locus);

  /* Everything before the last separator is the directory part.  */
  auto last_sep = including_file.rfind (file_separator);

  std::string prefix = (last_sep == std::string::npos)
			 ? std::string (".")
			 : including_file.substr (0, last_sep);
  prefix += file_separator;

  return prefix + path;
}
/* Read the full contents of the file FILENAME and return them in a vector.
   On failure an error is emitted and an empty vector is returned.  An
   empty file is not an error and also yields an empty vector.
   FIXME: platform specific.  */
std::vector<uint8_t>
load_file_bytes (const char *filename)
{
  RAIIFile file_wrap (filename);
  FILE *f = file_wrap.get_raw ();
  if (f == nullptr)
    {
      rust_error_at (Location (), "cannot open filename %s: %m", filename);
      return std::vector<uint8_t> ();
    }

  /* Determine the file size by seeking to the end.  Both fseek and ftell
     can fail (e.g. on a non-seekable stream), and ftell then returns -1,
     which must not be used as a buffer size.  */
  long fsize = -1;
  if (fseek (f, 0L, SEEK_END) == 0)
    fsize = ftell (f);
  if (fsize < 0 || fseek (f, 0L, SEEK_SET) != 0)
    {
      rust_error_at (Location (), "error reading file %s: %m", filename);
      return std::vector<uint8_t> ();
    }

  std::vector<uint8_t> buf (fsize);

  /* Only read when there is something to read: with a zero size fread
     returns 0, which would be mistaken for a failure, and the buffer has
     no first element to point at.  */
  if (fsize > 0 && fread (buf.data (), fsize, 1, f) != 1)
    {
      rust_error_at (Location (), "error reading file %s: %m", filename);
      return std::vector<uint8_t> ();
    }

  return buf;
}
} // namespace
AST::ASTFragment
@ -63,4 +167,73 @@ MacroBuiltin::column (Location invoc_locus, AST::MacroInvocData &invoc)
return AST::ASTFragment ({column_no});
}
/* Expand builtin macro include_bytes!("filename"), which includes the contents
   of the given file as reference to a byte array.  Yields an expression of type
   &'static [u8; N].

   Returns an error fragment when the invocation does not consist of a single
   string literal.  If the file cannot be read, load_file_bytes has already
   reported the problem and the resulting array is empty.  */
AST::ASTFragment
MacroBuiltin::include_bytes (Location invoc_locus, AST::MacroInvocData &invoc)
{
  /* The argument names a file relative to the source file that contains the
     invocation (the one currently being compiled).  */
  auto filename_lit
    = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus);
  if (!filename_lit)
    return AST::ASTFragment::create_error ();

  std::string resolved_path
    = source_relative_path (filename_lit->as_string (), invoc_locus);
  std::vector<uint8_t> contents = load_file_bytes (resolved_path.c_str ());

  /* One u8 byte literal per byte of the file.
     Is there a more efficient way to do this?  */
  std::vector<std::unique_ptr<AST::Expr>> byte_exprs;
  for (auto it = contents.begin (); it != contents.end (); ++it)
    byte_exprs.emplace_back (
      new AST::LiteralExpr (std::string (1, (char) *it), AST::Literal::BYTE,
			    PrimitiveCoreType::CORETYPE_U8,
			    {} /* outer_attrs */, invoc_locus));

  /* Wrap the literals in an array expression, then borrow that array.  */
  std::unique_ptr<AST::ArrayElems> values (
    new AST::ArrayElemsValues (std::move (byte_exprs), invoc_locus));
  std::unique_ptr<AST::Expr> array_expr (
    new AST::ArrayExpr (std::move (values), {}, {}, invoc_locus));
  std::unique_ptr<AST::Expr> ref_expr (
    new AST::BorrowExpr (std::move (array_expr), false, false, {},
			 invoc_locus));

  AST::SingleASTNode node (std::move (ref_expr));
  return AST::ASTFragment ({node});
}
/* Expand builtin macro include_str!("filename"), which includes the contents
   of the given file as a string.  The file must be UTF-8 encoded.  Yields an
   expression of type &'static str.

   Returns an error fragment when the invocation does not consist of a single
   string literal.  If the file cannot be read, load_file_bytes has already
   reported the problem and the resulting string is empty.  */
AST::ASTFragment
MacroBuiltin::include_str (Location invoc_locus, AST::MacroInvocData &invoc)
{
  /* Get target filename from the macro invocation, which is treated as a path
     relative to the include!-ing file (currently being compiled).  */
  auto lit_expr
    = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus);
  if (lit_expr == nullptr)
    return AST::ASTFragment::create_error ();

  std::string target_filename
    = source_relative_path (lit_expr->as_string (), invoc_locus);
  std::vector<uint8_t> bytes = load_file_bytes (target_filename.c_str ());

  /* FIXME: Enforce that the file contents are valid UTF-8.  */

  /* Build the string from the iterator range instead of from &bytes[0]:
     BYTES is empty for an empty file or a failed load, and an empty vector
     has no first element to take the address of.  */
  std::string str (bytes.begin (), bytes.end ());

  auto node = AST::SingleASTNode (make_string (invoc_locus, str));
  return AST::ASTFragment ({node});
}
} // namespace Rust

View File

@ -71,6 +71,12 @@ public:
static AST::ASTFragment column (Location invoc_locus,
AST::MacroInvocData &invoc);
/* Expander for the include_bytes! builtin macro.  INVOC_LOCUS is the
   location of the invocation and INVOC carries its arguments.  */
static AST::ASTFragment include_bytes (Location invoc_locus,
AST::MacroInvocData &invoc);
/* Expander for the include_str! builtin macro.  INVOC_LOCUS is the
   location of the invocation and INVOC carries its arguments.  */
static AST::ASTFragment include_str (Location invoc_locus,
AST::MacroInvocData &invoc);
};
} // namespace Rust

View File

@ -497,10 +497,8 @@ MacroExpander::match_fragment (Parser<MacroInvocLexer> &parser,
gcc_unreachable ();
break;
// what is TT?
case AST::MacroFragSpec::TT:
// parser.parse_token_tree() ?
gcc_unreachable ();
parser.parse_token_tree ();
break;
// i guess we just ignore invalid and just error out

View File

@ -142,6 +142,7 @@ public:
std::vector<std::unique_ptr<AST::LifetimeParam> > parse_lifetime_params ();
AST::Visibility parse_visibility ();
std::unique_ptr<AST::IdentifierPattern> parse_identifier_pattern ();
std::unique_ptr<AST::TokenTree> parse_token_tree ();
private:
void skip_after_semicolon ();
@ -188,7 +189,6 @@ private:
// Token tree or macro related
AST::DelimTokenTree parse_delim_token_tree ();
std::unique_ptr<AST::TokenTree> parse_token_tree ();
std::unique_ptr<AST::MacroRulesDefinition>
parse_macro_rules_def (AST::AttrVec outer_attrs);
std::unique_ptr<AST::MacroInvocation>

View File

@ -751,6 +751,8 @@ Mappings::insert_macro_def (AST::MacroRulesDefinition *macro)
{"assert", MacroBuiltin::assert},
{"file", MacroBuiltin::file},
{"column", MacroBuiltin::column},
{"include_bytes", MacroBuiltin::include_bytes},
{"include_str", MacroBuiltin::include_str},
};
auto builtin = builtin_macros.find (macro->get_rule_name ());

View File

@ -0,0 +1,12 @@
// Stub declaration of include_bytes!. Its only rule takes no arguments and
// expands to an empty block.
// NOTE(review): presumably the compiler substitutes its builtin expansion for
// a macro declared under this name - confirm against the macro mappings.
macro_rules! include_bytes {
() => {{}};
}
// Exercises argument checking of include_bytes!: a non-literal argument, no
// argument and two arguments must each be diagnosed, while a single string
// literal (with or without a trailing comma) is accepted.
fn main () {
// `file` is an identifier, not a string literal, so passing it is rejected.
let file = "include.txt";
include_bytes! (file); // { dg-error "argument must be a string literal" "" }
include_bytes! (); // { dg-error "macro takes 1 argument" "" }
include_bytes! ("foo.txt", "bar.txt"); // { dg-error "macro takes 1 argument" "" }
include_bytes! ("builtin_macro_include_bytes.rs"); // ok
include_bytes! ("builtin_macro_include_bytes.rs",); // trailing comma ok
}

View File

@ -0,0 +1,12 @@
// Stub declaration of include_str!. Its only rule takes no arguments and
// expands to an empty block.
// NOTE(review): presumably the compiler substitutes its builtin expansion for
// a macro declared under this name - confirm against the macro mappings.
macro_rules! include_str {
() => {{}};
}
// Exercises argument checking of include_str!: a non-literal argument, no
// argument and two arguments must each be diagnosed, while a single string
// literal (with or without a trailing comma) is accepted.
fn main () {
// `file` is an identifier, not a string literal, so passing it is rejected.
let file = "include.txt";
include_str! (file); // { dg-error "argument must be a string literal" "" }
include_str! (); // { dg-error "macro takes 1 argument" "" }
include_str! ("foo.txt", "bar.txt"); // { dg-error "macro takes 1 argument" "" }
include_str! ("builtin_macro_include_str.rs"); // ok
include_str! ("builtin_macro_include_str.rs",); // trailing comma ok
}

View File

@ -0,0 +1,44 @@
// { dg-output "104\n33\n1\n" }
// Stub declaration of include_bytes!. Its only rule takes no arguments and
// expands to an empty block.
// NOTE(review): presumably the compiler substitutes its builtin expansion for
// a macro declared under this name - confirm against the macro mappings.
macro_rules! include_bytes {
() => {{}};
}
// Declaration of the variadic C `printf`, used to produce the test output.
extern "C" {
fn printf(s: *const i8, ...);
}
// Prints `value` followed by a newline through the C `printf`.
fn print_int(value: i32) {
// The literal carries its own trailing NUL so it can be handed to C as a
// `*const i8` format string.
let s = "%d\n\0" as *const str as *const i8;
printf(s, value);
}
// Includes "include.txt" as bytes, prints bytes 0 and 14, then prints whether
// all 16 bytes equal the expected byte-string literal. With the expected file
// contents this prints 104 ('h'), 33 ('!') and 1, matching the output
// directive at the top of the file.
fn main() -> i32 {
let bytes = include_bytes! ("include.txt");
print_int (bytes[0] as i32);
print_int (bytes[14] as i32);
// Expected contents of include.txt, including the trailing newline.
let the_bytes = b"hello, include!\n";
// Element-by-element comparison of indices 0 through 15.
let x = bytes[0] == the_bytes[0]
&& bytes[1] == the_bytes [1]
&& bytes[2] == the_bytes [2]
&& bytes[3] == the_bytes [3]
&& bytes[4] == the_bytes [4]
&& bytes[5] == the_bytes [5]
&& bytes[6] == the_bytes [6]
&& bytes[7] == the_bytes [7]
&& bytes[8] == the_bytes [8]
&& bytes[9] == the_bytes [9]
&& bytes[10] == the_bytes [10]
&& bytes[11] == the_bytes [11]
&& bytes[12] == the_bytes [12]
&& bytes[13] == the_bytes [13]
&& bytes[14] == the_bytes [14]
&& bytes[15] == the_bytes [15];
print_int (x as i32);
0
}

View File

@ -0,0 +1,23 @@
// { dg-output "hello, include!\n" }
// Stub declaration of include_str!. Its only rule takes no arguments and
// expands to an empty block.
// NOTE(review): presumably the compiler substitutes its builtin expansion for
// a macro declared under this name - confirm against the macro mappings.
macro_rules! include_str {
() => {{}};
}
// Declaration of the variadic C `printf`, used to produce the test output.
extern "C" {
fn printf(fmt: *const i8, ...);
}
// Prints `s` through the C `printf` using a "%s" format.
// NOTE(review): neither the format literal nor `s` is visibly NUL-terminated
// here; the test presumably relies on how the compiler lays out string data -
// confirm.
fn print(s: &str) {
printf("%s" as *const str as *const i8, s as *const str as *const i8);
}
// Includes "include.txt" as a string and prints it; the expected output is
// given by the output directive at the top of the file.
fn main() -> i32 {
// include_str! (and include_bytes!) allow for an optional trailing comma.
let my_str = include_str! ("include.txt",);
print (my_str);
0
}

View File

@ -0,0 +1 @@
hello, include!

View File

@ -0,0 +1,13 @@
// Matches exactly one token tree and expands to it unchanged.
macro_rules! t {
($t:tt) => {
$t
};
}
// Passes a single-token tree (`15`) and a delimited tree (`(14)`) through
// `t!`; the sum is 29.
fn frob() -> i32 {
t!(15) + t!((14))
}
// Returns 0 when both `t!` expansions in `frob` produced the expected values.
fn main() -> i32 {
frob() - 29
}

View File

@ -0,0 +1,12 @@
// Counts the token trees it is given by consuming one per recursive
// expansion: a single tree counts as 1, otherwise the first tree counts as 1
// and the macro recurses on the remaining ones.
macro_rules! count_tt {
($t:tt) => { 1 };
($t:tt $($ts:tt)*) => { 1 + count_tt!($($ts)*) };
}
// Returns 0 when the two invocations count 6 + 3 = 9 token trees in total.
fn main() -> i32 {
let count = count_tt!(1 2 let a = 15) + count_tt!(1 2 (let a = 15));
// First invocation: 6 token trees (`1`, `2`, `let`, `a`, `=`, `15`).
// Second invocation: 3 token trees (`1`, `2` and the parenthesized group,
// which counts as a single delimited token tree).
count - 9
}