From 5651331236d39ce685e1de3396463fd88c3a83d2 Mon Sep 17 00:00:00 2001 From: Arthur Cohen Date: Thu, 24 Mar 2022 15:03:20 +0100 Subject: [PATCH 1/2] macros: Allow parsing :tt fragments :tt fragments stand for token trees, and are composed of either a token, or a delimited token tree, which is a token tree surrounded by delimiters (parentheses, curly brackets or square brackets). This should allow us to handle a lot more macros, including extremely powerful macro patterns such as TT munchers --- gcc/rust/expand/rust-macro-expand.cc | 4 +--- gcc/rust/parse/rust-parse.h | 2 +- gcc/testsuite/rust/execute/torture/macros25.rs | 13 +++++++++++++ gcc/testsuite/rust/execute/torture/macros26.rs | 12 ++++++++++++ 4 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/rust/execute/torture/macros25.rs create mode 100644 gcc/testsuite/rust/execute/torture/macros26.rs diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc index e0dfc502609..62273448677 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -497,10 +497,8 @@ MacroExpander::match_fragment (Parser &parser, gcc_unreachable (); break; - // what is TT? case AST::MacroFragSpec::TT: - // parser.parse_token_tree() ? - gcc_unreachable (); + parser.parse_token_tree (); break; // i guess we just ignore invalid and just error out diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h index cb77033fe55..88bd311935b 100644 --- a/gcc/rust/parse/rust-parse.h +++ b/gcc/rust/parse/rust-parse.h @@ -142,6 +142,7 @@ public: std::vector > parse_lifetime_params (); AST::Visibility parse_visibility (); std::unique_ptr parse_identifier_pattern (); + std::unique_ptr parse_token_tree (); private: void skip_after_semicolon (); @@ -188,7 +189,6 @@ private: // Token tree or macro related AST::DelimTokenTree parse_delim_token_tree (); - std::unique_ptr parse_token_tree (); std::unique_ptr parse_macro_rules_def (AST::AttrVec outer_attrs); std::unique_ptr diff --git a/gcc/testsuite/rust/execute/torture/macros25.rs b/gcc/testsuite/rust/execute/torture/macros25.rs new file mode 100644 index 00000000000..c2658721bdf --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros25.rs @@ -0,0 +1,13 @@ +macro_rules! t { + ($t:tt) => { + $t + }; +} + +fn frob() -> i32 { + t!(15) + t!((14)) +} + +fn main() -> i32 { + frob() - 29 +} diff --git a/gcc/testsuite/rust/execute/torture/macros26.rs b/gcc/testsuite/rust/execute/torture/macros26.rs new file mode 100644 index 00000000000..30f0beef0d9 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros26.rs @@ -0,0 +1,12 @@ +macro_rules! count_tt { + ($t:tt) => { 1 }; + ($t:tt $($ts:tt)*) => { 1 + count_tt!($($ts)*) }; +} + +fn main() -> i32 { + let count = count_tt!(1 2 let a = 15) + count_tt!(1 2 (let a = 15)); + // ^ ^ ^^^ ^ ^ ^^ ^ ^ ^^^^^^^^^^^^ + // 6 token-trees 3 token-trees + + count - 9 +} From 261c753e56c245aadd6f842d29a7bdb5c5d11489 Mon Sep 17 00:00:00 2001 From: David Faust Date: Tue, 22 Mar 2022 10:42:52 -0700 Subject: [PATCH 2/2] macros: implement include_bytes! and include_str! --- gcc/rust/expand/rust-macro-builtins.cc | 173 ++++++++++++++++++ gcc/rust/expand/rust-macro-builtins.h | 6 + gcc/rust/util/rust-hir-map.cc | 2 + .../compile/builtin_macro_include_bytes.rs | 12 ++ .../rust/compile/builtin_macro_include_str.rs | 12 ++ .../torture/builtin_macro_include_bytes.rs | 44 +++++ .../torture/builtin_macro_include_str.rs | 23 +++ .../rust/execute/torture/include.txt | 1 + 8 files changed, 273 insertions(+) create mode 100644 gcc/testsuite/rust/compile/builtin_macro_include_bytes.rs create mode 100644 gcc/testsuite/rust/compile/builtin_macro_include_str.rs create mode 100644 gcc/testsuite/rust/execute/torture/builtin_macro_include_bytes.rs create mode 100644 gcc/testsuite/rust/execute/torture/builtin_macro_include_str.rs create mode 100644 gcc/testsuite/rust/execute/torture/include.txt diff --git a/gcc/rust/expand/rust-macro-builtins.cc b/gcc/rust/expand/rust-macro-builtins.cc index c33a2e86f88..14f60d202cc 100644 --- a/gcc/rust/expand/rust-macro-builtins.cc +++ b/gcc/rust/expand/rust-macro-builtins.cc @@ -20,6 +20,9 @@ #include "rust-diagnostics.h" #include "rust-expr.h" #include "rust-session-manager.h" +#include "rust-macro-invoc-lexer.h" +#include "rust-lex.h" +#include "rust-parse.h" namespace Rust { namespace { @@ -30,6 +33,107 @@ make_string (Location locus, std::string value) new AST::LiteralExpr (value, AST::Literal::STRING, PrimitiveCoreType::CORETYPE_STR, {}, locus)); } + +/* Parse a single string literal from the given delimited token tree, + and return the LiteralExpr for it. Allow for an optional trailing comma, + but otherwise enforce that these are the only tokens. */ + +std::unique_ptr +parse_single_string_literal (AST::DelimTokenTree &invoc_token_tree, + Location invoc_locus) +{ + MacroInvocLexer lex (invoc_token_tree.to_token_stream ()); + Parser parser (std::move (lex)); + + auto last_token_id = TokenId::RIGHT_CURLY; + switch (invoc_token_tree.get_delim_type ()) + { + case AST::DelimType::PARENS: + last_token_id = TokenId::RIGHT_PAREN; + rust_assert (parser.skip_token (LEFT_PAREN)); + break; + + case AST::DelimType::CURLY: + rust_assert (parser.skip_token (LEFT_CURLY)); + break; + + case AST::DelimType::SQUARE: + last_token_id = TokenId::RIGHT_SQUARE; + rust_assert (parser.skip_token (LEFT_SQUARE)); + break; + } + + std::unique_ptr lit_expr = nullptr; + + if (parser.peek_current_token ()->get_id () == STRING_LITERAL) + { + lit_expr = parser.parse_literal_expr (); + parser.maybe_skip_token (COMMA); + if (parser.peek_current_token ()->get_id () != last_token_id) + { + lit_expr = nullptr; + rust_error_at (invoc_locus, "macro takes 1 argument"); + } + } + else if (parser.peek_current_token ()->get_id () == last_token_id) + rust_error_at (invoc_locus, "macro takes 1 argument"); + else + rust_error_at (invoc_locus, "argument must be a string literal"); + + parser.skip_token (last_token_id); + + return lit_expr; +} + +/* Treat PATH as a path relative to the source file currently being + compiled, and return the absolute path for it. */ + +std::string +source_relative_path (std::string path, Location locus) +{ + std::string compile_fname + = Session::get_instance ().linemap->location_file (locus); + + auto dir_separator_pos = compile_fname.rfind (file_separator); + + /* If there is no file_separator in the path, use current dir ('.'). */ + std::string dirname; + if (dir_separator_pos == std::string::npos) + dirname = std::string (".") + file_separator; + else + dirname = compile_fname.substr (0, dir_separator_pos) + file_separator; + + return dirname + path; +} + +/* Read the full contents of the file FILENAME and return them in a vector. + FIXME: platform specific. */ + +std::vector +load_file_bytes (const char *filename) +{ + RAIIFile file_wrap (filename); + if (file_wrap.get_raw () == nullptr) + { + rust_error_at (Location (), "cannot open filename %s: %m", filename); + return std::vector (); + } + + FILE *f = file_wrap.get_raw (); + fseek (f, 0L, SEEK_END); + long fsize = ftell (f); + fseek (f, 0L, SEEK_SET); + + std::vector buf (fsize); + + if (fread (&buf[0], fsize, 1, f) != 1) + { + rust_error_at (Location (), "error reading file %s: %m", filename); + return std::vector (); + } + + return buf; +} } // namespace AST::ASTFragment @@ -63,4 +167,73 @@ MacroBuiltin::column (Location invoc_locus, AST::MacroInvocData &invoc) return AST::ASTFragment ({column_no}); } + +/* Expand builtin macro include_bytes!("filename"), which includes the contents + of the given file as reference to a byte array. Yields an expression of type + &'static [u8; N]. */ + +AST::ASTFragment +MacroBuiltin::include_bytes (Location invoc_locus, AST::MacroInvocData &invoc) +{ + /* Get target filename from the macro invocation, which is treated as a path + relative to the include!-ing file (currently being compiled). */ + auto lit_expr + = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus); + if (lit_expr == nullptr) + return AST::ASTFragment::create_error (); + + std::string target_filename + = source_relative_path (lit_expr->as_string (), invoc_locus); + + std::vector bytes = load_file_bytes (target_filename.c_str ()); + + /* Is there a more efficient way to do this? */ + std::vector> elts; + for (uint8_t b : bytes) + { + elts.emplace_back ( + new AST::LiteralExpr (std::string (1, (char) b), AST::Literal::BYTE, + PrimitiveCoreType::CORETYPE_U8, + {} /* outer_attrs */, invoc_locus)); + } + + auto elems = std::unique_ptr ( + new AST::ArrayElemsValues (std::move (elts), invoc_locus)); + + auto array = std::unique_ptr ( + new AST::ArrayExpr (std::move (elems), {}, {}, invoc_locus)); + + auto borrow = std::unique_ptr ( + new AST::BorrowExpr (std::move (array), false, false, {}, invoc_locus)); + + auto node = AST::SingleASTNode (std::move (borrow)); + return AST::ASTFragment ({node}); +} + +/* Expand builtin macro include_str!("filename"), which includes the contents + of the given file as a string. The file must be UTF-8 encoded. Yields an + expression of type &'static str. */ + +AST::ASTFragment +MacroBuiltin::include_str (Location invoc_locus, AST::MacroInvocData &invoc) +{ + /* Get target filename from the macro invocation, which is treated as a path + relative to the include!-ing file (currently being compiled). */ + auto lit_expr + = parse_single_string_literal (invoc.get_delim_tok_tree (), invoc_locus); + if (lit_expr == nullptr) + return AST::ASTFragment::create_error (); + + std::string target_filename + = source_relative_path (lit_expr->as_string (), invoc_locus); + + std::vector bytes = load_file_bytes (target_filename.c_str ()); + + /* FIXME: Enforce that the file contents are valid UTF-8. */ + std::string str ((const char *) &bytes[0], bytes.size ()); + + auto node = AST::SingleASTNode (make_string (invoc_locus, str)); + return AST::ASTFragment ({node}); +} + } // namespace Rust diff --git a/gcc/rust/expand/rust-macro-builtins.h b/gcc/rust/expand/rust-macro-builtins.h index ae9ba375516..8b7c016b253 100644 --- a/gcc/rust/expand/rust-macro-builtins.h +++ b/gcc/rust/expand/rust-macro-builtins.h @@ -71,6 +71,12 @@ public: static AST::ASTFragment column (Location invoc_locus, AST::MacroInvocData &invoc); + + static AST::ASTFragment include_bytes (Location invoc_locus, + AST::MacroInvocData &invoc); + + static AST::ASTFragment include_str (Location invoc_locus, + AST::MacroInvocData &invoc); }; } // namespace Rust diff --git a/gcc/rust/util/rust-hir-map.cc b/gcc/rust/util/rust-hir-map.cc index 5b0417b4549..7fbdbb0fc6d 100644 --- a/gcc/rust/util/rust-hir-map.cc +++ b/gcc/rust/util/rust-hir-map.cc @@ -751,6 +751,8 @@ Mappings::insert_macro_def (AST::MacroRulesDefinition *macro) {"assert", MacroBuiltin::assert}, {"file", MacroBuiltin::file}, {"column", MacroBuiltin::column}, + {"include_bytes", MacroBuiltin::include_bytes}, + {"include_str", MacroBuiltin::include_str}, }; auto builtin = builtin_macros.find (macro->get_rule_name ()); diff --git a/gcc/testsuite/rust/compile/builtin_macro_include_bytes.rs b/gcc/testsuite/rust/compile/builtin_macro_include_bytes.rs new file mode 100644 index 00000000000..966c073a794 --- /dev/null +++ b/gcc/testsuite/rust/compile/builtin_macro_include_bytes.rs @@ -0,0 +1,12 @@ +macro_rules! include_bytes { + () => {{}}; +} + +fn main () { + let file = "include.txt"; + include_bytes! (file); // { dg-error "argument must be a string literal" "" } + include_bytes! (); // { dg-error "macro takes 1 argument" "" } + include_bytes! ("foo.txt", "bar.txt"); // { dg-error "macro takes 1 argument" "" } + include_bytes! ("builtin_macro_include_bytes.rs"); // ok + include_bytes! ("builtin_macro_include_bytes.rs",); // trailing comma ok +} diff --git a/gcc/testsuite/rust/compile/builtin_macro_include_str.rs b/gcc/testsuite/rust/compile/builtin_macro_include_str.rs new file mode 100644 index 00000000000..3e559cb92cb --- /dev/null +++ b/gcc/testsuite/rust/compile/builtin_macro_include_str.rs @@ -0,0 +1,12 @@ +macro_rules! include_str { + () => {{}}; +} + +fn main () { + let file = "include.txt"; + include_str! (file); // { dg-error "argument must be a string literal" "" } + include_str! (); // { dg-error "macro takes 1 argument" "" } + include_str! ("foo.txt", "bar.txt"); // { dg-error "macro takes 1 argument" "" } + include_str! ("builtin_macro_include_str.rs"); // ok + include_str! ("builtin_macro_include_str.rs",); // trailing comma ok +} diff --git a/gcc/testsuite/rust/execute/torture/builtin_macro_include_bytes.rs b/gcc/testsuite/rust/execute/torture/builtin_macro_include_bytes.rs new file mode 100644 index 00000000000..3f7ebd288d9 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/builtin_macro_include_bytes.rs @@ -0,0 +1,44 @@ +// { dg-output "104\n33\n1\n" } + +macro_rules! include_bytes { + () => {{}}; +} + +extern "C" { + fn printf(s: *const i8, ...); +} + +fn print_int(value: i32) { + let s = "%d\n\0" as *const str as *const i8; + printf(s, value); +} + +fn main() -> i32 { + let bytes = include_bytes! ("include.txt"); + + print_int (bytes[0] as i32); + print_int (bytes[14] as i32); + + let the_bytes = b"hello, include!\n"; + + let x = bytes[0] == the_bytes[0] + && bytes[1] == the_bytes [1] + && bytes[2] == the_bytes [2] + && bytes[3] == the_bytes [3] + && bytes[4] == the_bytes [4] + && bytes[5] == the_bytes [5] + && bytes[6] == the_bytes [6] + && bytes[7] == the_bytes [7] + && bytes[8] == the_bytes [8] + && bytes[9] == the_bytes [9] + && bytes[10] == the_bytes [10] + && bytes[11] == the_bytes [11] + && bytes[12] == the_bytes [12] + && bytes[13] == the_bytes [13] + && bytes[14] == the_bytes [14] + && bytes[15] == the_bytes [15]; + + print_int (x as i32); + + 0 +} diff --git a/gcc/testsuite/rust/execute/torture/builtin_macro_include_str.rs b/gcc/testsuite/rust/execute/torture/builtin_macro_include_str.rs new file mode 100644 index 00000000000..095d7cbc6e8 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/builtin_macro_include_str.rs @@ -0,0 +1,23 @@ +// { dg-output "hello, include!\n" } + +macro_rules! include_str { + () => {{}}; +} + +extern "C" { + fn printf(fmt: *const i8, ...); +} + +fn print(s: &str) { + printf("%s" as *const str as *const i8, s as *const str as *const i8); +} + + +fn main() -> i32 { + // include_str! (and include_bytes!) allow for an optional trailing comma. + let my_str = include_str! ("include.txt",); + + print (my_str); + + 0 +} diff --git a/gcc/testsuite/rust/execute/torture/include.txt b/gcc/testsuite/rust/execute/torture/include.txt new file mode 100644 index 00000000000..12c368778e1 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/include.txt @@ -0,0 +1 @@ +hello, include!