From 6bf2cc2229768faa8e86e0e8a9f5bd8ebfc817a2 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 5 Feb 2020 09:44:03 +1100 Subject: [PATCH 1/3] Avoid instantiating many `Parser` structs in `generic_extension`. Currently, every iteration of the main loop in `generic_extension` instantiates a `Parser`, which is expensive because `Parser` is a large type. Many of those instantiations are only used immutably, particularly for simple-but-repetitive macros of the sort seen in `html5ever` and PR 68836. This commit initializes a single "base" parser outside the loop, and then uses `Cow` to avoid cloning it except for the mutating iterations. This speeds up `html5ever` runs by up to 15%. --- src/librustc_expand/lib.rs | 1 + src/librustc_expand/mbe/macro_parser.rs | 38 ++++----------- src/librustc_expand/mbe/macro_rules.rs | 62 ++++++++++++++++++------- 3 files changed, 56 insertions(+), 45 deletions(-) diff --git a/src/librustc_expand/lib.rs b/src/librustc_expand/lib.rs index 4fe7c268c4f..f119c956ced 100644 --- a/src/librustc_expand/lib.rs +++ b/src/librustc_expand/lib.rs @@ -1,3 +1,4 @@ +#![feature(cow_is_borrowed)] #![feature(crate_visibility_modifier)] #![feature(decl_macro)] #![feature(proc_macro_diagnostic)] diff --git a/src/librustc_expand/mbe/macro_parser.rs b/src/librustc_expand/mbe/macro_parser.rs index b14725fd731..78f22f3e443 100644 --- a/src/librustc_expand/mbe/macro_parser.rs +++ b/src/librustc_expand/mbe/macro_parser.rs @@ -78,13 +78,11 @@ use crate::mbe::{self, TokenTree}; use rustc_ast_pretty::pprust; use rustc_parse::parser::{FollowedByType, Parser, PathStyle}; -use rustc_parse::Directory; use rustc_session::parse::ParseSess; use rustc_span::symbol::{kw, sym, Symbol}; use syntax::ast::{Ident, Name}; use syntax::ptr::P; use syntax::token::{self, DocComment, Nonterminal, Token}; -use syntax::tokenstream::TokenStream; use rustc_errors::{FatalError, PResult}; use rustc_span::Span; @@ -92,6 +90,7 @@ use smallvec::{smallvec, SmallVec}; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::Lrc; +use std::borrow::Cow; use std::collections::hash_map::Entry::{Occupied, Vacant}; use std::mem; use std::ops::{Deref, DerefMut}; @@ -613,28 +612,9 @@ fn inner_parse_loop<'root, 'tt>( Success(()) } -/// Use the given sequence of token trees (`ms`) as a matcher. Match the given token stream `tts` -/// against it and return the match. -/// -/// # Parameters -/// -/// - `sess`: The session into which errors are emitted -/// - `tts`: The tokenstream we are matching against the pattern `ms` -/// - `ms`: A sequence of token trees representing a pattern against which we are matching -/// - `directory`: Information about the file locations (needed for the black-box parser) -/// - `recurse_into_modules`: Whether or not to recurse into modules (needed for the black-box -/// parser) -pub(super) fn parse( - sess: &ParseSess, - tts: TokenStream, - ms: &[TokenTree], - directory: Option>, - recurse_into_modules: bool, -) -> NamedParseResult { - // Create a parser that can be used for the "black box" parts. - let mut parser = - Parser::new(sess, tts, directory, recurse_into_modules, true, rustc_parse::MACRO_ARGUMENTS); - +/// Use the given sequence of token trees (`ms`) as a matcher. Match the token +/// stream from the given `parser` against it and return the match. +pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> NamedParseResult { // A queue of possible matcher positions. We initialize it with the matcher position in which // the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then // processes all of these possible matcher positions and produces possible next positions into @@ -659,7 +639,7 @@ pub(super) fn parse( // parsing from the black-box parser done. The result is that `next_items` will contain a // bunch of possible next matcher positions in `next_items`. match inner_parse_loop( - sess, + parser.sess, &mut cur_items, &mut next_items, &mut eof_items, @@ -684,7 +664,7 @@ pub(super) fn parse( if eof_items.len() == 1 { let matches = eof_items[0].matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap()); - return nameize(sess, ms, matches); + return nameize(parser.sess, ms, matches); } else if eof_items.len() > 1 { return Error( parser.token.span, @@ -736,13 +716,13 @@ pub(super) fn parse( // If there are no possible next positions AND we aren't waiting for the black-box parser, // then there is a syntax error. else if bb_items.is_empty() && next_items.is_empty() { - return Failure(parser.token.take(), "no rules expected this token in macro call"); + return Failure(parser.token.clone(), "no rules expected this token in macro call"); } // Dump all possible `next_items` into `cur_items` for the next iteration. else if !next_items.is_empty() { // Now process the next token cur_items.extend(next_items.drain(..)); - parser.bump(); + parser.to_mut().bump(); } // Finally, we have the case where we need to call the black-box parser to get some // nonterminal. @@ -754,7 +734,7 @@ pub(super) fn parse( let match_cur = item.match_cur; item.push_match( match_cur, - MatchedNonterminal(Lrc::new(parse_nt(&mut parser, span, ident.name))), + MatchedNonterminal(Lrc::new(parse_nt(parser.to_mut(), span, ident.name))), ); item.idx += 1; item.match_cur += 1; diff --git a/src/librustc_expand/mbe/macro_rules.rs b/src/librustc_expand/mbe/macro_rules.rs index 29d41543fbf..9432790e78c 100644 --- a/src/librustc_expand/mbe/macro_rules.rs +++ b/src/librustc_expand/mbe/macro_rules.rs @@ -1,11 +1,11 @@ -use crate::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander}; +use crate::base::{DummyResult, ExpansionData, ExtCtxt, MacResult, TTMacroExpander}; use crate::base::{SyntaxExtension, SyntaxExtensionKind}; use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind}; use crate::mbe; use crate::mbe::macro_check; -use crate::mbe::macro_parser::parse; +use crate::mbe::macro_parser::parse_tt; use crate::mbe::macro_parser::{Error, Failure, Success}; -use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, NamedParseResult}; +use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq}; use crate::mbe::transcribe::transcribe; use rustc_ast_pretty::pprust; @@ -166,9 +166,9 @@ impl TTMacroExpander for MacroRulesMacroExpander { } } -fn trace_macros_note(cx: &mut ExtCtxt<'_>, sp: Span, message: String) { +fn trace_macros_note(cx_expansions: &mut FxHashMap>, sp: Span, message: String) { let sp = sp.macro_backtrace().last().map(|trace| trace.call_site).unwrap_or(sp); - cx.expansions.entry(sp).or_default().push(message); + cx_expansions.entry(sp).or_default().push(message); } /// Given `lhses` and `rhses`, this is the new macro we create @@ -184,12 +184,36 @@ fn generic_extension<'cx>( ) -> Box { if cx.trace_macros() { let msg = format!("expanding `{}! {{ {} }}`", name, pprust::tts_to_string(arg.clone())); - trace_macros_note(cx, sp, msg); + trace_macros_note(&mut cx.expansions, sp, msg); } // Which arm's failure should we report? (the one furthest along) let mut best_failure: Option<(Token, &str)> = None; + + // We create a base parser that can be used for the "black box" parts. + // Every iteration needs a fresh copy of that base parser. However, the + // parser is not mutated on many of the iterations, particularly when + // dealing with macros like this: + // + // macro_rules! foo { + // ("a") => (A); + // ("b") => (B); + // ("c") => (C); + // // ... etc. (maybe hundreds more) + // } + // + // as seen in the `html5ever` benchmark. We use a `Cow` so that the base + // parser is only cloned when necessary (upon mutation). Furthermore, we + // reinitialize the `Cow` with the base parser at the start of every + // iteration, so that any mutated parsers are not reused. This is all quite + // hacky, but speeds up the `html5ever` benchmark significantly. (Issue + // 68836 suggests a more comprehensive but more complex change to deal with + // this situation.) + let base_parser = base_parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone()); + for (i, lhs) in lhses.iter().enumerate() { + let mut parser = Cow::Borrowed(&base_parser); + // try each arm's matchers let lhs_tt = match *lhs { mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..], @@ -202,7 +226,7 @@ fn generic_extension<'cx>( // are not recorded. On the first `Success(..)`ful matcher, the spans are merged. let mut gated_spans_snaphot = mem::take(&mut *cx.parse_sess.gated_spans.spans.borrow_mut()); - match parse_tt(cx, lhs_tt, arg.clone()) { + match parse_tt(&mut parser, lhs_tt) { Success(named_matches) => { // The matcher was `Success(..)`ful. // Merge the gated spans from parsing the matcher with the pre-existing ones. @@ -232,7 +256,7 @@ fn generic_extension<'cx>( if cx.trace_macros() { let msg = format!("to `{}`", pprust::tts_to_string(tts.clone())); - trace_macros_note(cx, sp, msg); + trace_macros_note(&mut cx.expansions, sp, msg); } let directory = Directory { @@ -269,6 +293,7 @@ fn generic_extension<'cx>( // Restore to the state before snapshotting and maybe try again. mem::swap(&mut gated_spans_snaphot, &mut cx.parse_sess.gated_spans.spans.borrow_mut()); } + drop(base_parser); let (token, label) = best_failure.expect("ran no matchers"); let span = token.span.substitute_dummy(sp); @@ -286,7 +311,9 @@ fn generic_extension<'cx>( mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..], _ => continue, }; - match parse_tt(cx, lhs_tt, arg.clone()) { + let base_parser = + base_parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone()); + match parse_tt(&mut Cow::Borrowed(&base_parser), lhs_tt) { Success(_) => { if comma_span.is_dummy() { err.note("you might be missing a comma"); @@ -368,7 +395,8 @@ pub fn compile_declarative_macro( ), ]; - let argument_map = match parse(sess, body, &argument_gram, None, true) { + let base_parser = Parser::new(sess, body, None, true, true, rustc_parse::MACRO_ARGUMENTS); + let argument_map = match parse_tt(&mut Cow::Borrowed(&base_parser), &argument_gram) { Success(m) => m, Failure(token, msg) => { let s = parse_failure_msg(&token); @@ -1184,14 +1212,16 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String { } } -/// Use this token tree as a matcher to parse given tts. -fn parse_tt(cx: &ExtCtxt<'_>, mtch: &[mbe::TokenTree], tts: TokenStream) -> NamedParseResult { - // `None` is because we're not interpolating +fn base_parser_from_cx<'cx>( + current_expansion: &'cx ExpansionData, + sess: &'cx ParseSess, + tts: TokenStream, +) -> Parser<'cx> { let directory = Directory { - path: Cow::from(cx.current_expansion.module.directory.as_path()), - ownership: cx.current_expansion.directory_ownership, + path: Cow::from(current_expansion.module.directory.as_path()), + ownership: current_expansion.directory_ownership, }; - parse(cx.parse_sess(), tts, mtch, Some(directory), true) + Parser::new(sess, tts, Some(directory), true, true, rustc_parse::MACRO_ARGUMENTS) } /// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For From f840a955bd449810e75d8320b4c46482d6dbdec1 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 5 Feb 2020 14:33:08 +1100 Subject: [PATCH 2/3] Remove the `Cow` from `Directory`. The previous commit wrapped `Parser` within a `Cow` for the hot macro parsing path. As a result, there's no need for the `Cow` within `Directory`, because it lies within `Parser`. --- src/librustc_expand/mbe/macro_rules.rs | 4 ++-- src/librustc_parse/lib.rs | 9 ++++----- src/librustc_parse/parser/mod.rs | 9 ++++----- src/librustc_parse/parser/module.rs | 6 +++--- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/librustc_expand/mbe/macro_rules.rs b/src/librustc_expand/mbe/macro_rules.rs index 9432790e78c..9e6edee265c 100644 --- a/src/librustc_expand/mbe/macro_rules.rs +++ b/src/librustc_expand/mbe/macro_rules.rs @@ -260,7 +260,7 @@ fn generic_extension<'cx>( } let directory = Directory { - path: Cow::from(cx.current_expansion.module.directory.as_path()), + path: cx.current_expansion.module.directory.clone(), ownership: cx.current_expansion.directory_ownership, }; let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false, None); @@ -1218,7 +1218,7 @@ fn base_parser_from_cx<'cx>( tts: TokenStream, ) -> Parser<'cx> { let directory = Directory { - path: Cow::from(current_expansion.module.directory.as_path()), + path: current_expansion.module.directory.clone(), ownership: current_expansion.directory_ownership, }; Parser::new(sess, tts, Some(directory), true, true, rustc_parse::MACRO_ARGUMENTS) diff --git a/src/librustc_parse/lib.rs b/src/librustc_parse/lib.rs index cd674e3c5eb..4aad2c0f68a 100644 --- a/src/librustc_parse/lib.rs +++ b/src/librustc_parse/lib.rs @@ -12,8 +12,7 @@ use syntax::ast; use syntax::token::{self, Nonterminal}; use syntax::tokenstream::{self, TokenStream, TokenTree}; -use std::borrow::Cow; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::str; use log::info; @@ -29,8 +28,8 @@ pub mod validate_attr; pub mod config; #[derive(Clone)] -pub struct Directory<'a> { - pub path: Cow<'a, Path>, +pub struct Directory { + pub path: PathBuf, pub ownership: DirectoryOwnership, } @@ -274,7 +273,7 @@ pub fn stream_to_parser<'a>( pub fn stream_to_parser_with_base_dir<'a>( sess: &'a ParseSess, stream: TokenStream, - base_dir: Directory<'a>, + base_dir: Directory, ) -> Parser<'a> { Parser::new(sess, stream, Some(base_dir), true, false, None) } diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs index 8c1839da1cb..cb95750d984 100644 --- a/src/librustc_parse/parser/mod.rs +++ b/src/librustc_parse/parser/mod.rs @@ -29,7 +29,6 @@ use syntax::token::{self, DelimToken, Token, TokenKind}; use syntax::tokenstream::{self, DelimSpan, TokenStream, TokenTree, TreeAndJoint}; use syntax::util::comments::{doc_comment_style, strip_doc_comment_decoration}; -use std::borrow::Cow; use std::path::PathBuf; use std::{cmp, mem, slice}; @@ -114,7 +113,7 @@ pub struct Parser<'a> { prev_token_kind: PrevTokenKind, restrictions: Restrictions, /// Used to determine the path to externally loaded source files. - pub(super) directory: Directory<'a>, + pub(super) directory: Directory, /// `true` to parse sub-modules in other files. // Public for rustfmt usage. pub recurse_into_file_modules: bool, @@ -376,7 +375,7 @@ impl<'a> Parser<'a> { pub fn new( sess: &'a ParseSess, tokens: TokenStream, - directory: Option>, + directory: Option, recurse_into_file_modules: bool, desugar_doc_comments: bool, subparser_name: Option<&'static str>, @@ -390,7 +389,7 @@ impl<'a> Parser<'a> { restrictions: Restrictions::empty(), recurse_into_file_modules, directory: Directory { - path: Cow::from(PathBuf::new()), + path: PathBuf::new(), ownership: DirectoryOwnership::Owned { relative: None }, }, root_module_name: None, @@ -418,7 +417,7 @@ impl<'a> Parser<'a> { &sess.source_map().lookup_char_pos(parser.token.span.lo()).file.unmapped_path { if let Some(directory_path) = path.parent() { - parser.directory.path = Cow::from(directory_path.to_path_buf()); + parser.directory.path = directory_path.to_path_buf(); } } } diff --git a/src/librustc_parse/parser/module.rs b/src/librustc_parse/parser/module.rs index 6ce94d3c679..0c8fad03d86 100644 --- a/src/librustc_parse/parser/module.rs +++ b/src/librustc_parse/parser/module.rs @@ -285,7 +285,7 @@ impl<'a> Parser<'a> { fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) { if let Some(path) = attr::first_attr_value_str_by_name(attrs, sym::path) { - self.directory.path.to_mut().push(&*path.as_str()); + self.directory.path.push(&*path.as_str()); self.directory.ownership = DirectoryOwnership::Owned { relative: None }; } else { // We have to push on the current module name in the case of relative @@ -297,10 +297,10 @@ impl<'a> Parser<'a> { if let DirectoryOwnership::Owned { relative } = &mut self.directory.ownership { if let Some(ident) = relative.take() { // remove the relative offset - self.directory.path.to_mut().push(&*ident.as_str()); + self.directory.path.push(&*ident.as_str()); } } - self.directory.path.to_mut().push(&*id.as_str()); + self.directory.path.push(&*id.as_str()); } } } From 2a13b24d369b8619f0197993cd5dc60f7217ed72 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 5 Feb 2020 15:09:24 +1100 Subject: [PATCH 3/3] Change condition ordering in `parse_tt`. This is a small win, because `Failure` is much more common than `Success`. --- src/librustc_expand/mbe/macro_parser.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/librustc_expand/mbe/macro_parser.rs b/src/librustc_expand/mbe/macro_parser.rs index 78f22f3e443..5bf7602ea6e 100644 --- a/src/librustc_expand/mbe/macro_parser.rs +++ b/src/librustc_expand/mbe/macro_parser.rs @@ -689,9 +689,14 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na // unnecessary implicit clone later in Rc::make_mut. drop(eof_items); + // If there are no possible next positions AND we aren't waiting for the black-box parser, + // then there is a syntax error. + if bb_items.is_empty() && next_items.is_empty() { + return Failure(parser.token.clone(), "no rules expected this token in macro call"); + } // Another possibility is that we need to call out to parse some rust nonterminal // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong. - if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 { + else if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 { let nts = bb_items .iter() .map(|item| match item.top_elts.get_tt(item.idx) { @@ -713,11 +718,6 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na ), ); } - // If there are no possible next positions AND we aren't waiting for the black-box parser, - // then there is a syntax error. - else if bb_items.is_empty() && next_items.is_empty() { - return Failure(parser.token.clone(), "no rules expected this token in macro call"); - } // Dump all possible `next_items` into `cur_items` for the next iteration. else if !next_items.is_empty() { // Now process the next token