Rollup merge of #68848 - nnethercote:hasten-macro-parsing, r=petrochenkov

Hasten macro parsing

r? @eddyb
This commit is contained in:
Dylan DPC 2020-02-13 02:52:48 +01:00 committed by GitHub
commit 87ba8f2a19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 73 additions and 64 deletions

View File

@ -1,3 +1,4 @@
#![feature(cow_is_borrowed)]
#![feature(crate_visibility_modifier)]
#![feature(decl_macro)]
#![feature(proc_macro_diagnostic)]

View File

@ -78,13 +78,11 @@ use crate::mbe::{self, TokenTree};
use rustc_ast_pretty::pprust;
use rustc_parse::parser::{FollowedByType, Parser, PathStyle};
use rustc_parse::Directory;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{kw, sym, Symbol};
use syntax::ast::{Ident, Name};
use syntax::ptr::P;
use syntax::token::{self, DocComment, Nonterminal, Token};
use syntax::tokenstream::TokenStream;
use rustc_errors::{FatalError, PResult};
use rustc_span::Span;
@ -92,6 +90,7 @@ use smallvec::{smallvec, SmallVec};
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use std::borrow::Cow;
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::mem;
use std::ops::{Deref, DerefMut};
@ -613,28 +612,9 @@ fn inner_parse_loop<'root, 'tt>(
Success(())
}
/// Use the given sequence of token trees (`ms`) as a matcher. Match the given token stream `tts`
/// against it and return the match.
///
/// # Parameters
///
/// - `sess`: The session into which errors are emitted
/// - `tts`: The tokenstream we are matching against the pattern `ms`
/// - `ms`: A sequence of token trees representing a pattern against which we are matching
/// - `directory`: Information about the file locations (needed for the black-box parser)
/// - `recurse_into_modules`: Whether or not to recurse into modules (needed for the black-box
/// parser)
pub(super) fn parse(
sess: &ParseSess,
tts: TokenStream,
ms: &[TokenTree],
directory: Option<Directory<'_>>,
recurse_into_modules: bool,
) -> NamedParseResult {
// Create a parser that can be used for the "black box" parts.
let mut parser =
Parser::new(sess, tts, directory, recurse_into_modules, true, rustc_parse::MACRO_ARGUMENTS);
/// Use the given sequence of token trees (`ms`) as a matcher. Match the token
/// stream from the given `parser` against it and return the match.
pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> NamedParseResult {
// A queue of possible matcher positions. We initialize it with the matcher position in which
// the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then
// processes all of these possible matcher positions and produces possible next positions into
@ -659,7 +639,7 @@ pub(super) fn parse(
// parsing from the black-box parser done. The result is that `next_items` will contain a
// bunch of possible next matcher positions.
match inner_parse_loop(
sess,
parser.sess,
&mut cur_items,
&mut next_items,
&mut eof_items,
@ -684,7 +664,7 @@ pub(super) fn parse(
if eof_items.len() == 1 {
let matches =
eof_items[0].matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap());
return nameize(sess, ms, matches);
return nameize(parser.sess, ms, matches);
} else if eof_items.len() > 1 {
return Error(
parser.token.span,
@ -709,9 +689,14 @@ pub(super) fn parse(
// unnecessary implicit clone later in Rc::make_mut.
drop(eof_items);
// If there are no possible next positions AND we aren't waiting for the black-box parser,
// then there is a syntax error.
if bb_items.is_empty() && next_items.is_empty() {
return Failure(parser.token.clone(), "no rules expected this token in macro call");
}
// Another possibility is that we need to call out to the black-box parser to parse some
// Rust nonterminal. However, if there is not EXACTLY ONE of these, something is wrong.
if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
else if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
let nts = bb_items
.iter()
.map(|item| match item.top_elts.get_tt(item.idx) {
@ -733,16 +718,11 @@ pub(super) fn parse(
),
);
}
// If there are no possible next positions AND we aren't waiting for the black-box parser,
// then there is a syntax error.
else if bb_items.is_empty() && next_items.is_empty() {
return Failure(parser.token.take(), "no rules expected this token in macro call");
}
// Dump all possible `next_items` into `cur_items` for the next iteration.
else if !next_items.is_empty() {
// Now process the next token
cur_items.extend(next_items.drain(..));
parser.bump();
parser.to_mut().bump();
}
// Finally, we have the case where we need to call the black-box parser to get some
// nonterminal.
@ -754,7 +734,7 @@ pub(super) fn parse(
let match_cur = item.match_cur;
item.push_match(
match_cur,
MatchedNonterminal(Lrc::new(parse_nt(&mut parser, span, ident.name))),
MatchedNonterminal(Lrc::new(parse_nt(parser.to_mut(), span, ident.name))),
);
item.idx += 1;
item.match_cur += 1;

View File

@ -1,11 +1,11 @@
use crate::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander};
use crate::base::{DummyResult, ExpansionData, ExtCtxt, MacResult, TTMacroExpander};
use crate::base::{SyntaxExtension, SyntaxExtensionKind};
use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind};
use crate::mbe;
use crate::mbe::macro_check;
use crate::mbe::macro_parser::parse;
use crate::mbe::macro_parser::parse_tt;
use crate::mbe::macro_parser::{Error, Failure, Success};
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, NamedParseResult};
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
use crate::mbe::transcribe::transcribe;
use rustc_ast_pretty::pprust;
@ -166,9 +166,9 @@ impl TTMacroExpander for MacroRulesMacroExpander {
}
}
fn trace_macros_note(cx: &mut ExtCtxt<'_>, sp: Span, message: String) {
fn trace_macros_note(cx_expansions: &mut FxHashMap<Span, Vec<String>>, sp: Span, message: String) {
let sp = sp.macro_backtrace().last().map(|trace| trace.call_site).unwrap_or(sp);
cx.expansions.entry(sp).or_default().push(message);
cx_expansions.entry(sp).or_default().push(message);
}
/// Given `lhses` and `rhses`, this is the new macro we create
@ -184,12 +184,36 @@ fn generic_extension<'cx>(
) -> Box<dyn MacResult + 'cx> {
if cx.trace_macros() {
let msg = format!("expanding `{}! {{ {} }}`", name, pprust::tts_to_string(arg.clone()));
trace_macros_note(cx, sp, msg);
trace_macros_note(&mut cx.expansions, sp, msg);
}
// Which arm's failure should we report? (the one furthest along)
let mut best_failure: Option<(Token, &str)> = None;
// We create a base parser that can be used for the "black box" parts.
// Every iteration needs a fresh copy of that base parser. However, the
// parser is not mutated on many of the iterations, particularly when
// dealing with macros like this:
//
// macro_rules! foo {
// ("a") => (A);
// ("b") => (B);
// ("c") => (C);
// // ... etc. (maybe hundreds more)
// }
//
// as seen in the `html5ever` benchmark. We use a `Cow` so that the base
// parser is only cloned when necessary (upon mutation). Furthermore, we
// reinitialize the `Cow` with the base parser at the start of every
// iteration, so that any mutated parsers are not reused. This is all quite
// hacky, but speeds up the `html5ever` benchmark significantly. (Issue
// 68836 suggests a more comprehensive but more complex change to deal with
// this situation.)
let base_parser = base_parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());
for (i, lhs) in lhses.iter().enumerate() {
let mut parser = Cow::Borrowed(&base_parser);
// try each arm's matchers
let lhs_tt = match *lhs {
mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
@ -202,7 +226,7 @@ fn generic_extension<'cx>(
// are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
let mut gated_spans_snaphot = mem::take(&mut *cx.parse_sess.gated_spans.spans.borrow_mut());
match parse_tt(cx, lhs_tt, arg.clone()) {
match parse_tt(&mut parser, lhs_tt) {
Success(named_matches) => {
// The matcher was `Success(..)`ful.
// Merge the gated spans from parsing the matcher with the pre-existing ones.
@ -232,11 +256,11 @@ fn generic_extension<'cx>(
if cx.trace_macros() {
let msg = format!("to `{}`", pprust::tts_to_string(tts.clone()));
trace_macros_note(cx, sp, msg);
trace_macros_note(&mut cx.expansions, sp, msg);
}
let directory = Directory {
path: Cow::from(cx.current_expansion.module.directory.as_path()),
path: cx.current_expansion.module.directory.clone(),
ownership: cx.current_expansion.directory_ownership,
};
let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false, None);
@ -269,6 +293,7 @@ fn generic_extension<'cx>(
// Restore to the state before snapshotting and maybe try again.
mem::swap(&mut gated_spans_snaphot, &mut cx.parse_sess.gated_spans.spans.borrow_mut());
}
drop(base_parser);
let (token, label) = best_failure.expect("ran no matchers");
let span = token.span.substitute_dummy(sp);
@ -286,7 +311,9 @@ fn generic_extension<'cx>(
mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
_ => continue,
};
match parse_tt(cx, lhs_tt, arg.clone()) {
let base_parser =
base_parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());
match parse_tt(&mut Cow::Borrowed(&base_parser), lhs_tt) {
Success(_) => {
if comma_span.is_dummy() {
err.note("you might be missing a comma");
@ -368,7 +395,8 @@ pub fn compile_declarative_macro(
),
];
let argument_map = match parse(sess, body, &argument_gram, None, true) {
let base_parser = Parser::new(sess, body, None, true, true, rustc_parse::MACRO_ARGUMENTS);
let argument_map = match parse_tt(&mut Cow::Borrowed(&base_parser), &argument_gram) {
Success(m) => m,
Failure(token, msg) => {
let s = parse_failure_msg(&token);
@ -1184,14 +1212,16 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
}
}
/// Use this token tree as a matcher to parse given tts.
fn parse_tt(cx: &ExtCtxt<'_>, mtch: &[mbe::TokenTree], tts: TokenStream) -> NamedParseResult {
// `None` is because we're not interpolating
fn base_parser_from_cx<'cx>(
current_expansion: &'cx ExpansionData,
sess: &'cx ParseSess,
tts: TokenStream,
) -> Parser<'cx> {
let directory = Directory {
path: Cow::from(cx.current_expansion.module.directory.as_path()),
ownership: cx.current_expansion.directory_ownership,
path: current_expansion.module.directory.clone(),
ownership: current_expansion.directory_ownership,
};
parse(cx.parse_sess(), tts, mtch, Some(directory), true)
Parser::new(sess, tts, Some(directory), true, true, rustc_parse::MACRO_ARGUMENTS)
}
/// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For

View File

@ -12,8 +12,7 @@ use syntax::ast;
use syntax::token::{self, Nonterminal};
use syntax::tokenstream::{self, TokenStream, TokenTree};
use std::borrow::Cow;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::str;
use log::info;
@ -29,8 +28,8 @@ pub mod validate_attr;
pub mod config;
#[derive(Clone)]
pub struct Directory<'a> {
pub path: Cow<'a, Path>,
pub struct Directory {
pub path: PathBuf,
pub ownership: DirectoryOwnership,
}
@ -274,7 +273,7 @@ pub fn stream_to_parser<'a>(
pub fn stream_to_parser_with_base_dir<'a>(
sess: &'a ParseSess,
stream: TokenStream,
base_dir: Directory<'a>,
base_dir: Directory,
) -> Parser<'a> {
Parser::new(sess, stream, Some(base_dir), true, false, None)
}

View File

@ -29,7 +29,6 @@ use syntax::token::{self, DelimToken, Token, TokenKind};
use syntax::tokenstream::{self, DelimSpan, TokenStream, TokenTree, TreeAndJoint};
use syntax::util::comments::{doc_comment_style, strip_doc_comment_decoration};
use std::borrow::Cow;
use std::path::PathBuf;
use std::{cmp, mem, slice};
@ -108,7 +107,7 @@ pub struct Parser<'a> {
pub prev_span: Span,
restrictions: Restrictions,
/// Used to determine the path to externally loaded source files.
pub(super) directory: Directory<'a>,
pub(super) directory: Directory,
/// `true` to parse sub-modules in other files.
// Public for rustfmt usage.
pub recurse_into_file_modules: bool,
@ -370,7 +369,7 @@ impl<'a> Parser<'a> {
pub fn new(
sess: &'a ParseSess,
tokens: TokenStream,
directory: Option<Directory<'a>>,
directory: Option<Directory>,
recurse_into_file_modules: bool,
desugar_doc_comments: bool,
subparser_name: Option<&'static str>,
@ -385,7 +384,7 @@ impl<'a> Parser<'a> {
restrictions: Restrictions::empty(),
recurse_into_file_modules,
directory: Directory {
path: Cow::from(PathBuf::new()),
path: PathBuf::new(),
ownership: DirectoryOwnership::Owned { relative: None },
},
root_module_name: None,
@ -413,7 +412,7 @@ impl<'a> Parser<'a> {
&sess.source_map().lookup_char_pos(parser.token.span.lo()).file.unmapped_path
{
if let Some(directory_path) = path.parent() {
parser.directory.path = Cow::from(directory_path.to_path_buf());
parser.directory.path = directory_path.to_path_buf();
}
}
}

View File

@ -285,7 +285,7 @@ impl<'a> Parser<'a> {
fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) {
if let Some(path) = attr::first_attr_value_str_by_name(attrs, sym::path) {
self.directory.path.to_mut().push(&*path.as_str());
self.directory.path.push(&*path.as_str());
self.directory.ownership = DirectoryOwnership::Owned { relative: None };
} else {
// We have to push on the current module name in the case of relative
@ -297,10 +297,10 @@ impl<'a> Parser<'a> {
if let DirectoryOwnership::Owned { relative } = &mut self.directory.ownership {
if let Some(ident) = relative.take() {
// remove the relative offset
self.directory.path.to_mut().push(&*ident.as_str());
self.directory.path.push(&*ident.as_str());
}
}
self.directory.path.to_mut().push(&*id.as_str());
self.directory.path.push(&*id.as_str());
}
}
}