Auto merge of #77250 - Aaron1011:feature/flat-token-collection, r=petrochenkov
Rewrite `collect_tokens` implementations to use a flattened buffer. Instead of trying to collect tokens at each depth, we 'flatten' the stream as we go along, pushing open/close delimiters to our buffer just like regular tokens. Once capturing is complete, we reconstruct a nested `TokenTree::Delimited` structure, producing a normal `TokenStream`. The reconstructed `TokenStream` is not created immediately - instead, it is produced on-demand by a closure (wrapped in a new `LazyTokenStream` type). This closure stores a clone of the original `TokenCursor`, plus a record of the number of calls to `next()/next_desugared()`. This is sufficient to reconstruct the tokenstream seen by the callback without storing any additional state. If the tokenstream is never used (e.g. when a captured `macro_rules!` argument is never passed to a proc macro), we never actually create a `TokenStream`. This implementation has a number of advantages over the previous one: * It is significantly simpler, with no edge cases around capturing the start/end of a delimited group. * It can be easily extended to allow replacing tokens at an arbitrary 'depth' by just using `Vec::splice` at the proper position. This is important for PR #76130, which requires us to track information about attributes along with tokens. * The lazy approach to `TokenStream` construction allows us to easily parse an AST struct, and then decide after the fact whether we need a `TokenStream`. This will be useful when we start collecting tokens for `Attribute` - we can discard the `LazyTokenStream` if the parsed attribute doesn't need tokens (e.g. is a builtin attribute). The performance impact seems to be negligible (see https://github.com/rust-lang/rust/pull/77250#issuecomment-703960604). There is a small slowdown on a few benchmarks, but it only rises above 1% for incremental builds, where it represents a larger fraction of the much smaller instruction count.
There is a ~1% speedup on a few other incremental benchmarks - my guess is that the speedups and slowdowns will usually cancel out in practice.
This commit is contained in:
commit
22e6b9c689
@ -24,7 +24,7 @@ pub use UnsafeSource::*;
|
||||
|
||||
use crate::ptr::P;
|
||||
use crate::token::{self, CommentKind, DelimToken};
|
||||
use crate::tokenstream::{DelimSpan, TokenStream, TokenTree};
|
||||
use crate::tokenstream::{DelimSpan, LazyTokenStream, TokenStream, TokenTree};
|
||||
|
||||
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
|
||||
use rustc_data_structures::stack::ensure_sufficient_stack;
|
||||
@ -97,7 +97,7 @@ pub struct Path {
|
||||
/// The segments in the path: the things separated by `::`.
|
||||
/// Global paths begin with `kw::PathRoot`.
|
||||
pub segments: Vec<PathSegment>,
|
||||
pub tokens: Option<TokenStream>,
|
||||
pub tokens: Option<LazyTokenStream>,
|
||||
}
|
||||
|
||||
impl PartialEq<Symbol> for Path {
|
||||
@ -535,7 +535,7 @@ pub struct Block {
|
||||
/// Distinguishes between `unsafe { ... }` and `{ ... }`.
|
||||
pub rules: BlockCheckMode,
|
||||
pub span: Span,
|
||||
pub tokens: Option<TokenStream>,
|
||||
pub tokens: Option<LazyTokenStream>,
|
||||
}
|
||||
|
||||
/// A match pattern.
|
||||
@ -546,7 +546,7 @@ pub struct Pat {
|
||||
pub id: NodeId,
|
||||
pub kind: PatKind,
|
||||
pub span: Span,
|
||||
pub tokens: Option<TokenStream>,
|
||||
pub tokens: Option<LazyTokenStream>,
|
||||
}
|
||||
|
||||
impl Pat {
|
||||
@ -892,7 +892,7 @@ pub struct Stmt {
|
||||
pub id: NodeId,
|
||||
pub kind: StmtKind,
|
||||
pub span: Span,
|
||||
pub tokens: Option<TokenStream>,
|
||||
pub tokens: Option<LazyTokenStream>,
|
||||
}
|
||||
|
||||
impl Stmt {
|
||||
@ -1040,7 +1040,7 @@ pub struct Expr {
|
||||
pub kind: ExprKind,
|
||||
pub span: Span,
|
||||
pub attrs: AttrVec,
|
||||
pub tokens: Option<TokenStream>,
|
||||
pub tokens: Option<LazyTokenStream>,
|
||||
}
|
||||
|
||||
// `Expr` is used a lot. Make sure it doesn't unintentionally get bigger.
|
||||
@ -1835,7 +1835,7 @@ pub struct Ty {
|
||||
pub id: NodeId,
|
||||
pub kind: TyKind,
|
||||
pub span: Span,
|
||||
pub tokens: Option<TokenStream>,
|
||||
pub tokens: Option<LazyTokenStream>,
|
||||
}
|
||||
|
||||
impl Clone for Ty {
|
||||
@ -2408,7 +2408,7 @@ impl<D: Decoder> rustc_serialize::Decodable<D> for AttrId {
|
||||
pub struct AttrItem {
|
||||
pub path: Path,
|
||||
pub args: MacArgs,
|
||||
pub tokens: Option<TokenStream>,
|
||||
pub tokens: Option<LazyTokenStream>,
|
||||
}
|
||||
|
||||
/// A list of attributes.
|
||||
@ -2482,7 +2482,7 @@ pub enum CrateSugar {
|
||||
pub struct Visibility {
|
||||
pub kind: VisibilityKind,
|
||||
pub span: Span,
|
||||
pub tokens: Option<TokenStream>,
|
||||
pub tokens: Option<LazyTokenStream>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Encodable, Decodable, Debug)]
|
||||
@ -2569,7 +2569,7 @@ pub struct Item<K = ItemKind> {
|
||||
///
|
||||
/// Note that the tokens here do not include the outer attributes, but will
|
||||
/// include inner attributes.
|
||||
pub tokens: Option<TokenStream>,
|
||||
pub tokens: Option<LazyTokenStream>,
|
||||
}
|
||||
|
||||
impl Item {
|
||||
|
@ -16,8 +16,9 @@
|
||||
use crate::token::{self, DelimToken, Token, TokenKind};
|
||||
|
||||
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
|
||||
use rustc_data_structures::sync::Lrc;
|
||||
use rustc_data_structures::sync::{self, Lrc};
|
||||
use rustc_macros::HashStable_Generic;
|
||||
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
|
||||
use rustc_span::{Span, DUMMY_SP};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
|
||||
@ -119,6 +120,77 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
// A cloneable callback which produces a `TokenStream`. Each clone
|
||||
// of this should produce the same `TokenStream`
|
||||
//
// The callback is an `FnOnce`, so invoking it consumes it. A caller that only
// holds a reference must first obtain its own boxed copy via `clone_it` and
// invoke that copy. The `Send + Sync` supertraits are the aliases exported by
// `rustc_data_structures::sync`.
pub trait CreateTokenStream: sync::Send + sync::Sync + FnOnce() -> TokenStream {
|
||||
// Workaround for the fact that `Clone` is not object-safe
|
||||
//
// `Clone::clone` returns `Self`, which cannot be named through a trait
// object, so cloning is exposed here as a method returning a fresh
// `Box<dyn CreateTokenStream>` instead.
fn clone_it(&self) -> Box<dyn CreateTokenStream>;
|
||||
}
|
||||
|
||||
// Blanket implementation: every `'static` closure (or other callable) that is
// `Clone + Send + Sync` and returns a `TokenStream` when called with no
// arguments automatically implements `CreateTokenStream`.
impl<F: 'static + Clone + sync::Send + sync::Sync + FnOnce() -> TokenStream> CreateTokenStream
|
||||
for F
|
||||
{
|
||||
// Clones the concrete callable and re-boxes the copy as a trait object.
fn clone_it(&self) -> Box<dyn CreateTokenStream> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
// Makes a boxed callback cloneable by delegating to `CreateTokenStream::clone_it`.
impl Clone for Box<dyn CreateTokenStream> {
|
||||
fn clone(&self) -> Self {
|
||||
// Explicitly deref `&Box<dyn ..>` down to `&dyn CreateTokenStream` so that
// `clone_it` is dispatched on the boxed callable itself.
// NOTE(review): calling `self.clone_it()` directly would presumably pick
// the blanket impl for the `Box` (which is itself `Clone + FnOnce`) and
// recurse back into this function - confirm before simplifying.
let val: &(dyn CreateTokenStream) = &**self;
|
||||
val.clone_it()
|
||||
}
|
||||
}
|
||||
|
||||
/// A lazy version of `TokenStream`, which may defer creation
|
||||
/// of an actual `TokenStream` until it is needed.
|
||||
pub type LazyTokenStream = Lrc<LazyTokenStreamInner>;
|
||||
|
||||
/// The payload behind a `LazyTokenStream`: either a deferred computation
/// or an already-built stream.
#[derive(Clone)]
|
||||
pub enum LazyTokenStreamInner {
|
||||
/// A boxed callback that builds the `TokenStream` when invoked.
Lazy(Box<dyn CreateTokenStream>),
|
||||
/// A `TokenStream` that has already been constructed.
Ready(TokenStream),
|
||||
}
|
||||
|
||||
// Manual `Debug` impl: only the variant is reported, never the payload
// (both arms ignore their contents via `..`).
impl std::fmt::Debug for LazyTokenStreamInner {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
// Note: the printed names use the `LazyTokenStream::` prefix (the
// alias name), not `LazyTokenStreamInner::`.
LazyTokenStreamInner::Lazy(..) => f.debug_struct("LazyTokenStream::Lazy").finish(),
|
||||
LazyTokenStreamInner::Ready(..) => f.debug_struct("LazyTokenStream::Ready").finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LazyTokenStreamInner {
|
||||
/// Produces the `TokenStream` represented by this value.
///
/// Despite the `into_` prefix this takes `&self` and does not consume
/// the value: the `Lazy` arm invokes a clone of the stored callback and
/// the `Ready` arm returns a clone of the stored stream. Every call on a
/// `Lazy` value re-runs the callback.
pub fn into_token_stream(&self) -> TokenStream {
|
||||
match self {
|
||||
// Note that we do not cache this. If this ever becomes a performance
|
||||
// problem, we should investigate wrapping `LazyTokenStreamInner`
|
||||
// in a lock
|
||||
// The callback is `FnOnce` and we only have a shared reference, so
// a clone is made and the clone is what gets called.
LazyTokenStreamInner::Lazy(cb) => (cb.clone())(),
|
||||
LazyTokenStreamInner::Ready(stream) => stream.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Encoding is deliberately unsupported: this impl exists only to satisfy the
// trait bound and panics unconditionally if it is ever reached.
// NOTE(review): presumably AST nodes carrying lazy tokens are never expected
// to be serialized - confirm against the callers of `Encodable`.
impl<S: Encoder> Encodable<S> for LazyTokenStreamInner {
|
||||
fn encode(&self, _s: &mut S) -> Result<(), S::Error> {
|
||||
panic!("Attempted to encode LazyTokenStream");
|
||||
}
|
||||
}
|
||||
|
||||
// Decoding is deliberately unsupported: this impl exists only to satisfy the
// trait bound and panics unconditionally if it is ever reached. No value is
// ever read from the decoder.
impl<D: Decoder> Decodable<D> for LazyTokenStreamInner {
|
||||
fn decode(_d: &mut D) -> Result<Self, D::Error> {
|
||||
panic!("Attempted to decode LazyTokenStream");
|
||||
}
|
||||
}
|
||||
|
||||
// Stable hashing is deliberately unsupported: this impl exists only to satisfy
// the trait bound and panics unconditionally if it is ever reached. Nothing is
// fed into the hasher.
impl<CTX> HashStable<CTX> for LazyTokenStreamInner {
|
||||
fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
|
||||
panic!("Attempted to compute stable hash for LazyTokenStream");
|
||||
}
|
||||
}
|
||||
|
||||
/// A `TokenStream` is an abstract sequence of tokens, organized into `TokenTree`s.
|
||||
///
|
||||
/// The goal is for procedural macros to work with `TokenStream`s and `TokenTree`s
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
use rustc_ast as ast;
|
||||
use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
|
||||
use rustc_ast::tokenstream::{self, TokenStream, TokenTree};
|
||||
use rustc_ast::tokenstream::{self, LazyTokenStream, TokenStream, TokenTree};
|
||||
use rustc_ast_pretty::pprust;
|
||||
use rustc_data_structures::sync::Lrc;
|
||||
use rustc_errors::{Diagnostic, FatalError, Level, PResult};
|
||||
@ -248,29 +248,32 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
|
||||
// As a result, some AST nodes are annotated with the token stream they
|
||||
// came from. Here we attempt to extract these lossless token streams
|
||||
// before we fall back to the stringification.
|
||||
|
||||
let convert_tokens = |tokens: Option<LazyTokenStream>| tokens.map(|t| t.into_token_stream());
|
||||
|
||||
let tokens = match *nt {
|
||||
Nonterminal::NtItem(ref item) => {
|
||||
prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
|
||||
}
|
||||
Nonterminal::NtBlock(ref block) => block.tokens.clone(),
|
||||
Nonterminal::NtBlock(ref block) => convert_tokens(block.tokens.clone()),
|
||||
Nonterminal::NtStmt(ref stmt) => {
|
||||
// FIXME: We currently only collect tokens for `:stmt`
|
||||
// matchers in `macro_rules!` macros. When we start collecting
|
||||
// tokens for attributes on statements, we will need to prepend
|
||||
// attributes here
|
||||
stmt.tokens.clone()
|
||||
convert_tokens(stmt.tokens.clone())
|
||||
}
|
||||
Nonterminal::NtPat(ref pat) => pat.tokens.clone(),
|
||||
Nonterminal::NtTy(ref ty) => ty.tokens.clone(),
|
||||
Nonterminal::NtPat(ref pat) => convert_tokens(pat.tokens.clone()),
|
||||
Nonterminal::NtTy(ref ty) => convert_tokens(ty.tokens.clone()),
|
||||
Nonterminal::NtIdent(ident, is_raw) => {
|
||||
Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
|
||||
}
|
||||
Nonterminal::NtLifetime(ident) => {
|
||||
Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
|
||||
}
|
||||
Nonterminal::NtMeta(ref attr) => attr.tokens.clone(),
|
||||
Nonterminal::NtPath(ref path) => path.tokens.clone(),
|
||||
Nonterminal::NtVis(ref vis) => vis.tokens.clone(),
|
||||
Nonterminal::NtMeta(ref attr) => convert_tokens(attr.tokens.clone()),
|
||||
Nonterminal::NtPath(ref path) => convert_tokens(path.tokens.clone()),
|
||||
Nonterminal::NtVis(ref vis) => convert_tokens(vis.tokens.clone()),
|
||||
Nonterminal::NtTT(ref tt) => Some(tt.clone().into()),
|
||||
Nonterminal::NtExpr(ref expr) | Nonterminal::NtLiteral(ref expr) => {
|
||||
if expr.tokens.is_none() {
|
||||
@ -602,10 +605,10 @@ fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bool {
|
||||
fn prepend_attrs(
|
||||
sess: &ParseSess,
|
||||
attrs: &[ast::Attribute],
|
||||
tokens: Option<&tokenstream::TokenStream>,
|
||||
tokens: Option<&tokenstream::LazyTokenStream>,
|
||||
span: rustc_span::Span,
|
||||
) -> Option<tokenstream::TokenStream> {
|
||||
let tokens = tokens?;
|
||||
let tokens = tokens?.clone().into_token_stream();
|
||||
if attrs.is_empty() {
|
||||
return Some(tokens.clone());
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ use rustc_ast::attr;
|
||||
use rustc_ast::token::{self, Nonterminal};
|
||||
use rustc_ast_pretty::pprust;
|
||||
use rustc_errors::{error_code, PResult};
|
||||
use rustc_span::Span;
|
||||
use rustc_span::{sym, Span};
|
||||
|
||||
use tracing::debug;
|
||||
|
||||
@ -302,3 +302,16 @@ impl<'a> Parser<'a> {
|
||||
Err(self.struct_span_err(self.token.span, &msg))
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if at least one attribute in `attrs` might require the
/// tokens of the item it is attached to, and `false` otherwise (including
/// when `attrs` is empty).
///
/// An attribute counts as possibly needing tokens when:
/// * its name is `derive` or `cfg_attr`,
/// * its name is not a builtin attribute name according to
///   `rustc_feature::is_builtin_attr_name`, or
/// * `attr.ident()` yields `None`, in which case we conservatively answer `true`.
pub fn maybe_needs_tokens(attrs: &[ast::Attribute]) -> bool {
|
||||
attrs.iter().any(|attr| {
|
||||
if let Some(ident) = attr.ident() {
|
||||
ident.name == sym::derive
|
||||
// This might apply a custom attribute/derive
|
||||
|| ident.name == sym::cfg_attr
|
||||
|| !rustc_feature::is_builtin_attr_name(ident.name)
|
||||
} else {
|
||||
// No single identifier is available for this attribute, so it cannot
// be checked against the builtin list; assume tokens are needed.
true
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ use crate::maybe_recover_from_interpolated_ty_qpath;
|
||||
|
||||
use rustc_ast::ptr::P;
|
||||
use rustc_ast::token::{self, Token, TokenKind};
|
||||
use rustc_ast::tokenstream::Spacing;
|
||||
use rustc_ast::util::classify;
|
||||
use rustc_ast::util::literal::LitError;
|
||||
use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity};
|
||||
@ -18,7 +19,6 @@ use rustc_span::source_map::{self, Span, Spanned};
|
||||
use rustc_span::symbol::{kw, sym, Ident, Symbol};
|
||||
use rustc_span::{BytePos, Pos};
|
||||
use std::mem;
|
||||
use tracing::debug;
|
||||
|
||||
/// Possibly accepts an `token::Interpolated` expression (a pre-parsed expression
|
||||
/// dropped into the token stream, which happens while parsing the result of
|
||||
@ -459,7 +459,7 @@ impl<'a> Parser<'a> {
|
||||
/// Parses a prefix-unary-operator expr.
|
||||
fn parse_prefix_expr(&mut self, attrs: Option<AttrVec>) -> PResult<'a, P<Expr>> {
|
||||
let attrs = self.parse_or_use_outer_attributes(attrs)?;
|
||||
self.maybe_collect_tokens(!attrs.is_empty(), |this| {
|
||||
self.maybe_collect_tokens(super::attr::maybe_needs_tokens(&attrs), |this| {
|
||||
let lo = this.token.span;
|
||||
// Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr()
|
||||
let (hi, ex) = match this.token.uninterpolate().kind {
|
||||
@ -884,7 +884,7 @@ impl<'a> Parser<'a> {
|
||||
assert!(suffix.is_none());
|
||||
let symbol = Symbol::intern(&i);
|
||||
self.token = Token::new(token::Ident(symbol, false), ident_span);
|
||||
let next_token = Token::new(token::Dot, dot_span);
|
||||
let next_token = (Token::new(token::Dot, dot_span), self.token_spacing);
|
||||
self.parse_tuple_field_access_expr(lo, base, symbol, None, Some(next_token))
|
||||
}
|
||||
// 1.2 | 1.2e3
|
||||
@ -902,12 +902,14 @@ impl<'a> Parser<'a> {
|
||||
};
|
||||
let symbol1 = Symbol::intern(&i1);
|
||||
self.token = Token::new(token::Ident(symbol1, false), ident1_span);
|
||||
let next_token1 = Token::new(token::Dot, dot_span);
|
||||
// This needs to be `Spacing::Alone` to prevent regressions.
|
||||
// See issue #76399 and PR #76285 for more details
|
||||
let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone);
|
||||
let base1 =
|
||||
self.parse_tuple_field_access_expr(lo, base, symbol1, None, Some(next_token1));
|
||||
let symbol2 = Symbol::intern(&i2);
|
||||
let next_token2 = Token::new(token::Ident(symbol2, false), ident2_span);
|
||||
self.bump_with(next_token2); // `.`
|
||||
self.bump_with((next_token2, self.token_spacing)); // `.`
|
||||
self.parse_tuple_field_access_expr(lo, base1, symbol2, suffix, None)
|
||||
}
|
||||
// 1e+ | 1e- (recovered)
|
||||
@ -930,7 +932,7 @@ impl<'a> Parser<'a> {
|
||||
base: P<Expr>,
|
||||
field: Symbol,
|
||||
suffix: Option<Symbol>,
|
||||
next_token: Option<Token>,
|
||||
next_token: Option<(Token, Spacing)>,
|
||||
) -> P<Expr> {
|
||||
match next_token {
|
||||
Some(next_token) => self.bump_with(next_token),
|
||||
@ -1109,12 +1111,11 @@ impl<'a> Parser<'a> {
|
||||
|
||||
fn maybe_collect_tokens(
|
||||
&mut self,
|
||||
has_outer_attrs: bool,
|
||||
needs_tokens: bool,
|
||||
f: impl FnOnce(&mut Self) -> PResult<'a, P<Expr>>,
|
||||
) -> PResult<'a, P<Expr>> {
|
||||
if has_outer_attrs {
|
||||
if needs_tokens {
|
||||
let (mut expr, tokens) = self.collect_tokens(f)?;
|
||||
debug!("maybe_collect_tokens: Collected tokens for {:?} (tokens {:?}", expr, tokens);
|
||||
expr.tokens = Some(tokens);
|
||||
Ok(expr)
|
||||
} else {
|
||||
|
@ -116,15 +116,16 @@ impl<'a> Parser<'a> {
|
||||
Some(item.into_inner())
|
||||
});
|
||||
|
||||
let needs_tokens = super::attr::maybe_needs_tokens(&attrs);
|
||||
|
||||
let mut unclosed_delims = vec![];
|
||||
let has_attrs = !attrs.is_empty();
|
||||
let parse_item = |this: &mut Self| {
|
||||
let item = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name);
|
||||
unclosed_delims.append(&mut this.unclosed_delims);
|
||||
item
|
||||
};
|
||||
|
||||
let (mut item, tokens) = if has_attrs {
|
||||
let (mut item, tokens) = if needs_tokens {
|
||||
let (item, tokens) = self.collect_tokens(parse_item)?;
|
||||
(item, Some(tokens))
|
||||
} else {
|
||||
|
@ -16,13 +16,15 @@ pub use path::PathStyle;
|
||||
|
||||
use rustc_ast::ptr::P;
|
||||
use rustc_ast::token::{self, DelimToken, Token, TokenKind};
|
||||
use rustc_ast::tokenstream::{self, DelimSpan, TokenStream, TokenTree, TreeAndSpacing};
|
||||
use rustc_ast::tokenstream::{self, DelimSpan, LazyTokenStream, LazyTokenStreamInner, Spacing};
|
||||
use rustc_ast::tokenstream::{TokenStream, TokenTree};
|
||||
use rustc_ast::DUMMY_NODE_ID;
|
||||
use rustc_ast::{self as ast, AnonConst, AttrStyle, AttrVec, Const, CrateSugar, Extern, Unsafe};
|
||||
use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit};
|
||||
use rustc_ast::{Visibility, VisibilityKind};
|
||||
use rustc_ast_pretty::pprust;
|
||||
use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError, PResult};
|
||||
use rustc_errors::PResult;
|
||||
use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError};
|
||||
use rustc_session::parse::ParseSess;
|
||||
use rustc_span::source_map::{Span, DUMMY_SP};
|
||||
use rustc_span::symbol::{kw, sym, Ident, Symbol};
|
||||
@ -85,10 +87,14 @@ pub struct Parser<'a> {
|
||||
pub sess: &'a ParseSess,
|
||||
/// The current token.
|
||||
pub token: Token,
|
||||
/// The spacing for the current token
|
||||
pub token_spacing: Spacing,
|
||||
/// The previous token.
|
||||
pub prev_token: Token,
|
||||
restrictions: Restrictions,
|
||||
expected_tokens: Vec<TokenType>,
|
||||
// Important: This must only be advanced from `next_tok`
|
||||
// to ensure that `token_cursor.num_next_calls` is updated properly
|
||||
token_cursor: TokenCursor,
|
||||
desugar_doc_comments: bool,
|
||||
/// This field is used to keep track of how many left angle brackets we have seen. This is
|
||||
@ -120,8 +126,10 @@ impl<'a> Drop for Parser<'a> {
|
||||
struct TokenCursor {
|
||||
frame: TokenCursorFrame,
|
||||
stack: Vec<TokenCursorFrame>,
|
||||
cur_token: Option<TreeAndSpacing>,
|
||||
collecting: Option<Collecting>,
|
||||
desugar_doc_comments: bool,
|
||||
// Counts the number of calls to `next` or `next_desugared`,
|
||||
// depending on whether `desugar_doc_comments` is set.
|
||||
num_next_calls: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@ -133,40 +141,22 @@ struct TokenCursorFrame {
|
||||
close_delim: bool,
|
||||
}
|
||||
|
||||
/// Used to track additional state needed by `collect_tokens`
|
||||
#[derive(Clone, Debug)]
|
||||
struct Collecting {
|
||||
/// Holds the current tokens captured during the most
|
||||
/// recent call to `collect_tokens`
|
||||
buf: Vec<TreeAndSpacing>,
|
||||
/// The depth of the `TokenCursor` stack at the time
|
||||
/// collection was started. When we encounter a `TokenTree::Delimited`,
|
||||
/// we want to record the `TokenTree::Delimited` itself,
|
||||
/// but *not* any of the inner tokens while we are inside
|
||||
/// the new frame (this would cause us to record duplicate tokens).
|
||||
///
|
||||
/// This `depth` fields tracks stack depth we are recording tokens.
|
||||
/// Only tokens encountered at this depth will be recorded. See
|
||||
/// `TokenCursor::next` for more details.
|
||||
depth: usize,
|
||||
}
|
||||
|
||||
impl TokenCursorFrame {
|
||||
fn new(span: DelimSpan, delim: DelimToken, tts: &TokenStream) -> Self {
|
||||
fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self {
|
||||
TokenCursorFrame {
|
||||
delim,
|
||||
span,
|
||||
open_delim: delim == token::NoDelim,
|
||||
tree_cursor: tts.clone().into_trees(),
|
||||
tree_cursor: tts.into_trees(),
|
||||
close_delim: delim == token::NoDelim,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenCursor {
|
||||
fn next(&mut self) -> Token {
|
||||
fn next(&mut self) -> (Token, Spacing) {
|
||||
loop {
|
||||
let tree = if !self.frame.open_delim {
|
||||
let (tree, spacing) = if !self.frame.open_delim {
|
||||
self.frame.open_delim = true;
|
||||
TokenTree::open_tt(self.frame.span, self.frame.delim).into()
|
||||
} else if let Some(tree) = self.frame.tree_cursor.next_with_spacing() {
|
||||
@ -178,40 +168,24 @@ impl TokenCursor {
|
||||
self.frame = frame;
|
||||
continue;
|
||||
} else {
|
||||
return Token::new(token::Eof, DUMMY_SP);
|
||||
(TokenTree::Token(Token::new(token::Eof, DUMMY_SP)), Spacing::Alone)
|
||||
};
|
||||
|
||||
// Don't set an open delimiter as our current token - we want
|
||||
// to leave it as the full `TokenTree::Delimited` from the previous
|
||||
// iteration of this loop
|
||||
if !matches!(tree.0, TokenTree::Token(Token { kind: TokenKind::OpenDelim(_), .. })) {
|
||||
self.cur_token = Some(tree.clone());
|
||||
}
|
||||
|
||||
if let Some(collecting) = &mut self.collecting {
|
||||
if collecting.depth == self.stack.len() {
|
||||
debug!(
|
||||
"TokenCursor::next(): collected {:?} at depth {:?}",
|
||||
tree,
|
||||
self.stack.len()
|
||||
);
|
||||
collecting.buf.push(tree.clone())
|
||||
match tree {
|
||||
TokenTree::Token(token) => {
|
||||
return (token, spacing);
|
||||
}
|
||||
}
|
||||
|
||||
match tree.0 {
|
||||
TokenTree::Token(token) => return token,
|
||||
TokenTree::Delimited(sp, delim, tts) => {
|
||||
let frame = TokenCursorFrame::new(sp, delim, &tts);
|
||||
let frame = TokenCursorFrame::new(sp, delim, tts);
|
||||
self.stack.push(mem::replace(&mut self.frame, frame));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn next_desugared(&mut self) -> Token {
|
||||
fn next_desugared(&mut self) -> (Token, Spacing) {
|
||||
let (data, attr_style, sp) = match self.next() {
|
||||
Token { kind: token::DocComment(_, attr_style, data), span } => {
|
||||
(Token { kind: token::DocComment(_, attr_style, data), span }, _) => {
|
||||
(data, attr_style, span)
|
||||
}
|
||||
tok => return tok,
|
||||
@ -249,7 +223,7 @@ impl TokenCursor {
|
||||
TokenCursorFrame::new(
|
||||
delim_span,
|
||||
token::NoDelim,
|
||||
&if attr_style == AttrStyle::Inner {
|
||||
if attr_style == AttrStyle::Inner {
|
||||
[TokenTree::token(token::Pound, sp), TokenTree::token(token::Not, sp), body]
|
||||
.iter()
|
||||
.cloned()
|
||||
@ -351,14 +325,15 @@ impl<'a> Parser<'a> {
|
||||
let mut parser = Parser {
|
||||
sess,
|
||||
token: Token::dummy(),
|
||||
token_spacing: Spacing::Alone,
|
||||
prev_token: Token::dummy(),
|
||||
restrictions: Restrictions::empty(),
|
||||
expected_tokens: Vec::new(),
|
||||
token_cursor: TokenCursor {
|
||||
frame: TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, &tokens),
|
||||
frame: TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens),
|
||||
stack: Vec::new(),
|
||||
cur_token: None,
|
||||
collecting: None,
|
||||
num_next_calls: 0,
|
||||
desugar_doc_comments,
|
||||
},
|
||||
desugar_doc_comments,
|
||||
unmatched_angle_bracket_count: 0,
|
||||
@ -375,17 +350,18 @@ impl<'a> Parser<'a> {
|
||||
parser
|
||||
}
|
||||
|
||||
fn next_tok(&mut self, fallback_span: Span) -> Token {
|
||||
let mut next = if self.desugar_doc_comments {
|
||||
fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) {
|
||||
let (mut next, spacing) = if self.desugar_doc_comments {
|
||||
self.token_cursor.next_desugared()
|
||||
} else {
|
||||
self.token_cursor.next()
|
||||
};
|
||||
self.token_cursor.num_next_calls += 1;
|
||||
if next.span.is_dummy() {
|
||||
// Tweak the location for better diagnostics, but keep syntactic context intact.
|
||||
next.span = fallback_span.with_ctxt(next.span.ctxt());
|
||||
}
|
||||
next
|
||||
(next, spacing)
|
||||
}
|
||||
|
||||
pub fn unexpected<T>(&mut self) -> PResult<'a, T> {
|
||||
@ -577,7 +553,9 @@ impl<'a> Parser<'a> {
|
||||
let first_span = self.sess.source_map().start_point(self.token.span);
|
||||
let second_span = self.token.span.with_lo(first_span.hi());
|
||||
self.token = Token::new(first, first_span);
|
||||
self.bump_with(Token::new(second, second_span));
|
||||
// Use the spacing of the glued token as the spacing
|
||||
// of the unglued second token.
|
||||
self.bump_with((Token::new(second, second_span), self.token_spacing));
|
||||
true
|
||||
}
|
||||
_ => {
|
||||
@ -809,7 +787,7 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
|
||||
/// Advance the parser by one token using provided token as the next one.
|
||||
fn bump_with(&mut self, next_token: Token) {
|
||||
fn bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) {
|
||||
// Bumping after EOF is a bad sign, usually an infinite loop.
|
||||
if self.prev_token.kind == TokenKind::Eof {
|
||||
let msg = "attempted to bump the parser past EOF (may be stuck in a loop)";
|
||||
@ -818,6 +796,7 @@ impl<'a> Parser<'a> {
|
||||
|
||||
// Update the current and previous tokens.
|
||||
self.prev_token = mem::replace(&mut self.token, next_token);
|
||||
self.token_spacing = next_spacing;
|
||||
|
||||
// Diagnostics.
|
||||
self.expected_tokens.clear();
|
||||
@ -988,13 +967,27 @@ impl<'a> Parser<'a> {
|
||||
pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
|
||||
match self.token.kind {
|
||||
token::OpenDelim(..) => {
|
||||
let frame = mem::replace(
|
||||
&mut self.token_cursor.frame,
|
||||
self.token_cursor.stack.pop().unwrap(),
|
||||
);
|
||||
self.token = Token::new(TokenKind::CloseDelim(frame.delim), frame.span.close);
|
||||
let depth = self.token_cursor.stack.len();
|
||||
|
||||
// We keep advancing the token cursor until we hit
|
||||
// the matching `CloseDelim` token.
|
||||
while !(depth == self.token_cursor.stack.len()
|
||||
&& matches!(self.token.kind, token::CloseDelim(_)))
|
||||
{
|
||||
// Advance one token at a time, so `TokenCursor::next()`
|
||||
// can capture these tokens if necessary.
|
||||
self.bump();
|
||||
}
|
||||
// We are still inside the frame corresponding
|
||||
// to the delimited stream we captured, so grab
|
||||
// the tokens from this frame.
|
||||
let frame = &self.token_cursor.frame;
|
||||
let stream = frame.tree_cursor.stream.clone();
|
||||
let span = frame.span;
|
||||
let delim = frame.delim;
|
||||
// Consume close delimiter
|
||||
self.bump();
|
||||
TokenTree::Delimited(frame.span, frame.delim, frame.tree_cursor.stream)
|
||||
TokenTree::Delimited(span, delim, stream)
|
||||
}
|
||||
token::CloseDelim(_) | token::Eof => unreachable!(),
|
||||
_ => {
|
||||
@ -1202,79 +1195,45 @@ impl<'a> Parser<'a> {
|
||||
pub fn collect_tokens<R>(
|
||||
&mut self,
|
||||
f: impl FnOnce(&mut Self) -> PResult<'a, R>,
|
||||
) -> PResult<'a, (R, TokenStream)> {
|
||||
// Record all tokens we parse when parsing this item.
|
||||
let tokens: Vec<TreeAndSpacing> = self.token_cursor.cur_token.clone().into_iter().collect();
|
||||
debug!("collect_tokens: starting with {:?}", tokens);
|
||||
) -> PResult<'a, (R, LazyTokenStream)> {
|
||||
let start_token = (self.token.clone(), self.token_spacing);
|
||||
let mut cursor_snapshot = self.token_cursor.clone();
|
||||
|
||||
// We need special handling for the case where `collect_tokens` is called
|
||||
// on an opening delimeter (e.g. '('). At this point, we have already pushed
|
||||
// a new frame - however, we want to record the original `TokenTree::Delimited`,
|
||||
// for consistency with the case where we start recording one token earlier.
|
||||
// See `TokenCursor::next` to see how `cur_token` is set up.
|
||||
let prev_depth =
|
||||
if matches!(self.token_cursor.cur_token, Some((TokenTree::Delimited(..), _))) {
|
||||
if self.token_cursor.stack.is_empty() {
|
||||
// There is nothing below us in the stack that
|
||||
// the function could consume, so the only thing it can legally
|
||||
// capture is the entire contents of the current frame.
|
||||
return Ok((f(self)?, TokenStream::new(tokens)));
|
||||
}
|
||||
// We have already recorded the full `TokenTree::Delimited` when we created
|
||||
// our `tokens` vector at the start of this function. We are now inside
|
||||
// a new frame corresponding to the `TokenTree::Delimited` we already recoreded.
|
||||
// We don't want to record any of the tokens inside this frame, since they
|
||||
// will be duplicates of the tokens nested inside the `TokenTree::Delimited`.
|
||||
// Therefore, we set our recording depth to the *previous* frame. This allows
|
||||
// us to record a sequence like: `(foo).bar()`: the `(foo)` will be recored
|
||||
// as our initial `cur_token`, while the `.bar()` will be recored after we
|
||||
// pop the `(foo)` frame.
|
||||
self.token_cursor.stack.len() - 1
|
||||
} else {
|
||||
self.token_cursor.stack.len()
|
||||
};
|
||||
let prev_collecting =
|
||||
self.token_cursor.collecting.replace(Collecting { buf: tokens, depth: prev_depth });
|
||||
let ret = f(self)?;
|
||||
|
||||
let ret = f(self);
|
||||
let new_calls = self.token_cursor.num_next_calls;
|
||||
let num_calls = new_calls - cursor_snapshot.num_next_calls;
|
||||
let desugar_doc_comments = self.desugar_doc_comments;
|
||||
|
||||
let mut collected_tokens = if let Some(collecting) = self.token_cursor.collecting.take() {
|
||||
collecting.buf
|
||||
} else {
|
||||
let msg = "our vector went away?";
|
||||
debug!("collect_tokens: {}", msg);
|
||||
self.sess.span_diagnostic.delay_span_bug(self.token.span, &msg);
|
||||
// This can happen due to a bad interaction of two unrelated recovery mechanisms
|
||||
// with mismatched delimiters *and* recovery lookahead on the likely typo
|
||||
// `pub ident(` (#62895, different but similar to the case above).
|
||||
return Ok((ret?, TokenStream::default()));
|
||||
// Produces a `TokenStream` on-demand. Using `cursor_snapshot`
|
||||
// and `num_calls`, we can reconstruct the `TokenStream` seen
|
||||
// by the callback. This allows us to avoid producing a `TokenStream`
|
||||
// if it is never needed - for example, a captured `macro_rules!`
|
||||
// argument that is never passed to a proc macro.
|
||||
//
|
||||
// This also makes `Parser` very cheap to clone, since
|
||||
// there is no intermediate collection buffer to clone.
|
||||
let lazy_cb = move || {
|
||||
// The token produced by the final call to `next` or `next_desugared`
|
||||
// was not actually consumed by the callback. The combination
|
||||
// of chaining the initial token and using `take` produces the desired
|
||||
// result - we produce an empty `TokenStream` if no calls were made,
|
||||
// and omit the final token otherwise.
|
||||
let tokens = std::iter::once(start_token)
|
||||
.chain((0..num_calls).map(|_| {
|
||||
if desugar_doc_comments {
|
||||
cursor_snapshot.next_desugared()
|
||||
} else {
|
||||
cursor_snapshot.next()
|
||||
}
|
||||
}))
|
||||
.take(num_calls);
|
||||
|
||||
make_token_stream(tokens)
|
||||
};
|
||||
let stream = LazyTokenStream::new(LazyTokenStreamInner::Lazy(Box::new(lazy_cb)));
|
||||
|
||||
debug!("collect_tokens: got raw tokens {:?}", collected_tokens);
|
||||
|
||||
// If we're not at EOF our current token wasn't actually consumed by
|
||||
// `f`, but it'll still be in our list that we pulled out. In that case
|
||||
// put it back.
|
||||
let extra_token = if self.token != token::Eof { collected_tokens.pop() } else { None };
|
||||
|
||||
if let Some(mut collecting) = prev_collecting {
|
||||
// If we were previously collecting at the same depth,
|
||||
// then the previous call to `collect_tokens` needs to see
|
||||
// the tokens we just recorded.
|
||||
//
|
||||
// If we were previously recording at an lower `depth`,
|
||||
// then the previous `collect_tokens` call already recorded
|
||||
// this entire frame in the form of a `TokenTree::Delimited`,
|
||||
// so there is nothing else for us to do.
|
||||
if collecting.depth == prev_depth {
|
||||
collecting.buf.extend(collected_tokens.iter().cloned());
|
||||
collecting.buf.extend(extra_token);
|
||||
debug!("collect_tokens: updating previous buf to {:?}", collecting);
|
||||
}
|
||||
self.token_cursor.collecting = Some(collecting)
|
||||
}
|
||||
|
||||
Ok((ret?, TokenStream::new(collected_tokens)))
|
||||
Ok((ret, stream))
|
||||
}
|
||||
|
||||
/// `::{` or `::*`
|
||||
@ -1323,3 +1282,41 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
|
||||
/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
|
||||
/// of open and close delims.
|
||||
///
/// The delimiter kind of each group is taken from the *close* token; the kind
/// carried by the open token is ignored. Delimited groups are always recorded
/// with `Spacing::Alone`, while plain tokens keep the spacing they arrived with.
///
/// # Panics
///
/// Panics if the input is unbalanced: a close delimiter with no matching open
/// delimiter, or an open delimiter that is never closed.
fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStream {
|
||||
// One entry per currently-open delimiter, holding the span of the open
// token and the trees collected inside it so far.
#[derive(Debug)]
|
||||
struct FrameData {
|
||||
open: Span,
|
||||
inner: Vec<(TokenTree, Spacing)>,
|
||||
}
|
||||
// The bottom frame collects the top-level trees; it has no real open
// delimiter, hence the dummy span.
let mut stack = vec![FrameData { open: DUMMY_SP, inner: vec![] }];
|
||||
for (token, spacing) in tokens {
|
||||
match token {
|
||||
// Open delimiter: start a new nested frame.
Token { kind: TokenKind::OpenDelim(_), span } => {
|
||||
stack.push(FrameData { open: span, inner: vec![] });
|
||||
}
|
||||
// Close delimiter: finish the innermost frame, wrap its contents in a
// `TokenTree::Delimited`, and append that to the enclosing frame.
Token { kind: TokenKind::CloseDelim(delim), span } => {
|
||||
let frame_data = stack.pop().expect("Token stack was empty!");
|
||||
let dspan = DelimSpan::from_pair(frame_data.open, span);
|
||||
let stream = TokenStream::new(frame_data.inner);
|
||||
let delimited = TokenTree::Delimited(dspan, delim, stream);
|
||||
// If the frame just popped was the bottom frame (a close delimiter
// without a matching open), there is no parent and this panics.
stack
|
||||
.last_mut()
|
||||
.unwrap_or_else(|| panic!("Bottom token frame is missing for tokens!"))
|
||||
.inner
|
||||
.push((delimited, Spacing::Alone));
|
||||
}
|
||||
// Any other token is appended to the innermost open frame unchanged.
token => stack
|
||||
.last_mut()
|
||||
.expect("Bottom token frame is missing!")
|
||||
.inner
|
||||
.push((TokenTree::Token(token), spacing)),
|
||||
}
|
||||
}
|
||||
// After all tokens are consumed only the bottom frame should remain; any
// leftover frame means an open delimiter was never closed.
let final_buf = stack.pop().expect("Missing final buf!");
|
||||
assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
|
||||
TokenStream::new(final_buf.inner)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user