Rollup merge of #72724 - Aaron1011:revert-tokenstream-expand, r=petrochenkov

Revert recursive `TokenKind::Interpolated` expansion for now

The crater run https://github.com/rust-lang/rust/issues/72622 revealed many root regressions, at least one of which is going to take some time to fix.

For now, let's revert https://github.com/rust-lang/rust/pull/72388 to allow the 709 affected crates to continue building on the latest nightly.
commit 047b3bd4df
Yuki Okushi, 2020-05-30 12:39:19 +09:00, committed by GitHub
7 changed files with 185 additions and 240 deletions

View File

@ -4144,7 +4144,6 @@ dependencies = [
"rustc_lexer",
"rustc_session",
"rustc_span",
"smallvec 1.4.0",
"unicode-normalization",
]

View File

@ -673,6 +673,62 @@ impl Token {
Some(Token::new(kind, self.span.to(joint.span)))
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
if mem::discriminant(&self.kind) != mem::discriminant(&other.kind) {
return false;
}
match (&self.kind, &other.kind) {
(&Eq, &Eq)
| (&Lt, &Lt)
| (&Le, &Le)
| (&EqEq, &EqEq)
| (&Ne, &Ne)
| (&Ge, &Ge)
| (&Gt, &Gt)
| (&AndAnd, &AndAnd)
| (&OrOr, &OrOr)
| (&Not, &Not)
| (&Tilde, &Tilde)
| (&At, &At)
| (&Dot, &Dot)
| (&DotDot, &DotDot)
| (&DotDotDot, &DotDotDot)
| (&DotDotEq, &DotDotEq)
| (&Comma, &Comma)
| (&Semi, &Semi)
| (&Colon, &Colon)
| (&ModSep, &ModSep)
| (&RArrow, &RArrow)
| (&LArrow, &LArrow)
| (&FatArrow, &FatArrow)
| (&Pound, &Pound)
| (&Dollar, &Dollar)
| (&Question, &Question)
| (&Whitespace, &Whitespace)
| (&Comment, &Comment)
| (&Eof, &Eof) => true,
(&BinOp(a), &BinOp(b)) | (&BinOpEq(a), &BinOpEq(b)) => a == b,
(&OpenDelim(a), &OpenDelim(b)) | (&CloseDelim(a), &CloseDelim(b)) => a == b,
(&DocComment(a), &DocComment(b)) | (&Shebang(a), &Shebang(b)) => a == b,
(&Literal(a), &Literal(b)) => a == b,
(&Lifetime(a), &Lifetime(b)) => a == b,
(&Ident(a, b), &Ident(c, d)) => {
b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
}
(&Interpolated(_), &Interpolated(_)) => false,
_ => panic!("forgot to add a token?"),
}
}
}
impl PartialEq<TokenKind> for Token {

View File

@ -21,6 +21,8 @@ use rustc_macros::HashStable_Generic;
use rustc_span::{Span, DUMMY_SP};
use smallvec::{smallvec, SmallVec};
use log::debug;
use std::{iter, mem};
/// When the main rust parser encounters a syntax-extension invocation, it
@ -66,6 +68,23 @@ impl TokenTree {
}
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn probably_equal_for_proc_macro(&self, other: &TokenTree) -> bool {
match (self, other) {
(TokenTree::Token(token), TokenTree::Token(token2)) => {
token.probably_equal_for_proc_macro(token2)
}
(TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
delim == delim2 && tts.probably_equal_for_proc_macro(&tts2)
}
_ => false,
}
}
/// Retrieves the TokenTree's span.
pub fn span(&self) -> Span {
match self {
@ -288,6 +307,112 @@ impl TokenStream {
t1.next().is_none() && t2.next().is_none()
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn probably_equal_for_proc_macro(&self, other: &TokenStream) -> bool {
// When checking for `probably_eq`, we ignore certain tokens that aren't
// preserved in the AST. Because they are not preserved, the pretty
// printer arbitrarily adds or removes them when printing as token
// streams, making a comparison between a token stream generated from an
// AST and a token stream which was parsed into an AST more reliable.
fn semantic_tree(tree: &TokenTree) -> bool {
if let TokenTree::Token(token) = tree {
if let
// The pretty printer tends to add trailing commas to
// everything, and in particular, after struct fields.
| token::Comma
// The pretty printer emits `NoDelim` as whitespace.
| token::OpenDelim(DelimToken::NoDelim)
| token::CloseDelim(DelimToken::NoDelim)
// The pretty printer collapses many semicolons into one.
| token::Semi
// The pretty printer collapses whitespace arbitrarily and can
// introduce whitespace from `NoDelim`.
| token::Whitespace
// The pretty printer can turn `$crate` into `::crate_name`
| token::ModSep = token.kind {
return false;
}
}
true
}
// When comparing two `TokenStream`s, we ignore the `IsJoint` information.
//
// However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
// use `Token.glue` on adjacent tokens with the proper `IsJoint`.
// Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
// and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
// when determining if two `TokenStream`s are 'probably equal'.
//
// Therefore, we use `break_two_token_op` to convert all tokens
// to the 'unglued' form (if it exists). This ensures that two
// `TokenStream`s which differ only in how their tokens are glued
// will be considered 'probably equal', which allows us to keep spans.
//
// This is important when the original `TokenStream` contained
// extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
// will be omitted when we pretty-print, which can cause the original
// and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
// leading to some tokens being 'glued' together in one stream but not
// the other. See #68489 for more details.
fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
// In almost all cases, we should have either zero or one levels
// of 'unglueing'. However, in some unusual cases, we may need
// to iterate breaking tokens multiple times. For example:
// `[BinOpEq(Shr)] -> [Gt, Ge] -> [Gt, Gt, Eq]`
let mut token_trees: SmallVec<[_; 2]>;
if let TokenTree::Token(token) = &tree {
let mut out = SmallVec::<[_; 2]>::new();
out.push(token.clone());
// Iterate to fixpoint:
// * We start off with 'out' containing our initial token, and `temp` empty
// * If we are able to break any tokens in `out`, then `out` will have
// at least one more element than 'temp', so we will try to break tokens
// again.
// * If we cannot break any tokens in 'out', we are done
loop {
let mut temp = SmallVec::<[_; 2]>::new();
let mut changed = false;
for token in out.into_iter() {
if let Some((first, second)) = token.kind.break_two_token_op() {
temp.push(Token::new(first, DUMMY_SP));
temp.push(Token::new(second, DUMMY_SP));
changed = true;
} else {
temp.push(token);
}
}
out = temp;
if !changed {
break;
}
}
token_trees = out.into_iter().map(TokenTree::Token).collect();
if token_trees.len() != 1 {
debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
}
} else {
token_trees = SmallVec::new();
token_trees.push(tree);
}
token_trees.into_iter()
}
let mut t1 = self.trees().filter(semantic_tree).flat_map(break_tokens);
let mut t2 = other.trees().filter(semantic_tree).flat_map(break_tokens);
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
if !t1.probably_equal_for_proc_macro(&t2) {
return false;
}
}
t1.next().is_none() && t2.next().is_none()
}
pub fn map_enumerated<F: FnMut(usize, TokenTree) -> TokenTree>(self, mut f: F) -> TokenStream {
TokenStream(Lrc::new(
self.0
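Taken together, `semantic_tree` and `break_tokens` normalize both streams before the element-wise comparison: drop pretty-printer noise, unglue compound operators to a fixpoint, then zip. A self-contained sketch of that pipeline under a hypothetical `Tok` enum (not rustc's token types):

```rust
// Hypothetical stand-ins for glued operator tokens; not rustc's types.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Tok {
    Gt,         // `>`
    Eq,         // `=`
    Ge,         // `>=`
    ShrEq,      // `>>=`
    Whitespace, // non-semantic, filtered out before comparison
}

// Analogue of `TokenKind::break_two_token_op`: one level of unglueing.
fn break_two(t: Tok) -> Option<(Tok, Tok)> {
    match t {
        Tok::Ge => Some((Tok::Gt, Tok::Eq)),
        Tok::ShrEq => Some((Tok::Gt, Tok::Ge)),
        _ => None,
    }
}

// Iterate to fixpoint, like `break_tokens`:
// [ShrEq] -> [Gt, Ge] -> [Gt, Gt, Eq].
fn break_tokens(t: Tok) -> Vec<Tok> {
    let mut out = vec![t];
    loop {
        let mut temp = Vec::new();
        let mut changed = false;
        for tok in out {
            if let Some((a, b)) = break_two(tok) {
                temp.push(a);
                temp.push(b);
                changed = true;
            } else {
                temp.push(tok);
            }
        }
        out = temp;
        if !changed {
            return out;
        }
    }
}

// Filter non-semantic tokens, unglue, then compare element-wise.
fn probably_equal(a: &[Tok], b: &[Tok]) -> bool {
    let mut t1 = a.iter().filter(|t| **t != Tok::Whitespace).flat_map(|t| break_tokens(*t));
    let mut t2 = b.iter().filter(|t| **t != Tok::Whitespace).flat_map(|t| break_tokens(*t));
    loop {
        match (t1.next(), t2.next()) {
            (None, None) => return true,
            (Some(x), Some(y)) if x == y => continue,
            _ => return false,
        }
    }
}

fn main() {
    // `>>=` lexed as one glued token vs. split tokens with stray whitespace.
    assert!(probably_equal(&[Tok::ShrEq], &[Tok::Gt, Tok::Whitespace, Tok::Ge]));
}
```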

View File

@ -12,7 +12,6 @@ doctest = false
[dependencies]
bitflags = "1.0"
log = "0.4"
smallvec = { version = "1.0", features = ["union", "may_dangle"] }
rustc_ast_pretty = { path = "../librustc_ast_pretty" }
rustc_data_structures = { path = "../librustc_data_structures" }
rustc_feature = { path = "../librustc_feature" }

View File

@ -7,18 +7,14 @@
#![feature(or_patterns)]
use rustc_ast::ast;
use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{self, IsJoint, TokenStream, TokenTree};
use rustc_ast::token::{self, Nonterminal};
use rustc_ast::tokenstream::{self, TokenStream, TokenTree};
use rustc_ast_pretty::pprust;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{Diagnostic, FatalError, Level, PResult};
use rustc_session::parse::ParseSess;
use rustc_span::symbol::kw;
use rustc_span::{FileName, SourceFile, Span, DUMMY_SP};
use rustc_span::{FileName, SourceFile, Span};
use smallvec::SmallVec;
use std::mem;
use std::path::Path;
use std::str;
@ -310,7 +306,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
// modifications, including adding/removing typically non-semantic
// tokens such as extra braces and commas, don't happen.
if let Some(tokens) = tokens {
if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real, sess) {
if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
return tokens;
}
info!(
@ -385,203 +381,3 @@ fn prepend_attrs(
builder.push(tokens.clone());
Some(builder.build())
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn tokenstream_probably_equal_for_proc_macro(
first: &TokenStream,
other: &TokenStream,
sess: &ParseSess,
) -> bool {
// When checking for `probably_eq`, we ignore certain tokens that aren't
// preserved in the AST. Because they are not preserved, the pretty
// printer arbitrarily adds or removes them when printing as token
// streams, making a comparison between a token stream generated from an
// AST and a token stream which was parsed into an AST more reliable.
fn semantic_tree(tree: &TokenTree) -> bool {
if let TokenTree::Token(token) = tree {
if let
// The pretty printer tends to add trailing commas to
// everything, and in particular, after struct fields.
| token::Comma
// The pretty printer emits `NoDelim` as whitespace.
| token::OpenDelim(DelimToken::NoDelim)
| token::CloseDelim(DelimToken::NoDelim)
// The pretty printer collapses many semicolons into one.
| token::Semi
// The pretty printer collapses whitespace arbitrarily and can
// introduce whitespace from `NoDelim`.
| token::Whitespace
// The pretty printer can turn `$crate` into `::crate_name`
| token::ModSep = token.kind {
return false;
}
}
true
}
// When comparing two `TokenStream`s, we ignore the `IsJoint` information.
//
// However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
// use `Token.glue` on adjacent tokens with the proper `IsJoint`.
// Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
// and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
// when determining if two `TokenStream`s are 'probably equal'.
//
// Therefore, we use `break_two_token_op` to convert all tokens
// to the 'unglued' form (if it exists). This ensures that two
// `TokenStream`s which differ only in how their tokens are glued
// will be considered 'probably equal', which allows us to keep spans.
//
// This is important when the original `TokenStream` contained
// extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
// will be omitted when we pretty-print, which can cause the original
// and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
// leading to some tokens being 'glued' together in one stream but not
// the other. See #68489 for more details.
fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
// In almost all cases, we should have either zero or one levels
// of 'unglueing'. However, in some unusual cases, we may need
// to iterate breaking tokens multiple times. For example:
// `[BinOpEq(Shr)] -> [Gt, Ge] -> [Gt, Gt, Eq]`
let mut token_trees: SmallVec<[_; 2]>;
if let TokenTree::Token(token) = &tree {
let mut out = SmallVec::<[_; 2]>::new();
out.push(token.clone());
// Iterate to fixpoint:
// * We start off with 'out' containing our initial token, and `temp` empty
// * If we are able to break any tokens in `out`, then `out` will have
// at least one more element than 'temp', so we will try to break tokens
// again.
// * If we cannot break any tokens in 'out', we are done
loop {
let mut temp = SmallVec::<[_; 2]>::new();
let mut changed = false;
for token in out.into_iter() {
if let Some((first, second)) = token.kind.break_two_token_op() {
temp.push(Token::new(first, DUMMY_SP));
temp.push(Token::new(second, DUMMY_SP));
changed = true;
} else {
temp.push(token);
}
}
out = temp;
if !changed {
break;
}
}
token_trees = out.into_iter().map(TokenTree::Token).collect();
if token_trees.len() != 1 {
debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
}
} else {
token_trees = SmallVec::new();
token_trees.push(tree);
}
token_trees.into_iter()
}
let expand_nt = |tree: TokenTree| {
if let TokenTree::Token(Token { kind: TokenKind::Interpolated(nt), span }) = &tree {
nt_to_tokenstream(nt, sess, *span).into_trees()
} else {
TokenStream::new(vec![(tree, IsJoint::NonJoint)]).into_trees()
}
};
// Break tokens after we expand any nonterminals, so that we break tokens
// that are produced as a result of nonterminal expansion.
let mut t1 = first.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
let mut t2 = other.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
if !tokentree_probably_equal_for_proc_macro(&t1, &t2, sess) {
return false;
}
}
t1.next().is_none() && t2.next().is_none()
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
crate fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bool {
use TokenKind::*;
if mem::discriminant(&first.kind) != mem::discriminant(&other.kind) {
return false;
}
match (&first.kind, &other.kind) {
(&Eq, &Eq)
| (&Lt, &Lt)
| (&Le, &Le)
| (&EqEq, &EqEq)
| (&Ne, &Ne)
| (&Ge, &Ge)
| (&Gt, &Gt)
| (&AndAnd, &AndAnd)
| (&OrOr, &OrOr)
| (&Not, &Not)
| (&Tilde, &Tilde)
| (&At, &At)
| (&Dot, &Dot)
| (&DotDot, &DotDot)
| (&DotDotDot, &DotDotDot)
| (&DotDotEq, &DotDotEq)
| (&Comma, &Comma)
| (&Semi, &Semi)
| (&Colon, &Colon)
| (&ModSep, &ModSep)
| (&RArrow, &RArrow)
| (&LArrow, &LArrow)
| (&FatArrow, &FatArrow)
| (&Pound, &Pound)
| (&Dollar, &Dollar)
| (&Question, &Question)
| (&Whitespace, &Whitespace)
| (&Comment, &Comment)
| (&Eof, &Eof) => true,
(&BinOp(a), &BinOp(b)) | (&BinOpEq(a), &BinOpEq(b)) => a == b,
(&OpenDelim(a), &OpenDelim(b)) | (&CloseDelim(a), &CloseDelim(b)) => a == b,
(&DocComment(a), &DocComment(b)) | (&Shebang(a), &Shebang(b)) => a == b,
(&Literal(a), &Literal(b)) => a == b,
(&Lifetime(a), &Lifetime(b)) => a == b,
(&Ident(a, b), &Ident(c, d)) => {
b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
}
// Expanded by `tokenstream_probably_equal_for_proc_macro`
(&Interpolated(_), &Interpolated(_)) => unreachable!(),
_ => panic!("forgot to add a token?"),
}
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn tokentree_probably_equal_for_proc_macro(
first: &TokenTree,
other: &TokenTree,
sess: &ParseSess,
) -> bool {
match (first, other) {
(TokenTree::Token(token), TokenTree::Token(token2)) => {
token_probably_equal_for_proc_macro(token, token2)
}
(TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2, sess)
}
_ => false,
}
}
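The deleted `expand_nt` closure is the heart of what this PR reverts: before comparing, #72388 recursively replaced each `Interpolated` token with the token stream of the fragment it captured, via `flat_map`. A self-contained analogue of that flattening, with hypothetical types standing in for rustc's:

```rust
// Hypothetical stand-in: `Interp` plays the role of
// `TokenKind::Interpolated`, wrapping an already-parsed fragment.
#[derive(Clone, Debug, PartialEq)]
enum Tree {
    Token(char),
    Interp(Vec<Tree>),
}

// Analogue of the reverted `expand_nt`: recursively replace each
// interpolated node with the tokens it captured, yielding a flat stream.
fn expand(tree: Tree) -> Vec<Tree> {
    match tree {
        Tree::Token(_) => vec![tree],
        Tree::Interp(inner) => inner.into_iter().flat_map(expand).collect(),
    }
}

fn main() {
    // `$e:expr` capturing `1 + 2` vs. the same tokens written out directly.
    let captured = vec![Tree::Interp(vec![
        Tree::Token('1'),
        Tree::Token('+'),
        Tree::Token('2'),
    ])];
    let literal = vec![Tree::Token('1'), Tree::Token('+'), Tree::Token('2')];
    let expanded: Vec<Tree> = captured.into_iter().flat_map(expand).collect();
    assert_eq!(expanded, literal);
}
```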

View File

@ -1,18 +0,0 @@
// aux-build: test-macros.rs
extern crate test_macros;
use test_macros::recollect_attr;
macro_rules! reemit {
($name:ident => $($token:expr)*) => {
#[recollect_attr]
pub fn $name() {
$($token)*;
}
}
}
reemit! { foo => 45u32.into() } //~ ERROR type annotations
fn main() {}

View File

@ -1,12 +0,0 @@
error[E0282]: type annotations needed
--> $DIR/macro-rules-capture.rs:16:24
|
LL | reemit! { foo => 45u32.into() }
| ------^^^^--
| | |
| | cannot infer type for type parameter `T` declared on the trait `Into`
| this method call resolves to `T`
error: aborting due to previous error
For more information about this error, try `rustc --explain E0282`.
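For context on the diagnostic in the deleted test: the E0282 reproduces without any macros, since a bare `.into()` call leaves the target type parameter `T` unconstrained. A minimal standalone illustration (not part of the test suite):

```rust
fn main() {
    // 45u32.into(); // error[E0282]: type annotations needed for `T`

    // Naming the target type constrains which `Into` impl is used.
    let x: u64 = 45u32.into();
    assert_eq!(x, 45);
}
```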