Rollup merge of #72724 - Aaron1011:revert-tokenstream-expand, r=petrochenkov

Revert recursive `TokenKind::Interpolated` expansion for now

The crater run https://github.com/rust-lang/rust/issues/72622 revealed many root regressions, at least one of which is going to take some time to fix.

For now, let's revert https://github.com/rust-lang/rust/pull/72388 to allow the 709 affected crates to continue building on the latest nightly.
commit 047b3bd4df
Yuki Okushi, 2020-05-30 12:39:19 +09:00, committed by GitHub
7 changed files with 185 additions and 240 deletions

View File

@ -4144,7 +4144,6 @@ dependencies = [
"rustc_lexer",
"rustc_session",
"rustc_span",
"smallvec 1.4.0",
"unicode-normalization",
]

View File

@ -673,6 +673,62 @@ impl Token {
Some(Token::new(kind, self.span.to(joint.span)))
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
if mem::discriminant(&self.kind) != mem::discriminant(&other.kind) {
return false;
}
match (&self.kind, &other.kind) {
(&Eq, &Eq)
| (&Lt, &Lt)
| (&Le, &Le)
| (&EqEq, &EqEq)
| (&Ne, &Ne)
| (&Ge, &Ge)
| (&Gt, &Gt)
| (&AndAnd, &AndAnd)
| (&OrOr, &OrOr)
| (&Not, &Not)
| (&Tilde, &Tilde)
| (&At, &At)
| (&Dot, &Dot)
| (&DotDot, &DotDot)
| (&DotDotDot, &DotDotDot)
| (&DotDotEq, &DotDotEq)
| (&Comma, &Comma)
| (&Semi, &Semi)
| (&Colon, &Colon)
| (&ModSep, &ModSep)
| (&RArrow, &RArrow)
| (&LArrow, &LArrow)
| (&FatArrow, &FatArrow)
| (&Pound, &Pound)
| (&Dollar, &Dollar)
| (&Question, &Question)
| (&Whitespace, &Whitespace)
| (&Comment, &Comment)
| (&Eof, &Eof) => true,
(&BinOp(a), &BinOp(b)) | (&BinOpEq(a), &BinOpEq(b)) => a == b,
(&OpenDelim(a), &OpenDelim(b)) | (&CloseDelim(a), &CloseDelim(b)) => a == b,
(&DocComment(a), &DocComment(b)) | (&Shebang(a), &Shebang(b)) => a == b,
(&Literal(a), &Literal(b)) => a == b,
(&Lifetime(a), &Lifetime(b)) => a == b,
(&Ident(a, b), &Ident(c, d)) => {
b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
}
(&Interpolated(_), &Interpolated(_)) => false,
_ => panic!("forgot to add a token?"),
}
}
}
impl PartialEq<TokenKind> for Token {

View File

@ -21,6 +21,8 @@ use rustc_macros::HashStable_Generic;
use rustc_span::{Span, DUMMY_SP};
use smallvec::{smallvec, SmallVec};
use log::debug;
use std::{iter, mem};
/// When the main rust parser encounters a syntax-extension invocation, it
@ -66,6 +68,23 @@ impl TokenTree {
}
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn probably_equal_for_proc_macro(&self, other: &TokenTree) -> bool {
match (self, other) {
(TokenTree::Token(token), TokenTree::Token(token2)) => {
token.probably_equal_for_proc_macro(token2)
}
(TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
delim == delim2 && tts.probably_equal_for_proc_macro(&tts2)
}
_ => false,
}
}
/// Retrieves the TokenTree's span.
pub fn span(&self) -> Span {
match self {
@ -288,6 +307,112 @@ impl TokenStream {
t1.next().is_none() && t2.next().is_none()
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn probably_equal_for_proc_macro(&self, other: &TokenStream) -> bool {
// When checking for `probably_eq`, we ignore certain tokens that aren't
// preserved in the AST. Because they are not preserved, the pretty
// printer arbitrarily adds or removes them when printing as token
// streams, making a comparison between a token stream generated from an
// AST and a token stream which was parsed into an AST more reliable.
fn semantic_tree(tree: &TokenTree) -> bool {
if let TokenTree::Token(token) = tree {
if let
// The pretty printer tends to add trailing commas to
// everything, and in particular, after struct fields.
| token::Comma
// The pretty printer emits `NoDelim` as whitespace.
| token::OpenDelim(DelimToken::NoDelim)
| token::CloseDelim(DelimToken::NoDelim)
// The pretty printer collapses many semicolons into one.
| token::Semi
// The pretty printer collapses whitespace arbitrarily and can
// introduce whitespace from `NoDelim`.
| token::Whitespace
// The pretty printer can turn `$crate` into `::crate_name`
| token::ModSep = token.kind {
return false;
}
}
true
}
// When comparing two `TokenStream`s, we ignore the `IsJoint` information.
//
// However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
// use `Token.glue` on adjacent tokens with the proper `IsJoint`.
// Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
// and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
// when determining if two `TokenStream`s are 'probably equal'.
//
// Therefore, we use `break_two_token_op` to convert all tokens
// to the 'unglued' form (if it exists). This ensures that two
// `TokenStream`s which differ only in how their tokens are glued
// will be considered 'probably equal', which allows us to keep spans.
//
// This is important when the original `TokenStream` contained
// extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
// will be omitted when we pretty-print, which can cause the original
// and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
// leading to some tokens being 'glued' together in one stream but not
// the other. See #68489 for more details.
fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
// In almost all cases, we should have either zero or one levels
// of 'unglueing'. However, in some unusual cases, we may need
// to iterate breaking tokens multiple times. For example:
// `[BinOpEq(Shr)] -> [Gt, Ge] -> [Gt, Gt, Eq]`
let mut token_trees: SmallVec<[_; 2]>;
if let TokenTree::Token(token) = &tree {
let mut out = SmallVec::<[_; 2]>::new();
out.push(token.clone());
// Iterate to fixpoint:
// * We start off with 'out' containing our initial token, and `temp` empty
// * If we are able to break any tokens in `out`, then `out` will have
// at least one more element than 'temp', so we will try to break tokens
// again.
// * If we cannot break any tokens in 'out', we are done
loop {
let mut temp = SmallVec::<[_; 2]>::new();
let mut changed = false;
for token in out.into_iter() {
if let Some((first, second)) = token.kind.break_two_token_op() {
temp.push(Token::new(first, DUMMY_SP));
temp.push(Token::new(second, DUMMY_SP));
changed = true;
} else {
temp.push(token);
}
}
out = temp;
if !changed {
break;
}
}
token_trees = out.into_iter().map(TokenTree::Token).collect();
if token_trees.len() != 1 {
debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
}
} else {
token_trees = SmallVec::new();
token_trees.push(tree);
}
token_trees.into_iter()
}
let mut t1 = self.trees().filter(semantic_tree).flat_map(break_tokens);
let mut t2 = other.trees().filter(semantic_tree).flat_map(break_tokens);
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
if !t1.probably_equal_for_proc_macro(&t2) {
return false;
}
}
t1.next().is_none() && t2.next().is_none()
}
pub fn map_enumerated<F: FnMut(usize, TokenTree) -> TokenTree>(self, mut f: F) -> TokenStream {
TokenStream(Lrc::new(
self.0
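Taken together, `semantic_tree` and `break_tokens` normalize both streams before the element-wise comparison: drop pretty-printer noise, unglue compound operators to a fixpoint, then zip. A self-contained sketch of that pipeline under a hypothetical `Tok` enum (not rustc's token types):

```rust
// Hypothetical stand-ins for glued operator tokens; not rustc's types.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Tok {
    Gt,         // `>`
    Eq,         // `=`
    Ge,         // `>=`
    ShrEq,      // `>>=`
    Whitespace, // non-semantic, filtered out before comparison
}

// Analogue of `TokenKind::break_two_token_op`: one level of unglueing.
fn break_two(t: Tok) -> Option<(Tok, Tok)> {
    match t {
        Tok::Ge => Some((Tok::Gt, Tok::Eq)),
        Tok::ShrEq => Some((Tok::Gt, Tok::Ge)),
        _ => None,
    }
}

// Iterate to fixpoint, like `break_tokens`:
// [ShrEq] -> [Gt, Ge] -> [Gt, Gt, Eq].
fn break_tokens(t: Tok) -> Vec<Tok> {
    let mut out = vec![t];
    loop {
        let mut temp = Vec::new();
        let mut changed = false;
        for tok in out {
            if let Some((a, b)) = break_two(tok) {
                temp.push(a);
                temp.push(b);
                changed = true;
            } else {
                temp.push(tok);
            }
        }
        out = temp;
        if !changed {
            return out;
        }
    }
}

// Filter non-semantic tokens, unglue, then compare element-wise.
fn probably_equal(a: &[Tok], b: &[Tok]) -> bool {
    let mut t1 = a.iter().filter(|t| **t != Tok::Whitespace).flat_map(|t| break_tokens(*t));
    let mut t2 = b.iter().filter(|t| **t != Tok::Whitespace).flat_map(|t| break_tokens(*t));
    loop {
        match (t1.next(), t2.next()) {
            (None, None) => return true,
            (Some(x), Some(y)) if x == y => continue,
            _ => return false,
        }
    }
}

fn main() {
    // `>>=` lexed as one glued token vs. split tokens with stray whitespace.
    assert!(probably_equal(&[Tok::ShrEq], &[Tok::Gt, Tok::Whitespace, Tok::Ge]));
}
```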

View File

@ -12,7 +12,6 @@ doctest = false
[dependencies]
bitflags = "1.0"
log = "0.4"
smallvec = { version = "1.0", features = ["union", "may_dangle"] }
rustc_ast_pretty = { path = "../librustc_ast_pretty" }
rustc_data_structures = { path = "../librustc_data_structures" }
rustc_feature = { path = "../librustc_feature" }

View File

@ -7,18 +7,14 @@
#![feature(or_patterns)]
use rustc_ast::ast;
use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{self, IsJoint, TokenStream, TokenTree};
use rustc_ast::token::{self, Nonterminal};
use rustc_ast::tokenstream::{self, TokenStream, TokenTree};
use rustc_ast_pretty::pprust;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{Diagnostic, FatalError, Level, PResult};
use rustc_session::parse::ParseSess;
use rustc_span::symbol::kw;
use rustc_span::{FileName, SourceFile, Span, DUMMY_SP};
use rustc_span::{FileName, SourceFile, Span};
use smallvec::SmallVec;
use std::mem;
use std::path::Path;
use std::str;
@ -310,7 +306,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
// modifications, including adding/removing typically non-semantic
// tokens such as extra braces and commas, don't happen.
if let Some(tokens) = tokens {
if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real, sess) {
if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
return tokens;
}
info!(
@ -385,203 +381,3 @@ fn prepend_attrs(
builder.push(tokens.clone());
Some(builder.build())
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn tokenstream_probably_equal_for_proc_macro(
first: &TokenStream,
other: &TokenStream,
sess: &ParseSess,
) -> bool {
// When checking for `probably_eq`, we ignore certain tokens that aren't
// preserved in the AST. Because they are not preserved, the pretty
// printer arbitrarily adds or removes them when printing as token
// streams, making a comparison between a token stream generated from an
// AST and a token stream which was parsed into an AST more reliable.
fn semantic_tree(tree: &TokenTree) -> bool {
if let TokenTree::Token(token) = tree {
if let
// The pretty printer tends to add trailing commas to
// everything, and in particular, after struct fields.
| token::Comma
// The pretty printer emits `NoDelim` as whitespace.
| token::OpenDelim(DelimToken::NoDelim)
| token::CloseDelim(DelimToken::NoDelim)
// The pretty printer collapses many semicolons into one.
| token::Semi
// The pretty printer collapses whitespace arbitrarily and can
// introduce whitespace from `NoDelim`.
| token::Whitespace
// The pretty printer can turn `$crate` into `::crate_name`
| token::ModSep = token.kind {
return false;
}
}
true
}
// When comparing two `TokenStream`s, we ignore the `IsJoint` information.
//
// However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
// use `Token.glue` on adjacent tokens with the proper `IsJoint`.
// Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
// and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
// when determining if two `TokenStream`s are 'probably equal'.
//
// Therefore, we use `break_two_token_op` to convert all tokens
// to the 'unglued' form (if it exists). This ensures that two
// `TokenStream`s which differ only in how their tokens are glued
// will be considered 'probably equal', which allows us to keep spans.
//
// This is important when the original `TokenStream` contained
// extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
// will be omitted when we pretty-print, which can cause the original
// and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
// leading to some tokens being 'glued' together in one stream but not
// the other. See #68489 for more details.
fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
// In almost all cases, we should have either zero or one levels
// of 'unglueing'. However, in some unusual cases, we may need
// to iterate breaking tokens multiple times. For example:
// `[BinOpEq(Shr)] -> [Gt, Ge] -> [Gt, Gt, Eq]`
let mut token_trees: SmallVec<[_; 2]>;
if let TokenTree::Token(token) = &tree {
let mut out = SmallVec::<[_; 2]>::new();
out.push(token.clone());
// Iterate to fixpoint:
// * We start off with 'out' containing our initial token, and `temp` empty
// * If we are able to break any tokens in `out`, then `out` will have
// at least one more element than 'temp', so we will try to break tokens
// again.
// * If we cannot break any tokens in 'out', we are done
loop {
let mut temp = SmallVec::<[_; 2]>::new();
let mut changed = false;
for token in out.into_iter() {
if let Some((first, second)) = token.kind.break_two_token_op() {
temp.push(Token::new(first, DUMMY_SP));
temp.push(Token::new(second, DUMMY_SP));
changed = true;
} else {
temp.push(token);
}
}
out = temp;
if !changed {
break;
}
}
token_trees = out.into_iter().map(TokenTree::Token).collect();
if token_trees.len() != 1 {
debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
}
} else {
token_trees = SmallVec::new();
token_trees.push(tree);
}
token_trees.into_iter()
}
let expand_nt = |tree: TokenTree| {
if let TokenTree::Token(Token { kind: TokenKind::Interpolated(nt), span }) = &tree {
nt_to_tokenstream(nt, sess, *span).into_trees()
} else {
TokenStream::new(vec![(tree, IsJoint::NonJoint)]).into_trees()
}
};
// Break tokens after we expand any nonterminals, so that we break tokens
// that are produced as a result of nonterminal expansion.
let mut t1 = first.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
let mut t2 = other.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
if !tokentree_probably_equal_for_proc_macro(&t1, &t2, sess) {
return false;
}
}
t1.next().is_none() && t2.next().is_none()
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
crate fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bool {
use TokenKind::*;
if mem::discriminant(&first.kind) != mem::discriminant(&other.kind) {
return false;
}
match (&first.kind, &other.kind) {
(&Eq, &Eq)
| (&Lt, &Lt)
| (&Le, &Le)
| (&EqEq, &EqEq)
| (&Ne, &Ne)
| (&Ge, &Ge)
| (&Gt, &Gt)
| (&AndAnd, &AndAnd)
| (&OrOr, &OrOr)
| (&Not, &Not)
| (&Tilde, &Tilde)
| (&At, &At)
| (&Dot, &Dot)
| (&DotDot, &DotDot)
| (&DotDotDot, &DotDotDot)
| (&DotDotEq, &DotDotEq)
| (&Comma, &Comma)
| (&Semi, &Semi)
| (&Colon, &Colon)
| (&ModSep, &ModSep)
| (&RArrow, &RArrow)
| (&LArrow, &LArrow)
| (&FatArrow, &FatArrow)
| (&Pound, &Pound)
| (&Dollar, &Dollar)
| (&Question, &Question)
| (&Whitespace, &Whitespace)
| (&Comment, &Comment)
| (&Eof, &Eof) => true,
(&BinOp(a), &BinOp(b)) | (&BinOpEq(a), &BinOpEq(b)) => a == b,
(&OpenDelim(a), &OpenDelim(b)) | (&CloseDelim(a), &CloseDelim(b)) => a == b,
(&DocComment(a), &DocComment(b)) | (&Shebang(a), &Shebang(b)) => a == b,
(&Literal(a), &Literal(b)) => a == b,
(&Lifetime(a), &Lifetime(b)) => a == b,
(&Ident(a, b), &Ident(c, d)) => {
b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
}
// Expanded by `tokenstream_probably_equal_for_proc_macro`
(&Interpolated(_), &Interpolated(_)) => unreachable!(),
_ => panic!("forgot to add a token?"),
}
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn tokentree_probably_equal_for_proc_macro(
first: &TokenTree,
other: &TokenTree,
sess: &ParseSess,
) -> bool {
match (first, other) {
(TokenTree::Token(token), TokenTree::Token(token2)) => {
token_probably_equal_for_proc_macro(token, token2)
}
(TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2, sess)
}
_ => false,
}
}
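The deleted `expand_nt` closure is the heart of what this PR reverts: before comparing, #72388 recursively replaced each `Interpolated` token with the token stream of the fragment it captured, via `flat_map`. A self-contained analogue of that flattening, with hypothetical types standing in for rustc's:

```rust
// Hypothetical stand-in: `Interp` plays the role of
// `TokenKind::Interpolated`, wrapping an already-parsed fragment.
#[derive(Clone, Debug, PartialEq)]
enum Tree {
    Token(char),
    Interp(Vec<Tree>),
}

// Analogue of the reverted `expand_nt`: recursively replace each
// interpolated node with the tokens it captured, yielding a flat stream.
fn expand(tree: Tree) -> Vec<Tree> {
    match tree {
        Tree::Token(_) => vec![tree],
        Tree::Interp(inner) => inner.into_iter().flat_map(expand).collect(),
    }
}

fn main() {
    // `$e:expr` capturing `1 + 2` vs. the same tokens written out directly.
    let captured = vec![Tree::Interp(vec![
        Tree::Token('1'),
        Tree::Token('+'),
        Tree::Token('2'),
    ])];
    let literal = vec![Tree::Token('1'), Tree::Token('+'), Tree::Token('2')];
    let expanded: Vec<Tree> = captured.into_iter().flat_map(expand).collect();
    assert_eq!(expanded, literal);
}
```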

View File

@ -1,18 +0,0 @@
// aux-build: test-macros.rs
extern crate test_macros;
use test_macros::recollect_attr;
macro_rules! reemit {
($name:ident => $($token:expr)*) => {
#[recollect_attr]
pub fn $name() {
$($token)*;
}
}
}
reemit! { foo => 45u32.into() } //~ ERROR type annotations
fn main() {}

View File

@ -1,12 +0,0 @@
error[E0282]: type annotations needed
--> $DIR/macro-rules-capture.rs:16:24
|
LL | reemit! { foo => 45u32.into() }
| ------^^^^--
| | |
| | cannot infer type for type parameter `T` declared on the trait `Into`
| this method call resolves to `T`
error: aborting due to previous error
For more information about this error, try `rustc --explain E0282`.
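For context on the diagnostic in the deleted test: the E0282 reproduces without any macros, since a bare `.into()` call leaves the target type parameter `T` unconstrained. A minimal standalone illustration (not part of the test suite):

```rust
fn main() {
    // 45u32.into(); // error[E0282]: type annotations needed for `T`

    // Naming the target type constrains which `Into` impl is used.
    let x: u64 = 45u32.into();
    assert_eq!(x, 45);
}
```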