Auto merge of #37290 - petrochenkov:pnp, r=jseyfried

syntax: Tweak path parsing logic

Associated paths starting with `<<` are parsed in patterns.

Paths like `self::foo::bar` are interpreted as paths and not as `self` arguments in methods (cc @matklad).
Now, I believe, *all* paths are consistently parsed greedily in case of ambiguity.
Detection of `&'a mut self::` requires a fairly large (but still fixed) lookahead, so I had to increase the size of the parser's lookahead buffer.
Curiously, if `lookahead_distance >= lookahead_buffer_size` was used previously, the parser hung forever. I fixed this as well; it now ICEs (reports an internal compiler error) instead.

r? @jseyfried
This commit is contained in:
bors 2016-10-21 06:41:45 -07:00 committed by GitHub
commit 5509ae399e
11 changed files with 134 additions and 74 deletions

View File

@ -74,6 +74,12 @@ pub struct TokenAndSpan {
pub sp: Span,
}
// Placeholder value: an `Underscore` token paired with `DUMMY_SP`.
// Having a `Default` impl lets arrays of `TokenAndSpan` be built with
// `Default::default()` and gives `mem::replace` a value to leave behind
// when a token is moved out of a slot.
impl Default for TokenAndSpan {
fn default() -> Self {
TokenAndSpan { tok: token::Underscore, sp: syntax_pos::DUMMY_SP }
}
}
pub struct StringReader<'a> {
pub span_diagnostic: &'a Handler,
/// The absolute offset within the codemap of the next character to read

View File

@ -245,6 +245,22 @@ enum PrevTokenKind {
Other,
}
// Simple circular buffer used for keeping the next few tokens.
// `start == end` means the buffer is empty, so `len()` never reports more
// than `LOOKAHEAD_BUFFER_CAPACITY - 1` buffered tokens.
#[derive(Default)]
struct LookaheadBuffer {
// Token storage, indexed modulo `LOOKAHEAD_BUFFER_CAPACITY`.
buffer: [TokenAndSpan; LOOKAHEAD_BUFFER_CAPACITY],
// Index of the oldest buffered token, i.e. the next one to be consumed.
start: usize,
// Index of the slot the next token taken from the reader is written to.
end: usize,
}
// Number of slots in the circular buffer.
const LOOKAHEAD_BUFFER_CAPACITY: usize = 8;
impl LookaheadBuffer {
// Number of tokens currently buffered. The capacity is added before the
// subtraction so the `usize` arithmetic cannot underflow when `end` has
// wrapped around and is numerically smaller than `start`.
fn len(&self) -> usize {
(LOOKAHEAD_BUFFER_CAPACITY + self.end - self.start) % LOOKAHEAD_BUFFER_CAPACITY
}
}
/* ident is handled by common.rs */
pub struct Parser<'a> {
@ -258,9 +274,7 @@ pub struct Parser<'a> {
pub cfg: CrateConfig,
/// the previous token kind
prev_token_kind: PrevTokenKind,
pub buffer: [TokenAndSpan; 4],
pub buffer_start: isize,
pub buffer_end: isize,
lookahead_buffer: LookaheadBuffer,
pub tokens_consumed: usize,
pub restrictions: Restrictions,
pub quote_depth: usize, // not (yet) related to the quasiquoter
@ -356,10 +370,6 @@ impl<'a> Parser<'a> {
_ => PathBuf::from(sess.codemap().span_to_filename(span)),
};
directory.pop();
let placeholder = TokenAndSpan {
tok: token::Underscore,
sp: span,
};
Parser {
reader: rdr,
@ -369,14 +379,7 @@ impl<'a> Parser<'a> {
span: span,
prev_span: span,
prev_token_kind: PrevTokenKind::Other,
buffer: [
placeholder.clone(),
placeholder.clone(),
placeholder.clone(),
placeholder.clone(),
],
buffer_start: 0,
buffer_end: 0,
lookahead_buffer: Default::default(),
tokens_consumed: 0,
restrictions: Restrictions::empty(),
quote_depth: 0,
@ -937,19 +940,13 @@ impl<'a> Parser<'a> {
_ => PrevTokenKind::Other,
};
let next = if self.buffer_start == self.buffer_end {
let next = if self.lookahead_buffer.start == self.lookahead_buffer.end {
self.reader.real_token()
} else {
// Avoid token copies with `replace`.
let buffer_start = self.buffer_start as usize;
let next_index = (buffer_start + 1) & 3;
self.buffer_start = next_index as isize;
let placeholder = TokenAndSpan {
tok: token::Underscore,
sp: self.span,
};
mem::replace(&mut self.buffer[buffer_start], placeholder)
let old_start = self.lookahead_buffer.start;
self.lookahead_buffer.start = (old_start + 1) % LOOKAHEAD_BUFFER_CAPACITY;
mem::replace(&mut self.lookahead_buffer.buffer[old_start], Default::default())
};
self.span = next.sp;
self.token = next.tok;
@ -982,21 +979,22 @@ impl<'a> Parser<'a> {
self.expected_tokens.clear();
}
pub fn buffer_length(&mut self) -> isize {
if self.buffer_start <= self.buffer_end {
return self.buffer_end - self.buffer_start;
}
return (4 - self.buffer_start) + self.buffer_end;
}
pub fn look_ahead<R, F>(&mut self, distance: usize, f: F) -> R where
pub fn look_ahead<R, F>(&mut self, dist: usize, f: F) -> R where
F: FnOnce(&token::Token) -> R,
{
let dist = distance as isize;
while self.buffer_length() < dist {
self.buffer[self.buffer_end as usize] = self.reader.real_token();
self.buffer_end = (self.buffer_end + 1) & 3;
if dist == 0 {
f(&self.token)
} else if dist < LOOKAHEAD_BUFFER_CAPACITY {
while self.lookahead_buffer.len() < dist {
self.lookahead_buffer.buffer[self.lookahead_buffer.end] = self.reader.real_token();
self.lookahead_buffer.end =
(self.lookahead_buffer.end + 1) % LOOKAHEAD_BUFFER_CAPACITY;
}
let index = (self.lookahead_buffer.start + dist - 1) % LOOKAHEAD_BUFFER_CAPACITY;
f(&self.lookahead_buffer.buffer[index].tok)
} else {
self.bug("lookahead distance is too large");
}
f(&self.buffer[((self.buffer_start + dist - 1) & 3) as usize].tok)
}
pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> {
self.sess.span_diagnostic.struct_span_fatal(self.span, m)
@ -1118,7 +1116,6 @@ impl<'a> Parser<'a> {
Ok(ast::TyKind::ImplTrait(bounds))
}
/// Parses a path using `PathStyle::Type` and wraps it in `TyKind::Path`,
/// passing `None` as that variant's first field. A failure from
/// `parse_path` is propagated to the caller by `?`.
pub fn parse_ty_path(&mut self) -> PResult<'a, TyKind> {
Ok(TyKind::Path(None, self.parse_path(PathStyle::Type)?))
}
@ -3623,7 +3620,7 @@ impl<'a> Parser<'a> {
// Parse box pat
let subpat = self.parse_pat()?;
pat = PatKind::Box(subpat);
} else if self.token.is_ident() && self.token.is_path_start() &&
} else if self.token.is_ident() && !self.token.is_any_keyword() &&
self.look_ahead(1, |t| match *t {
token::OpenDelim(token::Paren) | token::OpenDelim(token::Brace) |
token::DotDotDot | token::ModSep | token::Not => false,
@ -3874,6 +3871,11 @@ impl<'a> Parser<'a> {
})
}
/// Returns `true` when the current token is the `union` keyword and the
/// token right after it is an identifier that is not itself a keyword.
/// Takes `&mut self` because `look_ahead` may need to buffer another token.
fn is_union_item(&mut self) -> bool {
self.token.is_keyword(keywords::Union) &&
self.look_ahead(1, |t| t.is_ident() && !t.is_any_keyword())
}
fn parse_stmt_without_recovery(&mut self,
macro_legacy_warnings: bool)
-> PResult<'a, Option<Stmt>> {
@ -3888,10 +3890,10 @@ impl<'a> Parser<'a> {
node: StmtKind::Local(self.parse_local(attrs.into())?),
span: mk_sp(lo, self.prev_span.hi),
}
} else if self.token.is_path_start() && self.token != token::Lt && {
!self.check_keyword(keywords::Union) ||
self.look_ahead(1, |t| *t == token::Not || *t == token::ModSep)
} {
// Starts like a simple path, but not a union item.
} else if self.token.is_path_start() &&
!self.token.is_qpath_start() &&
!self.is_union_item() {
let pth = self.parse_path(PathStyle::Expr)?;
if !self.eat(&token::Not) {
@ -4602,6 +4604,10 @@ impl<'a> Parser<'a> {
token::Ident(ident) => { this.bump(); codemap::respan(this.prev_span, ident) }
_ => unreachable!()
};
let isolated_self = |this: &mut Self, n| {
this.look_ahead(n, |t| t.is_keyword(keywords::SelfValue)) &&
this.look_ahead(n + 1, |t| t != &token::ModSep)
};
// Parse optional self parameter of a method.
// Only a limited set of initial token sequences is considered self parameters, anything
@ -4614,22 +4620,22 @@ impl<'a> Parser<'a> {
// &'lt self
// &'lt mut self
// &not_self
if self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
if isolated_self(self, 1) {
self.bump();
(SelfKind::Region(None, Mutability::Immutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_keyword(keywords::Mut)) &&
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 2) {
self.bump();
self.bump();
(SelfKind::Region(None, Mutability::Mutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_lifetime()) &&
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 2) {
self.bump();
let lt = self.parse_lifetime()?;
(SelfKind::Region(Some(lt), Mutability::Immutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_lifetime()) &&
self.look_ahead(2, |t| t.is_keyword(keywords::Mut)) &&
self.look_ahead(3, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 3) {
self.bump();
let lt = self.parse_lifetime()?;
self.bump();
@ -4644,12 +4650,12 @@ impl<'a> Parser<'a> {
// *mut self
// *not_self
// Emit special error for `self` cases.
if self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
if isolated_self(self, 1) {
self.bump();
self.span_err(self.span, "cannot pass `self` by raw pointer");
(SelfKind::Value(Mutability::Immutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_mutability()) &&
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 2) {
self.bump();
self.bump();
self.span_err(self.span, "cannot pass `self` by raw pointer");
@ -4659,7 +4665,7 @@ impl<'a> Parser<'a> {
}
}
token::Ident(..) => {
if self.token.is_keyword(keywords::SelfValue) {
if isolated_self(self, 0) {
// self
// self: TYPE
let eself_ident = expect_ident(self);
@ -4670,7 +4676,7 @@ impl<'a> Parser<'a> {
(SelfKind::Value(Mutability::Immutable), eself_ident)
}
} else if self.token.is_keyword(keywords::Mut) &&
self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 1) {
// mut self
// mut self: TYPE
self.bump();
@ -5961,8 +5967,7 @@ impl<'a> Parser<'a> {
maybe_append(attrs, extra_attrs));
return Ok(Some(item));
}
if self.check_keyword(keywords::Union) &&
self.look_ahead(1, |t| t.is_ident() && !t.is_any_keyword()) {
if self.is_union_item() {
// UNION ITEM
self.bump();
let (ident, item_, extra_attrs) = self.parse_item_union()?;

View File

@ -159,10 +159,8 @@ impl Token {
/// Returns `true` if the token can appear at the start of an expression.
pub fn can_begin_expr(&self) -> bool {
match *self {
OpenDelim(_) => true,
OpenDelim(..) => true,
Ident(..) => true,
Underscore => true,
Tilde => true,
Literal(..) => true,
Not => true,
BinOp(Minus) => true,
@ -172,6 +170,7 @@ impl Token {
OrOr => true, // in lambda syntax
AndAnd => true, // double borrow
DotDot | DotDotDot => true, // range notation
Lt | BinOp(Shl) => true, // associated path
ModSep => true,
Interpolated(NtExpr(..)) => true,
Interpolated(NtIdent(..)) => true,
@ -236,8 +235,12 @@ impl Token {
self.is_keyword(keywords::Const)
}
/// Returns `true` if the token is `<` (`Lt`) or `<<` (`BinOp(Shl)`).
/// `<<` is included because an associated path may begin with two
/// consecutive `<`, which arrive as a single shift-left token.
pub fn is_qpath_start(&self) -> bool {
self == &Lt || self == &BinOp(Shl)
}
pub fn is_path_start(&self) -> bool {
self == &ModSep || self == &Lt || self.is_path() ||
self == &ModSep || self.is_qpath_start() || self.is_path() ||
self.is_path_segment_keyword() || self.is_ident() && !self.is_any_keyword()
}

View File

@ -0,0 +1,20 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Check that associated paths starting with `<<` are successfully parsed.
fn main() {
// `<<`-prefixed path in type position.
let _: <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
// ... in expression position.
let _ = <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
// ... as a whole pattern.
let <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
// ... as the upper end of a range pattern.
let 0 ... <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
//~^ ERROR only char and numeric types are allowed in range patterns
// ... at the start of a statement.
<<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
}

View File

@ -8,8 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// compile-flags: -Z parse-only
fn main() {
let Self = "foo"; //~ error: expected identifier, found keyword `Self`
let Self = "foo"; //~ ERROR unresolved unit struct/variant or constant `Self`
}

View File

@ -8,8 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// compile-flags: -Z parse-only
fn main() {
let super = "foo"; //~ error: expected identifier, found keyword `super`
let super = "foo"; //~ ERROR unresolved unit struct/variant or constant `super`
}

View File

@ -8,8 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// compile-flags: -Z parse-only
fn main() {
let super: isize; //~ ERROR expected identifier, found keyword `super`
let super: isize; //~ ERROR unresolved unit struct/variant or constant `super`
}

View File

@ -0,0 +1,23 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Check that `self::foo` is parsed as a general pattern and not a self argument.
struct S;
// The first parameter of every method below is a pattern that starts with the
// path `self::S`, written bare and behind `&`, `&mut` and `&'a`.
// Only the `&'a` form carries expected-error annotations.
impl S {
fn f(self::S: S) {}
fn g(&self::S: &S) {}
fn h(&mut self::S: &mut S) {}
fn i(&'a self::S: &S) {} //~ ERROR unexpected lifetime `'a` in pattern
//~^ ERROR expected one of `)` or `mut`, found `'a`
}
fn main() {}

View File

@ -10,4 +10,14 @@
use self::Self as Foo; //~ ERROR unresolved import `self::Self`
pub fn main() {}
pub fn main() {
let Self = 5;
//~^ ERROR unresolved unit struct/variant or constant `Self`
match 15 {
Self => (),
//~^ ERROR unresolved unit struct/variant or constant `Self`
Foo { x: Self } => (),
//~^ ERROR unresolved unit struct/variant or constant `Self`
}
}

View File

@ -17,12 +17,7 @@ struct Bar<'Self>;
//~^ ERROR lifetimes cannot use keyword names
pub fn main() {
let Self = 5;
//~^ ERROR expected identifier, found keyword `Self`
match 15 {
Self => (),
//~^ ERROR expected identifier, found keyword `Self`
ref Self => (),
//~^ ERROR expected identifier, found keyword `Self`
mut Self => (),
@ -31,8 +26,6 @@ pub fn main() {
//~^ ERROR expected identifier, found keyword `Self`
Self!() => (),
//~^ ERROR macro undefined: 'Self!'
Foo { x: Self } => (),
//~^ ERROR expected identifier, found keyword `Self`
Foo { Self } => (),
//~^ ERROR expected identifier, found keyword `Self`
}

View File

@ -10,6 +10,12 @@
#![feature(untagged_unions)]
// A macro named `union` whose empty-argument arm expands to `struct S;`.
// NOTE(review): presumably here to check that `union` followed by `!` is still
// parsed as a macro invocation rather than a union item — confirm against the
// full test file.
macro_rules! union {
() => (struct S;)
}
union!();
fn union() {}
fn main() {