flatten rustc_lexer::character_properties module
At the call site, `rustc_lexer::is_whitespace` reads much better than `character_properties::is_whitespace`.
This commit is contained in:
parent
a0c186c34f
commit
206fe8e1c3
|
@ -23,7 +23,6 @@ use std::string;
|
|||
use std::iter;
|
||||
|
||||
use syntax_pos::{InnerSpan, Symbol};
|
||||
use rustc_lexer::character_properties::{is_id_start, is_id_continue};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
struct InnerOffset(usize);
|
||||
|
@ -602,7 +601,7 @@ impl<'a> Parser<'a> {
|
|||
/// Rust identifier, except that it can't start with `_` character.
|
||||
fn word(&mut self) -> &'a str {
|
||||
let start = match self.cur.peek() {
|
||||
Some(&(pos, c)) if c != '_' && is_id_start(c) => {
|
||||
Some(&(pos, c)) if c != '_' && rustc_lexer::is_id_start(c) => {
|
||||
self.cur.next();
|
||||
pos
|
||||
}
|
||||
|
@ -611,7 +610,7 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
};
|
||||
while let Some(&(pos, c)) = self.cur.peek() {
|
||||
if is_id_continue(c) {
|
||||
if rustc_lexer::is_id_continue(c) {
|
||||
self.cur.next();
|
||||
} else {
|
||||
return &self.input[start..pos];
|
||||
|
|
|
@ -102,6 +102,62 @@ pub fn tokenize(mut input: &str) -> impl Iterator<Item = Token> + '_ {
|
|||
})
|
||||
}
|
||||
|
||||
// See [UAX #31](http://unicode.org/reports/tr31) for definitions of these
|
||||
// classes.
|
||||
|
||||
/// True if `c` is considered a whitespace according to Rust language definition.
///
/// The accepted set is Unicode `Pattern_White_Space`. That set is stable
/// (i.e. it does not change between Unicode versions), so the code points
/// are hard-coded here instead of being looked up.
pub fn is_whitespace(c: char) -> bool {
    match c {
        // Usual ASCII suspects
        | '\u{0009}' // \t
        | '\u{000A}' // \n
        | '\u{000B}' // vertical tab
        | '\u{000C}' // form feed
        | '\u{000D}' // \r
        | '\u{0020}' // space

        // NEXT LINE from latin1
        | '\u{0085}'

        // Bidi markers
        | '\u{200E}' // LEFT-TO-RIGHT MARK
        | '\u{200F}' // RIGHT-TO-LEFT MARK

        // Dedicated whitespace characters from Unicode
        | '\u{2028}' // LINE SEPARATOR
        | '\u{2029}' // PARAGRAPH SEPARATOR
        => true,

        // Everything else -- including other Unicode spaces such as
        // U+00A0 NO-BREAK SPACE -- is not whitespace for this function.
        _ => false,
    }
}
|
||||
|
||||
/// True if `c` is valid as a first character of an identifier.
|
||||
pub fn is_id_start(c: char) -> bool {
|
||||
// This is XID_Start OR '_' (which formally is not a XID_Start).
|
||||
// We also add fast-path for ascii idents
|
||||
('a' <= c && c <= 'z')
|
||||
|| ('A' <= c && c <= 'Z')
|
||||
|| c == '_'
|
||||
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c))
|
||||
}
|
||||
|
||||
/// True if `c` is valid as a non-first character of an identifier.
|
||||
pub fn is_id_continue(c: char) -> bool {
|
||||
// This is exactly XID_Continue.
|
||||
// We also add fast-path for ascii idents
|
||||
('a' <= c && c <= 'z')
|
||||
|| ('A' <= c && c <= 'Z')
|
||||
|| ('0' <= c && c <= '9')
|
||||
|| c == '_'
|
||||
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c))
|
||||
}
|
||||
|
||||
|
||||
impl Cursor<'_> {
|
||||
fn advance_token(&mut self) -> Token {
|
||||
let first_char = self.bump().unwrap();
|
||||
|
@ -111,9 +167,9 @@ impl Cursor<'_> {
|
|||
'*' => self.block_comment(),
|
||||
_ => Slash,
|
||||
},
|
||||
c if character_properties::is_whitespace(c) => self.whitespace(),
|
||||
c if is_whitespace(c) => self.whitespace(),
|
||||
'r' => match (self.nth_char(0), self.nth_char(1)) {
|
||||
('#', c1) if character_properties::is_id_start(c1) => self.raw_ident(),
|
||||
('#', c1) if is_id_start(c1) => self.raw_ident(),
|
||||
('#', _) | ('"', _) => {
|
||||
let (n_hashes, started, terminated) = self.raw_double_quoted_string();
|
||||
let suffix_start = self.len_consumed();
|
||||
|
@ -158,7 +214,7 @@ impl Cursor<'_> {
|
|||
}
|
||||
_ => self.ident(),
|
||||
},
|
||||
c if character_properties::is_id_start(c) => self.ident(),
|
||||
c if is_id_start(c) => self.ident(),
|
||||
c @ '0'..='9' => {
|
||||
let literal_kind = self.number(c);
|
||||
let suffix_start = self.len_consumed();
|
||||
|
@ -246,8 +302,8 @@ impl Cursor<'_> {
|
|||
}
|
||||
|
||||
fn whitespace(&mut self) -> TokenKind {
|
||||
debug_assert!(character_properties::is_whitespace(self.prev()));
|
||||
while character_properties::is_whitespace(self.nth_char(0)) {
|
||||
debug_assert!(is_whitespace(self.prev()));
|
||||
while is_whitespace(self.nth_char(0)) {
|
||||
self.bump();
|
||||
}
|
||||
Whitespace
|
||||
|
@ -257,19 +313,19 @@ impl Cursor<'_> {
|
|||
debug_assert!(
|
||||
self.prev() == 'r'
|
||||
&& self.nth_char(0) == '#'
|
||||
&& character_properties::is_id_start(self.nth_char(1))
|
||||
&& is_id_start(self.nth_char(1))
|
||||
);
|
||||
self.bump();
|
||||
self.bump();
|
||||
while character_properties::is_id_continue(self.nth_char(0)) {
|
||||
while is_id_continue(self.nth_char(0)) {
|
||||
self.bump();
|
||||
}
|
||||
RawIdent
|
||||
}
|
||||
|
||||
fn ident(&mut self) -> TokenKind {
|
||||
debug_assert!(character_properties::is_id_start(self.prev()));
|
||||
while character_properties::is_id_continue(self.nth_char(0)) {
|
||||
debug_assert!(is_id_start(self.prev()));
|
||||
while is_id_continue(self.nth_char(0)) {
|
||||
self.bump();
|
||||
}
|
||||
Ident
|
||||
|
@ -314,7 +370,7 @@ impl Cursor<'_> {
|
|||
// integer literal followed by field/method access or a range pattern
|
||||
// (`0..2` and `12.foo()`)
|
||||
'.' if self.nth_char(1) != '.'
|
||||
&& !character_properties::is_id_start(self.nth_char(1)) =>
|
||||
&& !is_id_start(self.nth_char(1)) =>
|
||||
{
|
||||
// might have stuff after the ., and if it does, it needs to start
|
||||
// with a number
|
||||
|
@ -344,7 +400,7 @@ impl Cursor<'_> {
|
|||
fn lifetime_or_char(&mut self) -> TokenKind {
|
||||
debug_assert!(self.prev() == '\'');
|
||||
let mut starts_with_number = false;
|
||||
if (character_properties::is_id_start(self.nth_char(0))
|
||||
if (is_id_start(self.nth_char(0))
|
||||
|| self.nth_char(0).is_digit(10) && {
|
||||
starts_with_number = true;
|
||||
true
|
||||
|
@ -352,7 +408,7 @@ impl Cursor<'_> {
|
|||
&& self.nth_char(1) != '\''
|
||||
{
|
||||
self.bump();
|
||||
while character_properties::is_id_continue(self.nth_char(0)) {
|
||||
while is_id_continue(self.nth_char(0)) {
|
||||
self.bump();
|
||||
}
|
||||
|
||||
|
@ -494,64 +550,13 @@ impl Cursor<'_> {
|
|||
}
|
||||
|
||||
// Returns immediately when `self.nth_char(0)` does not satisfy `is_id_start`;
// otherwise calls `self.bump()` once and then keeps calling it for as long as
// `self.nth_char(0)` satisfies `is_id_continue`.
//
// NOTE(review): this span is a diff hunk without +/- markers, so each changed
// statement appears twice: first the pre-commit form qualified with
// `character_properties::`, then the post-commit form calling the crate-level
// function of the same name.
fn eat_literal_suffix(&mut self) {
|
||||
// Pre-commit form of the guard.
if !character_properties::is_id_start(self.nth_char(0)) {
|
||||
// Post-commit form of the guard.
if !is_id_start(self.nth_char(0)) {
|
||||
return;
|
||||
}
|
||||
self.bump();
|
||||
|
||||
// Pre-commit form of the loop header.
while character_properties::is_id_continue(self.nth_char(0)) {
|
||||
// Post-commit form of the loop header.
while is_id_continue(self.nth_char(0)) {
|
||||
self.bump();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): pre-commit layout. The commit title ("flatten
// rustc_lexer::character_properties module") and the crate-level copies of
// these three functions earlier in this diff indicate that this nested module
// is the side being removed.
pub mod character_properties {
|
||||
// See [UAX #31](http://unicode.org/reports/tr31) for definitions of these
|
||||
// classes.
|
||||
|
||||
// Returns true when `c` is one of the code points listed in the match below.
// This is Pattern_White_Space.
|
||||
//
|
||||
// Note that this set is stable (i.e., it doesn't change with different
|
||||
// Unicode versions), so it's ok to just hard-code the values.
|
||||
pub fn is_whitespace(c: char) -> bool {
|
||||
match c {
|
||||
// Usual ASCII suspects
|
||||
| '\u{0009}' // \t
|
||||
| '\u{000A}' // \n
|
||||
| '\u{000B}' // vertical tab
|
||||
| '\u{000C}' // form feed
|
||||
| '\u{000D}' // \r
|
||||
| '\u{0020}' // space
|
||||
|
||||
// NEXT LINE from latin1
|
||||
| '\u{0085}'
|
||||
|
||||
// Bidi markers
|
||||
| '\u{200E}' // LEFT-TO-RIGHT MARK
|
||||
| '\u{200F}' // RIGHT-TO-LEFT MARK
|
||||
|
||||
// Dedicated whitespace characters from Unicode
|
||||
| '\u{2028}' // LINE SEPARATOR
|
||||
| '\u{2029}' // PARAGRAPH SEPARATOR
|
||||
=> true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
// ASCII letters and `_` are accepted directly; `unicode_xid` is consulted only
// when `c > '\x7f'`.
// This is XID_Start OR '_' (which formally is not a XID_Start).
|
||||
pub fn is_id_start(c: char) -> bool {
|
||||
('a' <= c && c <= 'z')
|
||||
|| ('A' <= c && c <= 'Z')
|
||||
|| c == '_'
|
||||
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c))
|
||||
}
|
||||
|
||||
// ASCII letters, ASCII digits and `_` are accepted directly; `unicode_xid` is
// consulted only when `c > '\x7f'`.
// This is XID_Continue.
|
||||
pub fn is_id_continue(c: char) -> bool {
|
||||
('a' <= c && c <= 'z')
|
||||
|| ('A' <= c && c <= 'Z')
|
||||
|| ('0' <= c && c <= '9')
|
||||
|| c == '_'
|
||||
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
use rustc::mir::*;
|
||||
use rustc::ty;
|
||||
use rustc_errors::{DiagnosticBuilder,Applicability};
|
||||
use rustc_lexer::character_properties::is_whitespace;
|
||||
use syntax_pos::Span;
|
||||
|
||||
use crate::borrow_check::MirBorrowckCtxt;
|
||||
|
@ -525,7 +524,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, 'tcx> {
|
|||
let suggestion;
|
||||
let to_remove;
|
||||
if pat_snippet.starts_with("mut")
|
||||
&& pat_snippet["mut".len()..].starts_with(is_whitespace)
|
||||
&& pat_snippet["mut".len()..].starts_with(rustc_lexer::is_whitespace)
|
||||
{
|
||||
suggestion = pat_snippet["mut".len()..].trim_start();
|
||||
to_remove = "&mut";
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
use rustc_lexer::character_properties::is_whitespace;
|
||||
use rustc::hir;
|
||||
use rustc::hir::Node;
|
||||
use rustc::mir::{self, BindingForm, ClearCrossCrate, Local, Location, Body};
|
||||
|
@ -715,7 +714,7 @@ fn annotate_struct_field(
|
|||
fn suggest_ref_mut(tcx: TyCtxt<'_>, binding_span: Span) -> Option<String> {
|
||||
let hi_src = tcx.sess.source_map().span_to_snippet(binding_span).ok()?;
|
||||
if hi_src.starts_with("ref")
|
||||
&& hi_src["ref".len()..].starts_with(is_whitespace)
|
||||
&& hi_src["ref".len()..].starts_with(rustc_lexer::is_whitespace)
|
||||
{
|
||||
let replacement = format!("ref mut{}", &hi_src["ref".len()..]);
|
||||
Some(replacement)
|
||||
|
|
|
@ -4,7 +4,6 @@ use rustc::hir;
|
|||
use rustc::hir::intravisit;
|
||||
use rustc::session::{self, config, DiagnosticOutput};
|
||||
use rustc::util::common::ErrorReported;
|
||||
use rustc_lexer::character_properties::{is_id_start, is_id_continue};
|
||||
use syntax::ast;
|
||||
use syntax::with_globals;
|
||||
use syntax::source_map::SourceMap;
|
||||
|
@ -764,8 +763,8 @@ impl Tester for Collector {
|
|||
// We use these headings as test names, so it's good if
|
||||
// they're valid identifiers.
|
||||
let name = name.chars().enumerate().map(|(i, c)| {
|
||||
if (i == 0 && is_id_start(c)) ||
|
||||
(i != 0 && is_id_continue(c)) {
|
||||
if (i == 0 && rustc_lexer::is_id_start(c)) ||
|
||||
(i != 0 && rustc_lexer::is_id_continue(c)) {
|
||||
c
|
||||
} else {
|
||||
'_'
|
||||
|
|
|
@ -6,7 +6,6 @@ use crate::tokenstream::{self, DelimSpan, IsJoint::*, TokenStream, TreeAndJoint}
|
|||
|
||||
use errors::{Diagnostic, DiagnosticBuilder};
|
||||
use rustc_data_structures::sync::Lrc;
|
||||
use rustc_lexer::character_properties::{is_id_start, is_id_continue};
|
||||
use syntax_pos::{BytePos, FileName, MultiSpan, Pos, SourceFile, Span};
|
||||
use syntax_pos::symbol::{kw, sym, Symbol};
|
||||
|
||||
|
@ -323,7 +322,7 @@ impl Ident {
|
|||
fn is_valid(string: &str) -> bool {
|
||||
let mut chars = string.chars();
|
||||
if let Some(start) = chars.next() {
|
||||
is_id_start(start) && chars.all(is_id_continue)
|
||||
rustc_lexer::is_id_start(start) && chars.all(rustc_lexer::is_id_continue)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
|
|
|
@ -63,7 +63,7 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
|
|||
(None, None) => return true,
|
||||
(None, _) => return false,
|
||||
(Some(&a), None) => {
|
||||
if is_pattern_whitespace(a) {
|
||||
if rustc_lexer::is_whitespace(a) {
|
||||
break // trailing whitespace check is out of loop for borrowck
|
||||
} else {
|
||||
return false
|
||||
|
@ -72,11 +72,11 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
|
|||
(Some(&a), Some(&b)) => (a, b)
|
||||
};
|
||||
|
||||
if is_pattern_whitespace(a) && is_pattern_whitespace(b) {
|
||||
if rustc_lexer::is_whitespace(a) && rustc_lexer::is_whitespace(b) {
|
||||
// skip whitespace for a and b
|
||||
scan_for_non_ws_or_end(&mut a_iter);
|
||||
scan_for_non_ws_or_end(&mut b_iter);
|
||||
} else if is_pattern_whitespace(a) {
|
||||
} else if rustc_lexer::is_whitespace(a) {
|
||||
// skip whitespace for a
|
||||
scan_for_non_ws_or_end(&mut a_iter);
|
||||
} else if a == b {
|
||||
|
@ -88,20 +88,16 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
|
|||
}
|
||||
|
||||
// check if a has *only* trailing whitespace
|
||||
a_iter.all(is_pattern_whitespace)
|
||||
a_iter.all(rustc_lexer::is_whitespace)
|
||||
}
|
||||
|
||||
/// Advances the given peekable `Iterator` until it reaches a non-whitespace character
/// or runs out of items. The stopping character is only peeked, never consumed.
//
// NOTE(review): this span is a diff hunk without +/- markers, so the loop
// header appears twice: first the pre-commit form calling the local
// `is_pattern_whitespace` wrapper, then the post-commit form calling
// `rustc_lexer::is_whitespace` directly.
|
||||
fn scan_for_non_ws_or_end<I: Iterator<Item = char>>(iter: &mut Peekable<I>) {
|
||||
// Pre-commit form of the loop header.
while iter.peek().copied().map(|c| is_pattern_whitespace(c)) == Some(true) {
|
||||
// Post-commit form: `peek()` yielding `None` (end of input) or a
// non-whitespace character both compare unequal to `Some(true)` and end the loop.
while iter.peek().copied().map(|c| rustc_lexer::is_whitespace(c)) == Some(true) {
|
||||
iter.next();
|
||||
}
|
||||
}
|
||||
|
||||
// Thin wrapper that forwards `c` to `rustc_lexer::character_properties::is_whitespace`
// and returns its result unchanged.
//
// NOTE(review): pre-commit helper. The other changed lines in this file call
// `rustc_lexer::is_whitespace` directly in its place, so this looks like the
// side of the diff being removed -- confirm against the real +/- markers.
fn is_pattern_whitespace(c: char) -> bool {
|
||||
rustc_lexer::character_properties::is_whitespace(c)
|
||||
}
|
||||
|
||||
/// Identify a position in the text by the Nth occurrence of a string.
|
||||
struct Position {
|
||||
string: &'static str,
|
||||
|
|
Loading…
Reference in New Issue