flatten rustc_lexer::character_properties module
At the call site, `rustc_lexer::is_whitespace` reads much better than `character_properties::is_whitespace`.
This commit is contained in:
parent
a0c186c34f
commit
206fe8e1c3
|
@ -23,7 +23,6 @@ use std::string;
|
||||||
use std::iter;
|
use std::iter;
|
||||||
|
|
||||||
use syntax_pos::{InnerSpan, Symbol};
|
use syntax_pos::{InnerSpan, Symbol};
|
||||||
use rustc_lexer::character_properties::{is_id_start, is_id_continue};
|
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
struct InnerOffset(usize);
|
struct InnerOffset(usize);
|
||||||
|
@ -602,7 +601,7 @@ impl<'a> Parser<'a> {
|
||||||
/// Rust identifier, except that it can't start with `_` character.
|
/// Rust identifier, except that it can't start with `_` character.
|
||||||
fn word(&mut self) -> &'a str {
|
fn word(&mut self) -> &'a str {
|
||||||
let start = match self.cur.peek() {
|
let start = match self.cur.peek() {
|
||||||
Some(&(pos, c)) if c != '_' && is_id_start(c) => {
|
Some(&(pos, c)) if c != '_' && rustc_lexer::is_id_start(c) => {
|
||||||
self.cur.next();
|
self.cur.next();
|
||||||
pos
|
pos
|
||||||
}
|
}
|
||||||
|
@ -611,7 +610,7 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
while let Some(&(pos, c)) = self.cur.peek() {
|
while let Some(&(pos, c)) = self.cur.peek() {
|
||||||
if is_id_continue(c) {
|
if rustc_lexer::is_id_continue(c) {
|
||||||
self.cur.next();
|
self.cur.next();
|
||||||
} else {
|
} else {
|
||||||
return &self.input[start..pos];
|
return &self.input[start..pos];
|
||||||
|
|
|
@ -102,6 +102,62 @@ pub fn tokenize(mut input: &str) -> impl Iterator<Item = Token> + '_ {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See [UAX #31](http://unicode.org/reports/tr31) for definitions of these
|
||||||
|
// classes.
|
||||||
|
|
||||||
|
/// True if `c` is considered whitespace according to the Rust language definition.
|
||||||
|
pub fn is_whitespace(c: char) -> bool {
|
||||||
|
// This is Pattern_White_Space.
|
||||||
|
//
|
||||||
|
// Note that this set is stable (i.e., it doesn't change with different
|
||||||
|
// Unicode versions), so it's ok to just hard-code the values.
|
||||||
|
|
||||||
|
match c {
|
||||||
|
// Usual ASCII suspects
|
||||||
|
| '\u{0009}' // \t
|
||||||
|
| '\u{000A}' // \n
|
||||||
|
| '\u{000B}' // vertical tab
|
||||||
|
| '\u{000C}' // form feed
|
||||||
|
| '\u{000D}' // \r
|
||||||
|
| '\u{0020}' // space
|
||||||
|
|
||||||
|
// NEXT LINE from latin1
|
||||||
|
| '\u{0085}'
|
||||||
|
|
||||||
|
// Bidi markers
|
||||||
|
| '\u{200E}' // LEFT-TO-RIGHT MARK
|
||||||
|
| '\u{200F}' // RIGHT-TO-LEFT MARK
|
||||||
|
|
||||||
|
// Dedicated whitespace characters from Unicode
|
||||||
|
| '\u{2028}' // LINE SEPARATOR
|
||||||
|
| '\u{2029}' // PARAGRAPH SEPARATOR
|
||||||
|
=> true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// True if `c` is valid as a first character of an identifier.
|
||||||
|
pub fn is_id_start(c: char) -> bool {
|
||||||
|
// This is XID_Start OR '_' (which formally is not an XID_Start).
|
||||||
|
// We also add a fast path for ASCII identifiers.
|
||||||
|
('a' <= c && c <= 'z')
|
||||||
|
|| ('A' <= c && c <= 'Z')
|
||||||
|
|| c == '_'
|
||||||
|
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// True if `c` is valid as a non-first character of an identifier.
|
||||||
|
pub fn is_id_continue(c: char) -> bool {
|
||||||
|
// This is exactly XID_Continue.
|
||||||
|
// We also add a fast path for ASCII identifiers.
|
||||||
|
('a' <= c && c <= 'z')
|
||||||
|
|| ('A' <= c && c <= 'Z')
|
||||||
|
|| ('0' <= c && c <= '9')
|
||||||
|
|| c == '_'
|
||||||
|
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
impl Cursor<'_> {
|
impl Cursor<'_> {
|
||||||
fn advance_token(&mut self) -> Token {
|
fn advance_token(&mut self) -> Token {
|
||||||
let first_char = self.bump().unwrap();
|
let first_char = self.bump().unwrap();
|
||||||
|
@ -111,9 +167,9 @@ impl Cursor<'_> {
|
||||||
'*' => self.block_comment(),
|
'*' => self.block_comment(),
|
||||||
_ => Slash,
|
_ => Slash,
|
||||||
},
|
},
|
||||||
c if character_properties::is_whitespace(c) => self.whitespace(),
|
c if is_whitespace(c) => self.whitespace(),
|
||||||
'r' => match (self.nth_char(0), self.nth_char(1)) {
|
'r' => match (self.nth_char(0), self.nth_char(1)) {
|
||||||
('#', c1) if character_properties::is_id_start(c1) => self.raw_ident(),
|
('#', c1) if is_id_start(c1) => self.raw_ident(),
|
||||||
('#', _) | ('"', _) => {
|
('#', _) | ('"', _) => {
|
||||||
let (n_hashes, started, terminated) = self.raw_double_quoted_string();
|
let (n_hashes, started, terminated) = self.raw_double_quoted_string();
|
||||||
let suffix_start = self.len_consumed();
|
let suffix_start = self.len_consumed();
|
||||||
|
@ -158,7 +214,7 @@ impl Cursor<'_> {
|
||||||
}
|
}
|
||||||
_ => self.ident(),
|
_ => self.ident(),
|
||||||
},
|
},
|
||||||
c if character_properties::is_id_start(c) => self.ident(),
|
c if is_id_start(c) => self.ident(),
|
||||||
c @ '0'..='9' => {
|
c @ '0'..='9' => {
|
||||||
let literal_kind = self.number(c);
|
let literal_kind = self.number(c);
|
||||||
let suffix_start = self.len_consumed();
|
let suffix_start = self.len_consumed();
|
||||||
|
@ -246,8 +302,8 @@ impl Cursor<'_> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn whitespace(&mut self) -> TokenKind {
|
fn whitespace(&mut self) -> TokenKind {
|
||||||
debug_assert!(character_properties::is_whitespace(self.prev()));
|
debug_assert!(is_whitespace(self.prev()));
|
||||||
while character_properties::is_whitespace(self.nth_char(0)) {
|
while is_whitespace(self.nth_char(0)) {
|
||||||
self.bump();
|
self.bump();
|
||||||
}
|
}
|
||||||
Whitespace
|
Whitespace
|
||||||
|
@ -257,19 +313,19 @@ impl Cursor<'_> {
|
||||||
debug_assert!(
|
debug_assert!(
|
||||||
self.prev() == 'r'
|
self.prev() == 'r'
|
||||||
&& self.nth_char(0) == '#'
|
&& self.nth_char(0) == '#'
|
||||||
&& character_properties::is_id_start(self.nth_char(1))
|
&& is_id_start(self.nth_char(1))
|
||||||
);
|
);
|
||||||
self.bump();
|
self.bump();
|
||||||
self.bump();
|
self.bump();
|
||||||
while character_properties::is_id_continue(self.nth_char(0)) {
|
while is_id_continue(self.nth_char(0)) {
|
||||||
self.bump();
|
self.bump();
|
||||||
}
|
}
|
||||||
RawIdent
|
RawIdent
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ident(&mut self) -> TokenKind {
|
fn ident(&mut self) -> TokenKind {
|
||||||
debug_assert!(character_properties::is_id_start(self.prev()));
|
debug_assert!(is_id_start(self.prev()));
|
||||||
while character_properties::is_id_continue(self.nth_char(0)) {
|
while is_id_continue(self.nth_char(0)) {
|
||||||
self.bump();
|
self.bump();
|
||||||
}
|
}
|
||||||
Ident
|
Ident
|
||||||
|
@ -314,7 +370,7 @@ impl Cursor<'_> {
|
||||||
// integer literal followed by field/method access or a range pattern
|
// integer literal followed by field/method access or a range pattern
|
||||||
// (`0..2` and `12.foo()`)
|
// (`0..2` and `12.foo()`)
|
||||||
'.' if self.nth_char(1) != '.'
|
'.' if self.nth_char(1) != '.'
|
||||||
&& !character_properties::is_id_start(self.nth_char(1)) =>
|
&& !is_id_start(self.nth_char(1)) =>
|
||||||
{
|
{
|
||||||
// might have stuff after the ., and if it does, it needs to start
|
// might have stuff after the ., and if it does, it needs to start
|
||||||
// with a number
|
// with a number
|
||||||
|
@ -344,7 +400,7 @@ impl Cursor<'_> {
|
||||||
fn lifetime_or_char(&mut self) -> TokenKind {
|
fn lifetime_or_char(&mut self) -> TokenKind {
|
||||||
debug_assert!(self.prev() == '\'');
|
debug_assert!(self.prev() == '\'');
|
||||||
let mut starts_with_number = false;
|
let mut starts_with_number = false;
|
||||||
if (character_properties::is_id_start(self.nth_char(0))
|
if (is_id_start(self.nth_char(0))
|
||||||
|| self.nth_char(0).is_digit(10) && {
|
|| self.nth_char(0).is_digit(10) && {
|
||||||
starts_with_number = true;
|
starts_with_number = true;
|
||||||
true
|
true
|
||||||
|
@ -352,7 +408,7 @@ impl Cursor<'_> {
|
||||||
&& self.nth_char(1) != '\''
|
&& self.nth_char(1) != '\''
|
||||||
{
|
{
|
||||||
self.bump();
|
self.bump();
|
||||||
while character_properties::is_id_continue(self.nth_char(0)) {
|
while is_id_continue(self.nth_char(0)) {
|
||||||
self.bump();
|
self.bump();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -494,64 +550,13 @@ impl Cursor<'_> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eat_literal_suffix(&mut self) {
|
fn eat_literal_suffix(&mut self) {
|
||||||
if !character_properties::is_id_start(self.nth_char(0)) {
|
if !is_id_start(self.nth_char(0)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
self.bump();
|
self.bump();
|
||||||
|
|
||||||
while character_properties::is_id_continue(self.nth_char(0)) {
|
while is_id_continue(self.nth_char(0)) {
|
||||||
self.bump();
|
self.bump();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod character_properties {
|
|
||||||
// See [UAX #31](http://unicode.org/reports/tr31) for definitions of these
|
|
||||||
// classes.
|
|
||||||
|
|
||||||
// This is Pattern_White_Space.
|
|
||||||
//
|
|
||||||
// Note that this set is stable (ie, it doesn't change with different
|
|
||||||
// Unicode versions), so it's ok to just hard-code the values.
|
|
||||||
pub fn is_whitespace(c: char) -> bool {
|
|
||||||
match c {
|
|
||||||
// Usual ASCII suspects
|
|
||||||
| '\u{0009}' // \t
|
|
||||||
| '\u{000A}' // \n
|
|
||||||
| '\u{000B}' // vertical tab
|
|
||||||
| '\u{000C}' // form feed
|
|
||||||
| '\u{000D}' // \r
|
|
||||||
| '\u{0020}' // space
|
|
||||||
|
|
||||||
// NEXT LINE from latin1
|
|
||||||
| '\u{0085}'
|
|
||||||
|
|
||||||
// Bidi markers
|
|
||||||
| '\u{200E}' // LEFT-TO-RIGHT MARK
|
|
||||||
| '\u{200F}' // RIGHT-TO-LEFT MARK
|
|
||||||
|
|
||||||
// Dedicated whitespace characters from Unicode
|
|
||||||
| '\u{2028}' // LINE SEPARATOR
|
|
||||||
| '\u{2029}' // PARAGRAPH SEPARATOR
|
|
||||||
=> true,
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This is XID_Start OR '_' (which formally is not a XID_Start).
|
|
||||||
pub fn is_id_start(c: char) -> bool {
|
|
||||||
('a' <= c && c <= 'z')
|
|
||||||
|| ('A' <= c && c <= 'Z')
|
|
||||||
|| c == '_'
|
|
||||||
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c))
|
|
||||||
}
|
|
||||||
|
|
||||||
// This is XID_Continue.
|
|
||||||
pub fn is_id_continue(c: char) -> bool {
|
|
||||||
('a' <= c && c <= 'z')
|
|
||||||
|| ('A' <= c && c <= 'Z')
|
|
||||||
|| ('0' <= c && c <= '9')
|
|
||||||
|| c == '_'
|
|
||||||
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
use rustc::mir::*;
|
use rustc::mir::*;
|
||||||
use rustc::ty;
|
use rustc::ty;
|
||||||
use rustc_errors::{DiagnosticBuilder,Applicability};
|
use rustc_errors::{DiagnosticBuilder,Applicability};
|
||||||
use rustc_lexer::character_properties::is_whitespace;
|
|
||||||
use syntax_pos::Span;
|
use syntax_pos::Span;
|
||||||
|
|
||||||
use crate::borrow_check::MirBorrowckCtxt;
|
use crate::borrow_check::MirBorrowckCtxt;
|
||||||
|
@ -525,7 +524,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, 'tcx> {
|
||||||
let suggestion;
|
let suggestion;
|
||||||
let to_remove;
|
let to_remove;
|
||||||
if pat_snippet.starts_with("mut")
|
if pat_snippet.starts_with("mut")
|
||||||
&& pat_snippet["mut".len()..].starts_with(is_whitespace)
|
&& pat_snippet["mut".len()..].starts_with(rustc_lexer::is_whitespace)
|
||||||
{
|
{
|
||||||
suggestion = pat_snippet["mut".len()..].trim_start();
|
suggestion = pat_snippet["mut".len()..].trim_start();
|
||||||
to_remove = "&mut";
|
to_remove = "&mut";
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
use rustc_lexer::character_properties::is_whitespace;
|
|
||||||
use rustc::hir;
|
use rustc::hir;
|
||||||
use rustc::hir::Node;
|
use rustc::hir::Node;
|
||||||
use rustc::mir::{self, BindingForm, ClearCrossCrate, Local, Location, Body};
|
use rustc::mir::{self, BindingForm, ClearCrossCrate, Local, Location, Body};
|
||||||
|
@ -715,7 +714,7 @@ fn annotate_struct_field(
|
||||||
fn suggest_ref_mut(tcx: TyCtxt<'_>, binding_span: Span) -> Option<String> {
|
fn suggest_ref_mut(tcx: TyCtxt<'_>, binding_span: Span) -> Option<String> {
|
||||||
let hi_src = tcx.sess.source_map().span_to_snippet(binding_span).ok()?;
|
let hi_src = tcx.sess.source_map().span_to_snippet(binding_span).ok()?;
|
||||||
if hi_src.starts_with("ref")
|
if hi_src.starts_with("ref")
|
||||||
&& hi_src["ref".len()..].starts_with(is_whitespace)
|
&& hi_src["ref".len()..].starts_with(rustc_lexer::is_whitespace)
|
||||||
{
|
{
|
||||||
let replacement = format!("ref mut{}", &hi_src["ref".len()..]);
|
let replacement = format!("ref mut{}", &hi_src["ref".len()..]);
|
||||||
Some(replacement)
|
Some(replacement)
|
||||||
|
|
|
@ -4,7 +4,6 @@ use rustc::hir;
|
||||||
use rustc::hir::intravisit;
|
use rustc::hir::intravisit;
|
||||||
use rustc::session::{self, config, DiagnosticOutput};
|
use rustc::session::{self, config, DiagnosticOutput};
|
||||||
use rustc::util::common::ErrorReported;
|
use rustc::util::common::ErrorReported;
|
||||||
use rustc_lexer::character_properties::{is_id_start, is_id_continue};
|
|
||||||
use syntax::ast;
|
use syntax::ast;
|
||||||
use syntax::with_globals;
|
use syntax::with_globals;
|
||||||
use syntax::source_map::SourceMap;
|
use syntax::source_map::SourceMap;
|
||||||
|
@ -764,8 +763,8 @@ impl Tester for Collector {
|
||||||
// We use these headings as test names, so it's good if
|
// We use these headings as test names, so it's good if
|
||||||
// they're valid identifiers.
|
// they're valid identifiers.
|
||||||
let name = name.chars().enumerate().map(|(i, c)| {
|
let name = name.chars().enumerate().map(|(i, c)| {
|
||||||
if (i == 0 && is_id_start(c)) ||
|
if (i == 0 && rustc_lexer::is_id_start(c)) ||
|
||||||
(i != 0 && is_id_continue(c)) {
|
(i != 0 && rustc_lexer::is_id_continue(c)) {
|
||||||
c
|
c
|
||||||
} else {
|
} else {
|
||||||
'_'
|
'_'
|
||||||
|
|
|
@ -6,7 +6,6 @@ use crate::tokenstream::{self, DelimSpan, IsJoint::*, TokenStream, TreeAndJoint}
|
||||||
|
|
||||||
use errors::{Diagnostic, DiagnosticBuilder};
|
use errors::{Diagnostic, DiagnosticBuilder};
|
||||||
use rustc_data_structures::sync::Lrc;
|
use rustc_data_structures::sync::Lrc;
|
||||||
use rustc_lexer::character_properties::{is_id_start, is_id_continue};
|
|
||||||
use syntax_pos::{BytePos, FileName, MultiSpan, Pos, SourceFile, Span};
|
use syntax_pos::{BytePos, FileName, MultiSpan, Pos, SourceFile, Span};
|
||||||
use syntax_pos::symbol::{kw, sym, Symbol};
|
use syntax_pos::symbol::{kw, sym, Symbol};
|
||||||
|
|
||||||
|
@ -323,7 +322,7 @@ impl Ident {
|
||||||
fn is_valid(string: &str) -> bool {
|
fn is_valid(string: &str) -> bool {
|
||||||
let mut chars = string.chars();
|
let mut chars = string.chars();
|
||||||
if let Some(start) = chars.next() {
|
if let Some(start) = chars.next() {
|
||||||
is_id_start(start) && chars.all(is_id_continue)
|
rustc_lexer::is_id_start(start) && chars.all(rustc_lexer::is_id_continue)
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
|
@ -63,7 +63,7 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
|
||||||
(None, None) => return true,
|
(None, None) => return true,
|
||||||
(None, _) => return false,
|
(None, _) => return false,
|
||||||
(Some(&a), None) => {
|
(Some(&a), None) => {
|
||||||
if is_pattern_whitespace(a) {
|
if rustc_lexer::is_whitespace(a) {
|
||||||
break // trailing whitespace check is out of loop for borrowck
|
break // trailing whitespace check is out of loop for borrowck
|
||||||
} else {
|
} else {
|
||||||
return false
|
return false
|
||||||
|
@ -72,11 +72,11 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
|
||||||
(Some(&a), Some(&b)) => (a, b)
|
(Some(&a), Some(&b)) => (a, b)
|
||||||
};
|
};
|
||||||
|
|
||||||
if is_pattern_whitespace(a) && is_pattern_whitespace(b) {
|
if rustc_lexer::is_whitespace(a) && rustc_lexer::is_whitespace(b) {
|
||||||
// skip whitespace for a and b
|
// skip whitespace for a and b
|
||||||
scan_for_non_ws_or_end(&mut a_iter);
|
scan_for_non_ws_or_end(&mut a_iter);
|
||||||
scan_for_non_ws_or_end(&mut b_iter);
|
scan_for_non_ws_or_end(&mut b_iter);
|
||||||
} else if is_pattern_whitespace(a) {
|
} else if rustc_lexer::is_whitespace(a) {
|
||||||
// skip whitespace for a
|
// skip whitespace for a
|
||||||
scan_for_non_ws_or_end(&mut a_iter);
|
scan_for_non_ws_or_end(&mut a_iter);
|
||||||
} else if a == b {
|
} else if a == b {
|
||||||
|
@ -88,20 +88,16 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if a has *only* trailing whitespace
|
// check if a has *only* trailing whitespace
|
||||||
a_iter.all(is_pattern_whitespace)
|
a_iter.all(rustc_lexer::is_whitespace)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Advances the given peekable `Iterator` until it reaches a non-whitespace character
|
/// Advances the given peekable `Iterator` until it reaches a non-whitespace character
|
||||||
fn scan_for_non_ws_or_end<I: Iterator<Item = char>>(iter: &mut Peekable<I>) {
|
fn scan_for_non_ws_or_end<I: Iterator<Item = char>>(iter: &mut Peekable<I>) {
|
||||||
while iter.peek().copied().map(|c| is_pattern_whitespace(c)) == Some(true) {
|
while iter.peek().copied().map(|c| rustc_lexer::is_whitespace(c)) == Some(true) {
|
||||||
iter.next();
|
iter.next();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_pattern_whitespace(c: char) -> bool {
|
|
||||||
rustc_lexer::character_properties::is_whitespace(c)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Identify a position in the text by the Nth occurrence of a string.
|
/// Identify a position in the text by the Nth occurrence of a string.
|
||||||
struct Position {
|
struct Position {
|
||||||
string: &'static str,
|
string: &'static str,
|
||||||
|
|
Loading…
Reference in New Issue