Normalize identifiers in librustc_parse.
This commit is contained in:
parent
b13d65ab92
commit
0bcddfe23a
@ -3696,6 +3696,7 @@ dependencies = [
|
|||||||
"smallvec 1.0.0",
|
"smallvec 1.0.0",
|
||||||
"syntax",
|
"syntax",
|
||||||
"syntax_pos",
|
"syntax_pos",
|
||||||
|
"unicode-normalization",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -4913,9 +4914,12 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-normalization"
|
name = "unicode-normalization"
|
||||||
version = "0.1.7"
|
version = "0.1.11"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25"
|
checksum = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf"
|
||||||
|
dependencies = [
|
||||||
|
"smallvec 1.0.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-segmentation"
|
name = "unicode-segmentation"
|
||||||
|
@ -20,3 +20,4 @@ rustc_error_codes = { path = "../librustc_error_codes" }
|
|||||||
smallvec = { version = "1.0", features = ["union", "may_dangle"] }
|
smallvec = { version = "1.0", features = ["union", "may_dangle"] }
|
||||||
syntax_pos = { path = "../libsyntax_pos" }
|
syntax_pos = { path = "../libsyntax_pos" }
|
||||||
syntax = { path = "../libsyntax" }
|
syntax = { path = "../libsyntax" }
|
||||||
|
unicode-normalization = "0.1.11"
|
||||||
|
@ -220,8 +220,7 @@ impl<'a> StringReader<'a> {
|
|||||||
if is_raw_ident {
|
if is_raw_ident {
|
||||||
ident_start = ident_start + BytePos(2);
|
ident_start = ident_start + BytePos(2);
|
||||||
}
|
}
|
||||||
// FIXME: perform NFKC normalization here. (Issue #2253)
|
let sym = self.nfc_symbol_from(ident_start);
|
||||||
let sym = self.symbol_from(ident_start);
|
|
||||||
if is_raw_ident {
|
if is_raw_ident {
|
||||||
let span = self.mk_sp(start, self.pos);
|
let span = self.mk_sp(start, self.pos);
|
||||||
if !sym.can_be_raw() {
|
if !sym.can_be_raw() {
|
||||||
@ -470,6 +469,20 @@ impl<'a> StringReader<'a> {
|
|||||||
Symbol::intern(self.str_from_to(start, end))
|
Symbol::intern(self.str_from_to(start, end))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// As symbol_from, with the text normalized into Unicode NFC form.
|
||||||
|
fn nfc_symbol_from(&self, start: BytePos) -> Symbol {
|
||||||
|
use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
|
||||||
|
debug!("taking an normalized ident from {:?} to {:?}", start, self.pos);
|
||||||
|
let sym = self.str_from(start);
|
||||||
|
match is_nfc_quick(sym.chars()) {
|
||||||
|
IsNormalized::Yes => Symbol::intern(sym),
|
||||||
|
_ => {
|
||||||
|
let sym_str: String = sym.chars().nfc().collect();
|
||||||
|
Symbol::intern(&sym_str)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Slice of the source text spanning from `start` up to but excluding `end`.
|
/// Slice of the source text spanning from `start` up to but excluding `end`.
|
||||||
fn str_from_to(&self, start: BytePos, end: BytePos) -> &str {
|
fn str_from_to(&self, start: BytePos, end: BytePos) -> &str {
|
||||||
&self.src[self.src_index(start)..self.src_index(end)]
|
&self.src[self.src_index(start)..self.src_index(end)]
|
||||||
|
Loading…
Reference in New Issue
Block a user