Normalize identifiers in librustc_parse.

2019-11-23 22:33:40 +08:00 · 2019-11-23 22:33:40 +08:00 · 0bcddfe23a
commit 0bcddfe23a
parent b13d65ab92
3 changed files with 22 additions and 4 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -3696,6 +3696,7 @@ dependencies = [
 "smallvec 1.0.0",
 "syntax",
 "syntax_pos",
 "unicode-normalization",
 ]
 [[package]]
@ -4913,9 +4914,12 @@ dependencies = [
 [[package]]
 name = "unicode-normalization"
-version = "0.1.7"
+version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25"
+checksum = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf"
 dependencies = [
 "smallvec 1.0.0",
 ]
 [[package]]
 name = "unicode-segmentation"
--- a/src/librustc_parse/Cargo.toml
+++ b/src/librustc_parse/Cargo.toml
@ -20,3 +20,4 @@ rustc_error_codes = { path = "../librustc_error_codes" }
 smallvec = { version = "1.0", features = ["union", "may_dangle"] }
 syntax_pos = { path = "../libsyntax_pos" }
 syntax = { path = "../libsyntax" }
 unicode-normalization = "0.1.11"
--- a/src/librustc_parse/lexer/mod.rs
+++ b/src/librustc_parse/lexer/mod.rs
@ -220,8 +220,7 @@ impl<'a> StringReader<'a> {
                if is_raw_ident {
                    ident_start = ident_start + BytePos(2);
                }
-                // FIXME: perform NFKC normalization here. (Issue #2253)
+                let sym = self.nfc_symbol_from(ident_start);
-                let sym = self.symbol_from(ident_start);
                if is_raw_ident {
                    let span = self.mk_sp(start, self.pos);
                    if !sym.can_be_raw() {
@ -470,6 +469,20 @@ impl<'a> StringReader<'a> {
        Symbol::intern(self.str_from_to(start, end))
    }
    /// Interns the text returned by `self.str_from(start)` as a `Symbol`,
    /// converting it to Unicode Normalization Form C (NFC) first when the
    /// quick check cannot confirm that it is already in that form.
    ///
    /// Counterpart of `symbol_from` with an added normalization step.
    fn nfc_symbol_from(&self, start: BytePos) -> Symbol {
        use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
        debug!("taking an normalized ident from {:?} to {:?}", start, self.pos);
        let raw = self.str_from(start);
        // Fast path: the quick check answered a definite "yes", so the source
        // slice is interned directly without building a new string.
        if let IsNormalized::Yes = is_nfc_quick(raw.chars()) {
            return Symbol::intern(raw);
        }
        // Any other quick-check result: run the full NFC pass into an owned
        // `String` and intern that instead.
        let normalized = raw.chars().nfc().collect::<String>();
        Symbol::intern(&normalized)
    }
    /// Slice of the source text spanning from `start` up to but excluding `end`.
    fn str_from_to(&self, start: BytePos, end: BytePos) -> &str {
        &self.src[self.src_index(start)..self.src_index(end)]