diff --git a/Cargo.lock b/Cargo.lock index ba0f55ab5af..45a1a169be4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3642,6 +3642,7 @@ dependencies = [ "rustc_span", "rustc_target", "syntax", + "unicode-security", ] [[package]] @@ -4940,6 +4941,21 @@ dependencies = [ "smallvec 1.0.0", ] +[[package]] +name = "unicode-script" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b2c5c29e805da6817f5af6a627d65adb045cebf05cccd5a3493d6109454391c" + +[[package]] +name = "unicode-security" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c49d35967fa037b881acc34ef717c38c4b5560eba10e3685271b3f530bb19634" +dependencies = [ + "unicode-script", +] + [[package]] name = "unicode-segmentation" version = "1.6.0" diff --git a/src/librustc_lint/Cargo.toml b/src/librustc_lint/Cargo.toml index 600f7031ed5..a40c5d1697c 100644 --- a/src/librustc_lint/Cargo.toml +++ b/src/librustc_lint/Cargo.toml @@ -10,6 +10,7 @@ path = "lib.rs" [dependencies] log = "0.4" +unicode-security = "0.0.2" rustc = { path = "../librustc" } rustc_target = { path = "../librustc_target" } syntax = { path = "../libsyntax" } diff --git a/src/librustc_lint/non_ascii_idents.rs b/src/librustc_lint/non_ascii_idents.rs index 9ec84553942..f30d0bcbdd5 100644 --- a/src/librustc_lint/non_ascii_idents.rs +++ b/src/librustc_lint/non_ascii_idents.rs @@ -7,15 +7,32 @@ declare_lint! { "detects non-ASCII identifiers" } -declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS]); +declare_lint! 
{ + pub UNCOMMON_CODEPOINTS, + Warn, + "detects uncommon Unicode codepoints in identifiers" +} + +declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS]); impl EarlyLintPass for NonAsciiIdents { fn check_ident(&mut self, cx: &EarlyContext<'_>, ident: ast::Ident) { - if !ident.name.as_str().is_ascii() { + use unicode_security::GeneralSecurityProfile; + let name_str = ident.name.as_str(); + if name_str.is_ascii() { + return; + } + cx.struct_span_lint( + NON_ASCII_IDENTS, + ident.span, + "identifier contains non-ASCII characters", + ) + .emit(); + if !name_str.chars().all(GeneralSecurityProfile::identifier_allowed) { cx.struct_span_lint( - NON_ASCII_IDENTS, + UNCOMMON_CODEPOINTS, ident.span, - "identifier contains non-ASCII characters", + "identifier contains uncommon Unicode codepoints", ) .emit(); } diff --git a/src/test/ui/issues/issue-48508.rs b/src/test/ui/issues/issue-48508.rs index b7aa6422876..87965c204ad 100644 --- a/src/test/ui/issues/issue-48508.rs +++ b/src/test/ui/issues/issue-48508.rs @@ -11,6 +11,7 @@ // ignore-asmjs wasm2js does not support source maps yet #![feature(non_ascii_idents)] +#[allow(uncommon_codepoints)] #[path = "issue-48508-aux.rs"] mod other_file; diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs new file mode 100644 index 00000000000..7ac0d035d5b --- /dev/null +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs @@ -0,0 +1,11 @@ +#![feature(non_ascii_idents)] +#![deny(uncommon_codepoints)] + +const µ: f64 = 0.000001; //~ ERROR identifier contains uncommon Unicode codepoints + +fn ĳkstra() {} //~ ERROR identifier contains uncommon Unicode codepoints + +fn main() { + let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints + println!("{}", ㇻㇲㇳ); //~ ERROR identifier contains uncommon Unicode codepoints +} diff --git
a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr new file mode 100644 index 00000000000..4580d25665e --- /dev/null +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr @@ -0,0 +1,32 @@ +error: identifier contains uncommon Unicode codepoints + --> $DIR/lint-uncommon-codepoints.rs:4:7 + | +LL | const µ: f64 = 0.000001; + | ^ + | +note: lint level defined here + --> $DIR/lint-uncommon-codepoints.rs:2:9 + | +LL | #![deny(uncommon_codepoints)] + | ^^^^^^^^^^^^^^^^^^^ + +error: identifier contains uncommon Unicode codepoints + --> $DIR/lint-uncommon-codepoints.rs:6:4 + | +LL | fn ĳkstra() {} + | ^^^^^^^ + +error: identifier contains uncommon Unicode codepoints + --> $DIR/lint-uncommon-codepoints.rs:9:9 + | +LL | let ㇻㇲㇳ = "rust"; + | ^^^^^^ + +error: identifier contains uncommon Unicode codepoints + --> $DIR/lint-uncommon-codepoints.rs:10:20 + | +LL | println!("{}", ㇻㇲㇳ); + | ^^^^^^ + +error: aborting due to 4 previous errors + diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index a3042803dd7..352c00dbe41 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -171,6 +171,8 @@ const WHITELIST: &[Crate<'_>] = &[ Crate("thread_local"), Crate("ucd-util"), Crate("unicode-normalization"), + Crate("unicode-script"), + Crate("unicode-security"), Crate("unicode-width"), Crate("unicode-xid"), Crate("unreachable"),