Implement mixed script confusable lint.
This commit is contained in:
parent
ef24faf130
commit
25e864e198
@ -5405,15 +5405,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "unicode-script"
|
||||
version = "0.4.0"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b2c5c29e805da6817f5af6a627d65adb045cebf05cccd5a3493d6109454391c"
|
||||
checksum = "58b33414ea8db4b7ea0343548dbdc31d27aef06beacf7044a87e564d9b0feb7d"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-security"
|
||||
version = "0.0.3"
|
||||
version = "0.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a5f9011bbed9c13372bc8df618b55a38138445199caf3b61d432c6859c36dee0"
|
||||
checksum = "5d87c28edc5b263377e448d6cdcb935c06b95413d8013ba6fae470558ccab18f"
|
||||
dependencies = [
|
||||
"unicode-normalization",
|
||||
"unicode-script",
|
||||
|
@ -10,7 +10,7 @@ path = "lib.rs"
|
||||
|
||||
[dependencies]
|
||||
log = "0.4"
|
||||
unicode-security = "0.0.3"
|
||||
unicode-security = "0.0.5"
|
||||
rustc_middle = { path = "../librustc_middle" }
|
||||
rustc_ast_pretty = { path = "../librustc_ast_pretty" }
|
||||
rustc_attr = { path = "../librustc_attr" }
|
||||
|
@ -24,12 +24,20 @@ declare_lint! {
|
||||
crate_level_only
|
||||
}
|
||||
|
||||
declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);
|
||||
declare_lint! {
|
||||
pub MIXED_SCRIPT_CONFUSABLES,
|
||||
Warn,
|
||||
"detects Unicode scripts whose mixed script confusables codepoints are solely used",
|
||||
crate_level_only
|
||||
}
|
||||
|
||||
declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS, MIXED_SCRIPT_CONFUSABLES]);
|
||||
|
||||
impl EarlyLintPass for NonAsciiIdents {
|
||||
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
|
||||
use rustc_session::lint::Level;
|
||||
use rustc_span::Span;
|
||||
use std::collections::BTreeMap;
|
||||
use unicode_security::GeneralSecurityProfile;
|
||||
use utils::CowBoxSymStr;
|
||||
|
||||
@ -37,8 +45,14 @@ impl EarlyLintPass for NonAsciiIdents {
|
||||
let check_uncommon_codepoints =
|
||||
cx.builder.lint_level(UNCOMMON_CODEPOINTS).0 != Level::Allow;
|
||||
let check_confusable_idents = cx.builder.lint_level(CONFUSABLE_IDENTS).0 != Level::Allow;
|
||||
let check_mixed_script_confusables =
|
||||
cx.builder.lint_level(MIXED_SCRIPT_CONFUSABLES).0 != Level::Allow;
|
||||
|
||||
if !check_non_ascii_idents && !check_uncommon_codepoints && !check_confusable_idents {
|
||||
if !check_non_ascii_idents
|
||||
&& !check_uncommon_codepoints
|
||||
&& !check_confusable_idents
|
||||
&& !check_mixed_script_confusables
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
@ -107,6 +121,115 @@ impl EarlyLintPass for NonAsciiIdents {
|
||||
.or_insert((symbol_str, sp, is_ascii));
|
||||
}
|
||||
}
|
||||
|
||||
if has_non_ascii_idents && check_mixed_script_confusables {
|
||||
use unicode_security::is_potential_mixed_script_confusable_char;
|
||||
use unicode_security::mixed_script::AugmentedScriptSet;
|
||||
|
||||
#[derive(Clone)]
|
||||
enum ScriptSetUsage {
|
||||
Suspicious(Vec<char>, Span),
|
||||
Verified,
|
||||
}
|
||||
|
||||
let mut script_states: FxHashMap<AugmentedScriptSet, ScriptSetUsage> =
|
||||
FxHashMap::default();
|
||||
let latin_augmented_script_set = AugmentedScriptSet::for_char('A');
|
||||
script_states.insert(latin_augmented_script_set, ScriptSetUsage::Verified);
|
||||
|
||||
let mut has_suspicous = false;
|
||||
for (symbol, &sp) in symbols.iter() {
|
||||
let symbol_str = symbol.as_str();
|
||||
for ch in symbol_str.chars() {
|
||||
if ch.is_ascii() {
|
||||
// all ascii characters are covered by exception.
|
||||
continue;
|
||||
}
|
||||
if !GeneralSecurityProfile::identifier_allowed(ch) {
|
||||
// this character is covered by `uncommon_codepoints` lint.
|
||||
continue;
|
||||
}
|
||||
let augmented_script_set = AugmentedScriptSet::for_char(ch);
|
||||
script_states
|
||||
.entry(augmented_script_set)
|
||||
.and_modify(|existing_state| {
|
||||
if let ScriptSetUsage::Suspicious(ch_list, _) = existing_state {
|
||||
if is_potential_mixed_script_confusable_char(ch) {
|
||||
ch_list.push(ch);
|
||||
} else {
|
||||
*existing_state = ScriptSetUsage::Verified;
|
||||
}
|
||||
}
|
||||
})
|
||||
.or_insert_with(|| {
|
||||
if !is_potential_mixed_script_confusable_char(ch) {
|
||||
ScriptSetUsage::Verified
|
||||
} else {
|
||||
has_suspicous = true;
|
||||
ScriptSetUsage::Suspicious(vec![ch], sp)
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if has_suspicous {
|
||||
let verified_augmented_script_sets = script_states
|
||||
.iter()
|
||||
.flat_map(|(k, v)| match v {
|
||||
ScriptSetUsage::Verified => Some(*k),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// we're sorting the output here.
|
||||
let mut lint_reports: BTreeMap<(Span, Vec<char>), AugmentedScriptSet> =
|
||||
BTreeMap::new();
|
||||
|
||||
'outerloop: for (augment_script_set, usage) in script_states {
|
||||
let (mut ch_list, sp) = match usage {
|
||||
ScriptSetUsage::Verified => continue,
|
||||
ScriptSetUsage::Suspicious(ch_list, sp) => (ch_list, sp),
|
||||
};
|
||||
|
||||
if augment_script_set.is_all() {
|
||||
continue;
|
||||
}
|
||||
|
||||
for existing in verified_augmented_script_sets.iter() {
|
||||
if existing.is_all() {
|
||||
continue;
|
||||
}
|
||||
let mut intersect = *existing;
|
||||
intersect.intersect_with(augment_script_set);
|
||||
if !intersect.is_empty() && !intersect.is_all() {
|
||||
continue 'outerloop;
|
||||
}
|
||||
}
|
||||
|
||||
ch_list.sort();
|
||||
ch_list.dedup();
|
||||
lint_reports.insert((sp, ch_list), augment_script_set);
|
||||
}
|
||||
|
||||
for ((sp, ch_list), script_set) in lint_reports {
|
||||
cx.struct_span_lint(MIXED_SCRIPT_CONFUSABLES, sp, |lint| {
|
||||
let message = format!(
|
||||
"The usage of Script Group `{}` in this crate consists solely of mixed script confusables",
|
||||
script_set);
|
||||
let mut note = "The usage includes ".to_string();
|
||||
for (idx, ch) in ch_list.into_iter().enumerate() {
|
||||
if idx != 0 {
|
||||
note += ", ";
|
||||
}
|
||||
let char_info = format!("'{}' (U+{:04X})", ch, ch as u32);
|
||||
note += &char_info;
|
||||
}
|
||||
note += ".";
|
||||
lint.build(&message).note(&note).note("Please recheck to make sure their usages are indeed what you want.").emit()
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,20 @@
|
||||
// check-pass
|
||||
#![feature(non_ascii_idents)]
|
||||
#![deny(mixed_script_confusables)]
|
||||
|
||||
struct ΑctuallyNotLatin;
|
||||
|
||||
fn main() {
|
||||
let λ = 42; // this usage of Greek confirms that Greek is used intentionally.
|
||||
}
|
||||
|
||||
mod роре {
|
||||
const エ: &'static str = "アイウ";
|
||||
|
||||
// this usage of Katakana confirms that Katakana is used intentionally.
|
||||
fn ニャン() {
|
||||
let д: usize = 100; // this usage of Cyrillic confirms that Cyrillic is used intentionally.
|
||||
|
||||
println!("meow!");
|
||||
}
|
||||
}
|
@ -0,0 +1,15 @@
|
||||
#![feature(non_ascii_idents)]
|
||||
#![deny(mixed_script_confusables)]
|
||||
|
||||
struct ΑctuallyNotLatin;
|
||||
//~^ ERROR The usage of Script Group `Greek` in this crate consists solely of
|
||||
|
||||
fn main() {
|
||||
let v = ΑctuallyNotLatin;
|
||||
}
|
||||
|
||||
mod роре {
|
||||
//~^ ERROR The usage of Script Group `Cyrillic` in this crate consists solely of
|
||||
const エ: &'static str = "アイウ";
|
||||
//~^ ERROR The usage of Script Group `Japanese, Katakana` in this crate consists solely of
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
error: The usage of Script Group `Greek` in this crate consists solely of mixed script confusables
|
||||
--> $DIR/lint-mixed-script-confusables.rs:4:8
|
||||
|
|
||||
LL | struct ΑctuallyNotLatin;
|
||||
| ^^^^^^^^^^^^^^^^
|
||||
|
|
||||
note: the lint level is defined here
|
||||
--> $DIR/lint-mixed-script-confusables.rs:2:9
|
||||
|
|
||||
LL | #![deny(mixed_script_confusables)]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
= note: The usage includes 'Α' (U+0391).
|
||||
= note: Please recheck to make sure their usages are indeed what you want.
|
||||
|
||||
error: The usage of Script Group `Cyrillic` in this crate consists solely of mixed script confusables
|
||||
--> $DIR/lint-mixed-script-confusables.rs:11:5
|
||||
|
|
||||
LL | mod роре {
|
||||
| ^^^^
|
||||
|
|
||||
= note: The usage includes 'е' (U+0435), 'о' (U+043E), 'р' (U+0440).
|
||||
= note: Please recheck to make sure their usages are indeed what you want.
|
||||
|
||||
error: The usage of Script Group `Japanese, Katakana` in this crate consists solely of mixed script confusables
|
||||
--> $DIR/lint-mixed-script-confusables.rs:13:11
|
||||
|
|
||||
LL | const エ: &'static str = "アイウ";
|
||||
| ^^
|
||||
|
|
||||
= note: The usage includes 'エ' (U+30A8).
|
||||
= note: Please recheck to make sure their usages are indeed what you want.
|
||||
|
||||
error: aborting due to 3 previous errors
|
||||
|
@ -1,3 +1,5 @@
|
||||
#![allow(mixed_script_confusables)]
|
||||
|
||||
fn foo<
|
||||
'β, //~ ERROR non-ascii idents are not fully supported
|
||||
γ //~ ERROR non-ascii idents are not fully supported
|
||||
|
@ -1,5 +1,5 @@
|
||||
error[E0658]: non-ascii idents are not fully supported
|
||||
--> $DIR/utf8_idents.rs:2:5
|
||||
--> $DIR/utf8_idents.rs:4:5
|
||||
|
|
||||
LL | 'β,
|
||||
| ^^
|
||||
@ -8,7 +8,7 @@ LL | 'β,
|
||||
= help: add `#![feature(non_ascii_idents)]` to the crate attributes to enable
|
||||
|
||||
error[E0658]: non-ascii idents are not fully supported
|
||||
--> $DIR/utf8_idents.rs:3:5
|
||||
--> $DIR/utf8_idents.rs:5:5
|
||||
|
|
||||
LL | γ
|
||||
| ^
|
||||
@ -17,7 +17,7 @@ LL | γ
|
||||
= help: add `#![feature(non_ascii_idents)]` to the crate attributes to enable
|
||||
|
||||
error[E0658]: non-ascii idents are not fully supported
|
||||
--> $DIR/utf8_idents.rs:8:5
|
||||
--> $DIR/utf8_idents.rs:10:5
|
||||
|
|
||||
LL | δ: usize
|
||||
| ^
|
||||
@ -26,7 +26,7 @@ LL | δ: usize
|
||||
= help: add `#![feature(non_ascii_idents)]` to the crate attributes to enable
|
||||
|
||||
error[E0658]: non-ascii idents are not fully supported
|
||||
--> $DIR/utf8_idents.rs:12:9
|
||||
--> $DIR/utf8_idents.rs:14:9
|
||||
|
|
||||
LL | let α = 0.00001f64;
|
||||
| ^
|
||||
@ -35,7 +35,7 @@ LL | let α = 0.00001f64;
|
||||
= help: add `#![feature(non_ascii_idents)]` to the crate attributes to enable
|
||||
|
||||
warning: type parameter `γ` should have an upper camel case name
|
||||
--> $DIR/utf8_idents.rs:3:5
|
||||
--> $DIR/utf8_idents.rs:5:5
|
||||
|
|
||||
LL | γ
|
||||
| ^ help: convert the identifier to upper camel case: `Γ`
|
||||
|
Loading…
Reference in New Issue
Block a user