Merge pull request #299 from Manishearth/unicode_str

Unicode lints, second attempt: Lint whole strings, help with replacement

This fixes #85
This commit is contained in:
llogiq 2015-09-04 14:27:26 +02:00
commit 0c50d763fc
5 changed files with 59 additions and 25 deletions

View File

@ -16,6 +16,9 @@ keywords = ["clippy", "lint", "plugin"]
name = "clippy"
plugin = true
[dependencies]
unicode-normalization = "*"
[dev-dependencies]
compiletest_rs = "*"
regex = "*"

View File

@ -4,7 +4,7 @@
A collection of lints that give helpful tips to newbies and catch oversights.
## Lints
There are 53 lints included in this crate:
There are 54 lints included in this crate:
name | default | meaning
-----------------------------------------------------------------------------------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@ -56,6 +56,7 @@ name
[string_to_string](https://github.com/Manishearth/rust-clippy/wiki#string_to_string) | warn | calling `String.to_string()` which is a no-op
[toplevel_ref_arg](https://github.com/Manishearth/rust-clippy/wiki#toplevel_ref_arg) | warn | a function argument is declared `ref` (i.e. `fn foo(ref x: u8)`, but not `fn foo((ref x, ref y): (u8, u8))`)
[type_complexity](https://github.com/Manishearth/rust-clippy/wiki#type_complexity) | warn | usage of very complex types; recommends factoring out parts into `type` definitions
[unicode_not_nfc](https://github.com/Manishearth/rust-clippy/wiki#unicode_not_nfc) | allow | using a unicode literal not in NFC normal form (see http://www.unicode.org/reports/tr15/ for further information)
[unit_cmp](https://github.com/Manishearth/rust-clippy/wiki#unit_cmp) | warn | comparing unit values (which is always `true` or `false`, respectively)
[unused_collect](https://github.com/Manishearth/rust-clippy/wiki#unused_collect) | warn | `collect()`ing an iterator without using the result; this is usually better written as a for loop
[while_let_loop](https://github.com/Manishearth/rust-clippy/wiki#while_let_loop) | warn | `loop { if let { ... } else break }` can be written as a `while let` loop

View File

@ -14,6 +14,9 @@ extern crate rustc_front;
extern crate core;
extern crate collections;
// for unicode nfc normalization
extern crate unicode_normalization;
use rustc::plugin::Registry;
use rustc::lint::LintPassObject;
@ -96,6 +99,7 @@ pub fn plugin_registrar(reg: &mut Registry) {
types::CAST_PRECISION_LOSS,
types::CAST_SIGN_LOSS,
unicode::NON_ASCII_LITERAL,
unicode::UNICODE_NOT_NFC,
]);
reg.register_lint_group("clippy", vec![

View File

@ -1,49 +1,73 @@
use rustc::lint::*;
use rustc_front::hir::*;
use syntax::codemap::{BytePos, Span};
use syntax::codemap::Span;
use utils::span_lint;
use unicode_normalization::UnicodeNormalization;
use utils::{snippet, span_help_and_lint};
declare_lint!{ pub ZERO_WIDTH_SPACE, Deny,
"using a zero-width space in a string literal, which is confusing" }
declare_lint!{ pub NON_ASCII_LITERAL, Allow,
"using any literal non-ASCII chars in a string literal; suggests \
using the \\u escape instead" }
declare_lint!{ pub UNICODE_NOT_NFC, Allow,
"using a unicode literal not in NFC normal form (see \
http://www.unicode.org/reports/tr15/ for further information)" }
/// Lint pass hosting the Unicode-related string literal lints declared in
/// this file.
#[derive(Copy, Clone)]
pub struct Unicode;
impl LintPass for Unicode {
fn get_lints(&self) -> LintArray {
lint_array!(ZERO_WIDTH_SPACE, NON_ASCII_LITERAL)
lint_array!(ZERO_WIDTH_SPACE, NON_ASCII_LITERAL, UNICODE_NOT_NFC)
}
fn check_expr(&mut self, cx: &Context, expr: &Expr) {
if let ExprLit(ref lit) = expr.node {
if let LitStr(ref string, _) = lit.node {
check_str(cx, string, lit.span)
if let LitStr(_, _) = lit.node {
check_str(cx, lit.span)
}
}
}
}
fn check_str(cx: &Context, string: &str, span: Span) {
for (i, c) in string.char_indices() {
if c == '\u{200B}' {
str_pos_lint(cx, ZERO_WIDTH_SPACE, span, i,
"zero-width space detected. Consider using `\\u{200B}`");
}
fn escape<T: Iterator<Item=char>>(s: T) -> String {
let mut result = String::new();
for c in s {
if c as u32 > 0x7F {
str_pos_lint(cx, NON_ASCII_LITERAL, span, i, &format!(
"literal non-ASCII character detected. Consider using `\\u{{{:X}}}`", c as u32));
for d in c.escape_unicode() { result.push(d) };
} else {
result.push(c);
}
}
result
}
#[allow(cast_possible_truncation)]
fn str_pos_lint(cx: &Context, lint: &'static Lint, span: Span, index: usize, msg: &str) {
span_lint(cx, lint, Span { lo: span.lo + BytePos((1 + index) as u32),
hi: span.lo + BytePos((1 + index) as u32),
expn_id: span.expn_id }, msg);
/// Runs the Unicode lints over the string literal located at `span`.
///
/// The literal is examined as it is written in the source file (obtained via
/// `snippet`), not as its decoded value. Every lint is reported on the whole
/// literal, and its help text offers a complete replacement for it.
///
/// * `cx` - lint context used to read the source text, query lint levels and
///   emit the diagnostics.
/// * `span` - span of the string literal to check.
fn check_str(cx: &Context, span: Span) {
    // The snippet is the literal exactly as typed, delimiters included, so the
    // suggestions below must not wrap it in a second pair of quotes.
    let string = snippet(cx, span, "");
    if string.contains('\u{200B}') {
        span_help_and_lint(cx, ZERO_WIDTH_SPACE, span,
                           "zero-width space detected",
                           &format!("Consider replacing the string with:\n{}",
                                    string.replace("\u{200B}", "\\u{200B}")));
    }
    if string.chars().any(|c| c as u32 > 0x7F) {
        span_help_and_lint(cx, NON_ASCII_LITERAL, span,
                           "literal non-ASCII character detected",
                           &format!("Consider replacing the string with:\n{}",
                                    // When UNICODE_NOT_NFC is active as well, suggest the
                                    // escaped NFC form so that a single replacement
                                    // addresses both lints.
                                    if cx.current_level(UNICODE_NOT_NFC) == Level::Allow {
                                        escape(string.chars())
                                    } else {
                                        escape(string.nfc())
                                    }));
    }
    // Only reported while NON_ASCII_LITERAL is allowed; otherwise the help
    // emitted above already proposes the (escaped) NFC form.
    if cx.current_level(NON_ASCII_LITERAL) == Level::Allow &&
            string.chars().zip(string.nfc()).any(|(a, b)| a != b) {
        span_help_and_lint(cx, UNICODE_NOT_NFC, span,
                           "non-nfc unicode sequence detected",
                           &format!("Consider replacing the string with:\n{}",
                                    string.nfc().collect::<String>()));
    }
}

View File

@ -4,18 +4,20 @@
#[deny(zero_width_space)]
fn zero() {
print!("Here >< is a ZWS, and another");
//~^ ERROR zero-width space detected. Consider using `\u{200B}`
//~^^ ERROR zero-width space detected. Consider using `\u{200B}`
//~^ ERROR zero-width space detected
print!("This\u{200B}is\u{200B}fine");
}
//#[deny(unicode_canon)]
#[deny(unicode_not_nfc)]
fn canon() {
print!("̀ah?"); //not yet ~ERROR non-canonical unicode sequence detected. Consider using à
print!("̀àh?"); //~ERROR non-nfc unicode sequence detected
print!("a\u{0300}h?"); // also okay
}
#[deny(non_ascii_literal)]
fn uni() {
print!("Üben!"); //~ERROR literal non-ASCII character detected. Consider using `\u{DC}`
print!("Üben!"); //~ERROR literal non-ASCII character detected
print!("\u{DC}ben!"); // this is okay
}
fn main() {