Merge pull request #299 from Manishearth/unicode_str
Unicode lints, second attempt: Lint whole strings, help with replacement. This fixes #85
This commit is contained in:
commit
0c50d763fc
@ -16,6 +16,9 @@ keywords = ["clippy", "lint", "plugin"]
|
||||
name = "clippy"
|
||||
plugin = true
|
||||
|
||||
[dependencies]
|
||||
unicode-normalization = "*"
|
||||
|
||||
[dev-dependencies]
|
||||
compiletest_rs = "*"
|
||||
regex = "*"
|
||||
|
@ -4,7 +4,7 @@
|
||||
A collection of lints that give helpful tips to newbies and catch oversights.
|
||||
|
||||
## Lints
|
||||
There are 53 lints included in this crate:
|
||||
There are 54 lints included in this crate:
|
||||
|
||||
name | default | meaning
|
||||
-----------------------------------------------------------------------------------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
@ -56,6 +56,7 @@ name
|
||||
[string_to_string](https://github.com/Manishearth/rust-clippy/wiki#string_to_string) | warn | calling `String.to_string()` which is a no-op
|
||||
[toplevel_ref_arg](https://github.com/Manishearth/rust-clippy/wiki#toplevel_ref_arg) | warn | a function argument is declared `ref` (i.e. `fn foo(ref x: u8)`, but not `fn foo((ref x, ref y): (u8, u8))`)
|
||||
[type_complexity](https://github.com/Manishearth/rust-clippy/wiki#type_complexity) | warn | usage of very complex types; recommends factoring out parts into `type` definitions
|
||||
[unicode_not_nfc](https://github.com/Manishearth/rust-clippy/wiki#unicode_not_nfc) | allow | using a unicode literal not in NFC normal form (see http://www.unicode.org/reports/tr15/ for further information)
|
||||
[unit_cmp](https://github.com/Manishearth/rust-clippy/wiki#unit_cmp) | warn | comparing unit values (which is always `true` or `false`, respectively)
|
||||
[unused_collect](https://github.com/Manishearth/rust-clippy/wiki#unused_collect) | warn | `collect()`ing an iterator without using the result; this is usually better written as a for loop
|
||||
[while_let_loop](https://github.com/Manishearth/rust-clippy/wiki#while_let_loop) | warn | `loop { if let { ... } else break }` can be written as a `while let` loop
|
||||
|
@ -14,6 +14,9 @@ extern crate rustc_front;
|
||||
extern crate core;
|
||||
extern crate collections;
|
||||
|
||||
// for Unicode NFC normalization
|
||||
extern crate unicode_normalization;
|
||||
|
||||
use rustc::plugin::Registry;
|
||||
use rustc::lint::LintPassObject;
|
||||
|
||||
@ -96,6 +99,7 @@ pub fn plugin_registrar(reg: &mut Registry) {
|
||||
types::CAST_PRECISION_LOSS,
|
||||
types::CAST_SIGN_LOSS,
|
||||
unicode::NON_ASCII_LITERAL,
|
||||
unicode::UNICODE_NOT_NFC,
|
||||
]);
|
||||
|
||||
reg.register_lint_group("clippy", vec![
|
||||
|
@ -1,49 +1,73 @@
|
||||
use rustc::lint::*;
|
||||
use rustc_front::hir::*;
|
||||
use syntax::codemap::{BytePos, Span};
|
||||
use syntax::codemap::Span;
|
||||
|
||||
use utils::span_lint;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
use utils::{snippet, span_help_and_lint};
|
||||
|
||||
declare_lint!{ pub ZERO_WIDTH_SPACE, Deny,
|
||||
"using a zero-width space in a string literal, which is confusing" }
|
||||
declare_lint!{ pub NON_ASCII_LITERAL, Allow,
|
||||
"using any literal non-ASCII chars in a string literal; suggests \
|
||||
using the \\u escape instead" }
|
||||
declare_lint!{ pub UNICODE_NOT_NFC, Allow,
|
||||
"using a unicode literal not in NFC normal form (see \
|
||||
http://www.unicode.org/reports/tr15/ for further information)" }
|
||||
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Unicode;
|
||||
|
||||
impl LintPass for Unicode {
|
||||
fn get_lints(&self) -> LintArray {
|
||||
lint_array!(ZERO_WIDTH_SPACE, NON_ASCII_LITERAL)
|
||||
lint_array!(ZERO_WIDTH_SPACE, NON_ASCII_LITERAL, UNICODE_NOT_NFC)
|
||||
}
|
||||
|
||||
fn check_expr(&mut self, cx: &Context, expr: &Expr) {
|
||||
if let ExprLit(ref lit) = expr.node {
|
||||
if let LitStr(ref string, _) = lit.node {
|
||||
check_str(cx, string, lit.span)
|
||||
if let LitStr(_, _) = lit.node {
|
||||
check_str(cx, lit.span)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn check_str(cx: &Context, string: &str, span: Span) {
|
||||
for (i, c) in string.char_indices() {
|
||||
if c == '\u{200B}' {
|
||||
str_pos_lint(cx, ZERO_WIDTH_SPACE, span, i,
|
||||
"zero-width space detected. Consider using `\\u{200B}`");
|
||||
}
|
||||
fn escape<T: Iterator<Item=char>>(s: T) -> String {
|
||||
let mut result = String::new();
|
||||
for c in s {
|
||||
if c as u32 > 0x7F {
|
||||
str_pos_lint(cx, NON_ASCII_LITERAL, span, i, &format!(
|
||||
"literal non-ASCII character detected. Consider using `\\u{{{:X}}}`", c as u32));
|
||||
for d in c.escape_unicode() { result.push(d) };
|
||||
} else {
|
||||
result.push(c);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[allow(cast_possible_truncation)]
|
||||
fn str_pos_lint(cx: &Context, lint: &'static Lint, span: Span, index: usize, msg: &str) {
|
||||
span_lint(cx, lint, Span { lo: span.lo + BytePos((1 + index) as u32),
|
||||
hi: span.lo + BytePos((1 + index) as u32),
|
||||
expn_id: span.expn_id }, msg);
|
||||
|
||||
/// Checks the source text of a string literal for Unicode problems.
///
/// The text is obtained with `snippet(cx, span, "")`. Up to three lints are
/// reported on the whole `span`, each with a help message that shows a
/// suggested replacement string:
///
/// * `ZERO_WIDTH_SPACE` when the text contains U+200B; the suggestion has every
///   U+200B replaced by the literal escape `\u{200B}`.
/// * `NON_ASCII_LITERAL` when any char is above 0x7F; the suggestion is
///   `escape(..)` applied to the text, NFC-normalized first unless
///   `UNICODE_NOT_NFC` is at `Allow` level.
/// * `UNICODE_NOT_NFC` only when `NON_ASCII_LITERAL` is at `Allow` level and
///   the text differs from its NFC form; the suggestion is the NFC form.
///
/// NOTE(review): the NFC comparison zips the two char sequences, so only the
/// common-length prefix is compared — confirm a length-only difference cannot
/// occur.
/// NOTE(review): presumably `snippet` returns the literal including its
/// surrounding quotes, in which case the help text adds a second pair —
/// TODO confirm.
fn check_str(cx: &Context, span: Span) {
|
||||
let string = snippet(cx, span, "");
|
||||
if string.contains('\u{200B}') {
|
||||
span_help_and_lint(cx, ZERO_WIDTH_SPACE, span,
|
||||
"zero-width space detected",
|
||||
&format!("Consider replacing the string with:\n\"{}\"",
|
||||
string.replace("\u{200B}", "\\u{200B}")));
|
||||
}
|
||||
if string.chars().any(|c| c as u32 > 0x7F) {
|
||||
span_help_and_lint(cx, NON_ASCII_LITERAL, span,
|
||||
"literal non-ASCII character detected",
|
||||
&format!("Consider replacing the string with:\n\"{}\"",
// Only normalize the suggestion when the NFC lint is enabled at this point.
if cx.current_level(UNICODE_NOT_NFC) == Level::Allow {
|
||||
escape(string.chars())
|
||||
} else {
|
||||
escape(string.nfc())
|
||||
}));
|
||||
}
|
||||
// Skipped when NON_ASCII_LITERAL is enabled: that lint's suggestion above
// is already NFC-normalized in that case.
if cx.current_level(NON_ASCII_LITERAL) == Level::Allow &&
|
||||
string.chars().zip(string.nfc()).any(|(a, b)| a != b) {
|
||||
span_help_and_lint(cx, UNICODE_NOT_NFC, span,
|
||||
"non-nfc unicode sequence detected",
|
||||
&format!("Consider replacing the string with:\n\"{}\"",
|
||||
string.nfc().collect::<String>()));
|
||||
}
|
||||
}
|
||||
|
@ -4,18 +4,20 @@
|
||||
#[deny(zero_width_space)]
|
||||
fn zero() {
|
||||
print!("Here >< is a ZWS, and another");
|
||||
//~^ ERROR zero-width space detected. Consider using `\u{200B}`
|
||||
//~^^ ERROR zero-width space detected. Consider using `\u{200B}`
|
||||
//~^ ERROR zero-width space detected
|
||||
print!("This\u{200B}is\u{200B}fine");
|
||||
}
|
||||
|
||||
//#[deny(unicode_canon)]
|
||||
#[deny(unicode_not_nfc)]
|
||||
fn canon() {
|
||||
print!("̀ah?"); //not yet ~ERROR non-canonical unicode sequence detected. Consider using à
|
||||
print!("̀àh?"); //~ERROR non-nfc unicode sequence detected
|
||||
print!("a\u{0300}h?"); // also okay
|
||||
}
|
||||
|
||||
#[deny(non_ascii_literal)]
|
||||
fn uni() {
|
||||
print!("Üben!"); //~ERROR literal non-ASCII character detected. Consider using `\u{DC}`
|
||||
print!("Üben!"); //~ERROR literal non-ASCII character detected
|
||||
print!("\u{DC}ben!"); // this is okay
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
Loading…
Reference in New Issue
Block a user