From a02b8124de9b778e822814608217ca774ec231fa Mon Sep 17 00:00:00 2001 From: mcarton Date: Fri, 5 Feb 2016 23:10:48 +0100 Subject: [PATCH] Lint about trivial regexes --- README.md | 3 +- src/lib.rs | 1 + src/regex.rs | 60 ++++++++++++++++++++++++++++++++----- tests/compile-fail/regex.rs | 47 +++++++++++++++++++++++++++-- 4 files changed, 100 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 59a12e09b17..08ad214e690 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ A collection of lints to catch common mistakes and improve your Rust code. [Jump to usage instructions](#usage) ##Lints -There are 112 lints included in this crate: +There are 113 lints included in this crate: name | default | meaning ---------------------------------------------------------------------------------------------------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ @@ -104,6 +104,7 @@ name [string_to_string](https://github.com/Manishearth/rust-clippy/wiki#string_to_string) | warn | calling `String::to_string` which is inefficient [temporary_assignment](https://github.com/Manishearth/rust-clippy/wiki#temporary_assignment) | warn | assignments to temporaries [toplevel_ref_arg](https://github.com/Manishearth/rust-clippy/wiki#toplevel_ref_arg) | warn | An entire binding was declared as `ref`, in a function argument (`fn foo(ref x: Bar)`), or a `let` statement (`let ref x = foo()`). In such cases, it is preferred to take references with `&`. +[trivial_regex](https://github.com/Manishearth/rust-clippy/wiki#trivial_regex) | warn | finds trivial regular expressions in `Regex::new(_)` invocations [type_complexity](https://github.com/Manishearth/rust-clippy/wiki#type_complexity) | warn | usage of very complex types; recommends factoring out parts into `type` definitions [unicode_not_nfc](https://github.com/Manishearth/rust-clippy/wiki#unicode_not_nfc) | allow | using a unicode literal not in NFC normal form (see http://www.unicode.org/reports/tr15/ for further information) [unit_cmp](https://github.com/Manishearth/rust-clippy/wiki#unit_cmp) | warn | comparing unit values (which is always `true` or `false`, respectively) diff --git a/src/lib.rs b/src/lib.rs index 3f18bcb17ce..33de4d6fb79 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -257,6 +257,7 @@ pub fn plugin_registrar(reg: &mut Registry) { ranges::RANGE_STEP_BY_ZERO, ranges::RANGE_ZIP_WITH_LEN, regex::INVALID_REGEX, + regex::TRIVIAL_REGEX, returns::LET_AND_RETURN, returns::NEEDLESS_RETURN, strings::STRING_LIT_AS_BYTES, diff --git a/src/regex.rs b/src/regex.rs index c24edc564ae..0558b77acb0 100644 --- a/src/regex.rs +++ b/src/regex.rs @@ -8,7 +8,7 @@ use rustc::middle::const_eval::{eval_const_expr_partial, ConstVal}; use rustc::middle::const_eval::EvalHint::ExprTypeChecked; use rustc::lint::*; -use utils::{match_path, REGEX_NEW_PATH, span_lint}; +use utils::{match_path, REGEX_NEW_PATH, span_lint, span_help_and_lint}; /// **What it does:** This lint checks `Regex::new(_)` invocations for correct regex syntax. It is `deny` by default. /// @@ -23,12 +23,26 @@ declare_lint! { "finds invalid regular expressions in `Regex::new(_)` invocations" } +/// **What it does:** This lint checks for `Regex::new(_)` invocations with trivial regex. +/// +/// **Why is this bad?** This can likely be replaced by `==` or `str::starts_with`, +/// `str::ends_with` or `std::contains` or other `str` methods. +/// +/// **Known problems:** None. +/// +/// **Example:** `Regex::new("^foobar")` +declare_lint! { + pub TRIVIAL_REGEX, + Warn, + "finds trivial regular expressions in `Regex::new(_)` invocations" +} + #[derive(Copy,Clone)] pub struct RegexPass; impl LintPass for RegexPass { fn get_lints(&self) -> LintArray { - lint_array!(INVALID_REGEX) + lint_array!(INVALID_REGEX, TRIVIAL_REGEX) } } @@ -48,19 +62,26 @@ impl LateLintPass for RegexPass { &format!("regex syntax error: {}", e.description())); } + else if let Some(repl) = is_trivial_regex(r) { + span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span, + &"trivial regex", + &format!("consider using {}", repl)); + } } - } else { - if_let_chain!{[ - let Some(r) = const_str(cx, &*args[0]), - let Err(e) = regex_syntax::Expr::parse(&r) - ], { + } else if let Some(r) = const_str(cx, &*args[0]) { + if let Err(e) = regex_syntax::Expr::parse(&r) { span_lint(cx, INVALID_REGEX, args[0].span, &format!("regex syntax error on position {}: {}", e.position(), e.description())); - }} + } + else if let Some(repl) = is_trivial_regex(&r) { + span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span, + &"trivial regex", + &format!("{}", repl)); + } } }} } @@ -81,3 +102,26 @@ fn const_str(cx: &LateContext, e: &Expr) -> Option { _ => None } } + +fn is_trivial_regex(s: &str) -> Option<&'static str> { + // some unlikely but valid corner cases + match s { + "" | "^" | "$" => return Some("the regex is unlikely to be useful as it is"), + "^$" => return Some("consider using `str::is_empty`"), + _ => (), + } + + let (start, end, repl) = match (s.starts_with('^'), s.ends_with('$')) { + (true, true) => (1, s.len()-1, "consider using `==` on `str`s"), + (false, true) => (0, s.len()-1, "consider using `str::ends_with`"), + (true, false) => (1, s.len(), "consider using `str::starts_with`"), + (false, false) => (0, s.len(), "consider using `str::contains`"), + }; + + if !s.chars().take(end).skip(start).any(regex_syntax::is_punct) { + Some(repl) + } + else { + None + } +} diff --git a/tests/compile-fail/regex.rs b/tests/compile-fail/regex.rs index 5a3f8b1a368..cd10d47c1bb 100644 --- a/tests/compile-fail/regex.rs +++ b/tests/compile-fail/regex.rs @@ -2,15 +2,16 @@ #![plugin(clippy)] #![allow(unused)] -#![deny(invalid_regex)] +#![deny(invalid_regex, trivial_regex)] extern crate regex; use regex::Regex; const OPENING_PAREN : &'static str = "("; +const NOT_A_REAL_REGEX : &'static str = "foobar"; -fn main() { +fn syntax_error() { let pipe_in_wrong_position = Regex::new("|"); //~^ERROR: regex syntax error: empty alternate let wrong_char_ranice = Regex::new("[z-a]"); @@ -22,3 +23,45 @@ fn main() { let closing_paren = ")"; let not_linted = Regex::new(closing_paren); } + +fn trivial_regex() { + let trivial_eq = Regex::new("^foobar$"); + //~^ERROR: trivial regex + //~|HELP consider using `==` on `str`s + + let trivial_starts_with = Regex::new("^foobar"); + //~^ERROR: trivial regex + //~|HELP consider using `str::starts_with` + + let trivial_ends_with = Regex::new("foobar$"); + //~^ERROR: trivial regex + //~|HELP consider using `str::ends_with` + + let trivial_contains = Regex::new("foobar"); + //~^ERROR: trivial regex + //~|HELP consider using `str::contains` + + let trivial_contains = Regex::new(NOT_A_REAL_REGEX); + //~^ERROR: trivial regex + //~|HELP consider using `str::contains` + + // unlikely corner cases + let trivial_empty = Regex::new(""); + //~^ERROR: trivial regex + //~|HELP the regex is unlikely to be useful + + let trivial_empty = Regex::new("^$"); + //~^ERROR: trivial regex + //~|HELP consider using `str::is_empty` + + // non-trivial regexes + let non_trivial_eq = Regex::new("^foo|bar$"); + let non_trivial_starts_with = Regex::new("^foo|bar"); + let non_trivial_ends_with = Regex::new("^foo|bar"); + let non_trivial_ends_with = Regex::new("foo|bar"); +} + +fn main() { + syntax_error(); + trivial_regex(); +}