Merge pull request #208 from Robzz/iss114

Implementation of lossy cast lints (issue #114)
2015-08-21 07:49:19 +02:00 · 2015-08-21 07:49:19 +02:00 · d3793865c4
commit d3793865c4
parent c91857c62d ad0bc66402
6 changed files with 201 additions and 52 deletions
--- a/README.md
+++ b/README.md
@ -6,47 +6,50 @@ A collection of lints that give helpful tips to newbies and catch oversights.
 ##Lints
 Lints included in this crate:

-name                 | default | meaning
---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-approx_constant      | warn    | the approximate of a known float constant (in `std::f64::consts` or `std::f32::consts`) is found; suggests to use the constant
-bad_bit_mask         | deny    | expressions of the form `_ & mask == select` that will only ever return `true` or `false` (because in the example `select` containing bits that `mask` doesn't have)
-box_vec              | warn    | usage of `Box<Vec<T>>`, vector elements are already on the heap
-cmp_nan              | deny    | comparisons to NAN (which will always return false, which is probably not intended)
-cmp_owned            | warn    | creating owned instances for comparing with others, e.g. `x == "foo".to_string()`
-collapsible_if       | warn    | two nested `if`-expressions can be collapsed into one, e.g. `if x { if y { foo() } }` can be written as `if x && y { foo() }`
-eq_op                | warn    | equal operands on both sides of a comparison or bitwise combination (e.g. `x == x`)
-explicit_iter_loop   | warn    | for-looping over `_.iter()` or `_.iter_mut()` when `&_` or `&mut _` would do
-float_cmp            | warn    | using `==` or `!=` on float values (as floating-point operations usually involve rounding errors, it is always better to check for approximate equality within small bounds)
-identity_op          | warn    | using identity operations, e.g. `x + 0` or `y / 1`
-ineffective_bit_mask | warn    | expressions where a bit mask will be rendered useless by a comparison, e.g. `(x | 1) > 2`
-inline_always        | warn    | `#[inline(always)]` is a bad idea in most cases
-iter_next_loop       | warn    | for-looping over `_.next()` which is probably not intended
-len_without_is_empty | warn    | traits and impls that have `.len()` but not `.is_empty()`
-len_zero             | warn    | checking `.len() == 0` or `.len() > 0` (or similar) when `.is_empty()` could be used instead
-let_and_return       | warn    | creating a let-binding and then immediately returning it like `let x = expr; x` at the end of a function
-let_unit_value       | warn    | creating a let binding to a value of unit type, which usually can't be used afterwards
-linkedlist           | warn    | usage of LinkedList, usually a vector is faster, or a more specialized data structure like a RingBuf
-modulo_one           | warn    | taking a number modulo 1, which always returns 0
-mut_mut              | warn    | usage of double-mut refs, e.g. `&mut &mut ...` (either copy'n'paste error, or shows a fundamental misunderstanding of references)
-needless_bool        | warn    | if-statements with plain booleans in the then- and else-clause, e.g. `if p { true } else { false }`
-needless_lifetimes   | warn    | using explicit lifetimes for references in function arguments when elision rules would allow omitting them
-needless_range_loop  | warn    | for-looping over a range of indices where an iterator over items would do
-needless_return      | warn    | using a return statement like `return expr;` where an expression would suffice
-non_ascii_literal    | allow   | using any literal non-ASCII chars in a string literal; suggests using the \\u escape instead
-option_unwrap_used   | allow   | using `Option.unwrap()`, which should at least get a better message using `expect()`
-precedence           | warn    | expressions where precedence may trip up the unwary reader of the source; suggests adding parentheses, e.g. `x << 2 + y` will be parsed as `x << (2 + y)`
-ptr_arg              | allow   | fn arguments of the type `&Vec<...>` or `&String`, suggesting to use `&[...]` or `&str` instead, respectively
-range_step_by_zero   | warn    | using Range::step_by(0), which produces an infinite iterator
-redundant_closure    | warn    | using redundant closures, i.e. `|a| foo(a)` (which can be written as just `foo`)
-result_unwrap_used   | allow   | using `Result.unwrap()`, which might be better handled
-single_match         | warn    | a match statement with a single nontrivial arm (i.e, where the other arm is `_ => {}`) is used; recommends `if let` instead
-str_to_string        | warn    | using `to_string()` on a str, which should be `to_owned()`
-string_add           | allow   | using `x + ..` where x is a `String`; suggests using `push_str()` instead
-string_add_assign    | allow   | using `x = x + ..` where x is a `String`; suggests using `push_str()` instead
-string_to_string     | warn    | calling `String.to_string()` which is a no-op
-toplevel_ref_arg     | warn    | a function argument is declared `ref` (i.e. `fn foo(ref x: u8)`, but not `fn foo((ref x, ref y): (u8, u8))`)
-unit_cmp             | warn    | comparing unit values (which is always `true` or `false`, respectively)
-zero_width_space     | deny    | using a zero-width space in a string literal, which is confusing
+name                     | default | meaning
+-------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+approx_constant          | warn    | the approximate of a known float constant (in `std::f64::consts` or `std::f32::consts`) is found; suggests to use the constant
+bad_bit_mask             | deny    | expressions of the form `_ & mask == select` that will only ever return `true` or `false` (because in the example `select` containing bits that `mask` doesn't have)
+box_vec                  | warn    | usage of `Box<Vec<T>>`, vector elements are already on the heap
+cast_possible_truncation | allow   | casts that may cause truncation of the value, e.g `x as u8` where `x: u32`, or `x as i32` where `x: f32`
+cast_precision_loss      | allow   | casts that cause loss of precision, e.g `x as f32` where `x: u64`
+cast_sign_loss           | allow   | casts from signed types to unsigned types, e.g `x as u32` where `x: i32`
+cmp_nan                  | deny    | comparisons to NAN (which will always return false, which is probably not intended)
+cmp_owned                | warn    | creating owned instances for comparing with others, e.g. `x == "foo".to_string()`
+collapsible_if           | warn    | two nested `if`-expressions can be collapsed into one, e.g. `if x { if y { foo() } }` can be written as `if x && y { foo() }`
+eq_op                    | warn    | equal operands on both sides of a comparison or bitwise combination (e.g. `x == x`)
+explicit_iter_loop       | warn    | for-looping over `_.iter()` or `_.iter_mut()` when `&_` or `&mut _` would do
+float_cmp                | warn    | using `==` or `!=` on float values (as floating-point operations usually involve rounding errors, it is always better to check for approximate equality within small bounds)
+identity_op              | warn    | using identity operations, e.g. `x + 0` or `y / 1`
+ineffective_bit_mask     | warn    | expressions where a bit mask will be rendered useless by a comparison, e.g. `(x | 1) > 2`
+inline_always            | warn    | `#[inline(always)]` is a bad idea in most cases
+iter_next_loop           | warn    | for-looping over `_.next()` which is probably not intended
+len_without_is_empty     | warn    | traits and impls that have `.len()` but not `.is_empty()`
+len_zero                 | warn    | checking `.len() == 0` or `.len() > 0` (or similar) when `.is_empty()` could be used instead
+let_and_return           | warn    | creating a let-binding and then immediately returning it like `let x = expr; x` at the end of a function
+let_unit_value           | warn    | creating a let binding to a value of unit type, which usually can't be used afterwards
+linkedlist               | warn    | usage of LinkedList, usually a vector is faster, or a more specialized data structure like a RingBuf
+modulo_one               | warn    | taking a number modulo 1, which always returns 0
+mut_mut                  | warn    | usage of double-mut refs, e.g. `&mut &mut ...` (either copy'n'paste error, or shows a fundamental misunderstanding of references)
+needless_bool            | warn    | if-statements with plain booleans in the then- and else-clause, e.g. `if p { true } else { false }`
+needless_lifetimes       | warn    | using explicit lifetimes for references in function arguments when elision rules would allow omitting them
+needless_range_loop      | warn    | for-looping over a range of indices where an iterator over items would do
+needless_return          | warn    | using a return statement like `return expr;` where an expression would suffice
+non_ascii_literal        | allow   | using any literal non-ASCII chars in a string literal; suggests using the \\u escape instead
+option_unwrap_used       | allow   | using `Option.unwrap()`, which should at least get a better message using `expect()`
+precedence               | warn    | expressions where precedence may trip up the unwary reader of the source; suggests adding parentheses, e.g. `x << 2 + y` will be parsed as `x << (2 + y)`
+ptr_arg                  | allow   | fn arguments of the type `&Vec<...>` or `&String`, suggesting to use `&[...]` or `&str` instead, respectively
+range_step_by_zero       | warn    | using Range::step_by(0), which produces an infinite iterator
+redundant_closure        | warn    | using redundant closures, i.e. `|a| foo(a)` (which can be written as just `foo`)
+result_unwrap_used       | allow   | using `Result.unwrap()`, which might be better handled
+single_match             | warn    | a match statement with a single nontrivial arm (i.e, where the other arm is `_ => {}`) is used; recommends `if let` instead
+str_to_string            | warn    | using `to_string()` on a str, which should be `to_owned()`
+string_add               | allow   | using `x + ..` where x is a `String`; suggests using `push_str()` instead
+string_add_assign        | allow   | using `x = x + ..` where x is a `String`; suggests using `push_str()` instead
+string_to_string         | warn    | calling `String.to_string()` which is a no-op
+toplevel_ref_arg         | warn    | a function argument is declared `ref` (i.e. `fn foo(ref x: u8)`, but not `fn foo((ref x, ref y): (u8, u8))`)
+unit_cmp                 | warn    | comparing unit values (which is always `true` or `false`, respectively)
+zero_width_space         | deny    | using a zero-width space in a string literal, which is confusing

 To use, add the following lines to your Cargo.toml:

--- a/src/consts.rs
+++ b/src/consts.rs
@ -67,15 +67,16 @@ impl Constant {
    }

    /// convert this constant to a f64, if possible
-   pub fn as_float(&self) -> Option<f64> {
-       match *self {
-           ConstantByte(b) => Some(b as f64),
-           ConstantFloat(ref s, _) => s.parse().ok(),
-           ConstantInt(i, ty) => Some(if is_negative(ty) {
-               -(i as f64) } else { i as f64 }),
-           _ => None
-       }
-   }
+    // The lint is silenced for this method because of the integer-to-f64
+    // casts below.
+    // NOTE(review): presumably the possible rounding of large integer
+    // constants is acceptable for callers of this method -- confirm.
+    #[allow(cast_precision_loss)]
+    pub fn as_float(&self) -> Option<f64> {
+        match *self {
+            // a byte constant is cast directly
+            ConstantByte(b) => Some(b as f64),
+            // `s` is parsed into an f64; a parse failure yields `None`
+            ConstantFloat(ref s, _) => s.parse().ok(),
+            // negativity is determined from `ty` via `is_negative`, so the
+            // sign is applied after the cast of `i`
+            ConstantInt(i, ty) => Some(if is_negative(ty) {
+                -(i as f64) } else { i as f64 }),
+            // every other kind of constant is not converted
+            _ => None
+        }
+    }
 }

 impl PartialEq for Constant {
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,6 +1,7 @@
 #![feature(plugin_registrar, box_syntax)]
 #![feature(rustc_private, core, collections)]
-#![feature(str_split_at)]
+#![feature(str_split_at, num_bits_bytes)]
+#![allow(unknown_lints)]

 #[macro_use]
 extern crate syntax;
@ -68,6 +69,7 @@ pub fn plugin_registrar(reg: &mut Registry) {
    reg.register_lint_pass(box loops::LoopsPass as LintPassObject);
    reg.register_lint_pass(box lifetimes::LifetimePass as LintPassObject);
    reg.register_lint_pass(box ranges::StepByZero as LintPassObject);
+    reg.register_lint_pass(box types::CastPass as LintPassObject);

    reg.register_lint_group("clippy", vec![
        approx_const::APPROX_CONSTANT,
@ -104,6 +106,9 @@ pub fn plugin_registrar(reg: &mut Registry) {
        strings::STRING_ADD,
        strings::STRING_ADD_ASSIGN,
        types::BOX_VEC,
+        types::CAST_POSSIBLE_TRUNCATION,
+        types::CAST_PRECISION_LOSS,
+        types::CAST_SIGN_LOSS,
        types::LET_UNIT_VALUE,
        types::LINKEDLIST,
        types::UNIT_CMP,
--- a/src/types.rs
+++ b/src/types.rs
@ -6,7 +6,7 @@ use syntax::ptr::P;
 use rustc::middle::ty;
 use syntax::codemap::ExpnInfo;

-use utils::{in_macro, snippet, span_lint, span_help_and_lint};
+use utils::{in_macro, snippet, span_lint, span_help_and_lint, in_external_macro};

 /// Handles all the linting of funky types
 #[allow(missing_copy_implementations)]
@ -136,3 +136,85 @@ impl LintPass for UnitCmp {
        }
    }
 }
+
+/// Lint pass that checks `as` casts between numeric types for lossy conversions.
+pub struct CastPass;
+
+// The three cast lints below are all declared with the `Allow` level.
+declare_lint!(pub CAST_PRECISION_LOSS, Allow,
+              "casts that cause loss of precision, e.g `x as f32` where `x: u64`");
+declare_lint!(pub CAST_SIGN_LOSS, Allow,
+              "casts from signed types to unsigned types, e.g `x as u32` where `x: i32`");
+declare_lint!(pub CAST_POSSIBLE_TRUNCATION, Allow,
+              "casts that may cause truncation of the value, e.g `x as u8` where `x: u32`, or `x as i32` where `x: f32`");
+
+/// Returns the size in bits of an integral type.
+/// Will return 0 if the type is not an int or uint variant
+fn int_ty_to_nbits(typ: &ty::TyS) -> usize {
+    // `4 << variant` turns the variant's numeric value into a bit width
+    // (0 -> 4, 1 -> 8, 2 -> 16, 3 -> 32, 4 -> 64).
+    // NOTE(review): this presumably relies on the int/uint variants being
+    // declared in the order pointer-sized, 8, 16, 32, 64 -- verify against
+    // their definition.
+    let n = match &typ.sty {
+    &ty::TyInt(i) =>  4 << (i as usize),
+    &ty::TyUint(u) => 4 << (u as usize),
+    _ => 0
+    };
+    // n == 4 is the usize/isize case
+    // NOTE(review): `::std::usize::BITS` is the pointer width this code itself
+    // is compiled for; when the linted crate targets a different pointer
+    // width the reported size may be wrong -- confirm.
+    if n == 4 { ::std::usize::BITS } else { n }
+}
+
+impl LintPass for CastPass {
+    /// Lists the three cast lints this pass can emit.
+    fn get_lints(&self) -> LintArray {
+        lint_array!(CAST_PRECISION_LOSS,
+                    CAST_SIGN_LOSS,
+                    CAST_POSSIBLE_TRUNCATION)
+    }
+
+    /// Inspects `as` casts whose source and target types are both numeric and
+    /// reports precision loss, sign loss and possible truncation.
+    ///
+    /// Casts whose span is reported by `in_external_macro` are ignored.
+    fn check_expr(&mut self, cx: &Context, expr: &Expr) {
+        if let ExprCast(ref ex, _) = expr.node {
+            // the type of the inner expression is the source, the type of the
+            // whole cast expression is the target
+            let (cast_from, cast_to) = (cx.tcx.expr_ty(&*ex), cx.tcx.expr_ty(expr));
+            if cast_from.is_numeric() && cast_to.is_numeric() && !in_external_macro(cx, expr.span) {
+                match (cast_from.is_integral(), cast_to.is_integral()) {
+                    // integer -> float
+                    (true, false) => {
+                        let from_nbits = int_ty_to_nbits(cast_from);
+                        let to_nbits : usize = match &cast_to.sty {
+                            &ty::TyFloat(ast::TyF32) => 32,
+                            &ty::TyFloat(ast::TyF64) => 64,
+                            _ => 0
+                        };
+                        // `int_ty_to_nbits` returns 0 when the source is not an
+                        // int or uint variant; nothing is reported in that case.
+                        // Otherwise the cast is reported when the integer is at
+                        // least as wide as the whole float type.
+                        if from_nbits != 0 && from_nbits >= to_nbits {
+                            // 52 and 23 are the mantissa widths quoted in the
+                            // message for f64 and f32 respectively
+                            span_lint(cx, CAST_PRECISION_LOSS, expr.span,
+                                      &format!("converting from {0} to {1}, which causes a loss of precision \
+                                                ({0} is {2} bits wide, but {1}'s mantissa is only {3} bits wide)",
+                                               cast_from, cast_to, from_nbits, if to_nbits == 64 {52} else {23} ));
+                        }
+                    },
+                    // float -> integer: always reported as possible truncation,
+                    // and additionally as sign loss when the target is unsigned
+                    (false, true) => {
+                        span_lint(cx, CAST_POSSIBLE_TRUNCATION, expr.span,
+                                  &format!("casting {} to {} may cause truncation of the value", cast_from, cast_to));
+                        if !cast_to.is_signed() {
+                            span_lint(cx, CAST_SIGN_LOSS, expr.span,
+                                      &format!("casting from {} to {} loses the sign of the value", cast_from, cast_to));
+                        }
+                    },
+                    // integer -> integer
+                    (true, true) => {
+                        if cast_from.is_signed() && !cast_to.is_signed() {
+                            span_lint(cx, CAST_SIGN_LOSS, expr.span,
+                                      &format!("casting from {} to {} loses the sign of the value", cast_from, cast_to));
+                        }
+                        let from_nbits = int_ty_to_nbits(cast_from);
+                        let to_nbits   = int_ty_to_nbits(cast_to);
+                        // reported when the target is narrower, or when an
+                        // unsigned value is cast to a signed type that is not
+                        // wider than the source
+                        if to_nbits < from_nbits ||
+                           (!cast_from.is_signed() && cast_to.is_signed() && to_nbits <= from_nbits) {
+                                span_lint(cx, CAST_POSSIBLE_TRUNCATION, expr.span,
+                                          &format!("casting {} to {} may cause truncation of the value", cast_from, cast_to));
+                        }
+                    },
+                    // float -> float: only the narrowing f64 -> f32 cast is reported
+                    (false, false) => {
+                        if let (&ty::TyFloat(ast::TyF64),
+                                &ty::TyFloat(ast::TyF32)) = (&cast_from.sty, &cast_to.sty) {
+                            span_lint(cx, CAST_POSSIBLE_TRUNCATION, expr.span, "casting f64 to f32 may cause truncation of the value");
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
--- a/src/unicode.rs
+++ b/src/unicode.rs
@ -40,6 +40,7 @@ fn check_str(cx: &Context, string: &str, span: Span) {
    }
 }

+#[allow(cast_possible_truncation)]
 fn str_pos_lint(cx: &Context, lint: &'static Lint, span: Span, index: usize, msg: &str) {
    span_lint(cx, lint, Span { lo: span.lo + BytePos((1 + index) as u32),
                               hi: span.lo + BytePos((1 + index) as u32),
--- a/tests/compile-fail/cast.rs
+++ b/tests/compile-fail/cast.rs
@ -0,0 +1,57 @@
+#![feature(plugin)]
+#![plugin(clippy)]
+
+// Exercises the three cast lints with all of them set to `deny`.
+// The trailing ERROR annotations give the diagnostic message expected for
+// each statement; statements marked "Should not trigger" carry no annotation.
+#[deny(cast_precision_loss, cast_possible_truncation, cast_sign_loss)]
+#[allow(dead_code)]
+fn main() {
+    let i : i32 = 42;
+    let u : u32 = 42;
+    let f : f32 = 42.0;
+
+    // Test cast_precision_loss
+    i as f32; //~ERROR converting from i32 to f32, which causes a loss of precision (i32 is 32 bits wide, but f32's mantissa is only 23 bits wide)
+    (i as i64) as f32; //~ERROR converting from i64 to f32, which causes a loss of precision (i64 is 64 bits wide, but f32's mantissa is only 23 bits wide)
+    (i as i64) as f64; //~ERROR converting from i64 to f64, which causes a loss of precision (i64 is 64 bits wide, but f64's mantissa is only 52 bits wide)
+    u as f32; //~ERROR converting from u32 to f32, which causes a loss of precision (u32 is 32 bits wide, but f32's mantissa is only 23 bits wide)
+    (u as u64) as f32; //~ERROR converting from u64 to f32, which causes a loss of precision (u64 is 64 bits wide, but f32's mantissa is only 23 bits wide)
+    (u as u64) as f64; //~ERROR converting from u64 to f64, which causes a loss of precision (u64 is 64 bits wide, but f64's mantissa is only 52 bits wide)
+    i as f64; // Should not trigger the lint
+    u as f64; // Should not trigger the lint
+
+    // Test cast_possible_truncation
+    f as i32; //~ERROR casting f32 to i32 may cause truncation of the value
+    f as u32; //~ERROR casting f32 to u32 may cause truncation of the value
+              //~^ERROR casting from f32 to u32 loses the sign of the value
+    i as u8;  //~ERROR casting i32 to u8 may cause truncation of the value
+              //~^ERROR casting from i32 to u8 loses the sign of the value
+    (f as f64) as f32; //~ERROR casting f64 to f32 may cause truncation of the value
+    i as i8;  //~ERROR casting i32 to i8 may cause truncation of the value
+    u as i32; //~ERROR casting u32 to i32 may cause truncation of the value
+
+    // Test cast_sign_loss
+    i as u32; //~ERROR casting from i32 to u32 loses the sign of the value
+
+    // Extra checks for usize/isize
+    let is : isize = -42;
+    is as usize; //~ERROR casting from isize to usize loses the sign of the value
+    is as i8; //~ERROR casting isize to i8 may cause truncation of the value
+
+    // FIXME : enable these checks when we figure out a way to make compiletest deal with conditional compilation
+    /*
+    #[cfg(target_pointer_width = "64")]
+    fn check_64() {
+        let is : isize = -42;
+        let us : usize = 42;
+        is as f32; //ERROR converting from isize to f32, which causes a loss of precision (isize is 64 bits wide, but f32's mantissa is only 23 bits wide)
+        us as u32; //ERROR casting usize to u32 may cause truncation of the value
+        us as u64; // Should not trigger any lint
+    }
+    #[cfg(target_pointer_width = "32")]
+    fn check_32() {
+        let is : isize = -42;
+        let us : usize = 42;
+        is as f32; //ERROR converting from isize to f32, which causes a loss of precision (isize is 32 bits wide, but f32's mantissa is only 23 bits wide)
+        us as u32; // Should not trigger any lint
+        us as u64; // Should not trigger any lint
+    }*/
+}