diff --git a/src/librustc_codegen_llvm/builder.rs b/src/librustc_codegen_llvm/builder.rs
index 8a1bb258d42..1124e91bf71 100644
--- a/src/librustc_codegen_llvm/builder.rs
+++ b/src/librustc_codegen_llvm/builder.rs
@@ -703,11 +703,67 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         None
     }
 
+    fn fptosui_may_trap(&self, val: &'ll Value, dest_ty: &'ll Type) -> bool {
+        // Most of the time we'll be generating the `fptosi` or `fptoui`
+        // instruction for floating-point-to-integer conversions. These
+        // instructions by definition in LLVM do not trap. For the WebAssembly
+        // target, however, we'll lower in some cases to intrinsic calls instead
+        // which may trap. If we detect that this is a situation where we'll be
+        // using the intrinsics then we report that the call may trap, which
+        // callers might need to handle.
+        if !self.wasm_and_missing_nontrapping_fptoint() {
+            return false;
+        }
+        let src_ty = self.cx.val_ty(val);
+        let float_width = self.cx.float_width(src_ty);
+        let int_width = self.cx.int_width(dest_ty);
+        match (int_width, float_width) {
+            (32, 32) | (32, 64) | (64, 32) | (64, 64) => true,
+            _ => false,
+        }
+    }
+
     fn fptoui(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
+        // When we can, use the native wasm intrinsics which have tighter
+        // codegen. Note that this has a semantic difference in that the
+        // intrinsic can trap whereas `fptoui` never traps. That difference,
+        // however, is handled by `fptosui_may_trap` above.
+        if self.wasm_and_missing_nontrapping_fptoint() {
+            let src_ty = self.cx.val_ty(val);
+            let float_width = self.cx.float_width(src_ty);
+            let int_width = self.cx.int_width(dest_ty);
+            let name = match (int_width, float_width) {
+                (32, 32) => Some("llvm.wasm.trunc.unsigned.i32.f32"),
+                (32, 64) => Some("llvm.wasm.trunc.unsigned.i32.f64"),
+                (64, 32) => Some("llvm.wasm.trunc.unsigned.i64.f32"),
+                (64, 64) => Some("llvm.wasm.trunc.unsigned.i64.f64"),
+                _ => None,
+            };
+            if let Some(name) = name {
+                let intrinsic = self.get_intrinsic(name);
+                return self.call(intrinsic, &[val], None);
+            }
+        }
         unsafe { llvm::LLVMBuildFPToUI(self.llbuilder, val, dest_ty, UNNAMED) }
     }
 
     fn fptosi(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
+        if self.wasm_and_missing_nontrapping_fptoint() {
+            let src_ty = self.cx.val_ty(val);
+            let float_width = self.cx.float_width(src_ty);
+            let int_width = self.cx.int_width(dest_ty);
+            let name = match (int_width, float_width) {
+                (32, 32) => Some("llvm.wasm.trunc.signed.i32.f32"),
+                (32, 64) => Some("llvm.wasm.trunc.signed.i32.f64"),
+                (64, 32) => Some("llvm.wasm.trunc.signed.i64.f32"),
+                (64, 64) => Some("llvm.wasm.trunc.signed.i64.f64"),
+                _ => None,
+            };
+            if let Some(name) = name {
+                let intrinsic = self.get_intrinsic(name);
+                return self.call(intrinsic, &[val], None);
+            }
+        }
         unsafe { llvm::LLVMBuildFPToSI(self.llbuilder, val, dest_ty, UNNAMED) }
     }
 
@@ -1349,4 +1405,9 @@ impl Builder<'a, 'll, 'tcx> {
             llvm::LLVMAddIncoming(phi, &val, &bb, 1 as c_uint);
         }
     }
+
+    fn wasm_and_missing_nontrapping_fptoint(&self) -> bool {
+        self.sess().target.target.arch == "wasm32"
+            && !self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
+    }
 }
diff --git a/src/librustc_codegen_llvm/intrinsic.rs b/src/librustc_codegen_llvm/intrinsic.rs
index 728af7b0a8c..e3a308181b6 100644
--- a/src/librustc_codegen_llvm/intrinsic.rs
+++ b/src/librustc_codegen_llvm/intrinsic.rs
@@ -634,22 +634,19 @@ impl IntrinsicCallMethods<'tcx> for Builder<'a, 'll, 'tcx> {
             }
 
             sym::float_to_int_unchecked => {
-                let float_width = match float_type_width(arg_tys[0]) {
-                    Some(width) => width,
-                    None => {
-                        span_invalid_monomorphization_error(
-                            tcx.sess,
-                            span,
-                            &format!(
-                                "invalid monomorphization of `float_to_int_unchecked` \
+                if float_type_width(arg_tys[0]).is_none() {
+                    span_invalid_monomorphization_error(
+                        tcx.sess,
+                        span,
+                        &format!(
+                            "invalid monomorphization of `float_to_int_unchecked` \
                                   intrinsic: expected basic float type, \
                                   found `{}`",
-                                arg_tys[0]
-                            ),
-                        );
-                        return;
-                    }
-                };
+                            arg_tys[0]
+                        ),
+                    );
+                    return;
+                }
                 let (width, signed) = match int_type_width_signed(ret_ty, self.cx) {
                     Some(pair) => pair,
                     None => {
@@ -666,48 +663,11 @@ impl IntrinsicCallMethods<'tcx> for Builder<'a, 'll, 'tcx> {
                         return;
                     }
                 };
-
-                // The LLVM backend can reorder and speculate `fptosi` and
-                // `fptoui`, so on WebAssembly the codegen for this instruction
-                // is quite heavyweight. To avoid this heavyweight codegen we
-                // instead use the raw wasm intrinsics which will lower to one
-                // instruction in WebAssembly (`iNN.trunc_fMM_{s,u}`). This one
-                // instruction will trap if the operand is out of bounds, but
-                // that's ok since this intrinsic is UB if the operands are out
-                // of bounds, so the behavior can be different on WebAssembly
-                // than other targets.
-                //
-                // Note, however, that when the `nontrapping-fptoint` feature is
-                // enabled in LLVM then LLVM will lower `fptosi` to
-                // `iNN.trunc_sat_fMM_{s,u}`, so if that's the case we don't
-                // bother with intrinsics.
-                let mut result = None;
-                if self.sess().target.target.arch == "wasm32"
-                    && !self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
-                {
-                    let name = match (width, float_width, signed) {
-                        (32, 32, true) => Some("llvm.wasm.trunc.signed.i32.f32"),
-                        (32, 64, true) => Some("llvm.wasm.trunc.signed.i32.f64"),
-                        (64, 32, true) => Some("llvm.wasm.trunc.signed.i64.f32"),
-                        (64, 64, true) => Some("llvm.wasm.trunc.signed.i64.f64"),
-                        (32, 32, false) => Some("llvm.wasm.trunc.unsigned.i32.f32"),
-                        (32, 64, false) => Some("llvm.wasm.trunc.unsigned.i32.f64"),
-                        (64, 32, false) => Some("llvm.wasm.trunc.unsigned.i64.f32"),
-                        (64, 64, false) => Some("llvm.wasm.trunc.unsigned.i64.f64"),
-                        _ => None,
-                    };
-                    if let Some(name) = name {
-                        let intrinsic = self.get_intrinsic(name);
-                        result = Some(self.call(intrinsic, &[args[0].immediate()], None));
-                    }
+                if signed {
+                    self.fptosi(args[0].immediate(), self.cx.type_ix(width))
+                } else {
+                    self.fptoui(args[0].immediate(), self.cx.type_ix(width))
                 }
-                result.unwrap_or_else(|| {
-                    if signed {
-                        self.fptosi(args[0].immediate(), self.cx.type_ix(width))
-                    } else {
-                        self.fptoui(args[0].immediate(), self.cx.type_ix(width))
-                    }
-                })
             }
 
             sym::discriminant_value => {
diff --git a/src/librustc_codegen_ssa/mir/rvalue.rs b/src/librustc_codegen_ssa/mir/rvalue.rs
index 9c108998bc9..77e94fe3d0a 100644
--- a/src/librustc_codegen_ssa/mir/rvalue.rs
+++ b/src/librustc_codegen_ssa/mir/rvalue.rs
@@ -11,7 +11,7 @@ use rustc_apfloat::{ieee, Float, Round, Status};
 use rustc_hir::lang_items::ExchangeMallocFnLangItem;
 use rustc_middle::mir;
 use rustc_middle::ty::cast::{CastTy, IntTy};
-use rustc_middle::ty::layout::HasTyCtxt;
+use rustc_middle::ty::layout::{HasTyCtxt, TyAndLayout};
 use rustc_middle::ty::{self, adjustment::PointerCast, Instance, Ty, TyCtxt};
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::sym;
@@ -369,10 +369,10 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                         bx.inttoptr(usize_llval, ll_t_out)
                     }
                     (CastTy::Float, CastTy::Int(IntTy::I)) => {
-                        cast_float_to_int(&mut bx, true, llval, ll_t_in, ll_t_out)
+                        cast_float_to_int(&mut bx, true, llval, ll_t_in, ll_t_out, cast)
                     }
                     (CastTy::Float, CastTy::Int(_)) => {
-                        cast_float_to_int(&mut bx, false, llval, ll_t_in, ll_t_out)
+                        cast_float_to_int(&mut bx, false, llval, ll_t_in, ll_t_out, cast)
                     }
                     _ => bug!("unsupported cast: {:?} to {:?}", operand.layout.ty, cast.ty),
                 };
@@ -772,6 +772,7 @@ fn cast_float_to_int<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     x: Bx::Value,
     float_ty: Bx::Type,
     int_ty: Bx::Type,
+    int_layout: TyAndLayout<'tcx>,
 ) -> Bx::Value {
     if let Some(false) = bx.cx().sess().opts.debugging_opts.saturating_float_casts {
         return if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
@@ -782,8 +783,6 @@ fn cast_float_to_int<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         return try_sat_result;
     }
 
-    let fptosui_result = if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
-
     let int_width = bx.cx().int_width(int_ty);
     let float_width = bx.cx().float_width(float_ty);
     // LLVM's fpto[su]i returns undef when the input x is infinite, NaN, or does not fit into the
@@ -870,36 +869,138 @@ fn cast_float_to_int<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     // int_ty::MIN and therefore the return value of int_ty::MIN is correct.
     // QED.
 
-    // Step 1 was already performed above.
-
-    // Step 2: We use two comparisons and two selects, with %s1 being the result:
-    // %less_or_nan = fcmp ult %x, %f_min
-    // %greater = fcmp olt %x, %f_max
-    // %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
-    // %s1 = select %greater, int_ty::MAX, %s0
-    // Note that %less_or_nan uses an *unordered* comparison. This comparison is true if the
-    // operands are not comparable (i.e., if x is NaN). The unordered comparison ensures that s1
-    // becomes int_ty::MIN if x is NaN.
-    // Performance note: Unordered comparison can be lowered to a "flipped" comparison and a
-    // negation, and the negation can be merged into the select. Therefore, it not necessarily any
-    // more expensive than a ordered ("normal") comparison. Whether these optimizations will be
-    // performed is ultimately up to the backend, but at least x86 does perform them.
-    let less_or_nan = bx.fcmp(RealPredicate::RealULT, x, f_min);
-    let greater = bx.fcmp(RealPredicate::RealOGT, x, f_max);
     let int_max = bx.cx().const_uint_big(int_ty, int_max(signed, int_width));
     let int_min = bx.cx().const_uint_big(int_ty, int_min(signed, int_width) as u128);
-    let s0 = bx.select(less_or_nan, int_min, fptosui_result);
-    let s1 = bx.select(greater, int_max, s0);
+    let zero = bx.cx().const_uint(int_ty, 0);
 
-    // Step 3: NaN replacement.
-    // For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
-    // Therefore we only need to execute this step for signed integer types.
-    if signed {
-        // LLVM has no isNaN predicate, so we use (x == x) instead
-        let zero = bx.cx().const_uint(int_ty, 0);
-        let cmp = bx.fcmp(RealPredicate::RealOEQ, x, x);
-        bx.select(cmp, s1, zero)
+    // The codegen here differs quite a bit depending on whether our builder's
+    // `fptosi` and `fptoui` instructions may trap for out-of-bounds values. If
+    // they don't trap then we can start doing everything inline with a
+    // `select` instruction because it's ok to execute `fptosi` and `fptoui`
+    // even if we don't use the results.
+    if !bx.fptosui_may_trap(x, int_ty) {
+        // Step 1 ...
+        let fptosui_result = if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
+        let less_or_nan = bx.fcmp(RealPredicate::RealULT, x, f_min);
+        let greater = bx.fcmp(RealPredicate::RealOGT, x, f_max);
+
+        // Step 2: We use two comparisons and two selects, with %s1 being the
+        // result:
+        // %less_or_nan = fcmp ult %x, %f_min
+        // %greater = fcmp olt %x, %f_max
+        // %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
+        // %s1 = select %greater, int_ty::MAX, %s0
+        // Note that %less_or_nan uses an *unordered* comparison. This
+        // comparison is true if the operands are not comparable (i.e., if x is
+        // NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
+        // x is NaN.
+        //
+        // Performance note: Unordered comparison can be lowered to a "flipped"
+        // comparison and a negation, and the negation can be merged into the
+        // select. Therefore, it is not necessarily any more expensive than an
+        // ordered ("normal") comparison. Whether these optimizations will be
+        // performed is ultimately up to the backend, but at least x86 does
+        // perform them.
+        let s0 = bx.select(less_or_nan, int_min, fptosui_result);
+        let s1 = bx.select(greater, int_max, s0);
+
+        // Step 3: NaN replacement.
+        // For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
+        // Therefore we only need to execute this step for signed integer types.
+        if signed {
+            // LLVM has no isNaN predicate, so we use (x == x) instead
+            let cmp = bx.fcmp(RealPredicate::RealOEQ, x, x);
+            bx.select(cmp, s1, zero)
+        } else {
+            s1
+        }
     } else {
-        s1
+        // In this case we cannot execute `fptosi` or `fptoui` and then later
+        // discard the result. The builder is telling us that these instructions
+        // will trap on out-of-bounds values, so we need to use basic blocks and
+        // control flow to avoid executing the `fptosi` and `fptoui`
+        // instructions.
+        //
+        // The general idea of what we're constructing here is, for f64 -> i32:
+        //
+        // ;; block so far... %0 is the argument
+        // %result = alloca i32, align 4
+        // %inbound_lower = fcmp oge double %0, 0xC1E0000000000000
+        // %inbound_upper = fcmp ole double %0, 0x41DFFFFFFFC00000
+        // ;; match (inbound_lower, inbound_upper) {
+        // ;;     (true, true) => %0 can be converted without trapping
+        // ;;     (false, false) => %0 is a NaN
+        // ;;     (true, false) => %0 is too large
+        // ;;     (false, true) => %0 is too small
+        // ;; }
+        // ;;
+        // ;; Check for the (true, true) case and go to %convert if so.
+        // %inbounds = and i1 %inbound_lower, %inbound_upper
+        // br i1 %inbounds, label %convert, label %specialcase
+        //
+        // convert:
+        // %cvt = call i32 @llvm.wasm.trunc.signed.i32.f64(double %0)
+        // store i32 %cvt, i32* %result, align 4
+        // br label %done
+        //
+        // specialcase:
+        // ;; Handle the cases where the number is NaN, too large or too small
+        //
+        // ;; Either (true, false) or (false, true)
+        // %is_not_nan = or i1 %inbound_lower, %inbound_upper
+        // ;; Figure out which saturated value we are interested in if not `NaN`
+        // %saturated = select i1 %inbound_lower, i32 2147483647, i32 -2147483648
+        // ;; Choose between the saturated and NaN representations
+        // %result_nan = select i1 %is_not_nan, i32 %saturated, i32 0
+        // store i32 %result_nan, i32* %result, align 4
+        // br label %done
+        //
+        // done:
+        // %r = load i32, i32* %result, align 4
+        // ;; ...
+        let done = bx.build_sibling_block("float_cast_done");
+        let mut convert = bx.build_sibling_block("float_cast_convert");
+        let mut specialcase = bx.build_sibling_block("float_cast_specialcase");
+
+        let result = PlaceRef::alloca(bx, int_layout);
+        result.storage_live(bx);
+
+        // Use control flow to figure out whether we can execute `fptosi` in a
+        // basic block, or whether we go to a different basic block to implement
+        // the saturating logic.
+        let inbound_lower = bx.fcmp(RealPredicate::RealOGE, x, f_min);
+        let inbound_upper = bx.fcmp(RealPredicate::RealOLE, x, f_max);
+        let inbounds = bx.and(inbound_lower, inbound_upper);
+        bx.cond_br(inbounds, convert.llbb(), specialcase.llbb());
+
+        // Translation of the `convert` basic block
+        let cvt = if signed { convert.fptosi(x, int_ty) } else { convert.fptoui(x, int_ty) };
+        convert.store(cvt, result.llval, result.align);
+        convert.br(done.llbb());
+
+        // Translation of the `specialcase` basic block. Note that like above
+        // we try to be a bit clever here for unsigned conversions. In those
+        // cases the `int_min` is zero so we don't need two select instructions,
+        // just one to choose whether we need `int_max` or not. If
+        // `inbound_lower` is true then we're guaranteed to not be `NaN` and
+        // since we're greater than zero we must be saturating to `int_max`. If
+        // `inbound_lower` is false then we're either NaN or less than zero, so
+        // we saturate to zero.
+        let result_nan = if signed {
+            let is_not_nan = specialcase.or(inbound_lower, inbound_upper);
+            let saturated = specialcase.select(inbound_lower, int_max, int_min);
+            specialcase.select(is_not_nan, saturated, zero)
+        } else {
+            specialcase.select(inbound_lower, int_max, int_min)
+        };
+        specialcase.store(result_nan, result.llval, result.align);
+        specialcase.br(done.llbb());
+
+        // Translation of the `done` basic block, positioning ourselves to
+        // continue from that point as well.
+        *bx = done;
+        let ret = bx.load(result.llval, result.align);
+        result.storage_dead(bx);
+        ret
     }
 }
diff --git a/src/librustc_codegen_ssa/traits/builder.rs b/src/librustc_codegen_ssa/traits/builder.rs
index 65eb70e173e..4e11ef5fd6e 100644
--- a/src/librustc_codegen_ssa/traits/builder.rs
+++ b/src/librustc_codegen_ssa/traits/builder.rs
@@ -160,6 +160,7 @@ pub trait BuilderMethods<'a, 'tcx>:
     fn sext(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
     fn fptoui_sat(&mut self, val: Self::Value, dest_ty: Self::Type) -> Option<Self::Value>;
     fn fptosi_sat(&mut self, val: Self::Value, dest_ty: Self::Type) -> Option<Self::Value>;
+    fn fptosui_may_trap(&self, val: Self::Value, dest_ty: Self::Type) -> bool;
     fn fptoui(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
     fn fptosi(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
     fn uitofp(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
diff --git a/src/test/codegen/wasm_casts_trapping.rs b/src/test/codegen/wasm_casts_trapping.rs
index b7f8522fdfb..ed51faa7be1 100644
--- a/src/test/codegen/wasm_casts_trapping.rs
+++ b/src/test/codegen/wasm_casts_trapping.rs
@@ -5,72 +5,72 @@
 // CHECK-LABEL: @cast_f64_i64
 #[no_mangle]
 pub fn cast_f64_i64(a: f64) -> i64 {
-    // CHECK-NOT: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
-    // CHECK: fptosi double {{.*}} to i64
-    // CHECK-NEXT: select i1 {{.*}}, i64 {{.*}}, i64 {{.*}}
+    // CHECK-NOT: fptosi double {{.*}} to i64
+    // CHECK-NOT: select i1 {{.*}}, i64 {{.*}}, i64 {{.*}}
+    // CHECK: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
     a as _
 }
 
 // CHECK-LABEL: @cast_f64_i32
 #[no_mangle]
 pub fn cast_f64_i32(a: f64) -> i32 {
-    // CHECK-NOT: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
-    // CHECK: fptosi double {{.*}} to i32
-    // CHECK-NEXT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+    // CHECK-NOT: fptosi double {{.*}} to i32
+    // CHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+    // CHECK: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
     a as _
 }
 
 // CHECK-LABEL: @cast_f32_i64
 #[no_mangle]
 pub fn cast_f32_i64(a: f32) -> i64 {
-    // CHECK-NOT: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
-    // CHECK: fptosi float {{.*}} to i64
-    // CHECK-NEXT: select i1 {{.*}}, i64 {{.*}}, i64 {{.*}}
+    // CHECK-NOT: fptosi float {{.*}} to i64
+    // CHECK-NOT: select i1 {{.*}}, i64 {{.*}}, i64 {{.*}}
+    // CHECK: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
     a as _
 }
 
 // CHECK-LABEL: @cast_f32_i32
 #[no_mangle]
 pub fn cast_f32_i32(a: f32) -> i32 {
-    // CHECK-NOT: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
-    // CHECK: fptosi float {{.*}} to i32
-    // CHECK-NEXT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+    // CHECK-NOT: fptosi float {{.*}} to i32
+    // CHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+    // CHECK: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
     a as _
 }
 
 // CHECK-LABEL: @cast_f64_u64
 #[no_mangle]
 pub fn cast_f64_u64(a: f64) -> u64 {
-    // CHECK-NOT: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
-    // CHECK: fptoui double {{.*}} to i64
-    // CHECK-NEXT: select i1 {{.*}}, i64 {{.*}}, i64 {{.*}}
+    // CHECK-NOT: fptoui double {{.*}} to i64
+    // CHECK-NOT: select i1 {{.*}}, i64 {{.*}}, i64 {{.*}}
+    // CHECK: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
     a as _
 }
 
 // CHECK-LABEL: @cast_f64_u32
 #[no_mangle]
 pub fn cast_f64_u32(a: f64) -> u32 {
-    // CHECK-NOT: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
-    // CHECK: fptoui double {{.*}} to i32
-    // CHECK-NEXT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+    // CHECK-NOT: fptoui double {{.*}} to i32
+    // CHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+    // CHECK: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
     a as _
 }
 
 // CHECK-LABEL: @cast_f32_u64
 #[no_mangle]
 pub fn cast_f32_u64(a: f32) -> u64 {
-    // CHECK-NOT: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
-    // CHECK: fptoui float {{.*}} to i64
-    // CHECK-NEXT: select i1 {{.*}}, i64 {{.*}}, i64 {{.*}}
+    // CHECK-NOT: fptoui float {{.*}} to i64
+    // CHECK-NOT: select i1 {{.*}}, i64 {{.*}}, i64 {{.*}}
+    // CHECK: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
     a as _
 }
 
 // CHECK-LABEL: @cast_f32_u32
 #[no_mangle]
 pub fn cast_f32_u32(a: f32) -> u32 {
-    // CHECK-NOT: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
-    // CHECK: fptoui float {{.*}} to i32
-    // CHECK-NEXT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+    // CHECK-NOT: fptoui float {{.*}} to i32
+    // CHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+    // CHECK: {{.*}} call {{.*}} @llvm.wasm.trunc.{{.*}}
     a as _
 }
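
As a reference point for the changes above, here is a minimal sketch in plain Rust of the saturating semantics that both lowerings of `cast_float_to_int` have to produce for an f64 -> i32 cast: out-of-range values clamp to the integer bounds and NaN becomes zero. The helper name is made up for illustration; on current Rust this is exactly what `a as i32` already does, so the sketch only spells out the behavior the generated IR must match.

fn saturating_f64_to_i32(x: f64) -> i32 {
    if x.is_nan() {
        // NaN is defined to convert to zero.
        0
    } else if x >= i32::MAX as f64 {
        // Too large: clamp to the upper bound.
        i32::MAX
    } else if x <= i32::MIN as f64 {
        // Too small: clamp to the lower bound.
        i32::MIN
    } else {
        // In range: plain truncation toward zero.
        x as i32
    }
}

fn main() {
    assert_eq!(saturating_f64_to_i32(f64::NAN), 0);
    assert_eq!(saturating_f64_to_i32(1e300), i32::MAX);
    assert_eq!(saturating_f64_to_i32(-1e300), i32::MIN);
    assert_eq!(saturating_f64_to_i32(3.9), 3);
}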
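
The non-trapping branch (the `select`-based Steps 1-3 in `cast_float_to_int`) can be read as straight-line code. Below is a rough Rust rendering of that branch for an f32 -> i32 cast, with the f_min/f_max constants written out by hand and the unordered comparison spelled as `!(x >= F_MIN)`; this is illustrative only, not the actual codegen, and the function name is invented.

fn select_based_f32_to_i32(x: f32) -> i32 {
    // f_min/f_max for f32 -> i32: the float range whose truncation is
    // guaranteed to fit in the integer type.
    const F_MIN: f32 = -2_147_483_648.0; // i32::MIN, exactly representable
    const F_MAX: f32 = 2_147_483_520.0; // largest f32 not above i32::MAX

    // Step 1: the conversion itself, safe to run unconditionally here.
    let fptosui_result = x as i32;

    // Step 2: two "selects" driven by one unordered and one ordered compare.
    let less_or_nan = !(x >= F_MIN); // fcmp ult: also true when x is NaN
    let greater = x > F_MAX; // fcmp ogt
    let s0 = if less_or_nan { i32::MIN } else { fptosui_result };
    let s1 = if greater { i32::MAX } else { s0 };

    // Step 3: NaN replacement, only needed for signed targets.
    if x == x { s1 } else { 0 }
}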
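
The trapping branch builds the control flow sketched in the `;;` comments above. The same shape in plain Rust for f64 -> i32, with the trapping `llvm.wasm.trunc.*` call stood in for by an ordinary cast that is only reached once the input is known to be in bounds; again a hand-written sketch, not the generated code.

fn branch_based_f64_to_i32(x: f64) -> i32 {
    const F_MIN: f64 = -2_147_483_648.0;
    const F_MAX: f64 = 2_147_483_647.0;

    let inbound_lower = x >= F_MIN; // false for NaN and for too-small inputs
    let inbound_upper = x <= F_MAX; // false for NaN and for too-large inputs

    if inbound_lower && inbound_upper {
        // "convert" block: the trapping conversion is only executed here.
        x as i32
    } else {
        // "specialcase" block: NaN, too large, or too small.
        let is_not_nan = inbound_lower || inbound_upper;
        let saturated = if inbound_lower { i32::MAX } else { i32::MIN };
        if is_not_nan { saturated } else { 0 }
    }
}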