Auto merge of #34141 - eddyb:trans-abi-memcpy, r=nikomatsakis

trans: always use a memcpy for ABI argument/return casts.

When storing incoming arguments or values returned by call/invoke, always do a `memcpy` from a temporary of the cast type, if there is an ABI cast.
While Clang has gotten smarter ([store](https://godbolt.org/g/EphFuK) vs [memcpy](https://godbolt.org/g/5dikH9)), a `memcpy` will always work.
This is what @dotdash has wanted to do all along, and it fixes #32049.
This commit is contained in:
bors 2016-06-07 14:45:39 -07:00
commit ec872dc8a3
6 changed files with 137 additions and 194 deletions

View File

@ -10,8 +10,8 @@
use llvm::{self, ValueRef};
use base;
use builder::Builder;
use common::{type_is_fat_ptr, BlockAndBuilder};
use build::AllocaFcx;
use common::{type_is_fat_ptr, BlockAndBuilder, C_uint};
use context::CrateContext;
use cabi_x86;
use cabi_x86_64;
@ -22,7 +22,7 @@ use cabi_powerpc;
use cabi_powerpc64;
use cabi_mips;
use cabi_asmjs;
use machine::{llalign_of_min, llsize_of, llsize_of_real};
use machine::{llalign_of_min, llsize_of, llsize_of_real, llsize_of_store};
use type_::Type;
use type_of;
@ -30,6 +30,7 @@ use rustc::hir;
use rustc::ty::{self, Ty};
use libc::c_uint;
use std::cmp;
pub use syntax::abi::Abi;
pub use rustc::ty::layout::{FAT_PTR_ADDR, FAT_PTR_EXTRA};
@ -150,26 +151,63 @@ impl ArgType {
/// lvalue for the original Rust type of this argument/return.
/// Can be used for both storing formal arguments into Rust variables
/// or results of call/invoke instructions into their destinations.
pub fn store(&self, b: &Builder, mut val: ValueRef, dst: ValueRef) {
pub fn store(&self, bcx: &BlockAndBuilder, mut val: ValueRef, dst: ValueRef) {
if self.is_ignore() {
return;
}
let ccx = bcx.ccx();
if self.is_indirect() {
let llsz = llsize_of(b.ccx, self.ty);
let llalign = llalign_of_min(b.ccx, self.ty);
base::call_memcpy(b, dst, val, llsz, llalign as u32);
let llsz = llsize_of(ccx, self.ty);
let llalign = llalign_of_min(ccx, self.ty);
base::call_memcpy(bcx, dst, val, llsz, llalign as u32);
} else if let Some(ty) = self.cast {
let cast_dst = b.pointercast(dst, ty.ptr_to());
let store = b.store(val, cast_dst);
let llalign = llalign_of_min(b.ccx, self.ty);
unsafe {
llvm::LLVMSetAlignment(store, llalign);
// FIXME(eddyb): Figure out when the simpler Store is safe, clang
// uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
let can_store_through_cast_ptr = false;
if can_store_through_cast_ptr {
let cast_dst = bcx.pointercast(dst, ty.ptr_to());
let store = bcx.store(val, cast_dst);
let llalign = llalign_of_min(ccx, self.ty);
unsafe {
llvm::LLVMSetAlignment(store, llalign);
}
} else {
// The actual return type is a struct, but the ABI
// adaptation code has cast it into some scalar type. The
// code that follows is the only reliable way I have
// found to do a transform like i64 -> {i32,i32}.
// Basically we dump the data onto the stack then memcpy it.
//
// Other approaches I tried:
// - Casting rust ret pointer to the foreign type and using Store
// is (a) unsafe if size of foreign type > size of rust type and
// (b) runs afoul of strict aliasing rules, yielding invalid
// assembly under -O (specifically, the store gets removed).
// - Truncating foreign type to correct integral type and then
// bitcasting to the struct type yields invalid cast errors.
// We instead thus allocate some scratch space...
let llscratch = AllocaFcx(bcx.fcx(), ty, "abi_cast");
base::Lifetime::Start.call(bcx, llscratch);
// ...where we first store the value...
bcx.store(val, llscratch);
// ...and then memcpy it to the intended destination.
base::call_memcpy(bcx,
bcx.pointercast(dst, Type::i8p(ccx)),
bcx.pointercast(llscratch, Type::i8p(ccx)),
C_uint(ccx, llsize_of_store(ccx, self.ty)),
cmp::min(llalign_of_min(ccx, self.ty),
llalign_of_min(ccx, ty)) as u32);
base::Lifetime::End.call(bcx, llscratch);
}
} else {
if self.original_ty == Type::i1(b.ccx) {
val = b.zext(val, Type::i8(b.ccx));
if self.original_ty == Type::i1(ccx) {
val = bcx.zext(val, Type::i8(ccx));
}
b.store(val, dst);
bcx.store(val, dst);
}
}

View File

@ -1043,7 +1043,7 @@ pub fn with_cond<'blk, 'tcx, F>(bcx: Block<'blk, 'tcx>, val: ValueRef, f: F) ->
next_cx
}
enum Lifetime { Start, End }
pub enum Lifetime { Start, End }
// If LLVM lifetime intrinsic support is enabled (i.e. optimizations
// on), and `ptr` is nonzero-sized, then extracts the size of `ptr`
@ -1080,24 +1080,25 @@ fn core_lifetime_emit<'blk, 'tcx, F>(ccx: &'blk CrateContext<'blk, 'tcx>,
emit(ccx, size, lifetime_intrinsic)
}
pub fn call_lifetime_start(cx: Block, ptr: ValueRef) {
core_lifetime_emit(cx.ccx(), ptr, Lifetime::Start, |ccx, size, lifetime_start| {
let ptr = PointerCast(cx, ptr, Type::i8p(ccx));
Call(cx,
lifetime_start,
&[C_u64(ccx, size), ptr],
DebugLoc::None);
})
impl Lifetime {
pub fn call(self, b: &Builder, ptr: ValueRef) {
core_lifetime_emit(b.ccx, ptr, self, |ccx, size, lifetime_intrinsic| {
let ptr = b.pointercast(ptr, Type::i8p(ccx));
b.call(lifetime_intrinsic, &[C_u64(ccx, size), ptr], None);
});
}
}
pub fn call_lifetime_end(cx: Block, ptr: ValueRef) {
core_lifetime_emit(cx.ccx(), ptr, Lifetime::End, |ccx, size, lifetime_end| {
let ptr = PointerCast(cx, ptr, Type::i8p(ccx));
Call(cx,
lifetime_end,
&[C_u64(ccx, size), ptr],
DebugLoc::None);
})
pub fn call_lifetime_start(bcx: Block, ptr: ValueRef) {
if !bcx.unreachable.get() {
Lifetime::Start.call(&bcx.build(), ptr);
}
}
pub fn call_lifetime_end(bcx: Block, ptr: ValueRef) {
if !bcx.unreachable.get() {
Lifetime::End.call(&bcx.build(), ptr);
}
}
// Generates code for resumption of unwind at the end of a landing pad.
@ -1664,29 +1665,21 @@ impl<'blk, 'tcx> FunctionContext<'blk, 'tcx> {
arg_ty,
datum::Lvalue::new("FunctionContext::bind_args"))
} else {
let lltmp = if common::type_is_fat_ptr(bcx.tcx(), arg_ty) {
let lltemp = alloc_ty(bcx, arg_ty, "");
unpack_datum!(bcx, datum::lvalue_scratch_datum(bcx, arg_ty, "",
uninit_reason,
arg_scope_id, |bcx, dst| {
debug!("FunctionContext::bind_args: {:?}: {:?}", hir_arg, arg_ty);
let b = &bcx.build();
// we pass fat pointers as two words, but we want to
// represent them internally as a pointer to two words,
// so make an alloca to store them in.
let meta = &self.fn_ty.args[idx];
idx += 1;
arg.store_fn_arg(b, &mut llarg_idx, expr::get_dataptr(bcx, lltemp));
meta.store_fn_arg(b, &mut llarg_idx, expr::get_meta(bcx, lltemp));
lltemp
} else {
// otherwise, arg is passed by value, so store it into a temporary.
let llarg_ty = arg.cast.unwrap_or(arg.memory_ty(bcx.ccx()));
let lltemp = alloca(bcx, llarg_ty, "");
let b = &bcx.build();
arg.store_fn_arg(b, &mut llarg_idx, lltemp);
// And coerce the temporary into the type we expect.
b.pointercast(lltemp, arg.memory_ty(bcx.ccx()).ptr_to())
};
bcx.fcx.schedule_drop_mem(arg_scope_id, lltmp, arg_ty, None);
datum::Datum::new(lltmp, arg_ty,
datum::Lvalue::new("bind_args"))
if common::type_is_fat_ptr(bcx.tcx(), arg_ty) {
let meta = &self.fn_ty.args[idx];
idx += 1;
arg.store_fn_arg(b, &mut llarg_idx, expr::get_dataptr(bcx, dst));
meta.store_fn_arg(b, &mut llarg_idx, expr::get_meta(bcx, dst));
} else {
arg.store_fn_arg(b, &mut llarg_idx, dst);
}
bcx
}))
}
} else {
// FIXME(pcwalton): Reduce the amount of code bloat this is responsible for.
@ -1721,19 +1714,16 @@ impl<'blk, 'tcx> FunctionContext<'blk, 'tcx> {
};
let pat = &hir_arg.pat;
bcx = match simple_name(pat) {
// The check for alloca is necessary because above for the immediate argument case
// we had to cast. At this point arg_datum is not an alloca anymore and thus
// breaks debuginfo if we allow this optimisation.
Some(name)
if unsafe { llvm::LLVMIsAAllocaInst(arg_datum.val) != ::std::ptr::null_mut() } => {
// Generate nicer LLVM for the common case of fn a pattern
// like `x: T`
set_value_name(arg_datum.val, &bcx.name(name));
self.lllocals.borrow_mut().insert(pat.id, arg_datum);
bcx
},
_ => _match::bind_irrefutable_pat(bcx, pat, arg_datum.match_input(), arg_scope_id)
bcx = if let Some(name) = simple_name(pat) {
// Generate nicer LLVM for the common case of fn a pattern
// like `x: T`
set_value_name(arg_datum.val, &bcx.name(name));
self.lllocals.borrow_mut().insert(pat.id, arg_datum);
bcx
} else {
// General path. Copy out the values that are used in the
// pattern.
_match::bind_irrefutable_pat(bcx, pat, arg_datum.match_input(), arg_scope_id)
};
debuginfo::create_argument_metadata(bcx, hir_arg);
}

View File

@ -34,8 +34,7 @@ use build::*;
use cleanup;
use cleanup::CleanupMethods;
use closure;
use common::{self, Block, Result, CrateContext, FunctionContext};
use common::{C_uint, C_undef};
use common::{self, Block, Result, CrateContext, FunctionContext, C_undef};
use consts;
use datum::*;
use debuginfo::DebugLoc;
@ -44,7 +43,7 @@ use expr;
use glue;
use inline;
use intrinsic;
use machine::{llalign_of_min, llsize_of_store};
use machine::llalign_of_min;
use meth;
use monomorphize::{self, Instance};
use type_::Type;
@ -58,8 +57,6 @@ use syntax::codemap::DUMMY_SP;
use syntax::errors;
use syntax::ptr::P;
use std::cmp;
#[derive(Debug)]
pub enum CalleeData {
/// Constructor for enum variant/tuple-like-struct.
@ -689,49 +686,16 @@ fn trans_call_inner<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
let (llret, mut bcx) = base::invoke(bcx, llfn, &llargs, debug_loc);
if !bcx.unreachable.get() {
fn_ty.apply_attrs_callsite(llret);
}
// If the function we just called does not use an outpointer,
// store the result into the rust outpointer. Cast the outpointer
// type to match because some ABIs will use a different type than
// the Rust type. e.g., a {u32,u32} struct could be returned as
// u64.
if !fn_ty.ret.is_ignore() && !fn_ty.ret.is_indirect() {
if let Some(llforeign_ret_ty) = fn_ty.ret.cast {
let llrust_ret_ty = fn_ty.ret.original_ty;
let llretslot = opt_llretslot.unwrap();
// The actual return type is a struct, but the ABI
// adaptation code has cast it into some scalar type. The
// code that follows is the only reliable way I have
// found to do a transform like i64 -> {i32,i32}.
// Basically we dump the data onto the stack then memcpy it.
//
// Other approaches I tried:
// - Casting rust ret pointer to the foreign type and using Store
// is (a) unsafe if size of foreign type > size of rust type and
// (b) runs afoul of strict aliasing rules, yielding invalid
// assembly under -O (specifically, the store gets removed).
// - Truncating foreign type to correct integral type and then
// bitcasting to the struct type yields invalid cast errors.
let llscratch = base::alloca(bcx, llforeign_ret_ty, "__cast");
base::call_lifetime_start(bcx, llscratch);
Store(bcx, llret, llscratch);
let llscratch_i8 = PointerCast(bcx, llscratch, Type::i8(ccx).ptr_to());
let llretptr_i8 = PointerCast(bcx, llretslot, Type::i8(ccx).ptr_to());
let llrust_size = llsize_of_store(ccx, llrust_ret_ty);
let llforeign_align = llalign_of_min(ccx, llforeign_ret_ty);
let llrust_align = llalign_of_min(ccx, llrust_ret_ty);
let llalign = cmp::min(llforeign_align, llrust_align);
debug!("llrust_size={}", llrust_size);
if !bcx.unreachable.get() {
base::call_memcpy(&B(bcx), llretptr_i8, llscratch_i8,
C_uint(ccx, llrust_size), llalign as u32);
// If the function we just called does not use an outpointer,
// store the result into the rust outpointer. Cast the outpointer
// type to match because some ABIs will use a different type than
// the Rust type. e.g., a {u32,u32} struct could be returned as
// u64.
if !fn_ty.ret.is_indirect() {
if let Some(llretslot) = opt_llretslot {
fn_ty.ret.store(&bcx.build(), llret, llretslot);
}
base::call_lifetime_end(bcx, llscratch);
} else if let Some(llretslot) = opt_llretslot {
base::store_ty(bcx, llret, llretslot, output.unwrap());
}
}

View File

@ -19,11 +19,11 @@ use base;
use build;
use callee::{Callee, CalleeData, Fn, Intrinsic, NamedTupleConstructor, Virtual};
use common::{self, Block, BlockAndBuilder, LandingPad};
use common::{C_bool, C_str_slice, C_struct, C_u32, C_uint, C_undef};
use common::{C_bool, C_str_slice, C_struct, C_u32, C_undef};
use consts;
use debuginfo::DebugLoc;
use Disr;
use machine::{llalign_of_min, llbitsize_of_real, llsize_of_store};
use machine::{llalign_of_min, llbitsize_of_real};
use meth;
use type_of;
use glue;
@ -39,8 +39,6 @@ use super::lvalue::{LvalueRef, load_fat_ptr};
use super::operand::OperandRef;
use super::operand::OperandValue::*;
use std::cmp;
impl<'bcx, 'tcx> MirContext<'bcx, 'tcx> {
pub fn trans_block(&mut self, bb: mir::BasicBlock) {
let mut bcx = self.bcx(bb);
@ -852,77 +850,27 @@ impl<'bcx, 'tcx> MirContext<'bcx, 'tcx> {
op: OperandRef<'tcx>) {
use self::ReturnDest::*;
// Handle the simple cases that don't require casts, first.
let llcast_ty = match dest {
Nothing => return,
Store(dst) => {
if let Some(llcast_ty) = ret_ty.cast {
llcast_ty
} else {
ret_ty.store(bcx, op.immediate(), dst);
return;
}
}
match dest {
Nothing => (),
Store(dst) => ret_ty.store(bcx, op.immediate(), dst),
IndirectOperand(tmp, idx) => {
let op = self.trans_load(bcx, tmp, op.ty);
self.temps[idx as usize] = TempRef::Operand(Some(op));
return;
}
DirectOperand(idx) => {
if let Some(llcast_ty) = ret_ty.cast {
llcast_ty
// If there is a cast, we have to store and reload.
let op = if ret_ty.cast.is_some() {
let tmp = bcx.with_block(|bcx| {
base::alloc_ty(bcx, op.ty, "tmp_ret")
});
ret_ty.store(bcx, op.immediate(), tmp);
self.trans_load(bcx, tmp, op.ty)
} else {
let op = op.unpack_if_pair(bcx);
self.temps[idx as usize] = TempRef::Operand(Some(op));
return;
}
}
};
// The actual return type is a struct, but the ABI
// adaptation code has cast it into some scalar type. The
// code that follows is the only reliable way I have
// found to do a transform like i64 -> {i32,i32}.
// Basically we dump the data onto the stack then memcpy it.
//
// Other approaches I tried:
// - Casting rust ret pointer to the foreign type and using Store
// is (a) unsafe if size of foreign type > size of rust type and
// (b) runs afoul of strict aliasing rules, yielding invalid
// assembly under -O (specifically, the store gets removed).
// - Truncating foreign type to correct integral type and then
// bitcasting to the struct type yields invalid cast errors.
// We instead thus allocate some scratch space...
let llscratch = bcx.with_block(|bcx| {
let alloca = base::alloca(bcx, llcast_ty, "fn_ret_cast");
base::call_lifetime_start(bcx, alloca);
alloca
});
// ...where we first store the value...
bcx.store(op.immediate(), llscratch);
let ccx = bcx.ccx();
match dest {
Store(dst) => {
// ...and then memcpy it to the intended destination.
base::call_memcpy(bcx,
bcx.pointercast(dst, Type::i8p(ccx)),
bcx.pointercast(llscratch, Type::i8p(ccx)),
C_uint(ccx, llsize_of_store(ccx, ret_ty.original_ty)),
cmp::min(llalign_of_min(ccx, ret_ty.original_ty),
llalign_of_min(ccx, llcast_ty)) as u32);
}
DirectOperand(idx) => {
let llptr = bcx.pointercast(llscratch, ret_ty.original_ty.ptr_to());
let op = self.trans_load(bcx, llptr, op.ty);
op.unpack_if_pair(bcx)
};
self.temps[idx as usize] = TempRef::Operand(Some(op));
}
Nothing | IndirectOperand(_, _) => bug!()
}
bcx.with_block(|bcx| base::call_lifetime_end(bcx, llscratch));
}
}

View File

@ -348,10 +348,10 @@ fn arg_value_refs<'bcx, 'tcx>(bcx: &BlockAndBuilder<'bcx, 'tcx>,
llarg_idx += 1;
llarg
} else {
let lltemp = bcx.with_block(|bcx| {
base::alloc_ty(bcx, arg_ty, &format!("arg{}", arg_index))
});
if common::type_is_fat_ptr(tcx, arg_ty) {
let lltemp = bcx.with_block(|bcx| {
base::alloc_ty(bcx, arg_ty, &format!("arg{}", arg_index))
});
// we pass fat pointers as two words, but we want to
// represent them internally as a pointer to two words,
// so make an alloca to store them in.
@ -359,17 +359,12 @@ fn arg_value_refs<'bcx, 'tcx>(bcx: &BlockAndBuilder<'bcx, 'tcx>,
idx += 1;
arg.store_fn_arg(bcx, &mut llarg_idx, get_dataptr(bcx, lltemp));
meta.store_fn_arg(bcx, &mut llarg_idx, get_meta(bcx, lltemp));
lltemp
} else {
// otherwise, arg is passed by value, so store it into a temporary.
let llarg_ty = arg.cast.unwrap_or(arg.memory_ty(bcx.ccx()));
let lltemp = bcx.with_block(|bcx| {
base::alloca(bcx, llarg_ty, &format!("arg{}", arg_index))
});
// otherwise, arg is passed by value, so make a
// temporary and store it there
arg.store_fn_arg(bcx, &mut llarg_idx, lltemp);
// And coerce the temporary into the type we expect.
bcx.pointercast(lltemp, arg.memory_ty(bcx.ccx()).ptr_to())
}
lltemp
};
bcx.with_block(|bcx| arg_scope.map(|scope| {
// Is this a regular argument?

View File

@ -26,8 +26,12 @@ pub struct Bytes {
#[no_mangle]
#[rustc_no_mir] // FIXME #27840 MIR has different codegen.
pub fn small_array_alignment(x: &mut [i8; 4], y: [i8; 4]) {
// CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1
// CHECK: [[VAR:%[0-9]+]] = bitcast i32* %{{.*}} to [4 x i8]*
// CHECK: %y = alloca [4 x i8]
// CHECK: [[TMP:%.+]] = alloca i32
// CHECK: store i32 %1, i32* [[TMP]]
// CHECK: [[Y8:%[0-9]+]] = bitcast [4 x i8]* %y to i8*
// CHECK: [[TMP8:%[0-9]+]] = bitcast i32* [[TMP]] to i8*
// CHECK: call void @llvm.memcpy.{{.*}}(i8* [[Y8]], i8* [[TMP8]], i{{[0-9]+}} 4, i32 1, i1 false)
*x = y;
}
@ -37,7 +41,11 @@ pub fn small_array_alignment(x: &mut [i8; 4], y: [i8; 4]) {
#[no_mangle]
#[rustc_no_mir] // FIXME #27840 MIR has different codegen.
pub fn small_struct_alignment(x: &mut Bytes, y: Bytes) {
// CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1
// CHECK: [[VAR:%[0-9]+]] = bitcast i32* %{{.*}} to %Bytes*
// CHECK: %y = alloca %Bytes
// CHECK: [[TMP:%.+]] = alloca i32
// CHECK: store i32 %1, i32* [[TMP]]
// CHECK: [[Y8:%[0-9]+]] = bitcast %Bytes* %y to i8*
// CHECK: [[TMP8:%[0-9]+]] = bitcast i32* [[TMP]] to i8*
// CHECK: call void @llvm.memcpy.{{.*}}(i8* [[Y8]], i8* [[TMP8]], i{{[0-9]+}} 4, i32 1, i1 false)
*x = y;
}