Auto merge of #80200 - mahkoh:dst-offset, r=nagisa

Optimize DST field access

For

    struct X<T: ?Sized>(T)
    struct Y<T: ?Sized>(u8, T)

the offset of the unsized field is

    0
    mem::align_of_val(&self.1)

respectively. This patch changes the expression used to compute these
offsets so that the optimizer can perform this optimization.

Consider

```rust
fn f(x: &X<dyn Any>) -> &dyn Any {
    &x.0
}
```

Before:

```asm
test:
	movq	%rsi, %rdx
	movq	16(%rsi), %rax
	leaq	-1(%rax), %rcx
	negq	%rax
	andq	%rcx, %rax
	addq	%rdi, %rax
	retq
```

After:

```asm
test:
	movq	%rsi, %rdx
	movq	%rdi, %rax
	retq
```
This commit is contained in:
bors 2021-01-07 03:13:21 +00:00
commit dfdfaa1f04
1 changed files with 44 additions and 10 deletions

View File

@ -178,16 +178,8 @@ impl<'a, 'tcx, V: CodegenObject> PlaceRef<'tcx, V> {
// Get the alignment of the field
let (_, unsized_align) = glue::size_and_align_of_dst(bx, field.ty, meta);
// Bump the unaligned offset up to the appropriate alignment using the
// following expression:
//
// (unaligned offset + (align - 1)) & -align
// Calculate offset.
let align_sub_1 = bx.sub(unsized_align, bx.cx().const_usize(1u64));
let and_lhs = bx.add(unaligned_offset, align_sub_1);
let and_rhs = bx.neg(unsized_align);
let offset = bx.and(and_lhs, and_rhs);
// Bump the unaligned offset up to the appropriate alignment
let offset = round_up_const_value_to_alignment(bx, unaligned_offset, unsized_align);
debug!("struct_field_ptr: DST field offset: {:?}", offset);
@ -518,3 +510,45 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
self.monomorphize(place_ty.ty)
}
}
/// Emits the instructions that round `value` up to the next multiple of `align`.
///
/// The formula used here assumes that `align` is a power of two; nothing in the
/// emitted code checks that.
fn round_up_const_value_to_alignment<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
    bx: &mut Bx,
    value: Bx::Value,
    align: Bx::Value,
) -> Bx::Value {
    // What we want to compute, written with a branch:
    //
    //     if value & (align - 1) == 0 {
    //         value
    //     } else {
    //         (value & !(align - 1)) + align
    //     }
    //
    // The usual branch-free spelling is
    //
    //     (value + align - 1) & !(align - 1)
    //
    // but that form cannot benefit from a compile-time-constant `value`. For
    // example, with `value == 1` the whole expression should fold to `align`,
    // yet that simplification is only valid when `align` is a power of two,
    // which the optimizer does not know, so it leaves the expression alone.
    //
    // We therefore emit
    //
    //     value + (-value & (align - 1))
    //
    // `align` appears only once in this form, so the optimizer can simplify
    // it. For `value == 0` it's folded to `0` even in debug mode.
    //
    // NB: an earlier version of this code emitted
    //
    //     (value + align - 1) & -align
    //
    // Although `-align == !(align - 1)`, LLVM failed to optimize that even for
    // `value == 0`. Bug report: https://bugs.llvm.org/show_bug.cgi?id=48559
    let one = bx.const_usize(1);
    // Mask selecting the bits below the alignment: `align - 1`.
    let low_bits_mask = bx.sub(align, one);
    let negated_value = bx.neg(value);
    // Distance from `value` up to the next multiple of `align` (zero if already aligned).
    let padding = bx.and(negated_value, low_bits_mask);
    bx.add(value, padding)
}