Auto merge of #80200 - mahkoh:dst-offset, r=nagisa

Optimize DST field access

For `struct X<T: ?Sized>(T)` and `struct Y<T: ?Sized>(u8, T)`, the offset of the unsized field is `0` and `mem::align_of_val(&self.1)`, respectively. This patch changes the expression used to compute these offsets so that the optimizer can simplify them to these values.

Consider

```rust
fn f(x: &X<dyn Any>) -> &dyn Any {
    &x.0
}
```

Before:

```asm
test:
    movq %rsi, %rdx
    movq 16(%rsi), %rax
    leaq -1(%rax), %rcx
    negq %rax
    andq %rcx, %rax
    addq %rdi, %rax
    retq
```

After:

```asm
test:
    movq %rsi, %rdx
    movq %rdi, %rax
    retq
```
2021-01-07 03:13:21 +00:00 · 2021-01-07 03:13:21 +00:00 · dfdfaa1f04
parent 5b3d52414e be1511408e
commit dfdfaa1f04
1 changed files with 44 additions and 10 deletions
--- a/compiler/rustc_codegen_ssa/src/mir/place.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/place.rs
@ -178,16 +178,8 @@ impl<'a, 'tcx, V: CodegenObject> PlaceRef<'tcx, V> {
        // Get the alignment of the field
        let (_, unsized_align) = glue::size_and_align_of_dst(bx, field.ty, meta);

-        // Bump the unaligned offset up to the appropriate alignment using the
-        // following expression:
-        //
-        //     (unaligned offset + (align - 1)) & -align
-
-        // Calculate offset.
-        let align_sub_1 = bx.sub(unsized_align, bx.cx().const_usize(1u64));
-        let and_lhs = bx.add(unaligned_offset, align_sub_1);
-        let and_rhs = bx.neg(unsized_align);
-        let offset = bx.and(and_lhs, and_rhs);
+        // Bump the unaligned offset up to the appropriate alignment
+        let offset = round_up_const_value_to_alignment(bx, unaligned_offset, unsized_align);

        debug!("struct_field_ptr: DST field offset: {:?}", offset);

@ -518,3 +510,45 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
        self.monomorphize(place_ty.ty)
    }
 }
+
+fn round_up_const_value_to_alignment<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
+    bx: &mut Bx,
+    value: Bx::Value,
+    align: Bx::Value,
+) -> Bx::Value {
+    // Rounds `value` up to a multiple of `align` (a power of two). In pseudo code:
+    //
+    //     if value & (align - 1) == 0 {
+    //         value
+    //     } else {
+    //         (value & !(align - 1)) + align
+    //     }
+    //
+    // Usually this is written without branches as
+    //
+    //     (value + align - 1) & !(align - 1)
+    //
+    // But this formula cannot take advantage of a constant `value`. E.g. if `value` is known
+    // at compile time to be `1`, this expression should be optimized to `align`. However,
+    // this optimization only holds if `align` is a power of two. Since the optimizer doesn't
+    // know that `align` is a power of two, it cannot perform this optimization.
+    //
+    // Instead we use
+    //
+    //     value + (-value & (align - 1))
+    //
+    // Since `align` is used only once, the expression can be optimized. For `value = 0`
+    // it is optimized to `0` even in debug mode.
+    //
+    // NB: The previous version of this code used
+    //
+    //     (value + align - 1) & -align
+    //
+    // Even though `-align == !(align - 1)`, LLVM failed to optimize this even for
+    // `value = 0`. Bug report: https://bugs.llvm.org/show_bug.cgi?id=48559
+    let one = bx.const_usize(1);
+    let align_minus_1 = bx.sub(align, one);
+    let neg_value = bx.neg(value);
+    let offset = bx.and(neg_value, align_minus_1);
+    bx.add(value, offset)
+}