Fix peeling for alignment with negative step
The following fixes a regression causing us to no longer peel negative-step loops for alignment. With dr_misalignment now applying the bias for negative step, we have to do the reverse when adjusting the misalignment for peeled DRs.

2021-09-29  Richard Biener  <rguenther@suse.de>

	* tree-vect-data-refs.c (vect_dr_misalign_for_aligned_access):
	New helper.
	(vect_update_misalignment_for_peel): Use it to update misalignment
	to the value necessary for an aligned access.
	(vect_get_peeling_costs_all_drs): Likewise.
	(vect_enhance_data_refs_alignment): Likewise.

	* gcc.target/i386/vect-alignment-peeling-1.c: New testcase.
	* gcc.target/i386/vect-alignment-peeling-2.c: Likewise.
This commit is contained in:
parent
a459ee44c0
commit
4c77310816
90
gcc/testsuite/gcc.target/i386/vect-alignment-peeling-1.c
Normal file
90
gcc/testsuite/gcc.target/i386/vect-alignment-peeling-1.c
Normal file
@ -0,0 +1,90 @@
|
||||
/* { dg-do run { target lp64 } } */
/* This is a test exercising peeling for alignment for a negative step
   vector loop.  We're forcing atom tuning here because that has a higher
   unaligned vs aligned cost unlike most other archs.  */
/* { dg-options "-O3 -march=x86-64 -mtune=atom -fdump-tree-vect-details -save-temps" } */

float a[1024], b[1024];

/* Each fooN starts the downward-counting loop at a different index so
   every possible element misalignment of the vector accesses is covered
   by one of the functions.  */
void __attribute__((noipa)) foo1 ()
{
  for (int i = 507; i > 1; --i)
    a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo2 ()
{
  for (int i = 506; i > 1; --i)
    a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo3 ()
{
  for (int i = 505; i > 1; --i)
    a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo4 ()
{
  for (int i = 504; i > 1; --i)
    a[i] = b[i] * 2.;
}
/* Variant with a runtime start index, so the vectorizer cannot know the
   alignment statically.  */
void __attribute__((noipa)) foo5 (int start)
{
  for (int i = start; i > 1; --i)
    a[i] = b[i] * 2.;
}

int main()
{
  /* The asm clobber prevents vectorization of the init loops so only
     the fooN loops are subject to peeling for alignment.  */
  for (int i = 2; i < 508; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo1 ();
  for (int i = 2; i < 508; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 2; i < 507; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo2 ();
  for (int i = 2; i < 507; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 2; i < 506; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo3 ();
  for (int i = 2; i < 506; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 2; i < 505; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo4 ();
  for (int i = 2; i < 505; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 2; i < 506; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo5 (505);
  for (int i = 2; i < 506; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();
}

/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 4 "vect" } } */
/* Verify all vector accesses are emitted as aligned.  */
/* { dg-final { scan-assembler-not "movup" } } */
|
90
gcc/testsuite/gcc.target/i386/vect-alignment-peeling-2.c
Normal file
90
gcc/testsuite/gcc.target/i386/vect-alignment-peeling-2.c
Normal file
@ -0,0 +1,90 @@
|
||||
/* { dg-do run { target lp64 } } */
/* This is a test exercising peeling for alignment for a positive step
   vector loop.  We're forcing atom tuning here because that has a higher
   unaligned vs aligned cost unlike most other archs.  */
/* { dg-options "-O3 -march=x86-64 -mtune=atom -fdump-tree-vect-details -save-temps" } */

float a[1024], b[1024];

/* Each fooN starts the upward-counting loop at a different index so
   every possible element misalignment of the vector accesses is covered
   by one of the functions.  */
void __attribute__((noipa)) foo1 ()
{
  for (int i = 2; i < 508; ++i)
    a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo2 ()
{
  for (int i = 3; i < 508; ++i)
    a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo3 ()
{
  for (int i = 4; i < 508; ++i)
    a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo4 ()
{
  for (int i = 5; i < 508; ++i)
    a[i] = b[i] * 2.;
}
/* Variant with a runtime start index, so the vectorizer cannot know the
   alignment statically.  */
void __attribute__((noipa)) foo5 (int start)
{
  for (int i = start; i < 508; ++i)
    a[i] = b[i] * 2.;
}

int main()
{
  /* The asm clobber prevents vectorization of the init loops so only
     the fooN loops are subject to peeling for alignment.  */
  for (int i = 2; i < 508; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo1 ();
  for (int i = 2; i < 508; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 3; i < 508; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo2 ();
  for (int i = 3; i < 508; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 4; i < 508; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo3 ();
  for (int i = 4; i < 508; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 5; i < 508; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo4 ();
  for (int i = 5; i < 508; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();

  for (int i = 3; i < 508; ++i)
    {
      __asm__ volatile ("" : : : "memory");
      b[i] = i;
    }
  foo5 (3);
  for (int i = 3; i < 508; ++i)
    if (a[i] != 2*i)
      __builtin_abort ();
}

/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 4 "vect" } } */
/* Verify all vector accesses are emitted as aligned.  */
/* { dg-final { scan-assembler-not "movup" } } */
|
@ -1214,6 +1214,29 @@ vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info,
|
||||
return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
|
||||
}
|
||||
|
||||
/* Compute the value for dr_info->misalign so that the access appears
|
||||
aligned. This is used by peeling to compensate for dr_misalignment
|
||||
applying the offset for negative step. */
|
||||
|
||||
int
|
||||
vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
|
||||
{
|
||||
if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
|
||||
return 0;
|
||||
|
||||
tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
|
||||
poly_int64 misalignment
|
||||
= ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
|
||||
* TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
|
||||
|
||||
unsigned HOST_WIDE_INT target_alignment_c;
|
||||
int misalign;
|
||||
if (!dr_info->target_alignment.is_constant (&target_alignment_c)
|
||||
|| !known_misalignment (misalignment, target_alignment_c, &misalign))
|
||||
return DR_MISALIGNMENT_UNKNOWN;
|
||||
return misalign;
|
||||
}
|
||||
|
||||
/* Function vect_update_misalignment_for_peel.
|
||||
Sets DR_INFO's misalignment
|
||||
- to 0 if it has the same alignment as DR_PEEL_INFO,
|
||||
@ -1233,7 +1256,8 @@ vect_update_misalignment_for_peel (dr_vec_info *dr_info,
|
||||
/* If dr_info is aligned of dr_peel_info is, then mark it so. */
|
||||
if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
|
||||
{
|
||||
SET_DR_MISALIGNMENT (dr_info, 0);
|
||||
SET_DR_MISALIGNMENT (dr_info,
|
||||
vect_dr_misalign_for_aligned_access (dr_peel_info));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1241,9 +1265,9 @@ vect_update_misalignment_for_peel (dr_vec_info *dr_info,
|
||||
tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
|
||||
if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
|
||||
&& known_alignment_for_access_p (dr_info, vectype)
|
||||
&& known_alignment_for_access_p (dr_peel_info, vectype))
|
||||
&& npeel != -1)
|
||||
{
|
||||
int misal = dr_misalignment (dr_info, vectype);
|
||||
int misal = dr_info->misalignment;
|
||||
misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
|
||||
misal &= alignment - 1;
|
||||
set_dr_misalignment (dr_info, misal);
|
||||
@ -1516,7 +1540,8 @@ vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
|
||||
if (npeel == 0)
|
||||
;
|
||||
else if (unknown_misalignment && dr_info == dr0_info)
|
||||
SET_DR_MISALIGNMENT (dr_info, 0);
|
||||
SET_DR_MISALIGNMENT (dr_info,
|
||||
vect_dr_misalign_for_aligned_access (dr0_info));
|
||||
else
|
||||
vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
|
||||
vect_get_data_access_cost (loop_vinfo, dr_info, inside_cost, outside_cost,
|
||||
@ -2278,7 +2303,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
|
||||
else
|
||||
LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
|
||||
SET_DR_MISALIGNMENT (dr0_info, 0);
|
||||
SET_DR_MISALIGNMENT (dr0_info,
|
||||
vect_dr_misalign_for_aligned_access (dr0_info));
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
@ -2402,7 +2428,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
|
||||
{
|
||||
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
|
||||
SET_DR_MISALIGNMENT (dr_info, 0);
|
||||
SET_DR_MISALIGNMENT (dr_info,
|
||||
vect_dr_misalign_for_aligned_access (dr_info));
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"Alignment of access forced using versioning.\n");
|
||||
|
Loading…
Reference in New Issue
Block a user