From afb119beca72d64c166127445ca6883358e24174 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 10 May 2013 07:52:25 +0000 Subject: [PATCH] tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not disable peeling when we version for aliasing. 2013-05-10 Richard Biener * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not disable peeling when we version for aliasing. (vector_alignment_reachable_p): Honor explicit user alignment. (vect_supportable_dr_alignment): Likewise. * tree-vect-loop-manip.c (vect_can_advance_ivs_p): Use STMT_VINFO_LOOP_PHI_EVOLUTION_PART instead of recomputing it. * tree-vect-loop.c (vect_transform_loop): First apply versioning, then peeling to arrange for the cost-model check to come first. * gcc.target/i386/avx256-unaligned-load-2.c: Make well-defined. * gcc.target/i386/l_fma_double_1.c: Adjust. * gcc.target/i386/l_fma_double_2.c: Likewise. * gcc.target/i386/l_fma_double_3.c: Likewise. * gcc.target/i386/l_fma_double_4.c: Likewise. * gcc.target/i386/l_fma_double_5.c: Likewise. * gcc.target/i386/l_fma_double_6.c: Likewise. * gcc.target/i386/l_fma_float_1.c: Likewise. * gcc.target/i386/l_fma_float_2.c: Likewise. * gcc.target/i386/l_fma_float_3.c: Likewise. * gcc.target/i386/l_fma_float_4.c: Likewise. * gcc.target/i386/l_fma_float_5.c: Likewise. * gcc.target/i386/l_fma_float_6.c: Likewise. From-SVN: r198767 --- gcc/ChangeLog | 11 +++++++ gcc/testsuite/ChangeLog | 16 ++++++++++ .../gcc.target/i386/avx256-unaligned-load-2.c | 21 +++----------- .../gcc.target/i386/l_fma_double_1.c | 23 ++++++++------- .../gcc.target/i386/l_fma_double_2.c | 11 +++---- .../gcc.target/i386/l_fma_double_3.c | 23 ++++++++------- .../gcc.target/i386/l_fma_double_4.c | 11 +++---- .../gcc.target/i386/l_fma_double_5.c | 11 +++---- .../gcc.target/i386/l_fma_double_6.c | 11 +++---- gcc/testsuite/gcc.target/i386/l_fma_float_1.c | 20 ++++++------- gcc/testsuite/gcc.target/i386/l_fma_float_2.c | 8 ++--- gcc/testsuite/gcc.target/i386/l_fma_float_3.c | 20 ++++++------- gcc/testsuite/gcc.target/i386/l_fma_float_4.c | 8 ++--- gcc/testsuite/gcc.target/i386/l_fma_float_5.c | 8 ++--- gcc/testsuite/gcc.target/i386/l_fma_float_6.c | 8 ++--- gcc/tree-vect-data-refs.c | 29 ++++++++----------- gcc/tree-vect-loop-manip.c | 27 +++-------------- gcc/tree-vect-loop.c | 17 ++++++----- 18 files changed, 141 insertions(+), 142 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b6101bad68d..c7d6db9a744 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2013-05-10 Richard Biener + + * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not + disable peeling when we version for aliasing. + (vector_alignment_reachable_p): Honor explicit user alignment. + (vect_supportable_dr_alignment): Likewise. + * tree-vect-loop-manip.c (vect_can_advance_ivs_p): Use + STMT_VINFO_LOOP_PHI_EVOLUTION_PART instead of recomputing it. + * tree-vect-loop.c (vect_transform_loop): First apply versioning, + then peeling to arrange for the cost-model check to come first. + 2013-05-10 Alan Modra * configure.ac (HAVE_AS_TLS): Swap powerpc64 and powerpc cases. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3ab939af5f5..7656de9b0ab 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,19 @@ +2013-05-10 Richard Biener + + * gcc.target/i386/avx256-unaligned-load-2.c: Make well-defined. + * gcc.target/i386/l_fma_double_1.c: Adjust. + * gcc.target/i386/l_fma_double_2.c: Likewise. + * gcc.target/i386/l_fma_double_3.c: Likewise. + * gcc.target/i386/l_fma_double_4.c: Likewise. + * gcc.target/i386/l_fma_double_5.c: Likewise. + * gcc.target/i386/l_fma_double_6.c: Likewise. + * gcc.target/i386/l_fma_float_1.c: Likewise. + * gcc.target/i386/l_fma_float_2.c: Likewise. + * gcc.target/i386/l_fma_float_3.c: Likewise. + * gcc.target/i386/l_fma_float_4.c: Likewise. + * gcc.target/i386/l_fma_float_5.c: Likewise. + * gcc.target/i386/l_fma_float_6.c: Likewise. + 2013-05-08 Paolo Carlini PR c++/51226 diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c index 2947d9eab3e..e3ec8542e01 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c @@ -1,26 +1,13 @@ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */ -#define N 1024 - -char **ep; -char **fp; - void -avx_test (void) +avx_test (char **cp, char **ep) { int i; - char **ap; - char **bp; - char **cp; - - ap = ep; - bp = fp; - for (i = 128; i >= 0; i--) - { - *ap++ = *cp++; - *bp++ = 0; - } + char **ap = __builtin_assume_aligned (ep, 32); + for (i = 128; i > 0; i--) + *ap++ = *cp++; } /* { dg-final { scan-assembler-not "avx_loaddqu256" } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c index 270659359f4..b3ffcf2c00f 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c @@ -4,23 +4,24 @@ /* Test that the compiler properly optimizes floating point multiply and add instructions into FMA3 instructions. */ -#define TYPE double +typedef double adouble __attribute__((aligned(sizeof (double)))); +#define TYPE adouble #include "l_fma_1.h" /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ +/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ +/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfmadd213sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfmsub213sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfnmadd213sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213sd" 28 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c index e8933e25d53..713b24b7c27 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c @@ -4,7 +4,8 @@ /* Test that the compiler properly optimizes floating point multiply and add instructions into FMA3 instructions. */ -#define TYPE double +typedef double adouble __attribute__((aligned(sizeof (double)))); +#define TYPE adouble #include "l_fma_2.h" @@ -12,7 +13,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c index 00c756775c8..cbc6ef832aa 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c @@ -4,23 +4,24 @@ /* Test that the compiler properly optimizes floating point multiply and add instructions into FMA3 instructions. */ -#define TYPE double +typedef double adouble __attribute__((aligned(sizeof (double)))); +#define TYPE adouble #include "l_fma_3.h" /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ +/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ +/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfmadd213sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfmsub213sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfnmadd213sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 28 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213sd" 28 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c index 09970bdb5c6..d571aca1b01 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c @@ -4,7 +4,8 @@ /* Test that the compiler properly optimizes floating point multiply and add instructions into FMA3 instructions. */ -#define TYPE double +typedef double adouble __attribute__((aligned(sizeof (double)))); +#define TYPE adouble #include "l_fma_4.h" @@ -12,7 +13,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c index 2a1428e4d9c..56d86369bc3 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c @@ -4,7 +4,8 @@ /* Test that the compiler properly optimizes floating point multiply and add instructions into FMA3 instructions. */ -#define TYPE double +typedef double adouble __attribute__((aligned(sizeof (double)))); +#define TYPE adouble #include "l_fma_5.h" @@ -12,7 +13,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c index 092032aa0b5..f22763d6f99 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c @@ -4,7 +4,8 @@ /* Test that the compiler properly optimizes floating point multiply and add instructions into FMA3 instructions. */ -#define TYPE double +typedef double adouble __attribute__((aligned(sizeof (double)))); +#define TYPE adouble #include "l_fma_6.h" @@ -12,7 +13,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c index 4bcd81de9da..b2f58ac2f95 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c @@ -9,18 +9,18 @@ #include "l_fma_1.h" /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ +/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ +/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfmadd213ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfmsub213ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfnmadd213ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213ss" 60 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c index 34b7fcb6dd5..6377585bffe 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c index 6ff2c6eacd5..878babb31c5 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c @@ -9,18 +9,18 @@ #include "l_fma_3.h" /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ +/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ +/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfmadd213ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfmsub213ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfnmadd213ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 60 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213ss" 60 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c index 39548bfa76b..bacb01e7f3f 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c index 83d79512592..a32fc41d9f9 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c index 1eefc817c36..a7a74fb4222 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */ diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index c1b5826ee12..bf0b510db44 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -1024,7 +1024,8 @@ vector_alignment_reachable_p (struct data_reference *dr) if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Unknown misalignment, is_packed = %d",is_packed); - if (targetm.vectorize.vector_alignment_reachable (type, is_packed)) + if ((TYPE_USER_ALIGN (type) && !is_packed) + || targetm.vectorize.vector_alignment_reachable (type, is_packed)) return true; else return false; @@ -1323,7 +1324,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) bool stat; gimple stmt; stmt_vec_info stmt_info; - int vect_versioning_for_alias_required; unsigned int npeel = 0; bool all_misalignments_unknown = true; unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); @@ -1510,15 +1510,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) } } - vect_versioning_for_alias_required - = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); - - /* Temporarily, if versioning for alias is required, we disable peeling - until we support peeling and versioning. Often peeling for alignment - will require peeling for loop-bound, which in turn requires that we - know how to adjust the loop ivs after the loop. */ - if (vect_versioning_for_alias_required - || !vect_can_advance_ivs_p (loop_vinfo) + /* Check if we can possibly peel the loop. */ + if (!vect_can_advance_ivs_p (loop_vinfo) || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))) do_peeling = false; @@ -4722,9 +4715,10 @@ vect_supportable_dr_alignment (struct data_reference *dr, if (!known_alignment_for_access_p (dr)) is_packed = not_size_aligned (DR_REF (dr)); - if (targetm.vectorize. - support_vector_misalignment (mode, type, - DR_MISALIGNMENT (dr), is_packed)) + if ((TYPE_USER_ALIGN (type) && !is_packed) + || targetm.vectorize. + support_vector_misalignment (mode, type, + DR_MISALIGNMENT (dr), is_packed)) /* Can't software pipeline the loads, but can at least do them. */ return dr_unaligned_supported; } @@ -4736,9 +4730,10 @@ vect_supportable_dr_alignment (struct data_reference *dr, if (!known_alignment_for_access_p (dr)) is_packed = not_size_aligned (DR_REF (dr)); - if (targetm.vectorize. - support_vector_misalignment (mode, type, - DR_MISALIGNMENT (dr), is_packed)) + if ((TYPE_USER_ALIGN (type) && !is_packed) + || targetm.vectorize. + support_vector_misalignment (mode, type, + DR_MISALIGNMENT (dr), is_packed)) return dr_unaligned_supported; } diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index bff5c22130e..82e724f1d9c 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -1555,7 +1555,6 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:"); for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { - tree access_fn = NULL; tree evolution_part; phi = gsi_stmt (gsi); @@ -1588,31 +1587,13 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) /* Analyze the evolution function. */ - access_fn = instantiate_parameters - (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); - - if (!access_fn) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "No Access function."); - return false; - } - - STRIP_NOPS (access_fn); - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "Access function of PHI: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn); - } - - evolution_part = evolution_part_in_loop_num (access_fn, loop->num); - + evolution_part + = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi)); if (evolution_part == NULL_TREE) { if (dump_enabled_p ()) - dump_printf (MSG_MISSED_OPTIMIZATION, "No evolution."); + dump_printf (MSG_MISSED_OPTIMIZATION, + "No access function or evolution."); return false; } diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 40eccea5500..0fb2daed370 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5499,6 +5499,16 @@ vect_transform_loop (loop_vec_info loop_vinfo) check_profitability = true; } + /* Version the loop first, if required, so the profitability check + comes first. */ + + if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) + || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) + { + vect_loop_versioning (loop_vinfo, th, check_profitability); + check_profitability = false; + } + /* Peel the loop if there are data refs with unknown alignment. Only one data ref with unknown store is allowed. */ @@ -5508,13 +5518,6 @@ vect_transform_loop (loop_vec_info loop_vinfo) check_profitability = false; } - if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) - || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) - { - vect_loop_versioning (loop_vinfo, th, check_profitability); - check_profitability = false; - } - /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a compile time constant), or it is a constant that doesn't divide by the vectorization factor, then an epilog loop needs to be created.