cfgloop.h (struct loop): Add simdlen member.
* cfgloop.h (struct loop): Add simdlen member. * cfgloopmanip.c (copy_loop_info): Copy simdlen as well. * omp-expand.c (expand_omp_simd): Set it if simdlen clause is present. * tree-vect-loop.c (vect_analyze_loop): Pass loop->simdlen != 0 as new argument to autovectorize_vector_sizes target hook. If loop->simdlen, pick up vector size where the vectorization factor is equal to loop->simd, and if there is none, fall back to the first successful one. (vect_transform_loop): Adjust autovectorize_vector_sizes target hook caller. * omp-low.c (omp_clause_aligned_alignment): Likewise. * omp-general.c (omp_max_vf): Likewise. * optabs-query.c (can_vec_mask_load_store_p): Likewise. * tree-vect-slp.c (vect_slp_bb): Likewise. * target.def (autovectorize_vector_sizes): Add ALL argument and document it. * doc/tm.texi: Adjust documentation. * targhooks.c (default_autovectorize_vector_sizes): Add bool argument. * targhooks.h (default_autovectorize_vector_sizes): Likewise. * config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes): Add bool argument. * config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise. * config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise. * config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise. * config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise. If true and TARGET_AVX512F or TARGET_AVX, push 3 or 2 sizes even if preferred vector size is not 512-bit or 256-bit, just put those unpreferred ones last. * gcc.target/i386/avx512f-simd-1.c: New test. From-SVN: r271403
This commit is contained in:
parent
0ec537f350
commit
f63445e56c
|
@ -1,3 +1,34 @@
|
|||
2019-05-20 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* cfgloop.h (struct loop): Add simdlen member.
|
||||
* cfgloopmanip.c (copy_loop_info): Copy simdlen as well.
|
||||
* omp-expand.c (expand_omp_simd): Set it if simdlen clause is present.
|
||||
* tree-vect-loop.c (vect_analyze_loop): Pass loop->simdlen != 0
|
||||
as new argument to autovectorize_vector_sizes target hook. If
|
||||
loop->simdlen, pick up vector size where the vectorization factor
|
||||
is equal to loop->simd, and if there is none, fall back to the first
|
||||
successful one.
|
||||
(vect_transform_loop): Adjust autovectorize_vector_sizes target hook
|
||||
caller.
|
||||
* omp-low.c (omp_clause_aligned_alignment): Likewise.
|
||||
* omp-general.c (omp_max_vf): Likewise.
|
||||
* optabs-query.c (can_vec_mask_load_store_p): Likewise.
|
||||
* tree-vect-slp.c (vect_slp_bb): Likewise.
|
||||
* target.def (autovectorize_vector_sizes): Add ALL argument and
|
||||
document it.
|
||||
* doc/tm.texi: Adjust documentation.
|
||||
* targhooks.c (default_autovectorize_vector_sizes): Add bool argument.
|
||||
* targhooks.h (default_autovectorize_vector_sizes): Likewise.
|
||||
* config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes): Add
|
||||
bool argument.
|
||||
* config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise.
|
||||
* config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise.
|
||||
* config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise.
|
||||
* config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise. If
|
||||
true and TARGET_AVX512F or TARGET_AVX, push 3 or 2 sizes even if
|
||||
preferred vector size is not 512-bit or 256-bit, just put those
|
||||
unpreferred ones last.
|
||||
|
||||
2019-05-20 Martin Liska <mliska@suse.cz>
|
||||
|
||||
* targhooks.c (default_libc_has_fast_function): New function.
|
||||
|
|
|
@ -174,6 +174,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
|
|||
of the loop can be safely evaluated concurrently. */
|
||||
int safelen;
|
||||
|
||||
/* Preferred vectorization factor for the loop if non-zero. */
|
||||
int simdlen;
|
||||
|
||||
/* Constraints are generally set by consumers and affect certain
|
||||
semantics of niter analyzer APIs. Currently the APIs affected are
|
||||
number_of_iterations_exit* functions and their callers. One typical
|
||||
|
|
|
@ -1016,6 +1016,7 @@ copy_loop_info (struct loop *loop, struct loop *target)
|
|||
target->nb_iterations_estimate = loop->nb_iterations_estimate;
|
||||
target->estimate_state = loop->estimate_state;
|
||||
target->safelen = loop->safelen;
|
||||
target->simdlen = loop->simdlen;
|
||||
target->constraints = loop->constraints;
|
||||
target->can_be_parallel = loop->can_be_parallel;
|
||||
target->warned_aggressive_loop_optimizations
|
||||
|
|
|
@ -14109,7 +14109,7 @@ aarch64_preferred_simd_mode (scalar_mode mode)
|
|||
/* Return a list of possible vector sizes for the vectorizer
|
||||
to iterate over. */
|
||||
static void
|
||||
aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
|
||||
aarch64_autovectorize_vector_sizes (vector_sizes *sizes, bool)
|
||||
{
|
||||
if (TARGET_SVE)
|
||||
sizes->safe_push (BYTES_PER_SVE_VECTOR);
|
||||
|
|
|
@ -480,7 +480,7 @@ arc_preferred_simd_mode (scalar_mode mode)
|
|||
TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
|
||||
|
||||
static void
|
||||
arc_autovectorize_vector_sizes (vector_sizes *sizes)
|
||||
arc_autovectorize_vector_sizes (vector_sizes *sizes, bool)
|
||||
{
|
||||
if (TARGET_PLUS_QMACW)
|
||||
{
|
||||
|
|
|
@ -288,7 +288,7 @@ static bool arm_builtin_support_vector_misalignment (machine_mode mode,
|
|||
static void arm_conditional_register_usage (void);
|
||||
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
|
||||
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
|
||||
static void arm_autovectorize_vector_sizes (vector_sizes *);
|
||||
static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
|
||||
static int arm_default_branch_cost (bool, bool);
|
||||
static int arm_cortex_a5_branch_cost (bool, bool);
|
||||
static int arm_cortex_m_branch_cost (bool, bool);
|
||||
|
@ -28351,7 +28351,7 @@ arm_vector_alignment (const_tree type)
|
|||
}
|
||||
|
||||
static void
|
||||
arm_autovectorize_vector_sizes (vector_sizes *sizes)
|
||||
arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
|
||||
{
|
||||
if (!TARGET_NEON_VECTORIZE_DOUBLE)
|
||||
{
|
||||
|
|
|
@ -21332,7 +21332,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
|
|||
256bit and 128bit vectors. */
|
||||
|
||||
static void
|
||||
ix86_autovectorize_vector_sizes (vector_sizes *sizes)
|
||||
ix86_autovectorize_vector_sizes (vector_sizes *sizes, bool all)
|
||||
{
|
||||
if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
|
||||
{
|
||||
|
@ -21340,11 +21340,22 @@ ix86_autovectorize_vector_sizes (vector_sizes *sizes)
|
|||
sizes->safe_push (32);
|
||||
sizes->safe_push (16);
|
||||
}
|
||||
else if (TARGET_AVX512F && all)
|
||||
{
|
||||
sizes->safe_push (32);
|
||||
sizes->safe_push (16);
|
||||
sizes->safe_push (64);
|
||||
}
|
||||
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
|
||||
{
|
||||
sizes->safe_push (32);
|
||||
sizes->safe_push (16);
|
||||
}
|
||||
else if (TARGET_AVX && all)
|
||||
{
|
||||
sizes->safe_push (16);
|
||||
sizes->safe_push (32);
|
||||
}
|
||||
}
|
||||
|
||||
/* Implemenation of targetm.vectorize.get_mask_mode. */
|
||||
|
|
|
@ -13460,7 +13460,7 @@ mips_preferred_simd_mode (scalar_mode mode)
|
|||
/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
|
||||
|
||||
static void
|
||||
mips_autovectorize_vector_sizes (vector_sizes *sizes)
|
||||
mips_autovectorize_vector_sizes (vector_sizes *sizes, bool)
|
||||
{
|
||||
if (ISA_HAS_MSA)
|
||||
sizes->safe_push (16);
|
||||
|
|
|
@ -6021,11 +6021,13 @@ against lower halves of vectors recursively until the specified mode is
|
|||
reached. The default is @var{mode} which means no splitting.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes})
|
||||
@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes}, bool @var{all})
|
||||
If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not
|
||||
the only one that is worth considering, this hook should add all suitable
|
||||
vector sizes to @var{sizes}, in order of decreasing preference. The first
|
||||
one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
|
||||
If @var{all} is true, add suitable vector sizes even when they are generally
|
||||
not expected to be worthwhile.
|
||||
|
||||
The hook does not need to do anything if the vector returned by
|
||||
@code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant
|
||||
|
|
|
@ -4974,6 +4974,13 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
|
|||
&& loop->safelen > 1)
|
||||
{
|
||||
loop->force_vectorize = true;
|
||||
if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
|
||||
{
|
||||
unsigned HOST_WIDE_INT v
|
||||
= tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
|
||||
if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
|
||||
loop->simdlen = v;
|
||||
}
|
||||
cfun->has_force_vectorize_loops = true;
|
||||
}
|
||||
else if (dont_vectorize)
|
||||
|
|
|
@ -469,7 +469,7 @@ omp_max_vf (void)
|
|||
return 1;
|
||||
|
||||
auto_vector_sizes sizes;
|
||||
targetm.vectorize.autovectorize_vector_sizes (&sizes);
|
||||
targetm.vectorize.autovectorize_vector_sizes (&sizes, true);
|
||||
if (!sizes.is_empty ())
|
||||
{
|
||||
poly_uint64 vf = 0;
|
||||
|
|
|
@ -3600,7 +3600,7 @@ omp_clause_aligned_alignment (tree clause)
|
|||
unsigned int al = 1;
|
||||
opt_scalar_mode mode_iter;
|
||||
auto_vector_sizes sizes;
|
||||
targetm.vectorize.autovectorize_vector_sizes (&sizes);
|
||||
targetm.vectorize.autovectorize_vector_sizes (&sizes, true);
|
||||
poly_uint64 vs = 0;
|
||||
for (unsigned int i = 0; i < sizes.length (); ++i)
|
||||
vs = ordered_max (vs, sizes[i]);
|
||||
|
|
|
@ -593,7 +593,7 @@ can_vec_mask_load_store_p (machine_mode mode,
|
|||
return true;
|
||||
|
||||
auto_vector_sizes vector_sizes;
|
||||
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
|
||||
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, true);
|
||||
for (unsigned int i = 0; i < vector_sizes.length (); ++i)
|
||||
{
|
||||
poly_uint64 cur = vector_sizes[i];
|
||||
|
|
|
@ -1899,12 +1899,14 @@ DEFHOOK
|
|||
the only one that is worth considering, this hook should add all suitable\n\
|
||||
vector sizes to @var{sizes}, in order of decreasing preference. The first\n\
|
||||
one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.\n\
|
||||
If @var{all} is true, add suitable vector sizes even when they are generally\n\
|
||||
not expected to be worthwhile.\n\
|
||||
\n\
|
||||
The hook does not need to do anything if the vector returned by\n\
|
||||
@code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant\n\
|
||||
for autovectorization. The default implementation does nothing.",
|
||||
void,
|
||||
(vector_sizes *sizes),
|
||||
(vector_sizes *sizes, bool all),
|
||||
default_autovectorize_vector_sizes)
|
||||
|
||||
/* Function to get a target mode for a vector mask. */
|
||||
|
|
|
@ -1316,7 +1316,7 @@ default_split_reduction (machine_mode mode)
|
|||
is tried. */
|
||||
|
||||
void
|
||||
default_autovectorize_vector_sizes (vector_sizes *)
|
||||
default_autovectorize_vector_sizes (vector_sizes *, bool)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
@ -110,7 +110,7 @@ default_builtin_support_vector_misalignment (machine_mode mode,
|
|||
int, bool);
|
||||
extern machine_mode default_preferred_simd_mode (scalar_mode mode);
|
||||
extern machine_mode default_split_reduction (machine_mode);
|
||||
extern void default_autovectorize_vector_sizes (vector_sizes *);
|
||||
extern void default_autovectorize_vector_sizes (vector_sizes *, bool);
|
||||
extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64);
|
||||
extern bool default_empty_mask_is_expensive (unsigned);
|
||||
extern void *default_init_cost (struct loop *);
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2019-05-20 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* gcc.target/i386/avx512f-simd-1.c: New test.
|
||||
|
||||
2019-05-20 Christophe Lyon <christophe.lyon@linaro.org>
|
||||
|
||||
* gcc.target/aarch64/target_attr_10.c: Add quotes to expected
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-fopenmp-simd -O2 -mavx512f -masm=att" } */
|
||||
/* { dg-final { scan-assembler "vpadd\[^\n\r]*%xmm" } } */
|
||||
/* { dg-final { scan-assembler "vpadd\[^\n\r]*%ymm" } } */
|
||||
/* { dg-final { scan-assembler "vpadd\[^\n\r]*%zmm" } } */
|
||||
|
||||
#define N 1024
|
||||
int a[N];
|
||||
|
||||
void
|
||||
f1 (void)
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd simdlen (4)
|
||||
for (i = 0; i < N; ++i)
|
||||
a[i] = a[i] + 1;
|
||||
}
|
||||
|
||||
void
|
||||
f2 (void)
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd simdlen (8)
|
||||
for (i = 0; i < N; ++i)
|
||||
a[i] = a[i] + 2;
|
||||
}
|
||||
|
||||
void
|
||||
f3 (void)
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd simdlen (16)
|
||||
for (i = 0; i < N; ++i)
|
||||
a[i] = a[i] + 3;
|
||||
}
|
|
@ -2254,7 +2254,8 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
|
|||
|
||||
/* Autodetect first vector size we try. */
|
||||
current_vector_size = 0;
|
||||
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
|
||||
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes,
|
||||
loop->simdlen != 0);
|
||||
unsigned int next_size = 0;
|
||||
|
||||
DUMP_VECT_SCOPE ("analyze_loop_nest");
|
||||
|
@ -2273,6 +2274,8 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
|
|||
|
||||
unsigned n_stmts = 0;
|
||||
poly_uint64 autodetected_vector_size = 0;
|
||||
opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
|
||||
poly_uint64 first_vector_size = 0;
|
||||
while (1)
|
||||
{
|
||||
/* Check the CFG characteristics of the loop (nesting, entry/exit). */
|
||||
|
@ -2283,6 +2286,7 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
|
|||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"bad loop form.\n");
|
||||
gcc_checking_assert (first_loop_vinfo == NULL);
|
||||
return loop_vinfo;
|
||||
}
|
||||
|
||||
|
@ -2296,10 +2300,27 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
|
|||
{
|
||||
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
|
||||
|
||||
return loop_vinfo;
|
||||
if (loop->simdlen
|
||||
&& maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
|
||||
(unsigned HOST_WIDE_INT) loop->simdlen))
|
||||
{
|
||||
if (first_loop_vinfo == NULL)
|
||||
{
|
||||
first_loop_vinfo = loop_vinfo;
|
||||
first_vector_size = current_vector_size;
|
||||
loop->aux = NULL;
|
||||
}
|
||||
else
|
||||
delete loop_vinfo;
|
||||
}
|
||||
else
|
||||
{
|
||||
delete first_loop_vinfo;
|
||||
return loop_vinfo;
|
||||
}
|
||||
}
|
||||
|
||||
delete loop_vinfo;
|
||||
else
|
||||
delete loop_vinfo;
|
||||
|
||||
if (next_size == 0)
|
||||
autodetected_vector_size = current_vector_size;
|
||||
|
@ -2308,10 +2329,31 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
|
|||
&& known_eq (vector_sizes[next_size], autodetected_vector_size))
|
||||
next_size += 1;
|
||||
|
||||
if (fatal
|
||||
|| next_size == vector_sizes.length ()
|
||||
if (fatal)
|
||||
{
|
||||
gcc_checking_assert (first_loop_vinfo == NULL);
|
||||
return opt_loop_vec_info::propagate_failure (res);
|
||||
}
|
||||
|
||||
if (next_size == vector_sizes.length ()
|
||||
|| known_eq (current_vector_size, 0U))
|
||||
return opt_loop_vec_info::propagate_failure (res);
|
||||
{
|
||||
if (first_loop_vinfo)
|
||||
{
|
||||
current_vector_size = first_vector_size;
|
||||
loop->aux = (loop_vec_info) first_loop_vinfo;
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"***** Choosing vector size ");
|
||||
dump_dec (MSG_NOTE, current_vector_size);
|
||||
dump_printf (MSG_NOTE, "\n");
|
||||
}
|
||||
return first_loop_vinfo;
|
||||
}
|
||||
else
|
||||
return opt_loop_vec_info::propagate_failure (res);
|
||||
}
|
||||
|
||||
/* Try the next biggest vector size. */
|
||||
current_vector_size = vector_sizes[next_size++];
|
||||
|
@ -8670,7 +8712,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|||
if (epilogue)
|
||||
{
|
||||
auto_vector_sizes vector_sizes;
|
||||
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
|
||||
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
|
||||
unsigned int next_size = 0;
|
||||
|
||||
/* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work
|
||||
|
|
|
@ -2983,7 +2983,7 @@ vect_slp_bb (basic_block bb)
|
|||
|
||||
/* Autodetect first vector size we try. */
|
||||
current_vector_size = 0;
|
||||
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
|
||||
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
|
||||
unsigned int next_size = 0;
|
||||
|
||||
gsi = gsi_start_bb (bb);
|
||||
|
|
Loading…
Reference in New Issue