cfgloop.h (struct loop): Add simdlen member.

* cfgloop.h (struct loop): Add simdlen member.
	* cfgloopmanip.c (copy_loop_info): Copy simdlen as well.
	* omp-expand.c (expand_omp_simd): Set it if simdlen clause is present.
	* tree-vect-loop.c (vect_analyze_loop): Pass loop->simdlen != 0
	as new argument to autovectorize_vector_sizes target hook.  If
	loop->simdlen, pick up vector size where the vectorization factor
	is equal to loop->simd, and if there is none, fall back to the first
	successful one.
	(vect_transform_loop): Adjust autovectorize_vector_sizes target hook
	caller.
	* omp-low.c (omp_clause_aligned_alignment): Likewise.
	* omp-general.c (omp_max_vf): Likewise.
	* optabs-query.c (can_vec_mask_load_store_p): Likewise.
	* tree-vect-slp.c (vect_slp_bb): Likewise.
	* target.def (autovectorize_vector_sizes): Add ALL argument and
	document it.
	* doc/tm.texi: Adjust documentation.
	* targhooks.c (default_autovectorize_vector_sizes): Add bool argument.
	* targhooks.h (default_autovectorize_vector_sizes): Likewise.
	* config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes): Add
	bool argument.
	* config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise.
	* config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise.
	* config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise.
	* config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise.  If
	true and TARGET_AVX512F or TARGET_AVX, push 3 or 2 sizes even if
	preferred vector size is not 512-bit or 256-bit, just put those
	unpreferred ones last.

	* gcc.target/i386/avx512f-simd-1.c: New test.

From-SVN: r271403
This commit is contained in:
Jakub Jelinek 2019-05-20 11:49:07 +02:00 committed by Jakub Jelinek
parent 0ec537f350
commit f63445e56c
20 changed files with 160 additions and 22 deletions

View File

@ -1,3 +1,34 @@
2019-05-20 Jakub Jelinek <jakub@redhat.com>
* cfgloop.h (struct loop): Add simdlen member.
* cfgloopmanip.c (copy_loop_info): Copy simdlen as well.
* omp-expand.c (expand_omp_simd): Set it if simdlen clause is present.
* tree-vect-loop.c (vect_analyze_loop): Pass loop->simdlen != 0
as new argument to autovectorize_vector_sizes target hook. If
loop->simdlen, pick up vector size where the vectorization factor
is equal to loop->simd, and if there is none, fall back to the first
successful one.
(vect_transform_loop): Adjust autovectorize_vector_sizes target hook
caller.
* omp-low.c (omp_clause_aligned_alignment): Likewise.
* omp-general.c (omp_max_vf): Likewise.
* optabs-query.c (can_vec_mask_load_store_p): Likewise.
* tree-vect-slp.c (vect_slp_bb): Likewise.
* target.def (autovectorize_vector_sizes): Add ALL argument and
document it.
* doc/tm.texi: Adjust documentation.
* targhooks.c (default_autovectorize_vector_sizes): Add bool argument.
* targhooks.h (default_autovectorize_vector_sizes): Likewise.
* config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes): Add
bool argument.
* config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise.
* config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise.
* config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise.
* config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise. If
true and TARGET_AVX512F or TARGET_AVX, push 3 or 2 sizes even if
preferred vector size is not 512-bit or 256-bit, just put those
unpreferred ones last.
2019-05-20 Martin Liska <mliska@suse.cz>
* targhooks.c (default_libc_has_fast_function): New function.

View File

@ -174,6 +174,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
of the loop can be safely evaluated concurrently. */
int safelen;
/* Preferred vectorization factor for the loop if non-zero. */
int simdlen;
/* Constraints are generally set by consumers and affect certain
semantics of niter analyzer APIs. Currently the APIs affected are
number_of_iterations_exit* functions and their callers. One typical

View File

@ -1016,6 +1016,7 @@ copy_loop_info (struct loop *loop, struct loop *target)
target->nb_iterations_estimate = loop->nb_iterations_estimate;
target->estimate_state = loop->estimate_state;
target->safelen = loop->safelen;
target->simdlen = loop->simdlen;
target->constraints = loop->constraints;
target->can_be_parallel = loop->can_be_parallel;
target->warned_aggressive_loop_optimizations

View File

@ -14109,7 +14109,7 @@ aarch64_preferred_simd_mode (scalar_mode mode)
/* Return a list of possible vector sizes for the vectorizer
to iterate over. */
static void
aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
aarch64_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (TARGET_SVE)
sizes->safe_push (BYTES_PER_SVE_VECTOR);

View File

@ -480,7 +480,7 @@ arc_preferred_simd_mode (scalar_mode mode)
TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
static void
arc_autovectorize_vector_sizes (vector_sizes *sizes)
arc_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (TARGET_PLUS_QMACW)
{

View File

@ -288,7 +288,7 @@ static bool arm_builtin_support_vector_misalignment (machine_mode mode,
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static void arm_autovectorize_vector_sizes (vector_sizes *);
static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
@ -28351,7 +28351,7 @@ arm_vector_alignment (const_tree type)
}
static void
arm_autovectorize_vector_sizes (vector_sizes *sizes)
arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (!TARGET_NEON_VECTORIZE_DOUBLE)
{

View File

@ -21332,7 +21332,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
256bit and 128bit vectors. */
static void
ix86_autovectorize_vector_sizes (vector_sizes *sizes)
ix86_autovectorize_vector_sizes (vector_sizes *sizes, bool all)
{
if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
{
@ -21340,11 +21340,22 @@ ix86_autovectorize_vector_sizes (vector_sizes *sizes)
sizes->safe_push (32);
sizes->safe_push (16);
}
else if (TARGET_AVX512F && all)
{
sizes->safe_push (32);
sizes->safe_push (16);
sizes->safe_push (64);
}
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
{
sizes->safe_push (32);
sizes->safe_push (16);
}
else if (TARGET_AVX && all)
{
sizes->safe_push (16);
sizes->safe_push (32);
}
}
/* Implemenation of targetm.vectorize.get_mask_mode. */

View File

@ -13460,7 +13460,7 @@ mips_preferred_simd_mode (scalar_mode mode)
/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
static void
mips_autovectorize_vector_sizes (vector_sizes *sizes)
mips_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (ISA_HAS_MSA)
sizes->safe_push (16);

View File

@ -6021,11 +6021,13 @@ against lower halves of vectors recursively until the specified mode is
reached. The default is @var{mode} which means no splitting.
@end deftypefn
@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes})
@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes}, bool @var{all})
If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not
the only one that is worth considering, this hook should add all suitable
vector sizes to @var{sizes}, in order of decreasing preference. The first
one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
If @var{all} is true, add suitable vector sizes even when they are generally
not expected to be worthwhile.
The hook does not need to do anything if the vector returned by
@code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant

View File

@ -4974,6 +4974,13 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
&& loop->safelen > 1)
{
loop->force_vectorize = true;
if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
{
unsigned HOST_WIDE_INT v
= tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
loop->simdlen = v;
}
cfun->has_force_vectorize_loops = true;
}
else if (dont_vectorize)

View File

@ -469,7 +469,7 @@ omp_max_vf (void)
return 1;
auto_vector_sizes sizes;
targetm.vectorize.autovectorize_vector_sizes (&sizes);
targetm.vectorize.autovectorize_vector_sizes (&sizes, true);
if (!sizes.is_empty ())
{
poly_uint64 vf = 0;

View File

@ -3600,7 +3600,7 @@ omp_clause_aligned_alignment (tree clause)
unsigned int al = 1;
opt_scalar_mode mode_iter;
auto_vector_sizes sizes;
targetm.vectorize.autovectorize_vector_sizes (&sizes);
targetm.vectorize.autovectorize_vector_sizes (&sizes, true);
poly_uint64 vs = 0;
for (unsigned int i = 0; i < sizes.length (); ++i)
vs = ordered_max (vs, sizes[i]);

View File

@ -593,7 +593,7 @@ can_vec_mask_load_store_p (machine_mode mode,
return true;
auto_vector_sizes vector_sizes;
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, true);
for (unsigned int i = 0; i < vector_sizes.length (); ++i)
{
poly_uint64 cur = vector_sizes[i];

View File

@ -1899,12 +1899,14 @@ DEFHOOK
the only one that is worth considering, this hook should add all suitable\n\
vector sizes to @var{sizes}, in order of decreasing preference. The first\n\
one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.\n\
If @var{all} is true, add suitable vector sizes even when they are generally\n\
not expected to be worthwhile.\n\
\n\
The hook does not need to do anything if the vector returned by\n\
@code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant\n\
for autovectorization. The default implementation does nothing.",
void,
(vector_sizes *sizes),
(vector_sizes *sizes, bool all),
default_autovectorize_vector_sizes)
/* Function to get a target mode for a vector mask. */

View File

@ -1316,7 +1316,7 @@ default_split_reduction (machine_mode mode)
is tried. */
void
default_autovectorize_vector_sizes (vector_sizes *)
default_autovectorize_vector_sizes (vector_sizes *, bool)
{
}

View File

@ -110,7 +110,7 @@ default_builtin_support_vector_misalignment (machine_mode mode,
int, bool);
extern machine_mode default_preferred_simd_mode (scalar_mode mode);
extern machine_mode default_split_reduction (machine_mode);
extern void default_autovectorize_vector_sizes (vector_sizes *);
extern void default_autovectorize_vector_sizes (vector_sizes *, bool);
extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64);
extern bool default_empty_mask_is_expensive (unsigned);
extern void *default_init_cost (struct loop *);

View File

@ -1,3 +1,7 @@
2019-05-20 Jakub Jelinek <jakub@redhat.com>
* gcc.target/i386/avx512f-simd-1.c: New test.
2019-05-20 Christophe Lyon <christophe.lyon@linaro.org>
* gcc.target/aarch64/target_attr_10.c: Add quotes to expected

View File

@ -0,0 +1,35 @@
/* { dg-do compile } */
/* { dg-options "-fopenmp-simd -O2 -mavx512f -masm=att" } */
/* { dg-final { scan-assembler "vpadd\[^\n\r]*%xmm" } } */
/* { dg-final { scan-assembler "vpadd\[^\n\r]*%ymm" } } */
/* { dg-final { scan-assembler "vpadd\[^\n\r]*%zmm" } } */
#define N 1024
int a[N];
void
f1 (void)
{
int i;
#pragma omp simd simdlen (4)
for (i = 0; i < N; ++i)
a[i] = a[i] + 1;
}
void
f2 (void)
{
int i;
#pragma omp simd simdlen (8)
for (i = 0; i < N; ++i)
a[i] = a[i] + 2;
}
void
f3 (void)
{
int i;
#pragma omp simd simdlen (16)
for (i = 0; i < N; ++i)
a[i] = a[i] + 3;
}

View File

@ -2254,7 +2254,8 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
/* Autodetect first vector size we try. */
current_vector_size = 0;
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes,
loop->simdlen != 0);
unsigned int next_size = 0;
DUMP_VECT_SCOPE ("analyze_loop_nest");
@ -2273,6 +2274,8 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
unsigned n_stmts = 0;
poly_uint64 autodetected_vector_size = 0;
opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
poly_uint64 first_vector_size = 0;
while (1)
{
/* Check the CFG characteristics of the loop (nesting, entry/exit). */
@ -2283,6 +2286,7 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"bad loop form.\n");
gcc_checking_assert (first_loop_vinfo == NULL);
return loop_vinfo;
}
@ -2296,10 +2300,27 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
{
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
return loop_vinfo;
if (loop->simdlen
&& maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
(unsigned HOST_WIDE_INT) loop->simdlen))
{
if (first_loop_vinfo == NULL)
{
first_loop_vinfo = loop_vinfo;
first_vector_size = current_vector_size;
loop->aux = NULL;
}
else
delete loop_vinfo;
}
else
{
delete first_loop_vinfo;
return loop_vinfo;
}
}
delete loop_vinfo;
else
delete loop_vinfo;
if (next_size == 0)
autodetected_vector_size = current_vector_size;
@ -2308,10 +2329,31 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
&& known_eq (vector_sizes[next_size], autodetected_vector_size))
next_size += 1;
if (fatal
|| next_size == vector_sizes.length ()
if (fatal)
{
gcc_checking_assert (first_loop_vinfo == NULL);
return opt_loop_vec_info::propagate_failure (res);
}
if (next_size == vector_sizes.length ()
|| known_eq (current_vector_size, 0U))
return opt_loop_vec_info::propagate_failure (res);
{
if (first_loop_vinfo)
{
current_vector_size = first_vector_size;
loop->aux = (loop_vec_info) first_loop_vinfo;
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"***** Choosing vector size ");
dump_dec (MSG_NOTE, current_vector_size);
dump_printf (MSG_NOTE, "\n");
}
return first_loop_vinfo;
}
else
return opt_loop_vec_info::propagate_failure (res);
}
/* Try the next biggest vector size. */
current_vector_size = vector_sizes[next_size++];
@ -8670,7 +8712,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
if (epilogue)
{
auto_vector_sizes vector_sizes;
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
unsigned int next_size = 0;
/* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work

View File

@ -2983,7 +2983,7 @@ vect_slp_bb (basic_block bb)
/* Autodetect first vector size we try. */
current_vector_size = 0;
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
unsigned int next_size = 0;
gsi = gsi_start_bb (bb);