re PR tree-optimization/78899 (Vectorized loop with optimized mask stores motion is completely deleted after r242520.)
	PR tree-optimization/78899
	* tree-if-conv.c (version_loop_for_if_conversion): Instead of
	returning bool return struct loop *, NULL for failure and the new
	loop on success.
	(versionable_outer_loop_p): Don't version outer loop if it has
	dont_vectorized bit set.
	(tree_if_conversion): When versioning outer loop, ensure
	tree_if_conversion is performed also on the inner loop of the
	non-vectorizable outer loop copy.
	* tree-vectorizer.c (set_uid_loop_bbs): Formatting fix.  Fold
	LOOP_VECTORIZED in inner loop of the scalar outer loop and
	prevent vectorization of it.
	(vectorize_loops): For outer + inner LOOP_VECTORIZED, ensure
	the outer loop vectorization of the non-scalar version is attempted
	before vectorization of the inner loop in scalar version.  If
	outer LOOP_VECTORIZED guarded loop is not vectorized, prevent
	vectorization of its inner loop.
	* tree-vect-loop-manip.c (rename_variables_in_bb): If outer_loop
	has 2 inner loops, rename also on edges from bb whose single pred
	is outer_loop->header.  Fix typo in function comment.

	* gcc.target/i386/pr78899.c: New test.
	* gcc.dg/pr71077.c: New test.

From-SVN: r244238
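Background for the LOOP_VECTORIZED calls this change keeps referring to: when if-conversion versions a loop, it guards the if-converted copy with an internal call that the vectorizer resolves later.  A rough sketch of the resulting shape (an illustration only, not actual GCC dump output; the loop numbers are made up):

    if (LOOP_VECTORIZED (1, 2))   /* arg 0: loop 1, the if-converted copy    */
      loop 1                      /* vectorization candidate                 */
    else
      loop 2                      /* arg 1: scalar copy, dont_vectorize set  */

The vectorizer folds the call to true if loop 1 is actually vectorized and to false otherwise, and CFG cleanup then removes the dead copy.  The bug fixed here concerns nests where both the outer loop and the inner loop of the scalar copy carry such guards, so the order in which the guards are folded matters.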
commit cb330ba582 (parent 47d5beb478)
gcc/ChangeLog
@@ -1,3 +1,26 @@
+2017-01-09  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/78899
+	* tree-if-conv.c (version_loop_for_if_conversion): Instead of
+	returning bool return struct loop *, NULL for failure and the new
+	loop on success.
+	(versionable_outer_loop_p): Don't version outer loop if it has
+	dont_vectorized bit set.
+	(tree_if_conversion): When versioning outer loop, ensure
+	tree_if_conversion is performed also on the inner loop of the
+	non-vectorizable outer loop copy.
+	* tree-vectorizer.c (set_uid_loop_bbs): Formatting fix.  Fold
+	LOOP_VECTORIZED in inner loop of the scalar outer loop and
+	prevent vectorization of it.
+	(vectorize_loops): For outer + inner LOOP_VECTORIZED, ensure
+	the outer loop vectorization of the non-scalar version is attempted
+	before vectorization of the inner loop in scalar version.  If
+	outer LOOP_VECTORIZED guarded loop is not vectorized, prevent
+	vectorization of its inner loop.
+	* tree-vect-loop-manip.c (rename_variables_in_bb): If outer_loop
+	has 2 inner loops, rename also on edges from bb whose single pred
+	is outer_loop->header.  Fix typo in function comment.
+
 2017-01-09  Martin Sebor  <msebor@redhat.com>
 
 	PR bootstrap/79033
gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2017-01-09  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/78899
+	* gcc.target/i386/pr78899.c: New test.
+	* gcc.dg/pr71077.c: New test.
+
 2017-01-09  Martin Jambor  <mjambor@suse.cz>
 
 	PR ipa/78365
gcc/testsuite/gcc.dg/pr71077.c (new file)
@@ -0,0 +1,14 @@
+/* PR c++/71077 */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-additional-options "-mavx2" { target { i?86-*-* x86_64-*-* } } } */
+
+void
+foo (int *a, int n)
+{
+  int b, c;
+  for (b = 0; b < n; b++)
+    for (c = 0; c < 32; c++)
+      if ((b & 1U) << c)
+        a[b + c] = 0;
+}
gcc/testsuite/gcc.target/i386/pr78899.c (new file)
@@ -0,0 +1,27 @@
+/* PR tree-optimization/78899 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fopenmp-simd -mavx2 -mno-avx512f" } */
+
+#define N 1024
+#define M 4
+int p1[N], p2[N], p3[N], c[N];
+
+void
+foo (int n)
+{
+  int i, k;
+  for (k = 0; k < n / M; k++)
+    {
+#pragma omp simd
+      for (i = 0; i < M; i++)
+        if (c[k * M + i])
+          {
+            p1[k * M + i] += 1;
+            p2[k * M + i] = p3[k * M + i] + 2;
+          }
+    }
+}
+
+/* Ensure the loop is vectorized.  */
+/* { dg-final { scan-assembler "vpmaskmov" } } */
+/* { dg-final { scan-assembler "vpadd" } } */
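For reference, a hand-run check (an assumption about manual use; in the tree the test runs under the DejaGnu harness with exactly the dg-options above):

    gcc -Ofast -fopenmp-simd -mavx2 -mno-avx512f -S pr78899.c

With the fix applied, the generated assembly should contain vpmaskmov masked loads/stores and a vpadd, which is what the scan-assembler directives verify; before this fix the vectorized loop with the optimized mask-store motion was deleted (the PR title), so no masked stores appeared in the output.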
gcc/tree-if-conv.c
@@ -2535,7 +2535,7 @@ combine_blocks (struct loop *loop)
    loop to execute.  The vectorizer pass will fold this
    internal call into either true or false.  */
 
-static bool
+static struct loop *
 version_loop_for_if_conversion (struct loop *loop)
 {
   basic_block cond_bb;
@@ -2566,7 +2566,7 @@ version_loop_for_if_conversion (struct loop *loop)
       ifc_bbs[i]->aux = saved_preds[i];
 
   if (new_loop == NULL)
-    return false;
+    return NULL;
 
   new_loop->dont_vectorize = true;
   new_loop->force_vectorize = false;
@@ -2574,7 +2574,7 @@ version_loop_for_if_conversion (struct loop *loop)
   gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
   update_ssa (TODO_update_ssa);
-  return true;
+  return new_loop;
 }
 
 /* Return true when LOOP satisfies the follow conditions that will
@@ -2594,6 +2594,7 @@ static bool
 versionable_outer_loop_p (struct loop *loop)
 {
   if (!loop_outer (loop)
+      || loop->dont_vectorize
       || !loop->inner
       || loop->inner->next
       || !single_exit (loop)
@@ -2789,7 +2790,10 @@ tree_if_conversion (struct loop *loop)
 {
   unsigned int todo = 0;
   bool aggressive_if_conv;
+  struct loop *rloop;
 
+ again:
+  rloop = NULL;
   ifc_bbs = NULL;
   any_pred_load_store = false;
   any_complicated_phi = false;
@@ -2829,8 +2833,31 @@ tree_if_conversion (struct loop *loop)
       struct loop *vloop
        = (versionable_outer_loop_p (loop_outer (loop))
           ? loop_outer (loop) : loop);
-      if (!version_loop_for_if_conversion (vloop))
+      struct loop *nloop = version_loop_for_if_conversion (vloop);
+      if (nloop == NULL)
        goto cleanup;
+      if (vloop != loop)
+        {
+          /* If versionable_outer_loop_p decided to version the
+             outer loop, version also the inner loop of the non-vectorized
+             loop copy.  So we transform:
+              loop1
+                loop2
+             into:
+              if (LOOP_VECTORIZED (1, 3))
+                {
+                  loop1
+                    loop2
+                }
+              else
+                loop3 (copy of loop1)
+                  if (LOOP_VECTORIZED (4, 5))
+                    loop4 (copy of loop2)
+                  else
+                    loop5 (copy of loop4)  */
+          gcc_assert (nloop->inner && nloop->inner->next == NULL);
+          rloop = nloop->inner;
+        }
     }
 
   /* Now all statements are if-convertible.  Combine all the basic
@@ -2854,6 +2881,11 @@ tree_if_conversion (struct loop *loop)
       free (ifc_bbs);
       ifc_bbs = NULL;
     }
+  if (rloop != NULL)
+    {
+      loop = rloop;
+      goto again;
+    }
 
   return todo;
 }
gcc/tree-vect-loop-manip.c
@@ -71,7 +71,7 @@ rename_use_op (use_operand_p op_p)
 }
 
 
-/* Renames the variables in basic block BB.  Allow renaming of PHI argumnets
+/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
    on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
    true.  */
 
@@ -102,9 +102,25 @@ rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
 
   FOR_EACH_EDGE (e, ei, bb->preds)
     {
-      if (!flow_bb_inside_loop_p (loop, e->src)
-          && (!rename_from_outer_loop || e->src != outer_loop->header))
-        continue;
+      if (!flow_bb_inside_loop_p (loop, e->src))
+        {
+          if (!rename_from_outer_loop)
+            continue;
+          if (e->src != outer_loop->header)
+            {
+              if (outer_loop->inner->next)
+                {
+                  /* If outer_loop has 2 inner loops, allow there to
+                     be an extra basic block which decides which of the
+                     two loops to use using LOOP_VECTORIZED.  */
+                  if (!single_pred_p (e->src)
+                      || single_pred (e->src) != outer_loop->header)
+                    continue;
+                }
+              else
+                continue;
+            }
+        }
       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
            gsi_next (&gsi))
         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
gcc/tree-vectorizer.c
@@ -465,6 +465,7 @@ fold_loop_vectorized_call (gimple *g, tree value)
       update_stmt (use_stmt);
     }
 }
+
 /* Set the uids of all the statements in basic blocks inside loop
    represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
    call guarding the loop which has been if converted.  */
@@ -477,9 +478,22 @@ set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
   struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
 
   LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
-  gcc_checking_assert (vect_loop_vectorized_call
-                       (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
-                       == loop_vectorized_call);
+  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
+                       == loop_vectorized_call);
+  /* If we are going to vectorize outer loop, prevent vectorization
+     of the inner loop in the scalar loop - either the scalar loop is
+     thrown away, so it is a wasted work, or is used only for
+     a few iterations.  */
+  if (scalar_loop->inner)
+    {
+      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
+      if (g)
+        {
+          arg = gimple_call_arg (g, 0);
+          get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
+          fold_loop_vectorized_call (g, boolean_false_node);
+        }
+    }
   bbs = get_loop_body (scalar_loop);
   for (i = 0; i < scalar_loop->num_nodes; i++)
     {
@@ -534,13 +548,58 @@ vectorize_loops (void)
      only over initial loops skipping newly generated ones.  */
   FOR_EACH_LOOP (loop, 0)
     if (loop->dont_vectorize)
-      any_ifcvt_loops = true;
-    else if ((flag_tree_loop_vectorize
-              && optimize_loop_nest_for_speed_p (loop))
-             || loop->force_vectorize)
       {
-        loop_vec_info loop_vinfo, orig_loop_vinfo = NULL;
-        gimple *loop_vectorized_call = vect_loop_vectorized_call (loop);
+        any_ifcvt_loops = true;
+        /* If-conversion sometimes versions both the outer loop
+           (for the case when outer loop vectorization might be
+           desirable) as well as the inner loop in the scalar version
+           of the loop.  So we have:
+            if (LOOP_VECTORIZED (1, 3))
+              {
+                loop1
+                  loop2
+              }
+            else
+              loop3 (copy of loop1)
+                if (LOOP_VECTORIZED (4, 5))
+                  loop4 (copy of loop2)
+                else
+                  loop5 (copy of loop4)
+           If FOR_EACH_LOOP gives us loop3 first (which has
+           dont_vectorize set), make sure to process loop1 before loop4;
+           so that we can prevent vectorization of loop4 if loop1
+           is successfully vectorized.  */
+        if (loop->inner)
+          {
+            gimple *loop_vectorized_call
+              = vect_loop_vectorized_call (loop);
+            if (loop_vectorized_call
+                && vect_loop_vectorized_call (loop->inner))
+              {
+                tree arg = gimple_call_arg (loop_vectorized_call, 0);
+                struct loop *vector_loop
+                  = get_loop (cfun, tree_to_shwi (arg));
+                if (vector_loop && vector_loop != loop)
+                  {
+                    loop = vector_loop;
+                    /* Make sure we don't vectorize it twice.  */
+                    loop->dont_vectorize = true;
+                    goto try_vectorize;
+                  }
+              }
+          }
+      }
+    else
+      {
+        loop_vec_info loop_vinfo, orig_loop_vinfo;
+        gimple *loop_vectorized_call;
+      try_vectorize:
+        if (!((flag_tree_loop_vectorize
+               && optimize_loop_nest_for_speed_p (loop))
+              || loop->force_vectorize))
+          continue;
+        orig_loop_vinfo = NULL;
+        loop_vectorized_call = vect_loop_vectorized_call (loop);
 vectorize_epilogue:
         vect_location = find_loop_location (loop);
         if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
@@ -595,6 +654,12 @@ vectorize_epilogue:
               ret |= TODO_cleanup_cfg;
             }
         }
+        /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
+           loop, don't vectorize its inner loop; we'll attempt to
+           vectorize LOOP_VECTORIZED guarded inner loop of the scalar
+           loop version.  */
+        if (loop_vectorized_call && loop->inner)
+          loop->inner->dont_vectorize = true;
         continue;
       }
 