re PR tree-optimization/78899 (Vectorized loop with optimized mask stores motion is completely deleted after r242520.)

PR tree-optimization/78899
	* tree-if-conv.c (version_loop_for_if_conversion): Instead of
	returning bool return struct loop *, NULL for failure and the new
	loop on success.
	(versionable_outer_loop_p): Don't version outer loop if it has
	dont_vectorize bit set.
	(tree_if_conversion): When versioning outer loop, ensure
	tree_if_conversion is performed also on the inner loop of the
	non-vectorizable outer loop copy.
	* tree-vectorizer.c (set_uid_loop_bbs): Formatting fix.  Fold
	LOOP_VECTORIZED in inner loop of the scalar outer loop and
	prevent vectorization of it.
	(vectorize_loops): For outer + inner LOOP_VECTORIZED, ensure
	the outer loop vectorization of the non-scalar version is attempted
	before vectorization of the inner loop in scalar version.  If
	outer LOOP_VECTORIZED guarded loop is not vectorized, prevent
	vectorization of its inner loop.
	* tree-vect-loop-manip.c (rename_variables_in_bb): If outer_loop
	has 2 inner loops, rename also on edges from bb whose single pred
	is outer_loop->header.  Fix typo in function comment.

	* gcc.target/i386/pr78899.c: New test.
	* gcc.dg/pr71077.c: New test.

From-SVN: r244238
This commit is contained in:
Jakub Jelinek 2017-01-09 21:10:23 +01:00 committed by Jakub Jelinek
parent 47d5beb478
commit cb330ba582
7 changed files with 201 additions and 18 deletions

View File

@ -1,3 +1,26 @@
2017-01-09 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78899
* tree-if-conv.c (version_loop_for_if_conversion): Instead of
returning bool return struct loop *, NULL for failure and the new
loop on success.
(versionable_outer_loop_p): Don't version outer loop if it has
dont_vectorize bit set.
(tree_if_conversion): When versioning outer loop, ensure
tree_if_conversion is performed also on the inner loop of the
non-vectorizable outer loop copy.
* tree-vectorizer.c (set_uid_loop_bbs): Formatting fix. Fold
LOOP_VECTORIZED in inner loop of the scalar outer loop and
prevent vectorization of it.
(vectorize_loops): For outer + inner LOOP_VECTORIZED, ensure
the outer loop vectorization of the non-scalar version is attempted
before vectorization of the inner loop in scalar version. If
outer LOOP_VECTORIZED guarded loop is not vectorized, prevent
vectorization of its inner loop.
* tree-vect-loop-manip.c (rename_variables_in_bb): If outer_loop
has 2 inner loops, rename also on edges from bb whose single pred
is outer_loop->header. Fix typo in function comment.
2017-01-09 Martin Sebor <msebor@redhat.com> 2017-01-09 Martin Sebor <msebor@redhat.com>
PR bootstrap/79033 PR bootstrap/79033

View File

@ -1,3 +1,9 @@
2017-01-09 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78899
* gcc.target/i386/pr78899.c: New test.
* gcc.dg/pr71077.c: New test.
2017-01-09 Martin Jambor <mjambor@suse.cz> 2017-01-09 Martin Jambor <mjambor@suse.cz>
PR ipa/78365 PR ipa/78365

View File

@ -0,0 +1,14 @@
/* PR c++/71077 */
/* { dg-do compile } */
/* { dg-options "-O3" } */
/* { dg-additional-options "-mavx2" { target { i?86-*-* x86_64-*-* } } } */
/* Test body: for every b in [0, n) and every c in [0, 32), store 0 into
   a[b + c] whenever (b & 1U) << c is nonzero.  The directives at the top of
   this test request compilation only (dg-do compile).  */
void
foo (int *a, int n)
{
int b, c;
/* Outer loop: the trip count is the runtime value n.  */
for (b = 0; b < n; b++)
/* Inner loop: constant trip count of 32.  */
for (c = 0; c < 32; c++)
/* Conditional store inside a two-level loop nest.  b & 1U has unsigned
   type, so the shift is evaluated in unsigned arithmetic.
   NOTE(review): presumably this guarded store in the inner loop is what
   drives the if-conversion/outer-loop versioning path this test was added
   for -- confirm against the PR.  */
if ((b & 1U) << c)
a[b + c] = 0;
}

View File

@ -0,0 +1,27 @@
/* PR tree-optimization/78899 */
/* { dg-do compile } */
/* { dg-options "-Ofast -fopenmp-simd -mavx2 -mno-avx512f" } */
#define N 1024
#define M 4
int p1[N], p2[N], p3[N], c[N];
/* Test body: walk the global arrays in n / M groups of M consecutive
   elements; for each element whose c[] entry is nonzero, increment the
   matching p1[] entry and set the matching p2[] entry to p3[] + 2.  */
void
foo (int n)
{
int i, k;
/* Outer loop over groups; k * M is the index of the first element of
   group k.  */
for (k = 0; k < n / M; k++)
{
/* The inner loop over the M elements of the group carries an OpenMP simd
   pragma; the test's dg-options pass -fopenmp-simd.  */
#pragma omp simd
for (i = 0; i < M; i++)
/* Both stores below are guarded by the c[] test.
   NOTE(review): presumably these guarded stores are what the "vpmaskmov"
   scan-assembler check after this function is meant to observe -- confirm.  */
if (c[k * M + i])
{
p1[k * M + i] += 1;
p2[k * M + i] = p3[k * M + i] + 2;
}
}
}
/* Ensure the loop is vectorized. */
/* { dg-final { scan-assembler "vpmaskmov" } } */
/* { dg-final { scan-assembler "vpadd" } } */

View File

@ -2535,7 +2535,7 @@ combine_blocks (struct loop *loop)
loop to execute. The vectorizer pass will fold this loop to execute. The vectorizer pass will fold this
internal call into either true or false. */ internal call into either true or false. */
static bool static struct loop *
version_loop_for_if_conversion (struct loop *loop) version_loop_for_if_conversion (struct loop *loop)
{ {
basic_block cond_bb; basic_block cond_bb;
@ -2566,7 +2566,7 @@ version_loop_for_if_conversion (struct loop *loop)
ifc_bbs[i]->aux = saved_preds[i]; ifc_bbs[i]->aux = saved_preds[i];
if (new_loop == NULL) if (new_loop == NULL)
return false; return NULL;
new_loop->dont_vectorize = true; new_loop->dont_vectorize = true;
new_loop->force_vectorize = false; new_loop->force_vectorize = false;
@ -2574,7 +2574,7 @@ version_loop_for_if_conversion (struct loop *loop)
gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num)); gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
gsi_insert_before (&gsi, g, GSI_SAME_STMT); gsi_insert_before (&gsi, g, GSI_SAME_STMT);
update_ssa (TODO_update_ssa); update_ssa (TODO_update_ssa);
return true; return new_loop;
} }
/* Return true when LOOP satisfies the follow conditions that will /* Return true when LOOP satisfies the follow conditions that will
@ -2594,6 +2594,7 @@ static bool
versionable_outer_loop_p (struct loop *loop) versionable_outer_loop_p (struct loop *loop)
{ {
if (!loop_outer (loop) if (!loop_outer (loop)
|| loop->dont_vectorize
|| !loop->inner || !loop->inner
|| loop->inner->next || loop->inner->next
|| !single_exit (loop) || !single_exit (loop)
@ -2789,7 +2790,10 @@ tree_if_conversion (struct loop *loop)
{ {
unsigned int todo = 0; unsigned int todo = 0;
bool aggressive_if_conv; bool aggressive_if_conv;
struct loop *rloop;
again:
rloop = NULL;
ifc_bbs = NULL; ifc_bbs = NULL;
any_pred_load_store = false; any_pred_load_store = false;
any_complicated_phi = false; any_complicated_phi = false;
@ -2829,8 +2833,31 @@ tree_if_conversion (struct loop *loop)
struct loop *vloop struct loop *vloop
= (versionable_outer_loop_p (loop_outer (loop)) = (versionable_outer_loop_p (loop_outer (loop))
? loop_outer (loop) : loop); ? loop_outer (loop) : loop);
if (!version_loop_for_if_conversion (vloop)) struct loop *nloop = version_loop_for_if_conversion (vloop);
if (nloop == NULL)
goto cleanup; goto cleanup;
if (vloop != loop)
{
/* If versionable_outer_loop_p decided to version the
outer loop, version also the inner loop of the non-vectorized
loop copy. So we transform:
loop1
loop2
into:
if (LOOP_VECTORIZED (1, 3))
{
loop1
loop2
}
else
loop3 (copy of loop1)
if (LOOP_VECTORIZED (4, 5))
loop4 (copy of loop2)
else
loop5 (copy of loop4) */
gcc_assert (nloop->inner && nloop->inner->next == NULL);
rloop = nloop->inner;
}
} }
/* Now all statements are if-convertible. Combine all the basic /* Now all statements are if-convertible. Combine all the basic
@ -2854,6 +2881,11 @@ tree_if_conversion (struct loop *loop)
free (ifc_bbs); free (ifc_bbs);
ifc_bbs = NULL; ifc_bbs = NULL;
} }
if (rloop != NULL)
{
loop = rloop;
goto again;
}
return todo; return todo;
} }

View File

@ -71,7 +71,7 @@ rename_use_op (use_operand_p op_p)
} }
/* Renames the variables in basic block BB. Allow renaming of PHI argumnets /* Renames the variables in basic block BB. Allow renaming of PHI arguments
on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
true. */ true. */
@ -102,9 +102,25 @@ rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
FOR_EACH_EDGE (e, ei, bb->preds) FOR_EACH_EDGE (e, ei, bb->preds)
{ {
if (!flow_bb_inside_loop_p (loop, e->src) if (!flow_bb_inside_loop_p (loop, e->src))
&& (!rename_from_outer_loop || e->src != outer_loop->header)) {
if (!rename_from_outer_loop)
continue; continue;
if (e->src != outer_loop->header)
{
if (outer_loop->inner->next)
{
/* If outer_loop has 2 inner loops, allow there to
be an extra basic block which decides which of the
two loops to use using LOOP_VECTORIZED. */
if (!single_pred_p (e->src)
|| single_pred (e->src) != outer_loop->header)
continue;
}
else
continue;
}
}
for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
gsi_next (&gsi)) gsi_next (&gsi))
rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e)); rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));

View File

@ -465,6 +465,7 @@ fold_loop_vectorized_call (gimple *g, tree value)
update_stmt (use_stmt); update_stmt (use_stmt);
} }
} }
/* Set the uids of all the statements in basic blocks inside loop /* Set the uids of all the statements in basic blocks inside loop
represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
call guarding the loop which has been if converted. */ call guarding the loop which has been if converted. */
@ -477,9 +478,22 @@ set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg)); struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop; LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
gcc_checking_assert (vect_loop_vectorized_call gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
(LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
== loop_vectorized_call); == loop_vectorized_call);
/* If we are going to vectorize outer loop, prevent vectorization
of the inner loop in the scalar loop - either the scalar loop is
thrown away, so it is a wasted work, or is used only for
a few iterations. */
if (scalar_loop->inner)
{
gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
if (g)
{
arg = gimple_call_arg (g, 0);
get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
fold_loop_vectorized_call (g, boolean_false_node);
}
}
bbs = get_loop_body (scalar_loop); bbs = get_loop_body (scalar_loop);
for (i = 0; i < scalar_loop->num_nodes; i++) for (i = 0; i < scalar_loop->num_nodes; i++)
{ {
@ -534,13 +548,58 @@ vectorize_loops (void)
only over initial loops skipping newly generated ones. */ only over initial loops skipping newly generated ones. */
FOR_EACH_LOOP (loop, 0) FOR_EACH_LOOP (loop, 0)
if (loop->dont_vectorize) if (loop->dont_vectorize)
any_ifcvt_loops = true;
else if ((flag_tree_loop_vectorize
&& optimize_loop_nest_for_speed_p (loop))
|| loop->force_vectorize)
{ {
loop_vec_info loop_vinfo, orig_loop_vinfo = NULL; any_ifcvt_loops = true;
gimple *loop_vectorized_call = vect_loop_vectorized_call (loop); /* If-conversion sometimes versions both the outer loop
(for the case when outer loop vectorization might be
desirable) as well as the inner loop in the scalar version
of the loop. So we have:
if (LOOP_VECTORIZED (1, 3))
{
loop1
loop2
}
else
loop3 (copy of loop1)
if (LOOP_VECTORIZED (4, 5))
loop4 (copy of loop2)
else
loop5 (copy of loop4)
If FOR_EACH_LOOP gives us loop3 first (which has
dont_vectorize set), make sure to process loop1 before loop4;
so that we can prevent vectorization of loop4 if loop1
is successfully vectorized. */
if (loop->inner)
{
gimple *loop_vectorized_call
= vect_loop_vectorized_call (loop);
if (loop_vectorized_call
&& vect_loop_vectorized_call (loop->inner))
{
tree arg = gimple_call_arg (loop_vectorized_call, 0);
struct loop *vector_loop
= get_loop (cfun, tree_to_shwi (arg));
if (vector_loop && vector_loop != loop)
{
loop = vector_loop;
/* Make sure we don't vectorize it twice. */
loop->dont_vectorize = true;
goto try_vectorize;
}
}
}
}
else
{
loop_vec_info loop_vinfo, orig_loop_vinfo;
gimple *loop_vectorized_call;
try_vectorize:
if (!((flag_tree_loop_vectorize
&& optimize_loop_nest_for_speed_p (loop))
|| loop->force_vectorize))
continue;
orig_loop_vinfo = NULL;
loop_vectorized_call = vect_loop_vectorized_call (loop);
vectorize_epilogue: vectorize_epilogue:
vect_location = find_loop_location (loop); vect_location = find_loop_location (loop);
if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
@ -595,6 +654,12 @@ vectorize_epilogue:
ret |= TODO_cleanup_cfg; ret |= TODO_cleanup_cfg;
} }
} }
/* If outer loop vectorization fails for LOOP_VECTORIZED guarded
loop, don't vectorize its inner loop; we'll attempt to
vectorize LOOP_VECTORIZED guarded inner loop of the scalar
loop version. */
if (loop_vectorized_call && loop->inner)
loop->inner->dont_vectorize = true;
continue; continue;
} }