re PR tree-optimization/87621 (outer loop auto-vectorization fails for exponentiation code)

2018-11-09  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/87621
	* tree-vect-loop.c (vectorizable_reduction): Handle reduction
	op with only phi inputs.
	* tree-ssa-loop-ch.c: Include tree-ssa-sccvn.h.
	(ch_base::copy_headers): Run CSE on copied loop headers.
	(pass_ch_vect::process_loop_p): Simplify.

	* g++.dg/vect/pr87621.cc: New testcase.

From-SVN: r265959
This commit is contained in:
Richard Biener 2018-11-09 10:53:31 +00:00 committed by Richard Biener
parent 43b01cc12d
commit 1dd6990226
5 changed files with 75 additions and 16 deletions

View File

@ -1,3 +1,12 @@
2018-11-09 Richard Biener <rguenther@suse.de>
PR tree-optimization/87621
* tree-vect-loop.c (vectorizable_reduction): Handle reduction
op with only phi inputs.
* tree-ssa-loop-ch.c: Include tree-ssa-sccvn.h.
(ch_base::copy_headers): Run CSE on copied loop headers.
(pass_ch_vect::process_loop_p): Simplify.
2018-11-09 Alexandre Oliva <oliva@adacore.com>
* config/i386/mingw32.h (LINK_SPEC_LARGE_ADDR_AWARE): Adjust

View File

@ -1,3 +1,8 @@
2018-11-09 Richard Biener <rguenther@suse.de>
PR tree-optimization/87621
* g++.dg/vect/pr87621.cc: New testcase.
2018-11-09 Alexandre Oliva <aoliva@redhat.com>
PR rtl-optimization/86438

View File

@ -0,0 +1,27 @@
/* { dg-do compile } */
extern "C" double pow(double, double);
/* Integer power by repeated squaring: returns X raised to the power N.
   The running base lives in X and the factors picked up at odd values
   of N are accumulated in Y.  N == 0 yields 1.  */
template <typename T>
T pow(T x, unsigned int n)
{
/* Anything to the zeroth power is 1; this also keeps N >= 1 below.  */
if (!n)
return 1;
/* Product of the base values seen whenever N was odd.  */
T y = 1;
/* Square the base and halve the exponent until a single factor of X
   is left (N == 1).  */
while (n > 1)
{
if (n%2)
y *= x;
x = x*x;
n /= 2;
}
/* N is 1 here, so multiply the remaining base into the accumulator.  */
return x*y;
}
/* Replace each of the first 8 elements of X with its 10th power.
   With int arguments the call resolves to the pow template above
   (T = int) rather than the extern "C" double overload.
   NOTE(review): presumably this is the loop the dg-final scan expects
   to be reported as "OUTER LOOP VECTORIZED" once pow is inlined --
   confirm against the vect dump.  */
void testVec(int* x)
{
for (int i = 0; i < 8; ++i)
x[i] = pow(x[i], 10);
}
/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_double && vect_hw_misalign } } } } */

View File

@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "tree-ssa-scopedtables.h"
#include "tree-ssa-threadedge.h"
#include "tree-ssa-sccvn.h"
#include "params.h"
/* Duplicates headers of loops if they are small enough, so that the statements
@ -297,12 +298,14 @@ ch_base::copy_headers (function *fun)
bool changed = false;
if (number_of_loops (fun) <= 1)
return 0;
return 0;
bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
bbs_size = n_basic_blocks_for_fn (fun);
auto_vec<std::pair<edge, loop_p> > copied;
FOR_EACH_LOOP (loop, 0)
{
int initial_limit = PARAM_VALUE (PARAM_MAX_LOOP_HEADER_INSNS);
@ -371,6 +374,7 @@ ch_base::copy_headers (function *fun)
fprintf (dump_file, "Duplication failed.\n");
continue;
}
copied.safe_push (std::make_pair (entry, loop));
/* If the loop has the form "for (i = j; i < j + 10; i++)" then
this copying can introduce a case where we rely on undefined
@ -422,7 +426,28 @@ ch_base::copy_headers (function *fun)
}
if (changed)
update_ssa (TODO_update_ssa);
{
update_ssa (TODO_update_ssa);
/* After updating SSA form perform CSE on the loop header
copies. This is esp. required for the pass before
vectorization since nothing cleans up copied exit tests
that can now be simplified. CSE from the entry of the
region we copied till all loop exit blocks but not
entering the loop itself. */
for (unsigned i = 0; i < copied.length (); ++i)
{
edge entry = copied[i].first;
loop_p loop = copied[i].second;
vec<edge> exit_edges = get_loop_exit_edges (loop);
bitmap exit_bbs = BITMAP_ALLOC (NULL);
for (unsigned j = 0; j < exit_edges.length (); ++j)
bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
bitmap_set_bit (exit_bbs, loop->header->index);
do_rpo_vn (cfun, entry, exit_bbs);
BITMAP_FREE (exit_bbs);
exit_edges.release ();
}
}
free (bbs);
free (copied_bbs);
@ -473,24 +498,13 @@ pass_ch_vect::process_loop_p (struct loop *loop)
if (loop->dont_vectorize)
return false;
if (!do_while_loop_p (loop))
return true;
/* The vectorizer won't handle anything with multiple exits, so skip. */
/* The vectorizer won't handle anything with multiple exits, so skip. */
edge exit = single_exit (loop);
if (!exit)
return false;
/* Copy headers iff there looks to be code in the loop after the exit block,
i.e. the exit block has an edge to another block (besides the latch,
which should be empty). */
edge_iterator ei;
edge e;
FOR_EACH_EDGE (e, ei, exit->src->succs)
if (!loop_exit_edge_p (loop, e)
&& e->dest != loop->header
&& e->dest != loop->latch)
return true;
if (!do_while_loop_p (loop))
return true;
return false;
}

View File

@ -6075,6 +6075,10 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
break;
}
/* For a nested cycle we might end up with an operation like
phi_result * phi_result. */
if (!vectype_in)
vectype_in = STMT_VINFO_VECTYPE (stmt_info);
gcc_assert (vectype_in);
if (slp_node)