re PR tree-optimization/87621 (outer loop auto-vectorization fails for exponentiation code)
2018-11-09 Richard Biener <rguenther@suse.de>

PR tree-optimization/87621
* tree-vect-loop.c (vectorizable_reduction): Handle reduction op with only phi inputs.
* tree-ssa-loop-ch.c: Include tree-ssa-sccvn.h.
(ch_base::copy_headers): Run CSE on copied loop headers.
(pass_ch_vect::process_loop_p): Simplify.

* g++.dg/vect/pr87621.cc: New testcase.

From-SVN: r265959
This commit is contained in:
parent
43b01cc12d
commit
1dd6990226
@ -1,3 +1,12 @@
|
||||
2018-11-09 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/87621
|
||||
* tree-vect-loop.c (vectorizable_reduction): Handle reduction
|
||||
op with only phi inputs.
|
||||
* tree-ssa-loop-ch.c: Include tree-ssa-sccvn.h.
|
||||
(ch_base::copy_headers): Run CSE on copied loop headers.
|
||||
(pass_ch_vect::process_loop_p): Simplify.
|
||||
|
||||
2018-11-09 Alexandre Oliva <oliva@adacore.com>
|
||||
|
||||
* config/i386/mingw32.h (LINK_SPEC_LARGE_ADDR_AWARE): Adjust
|
||||
|
@ -1,3 +1,8 @@
|
||||
2018-11-09 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/87621
|
||||
* g++.dg/vect/pr87621.cc: New testcase.
|
||||
|
||||
2018-11-09 Alexandre Oliva <aoliva@redhat.com>
|
||||
|
||||
PR rtl-optimization/86438
|
||||
|
27
gcc/testsuite/g++.dg/vect/pr87621.cc
Normal file
27
gcc/testsuite/g++.dg/vect/pr87621.cc
Normal file
@ -0,0 +1,27 @@
|
||||
/* { dg-do compile } */
|
||||
|
||||
extern "C" double pow(double, double);
|
||||
template <typename T>
|
||||
T pow(T x, unsigned int n)
|
||||
{
|
||||
if (!n)
|
||||
return 1;
|
||||
|
||||
T y = 1;
|
||||
while (n > 1)
|
||||
{
|
||||
if (n%2)
|
||||
y *= x;
|
||||
x = x*x;
|
||||
n /= 2;
|
||||
}
|
||||
return x*y;
|
||||
}
|
||||
|
||||
void testVec(int* x)
|
||||
{
|
||||
for (int i = 0; i < 8; ++i)
|
||||
x[i] = pow(x[i], 10);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_double && vect_hw_misalign } } } } */
|
@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tree-inline.h"
|
||||
#include "tree-ssa-scopedtables.h"
|
||||
#include "tree-ssa-threadedge.h"
|
||||
#include "tree-ssa-sccvn.h"
|
||||
#include "params.h"
|
||||
|
||||
/* Duplicates headers of loops if they are small enough, so that the statements
|
||||
@ -297,12 +298,14 @@ ch_base::copy_headers (function *fun)
|
||||
bool changed = false;
|
||||
|
||||
if (number_of_loops (fun) <= 1)
|
||||
return 0;
|
||||
return 0;
|
||||
|
||||
bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
|
||||
copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
|
||||
bbs_size = n_basic_blocks_for_fn (fun);
|
||||
|
||||
auto_vec<std::pair<edge, loop_p> > copied;
|
||||
|
||||
FOR_EACH_LOOP (loop, 0)
|
||||
{
|
||||
int initial_limit = PARAM_VALUE (PARAM_MAX_LOOP_HEADER_INSNS);
|
||||
@ -371,6 +374,7 @@ ch_base::copy_headers (function *fun)
|
||||
fprintf (dump_file, "Duplication failed.\n");
|
||||
continue;
|
||||
}
|
||||
copied.safe_push (std::make_pair (entry, loop));
|
||||
|
||||
/* If the loop has the form "for (i = j; i < j + 10; i++)" then
|
||||
this copying can introduce a case where we rely on undefined
|
||||
@ -422,7 +426,28 @@ ch_base::copy_headers (function *fun)
|
||||
}
|
||||
|
||||
if (changed)
|
||||
update_ssa (TODO_update_ssa);
|
||||
{
|
||||
update_ssa (TODO_update_ssa);
|
||||
/* After updating SSA form perform CSE on the loop header
|
||||
copies. This is esp. required for the pass before
|
||||
vectorization since nothing cleans up copied exit tests
|
||||
that can now be simplified. CSE from the entry of the
|
||||
region we copied till all loop exit blocks but not
|
||||
entering the loop itself. */
|
||||
for (unsigned i = 0; i < copied.length (); ++i)
|
||||
{
|
||||
edge entry = copied[i].first;
|
||||
loop_p loop = copied[i].second;
|
||||
vec<edge> exit_edges = get_loop_exit_edges (loop);
|
||||
bitmap exit_bbs = BITMAP_ALLOC (NULL);
|
||||
for (unsigned j = 0; j < exit_edges.length (); ++j)
|
||||
bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
|
||||
bitmap_set_bit (exit_bbs, loop->header->index);
|
||||
do_rpo_vn (cfun, entry, exit_bbs);
|
||||
BITMAP_FREE (exit_bbs);
|
||||
exit_edges.release ();
|
||||
}
|
||||
}
|
||||
free (bbs);
|
||||
free (copied_bbs);
|
||||
|
||||
@ -473,24 +498,13 @@ pass_ch_vect::process_loop_p (struct loop *loop)
|
||||
if (loop->dont_vectorize)
|
||||
return false;
|
||||
|
||||
if (!do_while_loop_p (loop))
|
||||
return true;
|
||||
|
||||
/* The vectorizer won't handle anything with multiple exits, so skip. */
|
||||
/* The vectorizer won't handle anything with multiple exits, so skip. */
|
||||
edge exit = single_exit (loop);
|
||||
if (!exit)
|
||||
return false;
|
||||
|
||||
/* Copy headers iff there looks to be code in the loop after the exit block,
|
||||
i.e. the exit block has an edge to another block (besides the latch,
|
||||
which should be empty). */
|
||||
edge_iterator ei;
|
||||
edge e;
|
||||
FOR_EACH_EDGE (e, ei, exit->src->succs)
|
||||
if (!loop_exit_edge_p (loop, e)
|
||||
&& e->dest != loop->header
|
||||
&& e->dest != loop->latch)
|
||||
return true;
|
||||
if (!do_while_loop_p (loop))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -6075,6 +6075,10 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
||||
vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
|
||||
break;
|
||||
}
|
||||
/* For a nested cycle we might end up with an operation like
|
||||
phi_result * phi_result. */
|
||||
if (!vectype_in)
|
||||
vectype_in = STMT_VINFO_VECTYPE (stmt_info);
|
||||
gcc_assert (vectype_in);
|
||||
|
||||
if (slp_node)
|
||||
|
Loading…
Reference in New Issue
Block a user