re PR tree-optimization/15255 ([tree-ssa] a * 2 + a * 2 is not converted to a * 4)
2008-08-13 Richard Guenther <rguenther@suse.de> PR tree-optimization/15255 * tree-ssa-reassoc.c (linearize_expr_tree): Declare. (struct oecount_s): New struct and VEC types. (cvec): New global. (oecount_hash): New function. (oecount_eq): Likewise. (oecount_cmp): Likewise. (zero_one_operation): New function. (build_and_add_sum): Likewise. (undistribute_ops_list): Perform un-distribution of multiplication and division on the chain of summands. (should_break_up_subtract): Also break up subtracts for factors. (reassociate_bb): Delete dead visited statements. Call undistribute_ops_list. Re-sort and optimize if it did something. * passes.c (init_optimization_passes): Move DSE before reassociation. * tree-ssa-loop-niter.c (stmt_dominates_stmt_p): Correctly handle PHI nodes. * gcc.dg/tree-ssa/reassoc-14.c: New testcase. * gcc.dg/tree-ssa/reassoc-15.c: Likewise. * gcc.dg/tree-ssa/reassoc-16.c: Likewise. * gcc.dg/torture/reassoc-1.c: Likewise. * gcc.dg/tree-ssa/recip-2.c: Adjust. * gcc.dg/tree-ssa/recip-6.c: Likewise. * gcc.dg/tree-ssa/recip-7.c: Likewise. * gfortran.dg/reassoc_4.f: Likewise. From-SVN: r139048
This commit is contained in:
parent
92464a8a7a
commit
25c6036a5b
|
@ -1,3 +1,24 @@
|
|||
2008-08-13 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/15255
|
||||
* tree-ssa-reassoc.c (linearize_expr_tree): Declare.
|
||||
(struct oecount_s): New struct and VEC types.
|
||||
(cvec): New global.
|
||||
(oecount_hash): New function.
|
||||
(oecount_eq): Likewise.
|
||||
(oecount_cmp): Likewise.
|
||||
(zero_one_operation): New function.
|
||||
(build_and_add_sum): Likewise.
|
||||
(undistribute_ops_list): Perform un-distribution of multiplication
|
||||
and division on the chain of summands.
|
||||
(should_break_up_subtract): Also break up subtracts for factors.
|
||||
(reassociate_bb): Delete dead visited statements.
|
||||
Call undistribute_ops_list. Re-sort and optimize if it did something.
|
||||
* passes.c (init_optimization_passes): Move DSE before
|
||||
reassociation.
|
||||
* tree-ssa-loop-niter.c (stmt_dominates_stmt_p): Correctly handle
|
||||
PHI nodes.
|
||||
|
||||
2008-08-12 Janis Johnson <janis187@us.ibm.com>
|
||||
|
||||
* doc/invoke.texi (-fipa-pta): Say the option is experimental.
|
||||
|
|
|
@ -633,9 +633,9 @@ init_optimization_passes (void)
|
|||
only examines PHIs to discover const/copy propagation
|
||||
opportunities. */
|
||||
NEXT_PASS (pass_phi_only_cprop);
|
||||
NEXT_PASS (pass_dse);
|
||||
NEXT_PASS (pass_reassoc);
|
||||
NEXT_PASS (pass_dce);
|
||||
NEXT_PASS (pass_dse);
|
||||
NEXT_PASS (pass_forwprop);
|
||||
NEXT_PASS (pass_phiopt);
|
||||
NEXT_PASS (pass_object_sizes);
|
||||
|
|
|
@ -1,3 +1,15 @@
|
|||
2008-08-13 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/15255
|
||||
* gcc.dg/tree-ssa/reassoc-14.c: New testcase.
|
||||
* gcc.dg/tree-ssa/reassoc-15.c: Likewise.
|
||||
* gcc.dg/tree-ssa/reassoc-16.c: Likewise.
|
||||
* gcc.dg/torture/reassoc-1.c: Likewise.
|
||||
* gcc.dg/tree-ssa/recip-2.c: Adjust.
|
||||
* gcc.dg/tree-ssa/recip-6.c: Likewise.
|
||||
* gcc.dg/tree-ssa/recip-7.c: Likewise.
|
||||
* gfortran.dg/reassoc_4.f: Likewise.
|
||||
|
||||
2008-08-12 Janis Johnson <janis187@us.ibm.com>
|
||||
|
||||
* gcc.target/i386/pr32000-2.c: Use dg-skip-if for target expression.
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
/* { dg-do run } */
|
||||
|
||||
int x;
|
||||
|
||||
int __attribute__((noinline))
|
||||
foo(int a, int b, int w)
|
||||
{
|
||||
int tmp1 = a * w;
|
||||
int tmp2 = b * w;
|
||||
x = tmp1;
|
||||
return tmp1 + tmp2;
|
||||
}
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
int main()
|
||||
{
|
||||
if (foo(1, 2, 3) != 9)
|
||||
abort ();
|
||||
if (x != 3)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-reassoc1" } */
|
||||
|
||||
int test1 (int x, int y, int z, int weight)
|
||||
{
|
||||
int tmp1 = x * weight;
|
||||
int tmp2 = y * weight;
|
||||
int tmp3 = (x - y) * weight;
|
||||
return tmp1 + (tmp2 + tmp3);
|
||||
}
|
||||
|
||||
int test2 (int x, int y, int z, int weight)
|
||||
{
|
||||
int tmp1 = x * weight;
|
||||
int tmp2 = y * weight * weight;
|
||||
int tmp3 = z * weight * weight * weight;
|
||||
return tmp1 + tmp2 + tmp3;
|
||||
}
|
||||
|
||||
/* There should be one multiplication left in test1 and three in test2. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */
|
||||
/* { dg-final { cleanup-tree-dump "reassoc1" } } */
|
|
@ -0,0 +1,19 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-reassoc1" } */
|
||||
|
||||
int test3 (int x, int y, int z, int weight, int w1, int w2, int w3)
|
||||
{
|
||||
int wtmp1 = w1 * weight;
|
||||
int wtmp2 = w2 * weight;
|
||||
int wtmp3 = w3 * weight;
|
||||
int tmp1 = x * wtmp1;
|
||||
int tmp2 = y * wtmp2;
|
||||
int tmp3 = z * wtmp3;
|
||||
return tmp1 + tmp2 + tmp3;
|
||||
}
|
||||
|
||||
/* The multiplication with weight should be un-distributed.
|
||||
??? This pattern is not recognized currently. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */
|
||||
/* { dg-final { cleanup-tree-dump "reassoc1" } } */
|
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
|
||||
|
||||
double test1 (double x, double y, double z, double weight)
|
||||
{
|
||||
double tmp1 = x / weight;
|
||||
double tmp2 = y / weight;
|
||||
double tmp3 = -x / weight;
|
||||
return tmp1 + tmp2 + tmp3;
|
||||
}
|
||||
|
||||
/* The division should be un-distributed and all references to x should
|
||||
be gone. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */
|
||||
/* { dg-final { cleanup-tree-dump "reassoc1" } } */
|
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
|
||||
|
||||
double test2 (double x, double y, double ddj, int b)
|
||||
{
|
||||
double tmp1, tmp2, sum;
|
||||
sum = 0.0;
|
||||
if (b)
|
||||
sum = 1.0;
|
||||
tmp1 = sum/ddj;
|
||||
tmp2 = x/ddj;
|
||||
return tmp1 + y + tmp2;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */
|
||||
/* { dg-final { cleanup-tree-dump "reassoc1" } } */
|
|
@ -0,0 +1,12 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -fdump-tree-reassoc1" } */
|
||||
|
||||
int
|
||||
ETree_nFactorEntriesInFront (int b, int m)
|
||||
{
|
||||
int nent = b*b + 2*b*m;
|
||||
return nent;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "\\\*" 2 "reassoc1" } } */
|
||||
/* { dg-final { cleanup-tree-dump "reassoc1" } } */
|
|
@ -1,7 +1,9 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
|
||||
|
||||
float e(float a, float b, float c, float d, float e, float f)
|
||||
float u, v, w, x, y, z;
|
||||
|
||||
void e(float a, float b, float c, float d, float e, float f)
|
||||
{
|
||||
if (a < b)
|
||||
{
|
||||
|
@ -20,7 +22,12 @@ float e(float a, float b, float c, float d, float e, float f)
|
|||
/* This should not be left as a multiplication. */
|
||||
c = 1 / c;
|
||||
|
||||
return a + b + c + d + e + f;
|
||||
u = a;
|
||||
v = b;
|
||||
w = c;
|
||||
x = d;
|
||||
y = e;
|
||||
z = f;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */
|
||||
|
|
|
@ -5,7 +5,9 @@
|
|||
|
||||
extern int f2();
|
||||
|
||||
double f1(double y, double z, double w)
|
||||
double m, n, o;
|
||||
|
||||
void f1(double y, double z, double w)
|
||||
{
|
||||
double b, c, d, e, f;
|
||||
|
||||
|
@ -18,7 +20,9 @@ double f1(double y, double z, double w)
|
|||
e = c / y;
|
||||
f = 1 / y;
|
||||
|
||||
return d + e + f;
|
||||
m = d;
|
||||
n = e;
|
||||
o = f;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
|
||||
|
|
|
@ -5,7 +5,9 @@
|
|||
|
||||
extern double h();
|
||||
|
||||
double f(int x, double z, double w)
|
||||
double m, n, o;
|
||||
|
||||
void f(int x, double z, double w)
|
||||
{
|
||||
double b, c, d, e, f;
|
||||
double y = h ();
|
||||
|
@ -19,7 +21,9 @@ double f(int x, double z, double w)
|
|||
e = c / y;
|
||||
f = 1 / y;
|
||||
|
||||
return d + e + f;
|
||||
m = d;
|
||||
n = e;
|
||||
o = f;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
! { dg-do compile }
|
||||
! { dg-options "-O3 -ffast-math -fdump-tree-reassoc1" }
|
||||
subroutine anisonl(w,vo,anisox,s,ii1,jj1,weight)
|
||||
integer ii1,jj1,i1,iii1,j1,jjj1,k1,l1,m1,n1
|
||||
real*8 w(3,3),vo(3,3),anisox(3,3,3,3),s(60,60),weight
|
||||
!
|
||||
! This routine replaces the following lines in e_c3d.f for
|
||||
! an anisotropic material
|
||||
!
|
||||
do i1=1,3
|
||||
iii1=ii1+i1-1
|
||||
do j1=1,3
|
||||
jjj1=jj1+j1-1
|
||||
do k1=1,3
|
||||
do l1=1,3
|
||||
s(iii1,jjj1)=s(iii1,jjj1)
|
||||
& +anisox(i1,k1,j1,l1)*w(k1,l1)*weight
|
||||
do m1=1,3
|
||||
s(iii1,jjj1)=s(iii1,jjj1)
|
||||
& +anisox(i1,k1,m1,l1)*w(k1,l1)
|
||||
& *vo(j1,m1)*weight
|
||||
& +anisox(m1,k1,j1,l1)*w(k1,l1)
|
||||
& *vo(i1,m1)*weight
|
||||
do n1=1,3
|
||||
s(iii1,jjj1)=s(iii1,jjj1)
|
||||
& +anisox(m1,k1,n1,l1)
|
||||
& *w(k1,l1)*vo(i1,m1)*vo(j1,n1)*weight
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
! There should be 22 multiplications left after un-distributing
|
||||
! weigth, w(k1,l1), vo(i1,m1) and vo(j1,m1) on the innermost two
|
||||
! unrolled loops.
|
||||
|
||||
! { dg-final { scan-tree-dump-times "\[0-9\] \\\* " 22 "reassoc1" } }
|
||||
! { dg-final { cleanup-tree-dump "reassoc1" } }
|
|
@ -2914,6 +2914,12 @@ stmt_dominates_stmt_p (gimple s1, gimple s2)
|
|||
{
|
||||
gimple_stmt_iterator bsi;
|
||||
|
||||
if (gimple_code (s2) == GIMPLE_PHI)
|
||||
return false;
|
||||
|
||||
if (gimple_code (s1) == GIMPLE_PHI)
|
||||
return true;
|
||||
|
||||
for (bsi = gsi_start_bb (bb1); gsi_stmt (bsi) != s2; gsi_next (&bsi))
|
||||
if (gsi_stmt (bsi) == s1)
|
||||
return true;
|
||||
|
|
|
@ -727,6 +727,415 @@ eliminate_using_constants (enum tree_code opcode,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
|
||||
bool, bool);
|
||||
|
||||
/* Structure for tracking and counting operands. */
|
||||
typedef struct oecount_s {
|
||||
int cnt;
|
||||
enum tree_code oecode;
|
||||
tree op;
|
||||
} oecount;
|
||||
|
||||
DEF_VEC_O(oecount);
|
||||
DEF_VEC_ALLOC_O(oecount,heap);
|
||||
|
||||
/* The heap for the oecount hashtable and the sorted list of operands. */
|
||||
static VEC (oecount, heap) *cvec;
|
||||
|
||||
/* Hash function for oecount. */
|
||||
|
||||
static hashval_t
|
||||
oecount_hash (const void *p)
|
||||
{
|
||||
const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
|
||||
return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
|
||||
}
|
||||
|
||||
/* Comparison function for oecount. */
|
||||
|
||||
static int
|
||||
oecount_eq (const void *p1, const void *p2)
|
||||
{
|
||||
const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
|
||||
const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
|
||||
return (c1->oecode == c2->oecode
|
||||
&& c1->op == c2->op);
|
||||
}
|
||||
|
||||
/* Comparison function for qsort sorting oecount elements by count. */
|
||||
|
||||
static int
|
||||
oecount_cmp (const void *p1, const void *p2)
|
||||
{
|
||||
const oecount *c1 = (const oecount *)p1;
|
||||
const oecount *c2 = (const oecount *)p2;
|
||||
return c1->cnt - c2->cnt;
|
||||
}
|
||||
|
||||
/* Walks the linear chain with result *DEF searching for an operation
|
||||
with operand OP and code OPCODE removing that from the chain. *DEF
|
||||
is updated if there is only one operand but no operation left. */
|
||||
|
||||
static void
|
||||
zero_one_operation (tree *def, enum tree_code opcode, tree op)
|
||||
{
|
||||
gimple stmt = SSA_NAME_DEF_STMT (*def);
|
||||
|
||||
do
|
||||
{
|
||||
tree name = gimple_assign_rhs1 (stmt);
|
||||
|
||||
/* If this is the operation we look for and one of the operands
|
||||
is ours simply propagate the other operand into the stmts
|
||||
single use. */
|
||||
if (gimple_assign_rhs_code (stmt) == opcode
|
||||
&& (name == op
|
||||
|| gimple_assign_rhs2 (stmt) == op))
|
||||
{
|
||||
gimple use_stmt;
|
||||
use_operand_p use;
|
||||
gimple_stmt_iterator gsi;
|
||||
if (name == op)
|
||||
name = gimple_assign_rhs2 (stmt);
|
||||
gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
|
||||
single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
|
||||
if (gimple_assign_lhs (stmt) == *def)
|
||||
*def = name;
|
||||
SET_USE (use, name);
|
||||
if (TREE_CODE (name) != SSA_NAME)
|
||||
update_stmt (use_stmt);
|
||||
gsi = gsi_for_stmt (stmt);
|
||||
gsi_remove (&gsi, true);
|
||||
release_defs (stmt);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Continue walking the chain. */
|
||||
gcc_assert (name != op
|
||||
&& TREE_CODE (name) == SSA_NAME);
|
||||
stmt = SSA_NAME_DEF_STMT (name);
|
||||
}
|
||||
while (1);
|
||||
}
|
||||
|
||||
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
|
||||
the result. Places the statement after the definition of either
|
||||
OP1 or OP2. Returns the new statement. */
|
||||
|
||||
static gimple
|
||||
build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
|
||||
{
|
||||
gimple op1def = NULL, op2def = NULL;
|
||||
gimple_stmt_iterator gsi;
|
||||
tree op;
|
||||
gimple sum;
|
||||
|
||||
/* Create the addition statement. */
|
||||
sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
|
||||
op = make_ssa_name (tmpvar, sum);
|
||||
gimple_assign_set_lhs (sum, op);
|
||||
|
||||
/* Find an insertion place and insert. */
|
||||
if (TREE_CODE (op1) == SSA_NAME)
|
||||
op1def = SSA_NAME_DEF_STMT (op1);
|
||||
if (TREE_CODE (op2) == SSA_NAME)
|
||||
op2def = SSA_NAME_DEF_STMT (op2);
|
||||
if ((!op1def || gimple_nop_p (op1def))
|
||||
&& (!op2def || gimple_nop_p (op2def)))
|
||||
{
|
||||
gsi = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR));
|
||||
gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
|
||||
}
|
||||
else if ((!op1def || gimple_nop_p (op1def))
|
||||
|| (op2def && !gimple_nop_p (op2def)
|
||||
&& stmt_dominates_stmt_p (op1def, op2def)))
|
||||
{
|
||||
if (gimple_code (op2def) == GIMPLE_PHI)
|
||||
{
|
||||
gsi = gsi_start_bb (gimple_bb (op2def));
|
||||
gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
|
||||
}
|
||||
else
|
||||
{
|
||||
gsi = gsi_for_stmt (op2def);
|
||||
gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (gimple_code (op1def) == GIMPLE_PHI)
|
||||
{
|
||||
gsi = gsi_start_bb (gimple_bb (op1def));
|
||||
gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
|
||||
}
|
||||
else
|
||||
{
|
||||
gsi = gsi_for_stmt (op1def);
|
||||
gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
|
||||
}
|
||||
}
|
||||
update_stmt (sum);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/* Perform un-distribution of divisions and multiplications.
|
||||
A * X + B * X is transformed into (A + B) * X and A / X + B / X
|
||||
to (A + B) / X for real X.
|
||||
|
||||
The algorithm is organized as follows.
|
||||
|
||||
- First we walk the addition chain *OPS looking for summands that
|
||||
are defined by a multiplication or a real division. This results
|
||||
in the candidates bitmap with relevant indices into *OPS.
|
||||
|
||||
- Second we build the chains of multiplications or divisions for
|
||||
these candidates, counting the number of occurences of (operand, code)
|
||||
pairs in all of the candidates chains.
|
||||
|
||||
- Third we sort the (operand, code) pairs by number of occurence and
|
||||
process them starting with the pair with the most uses.
|
||||
|
||||
* For each such pair we walk the candidates again to build a
|
||||
second candidate bitmap noting all multiplication/division chains
|
||||
that have at least one occurence of (operand, code).
|
||||
|
||||
* We build an alternate addition chain only covering these
|
||||
candidates with one (operand, code) operation removed from their
|
||||
multiplication/division chain.
|
||||
|
||||
* The first candidate gets replaced by the alternate addition chain
|
||||
multiplied/divided by the operand.
|
||||
|
||||
* All candidate chains get disabled for further processing and
|
||||
processing of (operand, code) pairs continues.
|
||||
|
||||
The alternate addition chains built are re-processed by the main
|
||||
reassociation algorithm which allows optimizing a * x * y + b * y * x
|
||||
to (a + b ) * x * y in one invocation of the reassociation pass. */
|
||||
|
||||
static bool
|
||||
undistribute_ops_list (enum tree_code opcode,
|
||||
VEC (operand_entry_t, heap) **ops, struct loop *loop)
|
||||
{
|
||||
unsigned int length = VEC_length (operand_entry_t, *ops);
|
||||
operand_entry_t oe1;
|
||||
unsigned i, j;
|
||||
sbitmap candidates, candidates2;
|
||||
unsigned nr_candidates, nr_candidates2;
|
||||
sbitmap_iterator sbi0;
|
||||
VEC (operand_entry_t, heap) **subops;
|
||||
htab_t ctable;
|
||||
bool changed = false;
|
||||
|
||||
if (length <= 1
|
||||
|| opcode != PLUS_EXPR)
|
||||
return false;
|
||||
|
||||
/* Build a list of candidates to process. */
|
||||
candidates = sbitmap_alloc (length);
|
||||
sbitmap_zero (candidates);
|
||||
nr_candidates = 0;
|
||||
for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
|
||||
{
|
||||
enum tree_code dcode;
|
||||
gimple oe1def;
|
||||
|
||||
if (TREE_CODE (oe1->op) != SSA_NAME)
|
||||
continue;
|
||||
oe1def = SSA_NAME_DEF_STMT (oe1->op);
|
||||
if (!is_gimple_assign (oe1def))
|
||||
continue;
|
||||
dcode = gimple_assign_rhs_code (oe1def);
|
||||
if ((dcode != MULT_EXPR
|
||||
&& dcode != RDIV_EXPR)
|
||||
|| !is_reassociable_op (oe1def, dcode, loop))
|
||||
continue;
|
||||
|
||||
SET_BIT (candidates, i);
|
||||
nr_candidates++;
|
||||
}
|
||||
|
||||
if (nr_candidates < 2)
|
||||
{
|
||||
sbitmap_free (candidates);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "searching for un-distribute opportunities ");
|
||||
print_generic_expr (dump_file,
|
||||
VEC_index (operand_entry_t, *ops,
|
||||
sbitmap_first_set_bit (candidates))->op, 0);
|
||||
fprintf (dump_file, " %d\n", nr_candidates);
|
||||
}
|
||||
|
||||
/* Build linearized sub-operand lists and the counting table. */
|
||||
cvec = NULL;
|
||||
ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
|
||||
subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
|
||||
VEC_length (operand_entry_t, *ops));
|
||||
EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
|
||||
{
|
||||
gimple oedef;
|
||||
enum tree_code oecode;
|
||||
unsigned j;
|
||||
|
||||
oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
|
||||
oecode = gimple_assign_rhs_code (oedef);
|
||||
linearize_expr_tree (&subops[i], oedef,
|
||||
associative_tree_code (oecode), false);
|
||||
|
||||
for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
|
||||
{
|
||||
oecount c;
|
||||
void **slot;
|
||||
size_t idx;
|
||||
c.oecode = oecode;
|
||||
c.cnt = 1;
|
||||
c.op = oe1->op;
|
||||
VEC_safe_push (oecount, heap, cvec, &c);
|
||||
idx = VEC_length (oecount, cvec) + 41;
|
||||
slot = htab_find_slot (ctable, (void *)idx, INSERT);
|
||||
if (!*slot)
|
||||
{
|
||||
*slot = (void *)idx;
|
||||
}
|
||||
else
|
||||
{
|
||||
VEC_pop (oecount, cvec);
|
||||
VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
htab_delete (ctable);
|
||||
|
||||
/* Sort the counting table. */
|
||||
qsort (VEC_address (oecount, cvec), VEC_length (oecount, cvec),
|
||||
sizeof (oecount), oecount_cmp);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
oecount *c;
|
||||
fprintf (dump_file, "Candidates:\n");
|
||||
for (j = 0; VEC_iterate (oecount, cvec, j, c); ++j)
|
||||
{
|
||||
fprintf (dump_file, " %u %s: ", c->cnt,
|
||||
c->oecode == MULT_EXPR
|
||||
? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
|
||||
print_generic_expr (dump_file, c->op, 0);
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Process the (operand, code) pairs in order of most occurence. */
|
||||
candidates2 = sbitmap_alloc (length);
|
||||
while (!VEC_empty (oecount, cvec))
|
||||
{
|
||||
oecount *c = VEC_last (oecount, cvec);
|
||||
if (c->cnt < 2)
|
||||
break;
|
||||
|
||||
/* Now collect the operands in the outer chain that contain
|
||||
the common operand in their inner chain. */
|
||||
sbitmap_zero (candidates2);
|
||||
nr_candidates2 = 0;
|
||||
EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
|
||||
{
|
||||
gimple oedef;
|
||||
enum tree_code oecode;
|
||||
unsigned j;
|
||||
tree op = VEC_index (operand_entry_t, *ops, i)->op;
|
||||
|
||||
/* If we undistributed in this chain already this may be
|
||||
a constant. */
|
||||
if (TREE_CODE (op) != SSA_NAME)
|
||||
continue;
|
||||
|
||||
oedef = SSA_NAME_DEF_STMT (op);
|
||||
oecode = gimple_assign_rhs_code (oedef);
|
||||
if (oecode != c->oecode)
|
||||
continue;
|
||||
|
||||
for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
|
||||
{
|
||||
if (oe1->op == c->op)
|
||||
{
|
||||
SET_BIT (candidates2, i);
|
||||
++nr_candidates2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nr_candidates2 >= 2)
|
||||
{
|
||||
operand_entry_t oe1, oe2;
|
||||
tree tmpvar;
|
||||
gimple prod;
|
||||
int first = sbitmap_first_set_bit (candidates2);
|
||||
|
||||
/* Build the new addition chain. */
|
||||
oe1 = VEC_index (operand_entry_t, *ops, first);
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Building (");
|
||||
print_generic_expr (dump_file, oe1->op, 0);
|
||||
}
|
||||
tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
|
||||
add_referenced_var (tmpvar);
|
||||
zero_one_operation (&oe1->op, c->oecode, c->op);
|
||||
EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
|
||||
{
|
||||
gimple sum;
|
||||
oe2 = VEC_index (operand_entry_t, *ops, i);
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, " + ");
|
||||
print_generic_expr (dump_file, oe2->op, 0);
|
||||
}
|
||||
zero_one_operation (&oe2->op, c->oecode, c->op);
|
||||
sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
|
||||
oe2->op = fold_convert (TREE_TYPE (oe2->op), integer_zero_node);
|
||||
oe2->rank = 0;
|
||||
oe1->op = gimple_get_lhs (sum);
|
||||
}
|
||||
|
||||
/* Apply the multiplication/division. */
|
||||
prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
|
||||
print_generic_expr (dump_file, c->op, 0);
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
|
||||
/* Record it in the addition chain and disable further
|
||||
undistribution with this op. */
|
||||
oe1->op = gimple_assign_lhs (prod);
|
||||
oe1->rank = get_rank (oe1->op);
|
||||
VEC_free (operand_entry_t, heap, subops[first]);
|
||||
|
||||
changed = true;
|
||||
}
|
||||
|
||||
VEC_pop (oecount, cvec);
|
||||
}
|
||||
|
||||
for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
|
||||
VEC_free (operand_entry_t, heap, subops[i]);
|
||||
free (subops);
|
||||
VEC_free (oecount, heap, cvec);
|
||||
sbitmap_free (candidates);
|
||||
sbitmap_free (candidates2);
|
||||
|
||||
return changed;
|
||||
}
|
||||
|
||||
|
||||
/* Perform various identities and other optimizations on the list of
|
||||
operand entries, stored in OPS. The tree code for the binary
|
||||
operation between all the operands is OPCODE. */
|
||||
|
@ -1097,7 +1506,8 @@ should_break_up_subtract (gimple stmt)
|
|||
if (TREE_CODE (lhs) == SSA_NAME
|
||||
&& (immusestmt = get_single_immediate_use (lhs))
|
||||
&& is_gimple_assign (immusestmt)
|
||||
&& gimple_assign_rhs_code (immusestmt) == PLUS_EXPR)
|
||||
&& (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
|
||||
|| gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
@ -1125,7 +1535,8 @@ break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
|
|||
Place the operands of the expression tree in the vector named OPS. */
|
||||
|
||||
static void
|
||||
linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
|
||||
linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
|
||||
bool is_associative, bool set_visited)
|
||||
{
|
||||
gimple_stmt_iterator gsinow, gsilhs;
|
||||
tree binlhs = gimple_assign_rhs1 (stmt);
|
||||
|
@ -1136,7 +1547,8 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
|
|||
enum tree_code rhscode = gimple_assign_rhs_code (stmt);
|
||||
struct loop *loop = loop_containing_stmt (stmt);
|
||||
|
||||
gimple_set_visited (stmt, true);
|
||||
if (set_visited)
|
||||
gimple_set_visited (stmt, true);
|
||||
|
||||
if (TREE_CODE (binlhs) == SSA_NAME)
|
||||
{
|
||||
|
@ -1160,6 +1572,13 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
|
|||
{
|
||||
tree temp;
|
||||
|
||||
/* If this is not a associative operation like division, give up. */
|
||||
if (!is_associative)
|
||||
{
|
||||
add_to_ops_vec (ops, binrhs);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!binrhsisreassoc)
|
||||
{
|
||||
add_to_ops_vec (ops, binrhs);
|
||||
|
@ -1203,7 +1622,8 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
|
|||
gsinow = gsi_for_stmt (stmt);
|
||||
gsilhs = gsi_for_stmt (SSA_NAME_DEF_STMT (binlhs));
|
||||
gsi_move_before (&gsilhs, &gsinow);
|
||||
linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs));
|
||||
linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
|
||||
is_associative, set_visited);
|
||||
add_to_ops_vec (ops, binrhs);
|
||||
}
|
||||
|
||||
|
@ -1344,7 +1764,16 @@ reassociate_bb (basic_block bb)
|
|||
/* If this was part of an already processed statement,
|
||||
we don't need to touch it again. */
|
||||
if (gimple_visited_p (stmt))
|
||||
continue;
|
||||
{
|
||||
/* This statement might have become dead because of previous
|
||||
reassociations. */
|
||||
if (has_zero_uses (gimple_get_lhs (stmt)))
|
||||
{
|
||||
gsi_remove (&gsi, true);
|
||||
release_defs (stmt);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
lhs = gimple_assign_lhs (stmt);
|
||||
rhs1 = gimple_assign_rhs1 (stmt);
|
||||
|
@ -1375,12 +1804,21 @@ reassociate_bb (basic_block bb)
|
|||
continue;
|
||||
|
||||
gimple_set_visited (stmt, true);
|
||||
linearize_expr_tree (&ops, stmt);
|
||||
linearize_expr_tree (&ops, stmt, true, true);
|
||||
qsort (VEC_address (operand_entry_t, ops),
|
||||
VEC_length (operand_entry_t, ops),
|
||||
sizeof (operand_entry_t),
|
||||
sort_by_operand_rank);
|
||||
optimize_ops_list (rhs_code, &ops);
|
||||
if (undistribute_ops_list (rhs_code, &ops,
|
||||
loop_containing_stmt (stmt)))
|
||||
{
|
||||
qsort (VEC_address (operand_entry_t, ops),
|
||||
VEC_length (operand_entry_t, ops),
|
||||
sizeof (operand_entry_t),
|
||||
sort_by_operand_rank);
|
||||
optimize_ops_list (rhs_code, &ops);
|
||||
}
|
||||
|
||||
if (VEC_length (operand_entry_t, ops) == 1)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue