re PR tree-optimization/29789 (Missed invariant out of the loop with conditionals and shifts)

2007-04-22  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/29789
	* tree-ssa-loop-im.c (stmt_cost): Adjust cost of shifts.
	(rewrite_reciprocal): New helper split out from
	determine_invariantness_stmt.
	(rewrite_bittest): Likewise.
	(determine_invariantness_stmt): Rewrite (A >> B) & 1 to
	A & (1 << B) if (1 << B) is loop invariant but (A >> B)
	is not.

	* gcc.dg/tree-ssa/ssa-lim-1.c: New testcase.
	* gcc.dg/tree-ssa/ssa-lim-2.c: Likewise.

From-SVN: r124042
This commit is contained in:
Richard Guenther 2007-04-22 11:26:49 +00:00 committed by Richard Biener
parent 4c9be80604
commit e0a607311c
5 changed files with 195 additions and 29 deletions

View File

@ -1,3 +1,14 @@
2007-04-22 Richard Guenther <rguenther@suse.de>
PR tree-optimization/29789
* tree-ssa-loop-im.c (stmt_cost): Adjust cost of shifts.
(rewrite_reciprocal): New helper split out from
determine_invariantness_stmt.
(rewrite_bittest): Likewise.
(determine_invariantness_stmt): Rewrite (A >> B) & 1 to
A & (1 << B) if (1 << B) is loop invariant but (A >> B)
is not.
2007-04-22 Revital Eres <eres@il.ibm.com>
* loop-unroll.c (var_to_expand): New field to support also

View File

@ -1,3 +1,9 @@
2007-04-22 Richard Guenther <rguenther@suse.de>
PR tree-optimization/29789
* gcc.dg/tree-ssa/ssa-lim-1.c: New testcase.
* gcc.dg/tree-ssa/ssa-lim-2.c: Likewise.
2007-04-22 Revital Eres <eres@il.ibm.com>
* gcc.dg/var-expand2.c: New test.

View File

@ -0,0 +1,22 @@
/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-lim" } */
/* This is a variant that does cause fold to place a cast to
int before testing bit 1. */
void
quantum_toffoli (int control1, int control2, int target,
unsigned long *state, int size)
{
int i;
for(i=0; i<size; i++)
{
if (state[i] & ((unsigned long) 1 << control1))
if (state[i] & ((unsigned long) 1 << control2))
state[i] ^= ((unsigned long) 1 << target);
}
}
/* { dg-final { scan-tree-dump-times "shifttmp" 6 "lim" } } */
/* { dg-final { cleanup-tree-dump "lim" } } */

View File

@ -0,0 +1,22 @@
/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-lim" } */
/* This is a variant that doesn't cause fold to place a cast to
int before testing bit 1. */
void
quantum_toffoli(int control1, int control2, int target, int *state,
int size)
{
int i;
for(i=0; i<size; i++)
{
if (state[i] & ( 1 << control1))
if (state[i] & ( 1 << control2))
state[i] ^= ( 1 << target);
}
}
/* { dg-final { scan-tree-dump-times "shifttmp" 6 "lim" } } */
/* { dg-final { cleanup-tree-dump "lim" } } */

View File

@ -460,6 +460,11 @@ stmt_cost (tree stmt)
cost += 20;
break;
case LSHIFT_EXPR:
case RSHIFT_EXPR:
cost += 20;
break;
default:
break;
}
@ -571,6 +576,123 @@ free_lim_aux_data (struct lim_aux_data *data)
free (data);
}
/* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
static tree
rewrite_reciprocal (block_stmt_iterator *bsi)
{
tree stmt, lhs, rhs, stmt1, stmt2, var, name, tmp;
stmt = bsi_stmt (*bsi);
lhs = GENERIC_TREE_OPERAND (stmt, 0);
rhs = GENERIC_TREE_OPERAND (stmt, 1);
/* stmt must be GIMPLE_MODIFY_STMT. */
var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
add_referenced_var (var);
tmp = build2 (RDIV_EXPR, TREE_TYPE (rhs),
build_real (TREE_TYPE (rhs), dconst1),
TREE_OPERAND (rhs, 1));
stmt1 = build_gimple_modify_stmt (var, tmp);
name = make_ssa_name (var, stmt1);
GIMPLE_STMT_OPERAND (stmt1, 0) = name;
tmp = build2 (MULT_EXPR, TREE_TYPE (rhs),
name, TREE_OPERAND (rhs, 0));
stmt2 = build_gimple_modify_stmt (lhs, tmp);
/* Replace division stmt with reciprocal and multiply stmts.
The multiply stmt is not invariant, so update iterator
and avoid rescanning. */
bsi_replace (bsi, stmt1, true);
bsi_insert_after (bsi, stmt2, BSI_NEW_STMT);
SSA_NAME_DEF_STMT (lhs) = stmt2;
/* Continue processing with invariant reciprocal statement. */
return stmt1;
}
/* Check if the pattern at *BSI is a bittest of the form
(A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
static tree
rewrite_bittest (block_stmt_iterator *bsi)
{
tree stmt, lhs, rhs, var, name, stmt1, stmt2, t;
use_operand_p use;
stmt = bsi_stmt (*bsi);
lhs = GENERIC_TREE_OPERAND (stmt, 0);
rhs = GENERIC_TREE_OPERAND (stmt, 1);
/* Verify that the single use of lhs is a comparison against zero. */
if (TREE_CODE (lhs) != SSA_NAME
|| !single_imm_use (lhs, &use, &stmt1)
|| TREE_CODE (stmt1) != COND_EXPR)
return stmt;
t = COND_EXPR_COND (stmt1);
if (TREE_OPERAND (t, 0) != lhs
|| (TREE_CODE (t) != NE_EXPR
&& TREE_CODE (t) != EQ_EXPR)
|| !integer_zerop (TREE_OPERAND (t, 1)))
return stmt;
/* Get at the operands of the shift. The rhs is TMP1 & 1. */
stmt1 = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0));
if (TREE_CODE (stmt1) != GIMPLE_MODIFY_STMT)
return stmt;
/* There is a conversion inbetween possibly inserted by fold. */
t = GIMPLE_STMT_OPERAND (stmt1, 1);
if (TREE_CODE (t) == NOP_EXPR
|| TREE_CODE (t) == CONVERT_EXPR)
{
t = TREE_OPERAND (t, 0);
if (TREE_CODE (t) != SSA_NAME
|| !has_single_use (t))
return stmt;
stmt1 = SSA_NAME_DEF_STMT (t);
if (TREE_CODE (stmt1) != GIMPLE_MODIFY_STMT)
return stmt;
t = GIMPLE_STMT_OPERAND (stmt1, 1);
}
/* Verify that B is loop invariant but A is not. Verify that with
all the stmt walking we are still in the same loop. */
if (TREE_CODE (t) == RSHIFT_EXPR
&& loop_containing_stmt (stmt1) == loop_containing_stmt (stmt)
&& outermost_invariant_loop_expr (TREE_OPERAND (t, 1),
loop_containing_stmt (stmt1)) != NULL
&& outermost_invariant_loop_expr (TREE_OPERAND (t, 0),
loop_containing_stmt (stmt1)) == NULL)
{
tree a = TREE_OPERAND (t, 0);
tree b = TREE_OPERAND (t, 1);
/* 1 << B */
var = create_tmp_var (TREE_TYPE (a), "shifttmp");
add_referenced_var (var);
t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
build_int_cst (TREE_TYPE (a), 1), b);
stmt1 = build_gimple_modify_stmt (var, t);
name = make_ssa_name (var, stmt1);
GIMPLE_STMT_OPERAND (stmt1, 0) = name;
/* A & (1 << B) */
t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
stmt2 = build_gimple_modify_stmt (lhs, t);
bsi_insert_before (bsi, stmt1, BSI_SAME_STMT);
bsi_replace (bsi, stmt2, true);
SSA_NAME_DEF_STMT (lhs) = stmt2;
return stmt1;
}
return stmt;
}
/* Determine the outermost loops in that statements in basic block BB are
invariant, and record them to the LIM_DATA associated with the statements.
Callback for walk_dominator_tree. */
@ -607,10 +729,11 @@ determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
continue;
}
rhs = GENERIC_TREE_OPERAND (stmt, 1);
/* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
to be hoisted out of loop, saving expensive divide. */
if (pos == MOVE_POSSIBLE
&& (rhs = GENERIC_TREE_OPERAND (stmt, 1)) != NULL
&& TREE_CODE (rhs) == RDIV_EXPR
&& flag_unsafe_math_optimizations
&& !flag_trapping_math
@ -618,35 +741,17 @@ determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
loop_containing_stmt (stmt)) != NULL
&& outermost_invariant_loop_expr (rhs,
loop_containing_stmt (stmt)) == NULL)
{
tree lhs, stmt1, stmt2, var, name, tmp;
stmt = rewrite_reciprocal (&bsi);
lhs = GENERIC_TREE_OPERAND (stmt, 0);
/* stmt must be GIMPLE_MODIFY_STMT. */
var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
add_referenced_var (var);
tmp = build2 (RDIV_EXPR, TREE_TYPE (rhs),
build_real (TREE_TYPE (rhs), dconst1),
TREE_OPERAND (rhs, 1));
stmt1 = build_gimple_modify_stmt (var, tmp);
name = make_ssa_name (var, stmt1);
GIMPLE_STMT_OPERAND (stmt1, 0) = name;
tmp = build2 (MULT_EXPR, TREE_TYPE (rhs),
name, TREE_OPERAND (rhs, 0));
stmt2 = build_gimple_modify_stmt (lhs, tmp);
/* Replace division stmt with reciprocal and multiply stmts.
The multiply stmt is not invariant, so update iterator
and avoid rescanning. */
bsi_replace (&bsi, stmt1, true);
bsi_insert_after (&bsi, stmt2, BSI_NEW_STMT);
SSA_NAME_DEF_STMT (lhs) = stmt2;
/* Continue processing with invariant reciprocal statement. */
stmt = stmt1;
}
/* If the shift count is invariant, convert (A >> B) & 1 to
A & (1 << B) allowing the bit mask to be hoisted out of the loop
saving an expensive shift. */
if (pos == MOVE_POSSIBLE
&& TREE_CODE (rhs) == BIT_AND_EXPR
&& integer_onep (TREE_OPERAND (rhs, 1))
&& TREE_CODE (TREE_OPERAND (rhs, 0)) == SSA_NAME
&& has_single_use (TREE_OPERAND (rhs, 0)))
stmt = rewrite_bittest (&bsi);
stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
LIM_DATA (stmt)->always_executed_in = outermost;