[multiple changes]

2004-09-16  Daniel Berlin  <dberlin@dberlin.org>

	* cfgloop.h (duplicate_loop):  Add prototype.
	* cfgloopmanip.c (duplicate_loop): Make non-static.
	* lambda-code.c (perfect_nestify): Factor out test whether
	we can handle this loop into separate function.
	Call it.
	(can_convert_to_perfect_nest): New function.
	(replace_uses_of_x_with_y): Add modify_stmt call.
	* tree-loop-linear.c (linear_transform_loops): Call
	rewrite_into_loop_closed_ssa and free_df.

2004-09-16  Daniel Berlin  <dberlin@dberlin.org>

	* lambda-code.c (invariant_in_loop): is_gimple_min_invariant is
	loop invariant as well.
	(perfect_nestify): new function.
	(gcc_loop_to_lambda_loop): New parameters to track lower bounds,
	upper bounds, and steps.
	Set outerinductionvar properly.
	(gcc_loopnest_to_lambda_loopnest): Add loops and need_perfect
	parameters.
	Return NULL if we need a perfect loop and can't make one.
	(lambda_loopnest_to_gcc_loopnest): Correct algorithm.
	(not_interesting_stmt): New function.
	(phi_loop_edge_uses_def): Ditto.
	(stmt_uses_phi_result): Ditto.
	(stmt_is_bumper_for_loop): Ditto.
	(perfect_nest_p): Ditto.
	(nestify_update_pending_stmts): Ditto.
	(replace_uses_of_x_with_y): Ditto.
	(stmt_uses_op): Ditto.
	(perfect_nestify): Ditto.
	* lambda-mat.c (lambda_matrix_id_p): New function.
	* lambda-trans.c (lambda_trans_matrix_id_p): Ditto.
	* lambda.h: Update prototypes.
	* tree-loop-linear (linear_transform_loop): Use new
	perfect_nest_p. Detect and ignore identity transform.
	* tree-ssa-loop.c (pass_linear_transform): Use TODO_write_loop_closed.

2004-09-16  Sebastian Pop  <pop@cri.ensmp.fr>

	* tree-loop-linear.c (gather_interchange_stats): Add more comments.
	Gather also strides of accessed data.  Pass in the data references
	array.
	(try_interchange_loops): Add a new heuristic for handling the temporal
	locality.  Pass in the data references array.
	(linear_transform_loops): Pass the data references array to
	try_interchange_loops.

From-SVN: r87607
This commit is contained in:
Daniel Berlin 2004-09-16 16:16:14 +00:00
parent 83c9948608
commit f67d92e937
8 changed files with 786 additions and 144 deletions

View File

@ -1,3 +1,53 @@
2004-09-16 Daniel Berlin <dberlin@dberlin.org>
* cfgloop.h (duplicate_loop): Add prototype.
* cfgloopmanip.c (duplicate_loop): Make non-static.
* lambda-code.c (perfect_nestify): Factor out test whether
we can handle this loop into separate function.
Call it.
(can_convert_to_perfect_nest): New function.
(replace_uses_of_x_with_y): Add modify_stmt call.
* tree-loop-linear.c (linear_transform_loops): Call
rewrite_into_loop_closed_ssa and free_df.
2004-09-16 Daniel Berlin <dberlin@dberlin.org>
* lambda-code.c (invariant_in_loop): is_gimple_min_invariant is
loop invariant as well.
(perfect_nestify): new function.
(gcc_loop_to_lambda_loop): New parameters to track lower bounds,
upper bounds, and steps.
Set outerinductionvar properly.
(gcc_loopnest_to_lambda_loopnest): Add loops and need_perfect
parameters.
Return NULL if we need a perfect loop and can't make one.
(lambda_loopnest_to_gcc_loopnest): Correct algorithm.
(not_interesting_stmt): New function.
(phi_loop_edge_uses_def): Ditto.
(stmt_uses_phi_result): Ditto.
(stmt_is_bumper_for_loop): Ditto.
(perfect_nest_p): Ditto.
(nestify_update_pending_stmts): Ditto.
(replace_uses_of_x_with_y): Ditto.
(stmt_uses_op): Ditto.
(perfect_nestify): Ditto.
* lambda-mat.c (lambda_matrix_id_p): New function.
* lambda-trans.c (lambda_trans_matrix_id_p): Ditto.
* lambda.h: Update prototypes.
* tree-loop-linear (linear_transform_loop): Use new
perfect_nest_p. Detect and ignore identity transform.
* tree-ssa-loop.c (pass_linear_transform): Use TODO_write_loop_closed.
2004-09-16 Sebastian Pop <pop@cri.ensmp.fr>
* tree-loop-linear.c (gather_interchange_stats): Add more comments.
Gather also strides of accessed data. Pass in the data references
array.
(try_interchange_loops): Add a new heuristic for handling the temporal
locality. Pass in the data references array.
(linear_transform_loops): Pass the data references array to
try_interchange_loops.
2004-09-16 Kazu Hirata <kazu@cs.umass.edu>
* doc/invoke.texi: Fix typos. Follow spelling conventions.

View File

@ -308,6 +308,8 @@ extern bool can_duplicate_loop_p (struct loop *loop);
#define DLTHE_FLAG_UPDATE_FREQ 1 /* Update frequencies in
duplicate_loop_to_header_edge. */
extern struct loop * duplicate_loop (struct loops *, struct loop *,
struct loop *);
extern int duplicate_loop_to_header_edge (struct loop *, edge, struct loops *,
unsigned, sbitmap, edge, edge *,
unsigned *, int);

View File

@ -29,8 +29,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "cfglayout.h"
#include "output.h"
static struct loop * duplicate_loop (struct loops *, struct loop *,
struct loop *);
static void duplicate_subloops (struct loops *, struct loop *, struct loop *);
static void copy_loops_to (struct loops *, struct loop **, int,
struct loop *);
@ -701,7 +699,7 @@ place_new_loop (struct loops *loops, struct loop *loop)
/* Copies copy of LOOP as subloop of TARGET loop, placing newly
created loop into LOOPS structure. */
static struct loop *
struct loop *
duplicate_loop (struct loops *loops, struct loop *loop, struct loop *target)
{
struct loop *cloop;

View File

@ -115,6 +115,13 @@
Fourier-Motzkin elimination is used to compute the bounds of the base space
of the lattice. */
DEF_VEC_GC_P(int);
static bool perfect_nestify (struct loops *,
struct loop *, VEC (tree) *,
VEC (tree) *, VEC (int) *, VEC (tree) *);
/* Lattice stuff that is internal to the code generation algorithm. */
typedef struct
@ -1160,20 +1167,23 @@ gcc_tree_to_linear_expression (int depth, tree expr,
static bool
invariant_in_loop (struct loop *loop, tree op)
{
if (is_gimple_min_invariant (op))
return true;
if (loop->depth == 0)
return true;
if (TREE_CODE (op) == SSA_NAME)
{
tree def;
def = SSA_NAME_DEF_STMT (op);
if (TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL
&& IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op)))
&& IS_EMPTY_STMT (def))
return true;
if (IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op)))
if (IS_EMPTY_STMT (def))
return false;
if (loop->outer)
if (!invariant_in_loop (loop->outer, op))
if (loop->outer
&& !invariant_in_loop (loop->outer, op))
return false;
return !flow_bb_inside_loop_p (loop,
bb_for_stmt (SSA_NAME_DEF_STMT (op)));
return !flow_bb_inside_loop_p (loop, bb_for_stmt (def));
}
return false;
}
@ -1190,7 +1200,10 @@ static lambda_loop
gcc_loop_to_lambda_loop (struct loop *loop, int depth,
VEC (tree) ** invariants,
tree * ourinductionvar,
VEC (tree) * outerinductionvars)
VEC (tree) * outerinductionvars,
VEC (tree) ** lboundvars,
VEC (tree) ** uboundvars,
VEC (int) ** steps)
{
tree phi;
tree exit_cond;
@ -1201,15 +1214,11 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
tree test;
int stepint;
int extra = 0;
tree uboundvar;
tree lboundvar, uboundvar;
use_optype uses;
/* Find out induction var and set the pointer so that the caller can
append it to the outerinductionvars array later. */
/* Find out induction var and exit condition. */
inductionvar = find_induction_var_from_exit_cond (loop);
*ourinductionvar = inductionvar;
exit_cond = get_loop_exit_condition (loop);
if (inductionvar == NULL || exit_cond == NULL)
@ -1260,7 +1269,9 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
}
}
/* The induction variable name/version we want to put in the array is the
result of the induction variable phi node. */
*ourinductionvar = PHI_RESULT (phi);
access_fn = instantiate_parameters
(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
if (!access_fn)
@ -1316,14 +1327,20 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
}
if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 1),
outerinductionvars, *invariants,
0);
{
lboundvar = PHI_ARG_DEF (phi, 1);
lbound = gcc_tree_to_linear_expression (depth, lboundvar,
outerinductionvars, *invariants,
0);
}
else
lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 0),
outerinductionvars, *invariants,
0);
{
lboundvar = PHI_ARG_DEF (phi, 0);
lbound = gcc_tree_to_linear_expression (depth, lboundvar,
outerinductionvars, *invariants,
0);
}
if (!lbound)
{
@ -1368,6 +1385,11 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
uboundvar,
outerinductionvars,
*invariants, extra);
VEC_safe_push (tree, *uboundvars, build (PLUS_EXPR, integer_type_node,
uboundvar,
build_int_cst (integer_type_node, extra)));
VEC_safe_push (tree, *lboundvars, lboundvar);
VEC_safe_push (int, *steps, stepint);
if (!ubound)
{
@ -1400,7 +1422,7 @@ find_induction_var_from_exit_cond (struct loop *loop)
test = TREE_OPERAND (expr, 0);
if (TREE_CODE_CLASS (TREE_CODE (test)) != '<')
return NULL_TREE;
/* This is a guess. We say that for a <,!=,<= b, a is the induction
/* This is a guess. We say that for a <,!=,<= b, a is the induction
variable.
For >, >=, we guess b is the induction variable.
If we are wrong, it'll fail the rest of the induction variable tests, and
@ -1433,15 +1455,20 @@ DEF_VEC_GC_P(lambda_loop);
during this process. */
lambda_loopnest
gcc_loopnest_to_lambda_loopnest (struct loop * loop_nest,
gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
struct loop * loop_nest,
VEC (tree) **inductionvars,
VEC (tree) **invariants)
VEC (tree) **invariants,
bool need_perfect_nest)
{
lambda_loopnest ret;
struct loop *temp;
int depth = 0;
size_t i;
VEC (lambda_loop) *loops;
VEC (tree) *uboundvars;
VEC (tree) *lboundvars;
VEC (int) *steps;
lambda_loop newloop;
tree inductionvar = NULL;
@ -1454,18 +1481,30 @@ gcc_loopnest_to_lambda_loopnest (struct loop * loop_nest,
loops = VEC_alloc (lambda_loop, 1);
*inductionvars = VEC_alloc (tree, 1);
*invariants = VEC_alloc (tree, 1);
lboundvars = VEC_alloc (tree, 1);
uboundvars = VEC_alloc (tree, 1);
steps = VEC_alloc (int, 1);
temp = loop_nest;
while (temp)
{
newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
&inductionvar, *inductionvars);
&inductionvar, *inductionvars,
&lboundvars, &uboundvars,
&steps);
if (!newloop)
return NULL;
VEC_safe_push (tree, *inductionvars, inductionvar);
VEC_safe_push (lambda_loop, loops, newloop);
temp = temp->inner;
}
if (need_perfect_nest
&& !perfect_nestify (currloops, loop_nest,
lboundvars, uboundvars, steps, *inductionvars))
{
if (dump_file)
fprintf (dump_file, "Not a perfect nest and couldn't convert to one.\n");
return NULL;
}
ret = lambda_loopnest_new (depth, 2 * depth);
for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
LN_LOOPS (ret)[i] = newloop;
@ -1489,7 +1528,7 @@ lbv_to_gcc_expression (lambda_body_vector lbv,
/* Create a statement list and a linear expression temporary. */
stmts = alloc_stmt_list ();
resvar = create_tmp_var (integer_type_node, "lletmp");
resvar = create_tmp_var (integer_type_node, "lbvtmp");
add_referenced_tmp_var (resvar);
/* Start at 0. */
@ -1767,7 +1806,6 @@ lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
size_t depth = 0;
VEC(tree) *new_ivs;
block_stmt_iterator bsi;
basic_block *bbs;
if (dump_file)
{
@ -1849,66 +1887,56 @@ lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
i++;
temp = temp->inner;
}
/* Go through the loop and make iv replacements. */
bbs = get_loop_body (old_loopnest);
for (i = 0; i < old_loopnest->num_nodes; i++)
for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
{
tree stmt = bsi_stmt (bsi);
use_optype uses;
size_t j;
get_stmt_operands (stmt);
uses = STMT_USE_OPS (stmt);
for (j = 0; j < NUM_USES (uses); j++)
{
size_t k;
use_operand_p use = USE_OP_PTR (uses, j);
for (k = 0; k < VEC_length (tree, old_ivs); k++)
{
tree oldiv = VEC_index (tree, old_ivs, k);
if (USE_FROM_PTR (use) == oldiv)
{
tree newiv, stmts;
lambda_body_vector lbv;
/* Compute the new expression for the induction
variable. */
depth = VEC_length (tree, new_ivs);
lbv = lambda_body_vector_new (depth);
LBV_COEFFICIENTS (lbv)[k] = 1;
lbv = lambda_body_vector_compute_new (transform, lbv);
newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts);
/* Insert the statements to build that
expression. */
bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
/* Replace the use with the result of that
expression. */
if (dump_file)
{
fprintf (dump_file,
"Replacing induction variable use of ");
print_generic_stmt (dump_file, USE_FROM_PTR (use), 0);
fprintf (dump_file, " with ");
print_generic_stmt (dump_file, newiv, 0);
fprintf (dump_file, "\n");
}
SET_USE (use, newiv);
}
}
}
}
/* Rewrite uses of the old ivs so that they are now specified in terms of
the new ivs. */
temp = old_loopnest;
for (i = 0; i < VEC_length (tree, old_ivs); i++)
{
int j;
tree oldiv = VEC_index (tree, old_ivs, i);
dataflow_t imm = get_immediate_uses (SSA_NAME_DEF_STMT (oldiv));
for (j = 0; j < num_immediate_uses (imm); j++)
{
size_t k;
tree stmt = immediate_use (imm, j);
use_optype uses;
get_stmt_operands (stmt);
uses = STMT_USE_OPS (stmt);
for (k = 0; k < NUM_USES (uses); k++)
{
use_operand_p use = USE_OP_PTR (uses, k);
if (USE_FROM_PTR (use) == oldiv)
{
tree newiv, stmts;
lambda_body_vector lbv;
/* Compute the new expression for the induction
variable. */
depth = VEC_length (tree, new_ivs);
lbv = lambda_body_vector_new (depth);
LBV_COEFFICIENTS (lbv)[i] = 1;
lbv = lambda_body_vector_compute_new (transform, lbv);
newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts);
bsi = stmt_for_bsi (stmt);
/* Insert the statements to build that
expression. */
bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
SET_USE (use, newiv);
modify_stmt (stmt);
}
}
}
}
}
/* Returns true when the vector V is lexicographically positive, in
other words, when the first non zero element is positive. */
static bool
lambda_vector_lexico_pos (lambda_vector v, unsigned n)
lambda_vector_lexico_pos (lambda_vector v,
unsigned n)
{
unsigned i;
for (i = 0; i < n; i++)
@ -1923,6 +1951,442 @@ lambda_vector_lexico_pos (lambda_vector v, unsigned n)
return true;
}
/* Return TRUE if this is not interesting statement from the perspective of
determining if we have a perfect loop nest. */
static bool
not_interesting_stmt (tree stmt)
{
/* Note that COND_EXPR's aren't interesting because if they were exiting the
loop, we would have already failed the number of exits tests. */
if (TREE_CODE (stmt) == LABEL_EXPR
|| TREE_CODE (stmt) == GOTO_EXPR
|| TREE_CODE (stmt) == COND_EXPR)
return true;
return false;
}
/* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */
static bool
phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
{
int i;
for (i = 0; i < PHI_NUM_ARGS (phi); i++)
if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
if (PHI_ARG_DEF (phi, i) == def)
return true;
return false;
}
/* Return TRUE if STMT is a use of PHI_RESULT. */
static bool
stmt_uses_phi_result (tree stmt, tree phi_result)
{
use_optype uses = STMT_USE_OPS (stmt);
/* This is conservatively true, because we only want SIMPLE bumpers
of the form x +- constant for our pass. */
if (NUM_USES (uses) != 1)
return false;
if (USE_OP (uses, 0) == phi_result)
return true;
return false;
}
/* STMT is a bumper stmt for LOOP if the version it defines is used in the
in-loop-edge in a phi node, and the operand it uses is the result of that
phi node.
I.E. i_29 = i_3 + 1
i_3 = PHI (0, i_29); */
static bool
stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
{
tree use;
tree def;
def_optype defs = STMT_DEF_OPS (stmt);
dataflow_t imm;
int i;
if (NUM_DEFS (defs) != 1)
return false;
def = DEF_OP (defs, 0);
imm = get_immediate_uses (stmt);
for (i = 0; i < num_immediate_uses (imm); i++)
{
use = immediate_use (imm, i);
if (TREE_CODE (use) == PHI_NODE)
{
if (phi_loop_edge_uses_def (loop, use, def))
if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
return true;
}
}
return false;
}
/* Return true if LOOP is a perfect loop nest.
Perfect loop nests are those loop nests where all code occurs in the
innermost loop body.
If S is a program statement, then
ie
DO I = 1, 20
S1
DO J = 1, 20
...
END DO
END DO
is not a perfect loop nest because of S1.
DO I = 1, 20
DO J = 1, 20
S1
...
END DO
END DO
is a perfect loop nest.
Since we don't have high level loops anymore, we basically have to walk our
statements and ignore those that are there because the loop needs them (IE
the induction variable increment, and jump back to the top of the loop). */
bool
perfect_nest_p (struct loop *loop)
{
basic_block *bbs;
size_t i;
tree exit_cond;
if (!loop->inner)
return true;
bbs = get_loop_body (loop);
exit_cond = get_loop_exit_condition (loop);
for (i = 0; i < loop->num_nodes; i++)
{
if (bbs[i]->loop_father == loop)
{
block_stmt_iterator bsi;
for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
{
tree stmt = bsi_stmt (bsi);
if (stmt == exit_cond
|| not_interesting_stmt (stmt)
|| stmt_is_bumper_for_loop (loop, stmt))
continue;
free (bbs);
return false;
}
}
}
free (bbs);
/* See if the inner loops are perfectly nested as well. */
if (loop->inner)
return perfect_nest_p (loop->inner);
return true;
}
/* Add phi args using PENDINT_STMT list. */
static void
nestify_update_pending_stmts (edge e)
{
basic_block dest;
tree phi, arg, def;
if (!PENDING_STMT (e))
return;
dest = e->dest;
for (phi = phi_nodes (dest), arg = PENDING_STMT (e);
phi;
phi = TREE_CHAIN (phi), arg = TREE_CHAIN (arg))
{
def = TREE_VALUE (arg);
add_phi_arg (&phi, def, e);
}
PENDING_STMT (e) = NULL;
}
/* Replace the USES of tree X in STMT with tree Y */
static void
replace_uses_of_x_with_y (tree stmt, tree x, tree y)
{
use_optype uses = STMT_USE_OPS (stmt);
size_t i;
for (i = 0; i < NUM_USES (uses); i++)
{
if (USE_OP (uses, i) == x)
SET_USE_OP (uses, i, y);
}
}
/* Return TRUE if STMT uses tree OP in it's uses. */
static bool
stmt_uses_op (tree stmt, tree op)
{
use_optype uses = STMT_USE_OPS (stmt);
size_t i;
for (i = 0; i < NUM_USES (uses); i++)
{
if (USE_OP (uses, i) == op)
return true;
}
return false;
}
/* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
one. LOOPIVS is a vector of induction variables, one per loop.
ATM, we only handle imperfect nests of depth 2, where all of the statements
occur after the inner loop. */
static bool
can_convert_to_perfect_nest (struct loop *loop,
VEC (tree) *loopivs)
{
basic_block *bbs;
tree exit_condition;
size_t i;
block_stmt_iterator bsi;
/* Can't handle triply nested+ loops yet. */
if (!loop->inner || loop->inner->inner)
return false;
/* We only handle moving the after-inner-body statements right now, so make
sure all the statements we need to move are located in that position. */
bbs = get_loop_body (loop);
exit_condition = get_loop_exit_condition (loop);
for (i = 0; i < loop->num_nodes; i++)
{
if (bbs[i]->loop_father == loop)
{
for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
{
size_t j;
tree stmt = bsi_stmt (bsi);
if (stmt == exit_condition
|| not_interesting_stmt (stmt)
|| stmt_is_bumper_for_loop (loop, stmt))
continue;
/* If the statement uses inner loop ivs, we == screwed. */
for (j = 1; j < VEC_length (tree, loopivs); j++)
if (stmt_uses_op (stmt, VEC_index (tree, loopivs, j)))
{
free (bbs);
return false;
}
/* If the bb of a statement we care about isn't dominated by
the header of the inner loop, then we are also screwed. */
if (!dominated_by_p (CDI_DOMINATORS,
bb_for_stmt (stmt),
loop->inner->header))
{
free (bbs);
return false;
}
}
}
}
return true;
}
/* Transform the loop nest into a perfect nest, if possible.
LOOPS is the current struct loops *
LOOP is the loop nest to transform into a perfect nest
LBOUNDS are the lower bounds for the loops to transform
UBOUNDS are the upper bounds for the loops to transform
STEPS is the STEPS for the loops to transform.
LOOPIVS is the induction variables for the loops to transform.
Basically, for the case of
FOR (i = 0; i < 50; i++)
{
FOR (j =0; j < 50; j++)
{
<whatever>
}
<some code>
}
This function will transform it into a perfect loop nest by splitting the
outer loop into two loops, like so:
FOR (i = 0; i < 50; i++)
{
FOR (j = 0; j < 50; j++)
{
<whatever>
}
}
FOR (i = 0; i < 50; i ++)
{
<some code>
}
Return FALSE if we can't make this loop into a perfect nest. */
static bool
perfect_nestify (struct loops *loops,
struct loop *loop,
VEC (tree) *lbounds,
VEC (tree) *ubounds,
VEC (int) *steps,
VEC (tree) *loopivs)
{
basic_block *bbs;
tree exit_condition;
tree then_label, else_label, cond_stmt;
basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
size_t i;
block_stmt_iterator bsi;
edge e;
struct loop *newloop;
tree phi;
tree uboundvar;
tree stmt;
tree ivvar, ivvarinced;
VEC (tree) *phis;
if (!can_convert_to_perfect_nest (loop, loopivs))
return false;
phis = VEC_alloc (tree, 1);
/* Create the new loop */
olddest = loop->single_exit->dest;
preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
/* This is done because otherwise, it will release the ssa_name too early
when the edge gets redirected and it will get reused, causing the use of
the phi node to get rewritten. */
for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
{
/* These should be simple exit phi copies. */
if (PHI_NUM_ARGS (phi) != 1)
return false;
VEC_safe_push (tree, phis, PHI_RESULT (phi));
VEC_safe_push (tree, phis, PHI_ARG_DEF (phi, 0));
mark_for_rewrite (PHI_RESULT (phi));
}
e = redirect_edge_and_branch (preheaderbb->succ, headerbb);
unmark_all_for_rewrite ();
bb_ann (olddest)->phi_nodes = NULL;
/* Add back the old exit phis. */
while (VEC_length (tree, phis) != 0)
{
tree def;
tree phiname;
def = VEC_pop (tree, phis);
phiname = VEC_pop (tree, phis);
phi = create_phi_node (phiname, preheaderbb);
add_phi_arg (&phi, def, preheaderbb->pred);
}
nestify_update_pending_stmts (e);
bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
make_edge (headerbb, bodybb, EDGE_FALLTHRU);
then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
cond_stmt = build (COND_EXPR, void_type_node,
build (NE_EXPR, boolean_type_node,
integer_one_node,
integer_zero_node),
then_label, else_label);
bsi = bsi_start (bodybb);
bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
make_edge (latchbb, headerbb, EDGE_FALLTHRU);
/* Update the loop structures. */
newloop = duplicate_loop (loops, loop, olddest->loop_father);
newloop->header = headerbb;
newloop->latch = latchbb;
newloop->single_exit = e;
add_bb_to_loop (latchbb, newloop);
add_bb_to_loop (bodybb, newloop);
add_bb_to_loop (headerbb, newloop);
add_bb_to_loop (preheaderbb, olddest->loop_father);
set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
loop->single_exit->src);
set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
/* Create the new iv. */
ivvar = create_tmp_var (integer_type_node, "perfectiv");
add_referenced_tmp_var (ivvar);
bsi = bsi_last (newloop->latch->pred->src);
create_iv (VEC_index (tree, lbounds, 0),
build_int_cst (integer_type_node,
VEC_index (int, steps, 0)),
ivvar, newloop, &bsi, false, &ivvar, &ivvarinced);
/* Create the new upper bound. This may be not just a variable, so we copy
it to one just in case. */
exit_condition = get_loop_exit_condition (newloop);
uboundvar = create_tmp_var (integer_type_node, "uboundvar");
add_referenced_tmp_var (uboundvar);
stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
VEC_index (tree, ubounds, 0));
uboundvar = make_ssa_name (uboundvar, stmt);
TREE_OPERAND (stmt, 0) = uboundvar;
bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
COND_EXPR_COND (exit_condition) = build (LE_EXPR,
boolean_type_node,
ivvarinced,
uboundvar);
bbs = get_loop_body (loop);
/* Now replace the induction variable in the moved statements with the
correct loop induction variable. */
for (i = 0; i < loop->num_nodes; i++)
{
block_stmt_iterator tobsi = bsi_last (bodybb);
if (bbs[i]->loop_father == loop)
{
/* Note that the bsi only needs to be explicitly incremented
when we don't move something, since it is automatically
incremented when we do. */
for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
{
tree stmt = bsi_stmt (bsi);
if (stmt == exit_condition
|| not_interesting_stmt (stmt)
|| stmt_is_bumper_for_loop (loop, stmt))
{
bsi_next (&bsi);
continue;
}
replace_uses_of_x_with_y (stmt,
VEC_index (tree, loopivs, 0),
ivvar);
bsi_move_before (&bsi, &tobsi);
}
}
}
free (bbs);
flow_loops_find (loops, LOOP_ALL);
return perfect_nest_p (loop);
}
/* Return true if TRANS is a legal transformation matrix that respects
the dependence vectors in DISTS and DIRS. The conservative answer
is false.
@ -1931,27 +2395,29 @@ lambda_vector_lexico_pos (lambda_vector v, unsigned n)
matrix T is legal when applied to a loop nest with a set of
lexicographically non-negative distance vectors RDG if and only if
for each vector d in RDG, (T.d >= 0) is lexicographically positive.
i.e.: if and only if it transforms the lexicographically positive
ie.: if and only if it transforms the lexicographically positive
distance vectors to lexicographically positive vectors. Note that
a unimodular matrix must transform the zero vector (and only it) to
the zero vector." S.Muchnick. */
bool
lambda_transform_legal_p (lambda_trans_matrix trans,
int nb_loops, varray_type dependence_relations)
lambda_transform_legal_p (lambda_trans_matrix trans,
int nb_loops,
varray_type dependence_relations)
{
unsigned int i;
lambda_vector distres;
struct data_dependence_relation *ddr;
#if defined ENABLE_CHECKING
gcc_assert (LTM_COLSIZE (trans) == nb_loops
&& LTM_ROWSIZE (trans) == nb_loops);
if (LTM_COLSIZE (trans) != nb_loops
|| LTM_ROWSIZE (trans) != nb_loops)
abort ();
#endif
/* When there is an unknown relation in the dependence_relations, we
know that it is no worth looking at this loop nest: give up. */
ddr = (struct data_dependence_relation *)
ddr = (struct data_dependence_relation *)
VARRAY_GENERIC_PTR (dependence_relations, 0);
if (ddr == NULL)
return true;
@ -1963,12 +2429,14 @@ lambda_transform_legal_p (lambda_trans_matrix trans,
/* For each distance vector in the dependence graph. */
for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
{
ddr = (struct data_dependence_relation *)
ddr = (struct data_dependence_relation *)
VARRAY_GENERIC_PTR (dependence_relations, i);
/* Don't care about relations for which we know that there is no
dependence, nor about read-read (aka. output-dependences):
these data accesses can happen in any order. */
dependence, nor about read-read (aka. output-dependences):
these data accesses can happen in any order. */
if (DDR_ARE_DEPENDENT (ddr) == chrec_known
|| (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
continue;
@ -1977,12 +2445,11 @@ lambda_transform_legal_p (lambda_trans_matrix trans,
return false;
/* Compute trans.dist_vect */
lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
DDR_DIST_VECT (ddr), distres);
if (!lambda_vector_lexico_pos (distres, nb_loops))
return false;
}
return true;
}

View File

@ -70,6 +70,29 @@ lambda_matrix_id (lambda_matrix mat, int size)
mat[i][j] = (i == j) ? 1 : 0;
}
/* Return true if MAT is the identity matrix of SIZE */
bool
lambda_matrix_id_p (lambda_matrix mat, int size)
{
int i, j;
for (i = 0; i < size; i++)
for (j = 0; j < size; j++)
{
if (i == j)
{
if (mat[i][j] != 1)
return false;
}
else
{
if (mat[i][j] != 0)
return false;
}
}
return true;
}
/* Negate the elements of the M x N matrix MAT1 and store it in MAT2. */
void

View File

@ -45,7 +45,18 @@ lambda_trans_matrix_new (int colsize, int rowsize)
return ret;
}
/* Compute the inverse of the transformation. */
/* Return true if MAT is an identity matrix. */
bool
lambda_trans_matrix_id_p (lambda_trans_matrix mat)
{
if (LTM_ROWSIZE (mat) != LTM_COLSIZE (mat))
return false;
return lambda_matrix_id_p (LTM_MATRIX (mat), LTM_ROWSIZE (mat));
}
/* Compute the inverse of the transformation matrix MAT. */
lambda_trans_matrix
lambda_trans_matrix_inverse (lambda_trans_matrix mat)

View File

@ -105,7 +105,9 @@ typedef struct
lambda_loopnest lambda_loopnest_new (int, int);
lambda_loopnest lambda_loopnest_transform (lambda_loopnest, lambda_trans_matrix);
struct loop;
struct loops;
bool perfect_nest_p (struct loop *);
bool lambda_transform_legal_p (lambda_trans_matrix, int, varray_type);
void print_lambda_loopnest (FILE *, lambda_loopnest, char);
@ -116,6 +118,7 @@ void print_lambda_loop (FILE *, lambda_loop, int, int, char);
lambda_matrix lambda_matrix_new (int, int);
void lambda_matrix_id (lambda_matrix, int);
bool lambda_matrix_id_p (lambda_matrix, int);
void lambda_matrix_copy (lambda_matrix, lambda_matrix, int, int);
void lambda_matrix_negate (lambda_matrix, lambda_matrix, int, int);
void lambda_matrix_transpose (lambda_matrix, lambda_matrix, int, int);
@ -153,16 +156,17 @@ lambda_trans_matrix lambda_trans_matrix_inverse (lambda_trans_matrix);
void print_lambda_trans_matrix (FILE *, lambda_trans_matrix);
void lambda_matrix_vector_mult (lambda_matrix, int, int, lambda_vector,
lambda_vector);
bool lambda_trans_matrix_id_p (lambda_trans_matrix);
lambda_body_vector lambda_body_vector_new (int);
lambda_body_vector lambda_body_vector_compute_new (lambda_trans_matrix,
lambda_body_vector);
void print_lambda_body_vector (FILE *, lambda_body_vector);
struct loop;
lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loop *,
lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loops *,
struct loop *,
VEC(tree) **,
VEC(tree) **);
VEC(tree) **,
bool);
void lambda_loopnest_to_gcc_loopnest (struct loop *, VEC(tree) *,
VEC(tree) *,
lambda_loopnest,

View File

@ -55,22 +55,56 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
transform matrix for locality purposes.
TODO: Completion of partial transforms. */
/* Gather statistics for loop interchange. Initializes SUM the sum of
all the data dependence distances carried by loop LOOP_NUMBER.
NB_DEPS_NOT_CARRIED_BY_LOOP is initialized to the number of
dependence relations for which the loop LOOP_NUMBER is not carrying
any dependence. */
/* Gather statistics for loop interchange. LOOP_NUMBER is a relative
index in the considered loop nest. The first loop in the
considered loop nest is FIRST_LOOP, and consequently the index of
the considered loop is obtained by FIRST_LOOP + LOOP_NUMBER.
Initializes:
- DEPENDENCE_STEPS the sum of all the data dependence distances
carried by loop LOOP_NUMBER,
- NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
for which the loop LOOP_NUMBER is not carrying any dependence,
- ACCESS_STRIDES the sum of all the strides in LOOP_NUMBER.
Example: for the following loop,
| loop_1 runs 1335 times
| loop_2 runs 1335 times
| A[{{0, +, 1}_1, +, 1335}_2]
| B[{{0, +, 1}_1, +, 1335}_2]
| endloop_2
| A[{0, +, 1336}_1]
| endloop_1
gather_interchange_stats (in loop_1) will return
DEPENDENCE_STEPS = 3002
NB_DEPS_NOT_CARRIED_BY_LOOP = 5
ACCESS_STRIDES = 10694
gather_interchange_stats (in loop_2) will return
DEPENDENCE_STEPS = 3000
NB_DEPS_NOT_CARRIED_BY_LOOP = 7
ACCESS_STRIDES = 8010
*/
static void
gather_interchange_stats (varray_type dependence_relations,
varray_type datarefs,
unsigned int loop_number,
unsigned int *sum,
unsigned int *nb_deps_not_carried_by_loop)
unsigned int first_loop,
unsigned int *dependence_steps,
unsigned int *nb_deps_not_carried_by_loop,
unsigned int *access_strides)
{
unsigned int i;
*sum = 0;
*dependence_steps = 0;
*nb_deps_not_carried_by_loop = 0;
*access_strides = 0;
for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
{
int dist;
@ -78,11 +112,11 @@ gather_interchange_stats (varray_type dependence_relations,
(struct data_dependence_relation *)
VARRAY_GENERIC_PTR (dependence_relations, i);
/* Compute the dependence strides. */
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
{
/* Some constants will need tweaking, but not something that should
be user-accessible. Thus, no --param. */
*sum += 100;
(*dependence_steps) += 0;
continue;
}
@ -90,34 +124,64 @@ gather_interchange_stats (varray_type dependence_relations,
is no reuse of the data. */
if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
{
/* Ditto on the no --param here */
*sum += 1000;
(*dependence_steps) += 0;
continue;
}
dist = DDR_DIST_VECT (ddr)[loop_number];
if (dist == 0)
*nb_deps_not_carried_by_loop++;
(*nb_deps_not_carried_by_loop) += 1;
else if (dist < 0)
*sum += -dist;
(*dependence_steps) += -dist;
else
*sum += dist;
(*dependence_steps) += dist;
}
/* Compute the access strides. */
for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
{
unsigned int it;
struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
tree stmt = DR_STMT (dr);
struct loop *stmt_loop = loop_containing_stmt (stmt);
struct loop *inner_loop = current_loops->parray[first_loop + 1];
if (!flow_loop_nested_p (inner_loop, stmt_loop)
&& inner_loop->num != stmt_loop->num)
continue;
for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++)
{
tree chrec = DR_ACCESS_FN (dr, it);
tree tstride = evolution_part_in_loop_num
(chrec, first_loop + loop_number);
if (tstride == NULL_TREE
|| TREE_CODE (tstride) != INTEGER_CST)
continue;
(*access_strides) += int_cst_value (tstride);
}
}
}
/* Apply to TRANS any loop interchange that minimize inner loop steps.
DEPTH is the depth of the loop nest, and DEPENDENCE_RELATIONS is an array
of dependence relations.
Returns the new transform matrix. The smaller the reuse vector
distances in the inner loops, the fewer the cache misses. */
distances in the inner loops, the fewer the cache misses.
FIRST_LOOP is the loop->num of the first loop in the analyzed loop
nest. */
static lambda_trans_matrix
try_interchange_loops (lambda_trans_matrix trans,
unsigned int depth,
varray_type dependence_relations)
varray_type dependence_relations,
varray_type datarefs,
unsigned int first_loop)
{
unsigned int loop_i, loop_j;
unsigned int steps_i, steps_j;
unsigned int dependence_steps_i, dependence_steps_j;
unsigned int access_strides_i, access_strides_j;
unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
struct data_dependence_relation *ddr;
@ -132,33 +196,40 @@ try_interchange_loops (lambda_trans_matrix trans,
for (loop_j = 1; loop_j < depth; loop_j++)
for (loop_i = 0; loop_i < loop_j; loop_i++)
{
gather_interchange_stats (dependence_relations, loop_i, &steps_i,
&nb_deps_not_carried_by_i);
gather_interchange_stats (dependence_relations, loop_j, &steps_j,
&nb_deps_not_carried_by_j);
gather_interchange_stats (dependence_relations, datarefs,
loop_i, first_loop,
&dependence_steps_i,
&nb_deps_not_carried_by_i,
&access_strides_i);
gather_interchange_stats (dependence_relations, datarefs,
loop_j, first_loop,
&dependence_steps_j,
&nb_deps_not_carried_by_j,
&access_strides_j);
/* Heuristics for loop interchange profitability:
1. Inner loops should have smallest steps.
2. Inner loops should contain more dependence relations not
carried by the loop.
1. (spatial locality) Inner loops should have smallest
dependence steps.
2. (spatial locality) Inner loops should contain more
dependence relations not carried by the loop.
3. (temporal locality) Inner loops should have smallest
array access strides.
*/
if (steps_i < steps_j
|| nb_deps_not_carried_by_i > nb_deps_not_carried_by_j)
if (dependence_steps_i < dependence_steps_j
|| nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
|| access_strides_i < access_strides_j)
{
lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
/* Validate the resulting matrix. When the transformation
is not valid, reverse to the previous matrix.
FIXME: In this case of transformation it could be
faster to verify the validity of the interchange
without applying the transform to the matrix. But for
the moment do it cleanly: this is just a prototype. */
is not valid, reverse to the previous transformation. */
if (!lambda_transform_legal_p (trans, depth, dependence_relations))
lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
}
}
return trans;
}
@ -181,6 +252,7 @@ linear_transform_loops (struct loops *loops)
lambda_loopnest before, after;
lambda_trans_matrix trans;
bool problem = false;
bool need_perfect_nest = false;
/* If it's not a loop nest, we don't want it.
We also don't handle sibling loops properly,
which are loops of the following form:
@ -197,7 +269,8 @@ linear_transform_loops (struct loops *loops)
} */
if (!loop_nest->inner)
continue;
for (temp = loop_nest; temp; temp = temp->inner)
depth = 1;
for (temp = loop_nest->inner; temp; temp = temp->inner)
{
flow_loop_scan (temp, LOOP_ALL);
/* If we have a sibling loop or multiple exit edges, jump ship. */
@ -246,7 +319,15 @@ linear_transform_loops (struct loops *loops)
/* Build the transformation matrix. */
trans = lambda_trans_matrix_new (depth, depth);
lambda_matrix_id (LTM_MATRIX (trans), depth);
trans = try_interchange_loops (trans, depth, dependence_relations);
trans = try_interchange_loops (trans, depth, dependence_relations,
datarefs, loop_nest->num);
if (lambda_trans_matrix_id_p (trans))
{
if (dump_file)
fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
continue;
}
/* Check whether the transformation is legal. */
if (!lambda_transform_legal_p (trans, depth, dependence_relations))
@ -255,8 +336,12 @@ linear_transform_loops (struct loops *loops)
fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
continue;
}
before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs,
&invariants);
if (!perfect_nest_p (loop_nest))
need_perfect_nest = true;
before = gcc_loopnest_to_lambda_loopnest (loops,
loop_nest, &oldivs,
&invariants,
need_perfect_nest);
if (!before)
continue;
@ -279,4 +364,6 @@ linear_transform_loops (struct loops *loops)
free_dependence_relations (dependence_relations);
free_data_refs (datarefs);
}
rewrite_into_loop_closed_ssa ();
free_df ();
}