[multiple changes]
2004-09-16 Daniel Berlin <dberlin@dberlin.org> * cfgloop.h (duplicate_loop): Add prototype. * cfgloopmanip.c (duplicate_loop): Make non-static. * lambda-code.c (perfect_nestify): Factor out test whether we can handle this loop into separate function. Call it. (can_convert_to_perfect_nest): New function. (replace_uses_of_x_with_y): Add modify_stmt call. * tree-loop-linear.c (linear_transform_loops): Call rewrite_into_loop_closed_ssa and free_df. 2004-09-16 Daniel Berlin <dberlin@dberlin.org> * lambda-code.c (invariant_in_loop): is_gimple_min_invariant is loop invariant as well. (perfect_nestify): new function. (gcc_loop_to_lambda_loop): New parameters to track lower bounds, upper bounds, and steps. Set outerinductionvar properly. (gcc_loopnest_to_lambda_loopnest): Add loops and need_perfect parameters. Return NULL if we need a perfect loop and can't make one. (lambda_loopnest_to_gcc_loopnest): Correct algorithm. (not_interesting_stmt): New function. (phi_loop_edge_uses_def): Ditto. (stmt_uses_phi_result): Ditto. (stmt_is_bumper_for_loop): Ditto. (perfect_nest_p): Ditto. (nestify_update_pending_stmts): Ditto. (replace_uses_of_x_with_y): Ditto. (stmt_uses_op): Ditto. (perfect_nestify): Ditto. * lambda-mat.c (lambda_matrix_id_p): New function. * lambda-trans.c (lambda_trans_matrix_id_p): Ditto. * lambda.h: Update prototypes. * tree-loop-linear (linear_transform_loop): Use new perfect_nest_p. Detect and ignore identity transform. * tree-ssa-loop.c (pass_linear_transform): Use TODO_write_loop_closed. 2004-09-16 Sebastian Pop <pop@cri.ensmp.fr> * tree-loop-linear.c (gather_interchange_stats): Add more comments. Gather also strides of accessed data. Pass in the data references array. (try_interchange_loops): Add a new heuristic for handling the temporal locality. Pass in the data references array. (linear_transform_loops): Pass the data references array to try_interchange_loops. From-SVN: r87607
This commit is contained in:
parent
83c9948608
commit
f67d92e937
|
@ -1,3 +1,53 @@
|
|||
2004-09-16 Daniel Berlin <dberlin@dberlin.org>
|
||||
|
||||
* cfgloop.h (duplicate_loop): Add prototype.
|
||||
* cfgloopmanip.c (duplicate_loop): Make non-static.
|
||||
* lambda-code.c (perfect_nestify): Factor out test whether
|
||||
we can handle this loop into separate function.
|
||||
Call it.
|
||||
(can_convert_to_perfect_nest): New function.
|
||||
(replace_uses_of_x_with_y): Add modify_stmt call.
|
||||
* tree-loop-linear.c (linear_transform_loops): Call
|
||||
rewrite_into_loop_closed_ssa and free_df.
|
||||
|
||||
2004-09-16 Daniel Berlin <dberlin@dberlin.org>
|
||||
|
||||
* lambda-code.c (invariant_in_loop): is_gimple_min_invariant is
|
||||
loop invariant as well.
|
||||
(perfect_nestify): new function.
|
||||
(gcc_loop_to_lambda_loop): New parameters to track lower bounds,
|
||||
upper bounds, and steps.
|
||||
Set outerinductionvar properly.
|
||||
(gcc_loopnest_to_lambda_loopnest): Add loops and need_perfect
|
||||
parameters.
|
||||
Return NULL if we need a perfect loop and can't make one.
|
||||
(lambda_loopnest_to_gcc_loopnest): Correct algorithm.
|
||||
(not_interesting_stmt): New function.
|
||||
(phi_loop_edge_uses_def): Ditto.
|
||||
(stmt_uses_phi_result): Ditto.
|
||||
(stmt_is_bumper_for_loop): Ditto.
|
||||
(perfect_nest_p): Ditto.
|
||||
(nestify_update_pending_stmts): Ditto.
|
||||
(replace_uses_of_x_with_y): Ditto.
|
||||
(stmt_uses_op): Ditto.
|
||||
(perfect_nestify): Ditto.
|
||||
* lambda-mat.c (lambda_matrix_id_p): New function.
|
||||
* lambda-trans.c (lambda_trans_matrix_id_p): Ditto.
|
||||
* lambda.h: Update prototypes.
|
||||
* tree-loop-linear (linear_transform_loop): Use new
|
||||
perfect_nest_p. Detect and ignore identity transform.
|
||||
* tree-ssa-loop.c (pass_linear_transform): Use TODO_write_loop_closed.
|
||||
|
||||
2004-09-16 Sebastian Pop <pop@cri.ensmp.fr>
|
||||
|
||||
* tree-loop-linear.c (gather_interchange_stats): Add more comments.
|
||||
Gather also strides of accessed data. Pass in the data references
|
||||
array.
|
||||
(try_interchange_loops): Add a new heuristic for handling the temporal
|
||||
locality. Pass in the data references array.
|
||||
(linear_transform_loops): Pass the data references array to
|
||||
try_interchange_loops.
|
||||
|
||||
2004-09-16 Kazu Hirata <kazu@cs.umass.edu>
|
||||
|
||||
* doc/invoke.texi: Fix typos. Follow spelling conventions.
|
||||
|
|
|
@ -308,6 +308,8 @@ extern bool can_duplicate_loop_p (struct loop *loop);
|
|||
#define DLTHE_FLAG_UPDATE_FREQ 1 /* Update frequencies in
|
||||
duplicate_loop_to_header_edge. */
|
||||
|
||||
extern struct loop * duplicate_loop (struct loops *, struct loop *,
|
||||
struct loop *);
|
||||
extern int duplicate_loop_to_header_edge (struct loop *, edge, struct loops *,
|
||||
unsigned, sbitmap, edge, edge *,
|
||||
unsigned *, int);
|
||||
|
|
|
@ -29,8 +29,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
#include "cfglayout.h"
|
||||
#include "output.h"
|
||||
|
||||
static struct loop * duplicate_loop (struct loops *, struct loop *,
|
||||
struct loop *);
|
||||
static void duplicate_subloops (struct loops *, struct loop *, struct loop *);
|
||||
static void copy_loops_to (struct loops *, struct loop **, int,
|
||||
struct loop *);
|
||||
|
@ -701,7 +699,7 @@ place_new_loop (struct loops *loops, struct loop *loop)
|
|||
|
||||
/* Copies copy of LOOP as subloop of TARGET loop, placing newly
|
||||
created loop into LOOPS structure. */
|
||||
static struct loop *
|
||||
struct loop *
|
||||
duplicate_loop (struct loops *loops, struct loop *loop, struct loop *target)
|
||||
{
|
||||
struct loop *cloop;
|
||||
|
|
|
@ -115,6 +115,13 @@
|
|||
Fourier-Motzkin elimination is used to compute the bounds of the base space
|
||||
of the lattice. */
|
||||
|
||||
|
||||
|
||||
DEF_VEC_GC_P(int);
|
||||
|
||||
static bool perfect_nestify (struct loops *,
|
||||
struct loop *, VEC (tree) *,
|
||||
VEC (tree) *, VEC (int) *, VEC (tree) *);
|
||||
/* Lattice stuff that is internal to the code generation algorithm. */
|
||||
|
||||
typedef struct
|
||||
|
@ -1160,20 +1167,23 @@ gcc_tree_to_linear_expression (int depth, tree expr,
|
|||
static bool
|
||||
invariant_in_loop (struct loop *loop, tree op)
|
||||
{
|
||||
if (is_gimple_min_invariant (op))
|
||||
return true;
|
||||
if (loop->depth == 0)
|
||||
return true;
|
||||
if (TREE_CODE (op) == SSA_NAME)
|
||||
{
|
||||
tree def;
|
||||
def = SSA_NAME_DEF_STMT (op);
|
||||
if (TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL
|
||||
&& IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op)))
|
||||
&& IS_EMPTY_STMT (def))
|
||||
return true;
|
||||
if (IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op)))
|
||||
if (IS_EMPTY_STMT (def))
|
||||
return false;
|
||||
if (loop->outer)
|
||||
if (!invariant_in_loop (loop->outer, op))
|
||||
if (loop->outer
|
||||
&& !invariant_in_loop (loop->outer, op))
|
||||
return false;
|
||||
return !flow_bb_inside_loop_p (loop,
|
||||
bb_for_stmt (SSA_NAME_DEF_STMT (op)));
|
||||
return !flow_bb_inside_loop_p (loop, bb_for_stmt (def));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -1190,7 +1200,10 @@ static lambda_loop
|
|||
gcc_loop_to_lambda_loop (struct loop *loop, int depth,
|
||||
VEC (tree) ** invariants,
|
||||
tree * ourinductionvar,
|
||||
VEC (tree) * outerinductionvars)
|
||||
VEC (tree) * outerinductionvars,
|
||||
VEC (tree) ** lboundvars,
|
||||
VEC (tree) ** uboundvars,
|
||||
VEC (int) ** steps)
|
||||
{
|
||||
tree phi;
|
||||
tree exit_cond;
|
||||
|
@ -1201,15 +1214,11 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
|
|||
tree test;
|
||||
int stepint;
|
||||
int extra = 0;
|
||||
tree uboundvar;
|
||||
tree lboundvar, uboundvar;
|
||||
use_optype uses;
|
||||
|
||||
/* Find out induction var and set the pointer so that the caller can
|
||||
append it to the outerinductionvars array later. */
|
||||
|
||||
/* Find out induction var and exit condition. */
|
||||
inductionvar = find_induction_var_from_exit_cond (loop);
|
||||
*ourinductionvar = inductionvar;
|
||||
|
||||
exit_cond = get_loop_exit_condition (loop);
|
||||
|
||||
if (inductionvar == NULL || exit_cond == NULL)
|
||||
|
@ -1260,7 +1269,9 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
/* The induction variable name/version we want to put in the array is the
|
||||
result of the induction variable phi node. */
|
||||
*ourinductionvar = PHI_RESULT (phi);
|
||||
access_fn = instantiate_parameters
|
||||
(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
|
||||
if (!access_fn)
|
||||
|
@ -1316,14 +1327,20 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
|
|||
}
|
||||
|
||||
if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
|
||||
|
||||
lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 1),
|
||||
outerinductionvars, *invariants,
|
||||
0);
|
||||
{
|
||||
lboundvar = PHI_ARG_DEF (phi, 1);
|
||||
lbound = gcc_tree_to_linear_expression (depth, lboundvar,
|
||||
outerinductionvars, *invariants,
|
||||
0);
|
||||
}
|
||||
else
|
||||
lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 0),
|
||||
outerinductionvars, *invariants,
|
||||
0);
|
||||
{
|
||||
lboundvar = PHI_ARG_DEF (phi, 0);
|
||||
lbound = gcc_tree_to_linear_expression (depth, lboundvar,
|
||||
outerinductionvars, *invariants,
|
||||
0);
|
||||
}
|
||||
|
||||
if (!lbound)
|
||||
{
|
||||
|
||||
|
@ -1368,6 +1385,11 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth,
|
|||
uboundvar,
|
||||
outerinductionvars,
|
||||
*invariants, extra);
|
||||
VEC_safe_push (tree, *uboundvars, build (PLUS_EXPR, integer_type_node,
|
||||
uboundvar,
|
||||
build_int_cst (integer_type_node, extra)));
|
||||
VEC_safe_push (tree, *lboundvars, lboundvar);
|
||||
VEC_safe_push (int, *steps, stepint);
|
||||
if (!ubound)
|
||||
{
|
||||
|
||||
|
@ -1400,7 +1422,7 @@ find_induction_var_from_exit_cond (struct loop *loop)
|
|||
test = TREE_OPERAND (expr, 0);
|
||||
if (TREE_CODE_CLASS (TREE_CODE (test)) != '<')
|
||||
return NULL_TREE;
|
||||
/* This is a guess. We say that for a <,!=,<= b, a is the induction
|
||||
/* This is a guess. We say that for a <,!=,<= b, a is the induction
|
||||
variable.
|
||||
For >, >=, we guess b is the induction variable.
|
||||
If we are wrong, it'll fail the rest of the induction variable tests, and
|
||||
|
@ -1433,15 +1455,20 @@ DEF_VEC_GC_P(lambda_loop);
|
|||
during this process. */
|
||||
|
||||
lambda_loopnest
|
||||
gcc_loopnest_to_lambda_loopnest (struct loop * loop_nest,
|
||||
gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
|
||||
struct loop * loop_nest,
|
||||
VEC (tree) **inductionvars,
|
||||
VEC (tree) **invariants)
|
||||
VEC (tree) **invariants,
|
||||
bool need_perfect_nest)
|
||||
{
|
||||
lambda_loopnest ret;
|
||||
struct loop *temp;
|
||||
int depth = 0;
|
||||
size_t i;
|
||||
VEC (lambda_loop) *loops;
|
||||
VEC (tree) *uboundvars;
|
||||
VEC (tree) *lboundvars;
|
||||
VEC (int) *steps;
|
||||
lambda_loop newloop;
|
||||
tree inductionvar = NULL;
|
||||
|
||||
|
@ -1454,18 +1481,30 @@ gcc_loopnest_to_lambda_loopnest (struct loop * loop_nest,
|
|||
loops = VEC_alloc (lambda_loop, 1);
|
||||
*inductionvars = VEC_alloc (tree, 1);
|
||||
*invariants = VEC_alloc (tree, 1);
|
||||
lboundvars = VEC_alloc (tree, 1);
|
||||
uboundvars = VEC_alloc (tree, 1);
|
||||
steps = VEC_alloc (int, 1);
|
||||
temp = loop_nest;
|
||||
while (temp)
|
||||
{
|
||||
newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
|
||||
&inductionvar, *inductionvars);
|
||||
&inductionvar, *inductionvars,
|
||||
&lboundvars, &uboundvars,
|
||||
&steps);
|
||||
if (!newloop)
|
||||
return NULL;
|
||||
VEC_safe_push (tree, *inductionvars, inductionvar);
|
||||
VEC_safe_push (lambda_loop, loops, newloop);
|
||||
temp = temp->inner;
|
||||
}
|
||||
|
||||
if (need_perfect_nest
|
||||
&& !perfect_nestify (currloops, loop_nest,
|
||||
lboundvars, uboundvars, steps, *inductionvars))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Not a perfect nest and couldn't convert to one.\n");
|
||||
return NULL;
|
||||
}
|
||||
ret = lambda_loopnest_new (depth, 2 * depth);
|
||||
for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
|
||||
LN_LOOPS (ret)[i] = newloop;
|
||||
|
@ -1489,7 +1528,7 @@ lbv_to_gcc_expression (lambda_body_vector lbv,
|
|||
|
||||
/* Create a statement list and a linear expression temporary. */
|
||||
stmts = alloc_stmt_list ();
|
||||
resvar = create_tmp_var (integer_type_node, "lletmp");
|
||||
resvar = create_tmp_var (integer_type_node, "lbvtmp");
|
||||
add_referenced_tmp_var (resvar);
|
||||
|
||||
/* Start at 0. */
|
||||
|
@ -1767,7 +1806,6 @@ lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
|
|||
size_t depth = 0;
|
||||
VEC(tree) *new_ivs;
|
||||
block_stmt_iterator bsi;
|
||||
basic_block *bbs;
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
|
@ -1849,66 +1887,56 @@ lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
|
|||
i++;
|
||||
temp = temp->inner;
|
||||
}
|
||||
|
||||
/* Go through the loop and make iv replacements. */
|
||||
bbs = get_loop_body (old_loopnest);
|
||||
for (i = 0; i < old_loopnest->num_nodes; i++)
|
||||
for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
|
||||
{
|
||||
tree stmt = bsi_stmt (bsi);
|
||||
use_optype uses;
|
||||
size_t j;
|
||||
|
||||
get_stmt_operands (stmt);
|
||||
uses = STMT_USE_OPS (stmt);
|
||||
for (j = 0; j < NUM_USES (uses); j++)
|
||||
{
|
||||
size_t k;
|
||||
use_operand_p use = USE_OP_PTR (uses, j);
|
||||
for (k = 0; k < VEC_length (tree, old_ivs); k++)
|
||||
{
|
||||
tree oldiv = VEC_index (tree, old_ivs, k);
|
||||
if (USE_FROM_PTR (use) == oldiv)
|
||||
{
|
||||
tree newiv, stmts;
|
||||
lambda_body_vector lbv;
|
||||
|
||||
/* Compute the new expression for the induction
|
||||
variable. */
|
||||
depth = VEC_length (tree, new_ivs);
|
||||
lbv = lambda_body_vector_new (depth);
|
||||
LBV_COEFFICIENTS (lbv)[k] = 1;
|
||||
lbv = lambda_body_vector_compute_new (transform, lbv);
|
||||
newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts);
|
||||
|
||||
/* Insert the statements to build that
|
||||
expression. */
|
||||
bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
|
||||
|
||||
/* Replace the use with the result of that
|
||||
expression. */
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file,
|
||||
"Replacing induction variable use of ");
|
||||
print_generic_stmt (dump_file, USE_FROM_PTR (use), 0);
|
||||
fprintf (dump_file, " with ");
|
||||
print_generic_stmt (dump_file, newiv, 0);
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
SET_USE (use, newiv);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* Rewrite uses of the old ivs so that they are now specified in terms of
|
||||
the new ivs. */
|
||||
temp = old_loopnest;
|
||||
for (i = 0; i < VEC_length (tree, old_ivs); i++)
|
||||
{
|
||||
int j;
|
||||
tree oldiv = VEC_index (tree, old_ivs, i);
|
||||
dataflow_t imm = get_immediate_uses (SSA_NAME_DEF_STMT (oldiv));
|
||||
for (j = 0; j < num_immediate_uses (imm); j++)
|
||||
{
|
||||
size_t k;
|
||||
tree stmt = immediate_use (imm, j);
|
||||
use_optype uses;
|
||||
get_stmt_operands (stmt);
|
||||
uses = STMT_USE_OPS (stmt);
|
||||
for (k = 0; k < NUM_USES (uses); k++)
|
||||
{
|
||||
use_operand_p use = USE_OP_PTR (uses, k);
|
||||
if (USE_FROM_PTR (use) == oldiv)
|
||||
{
|
||||
tree newiv, stmts;
|
||||
lambda_body_vector lbv;
|
||||
/* Compute the new expression for the induction
|
||||
variable. */
|
||||
depth = VEC_length (tree, new_ivs);
|
||||
lbv = lambda_body_vector_new (depth);
|
||||
LBV_COEFFICIENTS (lbv)[i] = 1;
|
||||
lbv = lambda_body_vector_compute_new (transform, lbv);
|
||||
newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts);
|
||||
bsi = stmt_for_bsi (stmt);
|
||||
/* Insert the statements to build that
|
||||
expression. */
|
||||
bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
|
||||
SET_USE (use, newiv);
|
||||
modify_stmt (stmt);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Returns true when the vector V is lexicographically positive, in
|
||||
other words, when the first non zero element is positive. */
|
||||
|
||||
static bool
|
||||
lambda_vector_lexico_pos (lambda_vector v, unsigned n)
|
||||
lambda_vector_lexico_pos (lambda_vector v,
|
||||
unsigned n)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < n; i++)
|
||||
|
@ -1923,6 +1951,442 @@ lambda_vector_lexico_pos (lambda_vector v, unsigned n)
|
|||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Return TRUE if this is not interesting statement from the perspective of
|
||||
determining if we have a perfect loop nest. */
|
||||
|
||||
static bool
|
||||
not_interesting_stmt (tree stmt)
|
||||
{
|
||||
/* Note that COND_EXPR's aren't interesting because if they were exiting the
|
||||
loop, we would have already failed the number of exits tests. */
|
||||
if (TREE_CODE (stmt) == LABEL_EXPR
|
||||
|| TREE_CODE (stmt) == GOTO_EXPR
|
||||
|| TREE_CODE (stmt) == COND_EXPR)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */
|
||||
|
||||
static bool
|
||||
phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < PHI_NUM_ARGS (phi); i++)
|
||||
if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
|
||||
if (PHI_ARG_DEF (phi, i) == def)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return TRUE if STMT is a use of PHI_RESULT. */
|
||||
|
||||
static bool
|
||||
stmt_uses_phi_result (tree stmt, tree phi_result)
|
||||
{
|
||||
use_optype uses = STMT_USE_OPS (stmt);
|
||||
|
||||
/* This is conservatively true, because we only want SIMPLE bumpers
|
||||
of the form x +- constant for our pass. */
|
||||
if (NUM_USES (uses) != 1)
|
||||
return false;
|
||||
if (USE_OP (uses, 0) == phi_result)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* STMT is a bumper stmt for LOOP if the version it defines is used in the
|
||||
in-loop-edge in a phi node, and the operand it uses is the result of that
|
||||
phi node.
|
||||
I.E. i_29 = i_3 + 1
|
||||
i_3 = PHI (0, i_29); */
|
||||
|
||||
static bool
|
||||
stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
|
||||
{
|
||||
tree use;
|
||||
tree def;
|
||||
def_optype defs = STMT_DEF_OPS (stmt);
|
||||
dataflow_t imm;
|
||||
int i;
|
||||
|
||||
if (NUM_DEFS (defs) != 1)
|
||||
return false;
|
||||
def = DEF_OP (defs, 0);
|
||||
imm = get_immediate_uses (stmt);
|
||||
for (i = 0; i < num_immediate_uses (imm); i++)
|
||||
{
|
||||
use = immediate_use (imm, i);
|
||||
if (TREE_CODE (use) == PHI_NODE)
|
||||
{
|
||||
if (phi_loop_edge_uses_def (loop, use, def))
|
||||
if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
/* Return true if LOOP is a perfect loop nest.
|
||||
Perfect loop nests are those loop nests where all code occurs in the
|
||||
innermost loop body.
|
||||
If S is a program statement, then
|
||||
|
||||
ie
|
||||
DO I = 1, 20
|
||||
S1
|
||||
DO J = 1, 20
|
||||
...
|
||||
END DO
|
||||
END DO
|
||||
is not a perfect loop nest because of S1.
|
||||
|
||||
DO I = 1, 20
|
||||
DO J = 1, 20
|
||||
S1
|
||||
...
|
||||
END DO
|
||||
END DO
|
||||
is a perfect loop nest.
|
||||
|
||||
Since we don't have high level loops anymore, we basically have to walk our
|
||||
statements and ignore those that are there because the loop needs them (IE
|
||||
the induction variable increment, and jump back to the top of the loop). */
|
||||
|
||||
bool
|
||||
perfect_nest_p (struct loop *loop)
|
||||
{
|
||||
basic_block *bbs;
|
||||
size_t i;
|
||||
tree exit_cond;
|
||||
|
||||
if (!loop->inner)
|
||||
return true;
|
||||
bbs = get_loop_body (loop);
|
||||
exit_cond = get_loop_exit_condition (loop);
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
{
|
||||
if (bbs[i]->loop_father == loop)
|
||||
{
|
||||
block_stmt_iterator bsi;
|
||||
for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
|
||||
{
|
||||
tree stmt = bsi_stmt (bsi);
|
||||
if (stmt == exit_cond
|
||||
|| not_interesting_stmt (stmt)
|
||||
|| stmt_is_bumper_for_loop (loop, stmt))
|
||||
continue;
|
||||
free (bbs);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
free (bbs);
|
||||
/* See if the inner loops are perfectly nested as well. */
|
||||
if (loop->inner)
|
||||
return perfect_nest_p (loop->inner);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Add phi args using PENDINT_STMT list. */
|
||||
|
||||
static void
|
||||
nestify_update_pending_stmts (edge e)
|
||||
{
|
||||
basic_block dest;
|
||||
tree phi, arg, def;
|
||||
|
||||
if (!PENDING_STMT (e))
|
||||
return;
|
||||
|
||||
dest = e->dest;
|
||||
|
||||
for (phi = phi_nodes (dest), arg = PENDING_STMT (e);
|
||||
phi;
|
||||
phi = TREE_CHAIN (phi), arg = TREE_CHAIN (arg))
|
||||
{
|
||||
def = TREE_VALUE (arg);
|
||||
add_phi_arg (&phi, def, e);
|
||||
}
|
||||
|
||||
PENDING_STMT (e) = NULL;
|
||||
}
|
||||
|
||||
/* Replace the USES of tree X in STMT with tree Y */
|
||||
|
||||
static void
|
||||
replace_uses_of_x_with_y (tree stmt, tree x, tree y)
|
||||
{
|
||||
use_optype uses = STMT_USE_OPS (stmt);
|
||||
size_t i;
|
||||
for (i = 0; i < NUM_USES (uses); i++)
|
||||
{
|
||||
if (USE_OP (uses, i) == x)
|
||||
SET_USE_OP (uses, i, y);
|
||||
}
|
||||
}
|
||||
|
||||
/* Return TRUE if STMT uses tree OP in it's uses. */
|
||||
|
||||
static bool
|
||||
stmt_uses_op (tree stmt, tree op)
|
||||
{
|
||||
use_optype uses = STMT_USE_OPS (stmt);
|
||||
size_t i;
|
||||
for (i = 0; i < NUM_USES (uses); i++)
|
||||
{
|
||||
if (USE_OP (uses, i) == op)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
|
||||
one. LOOPIVS is a vector of induction variables, one per loop.
|
||||
ATM, we only handle imperfect nests of depth 2, where all of the statements
|
||||
occur after the inner loop. */
|
||||
|
||||
static bool
|
||||
can_convert_to_perfect_nest (struct loop *loop,
|
||||
VEC (tree) *loopivs)
|
||||
{
|
||||
basic_block *bbs;
|
||||
tree exit_condition;
|
||||
size_t i;
|
||||
block_stmt_iterator bsi;
|
||||
|
||||
/* Can't handle triply nested+ loops yet. */
|
||||
if (!loop->inner || loop->inner->inner)
|
||||
return false;
|
||||
|
||||
/* We only handle moving the after-inner-body statements right now, so make
|
||||
sure all the statements we need to move are located in that position. */
|
||||
bbs = get_loop_body (loop);
|
||||
exit_condition = get_loop_exit_condition (loop);
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
{
|
||||
if (bbs[i]->loop_father == loop)
|
||||
{
|
||||
for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
|
||||
{
|
||||
size_t j;
|
||||
tree stmt = bsi_stmt (bsi);
|
||||
if (stmt == exit_condition
|
||||
|| not_interesting_stmt (stmt)
|
||||
|| stmt_is_bumper_for_loop (loop, stmt))
|
||||
continue;
|
||||
/* If the statement uses inner loop ivs, we == screwed. */
|
||||
for (j = 1; j < VEC_length (tree, loopivs); j++)
|
||||
if (stmt_uses_op (stmt, VEC_index (tree, loopivs, j)))
|
||||
{
|
||||
free (bbs);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If the bb of a statement we care about isn't dominated by
|
||||
the header of the inner loop, then we are also screwed. */
|
||||
if (!dominated_by_p (CDI_DOMINATORS,
|
||||
bb_for_stmt (stmt),
|
||||
loop->inner->header))
|
||||
{
|
||||
free (bbs);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Transform the loop nest into a perfect nest, if possible.
|
||||
LOOPS is the current struct loops *
|
||||
LOOP is the loop nest to transform into a perfect nest
|
||||
LBOUNDS are the lower bounds for the loops to transform
|
||||
UBOUNDS are the upper bounds for the loops to transform
|
||||
STEPS is the STEPS for the loops to transform.
|
||||
LOOPIVS is the induction variables for the loops to transform.
|
||||
|
||||
Basically, for the case of
|
||||
|
||||
FOR (i = 0; i < 50; i++)
|
||||
{
|
||||
FOR (j =0; j < 50; j++)
|
||||
{
|
||||
<whatever>
|
||||
}
|
||||
<some code>
|
||||
}
|
||||
|
||||
This function will transform it into a perfect loop nest by splitting the
|
||||
outer loop into two loops, like so:
|
||||
|
||||
FOR (i = 0; i < 50; i++)
|
||||
{
|
||||
FOR (j = 0; j < 50; j++)
|
||||
{
|
||||
<whatever>
|
||||
}
|
||||
}
|
||||
|
||||
FOR (i = 0; i < 50; i ++)
|
||||
{
|
||||
<some code>
|
||||
}
|
||||
|
||||
Return FALSE if we can't make this loop into a perfect nest. */
|
||||
static bool
|
||||
perfect_nestify (struct loops *loops,
|
||||
struct loop *loop,
|
||||
VEC (tree) *lbounds,
|
||||
VEC (tree) *ubounds,
|
||||
VEC (int) *steps,
|
||||
VEC (tree) *loopivs)
|
||||
{
|
||||
basic_block *bbs;
|
||||
tree exit_condition;
|
||||
tree then_label, else_label, cond_stmt;
|
||||
basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
|
||||
size_t i;
|
||||
block_stmt_iterator bsi;
|
||||
edge e;
|
||||
struct loop *newloop;
|
||||
tree phi;
|
||||
tree uboundvar;
|
||||
tree stmt;
|
||||
tree ivvar, ivvarinced;
|
||||
VEC (tree) *phis;
|
||||
|
||||
if (!can_convert_to_perfect_nest (loop, loopivs))
|
||||
return false;
|
||||
|
||||
phis = VEC_alloc (tree, 1);
|
||||
|
||||
/* Create the new loop */
|
||||
|
||||
olddest = loop->single_exit->dest;
|
||||
preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
|
||||
headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
|
||||
|
||||
/* This is done because otherwise, it will release the ssa_name too early
|
||||
when the edge gets redirected and it will get reused, causing the use of
|
||||
the phi node to get rewritten. */
|
||||
|
||||
for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
|
||||
{
|
||||
/* These should be simple exit phi copies. */
|
||||
if (PHI_NUM_ARGS (phi) != 1)
|
||||
return false;
|
||||
VEC_safe_push (tree, phis, PHI_RESULT (phi));
|
||||
VEC_safe_push (tree, phis, PHI_ARG_DEF (phi, 0));
|
||||
mark_for_rewrite (PHI_RESULT (phi));
|
||||
}
|
||||
e = redirect_edge_and_branch (preheaderbb->succ, headerbb);
|
||||
unmark_all_for_rewrite ();
|
||||
bb_ann (olddest)->phi_nodes = NULL;
|
||||
/* Add back the old exit phis. */
|
||||
while (VEC_length (tree, phis) != 0)
|
||||
{
|
||||
tree def;
|
||||
tree phiname;
|
||||
def = VEC_pop (tree, phis);
|
||||
phiname = VEC_pop (tree, phis);
|
||||
|
||||
phi = create_phi_node (phiname, preheaderbb);
|
||||
add_phi_arg (&phi, def, preheaderbb->pred);
|
||||
}
|
||||
|
||||
nestify_update_pending_stmts (e);
|
||||
bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
|
||||
latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
|
||||
make_edge (headerbb, bodybb, EDGE_FALLTHRU);
|
||||
then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
|
||||
else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
|
||||
cond_stmt = build (COND_EXPR, void_type_node,
|
||||
build (NE_EXPR, boolean_type_node,
|
||||
integer_one_node,
|
||||
integer_zero_node),
|
||||
then_label, else_label);
|
||||
bsi = bsi_start (bodybb);
|
||||
bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
|
||||
e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
|
||||
make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
|
||||
make_edge (latchbb, headerbb, EDGE_FALLTHRU);
|
||||
|
||||
/* Update the loop structures. */
|
||||
newloop = duplicate_loop (loops, loop, olddest->loop_father);
|
||||
newloop->header = headerbb;
|
||||
newloop->latch = latchbb;
|
||||
newloop->single_exit = e;
|
||||
add_bb_to_loop (latchbb, newloop);
|
||||
add_bb_to_loop (bodybb, newloop);
|
||||
add_bb_to_loop (headerbb, newloop);
|
||||
add_bb_to_loop (preheaderbb, olddest->loop_father);
|
||||
set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
|
||||
set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
|
||||
set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
|
||||
loop->single_exit->src);
|
||||
set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
|
||||
set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
|
||||
/* Create the new iv. */
|
||||
ivvar = create_tmp_var (integer_type_node, "perfectiv");
|
||||
add_referenced_tmp_var (ivvar);
|
||||
bsi = bsi_last (newloop->latch->pred->src);
|
||||
create_iv (VEC_index (tree, lbounds, 0),
|
||||
build_int_cst (integer_type_node,
|
||||
VEC_index (int, steps, 0)),
|
||||
ivvar, newloop, &bsi, false, &ivvar, &ivvarinced);
|
||||
|
||||
/* Create the new upper bound. This may be not just a variable, so we copy
|
||||
it to one just in case. */
|
||||
|
||||
exit_condition = get_loop_exit_condition (newloop);
|
||||
uboundvar = create_tmp_var (integer_type_node, "uboundvar");
|
||||
add_referenced_tmp_var (uboundvar);
|
||||
stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
|
||||
VEC_index (tree, ubounds, 0));
|
||||
uboundvar = make_ssa_name (uboundvar, stmt);
|
||||
TREE_OPERAND (stmt, 0) = uboundvar;
|
||||
bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
|
||||
COND_EXPR_COND (exit_condition) = build (LE_EXPR,
|
||||
boolean_type_node,
|
||||
ivvarinced,
|
||||
uboundvar);
|
||||
|
||||
bbs = get_loop_body (loop);
|
||||
/* Now replace the induction variable in the moved statements with the
|
||||
correct loop induction variable. */
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
{
|
||||
block_stmt_iterator tobsi = bsi_last (bodybb);
|
||||
if (bbs[i]->loop_father == loop)
|
||||
{
|
||||
/* Note that the bsi only needs to be explicitly incremented
|
||||
when we don't move something, since it is automatically
|
||||
incremented when we do. */
|
||||
for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
|
||||
{
|
||||
tree stmt = bsi_stmt (bsi);
|
||||
if (stmt == exit_condition
|
||||
|| not_interesting_stmt (stmt)
|
||||
|| stmt_is_bumper_for_loop (loop, stmt))
|
||||
{
|
||||
bsi_next (&bsi);
|
||||
continue;
|
||||
}
|
||||
replace_uses_of_x_with_y (stmt,
|
||||
VEC_index (tree, loopivs, 0),
|
||||
ivvar);
|
||||
bsi_move_before (&bsi, &tobsi);
|
||||
}
|
||||
}
|
||||
}
|
||||
free (bbs);
|
||||
flow_loops_find (loops, LOOP_ALL);
|
||||
return perfect_nest_p (loop);
|
||||
}
|
||||
|
||||
/* Return true if TRANS is a legal transformation matrix that respects
|
||||
the dependence vectors in DISTS and DIRS. The conservative answer
|
||||
is false.
|
||||
|
@ -1931,27 +2395,29 @@ lambda_vector_lexico_pos (lambda_vector v, unsigned n)
|
|||
matrix T is legal when applied to a loop nest with a set of
|
||||
lexicographically non-negative distance vectors RDG if and only if
|
||||
for each vector d in RDG, (T.d >= 0) is lexicographically positive.
|
||||
i.e.: if and only if it transforms the lexicographically positive
|
||||
ie.: if and only if it transforms the lexicographically positive
|
||||
distance vectors to lexicographically positive vectors. Note that
|
||||
a unimodular matrix must transform the zero vector (and only it) to
|
||||
the zero vector." S.Muchnick. */
|
||||
|
||||
bool
|
||||
lambda_transform_legal_p (lambda_trans_matrix trans,
|
||||
int nb_loops, varray_type dependence_relations)
|
||||
lambda_transform_legal_p (lambda_trans_matrix trans,
|
||||
int nb_loops,
|
||||
varray_type dependence_relations)
|
||||
{
|
||||
unsigned int i;
|
||||
lambda_vector distres;
|
||||
struct data_dependence_relation *ddr;
|
||||
|
||||
#if defined ENABLE_CHECKING
|
||||
gcc_assert (LTM_COLSIZE (trans) == nb_loops
|
||||
&& LTM_ROWSIZE (trans) == nb_loops);
|
||||
if (LTM_COLSIZE (trans) != nb_loops
|
||||
|| LTM_ROWSIZE (trans) != nb_loops)
|
||||
abort ();
|
||||
#endif
|
||||
|
||||
/* When there is an unknown relation in the dependence_relations, we
|
||||
know that it is no worth looking at this loop nest: give up. */
|
||||
ddr = (struct data_dependence_relation *)
|
||||
ddr = (struct data_dependence_relation *)
|
||||
VARRAY_GENERIC_PTR (dependence_relations, 0);
|
||||
if (ddr == NULL)
|
||||
return true;
|
||||
|
@ -1963,12 +2429,14 @@ lambda_transform_legal_p (lambda_trans_matrix trans,
|
|||
/* For each distance vector in the dependence graph. */
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
|
||||
{
|
||||
ddr = (struct data_dependence_relation *)
|
||||
ddr = (struct data_dependence_relation *)
|
||||
VARRAY_GENERIC_PTR (dependence_relations, i);
|
||||
|
||||
|
||||
|
||||
/* Don't care about relations for which we know that there is no
|
||||
dependence, nor about read-read (aka. output-dependences):
|
||||
these data accesses can happen in any order. */
|
||||
dependence, nor about read-read (aka. output-dependences):
|
||||
these data accesses can happen in any order. */
|
||||
if (DDR_ARE_DEPENDENT (ddr) == chrec_known
|
||||
|| (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
|
||||
continue;
|
||||
|
@ -1977,12 +2445,11 @@ lambda_transform_legal_p (lambda_trans_matrix trans,
|
|||
return false;
|
||||
|
||||
/* Compute trans.dist_vect */
|
||||
lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
|
||||
lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
|
||||
DDR_DIST_VECT (ddr), distres);
|
||||
|
||||
if (!lambda_vector_lexico_pos (distres, nb_loops))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -70,6 +70,29 @@ lambda_matrix_id (lambda_matrix mat, int size)
|
|||
mat[i][j] = (i == j) ? 1 : 0;
|
||||
}
|
||||
|
||||
/* Return true if MAT is the identity matrix of SIZE */
|
||||
|
||||
bool
|
||||
lambda_matrix_id_p (lambda_matrix mat, int size)
|
||||
{
|
||||
int i, j;
|
||||
for (i = 0; i < size; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
{
|
||||
if (i == j)
|
||||
{
|
||||
if (mat[i][j] != 1)
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mat[i][j] != 0)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Negate the elements of the M x N matrix MAT1 and store it in MAT2. */
|
||||
|
||||
void
|
||||
|
|
|
@ -45,7 +45,18 @@ lambda_trans_matrix_new (int colsize, int rowsize)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/* Compute the inverse of the transformation. */
|
||||
/* Return true if MAT is an identity matrix. */
|
||||
|
||||
bool
|
||||
lambda_trans_matrix_id_p (lambda_trans_matrix mat)
|
||||
{
|
||||
if (LTM_ROWSIZE (mat) != LTM_COLSIZE (mat))
|
||||
return false;
|
||||
return lambda_matrix_id_p (LTM_MATRIX (mat), LTM_ROWSIZE (mat));
|
||||
}
|
||||
|
||||
|
||||
/* Compute the inverse of the transformation matrix MAT. */
|
||||
|
||||
lambda_trans_matrix
|
||||
lambda_trans_matrix_inverse (lambda_trans_matrix mat)
|
||||
|
|
14
gcc/lambda.h
14
gcc/lambda.h
|
@ -105,7 +105,9 @@ typedef struct
|
|||
|
||||
lambda_loopnest lambda_loopnest_new (int, int);
|
||||
lambda_loopnest lambda_loopnest_transform (lambda_loopnest, lambda_trans_matrix);
|
||||
|
||||
struct loop;
|
||||
struct loops;
|
||||
bool perfect_nest_p (struct loop *);
|
||||
bool lambda_transform_legal_p (lambda_trans_matrix, int, varray_type);
|
||||
void print_lambda_loopnest (FILE *, lambda_loopnest, char);
|
||||
|
||||
|
@ -116,6 +118,7 @@ void print_lambda_loop (FILE *, lambda_loop, int, int, char);
|
|||
lambda_matrix lambda_matrix_new (int, int);
|
||||
|
||||
void lambda_matrix_id (lambda_matrix, int);
|
||||
bool lambda_matrix_id_p (lambda_matrix, int);
|
||||
void lambda_matrix_copy (lambda_matrix, lambda_matrix, int, int);
|
||||
void lambda_matrix_negate (lambda_matrix, lambda_matrix, int, int);
|
||||
void lambda_matrix_transpose (lambda_matrix, lambda_matrix, int, int);
|
||||
|
@ -153,16 +156,17 @@ lambda_trans_matrix lambda_trans_matrix_inverse (lambda_trans_matrix);
|
|||
void print_lambda_trans_matrix (FILE *, lambda_trans_matrix);
|
||||
void lambda_matrix_vector_mult (lambda_matrix, int, int, lambda_vector,
|
||||
lambda_vector);
|
||||
bool lambda_trans_matrix_id_p (lambda_trans_matrix);
|
||||
|
||||
lambda_body_vector lambda_body_vector_new (int);
|
||||
lambda_body_vector lambda_body_vector_compute_new (lambda_trans_matrix,
|
||||
lambda_body_vector);
|
||||
void print_lambda_body_vector (FILE *, lambda_body_vector);
|
||||
struct loop;
|
||||
|
||||
lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loop *,
|
||||
lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loops *,
|
||||
struct loop *,
|
||||
VEC(tree) **,
|
||||
VEC(tree) **);
|
||||
VEC(tree) **,
|
||||
bool);
|
||||
void lambda_loopnest_to_gcc_loopnest (struct loop *, VEC(tree) *,
|
||||
VEC(tree) *,
|
||||
lambda_loopnest,
|
||||
|
|
|
@ -55,22 +55,56 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
transform matrix for locality purposes.
|
||||
TODO: Completion of partial transforms. */
|
||||
|
||||
/* Gather statistics for loop interchange. Initializes SUM the sum of
|
||||
all the data dependence distances carried by loop LOOP_NUMBER.
|
||||
NB_DEPS_NOT_CARRIED_BY_LOOP is initialized to the number of
|
||||
dependence relations for which the loop LOOP_NUMBER is not carrying
|
||||
any dependence. */
|
||||
/* Gather statistics for loop interchange. LOOP_NUMBER is a relative
|
||||
index in the considered loop nest. The first loop in the
|
||||
considered loop nest is FIRST_LOOP, and consequently the index of
|
||||
the considered loop is obtained by FIRST_LOOP + LOOP_NUMBER.
|
||||
|
||||
Initializes:
|
||||
- DEPENDENCE_STEPS the sum of all the data dependence distances
|
||||
carried by loop LOOP_NUMBER,
|
||||
|
||||
- NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
|
||||
for which the loop LOOP_NUMBER is not carrying any dependence,
|
||||
|
||||
- ACCESS_STRIDES the sum of all the strides in LOOP_NUMBER.
|
||||
|
||||
Example: for the following loop,
|
||||
|
||||
| loop_1 runs 1335 times
|
||||
| loop_2 runs 1335 times
|
||||
| A[{{0, +, 1}_1, +, 1335}_2]
|
||||
| B[{{0, +, 1}_1, +, 1335}_2]
|
||||
| endloop_2
|
||||
| A[{0, +, 1336}_1]
|
||||
| endloop_1
|
||||
|
||||
gather_interchange_stats (in loop_1) will return
|
||||
DEPENDENCE_STEPS = 3002
|
||||
NB_DEPS_NOT_CARRIED_BY_LOOP = 5
|
||||
ACCESS_STRIDES = 10694
|
||||
|
||||
gather_interchange_stats (in loop_2) will return
|
||||
DEPENDENCE_STEPS = 3000
|
||||
NB_DEPS_NOT_CARRIED_BY_LOOP = 7
|
||||
ACCESS_STRIDES = 8010
|
||||
*/
|
||||
|
||||
static void
|
||||
gather_interchange_stats (varray_type dependence_relations,
|
||||
varray_type datarefs,
|
||||
unsigned int loop_number,
|
||||
unsigned int *sum,
|
||||
unsigned int *nb_deps_not_carried_by_loop)
|
||||
unsigned int first_loop,
|
||||
unsigned int *dependence_steps,
|
||||
unsigned int *nb_deps_not_carried_by_loop,
|
||||
unsigned int *access_strides)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
*sum = 0;
|
||||
*dependence_steps = 0;
|
||||
*nb_deps_not_carried_by_loop = 0;
|
||||
*access_strides = 0;
|
||||
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
|
||||
{
|
||||
int dist;
|
||||
|
@ -78,11 +112,11 @@ gather_interchange_stats (varray_type dependence_relations,
|
|||
(struct data_dependence_relation *)
|
||||
VARRAY_GENERIC_PTR (dependence_relations, i);
|
||||
|
||||
/* Compute the dependence strides. */
|
||||
|
||||
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
|
||||
{
|
||||
/* Some constants will need tweaking, but not something that should
|
||||
be user-accessible. Thus, no --param. */
|
||||
*sum += 100;
|
||||
(*dependence_steps) += 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -90,34 +124,64 @@ gather_interchange_stats (varray_type dependence_relations,
|
|||
is no reuse of the data. */
|
||||
if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
|
||||
{
|
||||
/* Ditto on the no --param here */
|
||||
*sum += 1000;
|
||||
(*dependence_steps) += 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
dist = DDR_DIST_VECT (ddr)[loop_number];
|
||||
if (dist == 0)
|
||||
*nb_deps_not_carried_by_loop++;
|
||||
(*nb_deps_not_carried_by_loop) += 1;
|
||||
else if (dist < 0)
|
||||
*sum += -dist;
|
||||
(*dependence_steps) += -dist;
|
||||
else
|
||||
*sum += dist;
|
||||
(*dependence_steps) += dist;
|
||||
}
|
||||
|
||||
/* Compute the access strides. */
|
||||
for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
|
||||
{
|
||||
unsigned int it;
|
||||
struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
|
||||
tree stmt = DR_STMT (dr);
|
||||
struct loop *stmt_loop = loop_containing_stmt (stmt);
|
||||
struct loop *inner_loop = current_loops->parray[first_loop + 1];
|
||||
|
||||
if (!flow_loop_nested_p (inner_loop, stmt_loop)
|
||||
&& inner_loop->num != stmt_loop->num)
|
||||
continue;
|
||||
|
||||
for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++)
|
||||
{
|
||||
tree chrec = DR_ACCESS_FN (dr, it);
|
||||
tree tstride = evolution_part_in_loop_num
|
||||
(chrec, first_loop + loop_number);
|
||||
|
||||
if (tstride == NULL_TREE
|
||||
|| TREE_CODE (tstride) != INTEGER_CST)
|
||||
continue;
|
||||
|
||||
(*access_strides) += int_cst_value (tstride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Apply to TRANS any loop interchange that minimize inner loop steps.
|
||||
DEPTH is the depth of the loop nest, and DEPENDENCE_RELATIONS is an array
|
||||
of dependence relations.
|
||||
Returns the new transform matrix. The smaller the reuse vector
|
||||
distances in the inner loops, the fewer the cache misses. */
|
||||
distances in the inner loops, the fewer the cache misses.
|
||||
FIRST_LOOP is the loop->num of the first loop in the analyzed loop
|
||||
nest. */
|
||||
|
||||
|
||||
static lambda_trans_matrix
|
||||
try_interchange_loops (lambda_trans_matrix trans,
|
||||
unsigned int depth,
|
||||
varray_type dependence_relations)
|
||||
varray_type dependence_relations,
|
||||
varray_type datarefs,
|
||||
unsigned int first_loop)
|
||||
{
|
||||
unsigned int loop_i, loop_j;
|
||||
unsigned int steps_i, steps_j;
|
||||
unsigned int dependence_steps_i, dependence_steps_j;
|
||||
unsigned int access_strides_i, access_strides_j;
|
||||
unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
|
||||
struct data_dependence_relation *ddr;
|
||||
|
||||
|
@ -132,33 +196,40 @@ try_interchange_loops (lambda_trans_matrix trans,
|
|||
for (loop_j = 1; loop_j < depth; loop_j++)
|
||||
for (loop_i = 0; loop_i < loop_j; loop_i++)
|
||||
{
|
||||
gather_interchange_stats (dependence_relations, loop_i, &steps_i,
|
||||
&nb_deps_not_carried_by_i);
|
||||
gather_interchange_stats (dependence_relations, loop_j, &steps_j,
|
||||
&nb_deps_not_carried_by_j);
|
||||
gather_interchange_stats (dependence_relations, datarefs,
|
||||
loop_i, first_loop,
|
||||
&dependence_steps_i,
|
||||
&nb_deps_not_carried_by_i,
|
||||
&access_strides_i);
|
||||
gather_interchange_stats (dependence_relations, datarefs,
|
||||
loop_j, first_loop,
|
||||
&dependence_steps_j,
|
||||
&nb_deps_not_carried_by_j,
|
||||
&access_strides_j);
|
||||
|
||||
/* Heuristics for loop interchange profitability:
|
||||
1. Inner loops should have smallest steps.
|
||||
2. Inner loops should contain more dependence relations not
|
||||
carried by the loop.
|
||||
|
||||
1. (spatial locality) Inner loops should have smallest
|
||||
dependence steps.
|
||||
|
||||
2. (spatial locality) Inner loops should contain more
|
||||
dependence relations not carried by the loop.
|
||||
|
||||
3. (temporal locality) Inner loops should have smallest
|
||||
array access strides.
|
||||
*/
|
||||
if (steps_i < steps_j
|
||||
|| nb_deps_not_carried_by_i > nb_deps_not_carried_by_j)
|
||||
if (dependence_steps_i < dependence_steps_j
|
||||
|| nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
|
||||
|| access_strides_i < access_strides_j)
|
||||
{
|
||||
lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
|
||||
|
||||
/* Validate the resulting matrix. When the transformation
|
||||
is not valid, reverse to the previous matrix.
|
||||
|
||||
FIXME: In this case of transformation it could be
|
||||
faster to verify the validity of the interchange
|
||||
without applying the transform to the matrix. But for
|
||||
the moment do it cleanly: this is just a prototype. */
|
||||
is not valid, reverse to the previous transformation. */
|
||||
if (!lambda_transform_legal_p (trans, depth, dependence_relations))
|
||||
lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return trans;
|
||||
}
|
||||
|
||||
|
@ -181,6 +252,7 @@ linear_transform_loops (struct loops *loops)
|
|||
lambda_loopnest before, after;
|
||||
lambda_trans_matrix trans;
|
||||
bool problem = false;
|
||||
bool need_perfect_nest = false;
|
||||
/* If it's not a loop nest, we don't want it.
|
||||
We also don't handle sibling loops properly,
|
||||
which are loops of the following form:
|
||||
|
@ -197,7 +269,8 @@ linear_transform_loops (struct loops *loops)
|
|||
} */
|
||||
if (!loop_nest->inner)
|
||||
continue;
|
||||
for (temp = loop_nest; temp; temp = temp->inner)
|
||||
depth = 1;
|
||||
for (temp = loop_nest->inner; temp; temp = temp->inner)
|
||||
{
|
||||
flow_loop_scan (temp, LOOP_ALL);
|
||||
/* If we have a sibling loop or multiple exit edges, jump ship. */
|
||||
|
@ -246,7 +319,15 @@ linear_transform_loops (struct loops *loops)
|
|||
/* Build the transformation matrix. */
|
||||
trans = lambda_trans_matrix_new (depth, depth);
|
||||
lambda_matrix_id (LTM_MATRIX (trans), depth);
|
||||
trans = try_interchange_loops (trans, depth, dependence_relations);
|
||||
trans = try_interchange_loops (trans, depth, dependence_relations,
|
||||
datarefs, loop_nest->num);
|
||||
|
||||
if (lambda_trans_matrix_id_p (trans))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Check whether the transformation is legal. */
|
||||
if (!lambda_transform_legal_p (trans, depth, dependence_relations))
|
||||
|
@ -255,8 +336,12 @@ linear_transform_loops (struct loops *loops)
|
|||
fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
|
||||
continue;
|
||||
}
|
||||
before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs,
|
||||
&invariants);
|
||||
if (!perfect_nest_p (loop_nest))
|
||||
need_perfect_nest = true;
|
||||
before = gcc_loopnest_to_lambda_loopnest (loops,
|
||||
loop_nest, &oldivs,
|
||||
&invariants,
|
||||
need_perfect_nest);
|
||||
if (!before)
|
||||
continue;
|
||||
|
||||
|
@ -279,4 +364,6 @@ linear_transform_loops (struct loops *loops)
|
|||
free_dependence_relations (dependence_relations);
|
||||
free_data_refs (datarefs);
|
||||
}
|
||||
rewrite_into_loop_closed_ssa ();
|
||||
free_df ();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue