invoke.texi (-fprefetch-loop-arrays, [...]): Document.

* doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl):
	Document.
	* tree-ssa-loop-niter.c (number_of_iterations_ne,
	number_of_iterations_lt, number_of_iterations_cond): Remember the shape
	of the ending condition.
	* tree-ssa-loop-manip.c: Include params.h.
	(build_if_stmt, can_unroll_loop_p, determine_exit_conditions,
	tree_unroll_loop): New functions.
	* tree-pass.h (pass_loop_prefetch): Declare.
	* loop.c (rest_of_handle_loop_optimize): Test for
	-fprefetch-loop-arrays-rtl.
	* tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h.
	* timevar.def (TV_TREE_PREFETCH): New timevar.
	* tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch,
	pass_loop_prefetch): New.
	* tree-cfgcleanup.c: Include tree-scalar-evolution.h.
	(cleanup_tree_cfg_loop): Call scev_reset.
	* common.opt (fprefetch-loop-arrays-rtl): Add.
	* tree-ssa-loop-prefetch.c: New file.
	* tree-outof-ssa.c (struct value_expr_d): Add expr_vars field.
	(new_temp_expr_table): Initialize expr_vars.
	(free_temp_expr_table): Cleanup expr_vars.
	(check_replaceable, find_replaceable_in_bb): Prevent accumulating
	expressions from being merged into one.
	* tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h.
	(struct tree_niter_desc): Add control, bound and cmp fields.
	(tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop):
	Declare.
	* Makefile.in (tree-ssa-loop-prefetch.o): Add.
	(tree-cfgcleanup.o): Add SCEV_H dependency.
	(tree-ssa-loop-manip.o): Add PARAMS_H dependency.
	* passes.c (init_optimization_passes): Add pass_loop_prefetch.

From-SVN: r110964
This commit is contained in:
Zdenek Dvorak 2006-02-14 13:22:11 +01:00 committed by Zdenek Dvorak
parent 0a4288d92e
commit 1768461812
15 changed files with 512 additions and 41 deletions

View File

@ -1,3 +1,38 @@
2006-02-14 Zdenek Dvorak <dvorakz@suse.cz>
* doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl):
Document.
* tree-ssa-loop-niter.c (number_of_iterations_ne,
number_of_iterations_lt, number_of_iterations_cond): Remember the shape
of the ending condition.
* tree-ssa-loop-manip.c: Include params.h.
(build_if_stmt, can_unroll_loop_p, determine_exit_conditions,
tree_unroll_loop): New functions.
* tree-pass.h (pass_loop_prefetch): Declare.
* loop.c (rest_of_handle_loop_optimize): Test for
-fprefetch-loop-arrays-rtl.
* tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h.
* timevar.def (TV_TREE_PREFETCH): New timevar.
* tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch,
pass_loop_prefetch): New.
* tree-cfgcleanup.c: Include tree-scalar-evolution.h.
(cleanup_tree_cfg_loop): Call scev_reset.
* common.opt (fprefetch-loop-arrays-rtl): Add.
* tree-ssa-loop-prefetch.c: New file.
* tree-outof-ssa.c (struct value_expr_d): Add expr_vars field.
(new_temp_expr_table): Initialize expr_vars.
(free_temp_expr_table): Cleanup expr_vars.
(check_replaceable, find_replaceable_in_bb): Prevent accumulating
expressions from being merged into one.
* tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h.
(struct tree_niter_desc): Add control, bound and cmp fields.
(tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop):
Declare.
* Makefile.in (tree-ssa-loop-prefetch.o): Add.
(tree-cfgcleanup.o): Add SCEV_H dependency.
(tree-ssa-loop-manip.o): Add PARAMS_H dependency.
* passes.c (init_optimization_passes): Add pass_loop_prefetch.
2006-02-14 Richard Guenther <rguenther@suse.de>
PR tree-optimization/26258

View File

@ -963,7 +963,7 @@ OBJS-common = \
tree-vect-generic.o tree-ssa-loop.o tree-ssa-loop-niter.o \
tree-ssa-loop-manip.o tree-ssa-threadupdate.o tree-ssa-threadedge.o \
tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \
tree-vect-patterns.o \
tree-vect-patterns.o tree-ssa-loop-prefetch.o \
tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \
tree-ssa-math-opts.o \
tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \
@ -1975,6 +1975,12 @@ tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(FLAGS_H) $(BASIC_BLOCK_H) hard-reg-set.h
tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
tree-chrec.h toplev.h langhooks.h
tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
@ -1984,7 +1990,8 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h
tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h \
$(PARAMS_H)
tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) domwalk.h \
$(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \

View File

@ -659,7 +659,11 @@ Common Report Var(flag_pie,1) VarExists
Generate position-independent code for executables if possible (small mode)
fprefetch-loop-arrays
Common Report Var(flag_prefetch_loop_arrays)
Common Report Var(flag_prefetch_loop_arrays,1)
Generate prefetch instructions, if available, for arrays in loops
fprefetch-loop-arrays-rtl
Common Report Var(flag_prefetch_loop_arrays,2)
Generate prefetch instructions, if available, for arrays in loops
fprofile

View File

@ -321,7 +321,7 @@ Objective-C and Objective-C++ Dialects}.
-funsafe-math-optimizations -funsafe-loop-optimizations -ffinite-math-only @gol
-fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
-fomit-frame-pointer -foptimize-register-move @gol
-foptimize-sibling-calls -fprefetch-loop-arrays @gol
-foptimize-sibling-calls -fprefetch-loop-arrays -fprefetch-loop-arrays-rtl @gol
-fprofile-generate -fprofile-use @gol
-fregmove -frename-registers @gol
-freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol
@ -5171,7 +5171,9 @@ With this option, the compiler will create multiple copies of some
local variables when unrolling a loop which can result in superior code.
@item -fprefetch-loop-arrays
@itemx -fprefetch-loop-arrays-rtl
@opindex fprefetch-loop-arrays
@opindex fprefetch-loop-arrays-rtl
If supported by the target machine, generate instructions to prefetch
memory to improve the performance of loops that access large arrays.
@ -5709,7 +5711,9 @@ Move branches with loop invariant conditions out of the loop, with duplicates
of the loop on both branches (modified according to result of the condition).
@item -fprefetch-loop-arrays
@itemx -fprefetch-loop-arrays-rtl
@opindex fprefetch-loop-arrays
@opindex fprefetch-loop-arrays-rtl
If supported by the target machine, generate instructions to prefetch
memory to improve the performance of loops that access large arrays.

View File

@ -11780,7 +11780,7 @@ rest_of_handle_loop_optimize (void)
free_bb_for_insn ();
profile_status = PROFILE_ABSENT;
do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0;
do_prefetch = flag_prefetch_loop_arrays == 2 ? LOOP_PREFETCH : 0;
if (flag_rerun_loop_opt)
{

View File

@ -601,6 +601,7 @@ init_optimization_passes (void)
vectorizer creates alias relations that are not supported by
pass_may_alias. */
NEXT_PASS (pass_complete_unroll);
NEXT_PASS (pass_loop_prefetch);
NEXT_PASS (pass_iv_optimize);
NEXT_PASS (pass_tree_loop_done);
*p = NULL;

View File

@ -107,6 +107,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching")
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init")
DEFTIMEVAR (TV_TREE_LOOP_FINI , "tree loop fini")

View File

@ -45,6 +45,7 @@ Boston, MA 02110-1301, USA. */
#include "cfglayout.h"
#include "hashtab.h"
#include "tree-ssa-propagate.h"
#include "tree-scalar-evolution.h"
/* Remove any fallthru edge from EV. Return true if an edge was removed. */
@ -559,23 +560,26 @@ cleanup_tree_cfg (void)
void
cleanup_tree_cfg_loop (void)
{
bitmap changed_bbs = BITMAP_ALLOC (NULL);
bool changed = cleanup_tree_cfg ();
cleanup_tree_cfg ();
if (changed)
{
bitmap changed_bbs = BITMAP_ALLOC (NULL);
fix_loop_structure (current_loops, changed_bbs);
calculate_dominance_info (CDI_DOMINATORS);
fix_loop_structure (current_loops, changed_bbs);
calculate_dominance_info (CDI_DOMINATORS);
/* This usually does nothing. But sometimes parts of cfg that originally
were inside a loop get out of it due to edge removal (since they
become unreachable by back edges from latch). */
rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa);
/* This usually does nothing. But sometimes parts of cfg that originally
were inside a loop get out of it due to edge removal (since they
become unreachable by back edges from latch). */
rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa);
BITMAP_FREE (changed_bbs);
BITMAP_FREE (changed_bbs);
#ifdef ENABLE_CHECKING
verify_loop_structure (current_loops);
verify_loop_structure (current_loops);
#endif
scev_reset ();
}
}
/* Merge the PHI nodes at BB into those at BB's sole successor. */

View File

@ -667,6 +667,17 @@ extern void replace_exp (use_operand_p, tree);
extern bool may_propagate_copy (tree, tree);
extern bool may_propagate_copy_into_asm (tree);
/* Affine iv. */
typedef struct
{
/* Iv = BASE + STEP * i. */
tree base, step;
/* True if this iv does not overflow. */
bool no_overflow;
} affine_iv;
/* Description of number of iterations of a loop. All the expressions inside
the structure can be evaluated at the end of the loop's preheader
(and due to ssa form, also anywhere inside the body of the loop). */
@ -697,6 +708,15 @@ struct tree_niter_desc
MAX_SIGNED_INT. However if the (n <= 0) assumption
is eliminated (by looking at the guard on entry of
the loop), then the information would be lost. */
/* The simplified shape of the exit condition. The loop exits if
CONTROL CMP BOUND is false, where CMP is one of NE_EXPR,
LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP is
LE_EXPR and negative if CMP is GE_EXPR. This information is used
by loop unrolling. */
affine_iv control;
tree bound;
enum tree_code cmp;
};
/* In tree-vectorizer.c */
@ -711,6 +731,7 @@ void tree_ssa_lim (struct loops *);
void tree_ssa_unswitch_loops (struct loops *);
void canonicalize_induction_variables (struct loops *);
void tree_unroll_loops_completely (struct loops *, bool);
void tree_ssa_prefetch_arrays (struct loops *);
void remove_empty_loops (struct loops *);
void tree_ssa_iv_optimize (struct loops *);
@ -748,6 +769,10 @@ struct loop *tree_ssa_loop_version (struct loops *, struct loop *, tree,
tree expand_simple_operations (tree);
void substitute_in_loop_info (struct loop *, tree, tree);
edge single_dom_exit (struct loop *);
bool can_unroll_loop_p (struct loop *loop, unsigned factor,
struct tree_niter_desc *niter);
void tree_unroll_loop (struct loops *, struct loop *, unsigned,
edge, struct tree_niter_desc *);
/* In tree-ssa-threadedge.c */
extern bool potentially_threadable_block (basic_block);

View File

@ -1299,7 +1299,8 @@ typedef struct value_expr_d
typedef struct temp_expr_table_d
{
var_map map;
void **version_info;
void **version_info;
bitmap *expr_vars;
value_expr_p *partition_dep_list;
bitmap replaceable;
bool saw_replaceable;
@ -1344,6 +1345,7 @@ new_temp_expr_table (var_map map)
t->map = map;
t->version_info = XCNEWVEC (void *, num_ssa_names + 1);
t->expr_vars = XCNEWVEC (bitmap, num_ssa_names + 1);
t->partition_dep_list = XCNEWVEC (value_expr_p,
num_var_partitions (map) + 1);
@ -1367,6 +1369,7 @@ free_temp_expr_table (temp_expr_table_p t)
{
value_expr_p p;
tree *ret = NULL;
unsigned i;
#ifdef ENABLE_CHECKING
unsigned x;
@ -1383,6 +1386,11 @@ free_temp_expr_table (temp_expr_table_p t)
BITMAP_FREE (t->partition_in_use);
BITMAP_FREE (t->replaceable);
for (i = 0; i <= num_ssa_names; i++)
if (t->expr_vars[i])
BITMAP_FREE (t->expr_vars[i]);
free (t->expr_vars);
free (t->partition_dep_list);
if (t->saw_replaceable)
ret = (tree *)t->version_info;
@ -1545,11 +1553,12 @@ add_dependance (temp_expr_table_p tab, int version, tree var)
static bool
check_replaceable (temp_expr_table_p tab, tree stmt)
{
tree var, def;
tree var, def, basevar;
int version;
var_map map = tab->map;
ssa_op_iter iter;
tree call_expr;
bitmap def_vars = BITMAP_ALLOC (NULL), use_vars;
if (TREE_CODE (stmt) != MODIFY_EXPR)
return false;
@ -1580,12 +1589,19 @@ check_replaceable (temp_expr_table_p tab, tree stmt)
}
version = SSA_NAME_VERSION (def);
basevar = SSA_NAME_VAR (def);
bitmap_set_bit (def_vars, DECL_UID (basevar));
/* Add this expression to the dependency list for each use partition. */
FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
{
add_dependance (tab, version, var);
use_vars = tab->expr_vars[SSA_NAME_VERSION (var)];
if (use_vars)
bitmap_ior_into (def_vars, use_vars);
}
tab->expr_vars[version] = def_vars;
/* If there are VUSES, add a dependence on virtual defs. */
if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE))
@ -1704,7 +1720,7 @@ static void
find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
{
block_stmt_iterator bsi;
tree stmt, def;
tree stmt, def, use;
stmt_ann_t ann;
int partition;
var_map map = tab->map;
@ -1717,30 +1733,34 @@ find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
ann = stmt_ann (stmt);
/* Determine if this stmt finishes an existing expression. */
FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_USE)
FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
{
if (tab->version_info[SSA_NAME_VERSION (def)])
unsigned ver = SSA_NAME_VERSION (use);
if (tab->version_info[ver])
{
bool same_root_var = false;
tree def2;
ssa_op_iter iter2;
bitmap vars = tab->expr_vars[ver];
/* See if the root variables are the same. If they are, we
do not want to do the replacement to avoid problems with
code size, see PR tree-optimization/17549. */
FOR_EACH_SSA_TREE_OPERAND (def2, stmt, iter2, SSA_OP_DEF)
if (SSA_NAME_VAR (def) == SSA_NAME_VAR (def2))
{
same_root_var = true;
break;
}
FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter2, SSA_OP_DEF)
{
if (bitmap_bit_p (vars, DECL_UID (SSA_NAME_VAR (def))))
{
same_root_var = true;
break;
}
}
/* Mark expression as replaceable unless stmt is volatile
or DEF sets the same root variable as STMT. */
if (!ann->has_volatile_ops && !same_root_var)
mark_replaceable (tab, def);
mark_replaceable (tab, use);
else
finish_expr (tab, SSA_NAME_VERSION (def), false);
finish_expr (tab, ver, false);
}
}

View File

@ -247,6 +247,7 @@ extern struct tree_opt_pass pass_record_bounds;
extern struct tree_opt_pass pass_if_conversion;
extern struct tree_opt_pass pass_vectorize;
extern struct tree_opt_pass pass_complete_unroll;
extern struct tree_opt_pass pass_loop_prefetch;
extern struct tree_opt_pass pass_iv_optimize;
extern struct tree_opt_pass pass_tree_loop_done;
extern struct tree_opt_pass pass_ch;

View File

@ -34,17 +34,6 @@ extern void gather_stats_on_scev_database (void);
extern void scev_analysis (void);
void scev_const_prop (void);
/* Affine iv. */
typedef struct
{
/* Iv = BASE + STEP * i. */
tree base, step;
/* True if this iv does not overflow. */
bool no_overflow;
} affine_iv;
extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
#endif /* GCC_TREE_SCALAR_EVOLUTION_H */

View File

@ -36,6 +36,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "tree-pass.h"
#include "cfglayout.h"
#include "tree-scalar-evolution.h"
#include "params.h"
/* Creates an induction variable with value BASE + STEP * iteration in LOOP.
It is expected that neither BASE nor STEP are shared with other expressions
@ -618,3 +619,328 @@ tree_duplicate_loop_to_header_edge (struct loop *loop, edge e,
return true;
}
/* Build if (COND) goto THEN_LABEL; else goto ELSE_LABEL; */
static tree
build_if_stmt (tree cond, tree then_label, tree else_label)
{
return build3 (COND_EXPR, void_type_node,
cond,
build1 (GOTO_EXPR, void_type_node, then_label),
build1 (GOTO_EXPR, void_type_node, else_label));
}
/* Returns true if we can unroll LOOP FACTOR times. Number
of iterations of the loop is returned in NITER. */
bool
can_unroll_loop_p (struct loop *loop, unsigned factor,
struct tree_niter_desc *niter)
{
edge exit;
/* Check whether unrolling is possible. We only want to unroll loops
for that we are able to determine number of iterations. We also
want to split the extra iterations of the loop from its end,
therefore we require that the loop has precisely one
exit. */
exit = single_dom_exit (loop);
if (!exit)
return false;
if (!number_of_iterations_exit (loop, exit, niter, false)
|| niter->cmp == ERROR_MARK)
return false;
/* And of course, we must be able to duplicate the loop. */
if (!can_duplicate_loop_p (loop))
return false;
/* The final loop should be small enough. */
if (tree_num_loop_insns (loop) * factor
> (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
return false;
return true;
}
/* Determines the conditions that control execution of LOOP unrolled FACTOR
times. DESC is number of iterations of LOOP. ENTER_COND is set to
condition that must be true if the main loop can be entered.
EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
how the exit from the unrolled loop should be controlled. */
static void
determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc,
unsigned factor, tree *enter_cond,
tree *exit_base, tree *exit_step,
enum tree_code *exit_cmp, tree *exit_bound)
{
tree stmts;
tree base = desc->control.base;
tree step = desc->control.step;
tree bound = desc->bound;
tree type = TREE_TYPE (base);
tree bigstep, delta;
tree min = lower_bound_in_type (type, type);
tree max = upper_bound_in_type (type, type);
enum tree_code cmp = desc->cmp;
tree cond = boolean_true_node, assum;
*enter_cond = boolean_false_node;
*exit_base = NULL_TREE;
*exit_step = NULL_TREE;
*exit_cmp = ERROR_MARK;
*exit_bound = NULL_TREE;
gcc_assert (cmp != ERROR_MARK);
/* We only need to be correct when we answer question
"Do at least FACTOR more iterations remain?" in the unrolled loop.
Thus, transforming BASE + STEP * i <> BOUND to
BASE + STEP * i < BOUND is ok. */
if (cmp == NE_EXPR)
{
if (tree_int_cst_sign_bit (step))
cmp = GT_EXPR;
else
cmp = LT_EXPR;
}
else if (cmp == LT_EXPR)
{
gcc_assert (!tree_int_cst_sign_bit (step));
}
else if (cmp == GT_EXPR)
{
gcc_assert (tree_int_cst_sign_bit (step));
}
else
gcc_unreachable ();
/* The main body of the loop may be entered iff:
1) desc->may_be_zero is false.
2) it is possible to check that there are at least FACTOR iterations
of the loop, i.e., BOUND - step * FACTOR does not overflow.
3) # of iterations is at least FACTOR */
if (!zero_p (desc->may_be_zero))
cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
invert_truthvalue (desc->may_be_zero),
cond);
bigstep = fold_build2 (MULT_EXPR, type, step,
build_int_cst_type (type, factor));
delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
if (cmp == LT_EXPR)
assum = fold_build2 (GE_EXPR, boolean_type_node,
bound,
fold_build2 (PLUS_EXPR, type, min, delta));
else
assum = fold_build2 (LE_EXPR, boolean_type_node,
bound,
fold_build2 (PLUS_EXPR, type, max, delta));
cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
bound = fold_build2 (MINUS_EXPR, type, bound, delta);
assum = fold_build2 (cmp, boolean_type_node, base, bound);
cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
if (stmts)
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
/* cond now may be a gimple comparison, which would be OK, but also any
other gimple rhs (say a && b). In this case we need to force it to
operand. */
if (!is_gimple_condexpr (cond))
{
cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
if (stmts)
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
}
*enter_cond = cond;
base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
if (stmts)
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
if (stmts)
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
*exit_base = base;
*exit_step = bigstep;
*exit_cmp = cmp;
*exit_bound = bound;
}
/* Unroll LOOP FACTOR times. LOOPS is the loops tree. DESC describes
number of iterations of LOOP. EXIT is the exit of the loop to that
DESC corresponds.
If N is number of iterations of the loop and MAY_BE_ZERO is the condition
under that loop exits in the first iteration even if N != 0,
while (1)
{
x = phi (init, next);
pre;
if (st)
break;
post;
}
becomes (with possibly the exit conditions formulated a bit differently,
avoiding the need to create a new iv):
if (MAY_BE_ZERO || N < FACTOR)
goto rest;
do
{
x = phi (init, next);
pre;
post;
pre;
post;
...
pre;
post;
N -= FACTOR;
} while (N >= FACTOR);
rest:
init' = phi (init, x);
while (1)
{
x = phi (init', next);
pre;
if (st)
break;
post;
} */
void
tree_unroll_loop (struct loops *loops, struct loop *loop, unsigned factor,
edge exit, struct tree_niter_desc *desc)
{
tree dont_exit, exit_if, ctr_before, ctr_after;
tree enter_main_cond, exit_base, exit_step, exit_bound;
enum tree_code exit_cmp;
tree phi_old_loop, phi_new_loop, phi_rest, init, next, new_init, var;
struct loop *new_loop;
basic_block rest, exit_bb;
edge old_entry, new_entry, old_latch, precond_edge, new_exit;
edge nonexit, new_nonexit;
block_stmt_iterator bsi;
use_operand_p op;
bool ok;
unsigned est_niter;
sbitmap wont_exit;
est_niter = expected_loop_iterations (loop);
determine_exit_conditions (loop, desc, factor,
&enter_main_cond, &exit_base, &exit_step,
&exit_cmp, &exit_bound);
new_loop = loop_version (loops, loop, enter_main_cond, NULL, true);
gcc_assert (new_loop != NULL);
update_ssa (TODO_update_ssa);
/* Unroll the loop and remove the old exits. */
dont_exit = ((exit->flags & EDGE_TRUE_VALUE)
? boolean_false_node
: boolean_true_node);
if (exit == EDGE_SUCC (exit->src, 0))
nonexit = EDGE_SUCC (exit->src, 1);
else
nonexit = EDGE_SUCC (exit->src, 0);
nonexit->probability = REG_BR_PROB_BASE;
exit->probability = 0;
nonexit->count += exit->count;
exit->count = 0;
exit_if = last_stmt (exit->src);
COND_EXPR_COND (exit_if) = dont_exit;
update_stmt (exit_if);
wont_exit = sbitmap_alloc (factor);
sbitmap_ones (wont_exit);
ok = tree_duplicate_loop_to_header_edge
(loop, loop_latch_edge (loop), loops, factor - 1,
wont_exit, NULL, NULL, NULL, DLTHE_FLAG_UPDATE_FREQ);
free (wont_exit);
gcc_assert (ok);
update_ssa (TODO_update_ssa);
/* Prepare the cfg and update the phi nodes. */
rest = loop_preheader_edge (new_loop)->src;
precond_edge = single_pred_edge (rest);
loop_split_edge_with (loop_latch_edge (loop), NULL);
exit_bb = single_pred (loop->latch);
new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE);
new_exit->count = loop_preheader_edge (loop)->count;
est_niter = est_niter / factor + 1;
new_exit->probability = REG_BR_PROB_BASE / est_niter;
new_nonexit = single_pred_edge (loop->latch);
new_nonexit->flags = EDGE_TRUE_VALUE;
new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
old_entry = loop_preheader_edge (loop);
new_entry = loop_preheader_edge (new_loop);
old_latch = loop_latch_edge (loop);
for (phi_old_loop = phi_nodes (loop->header),
phi_new_loop = phi_nodes (new_loop->header);
phi_old_loop;
phi_old_loop = PHI_CHAIN (phi_old_loop),
phi_new_loop = PHI_CHAIN (phi_new_loop))
{
init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);
/* Prefer using original variable as a base for the new ssa name.
This is necessary for virtual ops, and useful in order to avoid
losing debug info for real ops. */
if (TREE_CODE (next) == SSA_NAME)
var = SSA_NAME_VAR (next);
else if (TREE_CODE (init) == SSA_NAME)
var = SSA_NAME_VAR (init);
else
{
var = create_tmp_var (TREE_TYPE (init), "unrinittmp");
add_referenced_tmp_var (var);
}
new_init = make_ssa_name (var, NULL_TREE);
phi_rest = create_phi_node (new_init, rest);
SSA_NAME_DEF_STMT (new_init) = phi_rest;
add_phi_arg (phi_rest, init, precond_edge);
add_phi_arg (phi_rest, next, new_exit);
SET_USE (op, new_init);
}
/* Finally create the new counter for number of iterations and add the new
exit instruction. */
bsi = bsi_last (exit_bb);
create_iv (exit_base, exit_step, NULL_TREE, loop,
&bsi, true, &ctr_before, &ctr_after);
exit_if = build_if_stmt (build2 (exit_cmp, boolean_type_node, ctr_after,
exit_bound),
tree_block_label (loop->latch),
tree_block_label (rest));
bsi_insert_after (&bsi, exit_if, BSI_NEW_STMT);
verify_flow_info ();
verify_dominators (CDI_DOMINATORS);
verify_loop_structure (loops);
verify_loop_closed_ssa ();
}

View File

@ -140,6 +140,10 @@ number_of_iterations_ne (tree type, affine_iv *iv, tree final,
tree niter_type = unsigned_type_for (type);
tree s, c, d, bits, assumption, tmp, bound;
niter->control = *iv;
niter->bound = final;
niter->cmp = NE_EXPR;
/* Rearrange the terms so that we get inequality s * i <> c, with s
positive. Also cast everything to the unsigned type. */
if (tree_int_cst_sign_bit (iv->step))
@ -410,6 +414,19 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1,
tree niter_type = unsigned_type_for (type);
tree delta, step, s;
if (nonzero_p (iv0->step))
{
niter->control = *iv0;
niter->cmp = LT_EXPR;
niter->bound = iv1->base;
}
else
{
niter->control = *iv1;
niter->cmp = GT_EXPR;
niter->bound = iv0->base;
}
delta = fold_build2 (MINUS_EXPR, niter_type,
fold_convert (niter_type, iv1->base),
fold_convert (niter_type, iv0->base));
@ -543,6 +560,9 @@ number_of_iterations_cond (tree type, affine_iv *iv0, enum tree_code code,
niter->niter = NULL_TREE;
niter->additional_info = boolean_true_node;
niter->bound = NULL_TREE;
niter->cmp = ERROR_MARK;
/* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
the control variable is on lhs. */
if (code == GE_EXPR || code == GT_EXPR

View File

@ -401,6 +401,40 @@ struct tree_opt_pass pass_complete_unroll =
0 /* letter */
};
/* Prefetching. */
static void
tree_ssa_loop_prefetch (void)
{
if (!current_loops)
return;
tree_ssa_prefetch_arrays (current_loops);
}
static bool
gate_tree_ssa_loop_prefetch (void)
{
return flag_prefetch_loop_arrays == 1;
}
struct tree_opt_pass pass_loop_prefetch =
{
"prefetch", /* name */
gate_tree_ssa_loop_prefetch, /* gate */
tree_ssa_loop_prefetch, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TREE_PREFETCH, /* tv_id */
PROP_cfg | PROP_ssa, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func | TODO_verify_loops, /* todo_flags_finish */
0 /* letter */
};
/* Induction variable optimizations. */
static void