tree-data-ref.c (dr_analyze_alias): Handle case smt is NULL.
* tree-data-ref.c (dr_analyze_alias): Handle case smt is NULL. * tree-predcom.c (mark_virtual_ops_for_renaming): Exported. * tree-ssa-loop-prefetch.c: Include optabs.h. (FENCE_FOLLOWING_MOVNT): New macro. (struct mem_ref): Add independent_p and storent_p fields. (record_ref): Initialize the new fields. (gather_memory_references_ref): Return true if the reference could be analysed. (gather_memory_references): Check whether all memory accesses in loop were recorded. (should_issue_prefetch_p): Return false for nontemporal stores. (nontemporal_store_p, mark_nontemporal_store, emit_mfence_after_loop, may_use_storent_in_loop_p, mark_nontemporal_stores): New functions. (determine_loop_nest_reuse): Detect independent memory references. (loop_prefetch_arrays): Call mark_nontemporal_stores. * tree-flow.h (mark_virtual_ops_for_renaming): Declare. * Makefile.in (tree-ssa-loop-prefetch.o): Add OPTABS_H dependency. * config/i386/i386.h (x86_mfence): Declare. (FENCE_FOLLOWING_MOVNT): Return x86_mfence. * config/i386/i386.c (x86_mfence): New variable. (ix86_init_mmx_sse_builtins): Initialize x86_mfence. * tree-pretty-print.c (dump_generic_node): Mark nontemporal stores. * optabs.c (init_optabs): Initialize storent_optab. * optabs.h (enum optab_index): Add OTI_storent. (storent_optab): Declare. * genopinit.c (optabs): Add initialization for storent_optab. * tree.h (MOVE_NONTEMPORAL): New macro. * expr.c (expand_assignment, store_expr, store_constructor_field, store_constructor, store_field, expand_expr_real_1): Propagate nontemporality of the expanded store. (emit_storent_insn): New function. * expr.h (expand_assignment, store_expr): Declaration changed. * function.c (assign_parm_setup_reg): Pass false as nontemporality to expand_assignment. * stmt.c (expand_asm_expr): Ditto. * calls.c (initialize_argument_information): Pass false as nontemporality to store_expr. * config/i386/sse.md (storentv4sf, storentv2df, storentv2di, storentsi): New. * gcc.dg/tree-ssa/prefetch-7.c: New test.
From-SVN: r125604
This commit is contained in:
parent
6bdff197e6
commit
79f5e44262
@ -1,3 +1,46 @@
|
||||
2007-06-10 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
* tree-data-ref.c (dr_analyze_alias): Handle case smt is NULL.
|
||||
* tree-predcom.c (mark_virtual_ops_for_renaming): Exported.
|
||||
* tree-ssa-loop-prefetch.c: Include optabs.h.
|
||||
(FENCE_FOLLOWING_MOVNT): New macro.
|
||||
(struct mem_ref): Add independent_p and storent_p fields.
|
||||
(record_ref): Initialize the new fields.
|
||||
(gather_memory_references_ref): Return true if the reference
|
||||
could be analysed.
|
||||
(gather_memory_references): Check whether all memory accesses
|
||||
in loop were recorded.
|
||||
(should_issue_prefetch_p): Return false for nontemporal stores.
|
||||
(nontemporal_store_p, mark_nontemporal_store, emit_mfence_after_loop,
|
||||
may_use_storent_in_loop_p, mark_nontemporal_stores): New functions.
|
||||
(determine_loop_nest_reuse): Detect independent memory references.
|
||||
(loop_prefetch_arrays): Call mark_nontemporal_stores.
|
||||
* tree-flow.h (mark_virtual_ops_for_renaming): Declare.
|
||||
* Makefile.in (tree-ssa-loop-prefetch.o): Add OPTABS_H dependency.
|
||||
* config/i386/i386.h (x86_mfence): Declare.
|
||||
(FENCE_FOLLOWING_MOVNT): Return x86_mfence.
|
||||
* config/i386/i386.c (x86_mfence): New variable.
|
||||
(ix86_init_mmx_sse_builtins): Initialize x86_mfence.
|
||||
|
||||
* tree-pretty-print.c (dump_generic_node): Mark nontemporal stores.
|
||||
* optabs.c (init_optabs): Initialize storent_optab.
|
||||
* optabs.h (enum optab_index): Add OTI_storent.
|
||||
(storent_optab): Declare.
|
||||
* genopinit.c (optabs): Add initialization for storent_optab.
|
||||
* tree.h (MOVE_NONTEMPORAL): New macro.
|
||||
* expr.c (expand_assignment, store_expr, store_constructor_field,
|
||||
store_constructor, store_field, expand_expr_real_1): Propagate
|
||||
nontemporality of the expanded store.
|
||||
(emit_storent_insn): New function.
|
||||
* expr.h (expand_assignment, store_expr): Declaration changed.
|
||||
* function.c (assign_parm_setup_reg): Pass false as nontemporality
|
||||
to expand_assignment.
|
||||
* stmt.c (expand_asm_expr): Ditto.
|
||||
* calls.c (initialize_argument_information): Pass false as
|
||||
nontemporality to store_expr.
|
||||
* config/i386/sse.md (storentv4sf, storentv2df, storentv2di,
|
||||
storentsi): New.
|
||||
|
||||
2007-06-09 Daniel Berlin <dberlin@dberlin.org>
|
||||
|
||||
* tree-ssa-structalias.c (set_uids_in_ptset): Add is_deref'd
|
||||
|
@ -2085,7 +2085,8 @@ tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \
|
||||
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
|
||||
tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
|
||||
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
|
||||
tree-chrec.h toplev.h langhooks.h $(TREE_INLINE_H) $(TREE_DATA_REF_H)
|
||||
tree-chrec.h toplev.h langhooks.h $(TREE_INLINE_H) $(TREE_DATA_REF_H) \
|
||||
$(OPTABS_H)
|
||||
tree-predcom.o: tree-predcom.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_P_H) \
|
||||
$(CFGLOOP_H) $(TREE_FLOW_H) $(GGC_H) $(TREE_DATA_REF_H) $(SCEV_H) \
|
||||
$(PARAMS_H) $(DIAGNOSTIC_H) tree-pass.h $(TM_H) coretypes.h tree-affine.h \
|
||||
|
@ -1080,7 +1080,7 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
|
||||
else
|
||||
copy = assign_temp (type, 0, 1, 0);
|
||||
|
||||
store_expr (args[i].tree_value, copy, 0);
|
||||
store_expr (args[i].tree_value, copy, 0, false);
|
||||
|
||||
if (callee_copies)
|
||||
*ecf_flags &= ~(ECF_CONST | ECF_LIBCALL_BLOCK);
|
||||
|
@ -1500,6 +1500,9 @@ int ix86_section_threshold = 65536;
|
||||
char internal_label_prefix[16];
|
||||
int internal_label_prefix_len;
|
||||
|
||||
/* Fence to use after loop using movnt. */
|
||||
tree x86_mfence;
|
||||
|
||||
/* Register class used for passing given 64bit part of the argument.
|
||||
These represent classes as documented by the PS ABI, with the exception
|
||||
of SSESF, SSEDF classes, that are basically SSE class, just gcc will
|
||||
@ -18120,7 +18123,7 @@ ix86_init_mmx_sse_builtins (void)
|
||||
|
||||
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
|
||||
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
|
||||
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
|
||||
x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
|
||||
|
||||
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
|
||||
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
|
||||
|
@ -386,6 +386,11 @@ extern int ix86_isa_flags;
|
||||
#define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0
|
||||
#endif
|
||||
|
||||
/* Fence to use after loop using storent. */
|
||||
|
||||
extern tree x86_mfence;
|
||||
#define FENCE_FOLLOWING_MOVNT x86_mfence
|
||||
|
||||
/* Once GDB has been enhanced to deal with functions without frame
|
||||
pointers, we can change this to allow for elimination of
|
||||
the frame pointer in leaf functions. */
|
||||
|
@ -317,6 +317,38 @@
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
; Expand patterns for non-temporal stores. At the moment, only those
|
||||
; that directly map to insns are defined; it would be possible to
|
||||
; define patterns for other modes that would expand to several insns.
|
||||
|
||||
(define_expand "storentv4sf"
|
||||
[(set (match_operand:V4SF 0 "memory_operand" "=m")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
|
||||
UNSPEC_MOVNT))]
|
||||
"TARGET_SSE"
|
||||
"")
|
||||
|
||||
(define_expand "storentv2df"
|
||||
[(set (match_operand:V2DF 0 "memory_operand" "=m")
|
||||
(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
|
||||
UNSPEC_MOVNT))]
|
||||
"TARGET_SSE2"
|
||||
"")
|
||||
|
||||
(define_expand "storentv2di"
|
||||
[(set (match_operand:V2DI 0 "memory_operand" "=m")
|
||||
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
|
||||
UNSPEC_MOVNT))]
|
||||
"TARGET_SSE2"
|
||||
"")
|
||||
|
||||
(define_expand "storentsi"
|
||||
[(set (match_operand:SI 0 "memory_operand" "=m")
|
||||
(unspec:SI [(match_operand:SI 1 "register_operand" "r")]
|
||||
UNSPEC_MOVNT))]
|
||||
"TARGET_SSE2"
|
||||
"")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; Parallel single-precision floating point arithmetic
|
||||
|
120
gcc/expr.c
120
gcc/expr.c
@ -142,7 +142,7 @@ static void store_constructor_field (rtx, unsigned HOST_WIDE_INT,
|
||||
tree, tree, int, int);
|
||||
static void store_constructor (tree, rtx, int, HOST_WIDE_INT);
|
||||
static rtx store_field (rtx, HOST_WIDE_INT, HOST_WIDE_INT, enum machine_mode,
|
||||
tree, tree, int);
|
||||
tree, tree, int, bool);
|
||||
|
||||
static unsigned HOST_WIDE_INT highest_pow2_factor_for_target (tree, tree);
|
||||
|
||||
@ -4074,10 +4074,11 @@ optimize_bitfield_assignment_op (unsigned HOST_WIDE_INT bitsize,
|
||||
}
|
||||
|
||||
|
||||
/* Expand an assignment that stores the value of FROM into TO. */
|
||||
/* Expand an assignment that stores the value of FROM into TO. If NONTEMPORAL
|
||||
is true, try generating a nontemporal store. */
|
||||
|
||||
void
|
||||
expand_assignment (tree to, tree from)
|
||||
expand_assignment (tree to, tree from, bool nontemporal)
|
||||
{
|
||||
rtx to_rtx = 0;
|
||||
rtx result;
|
||||
@ -4164,12 +4165,13 @@ expand_assignment (tree to, tree from)
|
||||
if (TREE_CODE (TREE_TYPE (from)) == COMPLEX_TYPE)
|
||||
{
|
||||
gcc_assert (bitpos == 0);
|
||||
result = store_expr (from, to_rtx, false);
|
||||
result = store_expr (from, to_rtx, false, nontemporal);
|
||||
}
|
||||
else
|
||||
{
|
||||
gcc_assert (bitpos == 0 || bitpos == GET_MODE_BITSIZE (mode1));
|
||||
result = store_expr (from, XEXP (to_rtx, bitpos != 0), false);
|
||||
result = store_expr (from, XEXP (to_rtx, bitpos != 0), false,
|
||||
nontemporal);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -4195,7 +4197,8 @@ expand_assignment (tree to, tree from)
|
||||
result = NULL;
|
||||
else
|
||||
result = store_field (to_rtx, bitsize, bitpos, mode1, from,
|
||||
TREE_TYPE (tem), get_alias_set (to));
|
||||
TREE_TYPE (tem), get_alias_set (to),
|
||||
nontemporal);
|
||||
}
|
||||
|
||||
if (result)
|
||||
@ -4302,13 +4305,46 @@ expand_assignment (tree to, tree from)
|
||||
/* Compute FROM and store the value in the rtx we got. */
|
||||
|
||||
push_temp_slots ();
|
||||
result = store_expr (from, to_rtx, 0);
|
||||
result = store_expr (from, to_rtx, 0, nontemporal);
|
||||
preserve_temp_slots (result);
|
||||
free_temp_slots ();
|
||||
pop_temp_slots ();
|
||||
return;
|
||||
}
|
||||
|
||||
/* Emits nontemporal store insn that moves FROM to TO. Returns true if this
|
||||
succeeded, false otherwise. */
|
||||
|
||||
static bool
|
||||
emit_storent_insn (rtx to, rtx from)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (to), imode;
|
||||
enum insn_code code = storent_optab->handlers[mode].insn_code;
|
||||
rtx pattern;
|
||||
|
||||
if (code == CODE_FOR_nothing)
|
||||
return false;
|
||||
|
||||
imode = insn_data[code].operand[0].mode;
|
||||
if (!insn_data[code].operand[0].predicate (to, imode))
|
||||
return false;
|
||||
|
||||
imode = insn_data[code].operand[1].mode;
|
||||
if (!insn_data[code].operand[1].predicate (from, imode))
|
||||
{
|
||||
from = copy_to_mode_reg (imode, from);
|
||||
if (!insn_data[code].operand[1].predicate (from, imode))
|
||||
return false;
|
||||
}
|
||||
|
||||
pattern = GEN_FCN (code) (to, from);
|
||||
if (pattern == NULL_RTX)
|
||||
return false;
|
||||
|
||||
emit_insn (pattern);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Generate code for computing expression EXP,
|
||||
and storing the value into TARGET.
|
||||
|
||||
@ -4320,10 +4356,12 @@ expand_assignment (tree to, tree from)
|
||||
be more thorough?
|
||||
|
||||
If CALL_PARAM_P is nonzero, this is a store into a call param on the
|
||||
stack, and block moves may need to be treated specially. */
|
||||
stack, and block moves may need to be treated specially.
|
||||
|
||||
If NONTEMPORAL is true, try using a nontemporal store instruction. */
|
||||
|
||||
rtx
|
||||
store_expr (tree exp, rtx target, int call_param_p)
|
||||
store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
|
||||
{
|
||||
rtx temp;
|
||||
rtx alt_rtl = NULL_RTX;
|
||||
@ -4344,7 +4382,8 @@ store_expr (tree exp, rtx target, int call_param_p)
|
||||
part. */
|
||||
expand_expr (TREE_OPERAND (exp, 0), const0_rtx, VOIDmode,
|
||||
call_param_p ? EXPAND_STACK_PARM : EXPAND_NORMAL);
|
||||
return store_expr (TREE_OPERAND (exp, 1), target, call_param_p);
|
||||
return store_expr (TREE_OPERAND (exp, 1), target, call_param_p,
|
||||
nontemporal);
|
||||
}
|
||||
else if (TREE_CODE (exp) == COND_EXPR && GET_MODE (target) == BLKmode)
|
||||
{
|
||||
@ -4358,11 +4397,13 @@ store_expr (tree exp, rtx target, int call_param_p)
|
||||
do_pending_stack_adjust ();
|
||||
NO_DEFER_POP;
|
||||
jumpifnot (TREE_OPERAND (exp, 0), lab1);
|
||||
store_expr (TREE_OPERAND (exp, 1), target, call_param_p);
|
||||
store_expr (TREE_OPERAND (exp, 1), target, call_param_p,
|
||||
nontemporal);
|
||||
emit_jump_insn (gen_jump (lab2));
|
||||
emit_barrier ();
|
||||
emit_label (lab1);
|
||||
store_expr (TREE_OPERAND (exp, 2), target, call_param_p);
|
||||
store_expr (TREE_OPERAND (exp, 2), target, call_param_p,
|
||||
nontemporal);
|
||||
emit_label (lab2);
|
||||
OK_DEFER_POP;
|
||||
|
||||
@ -4433,7 +4474,12 @@ store_expr (tree exp, rtx target, int call_param_p)
|
||||
}
|
||||
else
|
||||
{
|
||||
temp = expand_expr_real (exp, target, GET_MODE (target),
|
||||
rtx tmp_target;
|
||||
|
||||
/* If we want to use a nontemporal store, force the value to
|
||||
register first. */
|
||||
tmp_target = nontemporal ? NULL_RTX : target;
|
||||
temp = expand_expr_real (exp, tmp_target, GET_MODE (target),
|
||||
(call_param_p
|
||||
? EXPAND_STACK_PARM : EXPAND_NORMAL),
|
||||
&alt_rtl);
|
||||
@ -4591,6 +4637,11 @@ store_expr (tree exp, rtx target, int call_param_p)
|
||||
emit_block_move (target, temp, expr_size (exp),
|
||||
(call_param_p
|
||||
? BLOCK_OP_CALL_PARM : BLOCK_OP_NORMAL));
|
||||
else if (nontemporal
|
||||
&& emit_storent_insn (target, temp))
|
||||
/* If we managed to emit a nontemporal store, there is nothing else to
|
||||
do. */
|
||||
;
|
||||
else
|
||||
{
|
||||
temp = force_operand (temp, target);
|
||||
@ -4941,7 +4992,7 @@ store_constructor_field (rtx target, unsigned HOST_WIDE_INT bitsize,
|
||||
store_constructor (exp, target, cleared, bitsize / BITS_PER_UNIT);
|
||||
}
|
||||
else
|
||||
store_field (target, bitsize, bitpos, mode, exp, type, alias_set);
|
||||
store_field (target, bitsize, bitpos, mode, exp, type, alias_set, false);
|
||||
}
|
||||
|
||||
/* Store the value of constructor EXP into the rtx TARGET.
|
||||
@ -5291,7 +5342,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
|
||||
= gen_reg_rtx (promote_mode (domain, DECL_MODE (index),
|
||||
&unsignedp, 0));
|
||||
SET_DECL_RTL (index, index_r);
|
||||
store_expr (lo_index, index_r, 0);
|
||||
store_expr (lo_index, index_r, 0, false);
|
||||
|
||||
/* Build the head of the loop. */
|
||||
do_pending_stack_adjust ();
|
||||
@ -5318,7 +5369,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
|
||||
store_constructor (value, xtarget, cleared,
|
||||
bitsize / BITS_PER_UNIT);
|
||||
else
|
||||
store_expr (value, xtarget, 0);
|
||||
store_expr (value, xtarget, 0, false);
|
||||
|
||||
/* Generate a conditional jump to exit the loop. */
|
||||
exit_cond = build2 (LT_EXPR, integer_type_node,
|
||||
@ -5329,7 +5380,8 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
|
||||
the loop. */
|
||||
expand_assignment (index,
|
||||
build2 (PLUS_EXPR, TREE_TYPE (index),
|
||||
index, integer_one_node));
|
||||
index, integer_one_node),
|
||||
false);
|
||||
|
||||
emit_jump (loop_start);
|
||||
|
||||
@ -5360,7 +5412,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
|
||||
expand_normal (position),
|
||||
highest_pow2_factor (position));
|
||||
xtarget = adjust_address (xtarget, mode, 0);
|
||||
store_expr (value, xtarget, 0);
|
||||
store_expr (value, xtarget, 0, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -5522,11 +5574,14 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
|
||||
|
||||
ALIAS_SET is the alias set for the destination. This value will
|
||||
(in general) be different from that for TARGET, since TARGET is a
|
||||
reference to the containing structure. */
|
||||
reference to the containing structure.
|
||||
|
||||
If NONTEMPORAL is true, try generating a nontemporal store. */
|
||||
|
||||
static rtx
|
||||
store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
|
||||
enum machine_mode mode, tree exp, tree type, int alias_set)
|
||||
enum machine_mode mode, tree exp, tree type, int alias_set,
|
||||
bool nontemporal)
|
||||
{
|
||||
HOST_WIDE_INT width_mask = 0;
|
||||
|
||||
@ -5561,7 +5616,8 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
|
||||
if (bitsize != (HOST_WIDE_INT) GET_MODE_BITSIZE (GET_MODE (target)))
|
||||
emit_move_insn (object, target);
|
||||
|
||||
store_field (blk_object, bitsize, bitpos, mode, exp, type, alias_set);
|
||||
store_field (blk_object, bitsize, bitpos, mode, exp, type, alias_set,
|
||||
nontemporal);
|
||||
|
||||
emit_move_insn (target, object);
|
||||
|
||||
@ -5574,7 +5630,7 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
|
||||
/* We're storing into a struct containing a single __complex. */
|
||||
|
||||
gcc_assert (!bitpos);
|
||||
return store_expr (exp, target, 0);
|
||||
return store_expr (exp, target, 0, nontemporal);
|
||||
}
|
||||
|
||||
/* If the structure is in a register or if the component
|
||||
@ -5675,7 +5731,7 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
|
||||
if (!MEM_KEEP_ALIAS_SET_P (to_rtx) && MEM_ALIAS_SET (to_rtx) != 0)
|
||||
set_mem_alias_set (to_rtx, alias_set);
|
||||
|
||||
return store_expr (exp, to_rtx, 0);
|
||||
return store_expr (exp, to_rtx, 0, nontemporal);
|
||||
}
|
||||
}
|
||||
|
||||
@ -7831,7 +7887,8 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
|
||||
/* Store data into beginning of memory target. */
|
||||
store_expr (TREE_OPERAND (exp, 0),
|
||||
adjust_address (target, TYPE_MODE (valtype), 0),
|
||||
modifier == EXPAND_STACK_PARM);
|
||||
modifier == EXPAND_STACK_PARM,
|
||||
false);
|
||||
|
||||
else
|
||||
{
|
||||
@ -7844,7 +7901,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
|
||||
* BITS_PER_UNIT),
|
||||
(HOST_WIDE_INT) GET_MODE_BITSIZE (mode)),
|
||||
0, TYPE_MODE (valtype), TREE_OPERAND (exp, 0),
|
||||
type, 0);
|
||||
type, 0, false);
|
||||
}
|
||||
|
||||
/* Return the entire union. */
|
||||
@ -8760,13 +8817,15 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
|
||||
op1 = gen_label_rtx ();
|
||||
jumpifnot (TREE_OPERAND (exp, 0), op0);
|
||||
store_expr (TREE_OPERAND (exp, 1), temp,
|
||||
modifier == EXPAND_STACK_PARM);
|
||||
modifier == EXPAND_STACK_PARM,
|
||||
false);
|
||||
|
||||
emit_jump_insn (gen_jump (op1));
|
||||
emit_barrier ();
|
||||
emit_label (op0);
|
||||
store_expr (TREE_OPERAND (exp, 2), temp,
|
||||
modifier == EXPAND_STACK_PARM);
|
||||
modifier == EXPAND_STACK_PARM,
|
||||
false);
|
||||
|
||||
emit_label (op1);
|
||||
OK_DEFER_POP;
|
||||
@ -8781,7 +8840,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
|
||||
tree lhs = TREE_OPERAND (exp, 0);
|
||||
tree rhs = TREE_OPERAND (exp, 1);
|
||||
gcc_assert (ignore);
|
||||
expand_assignment (lhs, rhs);
|
||||
expand_assignment (lhs, rhs, false);
|
||||
return const0_rtx;
|
||||
}
|
||||
|
||||
@ -8813,13 +8872,14 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
|
||||
do_jump (TREE_OPERAND (rhs, 1),
|
||||
value ? label : 0,
|
||||
value ? 0 : label);
|
||||
expand_assignment (lhs, build_int_cst (TREE_TYPE (rhs), value));
|
||||
expand_assignment (lhs, build_int_cst (TREE_TYPE (rhs), value),
|
||||
MOVE_NONTEMPORAL (exp));
|
||||
do_pending_stack_adjust ();
|
||||
emit_label (label);
|
||||
return const0_rtx;
|
||||
}
|
||||
|
||||
expand_assignment (lhs, rhs);
|
||||
expand_assignment (lhs, rhs, MOVE_NONTEMPORAL (exp));
|
||||
return const0_rtx;
|
||||
}
|
||||
|
||||
|
@ -477,13 +477,13 @@ extern void emit_push_insn (rtx, enum machine_mode, tree, rtx, unsigned int,
|
||||
int, rtx, int, rtx, rtx, int, rtx);
|
||||
|
||||
/* Expand an assignment that stores the value of FROM into TO. */
|
||||
extern void expand_assignment (tree, tree);
|
||||
extern void expand_assignment (tree, tree, bool);
|
||||
|
||||
/* Generate code for computing expression EXP,
|
||||
and storing the value into TARGET.
|
||||
If SUGGEST_REG is nonzero, copy the value through a register
|
||||
and return that register, if that is possible. */
|
||||
extern rtx store_expr (tree, rtx, int);
|
||||
extern rtx store_expr (tree, rtx, int, bool);
|
||||
|
||||
/* Given an rtx that may include add and multiply operations,
|
||||
generate them as insns and return a pseudo-reg containing the value.
|
||||
|
@ -2723,7 +2723,7 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
|
||||
|
||||
/* TREE_USED gets set erroneously during expand_assignment. */
|
||||
save_tree_used = TREE_USED (parm);
|
||||
expand_assignment (parm, make_tree (data->nominal_type, tempreg));
|
||||
expand_assignment (parm, make_tree (data->nominal_type, tempreg), false);
|
||||
TREE_USED (parm) = save_tree_used;
|
||||
all->first_conversion_insn = get_insns ();
|
||||
all->last_conversion_insn = get_last_insn ();
|
||||
|
@ -164,6 +164,7 @@ static const char * const optabs[] =
|
||||
"mov_optab->handlers[$A].insn_code = CODE_FOR_$(mov$a$)",
|
||||
"movstrict_optab->handlers[$A].insn_code = CODE_FOR_$(movstrict$a$)",
|
||||
"movmisalign_optab->handlers[$A].insn_code = CODE_FOR_$(movmisalign$a$)",
|
||||
"storent_optab->handlers[$A].insn_code = CODE_FOR_$(storent$a$)",
|
||||
"cmp_optab->handlers[$A].insn_code = CODE_FOR_$(cmp$a$)",
|
||||
"tst_optab->handlers[$A].insn_code = CODE_FOR_$(tst$a$)",
|
||||
"addcc_optab->handlers[$A].insn_code = CODE_FOR_$(add$acc$)",
|
||||
|
@ -5492,6 +5492,8 @@ init_optabs (void)
|
||||
movstrict_optab = init_optab (STRICT_LOW_PART);
|
||||
cmp_optab = init_optab (COMPARE);
|
||||
|
||||
storent_optab = init_optab (UNKNOWN);
|
||||
|
||||
ucmp_optab = init_optab (UNKNOWN);
|
||||
tst_optab = init_optab (UNKNOWN);
|
||||
|
||||
|
@ -151,6 +151,8 @@ enum optab_index
|
||||
OTI_movstrict,
|
||||
/* Move, with a misaligned memory. */
|
||||
OTI_movmisalign,
|
||||
/* Nontemporal store. */
|
||||
OTI_storent,
|
||||
|
||||
/* Unary operations */
|
||||
/* Negation */
|
||||
@ -367,6 +369,7 @@ extern GTY(()) optab optab_table[OTI_MAX];
|
||||
#define mov_optab (optab_table[OTI_mov])
|
||||
#define movstrict_optab (optab_table[OTI_movstrict])
|
||||
#define movmisalign_optab (optab_table[OTI_movmisalign])
|
||||
#define storent_optab (optab_table[OTI_storent])
|
||||
|
||||
#define neg_optab (optab_table[OTI_neg])
|
||||
#define negv_optab (optab_table[OTI_negv])
|
||||
|
@ -1114,7 +1114,7 @@ expand_asm_expr (tree exp)
|
||||
{
|
||||
if (o[i] != TREE_VALUE (tail))
|
||||
{
|
||||
expand_assignment (o[i], TREE_VALUE (tail));
|
||||
expand_assignment (o[i], TREE_VALUE (tail), false);
|
||||
free_temp_slots ();
|
||||
|
||||
/* Restore the original value so that it's correct the next
|
||||
|
@ -1,3 +1,7 @@
|
||||
2007-06-10 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
* gcc.dg/tree-ssa/prefetch-7.c: New test.
|
||||
|
||||
2007-06-09 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
* gcc.dg/tree-ssa/loop-28.c: New testcase.
|
||||
|
59
gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c
Normal file
59
gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c
Normal file
@ -0,0 +1,59 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-require-effective-target ilp32 } */
|
||||
/* { dg-options "-O2 -fprefetch-loop-arrays -march=athlon -msse2 -mfpmath=sse --param simultaneous-prefetches=100 --param max-unrolled-insns=1 -fdump-tree-aprefetch-details -fdump-tree-final_cleanup" } */
|
||||
|
||||
#define K 1000000
|
||||
int a[K], b[K];
|
||||
|
||||
void test(int *p)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
/* Nontemporal store should be used for a. */
|
||||
for (i = 0; i < K; i++)
|
||||
a[i] = 0;
|
||||
|
||||
/* Nontemporal store should be used for a, nontemporal prefetch for b. */
|
||||
for (i = 0; i < K; i++)
|
||||
a[i] = b[i];
|
||||
|
||||
/* Nontemporal store should not be used here (only write and read temporal
|
||||
prefetches). */
|
||||
for (i = 0; i < K - 10000; i++)
|
||||
a[i + 10000] = a[i];
|
||||
|
||||
/* Nontemporal store should not be used here (only write and read nontemporal
|
||||
prefetches). */
|
||||
for (i = 0; i < K - 100000; i++)
|
||||
a[i + 100000] = a[i];
|
||||
|
||||
/* Nontemporal store should be used neither for a nor for p, as we do not know
|
||||
whether they alias or not. */
|
||||
for (i = 0; i < K; i++)
|
||||
{
|
||||
a[i] = 0;
|
||||
*p++ = 1;
|
||||
}
|
||||
|
||||
/* Nontemporal store should not be used for a, as we do not know whether its
|
||||
value will be reused or not. */
|
||||
for (i = 0; i < 1000; i++)
|
||||
a[i] = 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Issued prefetch" 5 "aprefetch" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Issued nontemporal prefetch" 3 "aprefetch" } } */
|
||||
/* { dg-final { scan-tree-dump-times "nontemporal store" 2 "aprefetch" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "builtin_prefetch" 8 "final_cleanup" } } */
|
||||
/* { dg-final { scan-tree-dump-times "=\\{nt\\}" 2 "final_cleanup" } } */
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_ia32_mfence" 2 "final_cleanup" } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times "prefetchw" 5 } } */
|
||||
/* { dg-final { scan-assembler-times "prefetcht" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "prefetchnta" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "movnti" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "mfence" 2 } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "aprefetch" } } */
|
||||
/* { dg-final { cleanup-tree-dump "final_cleanup" } } */
|
@ -729,7 +729,7 @@ dr_analyze_alias (struct data_reference *dr)
|
||||
}
|
||||
|
||||
DR_SYMBOL_TAG (dr) = smt;
|
||||
if (var_can_have_subvars (smt))
|
||||
if (smt && var_can_have_subvars (smt))
|
||||
DR_SUBVARS (dr) = get_subvars_for_var (smt);
|
||||
|
||||
vops = BITMAP_ALLOC (NULL);
|
||||
|
@ -1019,6 +1019,7 @@ void tree_transform_and_unroll_loop (struct loop *, unsigned,
|
||||
transform_callback, void *);
|
||||
bool contains_abnormal_ssa_name_p (tree);
|
||||
bool stmt_dominates_stmt_p (tree, tree);
|
||||
void mark_virtual_ops_for_renaming (tree);
|
||||
|
||||
/* In tree-ssa-threadedge.c */
|
||||
extern bool potentially_threadable_block (basic_block);
|
||||
|
@ -1378,7 +1378,7 @@ get_init_expr (chain_p chain, unsigned index)
|
||||
|
||||
/* Marks all virtual operands of statement STMT for renaming. */
|
||||
|
||||
static void
|
||||
void
|
||||
mark_virtual_ops_for_renaming (tree stmt)
|
||||
{
|
||||
ssa_op_iter iter;
|
||||
|
@ -1063,6 +1063,9 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
|
||||
false);
|
||||
pp_space (buffer);
|
||||
pp_character (buffer, '=');
|
||||
if (TREE_CODE (node) == GIMPLE_MODIFY_STMT
|
||||
&& MOVE_NONTEMPORAL (node))
|
||||
pp_string (buffer, "{nt}");
|
||||
pp_space (buffer);
|
||||
dump_generic_node (buffer, GENERIC_TREE_OPERAND (node, 1), spc, flags,
|
||||
false);
|
||||
|
@ -47,6 +47,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
||||
#include "langhooks.h"
|
||||
#include "tree-inline.h"
|
||||
#include "tree-data-ref.h"
|
||||
#include "optabs.h"
|
||||
|
||||
/* This pass inserts prefetch instructions to optimize cache usage during
|
||||
accesses to arrays in loops. It processes loops sequentially and:
|
||||
@ -177,6 +178,13 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
||||
is accessed several times in a single iteration of the loop. */
|
||||
#define NONTEMPORAL_FRACTION 16
|
||||
|
||||
/* In case we have to emit a memory fence instruction after the loop that
|
||||
uses nontemporal stores, this defines the builtin to use. */
|
||||
|
||||
#ifndef FENCE_FOLLOWING_MOVNT
|
||||
#define FENCE_FOLLOWING_MOVNT NULL_TREE
|
||||
#endif
|
||||
|
||||
/* The group of references between that reuse may occur. */
|
||||
|
||||
struct mem_ref_group
|
||||
@ -198,7 +206,6 @@ struct mem_ref
|
||||
tree stmt; /* Statement in that the reference appears. */
|
||||
tree mem; /* The reference. */
|
||||
HOST_WIDE_INT delta; /* Constant offset of the reference. */
|
||||
bool write_p; /* Is it a write? */
|
||||
struct mem_ref_group *group; /* The group of references it belongs to. */
|
||||
unsigned HOST_WIDE_INT prefetch_mod;
|
||||
/* Prefetch only each PREFETCH_MOD-th
|
||||
@ -208,8 +215,13 @@ struct mem_ref
|
||||
iterations. */
|
||||
unsigned reuse_distance; /* The amount of data accessed before the first
|
||||
reuse of this value. */
|
||||
bool issue_prefetch_p; /* Should we really issue the prefetch? */
|
||||
struct mem_ref *next; /* The next reference in the group. */
|
||||
unsigned write_p : 1; /* Is it a write? */
|
||||
unsigned independent_p : 1; /* True if the reference is independent on
|
||||
all other references inside the loop. */
|
||||
unsigned issue_prefetch_p : 1; /* Should we really issue the prefetch? */
|
||||
unsigned storent_p : 1; /* True if we changed the store to a
|
||||
nontemporal one. */
|
||||
};
|
||||
|
||||
/* Dumps information about reference REF to FILE. */
|
||||
@ -302,6 +314,8 @@ record_ref (struct mem_ref_group *group, tree stmt, tree mem,
|
||||
(*aref)->issue_prefetch_p = false;
|
||||
(*aref)->group = group;
|
||||
(*aref)->next = NULL;
|
||||
(*aref)->independent_p = false;
|
||||
(*aref)->storent_p = false;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
dump_mem_ref (dump_file, *aref);
|
||||
@ -434,9 +448,10 @@ analyze_ref (struct loop *loop, tree *ref_p, tree *base,
|
||||
}
|
||||
|
||||
/* Record a memory reference REF to the list REFS. The reference occurs in
|
||||
LOOP in statement STMT and it is write if WRITE_P. */
|
||||
LOOP in statement STMT and it is write if WRITE_P. Returns true if the
|
||||
reference was recorded, false otherwise. */
|
||||
|
||||
static void
|
||||
static bool
|
||||
gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs,
|
||||
tree ref, bool write_p, tree stmt)
|
||||
{
|
||||
@ -445,26 +460,31 @@ gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs,
|
||||
struct mem_ref_group *agrp;
|
||||
|
||||
if (!analyze_ref (loop, &ref, &base, &step, &delta, stmt))
|
||||
return;
|
||||
return false;
|
||||
|
||||
/* Now we know that REF = &BASE + STEP * iter + DELTA, where DELTA and STEP
|
||||
are integer constants. */
|
||||
agrp = find_or_create_group (refs, base, step);
|
||||
record_ref (agrp, stmt, ref, delta, write_p);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Record the suitable memory references in LOOP. */
|
||||
/* Record the suitable memory references in LOOP. NO_OTHER_REFS is set to
|
||||
true if there are no other memory references inside the loop. */
|
||||
|
||||
static struct mem_ref_group *
|
||||
gather_memory_references (struct loop *loop)
|
||||
gather_memory_references (struct loop *loop, bool *no_other_refs)
|
||||
{
|
||||
basic_block *body = get_loop_body_in_dom_order (loop);
|
||||
basic_block bb;
|
||||
unsigned i;
|
||||
block_stmt_iterator bsi;
|
||||
tree stmt, lhs, rhs;
|
||||
tree stmt, lhs, rhs, call;
|
||||
struct mem_ref_group *refs = NULL;
|
||||
|
||||
*no_other_refs = true;
|
||||
|
||||
/* Scan the loop body in order, so that the former references precede the
|
||||
later ones. */
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
@ -476,16 +496,26 @@ gather_memory_references (struct loop *loop)
|
||||
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
|
||||
{
|
||||
stmt = bsi_stmt (bsi);
|
||||
call = get_call_expr_in (stmt);
|
||||
if (call && !(call_expr_flags (call) & ECF_CONST))
|
||||
*no_other_refs = false;
|
||||
|
||||
if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
|
||||
continue;
|
||||
{
|
||||
if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
|
||||
*no_other_refs = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
lhs = GIMPLE_STMT_OPERAND (stmt, 0);
|
||||
rhs = GIMPLE_STMT_OPERAND (stmt, 1);
|
||||
|
||||
if (REFERENCE_CLASS_P (rhs))
|
||||
gather_memory_references_ref (loop, &refs, rhs, false, stmt);
|
||||
*no_other_refs &= gather_memory_references_ref (loop, &refs,
|
||||
rhs, false, stmt);
|
||||
if (REFERENCE_CLASS_P (lhs))
|
||||
gather_memory_references_ref (loop, &refs, lhs, true, stmt);
|
||||
*no_other_refs &= gather_memory_references_ref (loop, &refs,
|
||||
lhs, true, stmt);
|
||||
}
|
||||
}
|
||||
free (body);
|
||||
@ -746,6 +776,10 @@ should_issue_prefetch_p (struct mem_ref *ref)
|
||||
if (ref->prefetch_before != PREFETCH_ALL)
|
||||
return false;
|
||||
|
||||
/* Do not prefetch nontemporal stores. */
|
||||
if (ref->storent_p)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -884,6 +918,130 @@ issue_prefetches (struct mem_ref_group *groups,
|
||||
issue_prefetch_ref (ref, unroll_factor, ahead);
|
||||
}
|
||||
|
||||
/* Returns true if REF is a memory write for which a nontemporal store insn
|
||||
can be used. */
|
||||
|
||||
static bool
|
||||
nontemporal_store_p (struct mem_ref *ref)
|
||||
{
|
||||
enum machine_mode mode;
|
||||
enum insn_code code;
|
||||
|
||||
/* REF must be a write that is not reused. We require it to be independent
|
||||
of all other memory references in the loop, as the nontemporal stores may
|
||||
be reordered with respect to other memory references. */
|
||||
if (!ref->write_p
|
||||
|| !ref->independent_p
|
||||
|| ref->reuse_distance < L2_CACHE_SIZE_BYTES)
|
||||
return false;
|
||||
|
||||
/* Check that we have the storent instruction for the mode. */
|
||||
mode = TYPE_MODE (TREE_TYPE (ref->mem));
|
||||
if (mode == BLKmode)
|
||||
return false;
|
||||
|
||||
code = storent_optab->handlers[mode].insn_code;
|
||||
return code != CODE_FOR_nothing;
|
||||
}
|
||||
|
||||
/* If REF is a nontemporal store, we mark the corresponding modify statement
|
||||
and return true. Otherwise, we return false. */
|
||||
|
||||
static bool
|
||||
mark_nontemporal_store (struct mem_ref *ref)
|
||||
{
|
||||
if (!nontemporal_store_p (ref))
|
||||
return false;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "Marked reference %p as a nontemporal store.\n",
|
||||
(void *) ref);
|
||||
|
||||
MOVE_NONTEMPORAL (ref->stmt) = true;
|
||||
ref->storent_p = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Issue a memory fence instruction after LOOP. */
|
||||
|
||||
static void
|
||||
emit_mfence_after_loop (struct loop *loop)
|
||||
{
|
||||
VEC (edge, heap) *exits = get_loop_exit_edges (loop);
|
||||
edge exit;
|
||||
tree call;
|
||||
block_stmt_iterator bsi;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; VEC_iterate (edge, exits, i, exit); i++)
|
||||
{
|
||||
call = build_function_call_expr (FENCE_FOLLOWING_MOVNT, NULL_TREE);
|
||||
|
||||
if (!single_pred_p (exit->dest)
|
||||
/* If possible, we prefer not to insert the fence on other paths
|
||||
in cfg. */
|
||||
&& !(exit->flags & EDGE_ABNORMAL))
|
||||
split_loop_exit_edge (exit);
|
||||
bsi = bsi_after_labels (exit->dest);
|
||||
|
||||
bsi_insert_before (&bsi, call, BSI_NEW_STMT);
|
||||
mark_virtual_ops_for_renaming (call);
|
||||
}
|
||||
|
||||
VEC_free (edge, heap, exits);
|
||||
update_ssa (TODO_update_ssa_only_virtuals);
|
||||
}
|
||||
|
||||
/* Returns true if we can use storent in loop, false otherwise. */
|
||||
|
||||
static bool
|
||||
may_use_storent_in_loop_p (struct loop *loop)
|
||||
{
|
||||
bool ret = true;
|
||||
|
||||
if (loop->inner != NULL)
|
||||
return false;
|
||||
|
||||
/* If we must issue an mfence insn after using storent, check that there
|
||||
is a suitable place for it at each of the loop exits. */
|
||||
if (FENCE_FOLLOWING_MOVNT != NULL_TREE)
|
||||
{
|
||||
VEC (edge, heap) *exits = get_loop_exit_edges (loop);
|
||||
unsigned i;
|
||||
edge exit;
|
||||
|
||||
for (i = 0; VEC_iterate (edge, exits, i, exit); i++)
|
||||
if ((exit->flags & EDGE_ABNORMAL)
|
||||
&& exit->dest == EXIT_BLOCK_PTR)
|
||||
ret = false;
|
||||
|
||||
VEC_free (edge, heap, exits);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Marks nontemporal stores in LOOP. GROUPS contains the description of memory
|
||||
references in the loop. */
|
||||
|
||||
static void
|
||||
mark_nontemporal_stores (struct loop *loop, struct mem_ref_group *groups)
|
||||
{
|
||||
struct mem_ref *ref;
|
||||
bool any = false;
|
||||
|
||||
if (!may_use_storent_in_loop_p (loop))
|
||||
return;
|
||||
|
||||
for (; groups; groups = groups->next)
|
||||
for (ref = groups->refs; ref; ref = ref->next)
|
||||
any |= mark_nontemporal_store (ref);
|
||||
|
||||
if (any && FENCE_FOLLOWING_MOVNT != NULL_TREE)
|
||||
emit_mfence_after_loop (loop);
|
||||
}
|
||||
|
||||
/* Determines whether we can profitably unroll LOOP FACTOR times, and if
|
||||
this is the case, fill in DESC by the description of number of
|
||||
iterations. */
|
||||
@ -1115,16 +1273,18 @@ self_reuse_distance (data_reference_p dr, unsigned *loop_sizes, unsigned n,
|
||||
}
|
||||
|
||||
/* Determines the distance till the first reuse of each reference in REFS
|
||||
in the loop nest of LOOP. */
|
||||
in the loop nest of LOOP. NO_OTHER_REFS is true if there are no other
|
||||
memory references in the loop. */
|
||||
|
||||
static void
|
||||
determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
|
||||
determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
|
||||
bool no_other_refs)
|
||||
{
|
||||
struct loop *nest, *aloop;
|
||||
VEC (data_reference_p, heap) *datarefs = NULL;
|
||||
VEC (ddr_p, heap) *dependences = NULL;
|
||||
struct mem_ref_group *gr;
|
||||
struct mem_ref *ref;
|
||||
struct mem_ref *ref, *refb;
|
||||
VEC (loop_p, heap) *vloops = NULL;
|
||||
unsigned *loop_data_size;
|
||||
unsigned i, j, n;
|
||||
@ -1188,6 +1348,8 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
|
||||
dr->aux = ref;
|
||||
VEC_safe_push (data_reference_p, heap, datarefs, dr);
|
||||
}
|
||||
else
|
||||
no_other_refs = false;
|
||||
}
|
||||
|
||||
for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
|
||||
@ -1196,6 +1358,9 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
|
||||
ref = dr->aux;
|
||||
if (ref->reuse_distance > dist)
|
||||
ref->reuse_distance = dist;
|
||||
|
||||
if (no_other_refs)
|
||||
ref->independent_p = true;
|
||||
}
|
||||
|
||||
compute_all_dependences (datarefs, &dependences, vloops, true);
|
||||
@ -1205,12 +1370,18 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
|
||||
if (DDR_ARE_DEPENDENT (dep) == chrec_known)
|
||||
continue;
|
||||
|
||||
ref = DDR_A (dep)->aux;
|
||||
refb = DDR_B (dep)->aux;
|
||||
|
||||
if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know
|
||||
|| DDR_NUM_DIST_VECTS (dep) == 0)
|
||||
{
|
||||
/* If the dependence cannot be analysed, assume that there might be
|
||||
a reuse. */
|
||||
dist = 0;
|
||||
|
||||
ref->independent_p = false;
|
||||
refb->independent_p = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1228,6 +1399,18 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
|
||||
adist = volume_of_dist_vector (DDR_DIST_VECT (dep, j),
|
||||
loop_data_size, n);
|
||||
|
||||
/* If this is a dependence in the innermost loop (i.e., the
|
||||
distances in all superloops are zero) and it is not
|
||||
the trivial self-dependence with distance zero, record that
|
||||
the references are not completely independent. */
|
||||
if (lambda_vector_zerop (DDR_DIST_VECT (dep, j), n - 1)
|
||||
&& (ref != refb
|
||||
|| DDR_DIST_VECT (dep, j)[n-1] != 0))
|
||||
{
|
||||
ref->independent_p = false;
|
||||
refb->independent_p = false;
|
||||
}
|
||||
|
||||
/* Ignore accesses closer than
|
||||
L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION,
|
||||
so that we use nontemporal prefetches e.g. if single memory
|
||||
@ -1241,12 +1424,10 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
|
||||
}
|
||||
}
|
||||
|
||||
ref = DDR_A (dep)->aux;
|
||||
if (ref->reuse_distance > dist)
|
||||
ref->reuse_distance = dist;
|
||||
ref = DDR_B (dep)->aux;
|
||||
if (ref->reuse_distance > dist)
|
||||
ref->reuse_distance = dist;
|
||||
if (refb->reuse_distance > dist)
|
||||
refb->reuse_distance = dist;
|
||||
}
|
||||
|
||||
free_dependence_relations (dependences);
|
||||
@ -1273,7 +1454,7 @@ loop_prefetch_arrays (struct loop *loop)
|
||||
unsigned ahead, ninsns, time, unroll_factor;
|
||||
HOST_WIDE_INT est_niter;
|
||||
struct tree_niter_desc desc;
|
||||
bool unrolled = false;
|
||||
bool unrolled = false, no_other_refs;
|
||||
|
||||
if (!maybe_hot_bb_p (loop->header))
|
||||
{
|
||||
@ -1283,7 +1464,7 @@ loop_prefetch_arrays (struct loop *loop)
|
||||
}
|
||||
|
||||
/* Step 1: gather the memory references. */
|
||||
refs = gather_memory_references (loop);
|
||||
refs = gather_memory_references (loop, &no_other_refs);
|
||||
|
||||
/* Step 2: estimate the reuse effects. */
|
||||
prune_by_reuse (refs);
|
||||
@ -1291,7 +1472,7 @@ loop_prefetch_arrays (struct loop *loop)
|
||||
if (!anything_to_prefetch_p (refs))
|
||||
goto fail;
|
||||
|
||||
determine_loop_nest_reuse (loop, refs);
|
||||
determine_loop_nest_reuse (loop, refs, no_other_refs);
|
||||
|
||||
/* Step 3: determine the ahead and unroll factor. */
|
||||
|
||||
@ -1313,6 +1494,8 @@ loop_prefetch_arrays (struct loop *loop)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
mark_nontemporal_stores (loop, refs);
|
||||
|
||||
ninsns = tree_num_loop_insns (loop, &eni_size_weights);
|
||||
unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
|
||||
est_niter);
|
||||
|
@ -447,6 +447,8 @@ struct gimple_stmt GTY(())
|
||||
EH_FILTER_MUST_NOT_THROW in EH_FILTER_EXPR
|
||||
TYPE_REF_CAN_ALIAS_ALL in
|
||||
POINTER_TYPE, REFERENCE_TYPE
|
||||
MOVE_NONTEMPORAL in
|
||||
GIMPLE_MODIFY_STMT
|
||||
CASE_HIGH_SEEN in CASE_LABEL_EXPR
|
||||
|
||||
public_flag:
|
||||
@ -1159,6 +1161,10 @@ extern void omp_clause_range_check_failed (const tree, const char *, int,
|
||||
#define TYPE_REF_CAN_ALIAS_ALL(NODE) \
|
||||
(PTR_OR_REF_CHECK (NODE)->base.static_flag)
|
||||
|
||||
/* In a GIMPLE_MODIFY_STMT, means that the store in the statement is nontemporal. */
|
||||
#define MOVE_NONTEMPORAL(NODE) \
|
||||
(GIMPLE_MODIFY_STMT_CHECK (NODE)->base.static_flag)
|
||||
|
||||
/* In an INTEGER_CST, REAL_CST, COMPLEX_CST, or VECTOR_CST, this means
|
||||
there was an overflow in folding. */
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user