tree-data-ref.c (dr_analyze_alias): Handle case smt is NULL.

* tree-data-ref.c (dr_analyze_alias): Handle case smt is NULL.
	* tree-predcom.c (mark_virtual_ops_for_renaming): Exported.
	* tree-ssa-loop-prefetch.c: Include optabs.h.
	(FENCE_FOLLOWING_MOVNT): New macro.
	(struct mem_ref): Add independent_p and storent_p fields.
	(record_ref): Initialize the new fields.
	(gather_memory_references_ref): Return true if the reference
	could be analysed.
	(gather_memory_references): Check whether all memory accesses
	in loop were recorded.
	(should_issue_prefetch_p): Return false for nontemporal stores.
	(nontemporal_store_p, mark_nontemporal_store, emit_mfence_after_loop,
	may_use_storent_in_loop_p, mark_nontemporal_stores): New functions.
	(determine_loop_nest_reuse): Detect independent memory references.
	(loop_prefetch_arrays): Call mark_nontemporal_stores.
	* tree-flow.h (mark_virtual_ops_for_renaming): Declare.
	* Makefile.in (tree-ssa-loop-prefetch.o): Add OPTABS_H dependency.
	* config/i386/i386.h (x86_mfence): Declare.
	(FENCE_FOLLOWING_MOVNT): Return x86_mfence.
	* config/i386/i386.c (x86_mfence): New variable.
	(ix86_init_mmx_sse_builtins): Initialize x86_mfence.

	* tree-pretty-print.c (dump_generic_node): Mark nontemporal stores.
	* optabs.c (init_optabs): Initialize storent_optab.
	* optabs.h (enum optab_index): Add OTI_storent.
	(storent_optab): Declare.
	* genopinit.c (optabs): Add initialization for storent_optab.
	* tree.h (MOVE_NONTEMPORAL): New macro.
	* expr.c (expand_assignment, store_expr, store_constructor_field,
	store_constructor, store_field, expand_expr_real_1): Propagate
	nontemporality of the expanded store.
	(emit_storent_insn): New function.
	* expr.h (expand_assignment, store_expr): Declaration changed.
	* function.c (assign_parm_setup_reg): Pass false as nontemporality
	to expand_assignment.
	* stmt.c (expand_asm_expr): Ditto.
	* calls.c (initialize_argument_information): Pass false as
	nontemporality to store_expr.
	* config/i386/sse.md (storentv4sf, storentv2df, storentv2di,
	storentsi): New.

	* gcc.dg/tree-ssa/prefetch-7.c: New test.

From-SVN: r125604
This commit is contained in:
Zdenek Dvorak 2007-06-10 22:39:22 +02:00 committed by Zdenek Dvorak
parent 6bdff197e6
commit 79f5e44262
21 changed files with 466 additions and 60 deletions

View File

@ -1,3 +1,46 @@
2007-06-10 Zdenek Dvorak <dvorakz@suse.cz>
* tree-data-ref.c (dr_analyze_alias): Handle case smt is NULL.
* tree-predcom.c (mark_virtual_ops_for_renaming): Exported.
* tree-ssa-loop-prefetch.c: Include optabs.h.
(FENCE_FOLLOWING_MOVNT): New macro.
(struct mem_ref): Add independent_p and storent_p fields.
(record_ref): Initialize the new fields.
(gather_memory_references_ref): Return true if the reference
could be analysed.
(gather_memory_references): Check whether all memory accesses
in loop were recorded.
(should_issue_prefetch_p): Return false for nontemporal stores.
(nontemporal_store_p, mark_nontemporal_store, emit_mfence_after_loop,
may_use_storent_in_loop_p, mark_nontemporal_stores): New functions.
(determine_loop_nest_reuse): Detect independent memory references.
(loop_prefetch_arrays): Call mark_nontemporal_stores.
* tree-flow.h (mark_virtual_ops_for_renaming): Declare.
* Makefile.in (tree-ssa-loop-prefetch.o): Add OPTABS_H dependency.
* config/i386/i386.h (x86_mfence): Declare.
(FENCE_FOLLOWING_MOVNT): Return x86_mfence.
* config/i386/i386.c (x86_mfence): New variable.
(ix86_init_mmx_sse_builtins): Initialize x86_mfence.
* tree-pretty-print.c (dump_generic_node): Mark nontemporal stores.
* optabs.c (init_optabs): Initialize storent_optab.
* optabs.h (enum optab_index): Add OTI_storent.
(storent_optab): Declare.
* genopinit.c (optabs): Add initialization for storent_optab.
* tree.h (MOVE_NONTEMPORAL): New macro.
* expr.c (expand_assignment, store_expr, store_constructor_field,
store_constructor, store_field, expand_expr_real_1): Propagate
nontemporality of the expanded store.
(emit_storent_insn): New function.
* expr.h (expand_assignment, store_expr): Declaration changed.
* function.c (assign_parm_setup_reg): Pass false as nontemporality
to expand_assignment.
* stmt.c (expand_asm_expr): Ditto.
* calls.c (initialize_argument_information): Pass false as
nontemporality to store_expr.
* config/i386/sse.md (storentv4sf, storentv2df, storentv2di,
storentsi): New.
2007-06-09 Daniel Berlin <dberlin@dberlin.org>
* tree-ssa-structalias.c (set_uids_in_ptset): Add is_deref'd

View File

@ -2085,7 +2085,8 @@ tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
tree-chrec.h toplev.h langhooks.h $(TREE_INLINE_H) $(TREE_DATA_REF_H)
tree-chrec.h toplev.h langhooks.h $(TREE_INLINE_H) $(TREE_DATA_REF_H) \
$(OPTABS_H)
tree-predcom.o: tree-predcom.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_P_H) \
$(CFGLOOP_H) $(TREE_FLOW_H) $(GGC_H) $(TREE_DATA_REF_H) $(SCEV_H) \
$(PARAMS_H) $(DIAGNOSTIC_H) tree-pass.h $(TM_H) coretypes.h tree-affine.h \

View File

@ -1080,7 +1080,7 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
else
copy = assign_temp (type, 0, 1, 0);
store_expr (args[i].tree_value, copy, 0);
store_expr (args[i].tree_value, copy, 0, false);
if (callee_copies)
*ecf_flags &= ~(ECF_CONST | ECF_LIBCALL_BLOCK);

View File

@ -1500,6 +1500,9 @@ int ix86_section_threshold = 65536;
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt. */
tree x86_mfence;
/* Register class used for passing given 64bit part of the argument.
These represent classes as documented by the PS ABI, with the exception
of SSESF, SSEDF classes, that are basically SSE class, just gcc will
@ -18120,7 +18123,7 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

View File

@ -386,6 +386,11 @@ extern int ix86_isa_flags;
#define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0
#endif
/* Fence to use after loop using storent. */
extern tree x86_mfence;
#define FENCE_FOLLOWING_MOVNT x86_mfence
/* Once GDB has been enhanced to deal with functions without frame
pointers, we can change this to allow for elimination of
the frame pointer in leaf functions. */

View File

@ -317,6 +317,38 @@
(set_attr "prefix_rep" "1")
(set_attr "mode" "TI")])
; Expand patterns for non-temporal stores. At the moment, only those
; that directly map to insns are defined; it would be possible to
; define patterns for other modes that would expand to several insns.
(define_expand "storentv4sf"
[(set (match_operand:V4SF 0 "memory_operand" "=m")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"TARGET_SSE"
"")
(define_expand "storentv2df"
[(set (match_operand:V2DF 0 "memory_operand" "=m")
(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"TARGET_SSE2"
"")
(define_expand "storentv2di"
[(set (match_operand:V2DI 0 "memory_operand" "=m")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"TARGET_SSE2"
"")
(define_expand "storentsi"
[(set (match_operand:SI 0 "memory_operand" "=m")
(unspec:SI [(match_operand:SI 1 "register_operand" "r")]
UNSPEC_MOVNT))]
"TARGET_SSE2"
"")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point arithmetic

View File

@ -142,7 +142,7 @@ static void store_constructor_field (rtx, unsigned HOST_WIDE_INT,
tree, tree, int, int);
static void store_constructor (tree, rtx, int, HOST_WIDE_INT);
static rtx store_field (rtx, HOST_WIDE_INT, HOST_WIDE_INT, enum machine_mode,
tree, tree, int);
tree, tree, int, bool);
static unsigned HOST_WIDE_INT highest_pow2_factor_for_target (tree, tree);
@ -4074,10 +4074,11 @@ optimize_bitfield_assignment_op (unsigned HOST_WIDE_INT bitsize,
}
/* Expand an assignment that stores the value of FROM into TO. */
/* Expand an assignment that stores the value of FROM into TO. If NONTEMPORAL
is true, try generating a nontemporal store. */
void
expand_assignment (tree to, tree from)
expand_assignment (tree to, tree from, bool nontemporal)
{
rtx to_rtx = 0;
rtx result;
@ -4164,12 +4165,13 @@ expand_assignment (tree to, tree from)
if (TREE_CODE (TREE_TYPE (from)) == COMPLEX_TYPE)
{
gcc_assert (bitpos == 0);
result = store_expr (from, to_rtx, false);
result = store_expr (from, to_rtx, false, nontemporal);
}
else
{
gcc_assert (bitpos == 0 || bitpos == GET_MODE_BITSIZE (mode1));
result = store_expr (from, XEXP (to_rtx, bitpos != 0), false);
result = store_expr (from, XEXP (to_rtx, bitpos != 0), false,
nontemporal);
}
}
else
@ -4195,7 +4197,8 @@ expand_assignment (tree to, tree from)
result = NULL;
else
result = store_field (to_rtx, bitsize, bitpos, mode1, from,
TREE_TYPE (tem), get_alias_set (to));
TREE_TYPE (tem), get_alias_set (to),
nontemporal);
}
if (result)
@ -4302,13 +4305,46 @@ expand_assignment (tree to, tree from)
/* Compute FROM and store the value in the rtx we got. */
push_temp_slots ();
result = store_expr (from, to_rtx, 0);
result = store_expr (from, to_rtx, 0, nontemporal);
preserve_temp_slots (result);
free_temp_slots ();
pop_temp_slots ();
return;
}
/* Emits nontemporal store insn that moves FROM to TO. Returns true if this
succeeded, false otherwise. */
static bool
emit_storent_insn (rtx to, rtx from)
{
enum machine_mode mode = GET_MODE (to), imode;
enum insn_code code = storent_optab->handlers[mode].insn_code;
rtx pattern;
if (code == CODE_FOR_nothing)
return false;
imode = insn_data[code].operand[0].mode;
if (!insn_data[code].operand[0].predicate (to, imode))
return false;
imode = insn_data[code].operand[1].mode;
if (!insn_data[code].operand[1].predicate (from, imode))
{
from = copy_to_mode_reg (imode, from);
if (!insn_data[code].operand[1].predicate (from, imode))
return false;
}
pattern = GEN_FCN (code) (to, from);
if (pattern == NULL_RTX)
return false;
emit_insn (pattern);
return true;
}
/* Generate code for computing expression EXP,
and storing the value into TARGET.
@ -4320,10 +4356,12 @@ expand_assignment (tree to, tree from)
be more thorough?
If CALL_PARAM_P is nonzero, this is a store into a call param on the
stack, and block moves may need to be treated specially. */
stack, and block moves may need to be treated specially.
If NONTEMPORAL is true, try using a nontemporal store instruction. */
rtx
store_expr (tree exp, rtx target, int call_param_p)
store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
{
rtx temp;
rtx alt_rtl = NULL_RTX;
@ -4344,7 +4382,8 @@ store_expr (tree exp, rtx target, int call_param_p)
part. */
expand_expr (TREE_OPERAND (exp, 0), const0_rtx, VOIDmode,
call_param_p ? EXPAND_STACK_PARM : EXPAND_NORMAL);
return store_expr (TREE_OPERAND (exp, 1), target, call_param_p);
return store_expr (TREE_OPERAND (exp, 1), target, call_param_p,
nontemporal);
}
else if (TREE_CODE (exp) == COND_EXPR && GET_MODE (target) == BLKmode)
{
@ -4358,11 +4397,13 @@ store_expr (tree exp, rtx target, int call_param_p)
do_pending_stack_adjust ();
NO_DEFER_POP;
jumpifnot (TREE_OPERAND (exp, 0), lab1);
store_expr (TREE_OPERAND (exp, 1), target, call_param_p);
store_expr (TREE_OPERAND (exp, 1), target, call_param_p,
nontemporal);
emit_jump_insn (gen_jump (lab2));
emit_barrier ();
emit_label (lab1);
store_expr (TREE_OPERAND (exp, 2), target, call_param_p);
store_expr (TREE_OPERAND (exp, 2), target, call_param_p,
nontemporal);
emit_label (lab2);
OK_DEFER_POP;
@ -4433,7 +4474,12 @@ store_expr (tree exp, rtx target, int call_param_p)
}
else
{
temp = expand_expr_real (exp, target, GET_MODE (target),
rtx tmp_target;
/* If we want to use a nontemporal store, force the value to
register first. */
tmp_target = nontemporal ? NULL_RTX : target;
temp = expand_expr_real (exp, tmp_target, GET_MODE (target),
(call_param_p
? EXPAND_STACK_PARM : EXPAND_NORMAL),
&alt_rtl);
@ -4591,6 +4637,11 @@ store_expr (tree exp, rtx target, int call_param_p)
emit_block_move (target, temp, expr_size (exp),
(call_param_p
? BLOCK_OP_CALL_PARM : BLOCK_OP_NORMAL));
else if (nontemporal
&& emit_storent_insn (target, temp))
/* If we managed to emit a nontemporal store, there is nothing else to
do. */
;
else
{
temp = force_operand (temp, target);
@ -4941,7 +4992,7 @@ store_constructor_field (rtx target, unsigned HOST_WIDE_INT bitsize,
store_constructor (exp, target, cleared, bitsize / BITS_PER_UNIT);
}
else
store_field (target, bitsize, bitpos, mode, exp, type, alias_set);
store_field (target, bitsize, bitpos, mode, exp, type, alias_set, false);
}
/* Store the value of constructor EXP into the rtx TARGET.
@ -5291,7 +5342,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
= gen_reg_rtx (promote_mode (domain, DECL_MODE (index),
&unsignedp, 0));
SET_DECL_RTL (index, index_r);
store_expr (lo_index, index_r, 0);
store_expr (lo_index, index_r, 0, false);
/* Build the head of the loop. */
do_pending_stack_adjust ();
@ -5318,7 +5369,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
store_constructor (value, xtarget, cleared,
bitsize / BITS_PER_UNIT);
else
store_expr (value, xtarget, 0);
store_expr (value, xtarget, 0, false);
/* Generate a conditional jump to exit the loop. */
exit_cond = build2 (LT_EXPR, integer_type_node,
@ -5329,7 +5380,8 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
the loop. */
expand_assignment (index,
build2 (PLUS_EXPR, TREE_TYPE (index),
index, integer_one_node));
index, integer_one_node),
false);
emit_jump (loop_start);
@ -5360,7 +5412,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
expand_normal (position),
highest_pow2_factor (position));
xtarget = adjust_address (xtarget, mode, 0);
store_expr (value, xtarget, 0);
store_expr (value, xtarget, 0, false);
}
else
{
@ -5522,11 +5574,14 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
ALIAS_SET is the alias set for the destination. This value will
(in general) be different from that for TARGET, since TARGET is a
reference to the containing structure. */
reference to the containing structure.
If NONTEMPORAL is true, try generating a nontemporal store. */
static rtx
store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
enum machine_mode mode, tree exp, tree type, int alias_set)
enum machine_mode mode, tree exp, tree type, int alias_set,
bool nontemporal)
{
HOST_WIDE_INT width_mask = 0;
@ -5561,7 +5616,8 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
if (bitsize != (HOST_WIDE_INT) GET_MODE_BITSIZE (GET_MODE (target)))
emit_move_insn (object, target);
store_field (blk_object, bitsize, bitpos, mode, exp, type, alias_set);
store_field (blk_object, bitsize, bitpos, mode, exp, type, alias_set,
nontemporal);
emit_move_insn (target, object);
@ -5574,7 +5630,7 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
/* We're storing into a struct containing a single __complex. */
gcc_assert (!bitpos);
return store_expr (exp, target, 0);
return store_expr (exp, target, 0, nontemporal);
}
/* If the structure is in a register or if the component
@ -5675,7 +5731,7 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
if (!MEM_KEEP_ALIAS_SET_P (to_rtx) && MEM_ALIAS_SET (to_rtx) != 0)
set_mem_alias_set (to_rtx, alias_set);
return store_expr (exp, to_rtx, 0);
return store_expr (exp, to_rtx, 0, nontemporal);
}
}
@ -7831,7 +7887,8 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
/* Store data into beginning of memory target. */
store_expr (TREE_OPERAND (exp, 0),
adjust_address (target, TYPE_MODE (valtype), 0),
modifier == EXPAND_STACK_PARM);
modifier == EXPAND_STACK_PARM,
false);
else
{
@ -7844,7 +7901,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
* BITS_PER_UNIT),
(HOST_WIDE_INT) GET_MODE_BITSIZE (mode)),
0, TYPE_MODE (valtype), TREE_OPERAND (exp, 0),
type, 0);
type, 0, false);
}
/* Return the entire union. */
@ -8760,13 +8817,15 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
op1 = gen_label_rtx ();
jumpifnot (TREE_OPERAND (exp, 0), op0);
store_expr (TREE_OPERAND (exp, 1), temp,
modifier == EXPAND_STACK_PARM);
modifier == EXPAND_STACK_PARM,
false);
emit_jump_insn (gen_jump (op1));
emit_barrier ();
emit_label (op0);
store_expr (TREE_OPERAND (exp, 2), temp,
modifier == EXPAND_STACK_PARM);
modifier == EXPAND_STACK_PARM,
false);
emit_label (op1);
OK_DEFER_POP;
@ -8781,7 +8840,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
tree lhs = TREE_OPERAND (exp, 0);
tree rhs = TREE_OPERAND (exp, 1);
gcc_assert (ignore);
expand_assignment (lhs, rhs);
expand_assignment (lhs, rhs, false);
return const0_rtx;
}
@ -8813,13 +8872,14 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
do_jump (TREE_OPERAND (rhs, 1),
value ? label : 0,
value ? 0 : label);
expand_assignment (lhs, build_int_cst (TREE_TYPE (rhs), value));
expand_assignment (lhs, build_int_cst (TREE_TYPE (rhs), value),
MOVE_NONTEMPORAL (exp));
do_pending_stack_adjust ();
emit_label (label);
return const0_rtx;
}
expand_assignment (lhs, rhs);
expand_assignment (lhs, rhs, MOVE_NONTEMPORAL (exp));
return const0_rtx;
}

View File

@ -477,13 +477,13 @@ extern void emit_push_insn (rtx, enum machine_mode, tree, rtx, unsigned int,
int, rtx, int, rtx, rtx, int, rtx);
/* Expand an assignment that stores the value of FROM into TO. */
extern void expand_assignment (tree, tree);
extern void expand_assignment (tree, tree, bool);
/* Generate code for computing expression EXP,
and storing the value into TARGET.
If SUGGEST_REG is nonzero, copy the value through a register
and return that register, if that is possible. */
extern rtx store_expr (tree, rtx, int);
extern rtx store_expr (tree, rtx, int, bool);
/* Given an rtx that may include add and multiply operations,
generate them as insns and return a pseudo-reg containing the value.

View File

@ -2723,7 +2723,7 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
/* TREE_USED gets set erroneously during expand_assignment. */
save_tree_used = TREE_USED (parm);
expand_assignment (parm, make_tree (data->nominal_type, tempreg));
expand_assignment (parm, make_tree (data->nominal_type, tempreg), false);
TREE_USED (parm) = save_tree_used;
all->first_conversion_insn = get_insns ();
all->last_conversion_insn = get_last_insn ();

View File

@ -164,6 +164,7 @@ static const char * const optabs[] =
"mov_optab->handlers[$A].insn_code = CODE_FOR_$(mov$a$)",
"movstrict_optab->handlers[$A].insn_code = CODE_FOR_$(movstrict$a$)",
"movmisalign_optab->handlers[$A].insn_code = CODE_FOR_$(movmisalign$a$)",
"storent_optab->handlers[$A].insn_code = CODE_FOR_$(storent$a$)",
"cmp_optab->handlers[$A].insn_code = CODE_FOR_$(cmp$a$)",
"tst_optab->handlers[$A].insn_code = CODE_FOR_$(tst$a$)",
"addcc_optab->handlers[$A].insn_code = CODE_FOR_$(add$acc$)",

View File

@ -5492,6 +5492,8 @@ init_optabs (void)
movstrict_optab = init_optab (STRICT_LOW_PART);
cmp_optab = init_optab (COMPARE);
storent_optab = init_optab (UNKNOWN);
ucmp_optab = init_optab (UNKNOWN);
tst_optab = init_optab (UNKNOWN);

View File

@ -151,6 +151,8 @@ enum optab_index
OTI_movstrict,
/* Move, with a misaligned memory. */
OTI_movmisalign,
/* Nontemporal store. */
OTI_storent,
/* Unary operations */
/* Negation */
@ -367,6 +369,7 @@ extern GTY(()) optab optab_table[OTI_MAX];
#define mov_optab (optab_table[OTI_mov])
#define movstrict_optab (optab_table[OTI_movstrict])
#define movmisalign_optab (optab_table[OTI_movmisalign])
#define storent_optab (optab_table[OTI_storent])
#define neg_optab (optab_table[OTI_neg])
#define negv_optab (optab_table[OTI_negv])

View File

@ -1114,7 +1114,7 @@ expand_asm_expr (tree exp)
{
if (o[i] != TREE_VALUE (tail))
{
expand_assignment (o[i], TREE_VALUE (tail));
expand_assignment (o[i], TREE_VALUE (tail), false);
free_temp_slots ();
/* Restore the original value so that it's correct the next

View File

@ -1,3 +1,7 @@
2007-06-10 Zdenek Dvorak <dvorakz@suse.cz>
* gcc.dg/tree-ssa/prefetch-7.c: New test.
2007-06-09 Zdenek Dvorak <dvorakz@suse.cz>
* gcc.dg/tree-ssa/loop-28.c: New testcase.

View File

@ -0,0 +1,59 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target ilp32 } */
/* { dg-options "-O2 -fprefetch-loop-arrays -march=athlon -msse2 -mfpmath=sse --param simultaneous-prefetches=100 --param max-unrolled-insns=1 -fdump-tree-aprefetch-details -fdump-tree-final_cleanup" } */
#define K 1000000
int a[K], b[K];
void test(int *p)
{
unsigned i;
/* Nontemporal store should be used for a. */
for (i = 0; i < K; i++)
a[i] = 0;
/* Nontemporal store should be used for a, nontemporal prefetch for b. */
for (i = 0; i < K; i++)
a[i] = b[i];
/* Nontemporal store should not be used here (only write and read temporal
prefetches). */
for (i = 0; i < K - 10000; i++)
a[i + 10000] = a[i];
/* Nontemporal store should not be used here (only write and read nontemporal
prefetches). */
for (i = 0; i < K - 100000; i++)
a[i + 100000] = a[i];
/* Nontemporal store should be used neither for a nor for p, as we do not know
whether they alias or not. */
for (i = 0; i < K; i++)
{
a[i] = 0;
*p++ = 1;
}
/* Nontemporal store should not be used for a, as we do not know whether its
value will be reused or not. */
for (i = 0; i < 1000; i++)
a[i] = 0;
}
/* { dg-final { scan-tree-dump-times "Issued prefetch" 5 "aprefetch" } } */
/* { dg-final { scan-tree-dump-times "Issued nontemporal prefetch" 3 "aprefetch" } } */
/* { dg-final { scan-tree-dump-times "nontemporal store" 2 "aprefetch" } } */
/* { dg-final { scan-tree-dump-times "builtin_prefetch" 8 "final_cleanup" } } */
/* { dg-final { scan-tree-dump-times "=\\{nt\\}" 2 "final_cleanup" } } */
/* { dg-final { scan-tree-dump-times "__builtin_ia32_mfence" 2 "final_cleanup" } } */
/* { dg-final { scan-assembler-times "prefetchw" 5 } } */
/* { dg-final { scan-assembler-times "prefetcht" 1 } } */
/* { dg-final { scan-assembler-times "prefetchnta" 2 } } */
/* { dg-final { scan-assembler-times "movnti" 2 } } */
/* { dg-final { scan-assembler-times "mfence" 2 } } */
/* { dg-final { cleanup-tree-dump "aprefetch" } } */
/* { dg-final { cleanup-tree-dump "final_cleanup" } } */

View File

@ -729,7 +729,7 @@ dr_analyze_alias (struct data_reference *dr)
}
DR_SYMBOL_TAG (dr) = smt;
if (var_can_have_subvars (smt))
if (smt && var_can_have_subvars (smt))
DR_SUBVARS (dr) = get_subvars_for_var (smt);
vops = BITMAP_ALLOC (NULL);

View File

@ -1019,6 +1019,7 @@ void tree_transform_and_unroll_loop (struct loop *, unsigned,
transform_callback, void *);
bool contains_abnormal_ssa_name_p (tree);
bool stmt_dominates_stmt_p (tree, tree);
void mark_virtual_ops_for_renaming (tree);
/* In tree-ssa-threadedge.c */
extern bool potentially_threadable_block (basic_block);

View File

@ -1378,7 +1378,7 @@ get_init_expr (chain_p chain, unsigned index)
/* Marks all virtual operands of statement STMT for renaming. */
static void
void
mark_virtual_ops_for_renaming (tree stmt)
{
ssa_op_iter iter;

View File

@ -1063,6 +1063,9 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
false);
pp_space (buffer);
pp_character (buffer, '=');
if (TREE_CODE (node) == GIMPLE_MODIFY_STMT
&& MOVE_NONTEMPORAL (node))
pp_string (buffer, "{nt}");
pp_space (buffer);
dump_generic_node (buffer, GENERIC_TREE_OPERAND (node, 1), spc, flags,
false);

View File

@ -47,6 +47,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "langhooks.h"
#include "tree-inline.h"
#include "tree-data-ref.h"
#include "optabs.h"
/* This pass inserts prefetch instructions to optimize cache usage during
accesses to arrays in loops. It processes loops sequentially and:
@ -177,6 +178,13 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
is accessed several times in a single iteration of the loop. */
#define NONTEMPORAL_FRACTION 16
/* In case we have to emit a memory fence instruction after the loop that
uses nontemporal stores, this defines the builtin to use. */
#ifndef FENCE_FOLLOWING_MOVNT
#define FENCE_FOLLOWING_MOVNT NULL_TREE
#endif
/* The group of references between that reuse may occur. */
struct mem_ref_group
@ -198,7 +206,6 @@ struct mem_ref
tree stmt; /* Statement in that the reference appears. */
tree mem; /* The reference. */
HOST_WIDE_INT delta; /* Constant offset of the reference. */
bool write_p; /* Is it a write? */
struct mem_ref_group *group; /* The group of references it belongs to. */
unsigned HOST_WIDE_INT prefetch_mod;
/* Prefetch only each PREFETCH_MOD-th
@ -208,8 +215,13 @@ struct mem_ref
iterations. */
unsigned reuse_distance; /* The amount of data accessed before the first
reuse of this value. */
bool issue_prefetch_p; /* Should we really issue the prefetch? */
struct mem_ref *next; /* The next reference in the group. */
unsigned write_p : 1; /* Is it a write? */
unsigned independent_p : 1; /* True if the reference is independent on
all other references inside the loop. */
unsigned issue_prefetch_p : 1; /* Should we really issue the prefetch? */
unsigned storent_p : 1; /* True if we changed the store to a
nontemporal one. */
};
/* Dumps information about reference REF to FILE. */
@ -302,6 +314,8 @@ record_ref (struct mem_ref_group *group, tree stmt, tree mem,
(*aref)->issue_prefetch_p = false;
(*aref)->group = group;
(*aref)->next = NULL;
(*aref)->independent_p = false;
(*aref)->storent_p = false;
if (dump_file && (dump_flags & TDF_DETAILS))
dump_mem_ref (dump_file, *aref);
@ -434,9 +448,10 @@ analyze_ref (struct loop *loop, tree *ref_p, tree *base,
}
/* Record a memory reference REF to the list REFS. The reference occurs in
LOOP in statement STMT and it is write if WRITE_P. */
LOOP in statement STMT and it is write if WRITE_P. Returns true if the
reference was recorded, false otherwise. */
static void
static bool
gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs,
tree ref, bool write_p, tree stmt)
{
@ -445,26 +460,31 @@ gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs,
struct mem_ref_group *agrp;
if (!analyze_ref (loop, &ref, &base, &step, &delta, stmt))
return;
return false;
/* Now we know that REF = &BASE + STEP * iter + DELTA, where DELTA and STEP
are integer constants. */
agrp = find_or_create_group (refs, base, step);
record_ref (agrp, stmt, ref, delta, write_p);
return true;
}
/* Record the suitable memory references in LOOP. */
/* Record the suitable memory references in LOOP. NO_OTHER_REFS is set to
true if there are no other memory references inside the loop. */
static struct mem_ref_group *
gather_memory_references (struct loop *loop)
gather_memory_references (struct loop *loop, bool *no_other_refs)
{
basic_block *body = get_loop_body_in_dom_order (loop);
basic_block bb;
unsigned i;
block_stmt_iterator bsi;
tree stmt, lhs, rhs;
tree stmt, lhs, rhs, call;
struct mem_ref_group *refs = NULL;
*no_other_refs = true;
/* Scan the loop body in order, so that the former references precede the
later ones. */
for (i = 0; i < loop->num_nodes; i++)
@ -476,16 +496,26 @@ gather_memory_references (struct loop *loop)
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
{
stmt = bsi_stmt (bsi);
call = get_call_expr_in (stmt);
if (call && !(call_expr_flags (call) & ECF_CONST))
*no_other_refs = false;
if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
continue;
{
if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
*no_other_refs = false;
continue;
}
lhs = GIMPLE_STMT_OPERAND (stmt, 0);
rhs = GIMPLE_STMT_OPERAND (stmt, 1);
if (REFERENCE_CLASS_P (rhs))
gather_memory_references_ref (loop, &refs, rhs, false, stmt);
*no_other_refs &= gather_memory_references_ref (loop, &refs,
rhs, false, stmt);
if (REFERENCE_CLASS_P (lhs))
gather_memory_references_ref (loop, &refs, lhs, true, stmt);
*no_other_refs &= gather_memory_references_ref (loop, &refs,
lhs, true, stmt);
}
}
free (body);
@ -746,6 +776,10 @@ should_issue_prefetch_p (struct mem_ref *ref)
if (ref->prefetch_before != PREFETCH_ALL)
return false;
/* Do not prefetch nontemporal stores. */
if (ref->storent_p)
return false;
return true;
}
@ -884,6 +918,130 @@ issue_prefetches (struct mem_ref_group *groups,
issue_prefetch_ref (ref, unroll_factor, ahead);
}
/* Returns true if REF is a memory write for that a nontemporal store insn
can be used. */
static bool
nontemporal_store_p (struct mem_ref *ref)
{
enum machine_mode mode;
enum insn_code code;
/* REF must be a write that is not reused. We require it to be independent
on all other memory references in the loop, as the nontemporal stores may
be reordered with respect to other memory references. */
if (!ref->write_p
|| !ref->independent_p
|| ref->reuse_distance < L2_CACHE_SIZE_BYTES)
return false;
/* Check that we have the storent instruction for the mode. */
mode = TYPE_MODE (TREE_TYPE (ref->mem));
if (mode == BLKmode)
return false;
code = storent_optab->handlers[mode].insn_code;
return code != CODE_FOR_nothing;
}
/* If REF is a nontemporal store, we mark the corresponding modify statement
and return true. Otherwise, we return false. */
static bool
mark_nontemporal_store (struct mem_ref *ref)
{
if (!nontemporal_store_p (ref))
return false;
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Marked reference %p as a nontemporal store.\n",
(void *) ref);
MOVE_NONTEMPORAL (ref->stmt) = true;
ref->storent_p = true;
return true;
}
/* Issue a memory fence instruction after LOOP. */
static void
emit_mfence_after_loop (struct loop *loop)
{
VEC (edge, heap) *exits = get_loop_exit_edges (loop);
edge exit;
tree call;
block_stmt_iterator bsi;
unsigned i;
for (i = 0; VEC_iterate (edge, exits, i, exit); i++)
{
call = build_function_call_expr (FENCE_FOLLOWING_MOVNT, NULL_TREE);
if (!single_pred_p (exit->dest)
/* If possible, we prefer not to insert the fence on other paths
in cfg. */
&& !(exit->flags & EDGE_ABNORMAL))
split_loop_exit_edge (exit);
bsi = bsi_after_labels (exit->dest);
bsi_insert_before (&bsi, call, BSI_NEW_STMT);
mark_virtual_ops_for_renaming (call);
}
VEC_free (edge, heap, exits);
update_ssa (TODO_update_ssa_only_virtuals);
}
/* Returns true if we can use storent in loop, false otherwise. */
static bool
may_use_storent_in_loop_p (struct loop *loop)
{
bool ret = true;
if (loop->inner != NULL)
return false;
/* If we must issue a mfence insn after using storent, check that there
is a suitable place for it at each of the loop exits. */
if (FENCE_FOLLOWING_MOVNT != NULL_TREE)
{
VEC (edge, heap) *exits = get_loop_exit_edges (loop);
unsigned i;
edge exit;
for (i = 0; VEC_iterate (edge, exits, i, exit); i++)
if ((exit->flags & EDGE_ABNORMAL)
&& exit->dest == EXIT_BLOCK_PTR)
ret = false;
VEC_free (edge, heap, exits);
}
return ret;
}
/* Marks nontemporal stores in LOOP. GROUPS contains the description of memory
references in the loop. */
static void
mark_nontemporal_stores (struct loop *loop, struct mem_ref_group *groups)
{
struct mem_ref *ref;
bool any = false;
if (!may_use_storent_in_loop_p (loop))
return;
for (; groups; groups = groups->next)
for (ref = groups->refs; ref; ref = ref->next)
any |= mark_nontemporal_store (ref);
if (any && FENCE_FOLLOWING_MOVNT != NULL_TREE)
emit_mfence_after_loop (loop);
}
/* Determines whether we can profitably unroll LOOP FACTOR times, and if
this is the case, fill in DESC by the description of number of
iterations. */
@ -1115,16 +1273,18 @@ self_reuse_distance (data_reference_p dr, unsigned *loop_sizes, unsigned n,
}
/* Determines the distance till the first reuse of each reference in REFS
in the loop nest of LOOP. */
in the loop nest of LOOP. NO_OTHER_REFS is true if there are no other
memory references in the loop. */
static void
determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
bool no_other_refs)
{
struct loop *nest, *aloop;
VEC (data_reference_p, heap) *datarefs = NULL;
VEC (ddr_p, heap) *dependences = NULL;
struct mem_ref_group *gr;
struct mem_ref *ref;
struct mem_ref *ref, *refb;
VEC (loop_p, heap) *vloops = NULL;
unsigned *loop_data_size;
unsigned i, j, n;
@ -1188,6 +1348,8 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
dr->aux = ref;
VEC_safe_push (data_reference_p, heap, datarefs, dr);
}
else
no_other_refs = false;
}
for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
@ -1196,6 +1358,9 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
ref = dr->aux;
if (ref->reuse_distance > dist)
ref->reuse_distance = dist;
if (no_other_refs)
ref->independent_p = true;
}
compute_all_dependences (datarefs, &dependences, vloops, true);
@ -1205,12 +1370,18 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
if (DDR_ARE_DEPENDENT (dep) == chrec_known)
continue;
ref = DDR_A (dep)->aux;
refb = DDR_B (dep)->aux;
if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know
|| DDR_NUM_DIST_VECTS (dep) == 0)
{
/* If the dependence cannot be analysed, assume that there might be
a reuse. */
dist = 0;
ref->independent_p = false;
refb->independent_p = false;
}
else
{
@ -1228,6 +1399,18 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
adist = volume_of_dist_vector (DDR_DIST_VECT (dep, j),
loop_data_size, n);
/* If this is a dependence in the innermost loop (i.e., the
distances in all superloops are zero) and it is not
the trivial self-dependence with distance zero, record that
the references are not completely independent. */
if (lambda_vector_zerop (DDR_DIST_VECT (dep, j), n - 1)
&& (ref != refb
|| DDR_DIST_VECT (dep, j)[n-1] != 0))
{
ref->independent_p = false;
refb->independent_p = false;
}
/* Ignore accesses closer than
L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION,
so that we use nontemporal prefetches e.g. if single memory
@ -1241,12 +1424,10 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
}
}
ref = DDR_A (dep)->aux;
if (ref->reuse_distance > dist)
ref->reuse_distance = dist;
ref = DDR_B (dep)->aux;
if (ref->reuse_distance > dist)
ref->reuse_distance = dist;
if (refb->reuse_distance > dist)
refb->reuse_distance = dist;
}
free_dependence_relations (dependences);
@ -1273,7 +1454,7 @@ loop_prefetch_arrays (struct loop *loop)
unsigned ahead, ninsns, time, unroll_factor;
HOST_WIDE_INT est_niter;
struct tree_niter_desc desc;
bool unrolled = false;
bool unrolled = false, no_other_refs;
if (!maybe_hot_bb_p (loop->header))
{
@ -1283,7 +1464,7 @@ loop_prefetch_arrays (struct loop *loop)
}
/* Step 1: gather the memory references. */
refs = gather_memory_references (loop);
refs = gather_memory_references (loop, &no_other_refs);
/* Step 2: estimate the reuse effects. */
prune_by_reuse (refs);
@ -1291,7 +1472,7 @@ loop_prefetch_arrays (struct loop *loop)
if (!anything_to_prefetch_p (refs))
goto fail;
determine_loop_nest_reuse (loop, refs);
determine_loop_nest_reuse (loop, refs, no_other_refs);
/* Step 3: determine the ahead and unroll factor. */
@ -1313,6 +1494,8 @@ loop_prefetch_arrays (struct loop *loop)
goto fail;
}
mark_nontemporal_stores (loop, refs);
ninsns = tree_num_loop_insns (loop, &eni_size_weights);
unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
est_niter);

View File

@ -447,6 +447,8 @@ struct gimple_stmt GTY(())
EH_FILTER_MUST_NOT_THROW in EH_FILTER_EXPR
TYPE_REF_CAN_ALIAS_ALL in
POINTER_TYPE, REFERENCE_TYPE
MOVE_NONTEMPORAL in
GIMPLE_MODIFY_STMT
CASE_HIGH_SEEN in CASE_LABEL_EXPR
public_flag:
@ -1159,6 +1161,10 @@ extern void omp_clause_range_check_failed (const tree, const char *, int,
#define TYPE_REF_CAN_ALIAS_ALL(NODE) \
(PTR_OR_REF_CHECK (NODE)->base.static_flag)
/* In a MODIFY_EXPR, means that the store in the expression is nontemporal. */
#define MOVE_NONTEMPORAL(NODE) \
(GIMPLE_MODIFY_STMT_CHECK (NODE)->base.static_flag)
/* In an INTEGER_CST, REAL_CST, COMPLEX_CST, or VECTOR_CST, this means
there was an overflow in folding. */