timevar.def (TV_SCHED_FUSION): New time var.

* timevar.def (TV_SCHED_FUSION): New time var.
	* passes.def (pass_sched_fusion): New pass.
	* config/arm/arm.c (TARGET_SCHED_FUSION_PRIORITY): New.
	(extract_base_offset_in_addr, fusion_load_store): New.
	(arm_sched_fusion_priority): New.
	(arm_option_override): Disable scheduling fusion by default
	on non-armv7 processors or ldrd/strd isn't preferred.
	* sched-int.h (struct _haifa_insn_data): New field.
	(INSN_FUSION_PRIORITY, FUSION_MAX_PRIORITY, sched_fusion): New.
	* sched-rgn.c (rest_of_handle_sched_fusion): New.
	(pass_data_sched_fusion, pass_sched_fusion): New.
	(make_pass_sched_fusion): New.
	* haifa-sched.c (sched_fusion): New.
	(insn_cost): Handle sched_fusion.
	(priority): Handle sched_fusion by calling target hook.
	(enum rfs_decision): New enum value.
	(rfs_str): New element for RFS_FUSION.
	(rank_for_schedule): Support sched_fusion.
	(schedule_insn, max_issue, prune_ready_list): Handle sched_fusion.
	(schedule_block, fix_tick_ready): Handle sched_fusion.
	* common.opt (flag_schedule_fusion): New.
	* tree-pass.h (make_pass_sched_fusion): New.
	* target.def (fusion_priority): New.
	* doc/tm.texi.in (TARGET_SCHED_FUSION_PRIORITY): New.
	* doc/tm.texi: Regenerated.
	* doc/invoke.texi (-fschedule-fusion): New.

	testsuite:
	* gcc.target/arm/ldrd-strd-pair-1.c: New test.
	* gcc.target/arm/vfp-1.c: Improve scanning string.

From-SVN: r217533
This commit is contained in:
Bin Cheng 2014-11-14 02:32:38 +00:00 committed by Bin Cheng
parent 0fb3402f69
commit b16abbcb85
16 changed files with 497 additions and 10 deletions

View File

@ -1,3 +1,32 @@
2014-11-14 Bin Cheng <bin.cheng@arm.com>
* timevar.def (TV_SCHED_FUSION): New time var.
* passes.def (pass_sched_fusion): New pass.
* config/arm/arm.c (TARGET_SCHED_FUSION_PRIORITY): New.
(extract_base_offset_in_addr, fusion_load_store): New.
(arm_sched_fusion_priority): New.
(arm_option_override): Disable scheduling fusion by default
on non-armv7 processors or ldrd/strd isn't preferred.
* sched-int.h (struct _haifa_insn_data): New field.
(INSN_FUSION_PRIORITY, FUSION_MAX_PRIORITY, sched_fusion): New.
* sched-rgn.c (rest_of_handle_sched_fusion): New.
(pass_data_sched_fusion, pass_sched_fusion): New.
(make_pass_sched_fusion): New.
* haifa-sched.c (sched_fusion): New.
(insn_cost): Handle sched_fusion.
(priority): Handle sched_fusion by calling target hook.
(enum rfs_decision): New enum value.
(rfs_str): New element for RFS_FUSION.
(rank_for_schedule): Support sched_fusion.
(schedule_insn, max_issue, prune_ready_list): Handle sched_fusion.
(schedule_block, fix_tick_ready): Handle sched_fusion.
* common.opt (flag_schedule_fusion): New.
* tree-pass.h (make_pass_sched_fusion): New.
* target.def (fusion_priority): New.
* doc/tm.texi.in (TARGET_SCHED_FUSION_PRIORITY): New.
* doc/tm.texi: Regenerated.
* doc/invoke.texi (-fschedule-fusion): New.
2014-11-13 Rong Xu <xur@google.com>
PR debug/63581

View File

@ -1848,6 +1848,10 @@ frename-registers
Common Report Var(flag_rename_registers) Init(2) Optimization
Perform a register renaming optimization pass
fschedule-fusion
Common Report Var(flag_schedule_fusion) Init(2) Optimization
Perform a target dependent instruction fusion optimization pass
freorder-blocks
Common Report Var(flag_reorder_blocks) Optimization
Reorder basic blocks to improve code placement

View File

@ -311,6 +311,8 @@ static unsigned arm_add_stmt_cost (void *data, int count,
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
/* Table of machine attributes. */
static const struct attribute_spec arm_attribute_table[] =
@ -708,6 +710,9 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling. */
@ -3168,6 +3173,12 @@ arm_option_override (void)
if (TARGET_THUMB2)
inline_asm_unified = 1;
/* Disable scheduling fusion by default if it's not armv7 processor
or doesn't prefer ldrd/strd. */
if (flag_schedule_fusion == 2
&& (!arm_arch7 || !current_tune->prefer_ldrd_strd))
flag_schedule_fusion = 0;
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
@ -32350,4 +32361,124 @@ arm_is_constant_pool_ref (rtx x)
&& CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Decompose the address of MEM into a base register and a constant
   offset.  After looking through an outer CONST wrapper, two address
   shapes are recognized: a bare register, for which the offset is
   const0_rtx, and (plus (reg) (const_int)).  On success store the two
   parts in *BASE and *OFFSET and return true.  For any other address
   set both *BASE and *OFFSET to NULL_RTX and return false.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx address;

  gcc_assert (MEM_P (mem));

  /* Look through a (const ...) wrapper, if there is one.  */
  address = XEXP (mem, 0);
  if (GET_CODE (address) == CONST)
    address = XEXP (address, 0);

  switch (GET_CODE (address))
    {
    case REG:
      /* Plain [base]: the offset is zero.  */
      *base = address;
      *offset = const0_rtx;
      return true;

    case PLUS:
      /* Only [base + constant] is accepted.  */
      if (GET_CODE (XEXP (address, 0)) == REG
	  && CONST_INT_P (XEXP (address, 1)))
	{
	  *base = XEXP (address, 0);
	  *offset = XEXP (address, 1);
	  return true;
	}
      break;

    default:
      break;
    }

  /* Unsupported address form.  */
  *base = NULL_RTX;
  *offset = NULL_RTX;
  return false;
}
/* Check whether INSN is a single SET that either stores a register to
   memory or loads a register from memory, with the memory address in
   the form [base+offset].  If so, store the address parts in *BASE and
   *OFFSET, set *IS_LOAD to true for a load and false for a store, and
   return true.  Return false for every other instruction.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx pat, lhs, rhs;
  bool store_p, load_p;

  gcc_assert (INSN_P (insn));

  pat = PATTERN (insn);
  if (GET_CODE (pat) != SET)
    return false;

  lhs = SET_DEST (pat);
  rhs = SET_SRC (pat);

  /* The two shapes are mutually exclusive: reg -> mem is a store,
     mem -> reg is a load.  */
  store_p = GET_CODE (rhs) == REG && GET_CODE (lhs) == MEM;
  load_p = GET_CODE (rhs) == MEM && GET_CODE (lhs) == REG;
  if (!store_p && !load_p)
    return false;

  *is_load = load_p;

  /* The memory operand is the source of a load and the destination of
     a store.  */
  extract_base_offset_in_addr (load_p ? rhs : lhs, base, offset);

  return !(*base == NULL_RTX || *offset == NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Only ldr and str instructions recognized by fusion_load_store are
   candidates for fusion.  Every other instruction gets MAX_PRI - 1 for
   both *FUSION_PRI and *PRI, which is larger than the fusion priority
   of any candidate.  Other kinds of fusion could be supported in the
   future by returning different priorities.

   For a candidate, *FUSION_PRI depends only on whether it is a load or
   a store, and *PRI encodes the base register number and the offset.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  bool is_load;
  rtx base, offset;
  int top, minor, disp;

  gcc_assert (INSN_P (insn));

  top = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      /* Not a fusion candidate.  */
      *pri = top;
      *fusion_pri = top;
      return;
    }

  /* Loads get the larger fusion priority, so they go first.  */
  *fusion_pri = top - (is_load ? 1 : 2);

  minor = top / 2;

  /* A smaller base register number gives a larger priority.  */
  minor -= ((REGNO (base) & 0xff) << 20);

  /* A smaller offset gives a larger priority; negative offsets raise
     the priority instead of lowering it.  */
  disp = (int)(INTVAL (offset));
  if (disp < 0)
    minor += ((- disp) & 0xfffff);
  else
    minor -= (disp & 0xfffff);

  *pri = minor;
}
#include "gt-arm.h"

View File

@ -406,7 +406,7 @@ Objective-C and Objective-C++ Dialects}.
-fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
-fprofile-generate=@var{path} @gol
-fprofile-use -fprofile-use=@var{path} -fprofile-values -fprofile-reorder-functions @gol
-freciprocal-math -free -frename-registers -freorder-blocks @gol
-freciprocal-math -free -frename-registers -fschedule-fusion -freorder-blocks @gol
-freorder-blocks-and-partition -freorder-functions @gol
-frerun-cse-after-loop -freschedule-modulo-scheduled-loops @gol
-frounding-math -fsched2-use-superblocks -fsched-pressure @gol
@ -9575,6 +9575,14 @@ a ``home register''.
Enabled by default with @option{-funroll-loops} and @option{-fpeel-loops}.
@item -fschedule-fusion
@opindex fschedule-fusion
Performs a target dependent pass over the instruction stream to schedule
instructions of same type together because target machine can execute them
more efficiently if they are adjacent to each other in the instruction flow.
Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}.
@item -ftracer
@opindex ftracer
Perform tail duplication to enlarge superblock size. This transformation

View File

@ -6771,6 +6771,76 @@ This hook is called by tree reassociator to determine a level of
parallelism required in output calculations chain.
@end deftypefn
@deftypefn {Target Hook} void TARGET_SCHED_FUSION_PRIORITY (rtx_insn *@var{insn}, int @var{max_pri}, int *@var{fusion_pri}, int *@var{pri})
This hook is called by scheduling fusion pass. It calculates fusion
priorities for each instruction passed in by parameter. The priorities
are returned via pointer parameters.
@var{insn} is the instruction whose priorities need to be calculated.
@var{max_pri} is the maximum priority can be returned in any cases.
@var{fusion_pri} is the pointer parameter through which @var{insn}'s
fusion priority should be calculated and returned.
@var{pri} is the pointer parameter through which @var{insn}'s priority
should be calculated and returned.
Same @var{fusion_pri} should be returned for instructions which should
be scheduled together. Different @var{pri} should be returned for
instructions with same @var{fusion_pri}. @var{fusion_pri} is the major
sort key, @var{pri} is the minor sort key. All instructions will be
scheduled according to the two priorities. All priorities calculated
should be between 0 (exclusive) and @var{max_pri} (inclusive). To avoid
false dependencies, @var{fusion_pri} of instructions which need to be
scheduled together should be smaller than @var{fusion_pri} of irrelevant
instructions.
Given below example:
ldr r10, [r1, 4]
add r4, r4, r10
ldr r15, [r2, 8]
sub r5, r5, r15
ldr r11, [r1, 0]
add r4, r4, r11
ldr r16, [r2, 12]
sub r5, r5, r16
On targets like ARM/AArch64, the two pairs of consecutive loads should be
merged. However, the peephole2 pass can only merge them when the consecutive
loads are actually next to each other in the instruction flow. That's where
this scheduling fusion pass works. This hook calculates priority for each
instruction based on its fusion type, like:
ldr r10, [r1, 4] ; fusion_pri=99, pri=96
add r4, r4, r10 ; fusion_pri=100, pri=100
ldr r15, [r2, 8] ; fusion_pri=98, pri=92
sub r5, r5, r15 ; fusion_pri=100, pri=100
ldr r11, [r1, 0] ; fusion_pri=99, pri=100
add r4, r4, r11 ; fusion_pri=100, pri=100
ldr r16, [r2, 12] ; fusion_pri=98, pri=88
sub r5, r5, r16 ; fusion_pri=100, pri=100
Scheduling fusion pass then sorts all ready to issue instructions according
to the priorities. As a result, instructions of same fusion type will be
pushed together in instruction flow, like:
ldr r11, [r1, 0]
ldr r10, [r1, 4]
ldr r15, [r2, 8]
ldr r16, [r2, 12]
add r4, r4, r10
sub r5, r5, r15
add r4, r4, r11
sub r5, r5, r16
Now peephole2 pass can simply merge the two pairs of loads.
Since scheduling fusion pass relies on peephole2 to do real fusion
work, it is only enabled by default when peephole2 is in effect.
This is firstly introduced on ARM/AArch64 targets, please refer to
the hook implementation for how different fusion types are supported.
@end deftypefn
@node Sections
@section Dividing the Output into Sections (Texts, Data, @dots{})
@c the above section title is WAY too long. maybe cut the part between

View File

@ -4811,6 +4811,8 @@ them: try the first ones in this list first.
@hook TARGET_SCHED_REASSOCIATION_WIDTH
@hook TARGET_SCHED_FUSION_PRIORITY
@node Sections
@section Dividing the Output into Sections (Texts, Data, @dots{})
@c the above section title is WAY too long. maybe cut the part between

View File

@ -1391,6 +1391,9 @@ insn_cost (rtx_insn *insn)
{
int cost;
if (sched_fusion)
return 0;
if (sel_sched_p ())
{
if (recog_memoized (insn) < 0)
@ -1603,6 +1606,8 @@ dep_list_size (rtx insn, sd_list_types_def list)
return nodbgcount;
}
bool sched_fusion;
/* Compute the priority number for INSN. */
static int
priority (rtx_insn *insn)
@ -1617,7 +1622,15 @@ priority (rtx_insn *insn)
{
int this_priority = -1;
if (dep_list_size (insn, SD_LIST_FORW) == 0)
if (sched_fusion)
{
int this_fusion_priority;
targetm.sched.fusion_priority (insn, FUSION_MAX_PRIORITY,
&this_fusion_priority, &this_priority);
INSN_FUSION_PRIORITY (insn) = this_fusion_priority;
}
else if (dep_list_size (insn, SD_LIST_FORW) == 0)
/* ??? We should set INSN_PRIORITY to insn_cost when and insn has
some forward deps but all of them are ignored by
contributes_to_priority hook. At the moment we set priority of
@ -2548,7 +2561,7 @@ enum rfs_decision {
RFS_SCHED_GROUP, RFS_PRESSURE_DELAY, RFS_PRESSURE_TICK,
RFS_FEEDS_BACKTRACK_INSN, RFS_PRIORITY, RFS_SPECULATION,
RFS_SCHED_RANK, RFS_LAST_INSN, RFS_PRESSURE_INDEX,
RFS_DEP_COUNT, RFS_TIE, RFS_N };
RFS_DEP_COUNT, RFS_TIE, RFS_FUSION, RFS_N };
/* Corresponding strings for print outs. */
static const char *rfs_str[RFS_N] = {
@ -2556,7 +2569,7 @@ static const char *rfs_str[RFS_N] = {
"RFS_SCHED_GROUP", "RFS_PRESSURE_DELAY", "RFS_PRESSURE_TICK",
"RFS_FEEDS_BACKTRACK_INSN", "RFS_PRIORITY", "RFS_SPECULATION",
"RFS_SCHED_RANK", "RFS_LAST_INSN", "RFS_PRESSURE_INDEX",
"RFS_DEP_COUNT", "RFS_TIE" };
"RFS_DEP_COUNT", "RFS_TIE", "RFS_FUSION" };
/* Statistical breakdown of rank_for_schedule decisions. */
typedef struct { unsigned stats[RFS_N]; } rank_for_schedule_stats_t;
@ -2627,6 +2640,55 @@ rank_for_schedule (const void *x, const void *y)
/* Make sure that priority of TMP and TMP2 are initialized. */
gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2));
if (sched_fusion)
{
/* The instruction that has the same fusion priority as the last
instruction is the instruction we picked next. If that is not
the case, we sort ready list firstly by fusion priority, then
by priority, and at last by INSN_LUID. */
int a = INSN_FUSION_PRIORITY (tmp);
int b = INSN_FUSION_PRIORITY (tmp2);
int last = -1;
if (last_nondebug_scheduled_insn
&& !NOTE_P (last_nondebug_scheduled_insn)
&& BLOCK_FOR_INSN (tmp)
== BLOCK_FOR_INSN (last_nondebug_scheduled_insn))
last = INSN_FUSION_PRIORITY (last_nondebug_scheduled_insn);
if (a != last && b != last)
{
if (a == b)
{
a = INSN_PRIORITY (tmp);
b = INSN_PRIORITY (tmp2);
}
if (a != b)
return rfs_result (RFS_FUSION, b - a, tmp, tmp2);
else
return rfs_result (RFS_FUSION,
INSN_LUID (tmp) - INSN_LUID (tmp2), tmp, tmp2);
}
else if (a == b)
{
gcc_assert (last_nondebug_scheduled_insn
&& !NOTE_P (last_nondebug_scheduled_insn));
last = INSN_PRIORITY (last_nondebug_scheduled_insn);
a = abs (INSN_PRIORITY (tmp) - last);
b = abs (INSN_PRIORITY (tmp2) - last);
if (a != b)
return rfs_result (RFS_FUSION, a - b, tmp, tmp2);
else
return rfs_result (RFS_FUSION,
INSN_LUID (tmp) - INSN_LUID (tmp2), tmp, tmp2);
}
else if (a == last)
return rfs_result (RFS_FUSION, -1, tmp, tmp2);
else
return rfs_result (RFS_FUSION, 1, tmp, tmp2);
}
if (sched_pressure != SCHED_PRESSURE_NONE)
{
/* Prefer insn whose scheduling results in the smallest register
@ -4007,8 +4069,8 @@ schedule_insn (rtx_insn *insn)
gcc_assert (INSN_TICK (insn) >= MIN_TICK);
if (INSN_TICK (insn) > clock_var)
/* INSN has been prematurely moved from the queue to the ready list.
This is possible only if following flag is set. */
gcc_assert (flag_sched_stalled_insns);
This is possible only if following flags are set. */
gcc_assert (flag_sched_stalled_insns || sched_fusion);
/* ??? Probably, if INSN is scheduled prematurely, we should leave
INSN_TICK untouched. This is a machine-dependent issue, actually. */
@ -5500,6 +5562,9 @@ max_issue (struct ready_list *ready, int privileged_n, state_t state,
struct choice_entry *top;
rtx_insn *insn;
if (sched_fusion)
return 0;
n_ready = ready->n_ready;
gcc_assert (dfa_lookahead >= 1 && privileged_n >= 0
&& privileged_n <= n_ready);
@ -5848,6 +5913,9 @@ prune_ready_list (state_t temp_state, bool first_cycle_insn_p,
bool sched_group_found = false;
int min_cost_group = 1;
if (sched_fusion)
return;
for (i = 0; i < ready.n_ready; i++)
{
rtx_insn *insn = ready_element (&ready, i);
@ -6059,7 +6127,7 @@ schedule_block (basic_block *target_bb, state_t init_state)
rtx_insn *tail = PREV_INSN (next_tail);
if ((current_sched_info->flags & DONT_BREAK_DEPENDENCIES) == 0
&& sched_pressure != SCHED_PRESSURE_MODEL)
&& sched_pressure != SCHED_PRESSURE_MODEL && !sched_fusion)
find_modifiable_mems (head, tail);
/* We used to have code to avoid getting parameters moved from hard
@ -6455,7 +6523,7 @@ schedule_block (basic_block *target_bb, state_t init_state)
{
memcpy (temp_state, curr_state, dfa_state_size);
cost = state_transition (curr_state, insn);
if (sched_pressure != SCHED_PRESSURE_WEIGHTED)
if (sched_pressure != SCHED_PRESSURE_WEIGHTED && !sched_fusion)
gcc_assert (cost < 0);
if (memcmp (temp_state, curr_state, dfa_state_size) != 0)
cycle_issued_insns++;
@ -7288,7 +7356,7 @@ fix_tick_ready (rtx_insn *next)
INSN_TICK (next) = tick;
delay = tick - clock_var;
if (delay <= 0 || sched_pressure != SCHED_PRESSURE_NONE)
if (delay <= 0 || sched_pressure != SCHED_PRESSURE_NONE || sched_fusion)
delay = QUEUE_READY;
change_queue_index (next, delay);

View File

@ -419,6 +419,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_stack_adjustments);
NEXT_PASS (pass_jump2);
NEXT_PASS (pass_duplicate_computed_gotos);
NEXT_PASS (pass_sched_fusion);
NEXT_PASS (pass_peephole2);
NEXT_PASS (pass_if_after_reload);
NEXT_PASS (pass_regrename);

View File

@ -805,6 +805,9 @@ struct _haifa_insn_data
/* A priority for each insn. */
int priority;
/* The fusion priority for each insn. */
int fusion_priority;
/* The minimum clock tick at which the insn becomes ready. This is
used to note timing constraints for the insns in the pending list. */
int tick;
@ -903,6 +906,7 @@ extern vec<haifa_insn_data_def> h_i_d;
/* Accessor macros for h_i_d. There are more in haifa-sched.c and
sched-rgn.c. */
#define INSN_PRIORITY(INSN) (HID (INSN)->priority)
#define INSN_FUSION_PRIORITY(INSN) (HID (INSN)->fusion_priority)
#define INSN_REG_PRESSURE(INSN) (HID (INSN)->reg_pressure)
#define INSN_MAX_REG_PRESSURE(INSN) (HID (INSN)->max_reg_pressure)
#define INSN_REG_USE_LIST(INSN) (HID (INSN)->reg_use_list)
@ -1620,6 +1624,10 @@ extern void sd_copy_back_deps (rtx_insn *, rtx_insn *, bool);
extern void sd_delete_dep (sd_iterator_def);
extern void sd_debug_lists (rtx, sd_list_types_def);
/* Macros and declarations for scheduling fusion. */
#define FUSION_MAX_PRIORITY (INT_MAX)
extern bool sched_fusion;
#endif /* INSN_SCHEDULING */
#endif /* GCC_SCHED_INT_H */

View File

@ -3658,6 +3658,17 @@ rest_of_handle_sched2 (void)
return 0;
}
/* Run the scheduler in fusion mode.  When INSN_SCHEDULING is defined,
   set the global sched_fusion flag, call schedule_insns, and clear the
   flag again afterwards; otherwise do nothing.  Always return 0.  */
static unsigned int
rest_of_handle_sched_fusion (void)
{
#ifdef INSN_SCHEDULING
/* The flag must only be set for the duration of this scheduling run.  */
sched_fusion = true;
schedule_insns ();
sched_fusion = false;
#endif
return 0;
}
namespace {
const pass_data pass_data_live_range_shrinkage =
@ -3800,3 +3811,55 @@ make_pass_sched2 (gcc::context *ctxt)
{
return new pass_sched2 (ctxt);
}
namespace {
/* Static description of the scheduling fusion pass: an RTL pass named
   "sched_fusion", timed under TV_SCHED_FUSION, with TODO_df_finish as
   its only finishing action.  */
const pass_data pass_data_sched_fusion =
{
RTL_PASS, /* type */
"sched_fusion", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_SCHED_FUSION, /* tv_id */
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_df_finish, /* todo_flags_finish */
};
/* The scheduling fusion pass.  execute forwards to
   rest_of_handle_sched_fusion; gate is defined out of line below.  */
class pass_sched_fusion : public rtl_opt_pass
{
public:
pass_sched_fusion (gcc::context *ctxt)
: rtl_opt_pass (pass_data_sched_fusion, ctxt)
{}
/* opt_pass methods: */
virtual bool gate (function *);
virtual unsigned int execute (function *)
{
return rest_of_handle_sched_fusion ();
}
}; // class pass_sched_fusion
/* Return true if the pass should run: optimization is enabled,
   peephole2 and -fschedule-fusion are both on, and the target provides
   a fusion_priority hook.  Without INSN_SCHEDULING the pass never
   runs.  */
bool
pass_sched_fusion::gate (function *)
{
#ifdef INSN_SCHEDULING
/* Scheduling fusion relies on peephole2 to do real fusion work,
so only enable it if peephole2 is in effect. */
return (optimize > 0 && flag_peephole2
&& flag_schedule_fusion && targetm.sched.fusion_priority != NULL);
#else
return 0;
#endif
}
} // anon namespace
/* Create and return a new pass_sched_fusion instance for context CTXT.  */
rtl_opt_pass *
make_pass_sched_fusion (gcc::context *ctxt)
{
return new pass_sched_fusion (ctxt);
}

View File

@ -1526,6 +1526,79 @@ parallelism required in output calculations chain.",
int, (unsigned int opc, machine_mode mode),
hook_int_uint_mode_1)
/* The following member value is a function that returns priority for
fusion of each instruction via pointer parameters. */
DEFHOOK
(fusion_priority,
"This hook is called by scheduling fusion pass. It calculates fusion\n\
priorities for each instruction passed in by parameter. The priorities\n\
are returned via pointer parameters.\n\
\n\
@var{insn} is the instruction whose priorities need to be calculated.\n\
@var{max_pri} is the maximum priority can be returned in any cases.\n\
@var{fusion_pri} is the pointer parameter through which @var{insn}'s\n\
fusion priority should be calculated and returned.\n\
@var{pri} is the pointer parameter through which @var{insn}'s priority\n\
should be calculated and returned.\n\
\n\
Same @var{fusion_pri} should be returned for instructions which should\n\
be scheduled together. Different @var{pri} should be returned for\n\
instructions with same @var{fusion_pri}. @var{fusion_pri} is the major\n\
sort key, @var{pri} is the minor sort key. All instructions will be\n\
scheduled according to the two priorities. All priorities calculated\n\
should be between 0 (exclusive) and @var{max_pri} (inclusive). To avoid\n\
false dependencies, @var{fusion_pri} of instructions which need to be\n\
scheduled together should be smaller than @var{fusion_pri} of irrelevant\n\
instructions.\n\
\n\
Given below example:\n\
\n\
ldr r10, [r1, 4]\n\
add r4, r4, r10\n\
ldr r15, [r2, 8]\n\
sub r5, r5, r15\n\
ldr r11, [r1, 0]\n\
add r4, r4, r11\n\
ldr r16, [r2, 12]\n\
sub r5, r5, r16\n\
\n\
On targets like ARM/AArch64, the two pairs of consecutive loads should be\n\
merged. However, the peephole2 pass can only merge them when the consecutive\n\
loads are actually next to each other in the instruction flow. That's where\n\
this scheduling fusion pass works. This hook calculates priority for each\n\
instruction based on its fusion type, like:\n\
\n\
ldr r10, [r1, 4] ; fusion_pri=99, pri=96 \n\
add r4, r4, r10 ; fusion_pri=100, pri=100 \n\
ldr r15, [r2, 8] ; fusion_pri=98, pri=92 \n\
sub r5, r5, r15 ; fusion_pri=100, pri=100 \n\
ldr r11, [r1, 0] ; fusion_pri=99, pri=100 \n\
add r4, r4, r11 ; fusion_pri=100, pri=100 \n\
ldr r16, [r2, 12] ; fusion_pri=98, pri=88 \n\
sub r5, r5, r16 ; fusion_pri=100, pri=100 \n\
\n\
Scheduling fusion pass then sorts all ready to issue instructions according\n\
to the priorities. As a result, instructions of same fusion type will be\n\
pushed together in instruction flow, like:\n\
\n\
ldr r11, [r1, 0]\n\
ldr r10, [r1, 4]\n\
ldr r15, [r2, 8]\n\
ldr r16, [r2, 12]\n\
add r4, r4, r10\n\
sub r5, r5, r15\n\
add r4, r4, r11\n\
sub r5, r5, r16\n\
\n\
Now peephole2 pass can simply merge the two pairs of loads.\n\
\n\
Since scheduling fusion pass relies on peephole2 to do real fusion\n\
work, it is only enabled by default when peephole2 is in effect.\n\
\n\
This is firstly introduced on ARM/AArch64 targets, please refer to\n\
the hook implementation for how different fusion types are supported.",
void, (rtx_insn *insn, int max_pri, int *fusion_pri, int *pri), NULL)
HOOK_VECTOR_END (sched)
/* Functions relating to OpenMP and Cilk Plus SIMD clones. */

View File

@ -1,3 +1,8 @@
2014-11-14 Bin Cheng <bin.cheng@arm.com>
* gcc.target/arm/ldrd-strd-pair-1.c: New test.
* gcc.target/arm/vfp-1.c: Improve scanning string.
2014-11-13 Rong Xu <xur@google.com>
PR debug/63581

View File

@ -0,0 +1,23 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_prefer_ldrd_strd } */
/* { dg-options "-O2 -mthumb" } */
/* Global object written by foo.  x and y are consecutive int members;
   the char member c sits between y and d.  */
struct
{
int x;
int y;
char c;
int d;
}a;
/* Store X to a.x, copy a.x into a.d through the local C, then store Y
   to a.y, and return 0.  The dg-final directive in this file expects a
   "strd" instruction in the assembly on arm_thumb2_ok targets.  */
int foo(int x, int y)
{
int c;
a.x = x;
c = a.x;
a.d = c;
a.y = y;
return 0;
}
/* { dg-final { scan-assembler "strd\t" { target { arm_thumb2_ok } } } } */

View File

@ -126,7 +126,7 @@ void test_convert () {
}
void test_ldst (float f[], double d[]) {
/* { dg-final { scan-assembler "vldr.32.+ \\\[r0, #1020\\\]" } } */
/* { dg-final { scan-assembler "vldr.32.+ \\\[r0, #-?\[0-9\]+\\\]" } } */
/* { dg-final { scan-assembler "vldr.32.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
/* { dg-final { scan-assembler "add.+ r0, #1024" } } */
/* { dg-final { scan-assembler "vstr.32.+ \\\[r\[0-9\]\\\]\n" } } */

View File

@ -247,6 +247,7 @@ DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2")
DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments")
DEFTIMEVAR (TV_PEEPHOLE2 , "peephole 2")
DEFTIMEVAR (TV_RENAME_REGISTERS , "rename registers")
DEFTIMEVAR (TV_SCHED_FUSION , "scheduling fusion")
DEFTIMEVAR (TV_CPROP_REGISTERS , "hard reg cprop")
DEFTIMEVAR (TV_SCHED2 , "scheduling 2")
DEFTIMEVAR (TV_MACH_DEP , "machine dep reorg")

View File

@ -552,6 +552,7 @@ extern rtl_opt_pass *make_pass_branch_target_load_optimize1 (gcc::context
extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context
*ctxt);
extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_sched_fusion (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_peephole2 (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_if_after_reload (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_regrename (gcc::context *ctxt);