S/390: Handle long-running instructions.

gcc/ChangeLog:

2017-10-18  Robin Dapp  <rdapp@linux.vnet.ibm.com>

	* config/s390/s390.c (NUM_SIDES): New variable.
	(LONGRUNNING_THRESHOLD): New variable.
	(LATENCY_FACTOR): New variable.
	(s390_sched_score): Decrease score for long-running instructions on
	wrong side.
	(s390_sched_variable_issue): Perform bookkeeping for long-running
	instructions.

From-SVN: r253849
This commit is contained in:
Robin Dapp 2017-10-18 09:10:09 +00:00 committed by Robin Dapp
parent ee077fcb8c
commit 67ba339309
2 changed files with 73 additions and 5 deletions

View File

@ -1,3 +1,13 @@
2017-10-18 Robin Dapp <rdapp@linux.vnet.ibm.com>
* config/s390/s390.c (NUM_SIDES): New variable.
(LONGRUNNING_THRESHOLD): New variable.
(LATENCY_FACTOR): New variable.
(s390_sched_score): Decrease score for long-running instructions on
wrong side.
(s390_sched_variable_issue): Perform bookkeeping for long-running
instructions.
2017-10-18 Richard Biener <rguenther@suse.de>
* graphite-isl-ast-to-gimple.c (gcc_expression_from_isl_ast_expr_id):

View File

@ -355,6 +355,18 @@ static rtx_insn *last_scheduled_insn;
#define MAX_SCHED_UNITS 3
static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
#define NUM_SIDES 2
static int current_side = 1;
#define LONGRUNNING_THRESHOLD 5
/* Estimate of number of cycles a long-running insn occupies an
execution unit. */
static unsigned fxu_longrunning[NUM_SIDES];
static unsigned vfu_longrunning[NUM_SIDES];
/* Factor to scale latencies by, determined by measurements. */
#define LATENCY_FACTOR 4
/* The maximum score added for an instruction whose unit hasn't been
in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
give instruction mix scheduling more priority over instruction
@ -14755,7 +14767,24 @@ s390_sched_score (rtx_insn *insn)
if (m & unit_mask)
score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
MAX_SCHED_MIX_DISTANCE);
unsigned latency = insn_default_latency (insn);
int other_side = 1 - current_side;
/* Try to delay long-running insns when side is busy. */
if (latency > LONGRUNNING_THRESHOLD)
{
if (get_attr_z13_unit_fxu (insn) && fxu_longrunning[current_side]
&& fxu_longrunning[other_side] <= fxu_longrunning[current_side])
score = MAX (0, score - 10);
if (get_attr_z13_unit_vfu (insn) && vfu_longrunning[current_side]
&& vfu_longrunning[other_side] <= vfu_longrunning[current_side])
score = MAX (0, score - 10);
}
}
return score;
}
@ -14874,12 +14903,19 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
{
last_scheduled_insn = insn;
bool starts_group = false;
if (s390_tune >= PROCESSOR_2827_ZEC12
&& reload_completed
&& recog_memoized (insn) >= 0)
{
unsigned int mask = s390_get_sched_attrmask (insn);
if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
|| (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
|| (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
starts_group = true;
if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
|| (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
s390_sched_state = S390_SCHED_STATE_CRACKED;
@ -14892,14 +14928,15 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
switch (s390_sched_state)
{
case 0:
starts_group = true;
/* fallthrough */
case 1:
case 2:
s390_sched_state++;
break;
case S390_SCHED_STATE_NORMAL:
if (s390_sched_state == S390_SCHED_STATE_NORMAL)
s390_sched_state = 1;
else
s390_sched_state++;
starts_group = true;
s390_sched_state = 1;
break;
case S390_SCHED_STATE_CRACKED:
s390_sched_state = S390_SCHED_STATE_NORMAL;
@ -14922,6 +14959,27 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
last_scheduled_unit_distance[i]++;
}
/* If this insn started a new group, the side flipped. */
if (starts_group)
current_side = current_side ? 0 : 1;
for (int i = 0; i < 2; i++)
{
if (fxu_longrunning[i] >= 1)
fxu_longrunning[i] -= 1;
if (vfu_longrunning[i] >= 1)
vfu_longrunning[i] -= 1;
}
unsigned latency = insn_default_latency (insn);
if (latency > LONGRUNNING_THRESHOLD)
{
if (get_attr_z13_unit_fxu (insn))
fxu_longrunning[current_side] = latency * LATENCY_FACTOR;
else
vfu_longrunning[current_side] = latency * LATENCY_FACTOR;
}
if (verbose > 5)
{
unsigned int sched_mask;