Improve AGU stalls avoidance optimization.
2011-09-08 Enkovich Ilya <ilya.enkovich@intel.com> * config/i386/i386-protos.h (ix86_lea_outperforms): New. (ix86_avoid_lea_for_add): Likewise. (ix86_avoid_lea_for_addr): Likewise. (ix86_split_lea_for_addr): Likewise. * config/i386/i386.c (LEA_MAX_STALL): New. (increase_distance): Likewise. (insn_defines_reg): Likewise. (insn_uses_reg_mem): Likewise. (distance_non_agu_define_in_bb): Likewise. (distance_agu_use_in_bb): Likewise. (ix86_lea_outperforms): Likewise. (ix86_ok_to_clobber_flags): Likewise. (ix86_avoid_lea_for_add): Likewise. (ix86_avoid_lea_for_addr): Likewise. (ix86_split_lea_for_addr): Likewise. (distance_non_agu_define): Search in pred BBs added. (distance_agu_use): Search in succ BBs added. (IX86_LEA_PRIORITY): Value changed from 2 to 0. (LEA_SEARCH_THRESHOLD): Now depends on LEA_MAX_STALL. (ix86_lea_for_add_ok): Use ix86_lea_outperforms to make decision. * config/i386/i386.md: Split added to transform non destructive add into move and add. (lea_1): transformed into insn_and_split to avoid AGU stalls. (lea<mode>_2): Likewise. From-SVN: r178689
This commit is contained in:
parent
868141900a
commit
d275ab8b80
@ -1,3 +1,32 @@
|
||||
2011-09-08 Enkovich Ilya <ilya.enkovich@intel.com>
|
||||
|
||||
* config/i386/i386-protos.h (ix86_lea_outperforms): New.
|
||||
(ix86_avoid_lea_for_add): Likewise.
|
||||
(ix86_avoid_lea_for_addr): Likewise.
|
||||
(ix86_split_lea_for_addr): Likewise.
|
||||
|
||||
* config/i386/i386.c (LEA_MAX_STALL): New.
|
||||
(increase_distance): Likewise.
|
||||
(insn_defines_reg): Likewise.
|
||||
(insn_uses_reg_mem): Likewise.
|
||||
(distance_non_agu_define_in_bb): Likewise.
|
||||
(distance_agu_use_in_bb): Likewise.
|
||||
(ix86_lea_outperforms): Likewise.
|
||||
(ix86_ok_to_clobber_flags): Likewise.
|
||||
(ix86_avoid_lea_for_add): Likewise.
|
||||
(ix86_avoid_lea_for_addr): Likewise.
|
||||
(ix86_split_lea_for_addr): Likewise.
|
||||
(distance_non_agu_define): Search in pred BBs added.
|
||||
(distance_agu_use): Search in succ BBs added.
|
||||
(IX86_LEA_PRIORITY): Value changed from 2 to 0.
|
||||
(LEA_SEARCH_THRESHOLD): Now depends on LEA_MAX_STALL.
|
||||
(ix86_lea_for_add_ok): Use ix86_lea_outperforms to make decision.
|
||||
|
||||
* config/i386/i386.md: Split added to transform non destructive
|
||||
add into move and add.
|
||||
(lea_1): transformed into insn_and_split to avoid AGU stalls.
|
||||
(lea<mode>_2): Likewise.
|
||||
|
||||
2011-09-08 Martin Jambor <mjambor@suse.cz>
|
||||
|
||||
PR tree-optimization/50287
|
||||
|
@ -90,6 +90,11 @@ extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
|
||||
extern void ix86_expand_binary_operator (enum rtx_code,
|
||||
enum machine_mode, rtx[]);
|
||||
extern bool ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
|
||||
extern bool ix86_lea_outperforms (rtx, unsigned int, unsigned int,
|
||||
unsigned int, unsigned int);
|
||||
extern bool ix86_avoid_lea_for_add (rtx, rtx[]);
|
||||
extern bool ix86_avoid_lea_for_addr (rtx, rtx[]);
|
||||
extern void ix86_split_lea_for_addr (rtx[], enum machine_mode);
|
||||
extern bool ix86_lea_for_add_ok (rtx, rtx[]);
|
||||
extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high);
|
||||
extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
|
||||
|
@ -15969,12 +15969,125 @@ ix86_split_idivmod (enum machine_mode mode, rtx operands[],
|
||||
emit_label (end_label);
|
||||
}
|
||||
|
||||
#define LEA_SEARCH_THRESHOLD 12
|
||||
#define LEA_MAX_STALL (3)
|
||||
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
|
||||
|
||||
/* Increase given DISTANCE in half-cycles according to
|
||||
dependencies between PREV and NEXT instructions.
|
||||
Add 1 half-cycle if there is no dependency and
|
||||
go to next cycle if there is some dependency. */
|
||||
|
||||
static unsigned int
|
||||
increase_distance (rtx prev, rtx next, unsigned int distance)
|
||||
{
|
||||
df_ref *use_rec;
|
||||
df_ref *def_rec;
|
||||
|
||||
if (!prev || !next)
|
||||
return distance + (distance & 1) + 2;
|
||||
|
||||
if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
|
||||
return distance + 1;
|
||||
|
||||
for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
|
||||
for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
|
||||
if (!DF_REF_IS_ARTIFICIAL (*def_rec)
|
||||
&& DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
|
||||
return distance + (distance & 1) + 2;
|
||||
|
||||
return distance + 1;
|
||||
}
|
||||
|
||||
/* Function checks if instruction INSN defines register number
|
||||
REGNO1 or REGNO2. */
|
||||
|
||||
static bool
|
||||
insn_defines_reg (unsigned int regno1, unsigned int regno2,
|
||||
rtx insn)
|
||||
{
|
||||
df_ref *def_rec;
|
||||
|
||||
for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
|
||||
if (DF_REF_REG_DEF_P (*def_rec)
|
||||
&& !DF_REF_IS_ARTIFICIAL (*def_rec)
|
||||
&& (regno1 == DF_REF_REGNO (*def_rec)
|
||||
|| regno2 == DF_REF_REGNO (*def_rec)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Function checks if instruction INSN uses register number
|
||||
REGNO as a part of address expression. */
|
||||
|
||||
static bool
|
||||
insn_uses_reg_mem (unsigned int regno, rtx insn)
|
||||
{
|
||||
df_ref *use_rec;
|
||||
|
||||
for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
|
||||
if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Search backward for non-agu definition of register number REGNO1
|
||||
or register number REGNO2 in basic block starting from instruction
|
||||
START up to head of basic block or instruction INSN.
|
||||
|
||||
Function puts true value into *FOUND var if definition was found
|
||||
and false otherwise.
|
||||
|
||||
Distance in half-cycles between START and found instruction or head
|
||||
of BB is added to DISTANCE and returned. */
|
||||
|
||||
static int
|
||||
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
|
||||
rtx insn, int distance,
|
||||
rtx start, bool *found)
|
||||
{
|
||||
basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
|
||||
rtx prev = start;
|
||||
rtx next = NULL;
|
||||
enum attr_type insn_type;
|
||||
|
||||
*found = false;
|
||||
|
||||
while (prev
|
||||
&& prev != insn
|
||||
&& distance < LEA_SEARCH_THRESHOLD)
|
||||
{
|
||||
if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
|
||||
{
|
||||
distance = increase_distance (prev, next, distance);
|
||||
if (insn_defines_reg (regno1, regno2, prev))
|
||||
{
|
||||
insn_type = get_attr_type (prev);
|
||||
if (insn_type != TYPE_LEA)
|
||||
{
|
||||
*found = true;
|
||||
return distance;
|
||||
}
|
||||
}
|
||||
|
||||
next = prev;
|
||||
}
|
||||
if (prev == BB_HEAD (bb))
|
||||
break;
|
||||
|
||||
prev = PREV_INSN (prev);
|
||||
}
|
||||
|
||||
return distance;
|
||||
}
|
||||
|
||||
/* Search backward for non-agu definition of register number REGNO1
|
||||
or register number REGNO2 in INSN's basic block until
|
||||
1. Pass LEA_SEARCH_THRESHOLD instructions, or
|
||||
2. Reach BB boundary, or
|
||||
2. Reach neighbour BBs boundary, or
|
||||
3. Reach agu definition.
|
||||
Returns the distance between the non-agu definition point and INSN.
|
||||
If no definition point, returns -1. */
|
||||
@ -15985,35 +16098,14 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2,
|
||||
{
|
||||
basic_block bb = BLOCK_FOR_INSN (insn);
|
||||
int distance = 0;
|
||||
df_ref *def_rec;
|
||||
enum attr_type insn_type;
|
||||
bool found = false;
|
||||
|
||||
if (insn != BB_HEAD (bb))
|
||||
{
|
||||
rtx prev = PREV_INSN (insn);
|
||||
while (prev && distance < LEA_SEARCH_THRESHOLD)
|
||||
{
|
||||
if (NONDEBUG_INSN_P (prev))
|
||||
{
|
||||
distance++;
|
||||
for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
|
||||
if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
||||
&& !DF_REF_IS_ARTIFICIAL (*def_rec)
|
||||
&& (regno1 == DF_REF_REGNO (*def_rec)
|
||||
|| regno2 == DF_REF_REGNO (*def_rec)))
|
||||
{
|
||||
insn_type = get_attr_type (prev);
|
||||
if (insn_type != TYPE_LEA)
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
if (prev == BB_HEAD (bb))
|
||||
break;
|
||||
prev = PREV_INSN (prev);
|
||||
}
|
||||
}
|
||||
distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
|
||||
distance, PREV_INSN (insn),
|
||||
&found);
|
||||
|
||||
if (distance < LEA_SEARCH_THRESHOLD)
|
||||
if (!found && distance < LEA_SEARCH_THRESHOLD)
|
||||
{
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
@ -16027,38 +16119,100 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2,
|
||||
}
|
||||
|
||||
if (simple_loop)
|
||||
distance = distance_non_agu_define_in_bb (regno1, regno2,
|
||||
insn, distance,
|
||||
BB_END (bb), &found);
|
||||
else
|
||||
{
|
||||
rtx prev = BB_END (bb);
|
||||
while (prev
|
||||
&& prev != insn
|
||||
&& distance < LEA_SEARCH_THRESHOLD)
|
||||
int shortest_dist = -1;
|
||||
bool found_in_bb = false;
|
||||
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
{
|
||||
if (NONDEBUG_INSN_P (prev))
|
||||
int bb_dist = distance_non_agu_define_in_bb (regno1, regno2,
|
||||
insn, distance,
|
||||
BB_END (e->src),
|
||||
&found_in_bb);
|
||||
if (found_in_bb)
|
||||
{
|
||||
distance++;
|
||||
for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
|
||||
if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
||||
&& !DF_REF_IS_ARTIFICIAL (*def_rec)
|
||||
&& (regno1 == DF_REF_REGNO (*def_rec)
|
||||
|| regno2 == DF_REF_REGNO (*def_rec)))
|
||||
{
|
||||
insn_type = get_attr_type (prev);
|
||||
if (insn_type != TYPE_LEA)
|
||||
goto done;
|
||||
}
|
||||
if (shortest_dist < 0)
|
||||
shortest_dist = bb_dist;
|
||||
else if (bb_dist > 0)
|
||||
shortest_dist = MIN (bb_dist, shortest_dist);
|
||||
}
|
||||
prev = PREV_INSN (prev);
|
||||
|
||||
found = found || found_in_bb;
|
||||
}
|
||||
|
||||
distance = shortest_dist;
|
||||
}
|
||||
}
|
||||
|
||||
distance = -1;
|
||||
|
||||
done:
|
||||
/* get_attr_type may modify recog data. We want to make sure
|
||||
that recog data is valid for instruction INSN, on which
|
||||
distance_non_agu_define is called. INSN is unchanged here. */
|
||||
extract_insn_cached (insn);
|
||||
|
||||
if (!found)
|
||||
distance = -1;
|
||||
else
|
||||
distance = distance >> 1;
|
||||
|
||||
return distance;
|
||||
}
|
||||
|
||||
/* Return the distance in half-cycles between INSN and the next
|
||||
insn that uses register number REGNO in memory address added
|
||||
to DISTANCE. Return -1 if REGNO is set.
|
||||
|
||||
Put true value into *FOUND if register usage was found and
|
||||
false otherwise.
|
||||
Put true value into *REDEFINED if register redefinition was
|
||||
found and false otherwise. */
|
||||
|
||||
static int
|
||||
distance_agu_use_in_bb(unsigned int regno,
|
||||
rtx insn, int distance, rtx start,
|
||||
bool *found, bool *redefined)
|
||||
{
|
||||
basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
|
||||
rtx next = start;
|
||||
rtx prev = NULL;
|
||||
|
||||
*found = false;
|
||||
*redefined = false;
|
||||
|
||||
while (next
|
||||
&& next != insn
|
||||
&& distance < LEA_SEARCH_THRESHOLD)
|
||||
{
|
||||
if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
|
||||
{
|
||||
distance = increase_distance(prev, next, distance);
|
||||
if (insn_uses_reg_mem (regno, next))
|
||||
{
|
||||
/* Return DISTANCE if OP0 is used in memory
|
||||
address in NEXT. */
|
||||
*found = true;
|
||||
return distance;
|
||||
}
|
||||
|
||||
if (insn_defines_reg (regno, INVALID_REGNUM, next))
|
||||
{
|
||||
/* Return -1 if OP0 is set in NEXT. */
|
||||
*redefined = true;
|
||||
return -1;
|
||||
}
|
||||
|
||||
prev = next;
|
||||
}
|
||||
|
||||
if (next == BB_END (bb))
|
||||
break;
|
||||
|
||||
next = NEXT_INSN (next);
|
||||
}
|
||||
|
||||
return distance;
|
||||
}
|
||||
|
||||
@ -16071,44 +16225,15 @@ distance_agu_use (unsigned int regno0, rtx insn)
|
||||
{
|
||||
basic_block bb = BLOCK_FOR_INSN (insn);
|
||||
int distance = 0;
|
||||
df_ref *def_rec;
|
||||
df_ref *use_rec;
|
||||
bool found = false;
|
||||
bool redefined = false;
|
||||
|
||||
if (insn != BB_END (bb))
|
||||
{
|
||||
rtx next = NEXT_INSN (insn);
|
||||
while (next && distance < LEA_SEARCH_THRESHOLD)
|
||||
{
|
||||
if (NONDEBUG_INSN_P (next))
|
||||
{
|
||||
distance++;
|
||||
distance = distance_agu_use_in_bb (regno0, insn, distance,
|
||||
NEXT_INSN (insn),
|
||||
&found, &redefined);
|
||||
|
||||
for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
|
||||
if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
|
||||
|| DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
|
||||
&& regno0 == DF_REF_REGNO (*use_rec))
|
||||
{
|
||||
/* Return DISTANCE if OP0 is used in memory
|
||||
address in NEXT. */
|
||||
return distance;
|
||||
}
|
||||
|
||||
for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
|
||||
if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
||||
&& !DF_REF_IS_ARTIFICIAL (*def_rec)
|
||||
&& regno0 == DF_REF_REGNO (*def_rec))
|
||||
{
|
||||
/* Return -1 if OP0 is set in NEXT. */
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (next == BB_END (bb))
|
||||
break;
|
||||
next = NEXT_INSN (next);
|
||||
}
|
||||
}
|
||||
|
||||
if (distance < LEA_SEARCH_THRESHOLD)
|
||||
if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
|
||||
{
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
@ -16122,42 +16247,41 @@ distance_agu_use (unsigned int regno0, rtx insn)
|
||||
}
|
||||
|
||||
if (simple_loop)
|
||||
distance = distance_agu_use_in_bb (regno0, insn,
|
||||
distance, BB_HEAD (bb),
|
||||
&found, &redefined);
|
||||
else
|
||||
{
|
||||
rtx next = BB_HEAD (bb);
|
||||
while (next
|
||||
&& next != insn
|
||||
&& distance < LEA_SEARCH_THRESHOLD)
|
||||
int shortest_dist = -1;
|
||||
bool found_in_bb = false;
|
||||
bool redefined_in_bb = false;
|
||||
|
||||
FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
{
|
||||
if (NONDEBUG_INSN_P (next))
|
||||
int bb_dist = distance_agu_use_in_bb (regno0, insn,
|
||||
distance, BB_HEAD (e->dest),
|
||||
&found_in_bb, &redefined_in_bb);
|
||||
if (found_in_bb)
|
||||
{
|
||||
distance++;
|
||||
|
||||
for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
|
||||
if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
|
||||
|| DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
|
||||
&& regno0 == DF_REF_REGNO (*use_rec))
|
||||
{
|
||||
/* Return DISTANCE if OP0 is used in memory
|
||||
address in NEXT. */
|
||||
return distance;
|
||||
}
|
||||
|
||||
for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
|
||||
if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
||||
&& !DF_REF_IS_ARTIFICIAL (*def_rec)
|
||||
&& regno0 == DF_REF_REGNO (*def_rec))
|
||||
{
|
||||
/* Return -1 if OP0 is set in NEXT. */
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (shortest_dist < 0)
|
||||
shortest_dist = bb_dist;
|
||||
else if (bb_dist > 0)
|
||||
shortest_dist = MIN (bb_dist, shortest_dist);
|
||||
}
|
||||
next = NEXT_INSN (next);
|
||||
|
||||
found = found || found_in_bb;
|
||||
}
|
||||
|
||||
distance = shortest_dist;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
if (!found || redefined)
|
||||
distance = -1;
|
||||
else
|
||||
distance = distance >> 1;
|
||||
|
||||
return distance;
|
||||
}
|
||||
|
||||
/* Define this macro to tune LEA priority vs ADD, it take effect when
|
||||
@ -16165,7 +16289,309 @@ distance_agu_use (unsigned int regno0, rtx insn)
|
||||
Negative value: ADD is more preferred than LEA
|
||||
Zero: Neutral
|
||||
Positive value: LEA is more preferred than ADD*/
|
||||
#define IX86_LEA_PRIORITY 2
|
||||
#define IX86_LEA_PRIORITY 0
|
||||
|
||||
/* Return true if usage of lea INSN has performance advantage
|
||||
over a sequence of instructions. Instructions sequence has
|
||||
SPLIT_COST cycles higher latency than lea latency. */
|
||||
|
||||
bool
|
||||
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
|
||||
unsigned int regno2, unsigned int split_cost)
|
||||
{
|
||||
int dist_define, dist_use;
|
||||
|
||||
dist_define = distance_non_agu_define (regno1, regno2, insn);
|
||||
dist_use = distance_agu_use (regno0, insn);
|
||||
|
||||
if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
|
||||
{
|
||||
/* If there is no non AGU operand definition, no AGU
|
||||
operand usage and split cost is 0 then both lea
|
||||
and non lea variants have same priority. Currently
|
||||
we prefer lea for 64 bit code and non lea on 32 bit
|
||||
code. */
|
||||
if (dist_use < 0 && split_cost == 0)
|
||||
return TARGET_64BIT || IX86_LEA_PRIORITY;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
/* With longer definitions distance lea is more preferable.
|
||||
Here we change it to take into account splitting cost and
|
||||
lea priority. */
|
||||
dist_define += split_cost + IX86_LEA_PRIORITY;
|
||||
|
||||
/* If there is no use in memory addess then we just check
|
||||
that split cost does not exceed AGU stall. */
|
||||
if (dist_use < 0)
|
||||
return dist_define >= LEA_MAX_STALL;
|
||||
|
||||
/* If this insn has both backward non-agu dependence and forward
|
||||
agu dependence, the one with short distance takes effect. */
|
||||
return dist_define >= dist_use;
|
||||
}
|
||||
|
||||
/* Return true if it is legal to clobber flags by INSN and
|
||||
false otherwise. */
|
||||
|
||||
static bool
|
||||
ix86_ok_to_clobber_flags(rtx insn)
|
||||
{
|
||||
basic_block bb = BLOCK_FOR_INSN (insn);
|
||||
df_ref *use;
|
||||
bitmap live;
|
||||
|
||||
while (insn)
|
||||
{
|
||||
if (NONDEBUG_INSN_P (insn))
|
||||
{
|
||||
for (use = DF_INSN_USES (insn); *use; use++)
|
||||
if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
|
||||
return false;
|
||||
|
||||
if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
|
||||
return true;
|
||||
}
|
||||
|
||||
if (insn == BB_END (bb))
|
||||
break;
|
||||
|
||||
insn = NEXT_INSN (insn);
|
||||
}
|
||||
|
||||
live = df_get_live_out(bb);
|
||||
return !REGNO_REG_SET_P (live, FLAGS_REG);
|
||||
}
|
||||
|
||||
/* Return true if we need to split op0 = op1 + op2 into a sequence of
|
||||
move and add to avoid AGU stalls. */
|
||||
|
||||
bool
|
||||
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
|
||||
{
|
||||
unsigned int regno0 = true_regnum (operands[0]);
|
||||
unsigned int regno1 = true_regnum (operands[1]);
|
||||
unsigned int regno2 = true_regnum (operands[2]);
|
||||
|
||||
/* Check if we need to optimize. */
|
||||
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
|
||||
return false;
|
||||
|
||||
/* Check it is correct to split here. */
|
||||
if (!ix86_ok_to_clobber_flags(insn))
|
||||
return false;
|
||||
|
||||
/* We need to split only adds with non destructive
|
||||
destination operand. */
|
||||
if (regno0 == regno1 || regno0 == regno2)
|
||||
return false;
|
||||
else
|
||||
return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
|
||||
}
|
||||
|
||||
/* Return true if we need to split lea into a sequence of
|
||||
instructions to avoid AGU stalls. */
|
||||
|
||||
bool
|
||||
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
|
||||
{
|
||||
unsigned int regno0 = true_regnum (operands[0]) ;
|
||||
unsigned int regno1 = -1;
|
||||
unsigned int regno2 = -1;
|
||||
unsigned int split_cost = 0;
|
||||
struct ix86_address parts;
|
||||
int ok;
|
||||
|
||||
/* Check we need to optimize. */
|
||||
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
|
||||
return false;
|
||||
|
||||
/* Check it is correct to split here. */
|
||||
if (!ix86_ok_to_clobber_flags(insn))
|
||||
return false;
|
||||
|
||||
ok = ix86_decompose_address (operands[1], &parts);
|
||||
gcc_assert (ok);
|
||||
|
||||
/* We should not split into add if non legitimate pic
|
||||
operand is used as displacement. */
|
||||
if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
|
||||
return false;
|
||||
|
||||
if (parts.base)
|
||||
regno1 = true_regnum (parts.base);
|
||||
if (parts.index)
|
||||
regno2 = true_regnum (parts.index);
|
||||
|
||||
/* Compute how many cycles we will add to execution time
|
||||
if split lea into a sequence of instructions. */
|
||||
if (parts.base || parts.index)
|
||||
{
|
||||
/* Have to use mov instruction if non desctructive
|
||||
destination form is used. */
|
||||
if (regno1 != regno0 && regno2 != regno0)
|
||||
split_cost += 1;
|
||||
|
||||
/* Have to add index to base if both exist. */
|
||||
if (parts.base && parts.index)
|
||||
split_cost += 1;
|
||||
|
||||
/* Have to use shift and adds if scale is 2 or greater. */
|
||||
if (parts.scale > 1)
|
||||
{
|
||||
if (regno0 != regno1)
|
||||
split_cost += 1;
|
||||
else if (regno2 == regno0)
|
||||
split_cost += 4;
|
||||
else
|
||||
split_cost += parts.scale;
|
||||
}
|
||||
|
||||
/* Have to use add instruction with immediate if
|
||||
disp is non zero. */
|
||||
if (parts.disp && parts.disp != const0_rtx)
|
||||
split_cost += 1;
|
||||
|
||||
/* Subtract the price of lea. */
|
||||
split_cost -= 1;
|
||||
}
|
||||
|
||||
return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
|
||||
}
|
||||
|
||||
/* Split lea instructions into a sequence of instructions
|
||||
which are executed on ALU to avoid AGU stalls.
|
||||
It is assumed that it is allowed to clobber flags register
|
||||
at lea position. */
|
||||
|
||||
extern void
|
||||
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
|
||||
{
|
||||
unsigned int regno0 = true_regnum (operands[0]) ;
|
||||
unsigned int regno1 = INVALID_REGNUM;
|
||||
unsigned int regno2 = INVALID_REGNUM;
|
||||
struct ix86_address parts;
|
||||
rtx tmp, clob;
|
||||
rtvec par;
|
||||
int ok, adds;
|
||||
|
||||
ok = ix86_decompose_address (operands[1], &parts);
|
||||
gcc_assert (ok);
|
||||
|
||||
if (parts.base)
|
||||
{
|
||||
if (GET_MODE (parts.base) != mode)
|
||||
parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
|
||||
regno1 = true_regnum (parts.base);
|
||||
}
|
||||
|
||||
if (parts.index)
|
||||
{
|
||||
if (GET_MODE (parts.index) != mode)
|
||||
parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
|
||||
regno2 = true_regnum (parts.index);
|
||||
}
|
||||
|
||||
if (parts.scale > 1)
|
||||
{
|
||||
/* Case r1 = r1 + ... */
|
||||
if (regno1 == regno0)
|
||||
{
|
||||
/* If we have a case r1 = r1 + C * r1 then we
|
||||
should use multiplication which is very
|
||||
expensive. Assume cost model is wrong if we
|
||||
have such case here. */
|
||||
gcc_assert (regno2 != regno0);
|
||||
|
||||
for (adds = parts.scale; adds > 0; adds--)
|
||||
{
|
||||
tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
|
||||
tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
|
||||
clob = gen_rtx_CLOBBER (VOIDmode,
|
||||
gen_rtx_REG (CCmode, FLAGS_REG));
|
||||
par = gen_rtvec (2, tmp, clob);
|
||||
emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* r1 = r2 + r3 * C case. Need to move r3 into r1. */
|
||||
if (regno0 != regno2)
|
||||
emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
|
||||
|
||||
/* Use shift for scaling. */
|
||||
tmp = gen_rtx_ASHIFT (mode, operands[0],
|
||||
GEN_INT (exact_log2 (parts.scale)));
|
||||
tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
|
||||
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
|
||||
par = gen_rtvec (2, tmp, clob);
|
||||
emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
|
||||
|
||||
if (parts.base)
|
||||
{
|
||||
tmp = gen_rtx_PLUS (mode, operands[0], parts.base);
|
||||
tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
|
||||
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
|
||||
par = gen_rtvec (2, tmp, clob);
|
||||
emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
|
||||
}
|
||||
|
||||
if (parts.disp && parts.disp != const0_rtx)
|
||||
{
|
||||
tmp = gen_rtx_PLUS (mode, operands[0], parts.disp);
|
||||
tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
|
||||
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
|
||||
par = gen_rtvec (2, tmp, clob);
|
||||
emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!parts.base && !parts.index)
|
||||
{
|
||||
gcc_assert(parts.disp);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!parts.base)
|
||||
{
|
||||
if (regno0 != regno2)
|
||||
emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
|
||||
}
|
||||
else if (!parts.index)
|
||||
{
|
||||
if (regno0 != regno1)
|
||||
emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (regno0 == regno1)
|
||||
tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
|
||||
else if (regno0 == regno2)
|
||||
tmp = gen_rtx_PLUS (mode, operands[0], parts.base);
|
||||
else
|
||||
{
|
||||
emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
|
||||
tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
|
||||
}
|
||||
|
||||
tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
|
||||
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
|
||||
par = gen_rtvec (2, tmp, clob);
|
||||
emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
|
||||
}
|
||||
|
||||
if (parts.disp && parts.disp != const0_rtx)
|
||||
{
|
||||
tmp = gen_rtx_PLUS (mode, operands[0], parts.disp);
|
||||
tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
|
||||
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
|
||||
par = gen_rtvec (2, tmp, clob);
|
||||
emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Return true if it is ok to optimize an ADD operation to LEA
|
||||
operation to avoid flag register consumption. For most processors,
|
||||
@ -16186,26 +16612,8 @@ ix86_lea_for_add_ok (rtx insn, rtx operands[])
|
||||
|
||||
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
|
||||
return false;
|
||||
else
|
||||
{
|
||||
int dist_define, dist_use;
|
||||
|
||||
/* Return false if REGNO0 isn't used in memory address. */
|
||||
dist_use = distance_agu_use (regno0, insn);
|
||||
if (dist_use <= 0)
|
||||
return false;
|
||||
|
||||
dist_define = distance_non_agu_define (regno1, regno2, insn);
|
||||
if (dist_define <= 0)
|
||||
return true;
|
||||
|
||||
/* If this insn has both backward non-agu dependence and forward
|
||||
agu dependence, the one with short distance take effect. */
|
||||
if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
|
||||
}
|
||||
|
||||
/* Return true if destination reg of SET_BODY is shift count of
|
||||
|
@ -5463,19 +5463,31 @@
|
||||
[(set_attr "type" "alu")
|
||||
(set_attr "mode" "QI")])
|
||||
|
||||
(define_insn "*lea_1"
|
||||
(define_insn_and_split "*lea_1"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(subreg:SI (match_operand:DI 1 "lea_address_operand" "p") 0))]
|
||||
"TARGET_64BIT"
|
||||
"lea{l}\t{%a1, %0|%0, %a1}"
|
||||
"&& reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
|
||||
[(const_int 0)]
|
||||
{
|
||||
ix86_split_lea_for_addr (operands, SImode);
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "lea")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*lea<mode>_2"
|
||||
(define_insn_and_split "*lea<mode>_2"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(match_operand:SWI48 1 "lea_address_operand" "p"))]
|
||||
""
|
||||
"lea{<imodesuffix>}\t{%a1, %0|%0, %a1}"
|
||||
"reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
|
||||
[(const_int 0)]
|
||||
{
|
||||
ix86_split_lea_for_addr (operands, <MODE>mode);
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "lea")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
@ -5777,6 +5789,17 @@
|
||||
(const_string "none")))
|
||||
(set_attr "mode" "QI")])
|
||||
|
||||
;; Split non destructive adds if we cannot use lea.
|
||||
(define_split
|
||||
[(set (match_operand:SWI48 0 "register_operand" "")
|
||||
(plus:SWI48 (match_operand:SWI48 1 "register_operand" "")
|
||||
(match_operand:SWI48 2 "nonmemory_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"reload_completed && ix86_avoid_lea_for_add (insn, operands)"
|
||||
[(set (match_dup 0) (match_dup 1))
|
||||
(parallel [(set (match_dup 0) (plus:<MODE> (match_dup 0) (match_dup 2)))
|
||||
(clobber (reg:CC FLAGS_REG))])])
|
||||
|
||||
;; Convert add to the lea pattern to avoid flags dependency.
|
||||
(define_split
|
||||
[(set (match_operand:SWI 0 "register_operand" "")
|
||||
|
Loading…
Reference in New Issue
Block a user