predicates.md (fusion_gpr_mem_load): Move testing for base_reg_operand to be common between LO_SUM and PLUS.
2014-09-19 Michael Meissner <meissner@linux.vnet.ibm.com> * config/rs6000/predicates.md (fusion_gpr_mem_load): Move testing for base_reg_operand to be common between LO_SUM and PLUS. (fusion_gpr_mem_combo): New predicate to match a fused address that combines the addis and memory offset address. * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Change calling signature. (emit_fusion_gpr_load): Likewise. * config/rs6000/rs6000.c (fusion_gpr_load_p): Change calling signature to pass each argument separately, rather than using an operands array. Rewrite the insns found by peephole2 to be a single insn, rather than hoping the insns will still be together when the peephole pass is done. Drop being called via a normal peephole. (emit_fusion_gpr_load): Change calling signature to be called from the fusion_gpr_load_<mode> insns with a combined memory address instead of the peephole pass passing the addis and offset separately. * config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): New unspec for GPR fusion. (power8 fusion peephole): Drop support for doing power8 via a normal peephole that was created by the peephole2 pass. (power8 fusion peephole2): Create a new insn with the fused address, so that the fused operation is kept together after register allocation is done. (fusion_gpr_load_<mode>): Likewise. From-SVN: r215404
This commit is contained in:
parent
ca21928bb3
commit
3f99b0612f
|
@ -1,3 +1,34 @@
|
|||
2014-09-19 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
* config/rs6000/predicates.md (fusion_gpr_mem_load): Move testing
|
||||
for base_reg_operand to be common between LO_SUM and PLUS.
|
||||
(fusion_gpr_mem_combo): New predicate to match a fused address
|
||||
that combines the addis and memory offset address.
|
||||
|
||||
* config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Change
|
||||
calling signature.
|
||||
(emit_fusion_gpr_load): Likewise.
|
||||
|
||||
* config/rs6000/rs6000.c (fusion_gpr_load_p): Change calling
|
||||
signature to pass each argument separately, rather than
|
||||
using an operands array. Rewrite the insns found by peephole2 to
|
||||
be a single insn, rather than hoping the insns will still be
|
||||
together when the peephole pass is done. Drop being called via a
|
||||
normal peephole.
|
||||
(emit_fusion_gpr_load): Change calling signature to be called from
|
||||
the fusion_gpr_load_<mode> insns with a combined memory address
|
||||
instead of the peephole pass passing the addis and offset
|
||||
separately.
|
||||
|
||||
* config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): New unspec for GPR
|
||||
fusion.
|
||||
(power8 fusion peephole): Drop support for doing power8 via a
|
||||
normal peephole that was created by the peephole2 pass.
|
||||
(power8 fusion peephole2): Create a new insn with the fused
|
||||
address, so that the fused operation is kept together after
|
||||
register allocation is done.
|
||||
(fusion_gpr_load_<mode>): Likewise.
|
||||
|
||||
2014-09-19 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
PR lto/63286
|
||||
|
|
|
@ -1797,7 +1797,7 @@
|
|||
(define_predicate "fusion_gpr_mem_load"
|
||||
(match_code "mem,sign_extend,zero_extend")
|
||||
{
|
||||
rtx addr;
|
||||
rtx addr, base, offset;
|
||||
|
||||
/* Handle sign/zero extend. */
|
||||
if (GET_CODE (op) == ZERO_EXTEND
|
||||
|
@ -1827,24 +1827,79 @@
|
|||
}
|
||||
|
||||
addr = XEXP (op, 0);
|
||||
if (GET_CODE (addr) == PLUS)
|
||||
{
|
||||
rtx base = XEXP (addr, 0);
|
||||
rtx offset = XEXP (addr, 1);
|
||||
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
|
||||
return 0;
|
||||
|
||||
return (base_reg_operand (base, GET_MODE (base))
|
||||
&& satisfies_constraint_I (offset));
|
||||
}
|
||||
base = XEXP (addr, 0);
|
||||
if (!base_reg_operand (base, GET_MODE (base)))
|
||||
return 0;
|
||||
|
||||
offset = XEXP (addr, 1);
|
||||
|
||||
if (GET_CODE (addr) == PLUS)
|
||||
return satisfies_constraint_I (offset);
|
||||
|
||||
else if (GET_CODE (addr) == LO_SUM)
|
||||
{
|
||||
rtx base = XEXP (addr, 0);
|
||||
rtx offset = XEXP (addr, 1);
|
||||
|
||||
if (!base_reg_operand (base, GET_MODE (base)))
|
||||
return 0;
|
||||
|
||||
else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
|
||||
if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
|
||||
return small_toc_ref (offset, GET_MODE (offset));
|
||||
|
||||
else if (TARGET_ELF && !TARGET_POWERPC64)
|
||||
return CONSTANT_P (offset);
|
||||
}
|
||||
|
||||
return 0;
|
||||
})
|
||||
|
||||
;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the
|
||||
;; memory field with both the addis and the memory offset. Sign extension
|
||||
;; is not handled here, since lha and lwa are not fused.
|
||||
(define_predicate "fusion_gpr_mem_combo"
|
||||
(match_code "mem,zero_extend")
|
||||
{
|
||||
rtx addr, base, offset;
|
||||
|
||||
/* Handle zero extend. */
|
||||
if (GET_CODE (op) == ZERO_EXTEND)
|
||||
{
|
||||
op = XEXP (op, 0);
|
||||
mode = GET_MODE (op);
|
||||
}
|
||||
|
||||
if (!MEM_P (op))
|
||||
return 0;
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case QImode:
|
||||
case HImode:
|
||||
case SImode:
|
||||
break;
|
||||
|
||||
case DImode:
|
||||
if (!TARGET_POWERPC64)
|
||||
return 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
addr = XEXP (op, 0);
|
||||
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
|
||||
return 0;
|
||||
|
||||
base = XEXP (addr, 0);
|
||||
if (!fusion_gpr_addis (base, GET_MODE (base)))
|
||||
return 0;
|
||||
|
||||
offset = XEXP (addr, 1);
|
||||
if (GET_CODE (addr) == PLUS)
|
||||
return satisfies_constraint_I (offset);
|
||||
|
||||
else if (GET_CODE (addr) == LO_SUM)
|
||||
{
|
||||
if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
|
||||
return small_toc_ref (offset, GET_MODE (offset));
|
||||
|
||||
else if (TARGET_ELF && !TARGET_POWERPC64)
|
||||
|
|
|
@ -80,9 +80,9 @@ extern int mems_ok_for_quad_peep (rtx, rtx);
|
|||
extern bool gpr_or_gpr_p (rtx, rtx);
|
||||
extern bool direct_move_p (rtx, rtx);
|
||||
extern bool quad_load_store_p (rtx, rtx);
|
||||
extern bool fusion_gpr_load_p (rtx *, bool);
|
||||
extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx);
|
||||
extern void expand_fusion_gpr_load (rtx *);
|
||||
extern const char *emit_fusion_gpr_load (rtx *);
|
||||
extern const char *emit_fusion_gpr_load (rtx, rtx);
|
||||
extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
|
||||
enum reg_class);
|
||||
extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
|
||||
|
|
|
@ -33044,25 +33044,14 @@ rs6000_split_logical (rtx operands[3],
|
|||
|
||||
/* Return true if the peephole2 can combine a load involving a combination of
|
||||
an addis instruction and a load with an offset that can be fused together on
|
||||
a power8.
|
||||
|
||||
The operands are:
|
||||
operands[0] register set with addis
|
||||
operands[1] value set via addis
|
||||
operands[2] target register being loaded
|
||||
operands[3] D-form memory reference using operands[0].
|
||||
|
||||
In addition, we are passed a boolean that is true if this is a peephole2,
|
||||
and we can see if the addis_reg is dead after the insn and can be
|
||||
replaced by the target register. */
|
||||
a power8. */
|
||||
|
||||
bool
|
||||
fusion_gpr_load_p (rtx *operands, bool peep2_p)
|
||||
fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
|
||||
rtx addis_value, /* addis value. */
|
||||
rtx target, /* target register that is loaded. */
|
||||
rtx mem) /* bottom part of the memory addr. */
|
||||
{
|
||||
rtx addis_reg = operands[0];
|
||||
rtx addis_value = operands[1];
|
||||
rtx target = operands[2];
|
||||
rtx mem = operands[3];
|
||||
rtx addr;
|
||||
rtx base_reg;
|
||||
|
||||
|
@ -33076,9 +33065,6 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p)
|
|||
if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
|
||||
return false;
|
||||
|
||||
if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
|
||||
return false;
|
||||
|
||||
/* Allow sign/zero extension. */
|
||||
if (GET_CODE (mem) == ZERO_EXTEND
|
||||
|| (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
|
||||
|
@ -33087,22 +33073,22 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p)
|
|||
if (!MEM_P (mem))
|
||||
return false;
|
||||
|
||||
if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
|
||||
return false;
|
||||
|
||||
addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
|
||||
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
|
||||
return false;
|
||||
|
||||
/* Validate that the register used to load the high value is either the
|
||||
register being loaded, or we can safely replace its use in a peephole2.
|
||||
register being loaded, or we can safely replace its use.
|
||||
|
||||
If this is a peephole2, we assume that there are 2 instructions in the
|
||||
peephole (addis and load), so we want to check if the target register was
|
||||
not used in the memory address and the register to hold the addis result
|
||||
is dead after the peephole. */
|
||||
This function is only called from the peephole2 pass and we assume that
|
||||
there are 2 instructions in the peephole (addis and load), so we want to
|
||||
check if the target register was not used in the memory address and the
|
||||
register to hold the addis result is dead after the peephole. */
|
||||
if (REGNO (addis_reg) != REGNO (target))
|
||||
{
|
||||
if (!peep2_p)
|
||||
return false;
|
||||
|
||||
if (reg_mentioned_p (target, mem))
|
||||
return false;
|
||||
|
||||
|
@ -33143,9 +33129,6 @@ expand_fusion_gpr_load (rtx *operands)
|
|||
enum machine_mode extend_mode = target_mode;
|
||||
enum machine_mode ptr_mode = Pmode;
|
||||
enum rtx_code extend = UNKNOWN;
|
||||
rtx addis_reg = ((ptr_mode == target_mode)
|
||||
? target
|
||||
: simplify_subreg (ptr_mode, target, target_mode, 0));
|
||||
|
||||
if (GET_CODE (orig_mem) == ZERO_EXTEND
|
||||
|| (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
|
||||
|
@ -33162,13 +33145,14 @@ expand_fusion_gpr_load (rtx *operands)
|
|||
gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
|
||||
|
||||
offset = XEXP (orig_addr, 1);
|
||||
new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset);
|
||||
new_mem = change_address (orig_mem, target_mode, new_addr);
|
||||
new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
|
||||
new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
|
||||
|
||||
if (extend != UNKNOWN)
|
||||
new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
|
||||
|
||||
emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value));
|
||||
new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
|
||||
UNSPEC_FUSION_GPR);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
|
||||
|
||||
if (extend == SIGN_EXTEND)
|
||||
|
@ -33187,55 +33171,40 @@ expand_fusion_gpr_load (rtx *operands)
|
|||
}
|
||||
|
||||
/* Return a string to fuse an addis instruction with a gpr load to the same
|
||||
register that was set by the addis instruction. The code is complicated,
|
||||
so we call output_asm_insn directly, and just return "".
|
||||
register that was set by the addis instruction. The address that is used
|
||||
is the logical address that was formed during peephole2:
|
||||
(lo_sum (high) (low-part))
|
||||
|
||||
The operands are:
|
||||
operands[0] register set with addis (must be same reg as target).
|
||||
operands[1] value set via addis
|
||||
operands[2] target register being loaded
|
||||
operands[3] D-form memory reference using operands[0]. */
|
||||
The code is complicated, so we call output_asm_insn directly, and just
|
||||
return "". */
|
||||
|
||||
const char *
|
||||
emit_fusion_gpr_load (rtx *operands)
|
||||
emit_fusion_gpr_load (rtx target, rtx mem)
|
||||
{
|
||||
rtx addis_reg = operands[0];
|
||||
rtx addis_value = operands[1];
|
||||
rtx target = operands[2];
|
||||
rtx mem = operands[3];
|
||||
rtx addis_value;
|
||||
rtx fuse_ops[10];
|
||||
rtx addr;
|
||||
rtx load_offset;
|
||||
const char *addis_str = NULL;
|
||||
const char *load_str = NULL;
|
||||
const char *extend_insn = NULL;
|
||||
const char *mode_name = NULL;
|
||||
char insn_template[80];
|
||||
enum machine_mode mode;
|
||||
const char *comment_str = ASM_COMMENT_START;
|
||||
bool sign_p = false;
|
||||
|
||||
gcc_assert (REG_P (addis_reg) && REG_P (target));
|
||||
gcc_assert (REGNO (addis_reg) == REGNO (target));
|
||||
if (GET_CODE (mem) == ZERO_EXTEND)
|
||||
mem = XEXP (mem, 0);
|
||||
|
||||
gcc_assert (REG_P (target) && MEM_P (mem));
|
||||
|
||||
if (*comment_str == ' ')
|
||||
comment_str++;
|
||||
|
||||
/* Allow sign/zero extension. */
|
||||
if (GET_CODE (mem) == ZERO_EXTEND)
|
||||
mem = XEXP (mem, 0);
|
||||
|
||||
else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)
|
||||
{
|
||||
sign_p = true;
|
||||
mem = XEXP (mem, 0);
|
||||
}
|
||||
|
||||
gcc_assert (MEM_P (mem));
|
||||
addr = XEXP (mem, 0);
|
||||
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
|
||||
gcc_unreachable ();
|
||||
|
||||
addis_value = XEXP (addr, 0);
|
||||
load_offset = XEXP (addr, 1);
|
||||
|
||||
/* Now emit the load instruction to the same register. */
|
||||
|
@ -33245,29 +33214,22 @@ emit_fusion_gpr_load (rtx *operands)
|
|||
case QImode:
|
||||
mode_name = "char";
|
||||
load_str = "lbz";
|
||||
extend_insn = "extsb %0,%0";
|
||||
break;
|
||||
|
||||
case HImode:
|
||||
mode_name = "short";
|
||||
load_str = "lhz";
|
||||
extend_insn = "extsh %0,%0";
|
||||
break;
|
||||
|
||||
case SImode:
|
||||
mode_name = "int";
|
||||
load_str = "lwz";
|
||||
extend_insn = "extsw %0,%0";
|
||||
break;
|
||||
|
||||
case DImode:
|
||||
if (TARGET_POWERPC64)
|
||||
{
|
||||
mode_name = "long";
|
||||
load_str = "ld";
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
gcc_assert (TARGET_POWERPC64);
|
||||
mode_name = "long";
|
||||
load_str = "ld";
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -33411,14 +33373,6 @@ emit_fusion_gpr_load (rtx *operands)
|
|||
else
|
||||
fatal_insn ("Unable to generate load offset for fusion", load_offset);
|
||||
|
||||
/* Handle sign extension. The peephole2 pass generates this as a separate
|
||||
insn, but we handle it just in case it got reattached. */
|
||||
if (sign_p)
|
||||
{
|
||||
gcc_assert (extend_insn != NULL);
|
||||
output_asm_insn (extend_insn, fuse_ops);
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
|
|
|
@ -137,6 +137,7 @@
|
|||
UNSPEC_UNPACK_128BIT
|
||||
UNSPEC_PACK_128BIT
|
||||
UNSPEC_LSQ
|
||||
UNSPEC_FUSION_GPR
|
||||
])
|
||||
|
||||
;;
|
||||
|
@ -14296,22 +14297,9 @@
|
|||
;; a GPR. The addis instruction must be adjacent to the load, and use the same
|
||||
;; register that is being loaded. The fused ops must be physically adjacent.
|
||||
|
||||
;; We use define_peephole for the actual addis/load, and the register used to
|
||||
;; hold the addis value must be the same as the register being loaded. We use
|
||||
;; define_peephole2 to change the register used for addis to be the register
|
||||
;; being loaded, since we can look at whether it is dead after the load insn.
|
||||
|
||||
(define_peephole
|
||||
[(set (match_operand:P 0 "base_reg_operand" "")
|
||||
(match_operand:P 1 "fusion_gpr_addis" ""))
|
||||
(set (match_operand:INT1 2 "base_reg_operand" "")
|
||||
(match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
|
||||
"TARGET_P8_FUSION && fusion_gpr_load_p (operands, false)"
|
||||
{
|
||||
return emit_fusion_gpr_load (operands);
|
||||
}
|
||||
[(set_attr "type" "load")
|
||||
(set_attr "length" "8")])
|
||||
;; Find cases where the addis that feeds into a load instruction is either used
|
||||
;; once or is the same as the target register, and replace it with the fusion
|
||||
;; insn.
|
||||
|
||||
(define_peephole2
|
||||
[(set (match_operand:P 0 "base_reg_operand" "")
|
||||
|
@ -14319,15 +14307,28 @@
|
|||
(set (match_operand:INT1 2 "base_reg_operand" "")
|
||||
(match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
|
||||
"TARGET_P8_FUSION
|
||||
&& (REGNO (operands[0]) != REGNO (operands[2])
|
||||
|| GET_CODE (operands[3]) == SIGN_EXTEND)
|
||||
&& fusion_gpr_load_p (operands, true)"
|
||||
&& fusion_gpr_load_p (operands[0], operands[1], operands[2],
|
||||
operands[3])"
|
||||
[(const_int 0)]
|
||||
{
|
||||
expand_fusion_gpr_load (operands);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Fusion insn, created by the define_peephole2 above (and eventually by
|
||||
;; reload)
|
||||
|
||||
(define_insn "fusion_gpr_load_<mode>"
|
||||
[(set (match_operand:INT1 0 "base_reg_operand" "=&b")
|
||||
(unspec:INT1 [(match_operand:INT1 1 "fusion_gpr_mem_combo" "")]
|
||||
UNSPEC_FUSION_GPR))]
|
||||
"TARGET_P8_FUSION"
|
||||
{
|
||||
return emit_fusion_gpr_load (operands[0], operands[1]);
|
||||
}
|
||||
[(set_attr "type" "load")
|
||||
(set_attr "length" "8")])
|
||||
|
||||
|
||||
;; Miscellaneous ISA 2.06 (power7) instructions
|
||||
(define_insn "addg6s"
|
||||
|
|
Loading…
Reference in New Issue