predicates.md (fusion_gpr_mem_load): Move testing for base_reg_operand to be common between LO_SUM and PLUS.

2014-09-19  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/predicates.md (fusion_gpr_mem_load): Move testing
	for base_reg_operand to be common between LO_SUM and PLUS.
	(fusion_gpr_mem_combo): New predicate to match a fused address
	that combines the addis and memory offset address.

	* config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Change
	calling signature.
	(emit_fusion_gpr_load): Likewise.

	* config/rs6000/rs6000.c (fusion_gpr_load_p): Change calling
	signature to pass each argument separately, rather than
	using an operands array.  Rewrite the insns found by peephole2 to
	be a single insn, rather than hoping the insns will still be
	together when the peephole pass is done.  Drop being called via a
	normal peephole.
	(emit_fusion_gpr_load): Change calling signature to be called from
	the fusion_gpr_load_<mode> insns with a combined memory address
	instead of the peephole pass passing the addis and offset
	separately.

	* config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): New unspec for GPR
	fusion.
	(power8 fusion peephole): Drop support for doing power8 fusion via
	a normal peephole that was created by the peephole2 pass.
	(power8 fusion peephole2): Create a new insn with the fused
	address, so that the fused operation is kept together after
	register allocation is done.
	(fusion_gpr_load_<mode>): Likewise.

From-SVN: r215404
This commit is contained in:
Michael Meissner 2014-09-19 19:36:57 +00:00 committed by Michael Meissner
parent ca21928bb3
commit 3f99b0612f
5 changed files with 155 additions and 114 deletions

View File

@ -1,3 +1,34 @@
2014-09-19 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/predicates.md (fusion_gpr_mem_load): Move testing
for base_reg_operand to be common between LO_SUM and PLUS.
(fusion_gpr_mem_combo): New predicate to match a fused address
that combines the addis and memory offset address.
* config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Change
calling signature.
(emit_fusion_gpr_load): Likewise.
* config/rs6000/rs6000.c (fusion_gpr_load_p): Change calling
signature to pass each argument separately, rather than
using an operands array. Rewrite the insns found by peephole2 to
be a single insn, rather than hoping the insns will still be
together when the peephole pass is done. Drop being called via a
normal peephole.
(emit_fusion_gpr_load): Change calling signature to be called from
the fusion_gpr_load_<mode> insns with a combined memory address
instead of the peephole pass passing the addis and offset
separately.
* config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): New unspec for GPR
fusion.
(power8 fusion peephole): Drop support for doing power8 fusion via
a normal peephole that was created by the peephole2 pass.
(power8 fusion peephole2): Create a new insn with the fused
address, so that the fused operation is kept together after
register allocation is done.
(fusion_gpr_load_<mode>): Likewise.
2014-09-19 Jan Hubicka <hubicka@ucw.cz>
PR lto/63286

View File

@ -1797,7 +1797,7 @@
(define_predicate "fusion_gpr_mem_load"
(match_code "mem,sign_extend,zero_extend")
{
rtx addr;
rtx addr, base, offset;
/* Handle sign/zero extend. */
if (GET_CODE (op) == ZERO_EXTEND
@ -1827,24 +1827,79 @@
}
addr = XEXP (op, 0);
if (GET_CODE (addr) == PLUS)
{
rtx base = XEXP (addr, 0);
rtx offset = XEXP (addr, 1);
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
return 0;
return (base_reg_operand (base, GET_MODE (base))
&& satisfies_constraint_I (offset));
}
base = XEXP (addr, 0);
if (!base_reg_operand (base, GET_MODE (base)))
return 0;
offset = XEXP (addr, 1);
if (GET_CODE (addr) == PLUS)
return satisfies_constraint_I (offset);
else if (GET_CODE (addr) == LO_SUM)
{
rtx base = XEXP (addr, 0);
rtx offset = XEXP (addr, 1);
if (!base_reg_operand (base, GET_MODE (base)))
return 0;
else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
return small_toc_ref (offset, GET_MODE (offset));
else if (TARGET_ELF && !TARGET_POWERPC64)
return CONSTANT_P (offset);
}
return 0;
})
;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the
;; memory field with both the addis and the memory offset. Sign extension
;; is not handled here, since lha and lwa are not fused.
(define_predicate "fusion_gpr_mem_combo"
(match_code "mem,zero_extend")
{
rtx addr, base, offset;
/* Handle zero extend. */
if (GET_CODE (op) == ZERO_EXTEND)
{
op = XEXP (op, 0);
mode = GET_MODE (op);
}
if (!MEM_P (op))
return 0;
switch (mode)
{
case QImode:
case HImode:
case SImode:
break;
case DImode:
if (!TARGET_POWERPC64)
return 0;
break;
default:
return 0;
}
addr = XEXP (op, 0);
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
return 0;
base = XEXP (addr, 0);
if (!fusion_gpr_addis (base, GET_MODE (base)))
return 0;
offset = XEXP (addr, 1);
if (GET_CODE (addr) == PLUS)
return satisfies_constraint_I (offset);
else if (GET_CODE (addr) == LO_SUM)
{
if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
return small_toc_ref (offset, GET_MODE (offset));
else if (TARGET_ELF && !TARGET_POWERPC64)

View File

@ -80,9 +80,9 @@ extern int mems_ok_for_quad_peep (rtx, rtx);
extern bool gpr_or_gpr_p (rtx, rtx);
extern bool direct_move_p (rtx, rtx);
extern bool quad_load_store_p (rtx, rtx);
extern bool fusion_gpr_load_p (rtx *, bool);
extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx);
extern void expand_fusion_gpr_load (rtx *);
extern const char *emit_fusion_gpr_load (rtx *);
extern const char *emit_fusion_gpr_load (rtx, rtx);
extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
enum reg_class);
extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,

View File

@ -33044,25 +33044,14 @@ rs6000_split_logical (rtx operands[3],
/* Return true if the peephole2 can combine a load involving a combination of
an addis instruction and a load with an offset that can be fused together on
a power8.
The operands are:
operands[0] register set with addis
operands[1] value set via addis
operands[2] target register being loaded
operands[3] D-form memory reference using operands[0].
In addition, we are passed a boolean that is true if this is a peephole2,
and we can see if the addis_reg is dead after the insn and can be
replaced by the target register. */
a power8. */
bool
fusion_gpr_load_p (rtx *operands, bool peep2_p)
fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
rtx addis_value, /* addis value. */
rtx target, /* target register that is loaded. */
rtx mem) /* bottom part of the memory addr. */
{
rtx addis_reg = operands[0];
rtx addis_value = operands[1];
rtx target = operands[2];
rtx mem = operands[3];
rtx addr;
rtx base_reg;
@ -33076,9 +33065,6 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p)
if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
return false;
if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
return false;
/* Allow sign/zero extension. */
if (GET_CODE (mem) == ZERO_EXTEND
|| (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
@ -33087,22 +33073,22 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p)
if (!MEM_P (mem))
return false;
if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
return false;
addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
return false;
/* Validate that the register used to load the high value is either the
register being loaded, or we can safely replace its use in a peephole2.
register being loaded, or we can safely replace its use.
If this is a peephole2, we assume that there are 2 instructions in the
peephole (addis and load), so we want to check if the target register was
not used in the memory address and the register to hold the addis result
is dead after the peephole. */
This function is only called from the peephole2 pass and we assume that
there are 2 instructions in the peephole (addis and load), so we want to
check if the target register was not used in the memory address and the
register to hold the addis result is dead after the peephole. */
if (REGNO (addis_reg) != REGNO (target))
{
if (!peep2_p)
return false;
if (reg_mentioned_p (target, mem))
return false;
@ -33143,9 +33129,6 @@ expand_fusion_gpr_load (rtx *operands)
enum machine_mode extend_mode = target_mode;
enum machine_mode ptr_mode = Pmode;
enum rtx_code extend = UNKNOWN;
rtx addis_reg = ((ptr_mode == target_mode)
? target
: simplify_subreg (ptr_mode, target, target_mode, 0));
if (GET_CODE (orig_mem) == ZERO_EXTEND
|| (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
@ -33162,13 +33145,14 @@ expand_fusion_gpr_load (rtx *operands)
gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
offset = XEXP (orig_addr, 1);
new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset);
new_mem = change_address (orig_mem, target_mode, new_addr);
new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
if (extend != UNKNOWN)
new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value));
new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
UNSPEC_FUSION_GPR);
emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
if (extend == SIGN_EXTEND)
@ -33187,55 +33171,40 @@ expand_fusion_gpr_load (rtx *operands)
}
/* Return a string to fuse an addis instruction with a gpr load to the same
register that we loaded up the addis instruction. The code is complicated,
so we call output_asm_insn directly, and just return "".
register that we loaded up the addis instruction. The address that is used
is the logical address that was formed during peephole2:
(lo_sum (high) (low-part))
The operands are:
operands[0] register set with addis (must be same reg as target).
operands[1] value set via addis
operands[2] target register being loaded
operands[3] D-form memory reference using operands[0]. */
The code is complicated, so we call output_asm_insn directly, and just
return "". */
const char *
emit_fusion_gpr_load (rtx *operands)
emit_fusion_gpr_load (rtx target, rtx mem)
{
rtx addis_reg = operands[0];
rtx addis_value = operands[1];
rtx target = operands[2];
rtx mem = operands[3];
rtx addis_value;
rtx fuse_ops[10];
rtx addr;
rtx load_offset;
const char *addis_str = NULL;
const char *load_str = NULL;
const char *extend_insn = NULL;
const char *mode_name = NULL;
char insn_template[80];
enum machine_mode mode;
const char *comment_str = ASM_COMMENT_START;
bool sign_p = false;
gcc_assert (REG_P (addis_reg) && REG_P (target));
gcc_assert (REGNO (addis_reg) == REGNO (target));
if (GET_CODE (mem) == ZERO_EXTEND)
mem = XEXP (mem, 0);
gcc_assert (REG_P (target) && MEM_P (mem));
if (*comment_str == ' ')
comment_str++;
/* Allow sign/zero extension. */
if (GET_CODE (mem) == ZERO_EXTEND)
mem = XEXP (mem, 0);
else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)
{
sign_p = true;
mem = XEXP (mem, 0);
}
gcc_assert (MEM_P (mem));
addr = XEXP (mem, 0);
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
gcc_unreachable ();
addis_value = XEXP (addr, 0);
load_offset = XEXP (addr, 1);
/* Now emit the load instruction to the same register. */
@ -33245,29 +33214,22 @@ emit_fusion_gpr_load (rtx *operands)
case QImode:
mode_name = "char";
load_str = "lbz";
extend_insn = "extsb %0,%0";
break;
case HImode:
mode_name = "short";
load_str = "lhz";
extend_insn = "extsh %0,%0";
break;
case SImode:
mode_name = "int";
load_str = "lwz";
extend_insn = "extsw %0,%0";
break;
case DImode:
if (TARGET_POWERPC64)
{
mode_name = "long";
load_str = "ld";
}
else
gcc_unreachable ();
gcc_assert (TARGET_POWERPC64);
mode_name = "long";
load_str = "ld";
break;
default:
@ -33411,14 +33373,6 @@ emit_fusion_gpr_load (rtx *operands)
else
fatal_insn ("Unable to generate load offset for fusion", load_offset);
/* Handle sign extension. The peephole2 pass generates this as a separate
insn, but we handle it just in case it got reattached. */
if (sign_p)
{
gcc_assert (extend_insn != NULL);
output_asm_insn (extend_insn, fuse_ops);
}
return "";
}

View File

@ -137,6 +137,7 @@
UNSPEC_UNPACK_128BIT
UNSPEC_PACK_128BIT
UNSPEC_LSQ
UNSPEC_FUSION_GPR
])
;;
@ -14296,22 +14297,9 @@
;; a GPR. The addis instruction must be adjacent to the load, and use the same
;; register that is being loaded. The fused ops must be physically adjacent.
;; We use define_peephole for the actual addis/load, and the register used to
;; hold the addis value must be the same as the register being loaded. We use
;; define_peephole2 to change the register used for addis to be the register
;; being loaded, since we can look at whether it is dead after the load insn.
(define_peephole
[(set (match_operand:P 0 "base_reg_operand" "")
(match_operand:P 1 "fusion_gpr_addis" ""))
(set (match_operand:INT1 2 "base_reg_operand" "")
(match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
"TARGET_P8_FUSION && fusion_gpr_load_p (operands, false)"
{
return emit_fusion_gpr_load (operands);
}
[(set_attr "type" "load")
(set_attr "length" "8")])
;; Find cases where the addis that feeds into a load instruction is either used
;; once or is the same as the target register, and replace it with the fusion
;; insn.
(define_peephole2
[(set (match_operand:P 0 "base_reg_operand" "")
@ -14319,15 +14307,28 @@
(set (match_operand:INT1 2 "base_reg_operand" "")
(match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
"TARGET_P8_FUSION
&& (REGNO (operands[0]) != REGNO (operands[2])
|| GET_CODE (operands[3]) == SIGN_EXTEND)
&& fusion_gpr_load_p (operands, true)"
&& fusion_gpr_load_p (operands[0], operands[1], operands[2],
operands[3])"
[(const_int 0)]
{
expand_fusion_gpr_load (operands);
DONE;
})
;; Fusion insn, created by the define_peephole2 above (and eventually by
;; reload).
;;
;; Operand 0 is the INT1-mode destination; it must satisfy base_reg_operand
;; and uses the "=&b" constraint (an early-clobber output in a base register).
;; Operand 1 is an INT1-mode operand accepted by fusion_gpr_mem_combo, wrapped
;; in UNSPEC_FUSION_GPR.  The pattern is only enabled when TARGET_P8_FUSION is
;; set.  The assembly output is produced by emit_fusion_gpr_load, which is
;; passed the destination and the combined memory operand.  The insn is typed
;; as a load with a length of 8.
;; NOTE(review): the length of 8 presumably covers the addis plus the load
;; instruction (4 bytes each) -- confirm.
(define_insn "fusion_gpr_load_<mode>"
[(set (match_operand:INT1 0 "base_reg_operand" "=&b")
(unspec:INT1 [(match_operand:INT1 1 "fusion_gpr_mem_combo" "")]
UNSPEC_FUSION_GPR))]
"TARGET_P8_FUSION"
{
return emit_fusion_gpr_load (operands[0], operands[1]);
}
[(set_attr "type" "load")
(set_attr "length" "8")])
;; Miscellaneous ISA 2.06 (power7) instructions
(define_insn "addg6s"