predicates.md (fusion_gpr_addis): New predicates to support power8 load fusion.

[gcc]
2013-07-31  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/predicates.md (fusion_gpr_addis): New predicates
	to support power8 load fusion.
	(fusion_gpr_mem_load): Likewise.

	* config/rs6000/rs6000-modes.def (PTImode): Update a comment.

	* config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New
	declarations for power8 load fusion.
	(emit_fusion_gpr_load): Likewise.

	* config/rs6000/rs6000.c (rs6000_option_override_internal): If
	tuning for power8, turn on fusion mode by default.  Turn on sign
	extending fusion mode if normal fusion mode is on, and we are
	optimizing for speed at -O3 or higher.
	(fusion_gpr_load_p): New function, return true if we can fuse an
	addis instruction with a dependent load to a GPR.
	(emit_fusion_gpr_load): Emit the instructions for power8 load
	fusion to GPRs.

	* config/rs6000/vsx.md (VSX_M2): New iterator for fusion
	peepholes.
	(VSX load fusion peepholes): New peepholes to fuse together an
	addi instruction with a VSX load instruction.

	* config/rs6000/rs6000.md (GPR load fusion peepholes): New
	peepholes to fuse an addis instruction with a load to a GPR base
	register.  If we are supporting sign extending fusions, convert
	sign extending loads to zero extending loads and add an explicit
	sign extension.

[gcc/testsuite]
2013-07-31  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* gcc.target/powerpc/fusion.c: New file, test power8 fusion
	support.

From-SVN: r201385
This commit is contained in:
Michael Meissner 2013-07-31 20:04:07 +00:00 committed by Michael Meissner
parent b26e3fc277
commit d86e633abc
9 changed files with 571 additions and 1 deletions

View File

@ -1,3 +1,35 @@
2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/predicates.md (fusion_gpr_addis): New predicates
to support power8 load fusion.
(fusion_gpr_mem_load): Likewise.
* config/rs6000/rs6000-modes.def (PTImode): Update a comment.
* config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New
declarations for power8 load fusion.
(emit_fusion_gpr_load): Likewise.
* config/rs6000/rs6000.c (rs6000_option_override_internal): If
tuning for power8, turn on fusion mode by default. Turn on sign
extending fusion mode if normal fusion mode is on, and we are
optimizing for speed at -O3 or higher.
(fusion_gpr_load_p): New function, return true if we can fuse an
addis instruction with a dependent load to a GPR.
(emit_fusion_gpr_load): Emit the instructions for power8 load
fusion to GPRs.
* config/rs6000/vsx.md (VSX_M2): New iterator for fusion
peepholes.
(VSX load fusion peepholes): New peepholes to fuse together an
addi instruction with a VSX load instruction.
* config/rs6000/rs6000.md (GPR load fusion peepholes): New
peepholes to fuse an addis instruction with a load to a GPR base
register. If we are supporting sign extending fusions, convert
sign extending loads to zero extending loads and add an explicit
sign extension.
2013-07-31 Sofiane Naci <sofiane.naci@arm.com>
* config.gcc (arm*-*-*): Add aarch-common.o to extra_objs. Add

View File

@ -1702,3 +1702,91 @@
return GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_TOCREL;
})
;; Recognize the value set by the first insn (addis) of an addis/load pair
;; that can be fused when loading a GPR on power8.
(define_predicate "fusion_gpr_addis"
  (match_code "const_int,high,plus")
{
  rtx cst;
  HOST_WIDE_INT addis_val;

  /* A HIGH expression is always acceptable.  */
  if (GET_CODE (op) == HIGH)
    return 1;

  /* Otherwise locate the integer constant: either a bare CONST_INT or the
     second operand of a PLUS whose first operand is a base register.  */
  if (GET_CODE (op) == PLUS)
    {
      if (!base_reg_operand (XEXP (op, 0), Pmode)
          || !CONST_INT_P (XEXP (op, 1)))
        return 0;

      cst = XEXP (op, 1);
    }
  else if (CONST_INT_P (op))
    cst = op;
  else
    return 0;

  addis_val = INTVAL (cst);

  /* The low 16 bits must be clear, and bits 16..31 must not all be clear.  */
  if ((addis_val & (HOST_WIDE_INT)0xffff) != 0
      || (addis_val & (HOST_WIDE_INT)0xffff0000) == 0)
    return 0;

  /* Power8 currently will only do the fusion if the top 11 bits of the addis
     value are all 1's or 0's.  */
  return (IN_RANGE (addis_val >> 16, -32, 31));
})
;; Recognize the memory operand of the second insn (lbz, lhz, lwz, ld) of an
;; addis/load pair that can be fused when loading a GPR on power8.
(define_predicate "fusion_gpr_mem_load"
  (match_code "mem")
{
  rtx address;
  rtx base_rtx;
  rtx off_rtx;

  if (!MEM_P (op))
    return 0;

  /* QImode, HImode and SImode are always candidates; DImode only when
     TARGET_POWERPC64.  Every other mode is rejected.  */
  if (mode == DImode)
    {
      if (!TARGET_POWERPC64)
        return 0;
    }
  else if (mode != QImode && mode != HImode && mode != SImode)
    return 0;

  /* Only reg+offset (PLUS) and LO_SUM addresses are handled.  */
  address = XEXP (op, 0);
  if (GET_CODE (address) != PLUS && GET_CODE (address) != LO_SUM)
    return 0;

  base_rtx = XEXP (address, 0);
  off_rtx = XEXP (address, 1);
  if (!base_reg_operand (base_rtx, GET_MODE (base_rtx)))
    return 0;

  /* PLUS: the displacement must satisfy constraint I.  */
  if (GET_CODE (address) == PLUS)
    return satisfies_constraint_I (off_rtx);

  /* LO_SUM: what is acceptable as the low part depends on the ABI.  */
  if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
    return small_toc_ref (off_rtx, GET_MODE (off_rtx));

  if (TARGET_ELF && !TARGET_POWERPC64)
    return CONSTANT_P (off_rtx);

  return 0;
})

View File

@ -42,5 +42,7 @@ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
/* Replacement for TImode that only is allowed in GPRs. */
/* Replacement for TImode that only is allowed in GPRs. We also use PTImode
for quad memory atomic operations to force getting an even/odd register
combination. */
PARTIAL_INT_MODE (TI);

View File

@ -73,6 +73,8 @@ extern int mems_ok_for_quad_peep (rtx, rtx);
extern bool gpr_or_gpr_p (rtx, rtx);
extern bool direct_move_p (rtx, rtx);
extern bool quad_load_store_p (rtx, rtx);
/* Power8 load fusion: test whether an addis and a dependent GPR load can be
   fused (arguments: addis register, addis value, load target, memory, insn
   or NULL_RTX when called from a peephole2).  */
extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx, rtx);
/* Power8 load fusion: output the fused addis/load pair (arguments: addis
   register, addis value, load target, memory).  */
extern const char *emit_fusion_gpr_load (rtx, rtx, rtx, rtx);
extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
enum reg_class);
extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,

View File

@ -3074,6 +3074,21 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
}
/* Enable power8 fusion if we are tuning for power8, even if we aren't
generating power8 instructions. */
if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
& OPTION_MASK_P8_FUSION);
/* Power8 does not fuse sign extended loads with the addis. If we are
optimizing at high levels for speed, convert a sign extended load into a
zero extending load, and an explicit sign extension. */
if (TARGET_P8_FUSION
&& !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
&& optimize_function_for_speed_p (cfun)
&& optimize >= 3)
rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
@ -30418,6 +30433,270 @@ rs6000_split_logical (rtx operands[3],
return;
}
/* Return true if a peephole can combine an addis instruction with a
   dependent load that uses an offset, so that the two can be fused together
   on a power8.

   ADDIS_REG is the register that holds the high value, ADDIS_VALUE is the
   high value being loaded, TARGET is the register that is loaded, MEM is the
   memory being loaded, and INSN is the insn used for looking up register
   notes, or NULL_RTX if this is called from a peephole2.  */

bool
fusion_gpr_load_p (rtx addis_reg,	/* reg. to hold high value.  */
		   rtx addis_value,	/* high value loaded.  */
		   rtx target,		/* reg. that is loaded.  */
		   rtx mem,		/* memory to load.  */
		   rtx insn)		/* insn for looking up reg notes or
					   NULL_RTX if this is a peephole2.  */
{
  rtx addr;
  rtx base_reg;

  /* Validate arguments.  */
  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
    return false;

  if (!base_reg_operand (target, GET_MODE (target)))
    return false;

  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
    return false;

  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
    return false;

  /* Validate that the register used to load the high value is either the
     register being loaded, or we can safely replace its use in a peephole.

     If this is a peephole2, we assume that there are 2 instructions in the
     peephole (addis and load), so we want to check if the target register was
     not used and the register to hold the addis result is dead after the
     peephole.  */
  if (REGNO (addis_reg) != REGNO (target))
    {
      if (reg_mentioned_p (target, mem))
	return false;

      /* The fused sequence writes the addis result into TARGET before the
	 load overwrites it.  If the target register being loaded is the
	 stack pointer, we must avoid loading any other value into it, even
	 temporarily.  */
      if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
	return false;

      if (insn)
	{
	  if (!find_reg_note (insn, REG_DEAD, addis_reg))
	    return false;
	}
      else
	{
	  if (!peep2_reg_dead_p (2, addis_reg))
	    return false;
	}
    }

  /* Validate that the value being loaded in the addis is used in the load.  */
  addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
  if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
    return false;

  base_reg = XEXP (addr, 0);
  return REGNO (addis_reg) == REGNO (base_reg);
}
/* Output the assembly for an addis instruction fused with a GPR load.  The
   addis result is always written to TARGET (not ADDIS_REG), and the load
   then uses TARGET as both base register and destination.  ADDIS_VALUE is
   the value set by the addis and MEM is the memory being loaded.  The code
   is complicated, so we call output_asm_insn directly, and just return "".
   Unsupported addis values or load offsets are reported via fatal_insn.  */
const char *
emit_fusion_gpr_load (rtx addis_reg, rtx addis_value, rtx target, rtx mem)
{
rtx fuse_ops[10];
rtx addr;
rtx load_offset;
const char *addis_str = NULL;
const char *load_str = NULL;
const char *mode_name = NULL;
char insn_template[80];
enum machine_mode mode = GET_MODE (mem);
const char *comment_str = ASM_COMMENT_START;
/* Skip a leading space in the comment marker.  */
if (*comment_str == ' ')
comment_str++;
if (!MEM_P (mem))
gcc_unreachable ();
addr = XEXP (mem, 0);
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
gcc_unreachable ();
load_offset = XEXP (addr, 1);
/* Select the load mnemonic and the type name printed in the asm comment
   from the mode of MEM.  Nothing is emitted yet.  */
switch (mode)
{
case QImode:
mode_name = "char";
load_str = "lbz";
break;
case HImode:
mode_name = "short";
load_str = "lhz";
break;
case SImode:
mode_name = "int";
load_str = "lwz";
break;
case DImode:
if (TARGET_POWERPC64)
{
mode_name = "long";
load_str = "ld";
}
break;
default:
break;
}
/* No mnemonic means an unsupported mode (including DImode without
   TARGET_POWERPC64).  */
if (!load_str)
gcc_unreachable ();
/* Emit the addis instruction.  Operand 0 is the register written by the
   addis; operand 1 (ADDIS_REG) is only referenced from the asm comment.  */
fuse_ops[0] = target;
fuse_ops[1] = addis_reg;
/* Plain constant satisfying constraint L: use lis.  */
if (satisfies_constraint_L (addis_value))
{
fuse_ops[2] = addis_value;
addis_str = "lis %0,%v2";
}
/* Register plus a constant satisfying constraint L: use addis.  */
else if (GET_CODE (addis_value) == PLUS)
{
rtx op0 = XEXP (addis_value, 0);
rtx op1 = XEXP (addis_value, 1);
if (REG_P (op0) && CONST_INT_P (op1)
&& satisfies_constraint_L (op1))
{
fuse_ops[2] = op0;
fuse_ops[3] = op1;
addis_str = "addis %0,%2,%v3";
}
}
/* HIGH of a TOC reference (optionally plus a constant), of a constant
   satisfying constraint L, or of a general constant on 32-bit ELF.  */
else if (GET_CODE (addis_value) == HIGH)
{
rtx value = XEXP (addis_value, 0);
if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
{
fuse_ops[2] = XVECEXP (value, 0, 0); /* symbol ref. */
fuse_ops[3] = XVECEXP (value, 0, 1); /* TOC register. */
if (TARGET_ELF)
addis_str = "addis %0,%3,%2@toc@ha";
else if (TARGET_XCOFF)
addis_str = "addis %0,%2@u(%3)";
else
gcc_unreachable ();
}
else if (GET_CODE (value) == PLUS)
{
rtx op0 = XEXP (value, 0);
rtx op1 = XEXP (value, 1);
if (GET_CODE (op0) == UNSPEC
&& XINT (op0, 1) == UNSPEC_TOCREL
&& CONST_INT_P (op1))
{
fuse_ops[2] = XVECEXP (op0, 0, 0); /* symbol ref. */
fuse_ops[3] = XVECEXP (op0, 0, 1); /* TOC register. */
fuse_ops[4] = op1;
if (TARGET_ELF)
addis_str = "addis %0,%3,%2+%4@toc@ha";
else if (TARGET_XCOFF)
addis_str = "addis %0,%2+%4@u(%3)";
else
gcc_unreachable ();
}
}
else if (satisfies_constraint_L (value))
{
fuse_ops[2] = value;
addis_str = "lis %0,%v2";
}
else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
{
fuse_ops[2] = value;
addis_str = "lis %0,%2@ha";
}
}
/* None of the forms above matched.  */
if (!addis_str)
fatal_insn ("Could not generate addis value for fusion", addis_value);
/* Output the addis followed by an asm comment naming the fusion, the type
   loaded and the original addis register.  */
sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s, addis reg %%1",
addis_str, comment_str, mode_name);
output_asm_insn (insn_template, fuse_ops);
/* Emit the load.  fuse_ops[1] (and fuse_ops[2] in one case) is reused for
   the offset; the base register and destination are both operand 0.  */
if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
{
sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
fuse_ops[1] = load_offset;
output_asm_insn (insn_template, fuse_ops);
}
/* Low part of a TOC reference.  */
else if (GET_CODE (load_offset) == UNSPEC
&& XINT (load_offset, 1) == UNSPEC_TOCREL)
{
if (TARGET_ELF)
sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
else if (TARGET_XCOFF)
sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
else
gcc_unreachable ();
fuse_ops[1] = XVECEXP (load_offset, 0, 0);
output_asm_insn (insn_template, fuse_ops);
}
/* Low part of a TOC reference plus a constant.  */
else if (GET_CODE (load_offset) == PLUS
&& GET_CODE (XEXP (load_offset, 0)) == UNSPEC
&& XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
&& CONST_INT_P (XEXP (load_offset, 1)))
{
rtx tocrel_unspec = XEXP (load_offset, 0);
if (TARGET_ELF)
sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
else if (TARGET_XCOFF)
sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
else
gcc_unreachable ();
fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
fuse_ops[2] = XEXP (load_offset, 1);
output_asm_insn (insn_template, fuse_ops);
}
/* Low part of a general constant on 32-bit ELF.  */
else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
{
sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
fuse_ops[1] = load_offset;
output_asm_insn (insn_template, fuse_ops);
}
else
fatal_insn ("Unable to generate load offset for fusion", load_offset);
/* Everything has already been output.  */
return "";
}
struct gcc_target targetm = TARGET_INITIALIZER;

View File

@ -15770,6 +15770,113 @@
return "mftb %0";
})
;; Power8 fusion support for fusing an addis instruction with a D-form load of
;; a GPR.  The addis instruction must be adjacent to the load, and use the same
;; register that is being loaded, i.e. the fused ops must be physically
;; adjacent.
;; GPR fusion for the integer modes in INT1.  Operand 0 is the register set by
;; the addis, operand 1 the addis value, operand 2 the register loaded and
;; operand 3 the memory.  Both instructions are output by
;; emit_fusion_gpr_load, hence the length of 8.
(define_peephole
[(set (match_operand:P 0 "base_reg_operand" "")
(match_operand:P 1 "fusion_gpr_addis" ""))
(set (match_operand:INT1 2 "base_reg_operand" "")
(match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
"TARGET_P8_FUSION
&& fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
insn)"
{
return emit_fusion_gpr_load (operands[0], operands[1], operands[2],
operands[3]);
}
[(set_attr "type" "load")
(set_attr "length" "8")])
;; GPR fusion for a zero-extending load (QHSI modes) into a DImode register.
;; Only used when TARGET_POWERPC64.  The mode of the memory operand selects
;; the load instruction in emit_fusion_gpr_load.
(define_peephole
[(set (match_operand:DI 0 "base_reg_operand" "")
(match_operand:DI 1 "fusion_gpr_addis" ""))
(set (match_operand:DI 2 "base_reg_operand" "")
(zero_extend:DI (match_operand:QHSI 3 "fusion_gpr_mem_load" "")))]
"TARGET_P8_FUSION && TARGET_POWERPC64
&& fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
insn)"
{
return emit_fusion_gpr_load (operands[0], operands[1], operands[2],
operands[3]);
}
[(set_attr "type" "load")
(set_attr "length" "8")])
;; Power8 does not fuse a sign extending load, so convert the sign extending
;; load into a zero extending load, and do an explicit sign extension.  This
;; is only done when TARGET_P8_FUSION_SIGN is set.  Do this as a peephole2 to
;; allow final rtl optimizations and scheduling to move the sign extend.
;; This variant handles HSI-mode loads sign extended into a DImode register.
(define_peephole2
[(set (match_operand:DI 0 "base_reg_operand" "")
(match_operand:DI 1 "fusion_gpr_addis" ""))
(set (match_operand:DI 2 "base_reg_operand" "")
(sign_extend:DI (match_operand:HSI 3 "fusion_gpr_mem_load" "")))]
"TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN && TARGET_POWERPC64
&& fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
NULL_RTX)"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 4) (match_dup 3))
(set (match_dup 2) (sign_extend:DI (match_dup 4)))]
{
/* operands[4] is the part of the DImode target that has the mode of the
   load; on big endian it sits at byte offset 8 - size, otherwise at 0.  */
unsigned int offset
= (BYTES_BIG_ENDIAN ? 8 - GET_MODE_SIZE (<MODE>mode) : 0);
operands[4] = simplify_subreg (<MODE>mode, operands[2], DImode,
offset);
})
;; GPR fusion for a zero-extending load (QHI modes) into an SImode register.
;; Both instructions are output by emit_fusion_gpr_load.
(define_peephole
[(set (match_operand:P 0 "base_reg_operand" "")
(match_operand:P 1 "fusion_gpr_addis" ""))
(set (match_operand:SI 2 "base_reg_operand" "")
(zero_extend:SI (match_operand:QHI 3 "fusion_gpr_mem_load" "")))]
"TARGET_P8_FUSION
&& fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
insn)"
{
return emit_fusion_gpr_load (operands[0], operands[1], operands[2],
operands[3]);
}
[(set_attr "type" "load")
(set_attr "length" "8")])
;; When TARGET_P8_FUSION_SIGN is set, split a sign-extending HImode load into
;; an SImode register into the addis, a plain HImode load into the low part of
;; the target, and an explicit sign extension.
(define_peephole2
[(set (match_operand:P 0 "base_reg_operand" "")
(match_operand:P 1 "fusion_gpr_addis" ""))
(set (match_operand:SI 2 "base_reg_operand" "")
(sign_extend:SI (match_operand:HI 3 "fusion_gpr_mem_load" "")))]
"TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN
&& fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
NULL_RTX)"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 4) (match_dup 3))
(set (match_dup 2) (sign_extend:SI (match_dup 4)))]
{
/* operands[4] is the HImode part of the SImode target; on big endian it
   sits at byte offset 2, otherwise at 0.  */
unsigned int offset = (BYTES_BIG_ENDIAN ? 2 : 0);
operands[4] = simplify_subreg (HImode, operands[2], SImode, offset);
})
;; GPR fusion for a zero-extending QImode load into an HImode register.
;; Both instructions are output by emit_fusion_gpr_load.
(define_peephole
[(set (match_operand:P 0 "base_reg_operand" "")
(match_operand:P 1 "fusion_gpr_addis" ""))
(set (match_operand:HI 2 "base_reg_operand" "")
(zero_extend:HI (match_operand:QI 3 "fusion_gpr_mem_load" "")))]
"TARGET_P8_FUSION
&& fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
insn)"
{
return emit_fusion_gpr_load (operands[0], operands[1], operands[2],
operands[3]);
}
[(set_attr "type" "load")
(set_attr "length" "8")])
(include "sync.md")

View File

@ -40,6 +40,14 @@
;; it to use gprs as well as vsx registers.
(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Iterator for the power8 vector load fusion peepholes: the six vector modes
;; of VSX_M, plus TImode when TARGET_VSX_TIMODE is set.
(define_mode_iterator VSX_M2 [V16QI
V8HI
V4SI
V2DI
V4SF
V2DF
(TI "TARGET_VSX_TIMODE")])
;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
(V8HI "vw4")
@ -1446,3 +1454,27 @@
}"
[(set_attr "length" "20")
(set_attr "type" "veccomplex")])
;; Power8 Vector fusion.  The fused ops must be physically adjacent.
;; Match a load of a short constant into a base register followed by a VSX
;; load whose address is that register plus an index register, and output the
;; two as an adjacent li / indexed-load pair.
(define_peephole
[(set (match_operand:P 0 "base_reg_operand" "")
(match_operand:P 1 "short_cint_operand" ""))
(set (match_operand:VSX_M2 2 "vsx_register_operand" "")
(mem:VSX_M2 (plus:P (match_dup 0)
(match_operand:P 3 "int_reg_operand" ""))))]
"TARGET_P8_FUSION"
"li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
[(set_attr "length" "8")
(set_attr "type" "vecload")])
;; Power8 vector fusion for the case where the register set by the li is the
;; second operand of the address PLUS.  The output still places operand 0
;; first in the indexed load.
(define_peephole
[(set (match_operand:P 0 "base_reg_operand" "")
(match_operand:P 1 "short_cint_operand" ""))
(set (match_operand:VSX_M2 2 "vsx_register_operand" "")
(mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
(match_dup 0))))]
"TARGET_P8_FUSION"
"li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
[(set_attr "length" "8")
(set_attr "type" "vecload")])

View File

@ -1,3 +1,8 @@
2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
* gcc.target/powerpc/fusion.c: New file, test power8 fusion
support.
2013-07-31 Richard Sandiford <rdsandiford@googlemail.com>
* gcc.target/mips/mips.exp (mips-dg-options): Test for mabicalls

View File

@ -0,0 +1,23 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-options "-mcpu=power7 -mtune=power8 -O3" } */
/* Element index whose byte offset does not fit in a signed 16-bit
   displacement, so each scalar access below is expected to need an addis
   followed by a load.  */
#define LARGE 0x12345
/* Scalar loads: each function is expected to produce one "gpr load fusion"
   sequence (six in total, see the scan-assembler-times counts).  */
int fusion_uchar (unsigned char *p){ return p[LARGE]; }
int fusion_schar (signed char *p){ return p[LARGE]; }
int fusion_ushort (unsigned short *p){ return p[LARGE]; }
int fusion_short (short *p){ return p[LARGE]; }
int fusion_int (int *p){ return p[LARGE]; }
unsigned fusion_uns (unsigned *p){ return p[LARGE]; }
/* Vector load: expected to produce the single "vector load fusion"
   sequence.  */
vector double fusion_vector (vector double *p) { return p[2]; }
/* { dg-final { scan-assembler-times "gpr load fusion" 6 } } */
/* { dg-final { scan-assembler-times "vector load fusion" 1 } } */
/* { dg-final { scan-assembler-times "lbz" 2 } } */
/* { dg-final { scan-assembler-times "extsb" 1 } } */
/* { dg-final { scan-assembler-times "lhz" 2 } } */
/* { dg-final { scan-assembler-times "extsh" 1 } } */
/* { dg-final { scan-assembler-times "lwz" 2 } } */