predicates.md (fusion_gpr_addis): New predicates to support power8 load fusion.
[gcc] 2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com> * config/rs6000/predicates.md (fusion_gpr_addis): New predicates to support power8 load fusion. (fusion_gpr_mem_load): Likewise. * config/rs6000/rs6000-modes.def (PTImode): Update a comment. * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New declarations for power8 load fusion. (emit_fusion_gpr_load): Likewise. * config/rs6000/rs6000.c (rs6000_option_override_internal): If tuning for power8, turn on fusion mode by default. Turn on sign extending fusion mode if normal fusion mode is on, and we are at -O2 or -O3. (fusion_gpr_load_p): New function, return true if we can fuse an addis instruction with a dependent load to a GPR. (emit_fusion_gpr_load): Emit the instructions for power8 load fusion to GPRs. * config/rs6000/vsx.md (VSX_M2): New iterator for fusion peepholes. (VSX load fusion peepholes): New peepholes to fuse together an addi instruction with a VSX load instruction. * config/rs6000/rs6000.md (GPR load fusion peepholes): New peepholes to fuse an addis instruction with a load to a GPR base register. If we are supporting sign extending fusions, convert sign extending loads to zero extending loads and add an explicit sign extension. [gcc/testsuite] 2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com> * gcc.target/powerpc/fusion.c: New file, test power8 fusion support. From-SVN: r201385
This commit is contained in:
parent
b26e3fc277
commit
d86e633abc
|
@ -1,3 +1,35 @@
|
|||
2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
* config/rs6000/predicates.md (fusion_gpr_addis): New predicates
|
||||
to support power8 load fusion.
|
||||
(fusion_gpr_mem_load): Likewise.
|
||||
|
||||
* config/rs6000/rs6000-modes.def (PTImode): Update a comment.
|
||||
|
||||
* config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New
|
||||
declarations for power8 load fusion.
|
||||
(emit_fusion_gpr_load): Likewise.
|
||||
|
||||
* config/rs6000/rs6000.c (rs6000_option_override_internal): If
|
||||
tuning for power8, turn on fusion mode by default. Turn on sign
|
||||
extending fusion mode if normal fusion mode is on, and we are at
|
||||
-O3 or higher.
|
||||
(fusion_gpr_load_p): New function, return true if we can fuse an
|
||||
addis instruction with a dependent load to a GPR.
|
||||
(emit_fusion_gpr_load): Emit the instructions for power8 load
|
||||
fusion to GPRs.
|
||||
|
||||
* config/rs6000/vsx.md (VSX_M2): New iterator for fusion
|
||||
peepholes.
|
||||
(VSX load fusion peepholes): New peepholes to fuse together an
|
||||
addi instruction with a VSX load instruction.
|
||||
|
||||
* config/rs6000/rs6000.md (GPR load fusion peepholes): New
|
||||
peepholes to fuse an addis instruction with a load to a GPR base
|
||||
register. If we are supporting sign extending fusions, convert
|
||||
sign extending loads to zero extending loads and add an explicit
|
||||
sign extension.
|
||||
|
||||
2013-07-31 Sofiane Naci <sofiane.naci@arm.com>
|
||||
|
||||
* config.gcc (arm*-*-*): Add aarch-common.o to extra_objs. Add
|
||||
|
|
|
@ -1702,3 +1702,91 @@
|
|||
|
||||
return GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_TOCREL;
|
||||
})
|
||||
|
||||
;; Predicate for the value set by the first insn (the addis) of a power8
;; addis + GPR load fusion pair.  Any HIGH is accepted; a CONST_INT, either
;; alone or added to a base register, is accepted only if it passes the range
;; checks below.
(define_predicate "fusion_gpr_addis"
  (match_code "const_int,high,plus")
{
  rtx imm;
  HOST_WIDE_INT bits;

  switch (GET_CODE (op))
    {
    case HIGH:
      return 1;

    case CONST_INT:
      imm = op;
      break;

    case PLUS:
      /* Only (plus base-register const_int) is usable.  */
      if (!base_reg_operand (XEXP (op, 0), Pmode)
	  || !CONST_INT_P (XEXP (op, 1)))
	return 0;

      imm = XEXP (op, 1);
      break;

    default:
      return 0;
    }

  bits = INTVAL (imm);

  /* Reject values with any of the low 16 bits set, and values whose bits
     16..31 are all clear.  */
  if ((bits & (HOST_WIDE_INT)0xffff) != 0
      || (bits & (HOST_WIDE_INT)0xffff0000) == 0)
    return 0;

  /* Power8 currently only fuses when the top 11 bits of the addis value are
     all 1's or all 0's, so the shifted value must lie in -32..31.  */
  return (IN_RANGE (bits >> 16, -32, 31));
})
|
||||
|
||||
;; Predicate for the memory operand of the second insn (lbz, lhz, lwz or ld)
;; of a power8 addis + GPR load fusion pair.  The address must be either
;; (plus base offset) or (lo_sum base offset).
(define_predicate "fusion_gpr_mem_load"
  (match_code "mem")
{
  rtx address;
  rtx base_part;
  rtx offset_part;

  if (!MEM_P (op))
    return 0;

  /* QImode, HImode and SImode are always acceptable; DImode only when
     TARGET_POWERPC64 is set; every other mode is rejected.  */
  if (mode == DImode)
    {
      if (!TARGET_POWERPC64)
	return 0;
    }
  else if (mode != QImode && mode != HImode && mode != SImode)
    return 0;

  address = XEXP (op, 0);
  if (GET_CODE (address) != PLUS && GET_CODE (address) != LO_SUM)
    return 0;

  base_part = XEXP (address, 0);
  offset_part = XEXP (address, 1);

  /* reg + offset: the offset has to satisfy constraint I.  */
  if (GET_CODE (address) == PLUS)
    return (base_reg_operand (base_part, GET_MODE (base_part))
	    && satisfies_constraint_I (offset_part));

  /* From here on the address is a LO_SUM.  */
  if (!base_reg_operand (base_part, GET_MODE (base_part)))
    return 0;

  /* XCOFF and 64-bit ELF: the low part must be a small TOC reference.  */
  if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
    return small_toc_ref (offset_part, GET_MODE (offset_part));

  /* 32-bit ELF: any constant low part will do.  */
  if (TARGET_ELF && !TARGET_POWERPC64)
    return CONSTANT_P (offset_part);

  return 0;
})
|
||||
|
|
|
@ -42,5 +42,7 @@ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
|
|||
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
|
||||
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
|
||||
|
||||
/* Replacement for TImode that only is allowed in GPRs. */
|
||||
/* Replacement for TImode that only is allowed in GPRs. We also use PTImode
|
||||
for quad memory atomic operations to force getting an even/odd register
|
||||
combination. */
|
||||
PARTIAL_INT_MODE (TI);
|
||||
|
|
|
@ -73,6 +73,8 @@ extern int mems_ok_for_quad_peep (rtx, rtx);
|
|||
extern bool gpr_or_gpr_p (rtx, rtx);
|
||||
extern bool direct_move_p (rtx, rtx);
|
||||
extern bool quad_load_store_p (rtx, rtx);
|
||||
extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx, rtx);
|
||||
extern const char *emit_fusion_gpr_load (rtx, rtx, rtx, rtx);
|
||||
extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
|
||||
enum reg_class);
|
||||
extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
|
||||
|
|
|
@ -3074,6 +3074,21 @@ rs6000_option_override_internal (bool global_init_p)
|
|||
rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
|
||||
}
|
||||
|
||||
/* Enable power8 fusion if we are tuning for power8, even if we aren't
|
||||
generating power8 instructions. */
|
||||
if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
|
||||
rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
|
||||
& OPTION_MASK_P8_FUSION);
|
||||
|
||||
/* Power8 does not fuse sign extended loads with the addis. If we are
|
||||
optimizing at high levels for speed, convert a sign extended load into a
|
||||
zero extending load, and an explicit sign extension. */
|
||||
if (TARGET_P8_FUSION
|
||||
&& !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
|
||||
&& optimize_function_for_speed_p (cfun)
|
||||
&& optimize >= 3)
|
||||
rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
|
||||
|
||||
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
|
||||
rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
|
||||
|
||||
|
@ -30418,6 +30433,270 @@ rs6000_split_logical (rtx operands[3],
|
|||
return;
|
||||
}
|
||||
|
||||
|
||||
/* Return true if the peephole2 can combine a load involving a combination of
|
||||
an addis instruction and a load with an offset that can be fused together on
|
||||
a power8. */
|
||||
|
||||
bool
|
||||
fusion_gpr_load_p (rtx addis_reg, /* reg. to hold high value. */
|
||||
rtx addis_value, /* high value loaded. */
|
||||
rtx target, /* reg. that is loaded. */
|
||||
rtx mem, /* memory to load. */
|
||||
rtx insn) /* insn for looking up reg notes or
|
||||
NULL_RTX if this is a peephole2. */
|
||||
{
|
||||
rtx addr;
|
||||
rtx base_reg;
|
||||
|
||||
/* Validate arguments. */
|
||||
if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
|
||||
return false;
|
||||
|
||||
if (!base_reg_operand (target, GET_MODE (target)))
|
||||
return false;
|
||||
|
||||
if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
|
||||
return false;
|
||||
|
||||
if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
|
||||
return false;
|
||||
|
||||
/* Validate that the register used to load the high value is either the
|
||||
register being loaded, or we can safely replace its use in a peephole.
|
||||
|
||||
If this is a peephole2, we assume that there are 2 instructions in the
|
||||
peephole (addis and load), so we want to check if the target register was
|
||||
not used and the register to hold the addis result is dead after the
|
||||
peephole. */
|
||||
if (REGNO (addis_reg) != REGNO (target))
|
||||
{
|
||||
if (reg_mentioned_p (target, mem))
|
||||
return false;
|
||||
|
||||
if (insn)
|
||||
{
|
||||
if (!find_reg_note (insn, REG_DEAD, addis_reg))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!peep2_reg_dead_p (2, addis_reg))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate that the value being loaded in the addis is used in the load. */
|
||||
addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
|
||||
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
|
||||
return false;
|
||||
|
||||
base_reg = XEXP (addr, 0);
|
||||
return REGNO (addis_reg) == REGNO (base_reg);
|
||||
}
|
||||
|
||||
/* Return a string to fuse an addis instruction with a gpr load to the same
|
||||
register that we loaded up the addis instruction. The code is complicated,
|
||||
so we call output_asm_insn directly, and just return "". */
|
||||
|
||||
const char *
|
||||
emit_fusion_gpr_load (rtx addis_reg, rtx addis_value, rtx target, rtx mem)
|
||||
{
|
||||
rtx fuse_ops[10];
|
||||
rtx addr;
|
||||
rtx load_offset;
|
||||
const char *addis_str = NULL;
|
||||
const char *load_str = NULL;
|
||||
const char *mode_name = NULL;
|
||||
char insn_template[80];
|
||||
enum machine_mode mode = GET_MODE (mem);
|
||||
const char *comment_str = ASM_COMMENT_START;
|
||||
|
||||
if (*comment_str == ' ')
|
||||
comment_str++;
|
||||
|
||||
if (!MEM_P (mem))
|
||||
gcc_unreachable ();
|
||||
|
||||
addr = XEXP (mem, 0);
|
||||
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
|
||||
gcc_unreachable ();
|
||||
|
||||
load_offset = XEXP (addr, 1);
|
||||
|
||||
/* Now emit the load instruction to the same register. */
|
||||
switch (mode)
|
||||
{
|
||||
case QImode:
|
||||
mode_name = "char";
|
||||
load_str = "lbz";
|
||||
break;
|
||||
|
||||
case HImode:
|
||||
mode_name = "short";
|
||||
load_str = "lhz";
|
||||
break;
|
||||
|
||||
case SImode:
|
||||
mode_name = "int";
|
||||
load_str = "lwz";
|
||||
break;
|
||||
|
||||
case DImode:
|
||||
if (TARGET_POWERPC64)
|
||||
{
|
||||
mode_name = "long";
|
||||
load_str = "ld";
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!load_str)
|
||||
gcc_unreachable ();
|
||||
|
||||
/* Emit the addis instruction. */
|
||||
fuse_ops[0] = target;
|
||||
fuse_ops[1] = addis_reg;
|
||||
if (satisfies_constraint_L (addis_value))
|
||||
{
|
||||
fuse_ops[2] = addis_value;
|
||||
addis_str = "lis %0,%v2";
|
||||
}
|
||||
|
||||
else if (GET_CODE (addis_value) == PLUS)
|
||||
{
|
||||
rtx op0 = XEXP (addis_value, 0);
|
||||
rtx op1 = XEXP (addis_value, 1);
|
||||
|
||||
if (REG_P (op0) && CONST_INT_P (op1)
|
||||
&& satisfies_constraint_L (op1))
|
||||
{
|
||||
fuse_ops[2] = op0;
|
||||
fuse_ops[3] = op1;
|
||||
addis_str = "addis %0,%2,%v3";
|
||||
}
|
||||
}
|
||||
|
||||
else if (GET_CODE (addis_value) == HIGH)
|
||||
{
|
||||
rtx value = XEXP (addis_value, 0);
|
||||
if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
|
||||
{
|
||||
fuse_ops[2] = XVECEXP (value, 0, 0); /* symbol ref. */
|
||||
fuse_ops[3] = XVECEXP (value, 0, 1); /* TOC register. */
|
||||
if (TARGET_ELF)
|
||||
addis_str = "addis %0,%3,%2@toc@ha";
|
||||
|
||||
else if (TARGET_XCOFF)
|
||||
addis_str = "addis %0,%2@u(%3)";
|
||||
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
else if (GET_CODE (value) == PLUS)
|
||||
{
|
||||
rtx op0 = XEXP (value, 0);
|
||||
rtx op1 = XEXP (value, 1);
|
||||
|
||||
if (GET_CODE (op0) == UNSPEC
|
||||
&& XINT (op0, 1) == UNSPEC_TOCREL
|
||||
&& CONST_INT_P (op1))
|
||||
{
|
||||
fuse_ops[2] = XVECEXP (op0, 0, 0); /* symbol ref. */
|
||||
fuse_ops[3] = XVECEXP (op0, 0, 1); /* TOC register. */
|
||||
fuse_ops[4] = op1;
|
||||
if (TARGET_ELF)
|
||||
addis_str = "addis %0,%3,%2+%4@toc@ha";
|
||||
|
||||
else if (TARGET_XCOFF)
|
||||
addis_str = "addis %0,%2+%4@u(%3)";
|
||||
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
else if (satisfies_constraint_L (value))
|
||||
{
|
||||
fuse_ops[2] = value;
|
||||
addis_str = "lis %0,%v2";
|
||||
}
|
||||
|
||||
else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
|
||||
{
|
||||
fuse_ops[2] = value;
|
||||
addis_str = "lis %0,%2@ha";
|
||||
}
|
||||
}
|
||||
|
||||
if (!addis_str)
|
||||
fatal_insn ("Could not generate addis value for fusion", addis_value);
|
||||
|
||||
sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s, addis reg %%1",
|
||||
addis_str, comment_str, mode_name);
|
||||
output_asm_insn (insn_template, fuse_ops);
|
||||
|
||||
if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
|
||||
{
|
||||
sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
|
||||
fuse_ops[1] = load_offset;
|
||||
output_asm_insn (insn_template, fuse_ops);
|
||||
}
|
||||
|
||||
else if (GET_CODE (load_offset) == UNSPEC
|
||||
&& XINT (load_offset, 1) == UNSPEC_TOCREL)
|
||||
{
|
||||
if (TARGET_ELF)
|
||||
sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
|
||||
|
||||
else if (TARGET_XCOFF)
|
||||
sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
|
||||
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
fuse_ops[1] = XVECEXP (load_offset, 0, 0);
|
||||
output_asm_insn (insn_template, fuse_ops);
|
||||
}
|
||||
|
||||
else if (GET_CODE (load_offset) == PLUS
|
||||
&& GET_CODE (XEXP (load_offset, 0)) == UNSPEC
|
||||
&& XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
|
||||
&& CONST_INT_P (XEXP (load_offset, 1)))
|
||||
{
|
||||
rtx tocrel_unspec = XEXP (load_offset, 0);
|
||||
if (TARGET_ELF)
|
||||
sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
|
||||
|
||||
else if (TARGET_XCOFF)
|
||||
sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
|
||||
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
|
||||
fuse_ops[2] = XEXP (load_offset, 1);
|
||||
output_asm_insn (insn_template, fuse_ops);
|
||||
}
|
||||
|
||||
else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
|
||||
{
|
||||
sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
|
||||
|
||||
fuse_ops[1] = load_offset;
|
||||
output_asm_insn (insn_template, fuse_ops);
|
||||
}
|
||||
|
||||
else
|
||||
fatal_insn ("Unable to generate load offset for fusion", load_offset);
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
|
|
|
@ -15770,6 +15770,113 @@
|
|||
return "mftb %0";
|
||||
})
|
||||
|
||||
|
||||
;; Power8 load fusion: an addis fused with a D-form load of a GPR.  The two
;; instructions have to be physically adjacent, and the addis has to use the
;; same register that the load fills.

;; Plain loads of the single word integer types (INT1).

(define_peephole
  [(set (match_operand:P 0 "base_reg_operand" "")
	(match_operand:P 1 "fusion_gpr_addis" ""))
   (set (match_operand:INT1 2 "base_reg_operand" "")
	(match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
  "TARGET_P8_FUSION
   && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
			 insn)"
{
  rtx addis_reg = operands[0];
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx mem = operands[3];

  /* Both instructions are printed by the helper; it returns "".  */
  return emit_fusion_gpr_load (addis_reg, addis_value, target, mem);
}
  [(set_attr "type" "load")
   (set_attr "length" "8")])
|
||||
|
||||
;; addis fused with a QI/HI/SI load that is zero extended to DImode; only
;; enabled when TARGET_POWERPC64 is set.
(define_peephole
  [(set (match_operand:DI 0 "base_reg_operand" "")
	(match_operand:DI 1 "fusion_gpr_addis" ""))
   (set (match_operand:DI 2 "base_reg_operand" "")
	(zero_extend:DI (match_operand:QHSI 3 "fusion_gpr_mem_load" "")))]
  "TARGET_P8_FUSION && TARGET_POWERPC64
   && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
			 insn)"
{
  rtx addis_reg = operands[0];
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx mem = operands[3];

  /* Both instructions are printed by the helper; it returns "".  */
  return emit_fusion_gpr_load (addis_reg, addis_value, target, mem);
}
  [(set_attr "type" "load")
   (set_attr "length" "8")])
|
||||
|
||||
;; A sign extending load is not fused by power8.  Rewrite it as a zero
;; extending load followed by an explicit sign extension, except when
;; optimizing for space.  This is a peephole2 so that the final rtl
;; optimizations and scheduling are still able to move the sign extend.
(define_peephole2
  [(set (match_operand:DI 0 "base_reg_operand" "")
	(match_operand:DI 1 "fusion_gpr_addis" ""))
   (set (match_operand:DI 2 "base_reg_operand" "")
	(sign_extend:DI (match_operand:HSI 3 "fusion_gpr_mem_load" "")))]
  "TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN && TARGET_POWERPC64
   && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
			 NULL_RTX)"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 4) (match_dup 3))
   (set (match_dup 2) (sign_extend:DI (match_dup 4)))]
{
  /* Byte offset handed to simplify_subreg: 0 unless BYTES_BIG_ENDIAN, in
     which case it is 8 minus the size of the narrow mode.  */
  unsigned int narrow_byte = 0;

  if (BYTES_BIG_ENDIAN)
    narrow_byte = 8 - GET_MODE_SIZE (<MODE>mode);

  /* Operand 4 is the narrow-mode piece of the DImode target register.  */
  operands[4] = simplify_subreg (<MODE>mode, operands[2], DImode,
				 narrow_byte);
})
|
||||
|
||||
;; addis fused with a QI/HI load that is zero extended to SImode.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand" "")
	(match_operand:P 1 "fusion_gpr_addis" ""))
   (set (match_operand:SI 2 "base_reg_operand" "")
	(zero_extend:SI (match_operand:QHI 3 "fusion_gpr_mem_load" "")))]
  "TARGET_P8_FUSION
   && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
			 insn)"
{
  rtx addis_reg = operands[0];
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx mem = operands[3];

  /* Both instructions are printed by the helper; it returns "".  */
  return emit_fusion_gpr_load (addis_reg, addis_value, target, mem);
}
  [(set_attr "type" "load")
   (set_attr "length" "8")])
|
||||
|
||||
;; SImode counterpart of the sign extension splitter: a sign extending HImode
;; load becomes a plain HImode load into a subreg of the target followed by an
;; explicit sign extension to SImode.
(define_peephole2
  [(set (match_operand:P 0 "base_reg_operand" "")
	(match_operand:P 1 "fusion_gpr_addis" ""))
   (set (match_operand:SI 2 "base_reg_operand" "")
	(sign_extend:SI (match_operand:HI 3 "fusion_gpr_mem_load" "")))]
  "TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN
   && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
			 NULL_RTX)"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 4) (match_dup 3))
   (set (match_dup 2) (sign_extend:SI (match_dup 4)))]
{
  /* Byte offset handed to simplify_subreg: 2 when BYTES_BIG_ENDIAN,
     otherwise 0.  */
  unsigned int narrow_byte = 0;

  if (BYTES_BIG_ENDIAN)
    narrow_byte = 2;

  /* Operand 4 is the HImode piece of the SImode target register.  */
  operands[4] = simplify_subreg (HImode, operands[2], SImode, narrow_byte);
})
|
||||
|
||||
;; addis fused with a QImode load that is zero extended to HImode.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand" "")
	(match_operand:P 1 "fusion_gpr_addis" ""))
   (set (match_operand:HI 2 "base_reg_operand" "")
	(zero_extend:HI (match_operand:QI 3 "fusion_gpr_mem_load" "")))]
  "TARGET_P8_FUSION
   && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3],
			 insn)"
{
  rtx addis_reg = operands[0];
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx mem = operands[3];

  /* Both instructions are printed by the helper; it returns "".  */
  return emit_fusion_gpr_load (addis_reg, addis_value, target, mem);
}
  [(set_attr "type" "load")
   (set_attr "length" "8")])
|
||||
|
||||
|
||||
|
||||
(include "sync.md")
|
||||
|
|
|
@ -40,6 +40,14 @@
|
|||
;; it to use gprs as well as vsx registers.
|
||||
(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
|
||||
|
||||
;; Modes used by the power8 vector load fusion peepholes: the six vector
;; modes, plus TImode when TARGET_VSX_TIMODE is set.
(define_mode_iterator VSX_M2 [V16QI V8HI V4SI V2DI V4SF V2DF
			      (TI "TARGET_VSX_TIMODE")])
|
||||
|
||||
;; Map into the appropriate load/store name based on the type
|
||||
(define_mode_attr VSm [(V16QI "vw4")
|
||||
(V8HI "vw4")
|
||||
|
@ -1446,3 +1454,27 @@
|
|||
}"
|
||||
[(set_attr "length" "20")
|
||||
(set_attr "type" "veccomplex")])
|
||||
|
||||
|
||||
;; Power8 vector load fusion.  The two fused instructions have to be
;; physically adjacent.

;; Operand 0 is loaded with a short constant and then appears as the first
;; term of the address of the following VSX load.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand" "")
	(match_operand:P 1 "short_cint_operand" ""))
   (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
	(mem:VSX_M2
	  (plus:P (match_dup 0)
		  (match_operand:P 3 "int_reg_operand" ""))))]
  "TARGET_P8_FUSION"
  "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])
|
||||
|
||||
;; Same fusion as the pattern with operand 0 first in the address, but here
;; operand 0 appears as the second term of the address.  The emitted text is
;; identical.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand" "")
	(match_operand:P 1 "short_cint_operand" ""))
   (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
	(mem:VSX_M2
	  (plus:P (match_operand:P 3 "int_reg_operand" "")
		  (match_dup 0))))]
  "TARGET_P8_FUSION"
  "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/powerpc/fusion.c: New file, test power8 fusion
|
||||
support.
|
||||
|
||||
2013-07-31 Richard Sandiford <rdsandiford@googlemail.com>
|
||||
|
||||
* gcc.target/mips/mips.exp (mips-dg-options): Test for mabicalls
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
/* { dg-do compile { target { powerpc*-*-* } } } */
|
||||
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
|
||||
/* { dg-require-effective-target powerpc_p8vector_ok } */
|
||||
/* { dg-options "-mcpu=power7 -mtune=power8 -O3" } */
|
||||
|
||||
/* Compile-only test of power8 load fusion: the code is built for power7 but
   tuned for power8, and the dg-final directives at the end count strings in
   the generated assembler.  */
/* Element index shared by the six scalar functions below.
   NOTE(review): presumably chosen so the byte offset does not fit a 16-bit
   displacement and an addis is needed -- confirm against the generated
   code.  */
#define LARGE 0x12345
|
||||
|
||||
/* One indexed load per function, covering unsigned and signed char, unsigned
   and signed short, and signed and unsigned int elements.  */
int fusion_uchar (unsigned char *p){ return p[LARGE]; }
|
||||
int fusion_schar (signed char *p){ return p[LARGE]; }
|
||||
int fusion_ushort (unsigned short *p){ return p[LARGE]; }
|
||||
int fusion_short (short *p){ return p[LARGE]; }
|
||||
int fusion_int (int *p){ return p[LARGE]; }
|
||||
unsigned fusion_uns (unsigned *p){ return p[LARGE]; }
|
||||
|
||||
/* Vector case: a single vector double load at a small index.  */
vector double fusion_vector (vector double *p) { return p[2]; }
|
||||
|
||||
/* Expected counts: six GPR fusion comments (one per scalar function), one
   vector fusion comment, two of each scalar load mnemonic, and one explicit
   sign extension each for the signed char and signed short cases.  */
/* { dg-final { scan-assembler-times "gpr load fusion" 6 } } */
|
||||
/* { dg-final { scan-assembler-times "vector load fusion" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "lbz" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "extsb" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "lhz" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "extsh" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "lwz" 2 } } */
|
Loading…
Reference in New Issue