backport: re PR target/80510 (Optimize Power7/power8 Altivec load/stores)

[gcc]
2017-05-25  Michael Meissner  <meissner@linux.vnet.ibm.com>

	Backport from trunk
	2017-05-18  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/80510
	* config/rs6000/predicates.md (simple_offsettable_mem_operand):
	New predicate.

	* config/rs6000/rs6000.md (ALTIVEC_DFORM): New iterator.
	(define_peephole2 for Altivec d-form load): Add peepholes to catch
	cases where the register allocator uses a move and an offsettable
	memory operation to/from a FPR register on ISA 2.06/2.07.
	(define_peephole2 for Altivec d-form store): Likewise.

	Backport from trunk
	2017-05-09  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/68163
	* config/rs6000/rs6000.md (f32_lr): Delete mode attributes that
	are now unused after splitting mov{sf,sd}_hardfloat.
	(f32_lr2): Likewise.
	(f32_lm): Likewise.
	(f32_lm2): Likewise.
	(f32_li): Likewise.
	(f32_li2): Likewise.
	(f32_lv): Likewise.
	(f32_sr): Likewise.
	(f32_sr2): Likewise.
	(f32_sm): Likewise.
	(f32_sm2): Likewise.
	(f32_si): Likewise.
	(f32_si2): Likewise.
	(f32_sv): Likewise.
	(f32_dm): Likewise.
	(f32_vsx): Likewise.
	(f32_av): Likewise.
	(mov<mode>_hardfloat): Split into separate movsf and movsd pieces.
	For movsf, order stores so the VSX stores occur before the GPR
	store which encourages the register allocator to use a traditional
	FPR instead of a GPR.  For movsd, order the stores so that the GPR
	store comes before the VSX stores to allow the power6 to work.
	This is due to the power6 not having a 32-bit integer store
	instruction from a FPR.
	(movsf_hardfloat): Likewise.
	(movsd_hardfloat): Likewise.

[gcc/testsuite]
2017-05-25  Michael Meissner  <meissner@linux.vnet.ibm.com>

	Backport from trunk
	2017-05-18  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/80510
	* gcc.target/powerpc/pr80510-1.c: New test.
	* gcc.target/powerpc/pr80510-2.c: Likewise.

	Backport from trunk
	2017-05-09  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/68163
	* gcc.target/powerpc/pr68163.c: New test.

From-SVN: r248480
This commit is contained in:
Michael Meissner 2017-05-26 01:52:24 +00:00 committed by Michael Meissner
parent d96e4338b2
commit e722c0f728
7 changed files with 846 additions and 50 deletions

View File

@ -1,3 +1,50 @@
2017-05-25 Michael Meissner <meissner@linux.vnet.ibm.com>
Backport from trunk
2017-05-18 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/80510
* config/rs6000/predicates.md (simple_offsettable_mem_operand):
New predicate.
* config/rs6000/rs6000.md (ALTIVEC_DFORM): New iterator.
(define_peephole2 for Altivec d-form load): Add peepholes to catch
cases where the register allocator uses a move and an offsettable
memory operation to/from a FPR register on ISA 2.06/2.07.
(define_peephole2 for Altivec d-form store): Likewise.
Backport from trunk
2017-05-09 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/68163
* config/rs6000/rs6000.md (f32_lr): Delete mode attributes that
are now unused after splitting mov{sf,sd}_hardfloat.
(f32_lr2): Likewise.
(f32_lm): Likewise.
(f32_lm2): Likewise.
(f32_li): Likewise.
(f32_li2): Likewise.
(f32_lv): Likewise.
(f32_sr): Likewise.
(f32_sr2): Likewise.
(f32_sm): Likewise.
(f32_sm2): Likewise.
(f32_si): Likewise.
(f32_si2): Likewise.
(f32_sv): Likewise.
(f32_dm): Likewise.
(f32_vsx): Likewise.
(f32_av): Likewise.
(mov<mode>_hardfloat): Split into separate movsf and movsd pieces.
For movsf, order stores so the VSX stores occur before the GPR
store which encourages the register allocator to use a traditional
FPR instead of a GPR. For movsd, order the stores so that the GPR
store comes before the VSX stores to allow the power6 to work.
This is due to the power6 not having a 32-bit integer store
instruction from a FPR.
(movsf_hardfloat): Likewise.
(movsd_hardfloat): Likewise.
2017-05-25 Wilco Dijkstra <wdijkstr@arm.com>
Backport from mainlin

View File

@ -847,6 +847,22 @@
(and (match_operand 0 "memory_operand")
(match_test "offsettable_nonstrict_memref_p (op)")))
;; Return 1 if the operand is a simple offsettable memory operand
;; that does not include pre-increment, post-increment, etc.
(define_predicate "simple_offsettable_mem_operand"
(match_operand 0 "offsettable_mem_operand")
{
rtx addr = XEXP (op, 0);
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
return 0;
if (!CONSTANT_P (XEXP (addr, 1)))
return 0;
return base_reg_operand (XEXP (addr, 0), Pmode);
})
;; Return 1 if the operand is suitable for load/store quad memory.
;; This predicate only checks for non-atomic loads/stores (not lqarx/stqcx).
(define_predicate "quad_memory_operand"

View File

@ -445,35 +445,6 @@
(DD "wn")
(TD "wn")])
; Definitions for load to 32-bit fpr register
(define_mode_attr f32_lr [(SF "f") (SD "wz")])
(define_mode_attr f32_lr2 [(SF "wb") (SD "wn")])
(define_mode_attr f32_lm [(SF "m") (SD "Z")])
(define_mode_attr f32_lm2 [(SF "wY") (SD "wn")])
(define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
(define_mode_attr f32_li2 [(SF "lxssp %0,%1") (SD "lfiwzx %0,%y1")])
(define_mode_attr f32_lv [(SF "lxsspx %x0,%y1") (SD "lxsiwzx %x0,%y1")])
; Definitions for store from 32-bit fpr register
(define_mode_attr f32_sr [(SF "f") (SD "wx")])
(define_mode_attr f32_sr2 [(SF "wb") (SD "wn")])
(define_mode_attr f32_sm [(SF "m") (SD "Z")])
(define_mode_attr f32_sm2 [(SF "wY") (SD "wn")])
(define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
(define_mode_attr f32_si2 [(SF "stxssp %1,%0") (SD "stfiwx %1,%y0")])
(define_mode_attr f32_sv [(SF "stxsspx %x1,%y0") (SD "stxsiwx %x1,%y0")])
; Definitions for 32-bit fpr direct move
; At present, the decimal modes are not allowed in the traditional altivec
; registers, so restrict the constraints to just the traditional FPRs.
(define_mode_attr f32_dm [(SF "wn") (SD "wh")])
; Definitions for 32-bit VSX
(define_mode_attr f32_vsx [(SF "ww") (SD "wn")])
; Definitions for 32-bit use of altivec registers
(define_mode_attr f32_av [(SF "wu") (SD "wn")])
; Definitions for 64-bit VSX
(define_mode_attr f64_vsx [(DF "ws") (DD "wn")])
@ -730,6 +701,11 @@
(define_code_attr SMINMAX [(smin "SMIN")
(smax "SMAX")])
;; Iterator to optimize the following cases:
;; D-form load to FPR register & move to Altivec register
;; Move Altivec register to FPR register and store
(define_mode_iterator ALTIVEC_DFORM [DI DF SF])
;; Start with fixed-point load and store insns. Here we put only the more
;; complex forms. Basic data transfer is done later.
@ -7232,40 +7208,82 @@
operands[3] = gen_int_mode (l, SImode);
}")
(define_insn "mov<mode>_hardfloat"
[(set (match_operand:FMOVE32 0 "nonimmediate_operand"
"=!r, <f32_lr>, <f32_lr2>, <f32_av>, m, <f32_sm>,
<f32_sm2>, Z, <f32_vsx>, !r, ?<f32_dm>, ?r,
f, <f32_vsx>, !r, *c*l, !r, *h")
(match_operand:FMOVE32 1 "input_operand"
"m, <f32_lm>, <f32_lm2>, Z, r, <f32_sr>,
<f32_sr2>, <f32_av>, <zero_fp>, <zero_fp>, r, <f32_dm>,
f, <f32_vsx>, r, r, *h, 0"))]
"(register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))
;; Originally, we tried to keep movsf and movsd common, but the differences
;; addressing was making it rather difficult to hide with mode attributes. In
;; particular for SFmode, on ISA 2.07 (power8) systems, having the GPR store
;; before the VSX stores meant that the register allocator would tend to do a
;; direct move to the GPR (which involves conversion from scalar to
;; vector/memory formats) to save values in the traditional Altivec registers,
;; while SDmode had problems on power6 if the GPR store was not first due to
;; the power6 not having an integer store operation.
;;
;; LWZ LFS LXSSP LXSSPX STFS STXSSP
;; STXSSPX STW XXLXOR LI FMR XSCPSGNDP
;; MR MT<x> MF<x> NOP
(define_insn "movsf_hardfloat"
[(set (match_operand:SF 0 "nonimmediate_operand"
"=!r, f, wb, wu, m, wY,
Z, m, ww, !r, f, ww,
!r, *c*l, !r, *h")
(match_operand:SF 1 "input_operand"
"m, m, wY, Z, f, wb,
wu, r, j, j, f, ww,
r, r, *h, 0"))]
"(register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))
&& TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
&& (TARGET_ALLOW_SF_SUBREG
|| valid_sf_si_move (operands[0], operands[1], <MODE>mode))"
|| valid_sf_si_move (operands[0], operands[1], SFmode))"
"@
lwz%U1%X1 %0,%1
<f32_li>
<f32_li2>
<f32_lv>
lfs%U1%X1 %0,%1
lxssp %0,%1
lxsspx %x0,%y1
stfs%U0%X0 %1,%0
stxssp %1,%0
stxsspx %x1,%y0
stw%U0%X0 %1,%0
<f32_si>
<f32_si2>
<f32_sv>
xxlxor %x0,%x0,%x0
li %0,0
mtvsrwz %x0,%1
mfvsrwz %0,%x1
fmr %0,%1
xscpsgndp %x0,%x1,%x1
mr %0,%1
mt%0 %1
mf%1 %0
nop"
[(set_attr "type" "load,fpload,fpload,fpload,store,fpstore,fpstore,fpstore,veclogical,integer,mffgpr,mftgpr,fpsimple,fpsimple,*,mtjmpr,mfjmpr,*")])
[(set_attr "type"
"load, fpload, fpload, fpload, fpstore, fpstore,
fpstore, store, veclogical, integer, fpsimple, fpsimple,
*, mtjmpr, mfjmpr, *")])
;; LWZ LFIWZX STW STFIWX MTVSRWZ MFVSRWZ
;; FMR MR MT%0 MF%1 NOP
(define_insn "movsd_hardfloat"
[(set (match_operand:SD 0 "nonimmediate_operand"
"=!r, wz, m, Z, ?wh, ?r,
f, !r, *c*l, !r, *h")
(match_operand:SD 1 "input_operand"
"m, Z, r, wx, r, wh,
f, r, r, *h, 0"))]
"(register_operand (operands[0], SDmode)
|| register_operand (operands[1], SDmode))
&& TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
"@
lwz%U1%X1 %0,%1
lfiwzx %0,%y1
stw%U0%X0 %1,%0
stfiwx %1,%y0
mtvsrwz %x0,%1
mfvsrwz %0,%x1
fmr %0,%1
mr %0,%1
mt%0 %1
mf%1 %0
nop"
[(set_attr "type"
"load, fpload, store, fpstore, mffgpr, mftgpr,
fpsimple, *, mtjmpr, mfjmpr, *")])
(define_insn "*mov<mode>_softfloat"
[(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
@ -14062,6 +14080,74 @@
[(set_attr "type" "two")
(set_attr "length" "8")])
;; Optimize cases where we want to do a D-form load (register+offset) on
;; ISA 2.06/2.07 to an Altivec register, and the register allocator
;; has generated:
;; load fpr
;; move fpr->altivec
(define_peephole2
[(match_scratch:DI 0 "b")
(set (match_operand:ALTIVEC_DFORM 1 "fpr_reg_operand")
(match_operand:ALTIVEC_DFORM 2 "simple_offsettable_mem_operand"))
(set (match_operand:ALTIVEC_DFORM 3 "altivec_register_operand")
(match_dup 1))]
"TARGET_VSX && TARGET_POWERPC64 && TARGET_UPPER_REGS_<MODE>
&& !TARGET_P9_DFORM_SCALAR && peep2_reg_dead_p (2, operands[1])"
[(set (match_dup 0)
(match_dup 4))
(set (match_dup 3)
(match_dup 5))]
{
rtx tmp_reg = operands[0];
rtx mem = operands[2];
rtx addr = XEXP (mem, 0);
rtx add_op0, add_op1, new_addr;
gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
add_op0 = XEXP (addr, 0);
add_op1 = XEXP (addr, 1);
gcc_assert (REG_P (add_op0));
new_addr = gen_rtx_PLUS (DImode, add_op0, tmp_reg);
operands[4] = add_op1;
operands[5] = change_address (mem, <MODE>mode, new_addr);
})
;; Optimize cases were want to do a D-form store on ISA 2.06/2.07 from an
;; Altivec register, and the register allocator has generated:
;; move altivec->fpr
;; store fpr
(define_peephole2
[(match_scratch:DI 0 "b")
(set (match_operand:ALTIVEC_DFORM 1 "fpr_reg_operand")
(match_operand:ALTIVEC_DFORM 2 "altivec_register_operand"))
(set (match_operand:ALTIVEC_DFORM 3 "simple_offsettable_mem_operand")
(match_dup 1))]
"TARGET_VSX && TARGET_POWERPC64 && TARGET_UPPER_REGS_<MODE>
&& !TARGET_P9_DFORM_SCALAR && peep2_reg_dead_p (2, operands[1])"
[(set (match_dup 0)
(match_dup 4))
(set (match_dup 5)
(match_dup 2))]
{
rtx tmp_reg = operands[0];
rtx mem = operands[3];
rtx addr = XEXP (mem, 0);
rtx add_op0, add_op1, new_addr;
gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
add_op0 = XEXP (addr, 0);
add_op1 = XEXP (addr, 1);
gcc_assert (REG_P (add_op0));
new_addr = gen_rtx_PLUS (DImode, add_op0, tmp_reg);
operands[4] = add_op1;
operands[5] = change_address (mem, <MODE>mode, new_addr);
})
;; Miscellaneous ISA 2.06 (power7) instructions
(define_insn "addg6s"

View File

@ -1,3 +1,18 @@
2017-05-25 Michael Meissner <meissner@linux.vnet.ibm.com>
Backport from trunk
2017-05-18 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/80510
* gcc.target/powerpc/pr80510-1.c: New test.
* gcc.target/powerpc/pr80510-2.c: Likewise.
Backport from trunk
2017-05-09 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/68163
* gcc.target/powerpc/pr68163.c: New test.
2017-05-23 Jerry DeLisle <jvdelisle@gcc.gnu.org>
Backport from trunk

View File

@ -0,0 +1,209 @@
/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-mcpu=power8 -O2" } */
/* Make sure that the register allocator does not move SF values to GPR
registers in order to do an offsettable store. */
#ifndef TYPE
#define TYPE float
#endif
#ifndef TYPE_IN
#define TYPE_IN TYPE
#endif
#ifndef TYPE_OUT
#define TYPE_OUT TYPE
#endif
#ifndef ITYPE
#define ITYPE long
#endif
#ifdef DO_CALL
extern ITYPE get_bits (ITYPE);
#else
#define get_bits(X) (X)
#endif
void test (ITYPE *bits, ITYPE n, TYPE one, TYPE_IN *p, TYPE_OUT *q)
{
TYPE x_00 = p[ 0];
TYPE x_01 = p[ 1];
TYPE x_02 = p[ 2];
TYPE x_03 = p[ 3];
TYPE x_04 = p[ 4];
TYPE x_05 = p[ 5];
TYPE x_06 = p[ 6];
TYPE x_07 = p[ 7];
TYPE x_08 = p[ 8];
TYPE x_09 = p[ 9];
TYPE x_10 = p[10];
TYPE x_11 = p[11];
TYPE x_12 = p[12];
TYPE x_13 = p[13];
TYPE x_14 = p[14];
TYPE x_15 = p[15];
TYPE x_16 = p[16];
TYPE x_17 = p[17];
TYPE x_18 = p[18];
TYPE x_19 = p[19];
TYPE x_20 = p[20];
TYPE x_21 = p[21];
TYPE x_22 = p[22];
TYPE x_23 = p[23];
TYPE x_24 = p[24];
TYPE x_25 = p[25];
TYPE x_26 = p[26];
TYPE x_27 = p[27];
TYPE x_28 = p[28];
TYPE x_29 = p[29];
TYPE x_30 = p[30];
TYPE x_31 = p[31];
TYPE x_32 = p[32];
TYPE x_33 = p[33];
TYPE x_34 = p[34];
TYPE x_35 = p[35];
TYPE x_36 = p[36];
TYPE x_37 = p[37];
TYPE x_38 = p[38];
TYPE x_39 = p[39];
TYPE x_40 = p[40];
TYPE x_41 = p[41];
TYPE x_42 = p[42];
TYPE x_43 = p[43];
TYPE x_44 = p[44];
TYPE x_45 = p[45];
TYPE x_46 = p[46];
TYPE x_47 = p[47];
TYPE x_48 = p[48];
TYPE x_49 = p[49];
ITYPE i;
for (i = 0; i < n; i++)
{
ITYPE bit = get_bits (bits[i]);
if ((bit & ((ITYPE)1) << 0) != 0) x_00 += one;
if ((bit & ((ITYPE)1) << 1) != 0) x_01 += one;
if ((bit & ((ITYPE)1) << 2) != 0) x_02 += one;
if ((bit & ((ITYPE)1) << 3) != 0) x_03 += one;
if ((bit & ((ITYPE)1) << 4) != 0) x_04 += one;
if ((bit & ((ITYPE)1) << 5) != 0) x_05 += one;
if ((bit & ((ITYPE)1) << 6) != 0) x_06 += one;
if ((bit & ((ITYPE)1) << 7) != 0) x_07 += one;
if ((bit & ((ITYPE)1) << 8) != 0) x_08 += one;
if ((bit & ((ITYPE)1) << 9) != 0) x_09 += one;
if ((bit & ((ITYPE)1) << 10) != 0) x_10 += one;
if ((bit & ((ITYPE)1) << 11) != 0) x_11 += one;
if ((bit & ((ITYPE)1) << 12) != 0) x_12 += one;
if ((bit & ((ITYPE)1) << 13) != 0) x_13 += one;
if ((bit & ((ITYPE)1) << 14) != 0) x_14 += one;
if ((bit & ((ITYPE)1) << 15) != 0) x_15 += one;
if ((bit & ((ITYPE)1) << 16) != 0) x_16 += one;
if ((bit & ((ITYPE)1) << 17) != 0) x_17 += one;
if ((bit & ((ITYPE)1) << 18) != 0) x_18 += one;
if ((bit & ((ITYPE)1) << 19) != 0) x_19 += one;
if ((bit & ((ITYPE)1) << 20) != 0) x_20 += one;
if ((bit & ((ITYPE)1) << 21) != 0) x_21 += one;
if ((bit & ((ITYPE)1) << 22) != 0) x_22 += one;
if ((bit & ((ITYPE)1) << 23) != 0) x_23 += one;
if ((bit & ((ITYPE)1) << 24) != 0) x_24 += one;
if ((bit & ((ITYPE)1) << 25) != 0) x_25 += one;
if ((bit & ((ITYPE)1) << 26) != 0) x_26 += one;
if ((bit & ((ITYPE)1) << 27) != 0) x_27 += one;
if ((bit & ((ITYPE)1) << 28) != 0) x_28 += one;
if ((bit & ((ITYPE)1) << 29) != 0) x_29 += one;
if ((bit & ((ITYPE)1) << 30) != 0) x_30 += one;
if ((bit & ((ITYPE)1) << 31) != 0) x_31 += one;
if ((bit & ((ITYPE)1) << 32) != 0) x_32 += one;
if ((bit & ((ITYPE)1) << 33) != 0) x_33 += one;
if ((bit & ((ITYPE)1) << 34) != 0) x_34 += one;
if ((bit & ((ITYPE)1) << 35) != 0) x_35 += one;
if ((bit & ((ITYPE)1) << 36) != 0) x_36 += one;
if ((bit & ((ITYPE)1) << 37) != 0) x_37 += one;
if ((bit & ((ITYPE)1) << 38) != 0) x_38 += one;
if ((bit & ((ITYPE)1) << 39) != 0) x_39 += one;
if ((bit & ((ITYPE)1) << 40) != 0) x_40 += one;
if ((bit & ((ITYPE)1) << 41) != 0) x_41 += one;
if ((bit & ((ITYPE)1) << 42) != 0) x_42 += one;
if ((bit & ((ITYPE)1) << 43) != 0) x_43 += one;
if ((bit & ((ITYPE)1) << 44) != 0) x_44 += one;
if ((bit & ((ITYPE)1) << 45) != 0) x_45 += one;
if ((bit & ((ITYPE)1) << 46) != 0) x_46 += one;
if ((bit & ((ITYPE)1) << 47) != 0) x_47 += one;
if ((bit & ((ITYPE)1) << 48) != 0) x_48 += one;
if ((bit & ((ITYPE)1) << 49) != 0) x_49 += one;
}
q[ 0] = x_00;
q[ 1] = x_01;
q[ 2] = x_02;
q[ 3] = x_03;
q[ 4] = x_04;
q[ 5] = x_05;
q[ 6] = x_06;
q[ 7] = x_07;
q[ 8] = x_08;
q[ 9] = x_09;
q[10] = x_10;
q[11] = x_11;
q[12] = x_12;
q[13] = x_13;
q[14] = x_14;
q[15] = x_15;
q[16] = x_16;
q[17] = x_17;
q[18] = x_18;
q[19] = x_19;
q[20] = x_20;
q[21] = x_21;
q[22] = x_22;
q[23] = x_23;
q[24] = x_24;
q[25] = x_25;
q[26] = x_26;
q[27] = x_27;
q[28] = x_28;
q[29] = x_29;
q[30] = x_30;
q[31] = x_31;
q[32] = x_32;
q[33] = x_33;
q[34] = x_34;
q[35] = x_35;
q[36] = x_36;
q[37] = x_37;
q[38] = x_38;
q[39] = x_39;
q[40] = x_40;
q[41] = x_41;
q[42] = x_42;
q[43] = x_43;
q[44] = x_44;
q[45] = x_45;
q[46] = x_46;
q[47] = x_47;
q[48] = x_48;
q[49] = x_49;
}
/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
/* { dg-final { scan-assembler-not {\mstw\M} } } */

View File

@ -0,0 +1,211 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-require-effective-target powerpc_vsx_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
/* { dg-options "-mcpu=power7 -O2" } */
/* Make sure that STXSDX is generated for double scalars in Altivec registers
on power7 instead of moving the value to a FPR register and doing a X-FORM
store. */
#ifndef TYPE
#define TYPE double
#endif
#ifndef TYPE_IN
#define TYPE_IN TYPE
#endif
#ifndef TYPE_OUT
#define TYPE_OUT TYPE
#endif
#ifndef ITYPE
#define ITYPE long
#endif
#ifdef DO_CALL
extern ITYPE get_bits (ITYPE);
#else
#define get_bits(X) (X)
#endif
void test (ITYPE *bits, ITYPE n, TYPE one, TYPE_IN *p, TYPE_OUT *q)
{
TYPE x_00 = p[ 0];
TYPE x_01 = p[ 1];
TYPE x_02 = p[ 2];
TYPE x_03 = p[ 3];
TYPE x_04 = p[ 4];
TYPE x_05 = p[ 5];
TYPE x_06 = p[ 6];
TYPE x_07 = p[ 7];
TYPE x_08 = p[ 8];
TYPE x_09 = p[ 9];
TYPE x_10 = p[10];
TYPE x_11 = p[11];
TYPE x_12 = p[12];
TYPE x_13 = p[13];
TYPE x_14 = p[14];
TYPE x_15 = p[15];
TYPE x_16 = p[16];
TYPE x_17 = p[17];
TYPE x_18 = p[18];
TYPE x_19 = p[19];
TYPE x_20 = p[20];
TYPE x_21 = p[21];
TYPE x_22 = p[22];
TYPE x_23 = p[23];
TYPE x_24 = p[24];
TYPE x_25 = p[25];
TYPE x_26 = p[26];
TYPE x_27 = p[27];
TYPE x_28 = p[28];
TYPE x_29 = p[29];
TYPE x_30 = p[30];
TYPE x_31 = p[31];
TYPE x_32 = p[32];
TYPE x_33 = p[33];
TYPE x_34 = p[34];
TYPE x_35 = p[35];
TYPE x_36 = p[36];
TYPE x_37 = p[37];
TYPE x_38 = p[38];
TYPE x_39 = p[39];
TYPE x_40 = p[40];
TYPE x_41 = p[41];
TYPE x_42 = p[42];
TYPE x_43 = p[43];
TYPE x_44 = p[44];
TYPE x_45 = p[45];
TYPE x_46 = p[46];
TYPE x_47 = p[47];
TYPE x_48 = p[48];
TYPE x_49 = p[49];
ITYPE i;
for (i = 0; i < n; i++)
{
ITYPE bit = get_bits (bits[i]);
if ((bit & ((ITYPE)1) << 0) != 0) x_00 += one;
if ((bit & ((ITYPE)1) << 1) != 0) x_01 += one;
if ((bit & ((ITYPE)1) << 2) != 0) x_02 += one;
if ((bit & ((ITYPE)1) << 3) != 0) x_03 += one;
if ((bit & ((ITYPE)1) << 4) != 0) x_04 += one;
if ((bit & ((ITYPE)1) << 5) != 0) x_05 += one;
if ((bit & ((ITYPE)1) << 6) != 0) x_06 += one;
if ((bit & ((ITYPE)1) << 7) != 0) x_07 += one;
if ((bit & ((ITYPE)1) << 8) != 0) x_08 += one;
if ((bit & ((ITYPE)1) << 9) != 0) x_09 += one;
if ((bit & ((ITYPE)1) << 10) != 0) x_10 += one;
if ((bit & ((ITYPE)1) << 11) != 0) x_11 += one;
if ((bit & ((ITYPE)1) << 12) != 0) x_12 += one;
if ((bit & ((ITYPE)1) << 13) != 0) x_13 += one;
if ((bit & ((ITYPE)1) << 14) != 0) x_14 += one;
if ((bit & ((ITYPE)1) << 15) != 0) x_15 += one;
if ((bit & ((ITYPE)1) << 16) != 0) x_16 += one;
if ((bit & ((ITYPE)1) << 17) != 0) x_17 += one;
if ((bit & ((ITYPE)1) << 18) != 0) x_18 += one;
if ((bit & ((ITYPE)1) << 19) != 0) x_19 += one;
if ((bit & ((ITYPE)1) << 20) != 0) x_20 += one;
if ((bit & ((ITYPE)1) << 21) != 0) x_21 += one;
if ((bit & ((ITYPE)1) << 22) != 0) x_22 += one;
if ((bit & ((ITYPE)1) << 23) != 0) x_23 += one;
if ((bit & ((ITYPE)1) << 24) != 0) x_24 += one;
if ((bit & ((ITYPE)1) << 25) != 0) x_25 += one;
if ((bit & ((ITYPE)1) << 26) != 0) x_26 += one;
if ((bit & ((ITYPE)1) << 27) != 0) x_27 += one;
if ((bit & ((ITYPE)1) << 28) != 0) x_28 += one;
if ((bit & ((ITYPE)1) << 29) != 0) x_29 += one;
if ((bit & ((ITYPE)1) << 30) != 0) x_30 += one;
if ((bit & ((ITYPE)1) << 31) != 0) x_31 += one;
if ((bit & ((ITYPE)1) << 32) != 0) x_32 += one;
if ((bit & ((ITYPE)1) << 33) != 0) x_33 += one;
if ((bit & ((ITYPE)1) << 34) != 0) x_34 += one;
if ((bit & ((ITYPE)1) << 35) != 0) x_35 += one;
if ((bit & ((ITYPE)1) << 36) != 0) x_36 += one;
if ((bit & ((ITYPE)1) << 37) != 0) x_37 += one;
if ((bit & ((ITYPE)1) << 38) != 0) x_38 += one;
if ((bit & ((ITYPE)1) << 39) != 0) x_39 += one;
if ((bit & ((ITYPE)1) << 40) != 0) x_40 += one;
if ((bit & ((ITYPE)1) << 41) != 0) x_41 += one;
if ((bit & ((ITYPE)1) << 42) != 0) x_42 += one;
if ((bit & ((ITYPE)1) << 43) != 0) x_43 += one;
if ((bit & ((ITYPE)1) << 44) != 0) x_44 += one;
if ((bit & ((ITYPE)1) << 45) != 0) x_45 += one;
if ((bit & ((ITYPE)1) << 46) != 0) x_46 += one;
if ((bit & ((ITYPE)1) << 47) != 0) x_47 += one;
if ((bit & ((ITYPE)1) << 48) != 0) x_48 += one;
if ((bit & ((ITYPE)1) << 49) != 0) x_49 += one;
}
q[ 0] = x_00;
q[ 1] = x_01;
q[ 2] = x_02;
q[ 3] = x_03;
q[ 4] = x_04;
q[ 5] = x_05;
q[ 6] = x_06;
q[ 7] = x_07;
q[ 8] = x_08;
q[ 9] = x_09;
q[10] = x_10;
q[11] = x_11;
q[12] = x_12;
q[13] = x_13;
q[14] = x_14;
q[15] = x_15;
q[16] = x_16;
q[17] = x_17;
q[18] = x_18;
q[19] = x_19;
q[20] = x_20;
q[21] = x_21;
q[22] = x_22;
q[23] = x_23;
q[24] = x_24;
q[25] = x_25;
q[26] = x_26;
q[27] = x_27;
q[28] = x_28;
q[29] = x_29;
q[30] = x_30;
q[31] = x_31;
q[32] = x_32;
q[33] = x_33;
q[34] = x_34;
q[35] = x_35;
q[36] = x_36;
q[37] = x_37;
q[38] = x_38;
q[39] = x_39;
q[40] = x_40;
q[41] = x_41;
q[42] = x_42;
q[43] = x_43;
q[44] = x_44;
q[45] = x_45;
q[46] = x_46;
q[47] = x_47;
q[48] = x_48;
q[49] = x_49;
}
/* { dg-final { scan-assembler {\mxsadddp\M} } } */
/* { dg-final { scan-assembler {\mstxsdx\M} } } */
/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */

View File

@ -0,0 +1,212 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-mcpu=power8 -O2" } */
/* Make sure that STXSSPX is generated for float scalars in Altivec registers
on power7 instead of moving the value to a FPR register and doing a X-FORM
store. */
#ifndef TYPE
#define TYPE float
#endif
#ifndef TYPE_IN
#define TYPE_IN TYPE
#endif
#ifndef TYPE_OUT
#define TYPE_OUT TYPE
#endif
#ifndef ITYPE
#define ITYPE long
#endif
#ifdef DO_CALL
extern ITYPE get_bits (ITYPE);
#else
#define get_bits(X) (X)
#endif
void test (ITYPE *bits, ITYPE n, TYPE one, TYPE_IN *p, TYPE_OUT *q)
{
TYPE x_00 = p[ 0];
TYPE x_01 = p[ 1];
TYPE x_02 = p[ 2];
TYPE x_03 = p[ 3];
TYPE x_04 = p[ 4];
TYPE x_05 = p[ 5];
TYPE x_06 = p[ 6];
TYPE x_07 = p[ 7];
TYPE x_08 = p[ 8];
TYPE x_09 = p[ 9];
TYPE x_10 = p[10];
TYPE x_11 = p[11];
TYPE x_12 = p[12];
TYPE x_13 = p[13];
TYPE x_14 = p[14];
TYPE x_15 = p[15];
TYPE x_16 = p[16];
TYPE x_17 = p[17];
TYPE x_18 = p[18];
TYPE x_19 = p[19];
TYPE x_20 = p[20];
TYPE x_21 = p[21];
TYPE x_22 = p[22];
TYPE x_23 = p[23];
TYPE x_24 = p[24];
TYPE x_25 = p[25];
TYPE x_26 = p[26];
TYPE x_27 = p[27];
TYPE x_28 = p[28];
TYPE x_29 = p[29];
TYPE x_30 = p[30];
TYPE x_31 = p[31];
TYPE x_32 = p[32];
TYPE x_33 = p[33];
TYPE x_34 = p[34];
TYPE x_35 = p[35];
TYPE x_36 = p[36];
TYPE x_37 = p[37];
TYPE x_38 = p[38];
TYPE x_39 = p[39];
TYPE x_40 = p[40];
TYPE x_41 = p[41];
TYPE x_42 = p[42];
TYPE x_43 = p[43];
TYPE x_44 = p[44];
TYPE x_45 = p[45];
TYPE x_46 = p[46];
TYPE x_47 = p[47];
TYPE x_48 = p[48];
TYPE x_49 = p[49];
ITYPE i;
for (i = 0; i < n; i++)
{
ITYPE bit = get_bits (bits[i]);
if ((bit & ((ITYPE)1) << 0) != 0) x_00 += one;
if ((bit & ((ITYPE)1) << 1) != 0) x_01 += one;
if ((bit & ((ITYPE)1) << 2) != 0) x_02 += one;
if ((bit & ((ITYPE)1) << 3) != 0) x_03 += one;
if ((bit & ((ITYPE)1) << 4) != 0) x_04 += one;
if ((bit & ((ITYPE)1) << 5) != 0) x_05 += one;
if ((bit & ((ITYPE)1) << 6) != 0) x_06 += one;
if ((bit & ((ITYPE)1) << 7) != 0) x_07 += one;
if ((bit & ((ITYPE)1) << 8) != 0) x_08 += one;
if ((bit & ((ITYPE)1) << 9) != 0) x_09 += one;
if ((bit & ((ITYPE)1) << 10) != 0) x_10 += one;
if ((bit & ((ITYPE)1) << 11) != 0) x_11 += one;
if ((bit & ((ITYPE)1) << 12) != 0) x_12 += one;
if ((bit & ((ITYPE)1) << 13) != 0) x_13 += one;
if ((bit & ((ITYPE)1) << 14) != 0) x_14 += one;
if ((bit & ((ITYPE)1) << 15) != 0) x_15 += one;
if ((bit & ((ITYPE)1) << 16) != 0) x_16 += one;
if ((bit & ((ITYPE)1) << 17) != 0) x_17 += one;
if ((bit & ((ITYPE)1) << 18) != 0) x_18 += one;
if ((bit & ((ITYPE)1) << 19) != 0) x_19 += one;
if ((bit & ((ITYPE)1) << 20) != 0) x_20 += one;
if ((bit & ((ITYPE)1) << 21) != 0) x_21 += one;
if ((bit & ((ITYPE)1) << 22) != 0) x_22 += one;
if ((bit & ((ITYPE)1) << 23) != 0) x_23 += one;
if ((bit & ((ITYPE)1) << 24) != 0) x_24 += one;
if ((bit & ((ITYPE)1) << 25) != 0) x_25 += one;
if ((bit & ((ITYPE)1) << 26) != 0) x_26 += one;
if ((bit & ((ITYPE)1) << 27) != 0) x_27 += one;
if ((bit & ((ITYPE)1) << 28) != 0) x_28 += one;
if ((bit & ((ITYPE)1) << 29) != 0) x_29 += one;
if ((bit & ((ITYPE)1) << 30) != 0) x_30 += one;
if ((bit & ((ITYPE)1) << 31) != 0) x_31 += one;
if ((bit & ((ITYPE)1) << 32) != 0) x_32 += one;
if ((bit & ((ITYPE)1) << 33) != 0) x_33 += one;
if ((bit & ((ITYPE)1) << 34) != 0) x_34 += one;
if ((bit & ((ITYPE)1) << 35) != 0) x_35 += one;
if ((bit & ((ITYPE)1) << 36) != 0) x_36 += one;
if ((bit & ((ITYPE)1) << 37) != 0) x_37 += one;
if ((bit & ((ITYPE)1) << 38) != 0) x_38 += one;
if ((bit & ((ITYPE)1) << 39) != 0) x_39 += one;
if ((bit & ((ITYPE)1) << 40) != 0) x_40 += one;
if ((bit & ((ITYPE)1) << 41) != 0) x_41 += one;
if ((bit & ((ITYPE)1) << 42) != 0) x_42 += one;
if ((bit & ((ITYPE)1) << 43) != 0) x_43 += one;
if ((bit & ((ITYPE)1) << 44) != 0) x_44 += one;
if ((bit & ((ITYPE)1) << 45) != 0) x_45 += one;
if ((bit & ((ITYPE)1) << 46) != 0) x_46 += one;
if ((bit & ((ITYPE)1) << 47) != 0) x_47 += one;
if ((bit & ((ITYPE)1) << 48) != 0) x_48 += one;
if ((bit & ((ITYPE)1) << 49) != 0) x_49 += one;
}
q[ 0] = x_00;
q[ 1] = x_01;
q[ 2] = x_02;
q[ 3] = x_03;
q[ 4] = x_04;
q[ 5] = x_05;
q[ 6] = x_06;
q[ 7] = x_07;
q[ 8] = x_08;
q[ 9] = x_09;
q[10] = x_10;
q[11] = x_11;
q[12] = x_12;
q[13] = x_13;
q[14] = x_14;
q[15] = x_15;
q[16] = x_16;
q[17] = x_17;
q[18] = x_18;
q[19] = x_19;
q[20] = x_20;
q[21] = x_21;
q[22] = x_22;
q[23] = x_23;
q[24] = x_24;
q[25] = x_25;
q[26] = x_26;
q[27] = x_27;
q[28] = x_28;
q[29] = x_29;
q[30] = x_30;
q[31] = x_31;
q[32] = x_32;
q[33] = x_33;
q[34] = x_34;
q[35] = x_35;
q[36] = x_36;
q[37] = x_37;
q[38] = x_38;
q[39] = x_39;
q[40] = x_40;
q[41] = x_41;
q[42] = x_42;
q[43] = x_43;
q[44] = x_44;
q[45] = x_45;
q[46] = x_46;
q[47] = x_47;
q[48] = x_48;
q[49] = x_49;
}
/* { dg-final { scan-assembler {\mxsaddsp\M} } } */
/* { dg-final { scan-assembler {\mstxsspx\M} } } */
/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
/* { dg-final { scan-assembler-not {\mmfvsrwz\M} } } */