diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4ceebb4789f..731a7bd3a88 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,76 @@ +2016-05-18 Michael Meissner + + PR target/70915 + * config/rs6000/constraints.md (wE constraint): New constraint + for a vector constant that can be loaded with XXSPLTIB. + (wM constraint): New constraint for a vector constant of a 1's. + (wS constraint): New constraint for a vector constant that can be + loaded with XXSPLTIB and a vector sign extend instruction. + * config/rs6000/predicates.md (xxspltib_constant_split): New + predicates for wE/wS constraints. + (xxspltib_constant_nosplit): Likewise. + (easy_vector_constant): Add support for constants that can be + loaded via XXSPLTIB. + (all_ones_constant): New predicate for vector constant with all + 1's set. + (splat_input_operand): Add support for ISA 3.0 word splat + operations. + * config/rs6000/rs6000.c (xxspltib_constant_p): New function to + return if a constant can be loaded with the ISA 3.0 XXSPLTIB + instruction and possibly with a sign extension. + (output_vec_const_move): Add support for XXSPLTIB. If we are + loading up 0/-1 into Altivec registers, prefer using VSPLTISW + instead of XXLXOR/XXLORC. + (rs6000_expand_vector_init): Add support for ISA 3.0 word splat + operations. + (rs6000_legitimize_reload_address): Likewise. + (rs6000_output_move_128bit): Use output_vec_const_move to emit + constants. + * config/rs6000/vsx.md (VSX_M): Add TImode (if -mvsx-timode) and + combine VSX_M and VSX_M2 into one iterator. + (VSX_M2): Likewise. + (VSINT_84): New iterators for loading constants with XXSPLTIB. + (VSINT_842): Likewise. + (UNSPEC_VSX_SIGN_EXTEND): New UNSPEC. + (xxspltib_v16qi): New insns to load up constants with the ISA 3.0 + XXSPLTIB instruction. + (xxspltib__nosplit): Likewise. + (xxspltib__split): New insn to load up constants with + XXSPLTIB and a sign extend instruction. 
+ (vsx_mov): Replace single move that handled all vector types + with separate 32-bit and 64-bit moves. Combine the movti_ + moves (when -mvsx-timode is in effect) into the main vector + moves. Eliminate separate moves for , where the + preferred register class () is listed first, and the + secondary register class () is listed second with a '?' to + discourage use. Prefer loading 0/-1 in any VSX register for ISA + 3.0, and Altivec registers for ISA 2.06/2.07 (PR target/70915) so + that if the register was involved in a slow operation, the + clear/set operation does not wait for the slow operation to + finish. Adjust the length attributes for 32-bit mode. Use + rs6000_output_move_128bit and drop the use of the string + instructions for 32-bit movti when -mvsx-timode is in effect. Use + spacing so that the alternatives and attributes don't generate + long lines, and put things in columns, so that it is easier to + match up the operands and attributes with the insn alternatives. + (vsx_mov_64bit): Likewise. + (vsx_mov_32bit): Likewise. + (vsx_movti_64bit): Fold movti into normal vector moves. + (vsx_movti_32bit): Likewise. + (vsx_splat_, V4SI/V4SF modes): Add support for ISA 3.0 word + splat instructions. + (vsx_splat_v4si_internal): Likewise. + (vsx_splat_v4sf_internal): Likewise. + (vector fusion peepholes): Use VSX_M instead of VSX_M2. + (vsx_sign_extend_qi_): New ISA 3.0 instructions to sign + extend vector elements. + (vsx_sign_extend_hi_): Likewise. + (vsx_sign_extend_si_v2di): Likewise. + * config/rs6000/rs6000-protos.h (xxspltib_constant_p): Add + declaration. + * doc/md.texi (PowerPC constraints): Document the wE, wM, and wS + constraints. Add trailing period to wL documentation.
+ 2016-05-18 Richard Sandiford PR middle-end/71020 diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index a3abe6ab80a..ef8f617d9a8 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -140,6 +140,10 @@ (and (match_code "const_int") (match_test "TARGET_VSX && (ival == VECTOR_ELEMENT_SCALAR_64BIT)"))) +(define_constraint "wE" + "Vector constant that can be loaded with the XXSPLTIB instruction." + (match_test "xxspltib_constant_nosplit (op, mode)")) + ;; Extended fusion store (define_memory_constraint "wF" "Memory operand suitable for power9 fusion load/stores" @@ -156,6 +160,12 @@ (and (match_test "TARGET_DIRECT_MOVE_128") (match_test "(ival == VECTOR_ELEMENT_MFVSRLD_64BIT)")))) +;; Generate the XXLORC instruction to set a register to all 1's +(define_constraint "wM" + "Match vector constant with all 1's if the XXLORC instruction is available" + (and (match_test "TARGET_P8_VECTOR") + (match_operand 0 "all_ones_constant"))) + ;; ISA 3.0 vector d-form addresses (define_memory_constraint "wO" "Memory operand suitable for the ISA 3.0 vector d-form instructions." @@ -166,6 +176,10 @@ "Memory operand suitable for the load/store quad instructions" (match_operand 0 "quad_memory_operand")) +(define_constraint "wS" + "Vector constant that can be loaded with XXSPLTIB & sign extension." + (match_test "xxspltib_constant_split (op, mode)")) + ;; Altivec style load/store that ignores the bottom bits of the address (define_memory_constraint "wZ" "Indexed or indirect memory operand, ignoring the bottom 4 bits" diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index ebc92467344..5b852a12c21 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -565,6 +565,38 @@ } }) +;; Return 1 if the operand is a CONST_VECTOR or VEC_DUPLICATE of a constant +;; that can be loaded with a XXSPLTIB instruction and then a VUPKHSB, VEXTSB2W or +;; VEXTSB2D instruction.
+ +(define_predicate "xxspltib_constant_split" + (match_code "const_vector,vec_duplicate,const_int") +{ + int value = 256; + int num_insns = -1; + + if (!xxspltib_constant_p (op, mode, &num_insns, &value)) + return false; + + return num_insns > 1; +}) + + +;; Return 1 if the operand is a CONST_VECTOR that can be loaded directly with a +;; XXSPLTIB instruction. + +(define_predicate "xxspltib_constant_nosplit" + (match_code "const_vector,vec_duplicate,const_int") +{ + int value = 256; + int num_insns = -1; + + if (!xxspltib_constant_p (op, mode, &num_insns, &value)) + return false; + + return num_insns == 1; +}) + ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a ;; vector register without using memory. (define_predicate "easy_vector_constant" @@ -583,7 +615,14 @@ if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)) { - if (zero_constant (op, mode)) + int value = 256; + int num_insns = -1; + + if (zero_constant (op, mode) || all_ones_constant (op, mode)) + return true; + + if (TARGET_P9_VECTOR + && xxspltib_constant_p (op, mode, &num_insns, &value)) return true; return easy_altivec_constant (op, mode); @@ -662,6 +701,11 @@ (and (match_code "const_int,const_double,const_wide_int,const_vector") (match_test "op == CONST0_RTX (mode)"))) +;; Return 1 if operand is constant -1 (scalars and vectors). +(define_predicate "all_ones_constant" + (and (match_code "const_int,const_double,const_wide_int,const_vector") + (match_test "op == CONSTM1_RTX (mode) && !FLOAT_MODE_P (mode)"))) + ;; Return 1 if operand is 0.0.
(define_predicate "zero_fp_constant" (and (match_code "const_double") @@ -1024,6 +1068,10 @@ mode = V2DFmode; else if (mode == DImode) mode = V2DImode; + else if (mode == SImode && TARGET_P9_VECTOR) + mode = V4SImode; + else if (mode == SFmode && TARGET_P9_VECTOR) + mode = V4SFmode; else gcc_unreachable (); return memory_address_addr_space_p (mode, XEXP (op, 0), diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index f75624f70d8..6b4d17801d0 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -31,6 +31,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int, #endif /* TREE_CODE */ extern bool easy_altivec_constant (rtx, machine_mode); +extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); extern bool macho_lo_sum_memory_operand (rtx, machine_mode); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 3f721c67cd5..a1841bce765 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6241,6 +6241,128 @@ gen_easy_altivec_constant (rtx op) gcc_unreachable (); } +/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0 + instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d). + + Return the number of instructions needed (1 or 2) into the address pointed + via NUM_INSNS_PTR. + + There is no split/no-split argument: callers tell the two cases apart from + the returned count. 1 means XXSPLTIB alone, which can target any VSX + register; 2 means XXSPLTIB followed by a sign extend operation, which + restricts us to the Altivec registers. + + Allow either (const_vector [...]) or (vec_duplicate <const>). If OP is a valid + XXSPLTIB constant, return the constant being set via the CONSTANT_PTR + pointer.
*/ + +bool +xxspltib_constant_p (rtx op, + machine_mode mode, + int *num_insns_ptr, + int *constant_ptr) +{ + size_t nunits = GET_MODE_NUNITS (mode); + size_t i; + HOST_WIDE_INT value; + rtx element; + + /* Set the returned values to out of bound values. */ + *num_insns_ptr = -1; + *constant_ptr = 256; + + if (!TARGET_P9_VECTOR) + return false; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + else if (mode != GET_MODE (op)) + return false; + + /* Handle (vec_duplicate ). */ + if (GET_CODE (op) == VEC_DUPLICATE) + { + if (mode != V16QImode && mode != V8HImode && mode != V4SImode + && mode != V2DImode) + return false; + + element = XEXP (op, 0); + if (!CONST_INT_P (element)) + return false; + + value = INTVAL (element); + if (!IN_RANGE (value, -128, 127)) + return false; + } + + /* Handle (const_vector [...]). */ + else if (GET_CODE (op) == CONST_VECTOR) + { + if (mode != V16QImode && mode != V8HImode && mode != V4SImode + && mode != V2DImode) + return false; + + element = CONST_VECTOR_ELT (op, 0); + if (!CONST_INT_P (element)) + return false; + + value = INTVAL (element); + if (!IN_RANGE (value, -128, 127)) + return false; + + for (i = 1; i < nunits; i++) + { + element = CONST_VECTOR_ELT (op, i); + if (!CONST_INT_P (element)) + return false; + + if (value != INTVAL (element)) + return false; + } + + /* See if we could generate vspltisw/vspltish directly instead of + xxspltib + sign extend. Special case 0/-1 to allow getting + any VSX register instead of an Altivec register. */ + if (!IN_RANGE (value, -1, 0) && EASY_VECTOR_15 (value) + && (mode == V4SImode || mode == V8HImode)) + return false; + } + + /* Handle integer constants being loaded into the upper part of the VSX + register as a scalar. If the value isn't 0/-1, only allow it if + the mode can go in Altivec registers. 
*/ + else if (CONST_INT_P (op)) + { + if (!SCALAR_INT_MODE_P (mode)) + return false; + + value = INTVAL (op); + if (!IN_RANGE (value, -128, 127)) + return false; + + if (!IN_RANGE (value, -1, 0) + && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0) + return false; + } + + else + return false; + + /* Return # of instructions and the constant byte for XXSPLTIB. */ + if (mode == V16QImode) + *num_insns_ptr = 1; + + else if (IN_RANGE (value, -1, 0)) + *num_insns_ptr = 1; + + else + *num_insns_ptr = 2; + + *constant_ptr = (int) value; + return true; +} + const char * output_vec_const_move (rtx *operands) { @@ -6254,23 +6376,60 @@ output_vec_const_move (rtx *operands) if (TARGET_VSX) { + bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest)); + int xxspltib_value = 256; + int num_insns = -1; + if (zero_constant (vec, mode)) - return "xxlxor %x0,%x0,%x0"; + { + if (TARGET_P9_VECTOR) + return "xxspltib %x0,0"; - if (TARGET_P8_VECTOR && vec == CONSTM1_RTX (mode)) - return "xxlorc %x0,%x0,%x0"; + else if (dest_vmx_p) + return "vspltisw %0,0"; - if ((mode == V2DImode || mode == V1TImode) - && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1 - && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1) - return (TARGET_P8_VECTOR) ? 
"xxlorc %x0,%x0,%x0" : "vspltisw %0,-1"; + else + return "xxlxor %x0,%x0,%x0"; + } + + if (all_ones_constant (vec, mode)) + { + if (TARGET_P9_VECTOR) + return "xxspltib %x0,255"; + + else if (dest_vmx_p) + return "vspltisw %0,-1"; + + else if (TARGET_P8_VECTOR) + return "xxlorc %x0,%x0,%x0"; + + else + gcc_unreachable (); + } + + if (TARGET_P9_VECTOR + && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value)) + { + if (num_insns == 1) + { + operands[2] = GEN_INT (xxspltib_value & 0xff); + return "xxspltib %x0,%2"; + } + + return "#"; + } } if (TARGET_ALTIVEC) { rtx splat_vec; + + gcc_assert (ALTIVEC_REGNO_P (REGNO (dest))); if (zero_constant (vec, mode)) - return "vxor %0,%0,%0"; + return "vspltisw %0,0"; + + if (all_ones_constant (vec, mode)) + return "vspltisw %0,-1"; /* Do we need to construct a value using VSLDOI? */ shift = vspltis_shifted (vec); @@ -6543,6 +6702,15 @@ rs6000_expand_vector_init (rtx target, rtx vals) return; } + /* Word values on ISA 3.0 can use mtvsrws, lxvwsx, or vspltisw. V4SF is + complicated since scalars are stored as doubles in the registers. */ + if (TARGET_P9_VECTOR && mode == V4SImode && all_same + && VECTOR_MEM_VSX_P (mode)) + { + emit_insn (gen_vsx_splat_v4si (target, XVECEXP (vals, 0, 0))); + return; + } + /* With single precision floating point on VSX, know that internally single precision is actually represented as a double, and either make 2 V2DF vectors, and convert these vectors to single precision, or do one @@ -6551,14 +6719,23 @@ rs6000_expand_vector_init (rtx target, rtx vals) { if (all_same) { - rtx freg = gen_reg_rtx (V4SFmode); - rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0)); - rtx cvt = ((TARGET_XSCVDPSPN) - ? 
gen_vsx_xscvdpspn_scalar (freg, sreg) - : gen_vsx_xscvdpsp_scalar (freg, sreg)); + rtx op0 = XVECEXP (vals, 0, 0); - emit_insn (cvt); - emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx)); + if (TARGET_P9_VECTOR) + emit_insn (gen_vsx_splat_v4sf (target, op0)); + + else + { + rtx freg = gen_reg_rtx (V4SFmode); + rtx sreg = force_reg (SFmode, op0); + rtx cvt = (TARGET_XSCVDPSPN + ? gen_vsx_xscvdpspn_scalar (freg, sreg) + : gen_vsx_xscvdpsp_scalar (freg, sreg)); + + emit_insn (cvt); + emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, + const0_rtx)); + } } else { @@ -8326,12 +8503,16 @@ rs6000_legitimize_reload_address (rtx x, machine_mode mode, { bool reg_offset_p = reg_offset_addressing_ok_p (mode); - /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a - DFmode/DImode MEM. */ + /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a + DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */ if (reg_offset_p && opnum == 1 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode) - || (mode == DImode && recog_data.operand_mode[0] == V2DImode))) + || (mode == DImode && recog_data.operand_mode[0] == V2DImode) + || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode + && TARGET_P9_VECTOR) + || (mode == SImode && recog_data.operand_mode[0] == V4SImode + && TARGET_P9_VECTOR))) reg_offset_p = false; /* We must recognize output that we have already generated ourselves. 
*/ @@ -20111,10 +20292,8 @@ rs6000_output_move_128bit (rtx operands[]) if (dest_gpr_p) return "#"; - else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode)) - return "xxlxor %x0,%x0,%x0"; - - else if (TARGET_ALTIVEC && dest_vmx_p) + else if ((dest_vmx_p && TARGET_ALTIVEC) + || (dest_vsx_p && TARGET_VSX)) return output_vec_const_move (operands); } diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 57cee7934ba..2b6963b0ac5 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -55,8 +55,7 @@ (KF "FLOAT128_VECTOR_P (KFmode)") (TF "FLOAT128_VECTOR_P (TFmode)")]) -;; Iterator for memory move. Handle TImode specially to allow -;; it to use gprs as well as vsx registers. +;; Iterator for memory moves. (define_mode_iterator VSX_M [V16QI V8HI V4SI @@ -65,18 +64,8 @@ V2DF V1TI (KF "FLOAT128_VECTOR_P (KFmode)") - (TF "FLOAT128_VECTOR_P (TFmode)")]) - -(define_mode_iterator VSX_M2 [V16QI - V8HI - V4SI - V2DI - V4SF - V2DF - V1TI - (KF "FLOAT128_VECTOR_P (KFmode)") - (TF "FLOAT128_VECTOR_P (TFmode)") - (TI "TARGET_VSX_TIMODE")]) + (TF "FLOAT128_VECTOR_P (TFmode)") + (TI "TARGET_VSX_TIMODE")]) ;; Map into the appropriate load/store name based on the type (define_mode_attr VSm [(V16QI "vw4") @@ -270,6 +259,10 @@ (define_mode_attr VS_64reg [(V2DF "ws") (V2DI "wi")]) +;; Iterators for loading constants with xxspltib +(define_mode_iterator VSINT_84 [V4SI V2DI]) +(define_mode_iterator VSINT_842 [V8HI V4SI V2DI]) + ;; Constants for creating unspecs (define_c_enum "unspec" [UNSPEC_VSX_CONCAT @@ -299,6 +292,7 @@ UNSPEC_VSX_XVCVUXDDP UNSPEC_VSX_XVCVDPSXDS UNSPEC_VSX_XVCVDPUXDS + UNSPEC_VSX_SIGN_EXTEND ]) ;; VSX moves @@ -769,92 +763,141 @@ (const_int 64)))] "") -(define_insn "*vsx_mov" - [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO,,,?ZwO,?,?,r,we,wQ,?&r,??Y,??r,??r,,?,*r,v,wZ,v") - (match_operand:VSX_M 1 "input_operand" ",ZwO,,,ZwO,,we,b,r,wQ,r,Y,r,j,j,j,W,v,wZ"))] - "VECTOR_MEM_VSX_P (mode) +;; Vector constants that can be 
generated with XXSPLTIB that was added in ISA +;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized. +(define_insn "xxspltib_v16qi" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))] + "TARGET_P9_VECTOR" +{ + operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff); + return "xxspltib %x0,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn "xxspltib__nosplit" + [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa") + (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "wE"))] + "TARGET_P9_VECTOR" +{ + rtx op1 = operands[1]; + int value = 256; + int num_insns = -1; + + if (!xxspltib_constant_p (op1, mode, &num_insns, &value) + || num_insns != 1) + gcc_unreachable (); + + operands[2] = GEN_INT (value & 0xff); + return "xxspltib %x0,%2"; +} + [(set_attr "type" "vecperm")]) + +(define_insn_and_split "*xxspltib__split" + [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v") + (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))] + "TARGET_P9_VECTOR" + "#" + "&& 1" + [(const_int 0)] +{ + int value = 256; + int num_insns = -1; + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx tmp = ((can_create_pseudo_p ()) + ? gen_reg_rtx (V16QImode) + : gen_lowpart (V16QImode, op0)); + + if (!xxspltib_constant_p (op1, mode, &num_insns, &value) + || num_insns != 2) + gcc_unreachable (); + + emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value))); + + if (mode == V2DImode) + emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp)); + + else if (mode == V4SImode) + emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp)); + + else if (mode == V8HImode) + emit_insn (gen_altivec_vupkhsb (op0, tmp)); + + else + gcc_unreachable (); + + DONE; +} + [(set_attr "type" "vecperm") + (set_attr "length" "8")]) + + +;; Prefer using vector registers over GPRs. 
Prefer using ISA 3.0's XXSPLTIB +;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or +;; all 1's, since the machine does not have to wait for the previous +;; instruction using the register being set (such as a store waiting on a slow +;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move. + +;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR) +;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW +;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) +(define_insn "*vsx_mov_64bit" + [(set (match_operand:VSX_M 0 "nonimmediate_operand" + "=ZwO, , , r, we, ?wQ, + ?&r, ??r, ??Y, ??r, wo, v, + ?, *r, v, ??r, wZ, v") + + (match_operand:VSX_M 1 "input_operand" + ", ZwO, , we, r, r, + wQ, Y, r, r, wE, jwM, + ?jwM, jwM, W, W, v, wZ"))] + + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (mode) && (register_operand (operands[0], mode) || register_operand (operands[1], mode))" { return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,mffgpr,mftgpr,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload") - (set_attr "length" "4,4,4,4,4,4,8,4,12,12,12,12,16,4,4,*,16,4,4")]) + [(set_attr "type" + "vecstore, vecload, vecsimple, mffgpr, mftgpr, load, + store, load, store, *, vecsimple, vecsimple, + vecsimple, *, *, *, vecstore, vecload") -;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal -;; use of TImode is for unions.
However for plain data movement, slightly -;; favor the vector loads -(define_insn "*vsx_movti_64bit" - [(set (match_operand:TI 0 "nonimmediate_operand" "=ZwO,wa,wa,wa,r,we,v,v,wZ,wQ,&r,Y,r,r,?r") - (match_operand:TI 1 "input_operand" "wa,ZwO,wa,O,we,b,W,wZ,v,r,wQ,r,Y,r,n"))] - "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode) - && (register_operand (operands[0], TImode) - || register_operand (operands[1], TImode))" + (set_attr "length" + "4, 4, 4, 8, 4, 8, + 8, 8, 8, 8, 4, 4, + 4, 8, 20, 20, 4, 4")]) + +;; VSX store VSX load VSX move GPR load GPR store GPR move +;; XXSPLTIB VSPLTISW VSX 0/-1 GPR 0/-1 VMX const GPR const +;; LVX (VMX) STVX (VMX) +(define_insn "*vsx_mov_32bit" + [(set (match_operand:VSX_M 0 "nonimmediate_operand" + "=ZwO, , , ??r, ??Y, ??r, + wo, v, ?, *r, v, ??r, + wZ, v") + + (match_operand:VSX_M 1 "input_operand" + ", ZwO, , Y, r, r, + wE, jwM, ?jwM, jwM, W, W, + v, wZ"))] + + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (mode) + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" { return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,mffgpr,mftgpr,vecsimple,vecstore,vecload,store,load,store,load,*,*") - (set_attr "length" "4,4,4,4,8,4,16,4,4,8,8,8,8,8,8")]) + [(set_attr "type" + "vecstore, vecload, vecsimple, load, store, *, + vecsimple, vecsimple, vecsimple, *, *, *, + vecstore, vecload") -(define_insn "*vsx_movti_32bit" - [(set (match_operand:TI 0 "nonimmediate_operand" "=ZwO,wa,wa,wa,v,v,wZ,Q,Y,????r,????r,????r,r") - (match_operand:TI 1 "input_operand" "wa,ZwO,wa,O,W,wZ,v,r,r,Q,Y,r,n"))] - "! 
TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode) - && (register_operand (operands[0], TImode) - || register_operand (operands[1], TImode))" -{ - switch (which_alternative) - { - case 0: - return "stxvd2x %x1,%y0"; - - case 1: - return "lxvd2x %x0,%y1"; - - case 2: - return "xxlor %x0,%x1,%x1"; - - case 3: - return "xxlxor %x0,%x0,%x0"; - - case 4: - return output_vec_const_move (operands); - - case 5: - return "stvx %1,%y0"; - - case 6: - return "lvx %0,%y1"; - - case 7: - if (TARGET_STRING) - return \"stswi %1,%P0,16\"; - - case 8: - return \"#\"; - - case 9: - /* If the address is not used in the output, we can use lsi. Otherwise, - fall through to generating four loads. */ - if (TARGET_STRING - && ! reg_overlap_mentioned_p (operands[0], operands[1])) - return \"lswi %0,%P1,16\"; - /* ... fall through ... */ - - case 10: - case 11: - case 12: - return \"#\"; - default: - gcc_unreachable (); - } -} - [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,store,load,load, *, *") - (set_attr "update" " *, *, *, *, *, *, *, yes, yes, yes, yes, *, *") - (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16") - (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING") - (const_string "always") - (const_string "conditional")))]) + (set_attr "length" + "4, 4, 4, 16, 16, 16, + 4, 4, 4, 16, 20, 32, + 4, 4")]) ;; Explicit load/store expanders for the builtin functions (define_expand "vsx_load_" @@ -2354,7 +2397,52 @@ lxvdsx %x0,%y1" [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")]) -;; V4SF/V4SI splat +;; V4SI splat (ISA 3.0) +;; When SI's are allowed in VSX registers, add XXSPLTW support +(define_expand "vsx_splat_" + [(set (match_operand:VSX_W 0 "vsx_register_operand" "") + (vec_duplicate:VSX_W + (match_operand: 1 "splat_input_operand" "")))] + "TARGET_P9_VECTOR" +{ + if (MEM_P (operands[1])) + operands[1] = rs6000_address_for_fpconvert (operands[1]); + else if (!REG_P (operands[1])) + operands[1] = 
force_reg (mode, operands[1]); +}) + +(define_insn "*vsx_splat_v4si_internal" + [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa") + (vec_duplicate:V4SI + (match_operand:SI 1 "reg_or_indexed_operand" "r,Z")))] + "TARGET_P9_VECTOR" + "@ + mtvsrws %x0,%1 + lxvwsx %x0,%y1" + [(set_attr "type" "mftgpr,vecload")]) + +;; V4SF splat (ISA 3.0) +(define_insn_and_split "*vsx_splat_v4sf_internal" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa") + (vec_duplicate:V4SF + (match_operand:SF 1 "reg_or_indexed_operand" "Z,wy,r")))] + "TARGET_P9_VECTOR" + "@ + lxvwsx %x0,%y1 + # + mtvsrws %x0,%1" + "&& reload_completed && vsx_register_operand (operands[1], SFmode)" + [(set (match_dup 0) + (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN)) + (set (match_dup 0) + (vec_duplicate:V4SF + (vec_select:SF (match_dup 0) + (parallel [(const_int 0)]))))] + "" + [(set_attr "type" "vecload,vecperm,mftgpr") + (set_attr "length" "4,8,4")]) + +;; V4SF/V4SI splat from a vector element (define_insn "vsx_xxspltw_" [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?") (vec_duplicate:VSX_W @@ -2597,21 +2685,50 @@ (define_peephole [(set (match_operand:P 0 "base_reg_operand" "") (match_operand:P 1 "short_cint_operand" "")) - (set (match_operand:VSX_M2 2 "vsx_register_operand" "") - (mem:VSX_M2 (plus:P (match_dup 0) - (match_operand:P 3 "int_reg_operand" ""))))] + (set (match_operand:VSX_M 2 "vsx_register_operand" "") + (mem:VSX_M (plus:P (match_dup 0) + (match_operand:P 3 "int_reg_operand" ""))))] "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" - "li %0,%1\t\t\t# vector load fusion\;lxx %x2,%0,%3" + "li %0,%1\t\t\t# vector load fusion\;lxx %x2,%0,%3" [(set_attr "length" "8") (set_attr "type" "vecload")]) (define_peephole [(set (match_operand:P 0 "base_reg_operand" "") (match_operand:P 1 "short_cint_operand" "")) - (set (match_operand:VSX_M2 2 "vsx_register_operand" "") - (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "") - (match_dup 0))))] + (set 
(match_operand:VSX_M 2 "vsx_register_operand" "") + (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "") + (match_dup 0))))] "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" - "li %0,%1\t\t\t# vector load fusion\;lxx %x2,%0,%3" + "li %0,%1\t\t\t# vector load fusion\;lxx %x2,%0,%3" [(set_attr "length" "8") (set_attr "type" "vecload")]) + + +;; ISA 3.0 vector extend sign support + +(define_insn "vsx_sign_extend_qi_" + [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") + (unspec:VSINT_84 + [(match_operand:V16QI 1 "vsx_register_operand" "v")] + UNSPEC_VSX_SIGN_EXTEND))] + "TARGET_P9_VECTOR" + "vextsb2 %0,%1" + [(set_attr "type" "vecsimple")]) + +(define_insn "*vsx_sign_extend_hi_" + [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") + (unspec:VSINT_84 + [(match_operand:V8HI 1 "vsx_register_operand" "v")] + UNSPEC_VSX_SIGN_EXTEND))] + "TARGET_P9_VECTOR" + "vextsh2 %0,%1" + [(set_attr "type" "vecsimple")]) + +(define_insn "*vsx_sign_extend_si_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] + UNSPEC_VSX_SIGN_EXTEND))] + "TARGET_P9_VECTOR" + "vextsw2d %0,%1" + [(set_attr "type" "vecsimple")]) diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 6915fb24e01..e7b51c10593 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3214,6 +3214,9 @@ Floating point register if the LFIWZX instruction is enabled or NO_REGS. @item wD Int constant that is the element number of the 64-bit scalar in a vector. +@item wE +Vector constant that can be loaded with the XXSPLTIB instruction. + @item wF Memory operand suitable for power9 fusion load/stores. @@ -3221,9 +3224,12 @@ Memory operand suitable for power9 fusion load/stores. Memory operand suitable for TOC fusion memory references. @item wL -Int constant that is the element number that the MFVSRLD instruction +Int constant that is the element number that the MFVSRLD instruction. targets. 
+@item wM +Match vector constant with all 1's if the XXLORC instruction is available. + @item wO A memory operand suitable for the ISA 3.0 vector d-form instructions. @@ -3231,6 +3237,9 @@ A memory operand suitable for the ISA 3.0 vector d-form instructions. A memory address that will work with the @code{lq} and @code{stq} instructions. +@item wS +Vector constant that can be loaded with XXSPLTIB & sign extension. + @item h @samp{MQ}, @samp{CTR}, or @samp{LINK} register diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d7189e7257d..d77545d03bd 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2016-05-18 Michael Meissner + + * gcc.target/powerpc/p9-splat-1.c: New tests for ISA 3.0 word + splat operations and the XXSPLTIB instruction. + * gcc.target/powerpc/p9-splat-2.c: Likewise. + * gcc.target/powerpc/p9-splat-3.c: Likewise. + * gcc.target/powerpc/pr47755.c: Allow vspltisw in addition to + xxlxor to clear a register. + 2016-05-18 Richard Sandiford * gcc.dg/torture/pr71020.c: New test. 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-splat-1.c b/gcc/testsuite/gcc.target/powerpc/p9-splat-1.c new file mode 100644 index 00000000000..13b72872d74 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p9-splat-1.c @@ -0,0 +1,27 @@ +/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ + +#include + +vector int +foo_r (int a) +{ + return (vector int) { a, a, a, a }; /* mtvsrws */ +} + +vector int +foo_r2 (int a) +{ + return vec_splats (a); /* mtvsrws */ +} + +vector int +foo_p (int *a) +{ + return (vector int) { *a, *a, *a, *a }; /* lxvwsx */ +} + +/* { dg-final { scan-assembler-times "mtvsrws" 2 } } */ +/* { dg-final { scan-assembler-times "lxvwsx" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/p9-splat-2.c b/gcc/testsuite/gcc.target/powerpc/p9-splat-2.c new file mode 100644 index 00000000000..2468e92dddb --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p9-splat-2.c @@ -0,0 +1,38 @@ +/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ + +#include + +vector float +foo_r (float a) +{ + return (vector float) { a, a, a, a }; /* xscvdpspn/xxspltw */ +} + +vector float +foo_r2 (float a) +{ + return vec_splats (a); /* xscvdpspn/xxspltw */ +} + +vector float +foo_g (float *a) +{ + float f = *a; + + __asm__ (" # %0" : "+r" (f)); + return (vector float) { f, f, f, f }; /* mtvsrws */ +} + +vector float +foo_p (float *a) +{ + return (vector float) { *a, *a, *a, *a }; /* lxvwsx */ +} + +/* { dg-final { scan-assembler-times "xscvdpspn" 2 } } */ +/* { dg-final { scan-assembler-times "xxspltw" 2 } } */ +/* { dg-final { scan-assembler-times "mtvsrws" 1 } } */ +/* { 
dg-final { scan-assembler-times "lxvwsx" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/p9-splat-3.c b/gcc/testsuite/gcc.target/powerpc/p9-splat-3.c new file mode 100644 index 00000000000..8a121da2572 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p9-splat-3.c @@ -0,0 +1,61 @@ +/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ + +#include + +typedef vector signed char v16qi_t; +typedef vector short v8hi_t; +typedef vector int v4si_t; +typedef vector long long v2di_t; + +void v16qi_0a (v16qi_t *p) { *p = (v16qi_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; } +void v8hi_0a (v8hi_t *p) { *p = (v8hi_t) { 0, 0, 0, 0, 0, 0, 0, 0 }; } +void v4si_0a (v4si_t *p) { *p = (v4si_t) { 0, 0, 0, 0 }; } +void v2di_0a (v2di_t *p) { *p = (v2di_t) { 0, 0 }; } + +void v16qi_0b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)0); } +void v8hi_0b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)0); } +void v4si_0b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)0); } +void v2di_0b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)0); } + +void v16qi_m1a (v16qi_t *p) { *p = (v16qi_t) { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; } +void v8hi_m1a (v8hi_t *p) { *p = (v8hi_t) { -1, -1, -1, -1, -1, -1, -1, -1 }; } +void v4si_m1a (v4si_t *p) { *p = (v4si_t) { -1, -1, -1, -1 }; } +void v2di_m1a (v2di_t *p) { *p = (v2di_t) { -1, -1 }; } + +void v16qi_m1b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)-1); } +void v8hi_m1b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)-1); } +void v4si_m1b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)-1); } +void v2di_m1b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)-1); } + +void v16qi_5a (v16qi_t *p) { *p = (v16qi_t) { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }; } +void v8hi_5a (v8hi_t *p) { *p = 
(v8hi_t) { 5, 5, 5, 5, 5, 5, 5, 5 }; } +void v4si_5a (v4si_t *p) { *p = (v4si_t) { 5, 5, 5, 5 }; } +void v2di_5a (v2di_t *p) { *p = (v2di_t) { 5, 5 }; } + +void v16qi_5b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)5); } +void v8hi_5b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)5); } +void v4si_5b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)5); } +void v2di_5b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)5); } + +void v16qi_33a (v16qi_t *p) { *p = (v16qi_t) { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 }; } +void v8hi_33a (v8hi_t *p) { *p = (v8hi_t) { 33, 33, 33, 33, 33, 33, 33, 33 }; } +void v4si_33a (v4si_t *p) { *p = (v4si_t) { 33, 33, 33, 33 }; } +void v2di_33a (v2di_t *p) { *p = (v2di_t) { 33, 33 }; } + +void v16qi_33b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)33); } +void v8hi_33b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)33); } +void v4si_33b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)33); } +void v2di_33b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)33); } + +/* { dg-final { scan-assembler "xxspltib" } } */ +/* { dg-final { scan-assembler "vextsb2d" } } */ +/* { dg-final { scan-assembler "vextsb2w" } } */ +/* { dg-final { scan-assembler "vupk\[hl\]sb" } } */ +/* { dg-final { scan-assembler-not "lxvd2x" } } */ +/* { dg-final { scan-assembler-not "lxvw4x" } } */ +/* { dg-final { scan-assembler-not "lxv " } } */ +/* { dg-final { scan-assembler-not "lxvx" } } */ +/* { dg-final { scan-assembler-not "lvx" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr47755.c b/gcc/testsuite/gcc.target/powerpc/pr47755.c index 8feef291e56..d5feecac691 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr47755.c +++ b/gcc/testsuite/gcc.target/powerpc/pr47755.c @@ -3,7 +3,7 @@ /* { dg-require-effective-target powerpc_vsx_ok } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ /* { dg-options "-O3 -mcpu=power7" } */ -/* { dg-final { scan-assembler "xxlxor" } } */ +/* { 
dg-final { scan-assembler "xxlxor\|vspltis\[bhw\]" } } */ /* { dg-final { scan-assembler-not "lxvd2x" } } */ /* { dg-final { scan-assembler-not "lxvw4x" } } */ /* { dg-final { scan-assembler-not "lvx" } } */