Add power6 copysign/parity support
From-SVN: r162656
This commit is contained in:
parent
1808114925
commit
8119a6a61b
@ -1,3 +1,36 @@
|
||||
2010-07-28 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
* config/rs6000/rs6000.c (rs6000_rtx_costs): Update costs for
|
||||
popcount on power7 and parity on power6 systems.
|
||||
(rs6000_emit_popcount): Rename gen_popcntwsi2 to gen_popcntdsi2.
|
||||
(rs6000_emit_parity): Add support for power6 prtyd/prtyw
|
||||
instructions.
|
||||
|
||||
* config/rs6000/rs6000.md (UNSPEC_COPYSIGN): New unspec.
|
||||
(UNSPEC_PARITY): Ditto.
|
||||
(SFDF): New iterator for SF/DF.
|
||||
(rreg2): New mode attribute for floating register constraint.
|
||||
(TARGET_FLOAT): New mode attribute for whether single/double float
|
||||
is supported.
|
||||
(popcntd<mode>2): Combine popcntwsi2 and popcntddi2 into one
|
||||
pattern.
|
||||
(parity<mode>2_cmpb): New insn for parity on power6 and newer
|
||||
machines.
|
||||
(copysign<mode>3): Combine copysignsf3, copysigndf3 into one
|
||||
pattern. Add support for fcpsgn instruction added in power6.
|
||||
(copysignsf3): Delete.
|
||||
(copysigndf3): Delete.
|
||||
(copysign<mode>3_fcpsgn): New insn to generate fcpsgn. Use UNSPEC
|
||||
instead of if_then_else in RTL to avoid problems with -0.
|
||||
|
||||
* config/rs6000/vsx.md (vsx_copysign<mode>3): Use UNSPEC instead
|
||||
of if_then_else to mirror scalar code.
|
||||
(vsx_copysignsf3): Delete, use copysign<mode>3_fcpsgn in
|
||||
rs6000.md.
|
||||
|
||||
* config/rs6000/vector.md (vector_copysign<mode>3): Use UNSPEC
|
||||
instead of if_then_else.
|
||||
|
||||
2010-07-28 Xinliang David Li <davidxl@google.com>
|
||||
|
||||
* tree-ssa-loop-ivopts.c (avg_loop_niter): New function.
|
||||
|
@ -25485,7 +25485,11 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int *total,
|
||||
return false;
|
||||
|
||||
case POPCOUNT:
|
||||
*total = COSTS_N_INSNS (6);
|
||||
*total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
|
||||
return false;
|
||||
|
||||
case PARITY:
|
||||
*total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
|
||||
return false;
|
||||
|
||||
case NOT:
|
||||
@ -26121,7 +26125,7 @@ rs6000_emit_popcount (rtx dst, rtx src)
|
||||
if (TARGET_POPCNTD)
|
||||
{
|
||||
if (mode == SImode)
|
||||
emit_insn (gen_popcntwsi2 (dst, src));
|
||||
emit_insn (gen_popcntdsi2 (dst, src));
|
||||
else
|
||||
emit_insn (gen_popcntddi2 (dst, src));
|
||||
return;
|
||||
@ -26160,6 +26164,23 @@ rs6000_emit_parity (rtx dst, rtx src)
|
||||
rtx tmp;
|
||||
|
||||
tmp = gen_reg_rtx (mode);
|
||||
|
||||
/* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
|
||||
if (TARGET_CMPB)
|
||||
{
|
||||
if (mode == SImode)
|
||||
{
|
||||
emit_insn (gen_popcntbsi2 (tmp, src));
|
||||
emit_insn (gen_paritysi2_cmpb (dst, tmp));
|
||||
}
|
||||
else
|
||||
{
|
||||
emit_insn (gen_popcntbdi2 (tmp, src));
|
||||
emit_insn (gen_paritydi2_cmpb (dst, tmp));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (mode == SImode)
|
||||
{
|
||||
/* Is mult+shift >= shift+xor+shift+xor? */
|
||||
|
@ -103,6 +103,8 @@
|
||||
(UNSPEC_TOCREL 49)
|
||||
(UNSPEC_MACHOPIC_OFFSET 50)
|
||||
(UNSPEC_BPERM 51)
|
||||
(UNSPEC_COPYSIGN 52)
|
||||
(UNSPEC_PARITY 53)
|
||||
])
|
||||
|
||||
;;
|
||||
@ -222,9 +224,12 @@
|
||||
; but on e500v2, the gpr are 64 bit registers
|
||||
(define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD])
|
||||
|
||||
;; Iterator for reciprocal estimate instructions
|
||||
; Iterator for reciprocal estimate instructions
|
||||
(define_mode_iterator RECIPF [SF DF V4SF V2DF])
|
||||
|
||||
; Iterator for just SF/DF
|
||||
(define_mode_iterator SFDF [SF DF])
|
||||
|
||||
; Various instructions that come in SI and DI forms.
|
||||
; A generic w/d attribute, for things like cmpw/cmpd.
|
||||
(define_mode_attr wd [(QI "b") (HI "h") (SI "w") (DI "d")])
|
||||
@ -250,6 +255,11 @@
|
||||
(V4SF "Wf")
|
||||
(V2DF "Wd")])
|
||||
|
||||
; Floating-point register constraint letter per scalar mode: "f" for SF,
; "d" for DF.  Substituted as <rreg2> in operand constraints.
(define_mode_attr rreg2 [(SF "f")
|
||||
(DF "d")])
|
||||
|
||||
; Condition fragment substituted as <TARGET_FLOAT> in insn conditions:
; TARGET_SINGLE_FLOAT for SF, TARGET_DOUBLE_FLOAT for DF.
(define_mode_attr TARGET_FLOAT [(SF "TARGET_SINGLE_FLOAT")
|
||||
(DF "TARGET_DOUBLE_FLOAT")])
|
||||
|
||||
;; Start with fixed-point load and store insns. Here we put only the more
|
||||
;; complex forms. Basic data transfer is done later.
|
||||
@ -2272,17 +2282,11 @@
|
||||
"TARGET_POPCNTB"
|
||||
"popcntb %0,%1")
|
||||
|
||||
(define_insn "popcntwsi2"
|
||||
[(set (match_operand:SI 0 "gpc_reg_operand" "=r")
|
||||
(popcount:SI (match_operand:SI 1 "gpc_reg_operand" "r")))]
|
||||
(define_insn "popcntd<mode>2"
|
||||
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
|
||||
(popcount:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))]
|
||||
"TARGET_POPCNTD"
|
||||
"popcntw %0,%1")
|
||||
|
||||
(define_insn "popcntddi2"
|
||||
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
|
||||
(popcount:DI (match_operand:DI 1 "gpc_reg_operand" "r")))]
|
||||
"TARGET_POPCNTD && TARGET_POWERPC64"
|
||||
"popcntd %0,%1")
|
||||
"popcnt<wd> %0,%1")
|
||||
|
||||
(define_expand "popcount<mode>2"
|
||||
[(set (match_operand:GPR 0 "gpc_reg_operand" "")
|
||||
@ -2293,6 +2297,12 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Parity using prtyw (SI) or prtyd (DI); the suffix comes from the <wd>
;; mode attribute.  rs6000_emit_parity passes the result of popcntb as
;; operand 1, not the original source value.
;; NOTE(review): presumably that is why the RTL is an UNSPEC_PARITY rather
;; than a plain (parity ...) of operand 1 -- confirm.
(define_insn "parity<mode>2_cmpb"
|
||||
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
|
||||
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))]
|
||||
"TARGET_CMPB && TARGET_POPCNTB"
|
||||
"prty<wd> %0,%1")
|
||||
|
||||
(define_expand "parity<mode>2"
|
||||
[(set (match_operand:GPR 0 "gpc_reg_operand" "")
|
||||
(parity:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))]
|
||||
@ -5976,59 +5986,45 @@
|
||||
"frsqrtes %0,%1"
|
||||
[(set_attr "type" "fp")])
|
||||
|
||||
(define_expand "copysignsf3"
|
||||
(define_expand "copysign<mode>3"
|
||||
[(set (match_dup 3)
|
||||
(abs:SF (match_operand:SF 1 "gpc_reg_operand" "")))
|
||||
(abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))
|
||||
(set (match_dup 4)
|
||||
(neg:SF (abs:SF (match_dup 1))))
|
||||
(set (match_operand:SF 0 "gpc_reg_operand" "")
|
||||
(if_then_else:SF (ge (match_operand:SF 2 "gpc_reg_operand" "")
|
||||
(match_dup 5))
|
||||
(neg:SFDF (abs:SFDF (match_dup 1))))
|
||||
(set (match_operand:SFDF 0 "gpc_reg_operand" "")
|
||||
(if_then_else:SFDF (ge (match_operand:SFDF 2 "gpc_reg_operand" "")
|
||||
(match_dup 5))
|
||||
(match_dup 3)
|
||||
(match_dup 4)))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
|
||||
&& ((TARGET_PPC_GFXOPT
|
||||
&& !HONOR_NANS (SFmode)
|
||||
&& !HONOR_SIGNED_ZEROS (SFmode))
|
||||
|| VECTOR_UNIT_VSX_P (DFmode))"
|
||||
{
|
||||
if (VECTOR_UNIT_VSX_P (DFmode))
|
||||
{
|
||||
emit_insn (gen_vsx_copysignsf3 (operands[0], operands[1], operands[2],
|
||||
CONST0_RTX (SFmode)));
|
||||
DONE;
|
||||
}
|
||||
operands[3] = gen_reg_rtx (SFmode);
|
||||
operands[4] = gen_reg_rtx (SFmode);
|
||||
operands[5] = CONST0_RTX (SFmode);
|
||||
&& !HONOR_NANS (<MODE>mode)
|
||||
&& !HONOR_SIGNED_ZEROS (<MODE>mode))
|
||||
|| TARGET_CMPB
|
||||
|| VECTOR_UNIT_VSX_P (<MODE>mode))"
|
||||
{
|
||||
if (TARGET_CMPB || VECTOR_UNIT_VSX_P (<MODE>mode))
|
||||
{
|
||||
emit_insn (gen_copysign<mode>3_fcpsgn (operands[0], operands[1],
|
||||
operands[2]));
|
||||
DONE;
|
||||
}
|
||||
|
||||
operands[3] = gen_reg_rtx (<MODE>mode);
|
||||
operands[4] = gen_reg_rtx (<MODE>mode);
|
||||
operands[5] = CONST0_RTX (<MODE>mode);
|
||||
})
|
||||
|
||||
(define_expand "copysigndf3"
|
||||
[(set (match_dup 3)
|
||||
(abs:DF (match_operand:DF 1 "gpc_reg_operand" "")))
|
||||
(set (match_dup 4)
|
||||
(neg:DF (abs:DF (match_dup 1))))
|
||||
(set (match_operand:DF 0 "gpc_reg_operand" "")
|
||||
(if_then_else:DF (ge (match_operand:DF 2 "gpc_reg_operand" "")
|
||||
(match_dup 5))
|
||||
(match_dup 3)
|
||||
(match_dup 4)))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
|
||||
&& ((TARGET_PPC_GFXOPT
|
||||
&& !HONOR_NANS (DFmode)
|
||||
&& !HONOR_SIGNED_ZEROS (DFmode))
|
||||
|| VECTOR_UNIT_VSX_P (DFmode))"
|
||||
{
|
||||
if (VECTOR_UNIT_VSX_P (DFmode))
|
||||
{
|
||||
emit_insn (gen_vsx_copysigndf3 (operands[0], operands[1],
|
||||
operands[2], CONST0_RTX (DFmode)));
|
||||
DONE;
|
||||
}
|
||||
operands[3] = gen_reg_rtx (DFmode);
|
||||
operands[4] = gen_reg_rtx (DFmode);
|
||||
operands[5] = CONST0_RTX (DFmode);
|
||||
})
|
||||
;; Use an unspec rather than providing an if-then-else in RTL, to prevent the
|
||||
;; compiler from optimizing -0.0
;; Operand 1 supplies the magnitude and operand 2 the sign, matching the
;; if_then_else form used by the copysign<mode>3 expander.  Note that the
;; output template lists operand 2 before operand 1.
;; This pattern is disabled when VECTOR_UNIT_VSX_P is true for the mode.
|
||||
(define_insn "copysign<mode>3_fcpsgn"
|
||||
[(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
|
||||
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")
|
||||
(match_operand:SFDF 2 "gpc_reg_operand" "<rreg2>")]
|
||||
UNSPEC_COPYSIGN))]
|
||||
"TARGET_CMPB && !VECTOR_UNIT_VSX_P (<MODE>mode)"
|
||||
"fcpsgn %0,%2,%1"
|
||||
[(set_attr "type" "fp")])
|
||||
|
||||
;; For MIN, MAX, and conditional move, we use DEFINE_EXPAND's that involve a
|
||||
;; fsel instruction and some auxiliary computations. Then we just have a
|
||||
|
@ -309,11 +309,8 @@
|
||||
|
||||
(define_expand "vector_copysign<mode>3"
|
||||
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
|
||||
(if_then_else:VEC_F
|
||||
(ge:VEC_F (match_operand:VEC_F 2 "vfloat_operand" "")
|
||||
(match_dup 3))
|
||||
(abs:VEC_F (match_operand:VEC_F 1 "vfloat_operand" ""))
|
||||
(neg:VEC_F (abs:VEC_F (match_dup 1)))))]
|
||||
(unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")
|
||||
(match_operand:VEC_F 2 "vfloat_operand" "")] UNSPEC_COPYSIGN))]
|
||||
"VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
|
||||
"
|
||||
{
|
||||
@ -323,8 +320,6 @@
|
||||
operands[2]));
|
||||
DONE;
|
||||
}
|
||||
|
||||
operands[3] = CONST0_RTX (<MODE>mode);
|
||||
}")
|
||||
|
||||
|
||||
|
@ -852,30 +852,15 @@
|
||||
;; Copy sign
|
||||
(define_insn "vsx_copysign<mode>3"
|
||||
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
|
||||
(if_then_else:VSX_B
|
||||
(ge:VSX_B (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")
|
||||
(match_operand:VSX_B 3 "zero_constant" "j,j"))
|
||||
(abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa"))
|
||||
(neg:VSX_B (abs:VSX_B (match_dup 1)))))]
|
||||
(unspec:VSX_B
|
||||
[(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
|
||||
(match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
|
||||
UNSPEC_COPYSIGN))]
|
||||
"VECTOR_UNIT_VSX_P (<MODE>mode)"
|
||||
"x<VSv>cpsgn<VSs> %x0,%x2,%x1"
|
||||
[(set_attr "type" "<VStype_simple>")
|
||||
(set_attr "fp_type" "<VSfptype_simple>")])
|
||||
|
||||
;; Special version of copysign for single precision that knows internally
|
||||
;; scalar single values are kept as double
|
||||
(define_insn "vsx_copysignsf3"
|
||||
[(set (match_operand:SF 0 "vsx_register_operand" "=f")
|
||||
(if_then_else:SF
|
||||
(ge:SF (match_operand:SF 2 "vsx_register_operand" "f")
|
||||
(match_operand:SF 3 "zero_constant" "j"))
|
||||
(abs:SF (match_operand:SF 1 "vsx_register_operand" "f"))
|
||||
(neg:SF (abs:SF (match_dup 1)))))]
|
||||
"VECTOR_UNIT_VSX_P (DFmode)"
|
||||
"xscpsgndp %x0,%x2,%x1"
|
||||
[(set_attr "type" "fp")
|
||||
(set_attr "fp_type" "fp_addsub_d")])
|
||||
|
||||
;; For the conversions, limit the register class for the integer value to be
|
||||
;; the fprs because we don't want to add the altivec registers to movdi/movsi.
|
||||
;; For the unsigned tests, there isn't a generic double -> unsigned conversion
|
||||
|
Loading…
Reference in New Issue
Block a user