x86-tune.def (DEF_TUNE): Remove m_CORE_ALL.
2013-10-01 Wei Mi <wmi@google.com> * config/i386/x86-tune.def (DEF_TUNE): Remove m_CORE_ALL. * config/i386/i386.md: Add define_peephole2 to break partial reg stall for cvtss2sd/cvtsd2ss. From-SVN: r203095
This commit is contained in:
parent
dd9480ef3e
commit
157ca3e989
|
@ -1,3 +1,10 @@
|
|||
2013-10-01 Wei Mi <wmi@google.com>
|
||||
|
||||
* config/i386/x86-tune.def (DEF_TUNE): Remove
|
||||
m_CORE_ALL.
|
||||
* config/i386/i386.md: Add define_peephole2 to
|
||||
break partial reg stall for cvtss2sd/cvtsd2ss.
|
||||
|
||||
2013-10-01 Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
|
||||
* config/arc/arc.c (pass_arc_ifcvt::clone):
|
||||
|
|
|
@ -5117,6 +5117,61 @@
|
|||
emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
|
||||
})
|
||||
|
||||
;; Break partial reg stall for cvtsd2ss.
;;
;; Matches a scalar DFmode -> SFmode float_truncate whose destination is an
;; SSE register and whose source is either not an SSE register or is a
;; different register number.  It applies only with TARGET_SSE2 and
;; TARGET_SSE_MATH, when TARGET_SSE_PARTIAL_REG_DEPENDENCY is set and the
;; function is optimized for speed.  The destination is cleared as a whole
;; V4SF vector and the conversion is re-expressed as a vec_merge into it,
;; presumably so the conversion no longer depends on the register's
;; previous contents.
|
||||
|
||||
(define_peephole2
|
||||
[(set (match_operand:SF 0 "register_operand")
|
||||
(float_truncate:SF
|
||||
(match_operand:DF 1 "nonimmediate_operand")))]
|
||||
"TARGET_SSE2 && TARGET_SSE_MATH
|
||||
&& TARGET_SSE_PARTIAL_REG_DEPENDENCY
|
||||
&& optimize_function_for_speed_p (cfun)
|
||||
&& SSE_REG_P (operands[0])
|
||||
&& (!SSE_REG_P (operands[1])
|
||||
|| REGNO (operands[0]) != REGNO (operands[1]))"
|
||||
;; Replacement: the truncated value is merged into operand 0 with mask
;; (const_int 1), i.e. only element 0 is taken from the conversion and the
;; remaining elements come from operand 0 itself.
[(set (match_dup 0)
|
||||
(vec_merge:V4SF
|
||||
(vec_duplicate:V4SF
|
||||
(float_truncate:V2SF
|
||||
(match_dup 1)))
|
||||
(match_dup 0)
|
||||
(const_int 1)))]
|
||||
;; Preparation: view operand 0 as V4SF and operand 1 as V2DF (subreg at
;; byte offset 0), then emit a move of the V4SF zero constant into
;; operand 0.
{
|
||||
operands[0] = simplify_gen_subreg (V4SFmode, operands[0],
|
||||
SFmode, 0);
|
||||
operands[1] = simplify_gen_subreg (V2DFmode, operands[1],
|
||||
DFmode, 0);
|
||||
emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
|
||||
})
|
||||
|
||||
;; Break partial reg stall for cvtss2sd.
;;
;; Matches a scalar SFmode -> DFmode float_extend whose destination is an
;; SSE register and whose source is either not an SSE register or is a
;; different register number.  It applies only with TARGET_SSE2 and
;; TARGET_SSE_MATH, when TARGET_SSE_PARTIAL_REG_DEPENDENCY is set and the
;; function is optimized for speed.  The destination is cleared as a whole
;; V2DF vector and the conversion is re-expressed as a vec_merge into it,
;; presumably so the conversion no longer depends on the register's
;; previous contents.
|
||||
|
||||
(define_peephole2
|
||||
[(set (match_operand:DF 0 "register_operand")
|
||||
(float_extend:DF
|
||||
(match_operand:SF 1 "nonimmediate_operand")))]
|
||||
"TARGET_SSE2 && TARGET_SSE_MATH
|
||||
&& TARGET_SSE_PARTIAL_REG_DEPENDENCY
|
||||
&& optimize_function_for_speed_p (cfun)
|
||||
&& SSE_REG_P (operands[0])
|
||||
&& (!SSE_REG_P (operands[1])
|
||||
|| REGNO (operands[0]) != REGNO (operands[1]))"
|
||||
;; Replacement: elements 0 and 1 of operand 1 (viewed as V4SF) are selected
;; and extended to V2DF; the result is merged into operand 0 with mask
;; (const_int 1), i.e. only element 0 is taken from the conversion and the
;; other element comes from operand 0 itself.
[(set (match_dup 0)
|
||||
(vec_merge:V2DF
|
||||
(float_extend:V2DF
|
||||
(vec_select:V2SF
|
||||
(match_dup 1)
|
||||
(parallel [(const_int 0) (const_int 1)])))
|
||||
(match_dup 0)
|
||||
(const_int 1)))]
|
||||
;; Preparation: view operand 0 as V2DF and operand 1 as V4SF (subreg at
;; byte offset 0), then emit a move of the V2DF zero constant into
;; operand 0.
{
|
||||
operands[0] = simplify_gen_subreg (V2DFmode, operands[0],
|
||||
DFmode, 0);
|
||||
operands[1] = simplify_gen_subreg (V4SFmode, operands[1],
|
||||
SFmode, 0);
|
||||
emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
|
||||
})
|
||||
|
||||
;; Avoid store forwarding (partial memory) stall penalty
|
||||
;; by passing DImode value through XMM registers.
|
||||
|
||||
|
|
|
@ -346,7 +346,7 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
|
|||
from FP to FP. This form of instructions avoids partial write to the
|
||||
destination. */
|
||||
DEF_TUNE (X86_TUNE_USE_VECTOR_FP_CONVERTS, "use_vector_fp_converts",
|
||||
m_CORE_ALL | m_AMDFAM10 | m_GENERIC)
|
||||
m_AMDFAM10 | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
|
||||
from integer to FP. */
|
||||
|
|
Loading…
Reference in New Issue