x86-tune.def (DEF_TUNE): Remove m_CORE_ALL.

2013-10-01  Wei Mi  <wmi@google.com>

        * config/i386/x86-tune.def (DEF_TUNE): Remove
        m_CORE_ALL.
        * config/i386/i386.md: Add define_peephole2 to
        break partial reg stall for cvtss2sd/cvtsd2ss.

From-SVN: r203095
This commit is contained in:
Wei Mi 2013-10-01 23:32:55 +00:00 committed by Wei Mi
parent dd9480ef3e
commit 157ca3e989
3 changed files with 63 additions and 1 deletions

View File

@ -1,3 +1,10 @@
2013-10-01 Wei Mi <wmi@google.com>
* config/i386/x86-tune.def (DEF_TUNE): Remove
m_CORE_ALL.
* config/i386/i386.md: Add define_peephole2 to
break partial reg stall for cvtss2sd/cvtsd2ss.
2013-10-01 Joern Rennecke <joern.rennecke@embecosm.com>
* config/arc/arc.c (pass_arc_ifcvt::clone):

View File

@ -5117,6 +5117,61 @@
emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
})
;; Break partial reg stall for cvtsd2ss.
;;
;; Matches a scalar DFmode -> SFmode truncation whose destination is an
;; SSE register and rewrites it as a V4SF vec_merge in which only
;; element 0 (merge mask const_int 1) comes from the conversion; the
;; remaining elements come from the destination itself.  The preparation
;; code emits a move of an all-zero V4SF constant into the destination
;; ahead of the replacement insn, so the whole register is written
;; before the conversion merges its result into it.
(define_peephole2
;; Insn to match: operand 0 (SFmode register) is set to the truncation
;; of operand 1 (DFmode register or memory).
[(set (match_operand:SF 0 "register_operand")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand")))]
;; Applies only with SSE2 scalar math, when the
;; TARGET_SSE_PARTIAL_REG_DEPENDENCY tuning is set, when the function is
;; optimized for speed, and when operand 0 is an SSE register.  If
;; operand 1 is also an SSE register it must not be the same register as
;; operand 0 -- presumably because the zeroing move emitted below would
;; otherwise destroy the source value before it is converted.
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_SSE_PARTIAL_REG_DEPENDENCY
&& optimize_function_for_speed_p (cfun)
&& SSE_REG_P (operands[0])
&& (!SSE_REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1]))"
;; Replacement: the vector form of the conversion.  Operands 0 and 1 are
;; the V4SF / V2DF views produced by the preparation code below.
[(set (match_dup 0)
(vec_merge:V4SF
(vec_duplicate:V4SF
(float_truncate:V2SF
(match_dup 1)))
(match_dup 0)
(const_int 1)))]
{
/* Re-express both operands in the vector modes used by the replacement
   pattern (subreg at byte offset 0 of the original scalar).
   NOTE(review): the results of simplify_gen_subreg are not checked
   here -- confirm it cannot fail for the operands this pattern
   accepts (in particular a memory operand 1).  */
operands[0] = simplify_gen_subreg (V4SFmode, operands[0],
SFmode, 0);
operands[1] = simplify_gen_subreg (V2DFmode, operands[1],
DFmode, 0);
/* Zero the full destination vector first; presumably this is the write
   that removes the dependency on the register's previous contents.  */
emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
})
;; Break partial reg stall for cvtss2sd.
;;
;; Matches a scalar SFmode -> DFmode extension whose destination is an
;; SSE register and rewrites it as a V2DF vec_merge in which only
;; element 0 (merge mask const_int 1) comes from the conversion; the
;; other element comes from the destination itself.  The preparation
;; code emits a move of an all-zero V2DF constant into the destination
;; ahead of the replacement insn, so the whole register is written
;; before the conversion merges its result into it.
(define_peephole2
;; Insn to match: operand 0 (DFmode register) is set to the extension
;; of operand 1 (SFmode register or memory).
[(set (match_operand:DF 0 "register_operand")
(float_extend:DF
(match_operand:SF 1 "nonimmediate_operand")))]
;; Applies only with SSE2 scalar math, when the
;; TARGET_SSE_PARTIAL_REG_DEPENDENCY tuning is set, when the function is
;; optimized for speed, and when operand 0 is an SSE register.  If
;; operand 1 is also an SSE register it must not be the same register as
;; operand 0 -- presumably because the zeroing move emitted below would
;; otherwise destroy the source value before it is converted.
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_SSE_PARTIAL_REG_DEPENDENCY
&& optimize_function_for_speed_p (cfun)
&& SSE_REG_P (operands[0])
&& (!SSE_REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1]))"
;; Replacement: the vector form of the conversion.  Elements 0 and 1 of
;; the V4SF view of operand 1 are selected and extended to V2DF.
;; Operands 0 and 1 are the V2DF / V4SF views produced by the
;; preparation code below.
[(set (match_dup 0)
(vec_merge:V2DF
(float_extend:V2DF
(vec_select:V2SF
(match_dup 1)
(parallel [(const_int 0) (const_int 1)])))
(match_dup 0)
(const_int 1)))]
{
/* Re-express both operands in the vector modes used by the replacement
   pattern (subreg at byte offset 0 of the original scalar).
   NOTE(review): the results of simplify_gen_subreg are not checked
   here -- confirm it cannot fail for the operands this pattern
   accepts (in particular a memory operand 1).  */
operands[0] = simplify_gen_subreg (V2DFmode, operands[0],
DFmode, 0);
operands[1] = simplify_gen_subreg (V4SFmode, operands[1],
SFmode, 0);
/* Zero the full destination vector first; presumably this is the write
   that removes the dependency on the register's previous contents.  */
emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
})
;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers.

View File

@ -346,7 +346,7 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
from FP to FP. This form of instructions avoids partial write to the
destination. */
DEF_TUNE (X86_TUNE_USE_VECTOR_FP_CONVERTS, "use_vector_fp_converts",
m_CORE_ALL | m_AMDFAM10 | m_GENERIC)
m_AMDFAM10 | m_GENERIC)
/* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
from integer to FP. */