constraint.md (Yd, Yx): New register constraints.

* config/i386/constraint.md (Yd, Yx): New register constraints.
	* config/i386/i386.md (*pushdf): Merge with *pushdf_nointeger.  Use
	Yd conditional register constraint.
	(*movtf_internal): Use standard_sse_constant_opcode.
	(*movxf_internal): Merge with *movxf_internal_nointeger.  Use
	Yx conditional register constraint.
	(*movdf_internal): Merge with *movdf_internal_nointeger.  Use
	Yd conditional register constraint.  Use standard_sse_constant_p to
	check for valid SSE constants and call standard_sse_constant_opcode to
	output SSE insn.
	(*movsf_internal): Use standard_sse_constant_p to check for valid SSE
	constants and call standard_sse_constant_opcode to output SSE insn.
	* config/i386/i386.c (ix86_option_override_internal): Set
	TARGET_INTEGER_DFMODE_MOVES for 64bit targets.  Clear it when
	optimize_size is set.
	(standard_sse_constant_opcode): Output conditional AVX insn templates.

From-SVN: r173757
This commit is contained in:
Uros Bizjak 2011-05-14 17:33:02 +02:00 committed by Uros Bizjak
parent 748f7574e8
commit 479fecd31e
4 changed files with 109 additions and 272 deletions

View File

@ -1,3 +1,41 @@
2011-05-14 Uros Bizjak <ubizjak@gmail.com>
* config/i386/constraint.md (Yd, Yx): New register constraints.
* config/i386/i386.md (*pushdf): Merge with *pushdf_nointeger. Use
Yd conditional register constraint.
(*movtf_internal): Use standard_sse_constant_opcode.
(*movxf_internal): Merge with *movxf_internal_nointeger. Use
Yx conditional register constraint.
(*movdf_internal): Merge with *movdf_internal_nointeger. Use
Yd conditional register constraint. Use standard_sse_constant_p to
check for valid SSE constants and call standard_sse_constant_opcode to
output SSE insn.
(*movsf_internal): Use standard_sse_constant_p to check for valid SSE
constants and call standard_sse_constant_opcode to output SSE insn.
* config/i386/i386.c (ix86_option_override_internal): Set
TARGET_INTEGER_DFMODE_MOVES for 64bit targets. Clear it when
optimize_size is set.
(standard_sse_constant_opcode): Output conditional AVX insn templates.
2011-05-14 Uros Bizjak <ubizjak@gmail.com>
* config/i386/constraint.md (Yd, Yx): New register constraints.
* config/i386/i386.md (*pushdf): Merge with *pushdf_nointeger. Use
Yd conditional register constraint.
(*movtf_internal): Use standard_sse_constant_opcode.
(*movxf_internal): Merge with *movxf_internal_nointeger. Use
Yx conditional register constraint.
(*movdf_internal): Merge with *movdf_internal_nointeger. Use
Yd conditional register constraint. Use standard_sse_constant_p to
check for valid SSE constants and call standard_sse_constant_opcode to
output SSE insn.
(*movsf_internal): Use standard_sse_constant_p to check for valid SSE
constants and call standard_sse_constant_opcode to output SSE insn.
* config/i386/i386.c (ix86_option_override_internal): Set
TARGET_INTEGER_DFMODE_MOVES for 64bit targets. Clear it when
optimize_size is set.
(standard_sse_constant_opcode): Output conditional AVX insn templates.
2011-05-14 Tobias Burnus <burnus@net-b.de>
* doc/invoke.texi (-Ofast): Also enables -fstack-arrays.
@ -243,11 +281,11 @@
2011-05-11 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (legitimize_tls_address)
<TLS_MODEL_GLOBAL_DYNAMIC>: Call gen_tls_dynamic_gnu2_{32,64}
<case TLS_MODEL_GLOBAL_DYNAMIC>: Call gen_tls_dynamic_gnu2_{32,64}
expanders directly for TARGET_GNU2_TLS. Determine pic and
__tls_get_addr symbol reference here. Update call to
gen_tls_global_dynamic_{32,64} for added arguments.
<TLS_MODEL_LOCAL_DYNAMIC>: Call gen_tls_dynamic_gnu2_{32,64}
<case TLS_MODEL_LOCAL_DYNAMIC>: Call gen_tls_dynamic_gnu2_{32,64}
expanders directly for TARGET_GNU2_TLS. Determine
__tls_get_addr symbol reference here. Update call to
gen_tls_local_dynamic_base_{32,64} for added arguments. Attach

View File

@ -90,6 +90,8 @@
;; 2 SSE2 enabled
;; i SSE2 inter-unit moves enabled
;; m MMX inter-unit moves enabled
;; d Integer register when integer DFmode moves are enabled
;; x Integer register when integer XFmode moves are enabled
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
@ -105,6 +107,14 @@
"TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS"
"@internal Any MMX register, when inter-unit moves are enabled.")
(define_register_constraint "Yd"
"TARGET_INTEGER_DFMODE_MOVES ? GENERAL_REGS : NO_REGS"
"@internal Any integer register when integer DFmode moves are enabled.")
(define_register_constraint "Yx"
"optimize_function_for_speed_p (cfun) ? GENERAL_REGS : NO_REGS"
"@internal Any integer register when integer XFmode moves are enabled.")
;; Integer constant constraints.
(define_constraint "I"
"Integer constant in the range 0 @dots{} 31, for 32-bit shifts."
@ -149,7 +159,7 @@
(define_constraint "G"
"Standard 80387 floating point constant."
(and (match_code "const_double")
(match_test "standard_80387_constant_p (op)")))
(match_test "standard_80387_constant_p (op) > 0")))
;; This can theoretically be any mode's CONST0_RTX.
(define_constraint "C"

View File

@ -3933,6 +3933,13 @@ ix86_option_override_internal (bool main_args_p)
if (!TARGET_80387)
target_flags |= MASK_NO_FANCY_MATH_387;
/* On 32bit targets, avoid moving DFmode values in
integer registers when optimizing for size. */
if (TARGET_64BIT)
target_flags |= TARGET_INTEGER_DFMODE_MOVES;
else if (optimize_size)
target_flags &= ~TARGET_INTEGER_DFMODE_MOVES;
/* Turn on MMX builtins for -msse. */
if (TARGET_SSE)
{
@ -8580,17 +8587,17 @@ standard_sse_constant_opcode (rtx insn, rtx x)
switch (get_attr_mode (insn))
{
case MODE_V4SF:
return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
return "%vxorps\t%0, %d0";
case MODE_V2DF:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
return "%vxorps\t%0, %d0";
else
return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
return "%vxorpd\t%0, %d0";
case MODE_TI:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
return "%vxorps\t%0, %d0";
else
return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
return "%vpxor\t%0, %d0";
case MODE_V8SF:
return "vxorps\t%x0, %x0, %x0";
case MODE_V4DF:
@ -8607,7 +8614,7 @@ standard_sse_constant_opcode (rtx insn, rtx x)
break;
}
case 2:
return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
return "%vpcmpeqd\t%0, %d0";
default:
break;
}

View File

@ -2702,10 +2702,14 @@
[(const_int 0)]
"ix86_split_long_move (operands); DONE;")
;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
;; Size of pushdf using integer instructions is 2+2*memory operand size
;; On the average, pushdf using integers can be still shorter.
(define_insn "*pushdf"
[(set (match_operand:DF 0 "push_operand" "=<,<,<")
(match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))]
"TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
(match_operand:DF 1 "general_no_elim_operand" "f,Yd*rFo,Y2"))]
""
{
/* This insn should be already split before reg-stack. */
gcc_unreachable ();
@ -2714,23 +2718,6 @@
(set_attr "unit" "i387,*,*")
(set_attr "mode" "DF,SI,DF")])
;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
;; Size of pushdf using integer instructions is 2+2*memory operand size
;; On the average, pushdf using integers can be still shorter. Allow this
;; pattern for optimize_size too.
(define_insn "*pushdf_nointeger"
[(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
(match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))]
"!(TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES)"
{
/* This insn should be already split before reg-stack. */
gcc_unreachable ();
}
[(set_attr "type" "multi")
(set_attr "unit" "i387,*,*,*")
(set_attr "mode" "DF,SI,SI,DF")])
;; %%% Kill this when call knows how to work this out.
(define_split
[(set (match_operand:DF 0 "push_operand" "")
@ -2822,14 +2809,14 @@
return "%vmovaps\t{%1, %0|%0, %1}";
else
return "%vmovdqa\t{%1, %0|%0, %1}";
case 2:
if (get_attr_mode (insn) == MODE_V4SF)
return "%vxorps\t%0, %d0";
else
return "%vpxor\t%0, %d0";
return standard_sse_constant_opcode (insn, operands[1]);
case 3:
case 4:
return "#";
default:
gcc_unreachable ();
}
@ -2862,42 +2849,14 @@
"ix86_split_long_move (operands); DONE;")
(define_insn "*movxf_internal"
[(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o")
(match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))]
"optimize_function_for_speed_p (cfun)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))
[(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r ,o")
(match_operand:XF 1 "general_operand" "fm,f,G,Yx*roF,FYx*r"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
|| GET_CODE (operands[1]) != CONST_DOUBLE
|| memory_operand (operands[0], XFmode))"
{
switch (which_alternative)
{
case 0:
case 1:
return output_387_reg_move (insn, operands);
case 2:
return standard_80387_constant_opcode (operands[1]);
case 3: case 4:
return "#";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi")
(set_attr "mode" "XF,XF,XF,SI,SI")])
;; Do not use integer registers when optimizing for size
(define_insn "*movxf_internal_nointeger"
[(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o")
(match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))]
"optimize_function_for_size_p (cfun)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
|| standard_80387_constant_p (operands[1])
|| GET_CODE (operands[1]) != CONST_DOUBLE
|| (optimize_function_for_size_p (cfun)
&& standard_80387_constant_p (operands[1]) > 0)
|| memory_operand (operands[0], XFmode))"
{
switch (which_alternative)
@ -2940,10 +2899,12 @@
"TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
|| (!(TARGET_SSE2 && TARGET_SSE_MATH)
&& optimize_function_for_size_p (cfun)
&& standard_80387_constant_p (operands[1]))
|| GET_CODE (operands[1]) != CONST_DOUBLE
|| (optimize_function_for_size_p (cfun)
&& ((!(TARGET_SSE2 && TARGET_SSE_MATH)
&& standard_80387_constant_p (operands[1]) > 0)
|| (TARGET_SSE2 && TARGET_SSE_MATH
&& standard_sse_constant_p (operands[1]))))
|| memory_operand (operands[0], DFmode))"
{
switch (which_alternative)
@ -2966,23 +2927,8 @@
return "#";
case 7:
switch (get_attr_mode (insn))
{
case MODE_V4SF:
return "%vxorps\t%0, %d0";
case MODE_V2DF:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "%vxorps\t%0, %d0";
else
return "%vxorpd\t%0, %d0";
case MODE_TI:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "%vxorps\t%0, %d0";
else
return "%vpxor\t%0, %d0";
default:
gcc_unreachable ();
}
return standard_sse_constant_opcode (insn, operands[1]);
case 8:
case 9:
case 10:
@ -3094,21 +3040,26 @@
]
(const_string "DF")))])
;; Possible store forwarding (partial memory) stall in alternative 4.
(define_insn "*movdf_internal"
[(set (match_operand:DF 0 "nonimmediate_operand"
"=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ")
"=f,m,f,Yd*r ,o ,Y2*x,Y2*x,Y2*x,m ")
(match_operand:DF 1 "general_operand"
"fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))]
"fm,f,G,Yd*roF,FYd*r,C ,Y2*x,m ,Y2*x"))]
"!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
&& optimize_function_for_speed_p (cfun)
&& TARGET_INTEGER_DFMODE_MOVES
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
|| (!(TARGET_SSE2 && TARGET_SSE_MATH)
&& optimize_function_for_size_p (cfun)
&& standard_80387_constant_p (operands[1]))
|| GET_CODE (operands[1]) != CONST_DOUBLE
|| memory_operand (operands[0], DFmode))"
|| (optimize_function_for_size_p (cfun)
&& ((!(TARGET_SSE2 && TARGET_SSE_MATH)
&& standard_80387_constant_p (operands[1]) > 0)
|| (TARGET_SSE2 && TARGET_SSE_MATH
&& standard_sse_constant_p (operands[1])))
&& !memory_operand (operands[0], DFmode))
|| ((TARGET_INTEGER_DFMODE_MOVES
|| (optimize_function_for_size_p (cfun)
&& !TARGET_MEMORY_MISMATCH_STALL))
&& memory_operand (operands[0], DFmode)))"
{
switch (which_alternative)
{
@ -3124,179 +3075,8 @@
return "#";
case 5:
switch (get_attr_mode (insn))
{
case MODE_V4SF:
return "%vxorps\t%0, %d0";
case MODE_V2DF:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "%vxorps\t%0, %d0";
else
return "%vxorpd\t%0, %d0";
case MODE_TI:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "%vxorps\t%0, %d0";
else
return "%vpxor\t%0, %d0";
default:
gcc_unreachable ();
}
case 6:
case 7:
case 8:
switch (get_attr_mode (insn))
{
case MODE_V4SF:
return "%vmovaps\t{%1, %0|%0, %1}";
case MODE_V2DF:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "%vmovaps\t{%1, %0|%0, %1}";
else
return "%vmovapd\t{%1, %0|%0, %1}";
case MODE_TI:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "%vmovaps\t{%1, %0|%0, %1}";
else
return "%vmovdqa\t{%1, %0|%0, %1}";
case MODE_DI:
return "%vmovq\t{%1, %0|%0, %1}";
case MODE_DF:
if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
else
return "%vmovsd\t{%1, %0|%0, %1}";
case MODE_V1DF:
if (TARGET_AVX && REG_P (operands[0]))
return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
else
return "%vmovlpd\t{%1, %0|%0, %1}";
case MODE_V2SF:
if (TARGET_AVX && REG_P (operands[0]))
return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
else
return "%vmovlps\t{%1, %0|%0, %1}";
default:
gcc_unreachable ();
}
return standard_sse_constant_opcode (insn, operands[1]);
default:
gcc_unreachable ();
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
(set (attr "prefix")
(if_then_else (eq_attr "alternative" "0,1,2,3,4")
(const_string "orig")
(const_string "maybe_vex")))
(set (attr "prefix_data16")
(if_then_else (eq_attr "mode" "V1DF")
(const_string "1")
(const_string "*")))
(set (attr "mode")
(cond [(eq_attr "alternative" "0,1,2")
(const_string "DF")
(eq_attr "alternative" "3,4")
(const_string "SI")
/* For SSE1, we have many fewer alternatives. */
(eq (symbol_ref "TARGET_SSE2") (const_int 0))
(cond [(eq_attr "alternative" "5,6")
(const_string "V4SF")
]
(const_string "V2SF"))
/* xorps is one byte shorter. */
(eq_attr "alternative" "5")
(cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
(const_int 0))
(const_string "V4SF")
(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
(const_int 0))
(const_string "TI")
]
(const_string "V2DF"))
/* For architectures resolving dependencies on
whole SSE registers use APD move to break dependency
chains, otherwise use short move to avoid extra work.
movaps encodes one byte shorter. */
(eq_attr "alternative" "6")
(cond
[(ne (symbol_ref "optimize_function_for_size_p (cfun)")
(const_int 0))
(const_string "V4SF")
(ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(const_int 0))
(const_string "V2DF")
]
(const_string "DF"))
/* For architectures resolving dependencies on register
parts we may avoid extra work to zero out upper part
of register. */
(eq_attr "alternative" "7")
(if_then_else
(ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
(const_int 0))
(const_string "V1DF")
(const_string "DF"))
]
(const_string "DF")))])
;; Moving is usually shorter when only FP registers are used. This separate
;; movdf pattern avoids the use of integer registers for FP operations
;; when optimizing for size.
(define_insn "*movdf_internal_nointeger"
[(set (match_operand:DF 0 "nonimmediate_operand"
"=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ")
(match_operand:DF 1 "general_operand"
"fm,f,G,*roF,F*r,C ,Y2*x,mY2*x,Y2*x"))]
"!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (optimize_function_for_size_p (cfun)
|| !TARGET_INTEGER_DFMODE_MOVES)
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
|| (!(TARGET_SSE2 && TARGET_SSE_MATH)
&& optimize_function_for_size_p (cfun)
&& !memory_operand (operands[0], DFmode)
&& standard_80387_constant_p (operands[1]))
|| GET_CODE (operands[1]) != CONST_DOUBLE
|| ((optimize_function_for_size_p (cfun)
|| !TARGET_MEMORY_MISMATCH_STALL)
&& memory_operand (operands[0], DFmode)))"
{
switch (which_alternative)
{
case 0:
case 1:
return output_387_reg_move (insn, operands);
case 2:
return standard_80387_constant_opcode (operands[1]);
case 3:
case 4:
return "#";
case 5:
switch (get_attr_mode (insn))
{
case MODE_V4SF:
return "%vxorps\t%0, %d0";
case MODE_V2DF:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "%vxorps\t%0, %d0";
else
return "%vxorpd\t%0, %d0";
case MODE_TI:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "%vxorps\t%0, %d0";
else
return "%vpxor\t%0, %d0";
default:
gcc_unreachable ();
}
case 6:
case 7:
case 8:
@ -3421,9 +3201,12 @@
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
|| (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun)
&& standard_80387_constant_p (operands[1]))
|| GET_CODE (operands[1]) != CONST_DOUBLE
|| (optimize_function_for_size_p (cfun)
&& ((!TARGET_SSE_MATH
&& standard_80387_constant_p (operands[1]) > 0)
|| (TARGET_SSE_MATH
&& standard_sse_constant_p (operands[1]))))
|| memory_operand (operands[0], SFmode))"
{
switch (which_alternative)
@ -3438,11 +3221,10 @@
case 3:
case 4:
return "mov{l}\t{%1, %0|%0, %1}";
case 5:
if (get_attr_mode (insn) == MODE_TI)
return "%vpxor\t%0, %d0";
else
return "%vxorps\t%0, %d0";
return standard_sse_constant_opcode (insn, operands[1]);
case 6:
if (get_attr_mode (insn) == MODE_V4SF)
return "%vmovaps\t{%1, %0|%0, %1}";