i386.h (ix86_tune_indices): Add X86_TUNE_INTER_UNIT_CONVERSIONS.

* i386.h (ix86_tune_indices): Add X86_TUNE_INTER_UNIT_CONVERSIONS.
	(TARGET_INTER_UNIT_CONVERSIONS): New.
	* i386.md (floatsi expanders): Remove redundant check for SImode
	source; offload to memory when asked for.
	(floatsisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse
	floatdisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse):
	Update conditions;
	(floatsisf2_mixed_memory, floatsisf2_sse_memory,
	floatsidf2_mixed_memory, floatsidf2_sse_memory
	floatdisf2_mixed_memory, floatsisf2_sse_memory,
	floatsidf2_mixed_memory, floatsidf2_sse_memory): New.

From-SVN: r128369
This commit is contained in:
Jan Hubicka 2007-09-11 13:38:05 +02:00 committed by Jan Hubicka
parent 7986e000a9
commit 630ecd8d10
4 changed files with 180 additions and 15 deletions

View File

@ -1,3 +1,17 @@
2007-09-11 Jan Hubicka <jh@suse.cz>
* i386.h (ix86_tune_indices): Add X86_TUNE_INTER_UNIT_CONVERSIONS.
(TARGET_INTER_UNIT_CONVERSIONS): New.
* i386.md (floatsi expanders): Remove redundant check for SImode
source; offload to memory when asked for.
(floatsisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse
floatdisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse):
Update conditions;
(floatsisf2_mixed_memory, floatsisf2_sse_memory,
floatsidf2_mixed_memory, floatsidf2_sse_memory
floatdisf2_mixed_memory, floatsisf2_sse_memory,
floatsidf2_mixed_memory, floatsidf2_sse_memory): New.
2007-09-11 Jan Hubicka <jh@suse.cz>
* toplev.c (process_options): all frontends now do unit-at-a-time.

View File

@ -1376,6 +1376,9 @@ unsigned int ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_INTER_UNIT_MOVES */
~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
/* X86_TUNE_INTER_UNIT_CONVERSIONS */
~(m_AMDFAM10),
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

View File

@ -259,6 +259,7 @@ enum ix86_tune_indices {
X86_TUNE_SHIFT1,
X86_TUNE_USE_FFREEP,
X86_TUNE_INTER_UNIT_MOVES,
X86_TUNE_INTER_UNIT_CONVERSIONS,
X86_TUNE_FOUR_JUMP_LIMIT,
X86_TUNE_SCHEDULE,
X86_TUNE_USE_BT,
@ -336,6 +337,8 @@ extern unsigned int ix86_tune_features[X86_TUNE_LAST];
#define TARGET_SHIFT1 ix86_tune_features[X86_TUNE_SHIFT1]
#define TARGET_USE_FFREEP ix86_tune_features[X86_TUNE_USE_FFREEP]
#define TARGET_INTER_UNIT_MOVES ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES]
#define TARGET_INTER_UNIT_CONVERSIONS\
ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
#define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
#define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE]
#define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT]

View File

@ -4775,14 +4775,13 @@
"TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"
/* When we use vector converts, we can't have input in memory. */
if (GET_MODE (operands[0]) == DFmode && GET_MODE (operands[1]) == SImode
if (GET_MODE (operands[0]) == DFmode
&& TARGET_USE_VECTOR_CONVERTS && !optimize_size && TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (DFmode))
operands[1] = force_reg (SImode, operands[1]);
if (GET_MODE (operands[0]) == SFmode && GET_MODE (operands[1]) == SImode
&& !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (SFmode))
else if (GET_MODE (operands[0]) == SFmode
&& !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (SFmode))
{
/* When !flag_trapping_math, we handle SImode->SFmode vector
conversions same way as SImode->DFmode.
@ -4811,6 +4810,19 @@
operands[1] = tmp;
}
}
/* Offload operand of cvtsi2ss and cvtsi2sd into memory for
!TARGET_INTER_UNIT_CONVERSIONS
It is neccesary for the patterns to not accept nonemmory operands
as we would optimize out later. */
else if (!TARGET_INTER_UNIT_CONVERSIONS
&& TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& !optimize_size
&& !MEM_P (operands[1]))
{
rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
emit_move_insn (tmp, operands[1]);
operands[1] = tmp;
}
")
(define_insn "*floatsisf2_mixed_vector"
@ -4833,7 +4845,8 @@
[(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
(float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m")))]
"TARGET_MIX_SSE_I387
&& (!TARGET_USE_VECTOR_CONVERTS || optimize_size)"
&& ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
|| optimize_size)"
"@
fild%z1\t%1
#
@ -4846,6 +4859,20 @@
(set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsisf2_mixed_memory"
[(set (match_operand:SF 0 "register_operand" "=f,x")
(float:SF (match_operand:SI 1 "memory_operand" "m,m")))]
"TARGET_MIX_SSE_I387
&& !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
"@
fild%z1\t%1
cvtsi2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "fmov,sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "*,double")
(set_attr "amdfam10_decode" "*,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsisf2_sse_vector_nointernunit"
[(set (match_operand:SF 0 "register_operand" "=x")
(float:SF (match_operand:SI 1 "memory_operand" "m")))]
@ -4907,7 +4934,8 @@
[(set (match_operand:SF 0 "register_operand" "=x,x")
(float:SF (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
"TARGET_SSE_MATH
&& (!TARGET_USE_VECTOR_CONVERTS || optimize_size)"
&& ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
|| optimize_size)"
"cvtsi2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
@ -4915,6 +4943,18 @@
(set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsisf2_sse_memory"
[(set (match_operand:SF 0 "register_operand" "=x")
(float:SF (match_operand:SI 1 "memory_operand" "m")))]
"TARGET_SSE_MATH
&& !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
"cvtsi2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "double")
(set_attr "amdfam10_decode" "double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsidf2_mixed_vector"
[(set (match_operand:DF 0 "register_operand" "=x,f,f")
(float:DF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))]
@ -4935,7 +4975,8 @@
[(set (match_operand:DF 0 "register_operand" "=f,?f,x,x,!x")
(float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m,x")))]
"TARGET_SSE2 && TARGET_MIX_SSE_I387
&& (!TARGET_USE_VECTOR_CONVERTS || !optimize_size)"
&& ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
|| optimize_size)"
"@
fild%z1\t%1
#
@ -4949,6 +4990,20 @@
(set_attr "amdfam10_decode" "*,*,vector,double,double")
(set_attr "fp_int_src" "true,true,true,true,false")])
(define_insn "*floatsidf2_mixed_memory"
[(set (match_operand:DF 0 "register_operand" "=f,x")
(float:DF (match_operand:SI 1 "memory_operand" "m,m")))]
"TARGET_SSE2 && TARGET_MIX_SSE_I387
&& !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
"@
fild%z1\t%1
cvtsi2sd\t{%1, %0|%0, %1}"
[(set_attr "type" "fmov,sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "*,direct")
(set_attr "amdfam10_decode" "*,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsidf2_sse_vector"
[(set (match_operand:DF 0 "register_operand" "=x")
(float:DF (match_operand:SI 1 "register_operand" "x")))]
@ -4981,7 +5036,8 @@
[(set (match_operand:DF 0 "register_operand" "=x,x,!x")
(float:DF (match_operand:SI 1 "nonimmediate_operand" "r,m,x")))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& (!TARGET_USE_VECTOR_CONVERTS || optimize_size)"
&& ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
|| optimize_size)"
"@
cvtsi2sd\t{%1, %0|%0, %1}
cvtsi2sd\t{%1, %0|%0, %1}
@ -4992,6 +5048,19 @@
(set_attr "amdfam10_decode" "vector,double,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsidf2_memory"
[(set (match_operand:DF 0 "register_operand" "=x")
(float:DF (match_operand:SI 1 "memory_operand" "x")))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
|| optimize_size)"
"cvtsi2sd\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "direct")
(set_attr "amdfam10_decode" "double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsi<mode>2_i387"
[(set (match_operand:MODEF 0 "register_operand" "=f,f")
(float:MODEF
@ -5010,12 +5079,23 @@
[(set (match_operand:SF 0 "register_operand" "")
(float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
"TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)"
"")
{
if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
&& TARGET_SSE_MATH && SSE_FLOAT_MODE_P (SFmode)
&& !optimize_size
&& !MEM_P (operands[1]))
{
rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
emit_move_insn (tmp, operands[1]);
operands[1] = tmp;
}
})
(define_insn "*floatdisf2_mixed"
[(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
(float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
"TARGET_64BIT && TARGET_MIX_SSE_I387"
"TARGET_64BIT && TARGET_MIX_SSE_I387
&& (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
"@
fild%z1\t%1
#
@ -5028,10 +5108,25 @@
(set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdisf2_mixed"
[(set (match_operand:SF 0 "register_operand" "=f,x")
(float:SF (match_operand:DI 1 "memory_operand" "m,m")))]
"TARGET_64BIT && TARGET_MIX_SSE_I387
&& !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
"@
fild%z1\t%1
cvtsi2ss{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "fmov,sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "*,double")
(set_attr "amdfam10_decode" "*,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdisf2_sse"
[(set (match_operand:SF 0 "register_operand" "=x,x")
(float:SF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
"TARGET_64BIT && TARGET_SSE_MATH"
"TARGET_64BIT && TARGET_SSE_MATH
&& (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
"cvtsi2ss{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
@ -5039,6 +5134,18 @@
(set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdisf2_memory"
[(set (match_operand:SF 0 "register_operand" "=x")
(float:SF (match_operand:DI 1 "memory_operand" "m")))]
"TARGET_64BIT && TARGET_SSE_MATH
&& !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
"cvtsi2ss{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "double")
(set_attr "amdfam10_decode" "double")
(set_attr "fp_int_src" "true")])
(define_expand "floatdidf2"
[(set (match_operand:DF 0 "register_operand" "")
(float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
@ -5049,12 +5156,22 @@
ix86_expand_convert_sign_didf_sse (operands[0], operands[1]);
DONE;
}
if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
&& TARGET_SSE_MATH && SSE_FLOAT_MODE_P (DFmode)
&& !optimize_size
&& !MEM_P (operands[1]))
{
rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
emit_move_insn (tmp, operands[1]);
operands[1] = tmp;
}
})
(define_insn "*floatdidf2_mixed"
[(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
(float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
"TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387"
"TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
&& (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
"@
fild%z1\t%1
#
@ -5067,10 +5184,25 @@
(set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdidf2_mixed_memory"
[(set (match_operand:DF 0 "register_operand" "=f,x")
(float:DF (match_operand:DI 1 "memory_operand" "m,m")))]
"TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
&& !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
"@
fild%z1\t%1
cvtsi2sd{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "fmov,sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "*,direct")
(set_attr "amdfam10_decode" "*,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdidf2_sse"
[(set (match_operand:DF 0 "register_operand" "=x,x")
(float:DF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
"TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
"TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
&& (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
"cvtsi2sd{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
@ -5078,11 +5210,24 @@
(set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdidf2_sse_memory"
[(set (match_operand:DF 0 "register_operand" "=x")
(float:DF (match_operand:DI 1 "memory_operand" "m")))]
"TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
&& !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
"cvtsi2sd{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "direct")
(set_attr "amdfam10_decode" "double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdi<mode>2_i387"
[(set (match_operand:MODEF 0 "register_operand" "=f,f")
(float:MODEF
(match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
"TARGET_80387"
"TARGET_80387
&& (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))"
"@
fild%z1\t%1
#"