From 630ecd8d109104ecc207dd6c4fb96b02ba477239 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Tue, 11 Sep 2007 13:38:05 +0200 Subject: [PATCH] i386.h (ix86_tune_indices): Add X86_TUNE_INTER_UNIT_CONVERSIONS. * i386.h (ix86_tune_indices): Add X86_TUNE_INTER_UNIT_CONVERSIONS. (TARGET_INTER_UNIT_CONVERSIONS): New. * i386.md (floatsi expanders): Remove redundant check for SImode source; offload to memory when asked for. (floatsisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse floatdisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse): Update conditions; (floatsisf2_mixed_memory, floatsisf2_sse_memory, floatsidf2_mixed_memory, floatsidf2_sse_memory floatdisf2_mixed_memory, floatsisf2_sse_memory, floatsidf2_mixed_memory, floatsidf2_sse_memory): New. From-SVN: r128369 --- gcc/ChangeLog | 14 ++++ gcc/config/i386/i386.c | 3 + gcc/config/i386/i386.h | 3 + gcc/config/i386/i386.md | 175 ++++++++++++++++++++++++++++++++++++---- 4 files changed, 180 insertions(+), 15 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1706076df99..cc9624210ab 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2007-09-11 Jan Hubicka + + * i386.h (ix86_tune_indices): Add X86_TUNE_INTER_UNIT_CONVERSIONS. + (TARGET_INTER_UNIT_CONVERSIONS): New. + * i386.md (floatsi expanders): Remove redundant check for SImode + source; offload to memory when asked for. + (floatsisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse + floatdisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse): + Update conditions; + (floatsisf2_mixed_memory, floatsisf2_sse_memory, + floatsidf2_mixed_memory, floatsidf2_sse_memory + floatdisf2_mixed_memory, floatsisf2_sse_memory, + floatsidf2_mixed_memory, floatsidf2_sse_memory): New. + 2007-09-11 Jan Hubicka * toplev.c (process_options): all frontends now do unit-at-a-time. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 57fb628a489..ecec205b500 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1376,6 +1376,9 @@ unsigned int ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_INTER_UNIT_MOVES */ ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC), + /* X86_TUNE_INTER_UNIT_CONVERSIONS */ + ~(m_AMDFAM10), + /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more than 4 branch instructions in the 16 byte window. */ m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index a14c74b101d..451df2e10ca 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -259,6 +259,7 @@ enum ix86_tune_indices { X86_TUNE_SHIFT1, X86_TUNE_USE_FFREEP, X86_TUNE_INTER_UNIT_MOVES, + X86_TUNE_INTER_UNIT_CONVERSIONS, X86_TUNE_FOUR_JUMP_LIMIT, X86_TUNE_SCHEDULE, X86_TUNE_USE_BT, @@ -336,6 +337,8 @@ extern unsigned int ix86_tune_features[X86_TUNE_LAST]; #define TARGET_SHIFT1 ix86_tune_features[X86_TUNE_SHIFT1] #define TARGET_USE_FFREEP ix86_tune_features[X86_TUNE_USE_FFREEP] #define TARGET_INTER_UNIT_MOVES ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES] +#define TARGET_INTER_UNIT_CONVERSIONS\ + ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] #define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE] #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 5b50274fcb8..0c625da6b72 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4775,14 +4775,13 @@ "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" " /* When we use vector converts, we can't have input in memory. 
*/ - if (GET_MODE (operands[0]) == DFmode && GET_MODE (operands[1]) == SImode + if (GET_MODE (operands[0]) == DFmode && TARGET_USE_VECTOR_CONVERTS && !optimize_size && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (DFmode)) operands[1] = force_reg (SImode, operands[1]); - - if (GET_MODE (operands[0]) == SFmode && GET_MODE (operands[1]) == SImode - && !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH - && SSE_FLOAT_MODE_P (SFmode)) + else if (GET_MODE (operands[0]) == SFmode + && !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (SFmode)) { /* When !flag_trapping_math, we handle SImode->SFmode vector conversions same way as SImode->DFmode. @@ -4811,6 +4810,19 @@ operands[1] = tmp; } } + /* Offload operand of cvtsi2ss and cvtsi2sd into memory for + !TARGET_INTER_UNIT_CONVERSIONS. + It is necessary for the patterns to not accept nonmemory operands, + as the offload would otherwise be optimized out later. */ + else if (!TARGET_INTER_UNIT_CONVERSIONS + && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && !optimize_size + && !MEM_P (operands[1])) + { + rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL); + emit_move_insn (tmp, operands[1]); + operands[1] = tmp; + } ") (define_insn "*floatsisf2_mixed_vector" @@ -4833,7 +4845,8 @@ [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m")))] "TARGET_MIX_SSE_I387 - && (!TARGET_USE_VECTOR_CONVERTS || optimize_size)" + && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) + || optimize_size)" "@ fild%z1\t%1 # @@ -4846,6 +4859,20 @@ (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) +(define_insn "*floatsisf2_mixed_memory" + [(set (match_operand:SF 0 "register_operand" "=f,x") + (float:SF (match_operand:SI 1 "memory_operand" "m,m")))] + "TARGET_MIX_SSE_I387 + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "@ + fild%z1\t%1 + cvtsi2ss\t{%1, %0|%0, %1}" + [(set_attr
"type" "fmov,sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "*,double") + (set_attr "amdfam10_decode" "*,double") + (set_attr "fp_int_src" "true")]) + (define_insn "*floatsisf2_sse_vector_nointernunit" [(set (match_operand:SF 0 "register_operand" "=x") (float:SF (match_operand:SI 1 "memory_operand" "m")))] @@ -4907,7 +4934,8 @@ [(set (match_operand:SF 0 "register_operand" "=x,x") (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,m")))] "TARGET_SSE_MATH - && (!TARGET_USE_VECTOR_CONVERTS || optimize_size)" + && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) + || optimize_size)" "cvtsi2ss\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "mode" "SF") @@ -4915,6 +4943,18 @@ (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) +(define_insn "*floatsisf2_sse_memory" + [(set (match_operand:SF 0 "register_operand" "=x") + (float:SF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_SSE_MATH + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "cvtsi2ss\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "double") + (set_attr "amdfam10_decode" "double") + (set_attr "fp_int_src" "true")]) + (define_insn "*floatsidf2_mixed_vector" [(set (match_operand:DF 0 "register_operand" "=x,f,f") (float:DF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))] @@ -4935,7 +4975,8 @@ [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x,!x") (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m,x")))] "TARGET_SSE2 && TARGET_MIX_SSE_I387 - && (!TARGET_USE_VECTOR_CONVERTS || !optimize_size)" + && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) + || optimize_size)" "@ fild%z1\t%1 # @@ -4949,6 +4990,20 @@ (set_attr "amdfam10_decode" "*,*,vector,double,double") (set_attr "fp_int_src" "true,true,true,true,false")]) +(define_insn "*floatsidf2_mixed_memory" + [(set (match_operand:DF 0 "register_operand" "=f,x") + (float:DF (match_operand:SI 1
"memory_operand" "m,m")))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "@ + fild%z1\t%1 + cvtsi2sd\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") + (set_attr "fp_int_src" "true")]) + (define_insn "*floatsidf2_sse_vector" [(set (match_operand:DF 0 "register_operand" "=x") (float:DF (match_operand:SI 1 "register_operand" "x")))] @@ -4981,7 +5036,8 @@ [(set (match_operand:DF 0 "register_operand" "=x,x,!x") (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,m,x")))] "TARGET_SSE2 && TARGET_SSE_MATH - && (!TARGET_USE_VECTOR_CONVERTS || optimize_size)" + && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) + || optimize_size)" "@ cvtsi2sd\t{%1, %0|%0, %1} cvtsi2sd\t{%1, %0|%0, %1} @@ -4992,6 +5048,19 @@ (set_attr "amdfam10_decode" "vector,double,double") (set_attr "fp_int_src" "true")]) +(define_insn "*floatsidf2_memory" + [(set (match_operand:DF 0 "register_operand" "=x") + (float:DF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && !TARGET_INTER_UNIT_CONVERSIONS + && !optimize_size" + "cvtsi2sd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") + (set_attr "fp_int_src" "true")]) + (define_insn "*floatsi2_i387" [(set (match_operand:MODEF 0 "register_operand" "=f,f") (float:MODEF @@ -5010,12 +5079,23 @@ [(set (match_operand:SF 0 "register_operand" "") (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)" - "") +{ + if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT + && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (SFmode) + && !optimize_size + && !MEM_P (operands[1])) + { + rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL); + emit_move_insn (tmp,
operands[1]); + operands[1] = tmp; + } +}) (define_insn "*floatdisf2_mixed" [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))] - "TARGET_64BIT && TARGET_MIX_SSE_I387" + "TARGET_64BIT && TARGET_MIX_SSE_I387 + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" "@ fild%z1\t%1 # @@ -5028,10 +5108,25 @@ (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) +(define_insn "*floatdisf2_mixed_memory" + [(set (match_operand:SF 0 "register_operand" "=f,x") + (float:SF (match_operand:DI 1 "memory_operand" "m,m")))] + "TARGET_64BIT && TARGET_MIX_SSE_I387 + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "@ + fild%z1\t%1 + cvtsi2ss{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "*,double") + (set_attr "amdfam10_decode" "*,double") + (set_attr "fp_int_src" "true")]) + (define_insn "*floatdisf2_sse" [(set (match_operand:SF 0 "register_operand" "=x,x") (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,m")))] - "TARGET_64BIT && TARGET_SSE_MATH" + "TARGET_64BIT && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" "cvtsi2ss{q}\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "mode" "SF") @@ -5039,6 +5134,18 @@ (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) +(define_insn "*floatdisf2_memory" + [(set (match_operand:SF 0 "register_operand" "=x") + (float:SF (match_operand:DI 1 "memory_operand" "m")))] + "TARGET_64BIT && TARGET_SSE_MATH + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "cvtsi2ss{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "double") + (set_attr "amdfam10_decode" "double") + (set_attr "fp_int_src" "true")]) + (define_expand "floatdidf2" [(set (match_operand:DF 0 "register_operand" "") (float:DF (match_operand:DI 1 "nonimmediate_operand" "")))] @@ -5049,12 +5156,22 @@
ix86_expand_convert_sign_didf_sse (operands[0], operands[1]); DONE; } + if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT + && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (DFmode) + && !optimize_size + && !MEM_P (operands[1])) + { + rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL); + emit_move_insn (tmp, operands[1]); + operands[1] = tmp; + } }) (define_insn "*floatdidf2_mixed" [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x") (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))] - "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387" + "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387 + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" "@ fild%z1\t%1 # @@ -5067,10 +5184,25 @@ (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) +(define_insn "*floatdidf2_mixed_memory" + [(set (match_operand:DF 0 "register_operand" "=f,x") + (float:DF (match_operand:DI 1 "memory_operand" "m,m")))] + "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387 + && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "@ + fild%z1\t%1 + cvtsi2sd{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") + (set_attr "fp_int_src" "true")]) + (define_insn "*floatdidf2_sse" [(set (match_operand:DF 0 "register_operand" "=x,x") (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,m")))] - "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH" + "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" "cvtsi2sd{q}\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "mode" "DF") @@ -5078,11 +5210,24 @@ (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) +(define_insn "*floatdidf2_sse_memory" + [(set (match_operand:DF 0 "register_operand" "=x") + (float:DF (match_operand:DI 1 "memory_operand" "m")))] + "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH + && 
!TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + "cvtsi2sd{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") + (set_attr "fp_int_src" "true")]) + (define_insn "*floatdi2_i387" [(set (match_operand:MODEF 0 "register_operand" "=f,f") (float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387" + "TARGET_80387 + && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))" "@ fild%z1\t%1 #"