diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a2118e95083..693ea09f3e1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,31 @@ +2008-04-01 Uros Bizjak + + * config/i386/i386.md (rex64suffix): New mode attribute. + (floathi2): Disable expander for SSE math. + (*floathi2_1): New insn insn_and_split pattern. + (*floathi2_i387_with_temp): New macroized instruction pattern and + corresponding post-reload splitters. + (*floathi2_i387): New macroized insn pattern. + (float2): New macroized expander. + (*float2_1): New macroized + insn_and_split pattern. + (*floatsi2_vector_mixed_with_temp, *floatsi2_vector_mixed): + New macroized instruction patterns and corresponding post-reload + splitters. + (*floatsi2_mixed_with_temp): New macroized instruction pattern + and corresponding post-reload splitters. + (*floatsi2_mixed_interunit, *floatsi2_mixed_nointerunit): + New macroized instruction patterns. + (*floatsi2_vector_sse_with_temp, *floatsi2_vector_sse): New + macroized instruction patterns and corresponding post-reload splitters. + (*floatsi2_sse_with_temp): New macroized instruction pattern and + corresponding post-reload splitters. + (*floatsi2_sse_interunit, *floatsi2_mixed_nointerunit): + New macroized instruction patterns. + (*floatsi2_i387_with_temp): New macroized instruction pattern and + corresponding post-reload splitters. + (*floatsi2_i387): New macroized instruction patterns. + 2008-04-01 H.J. Lu * config/i386/i386.md (smaxmin): New. @@ -115,10 +143,10 @@ * common.opt (fprofile-dir=, fprofile-use=, fprofile-generate=): New options (fprofile-use): Add var flag_profile_use - * coverage.c (coverage_begin_output): Do not open a gcno file for output - only if -ftest-coverage is set. + * coverage.c (coverage_begin_output): Do not open a gcno file for + output only if -ftest-coverage is set. Do not add getpwd() to gcda file path. - (build_gcov_info): Check the new flag + (build_gcov_info): Check the new flag flag_profile_datafile_relative_path. 
(coverage_init): Use profile_data_prefix. Read profile counter only if flag_profile_use is set. @@ -131,8 +159,8 @@ 2008-03-31 James E. Wilson - * varasm.c (output_constant_pool_1): In LABEL_REF check, use tmp - consistently. + * varasm.c (output_constant_pool_1): In LABEL_REF check, + use tmp consistently. PR target/35695 * config/ia64/div.md (recip_approx_rf): Use UNSPEC not DIV. @@ -153,8 +181,7 @@ 2008-03-31 Ian Lance Taylor - * tlink.c (scan_linker_output): Look for symbol name in single - quotes. + * tlink.c (scan_linker_output): Look for symbol name in single quotes. 2008-03-31 Jan Hubicka @@ -173,7 +200,8 @@ (regno_reg_rtx): ... new global array. (reg_rtx_no, seq_stack, REGNO_POINTER_ALIGN): Update accestors. (pending_stack_adjust, inhibit_defer_pop, saveregs_value, - apply_args_value, forced_labels, stack_pointer_delta): Update accestors. + apply_args_value, forced_labels, stack_pointer_delta): + Update accestors. (struct varasm_status): Move here from varasm.c (struct rtl_data): New. Move here some fields from struct function. (return_label, naked_return_label, stack_slot_list, parm_birth_insn, @@ -185,7 +213,8 @@ (get_arg_pointer_save_area): Update prototype. * emit-rtl.c (rtl): Declare. (regno_reg_rtx): Declare. - (first_insn, last_insn, cur_insn_uid, last_location, first_label_num): Update. + (first_insn, last_insn, cur_insn_uid, last_location, first_label_num): + Update. (gen_reg_rtx): Update. (init_virtual_regs): Do not tate emit_status argument. (init_emit): Do not allocate emit. diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8ccb0b2b919..a72e7725d83 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -580,6 +580,9 @@ ;; SSE vector mode corresponding to a scalar mode (define_mode_attr ssevecmode [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")]) + +;; Instruction suffix for REX 64bit operators. 
+(define_mode_attr rex64suffix [(SI "") (DI "{q}")]) ;; Scheduling descriptions @@ -4819,535 +4822,487 @@ ;; wants to be able to do this between registers. (define_expand "floathi2" - [(set (match_operand:MODEF 0 "register_operand" "") - (float:MODEF (match_operand:HI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" -{ - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - { - emit_insn - (gen_floatsi2 (operands[0], - convert_to_mode (SImode, operands[1], 0))); - DONE; - } -}) - -(define_insn "*floathi2_i387" - [(set (match_operand:MODEF 0 "register_operand" "=f,f") - (float:MODEF - (match_operand:HI 1 "nonimmediate_operand" "m,?r")))] + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))] "TARGET_80387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)" - "@ - fild%z1\t%1 - #" + "") + +;; Pre-reload splitter to add memory clobber to the pattern. +(define_insn_and_split "*floathi2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "register_operand" "")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (float:X87MODEF (match_dup 1))) + (clobber (match_dup 2))])] + "operands[2] = assign_386_stack_local (HImode, SLOT_TEMP);") + +(define_insn "*floathi2_i387_with_temp" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_operand:HI 2 "memory_operand" "=m,m"))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "#" [(set_attr "type" "fmov,multi") (set_attr "mode" "") (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) -(define_expand "floatsi2" - [(set (match_operand:MODEF 0 "register_operand" "") 
- (float:MODEF (match_operand:SI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" - " - /* When we use vector converts, we can't have input in memory. */ - if (GET_MODE (operands[0]) == DFmode - && TARGET_USE_VECTOR_CONVERTS && !optimize_size && TARGET_SSE_MATH - && SSE_FLOAT_MODE_P (DFmode)) - operands[1] = force_reg (SImode, operands[1]); - else if (GET_MODE (operands[0]) == SFmode - && !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH - && SSE_FLOAT_MODE_P (SFmode)) - { - /* When !flag_trapping_math, we handle SImode->SFmode vector - conversions same way as SImode->DFmode. - - For flat_trapping_math we can't safely use vector conversion without - clearing upper half, otherwise precision exception might occur. - However we can still generate the common sequence converting value - from general register to XMM register as: - - mov reg32, mem32 - movd mem32, xmm - cvtdq2pd xmm,xmm - - because we know that movd clears the upper half. - - Sadly in this case we can't rely on reload moving the value to XMM - register, since we need to know if upper half is OK, so we need - to do reloading by hand. We force operand to memory unless target - supports inter unit moves. */ - if (!flag_trapping_math) - operands[1] = force_reg (SImode, operands[1]); - else if (!MEM_P (operands[1])) - { - int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; - rtx tmp = assign_386_stack_local (SImode, slot); - emit_move_insn (tmp, operands[1]); - operands[1] = tmp; - } - } - /* Offload operand of cvtsi2ss and cvtsi2sd into memory for - !TARGET_INTER_UNIT_CONVERSIONS - It is necessary for the patterns to not accept nonmemory operands - as we would optimize out later. */ - else if (!TARGET_INTER_UNIT_CONVERSIONS - && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) - && !optimize_size - && !MEM_P (operands[1])) - { - int slot = virtuals_instantiated ? 
SLOT_TEMP : SLOT_VIRTUAL; - rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), slot); - emit_move_insn (tmp, operands[1]); - operands[1] = tmp; - } - ") - -(define_insn "*floatsisf2_mixed_vector" - [(set (match_operand:SF 0 "register_operand" "=x,f,?f") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))] - "TARGET_MIX_SSE_I387 && !flag_trapping_math - && TARGET_USE_VECTOR_CONVERTS && !optimize_size" - "@ - cvtdq2ps\t{%1, %0|%0, %1} - fild%z1\t%1 - #" - [(set_attr "type" "sseicvt,fmov,multi") - (set_attr "mode" "SF") - (set_attr "unit" "*,i387,*") - (set_attr "athlon_decode" "double,*,*") - (set_attr "amdfam10_decode" "double,*,*") - (set_attr "fp_int_src" "false,true,true")]) - -(define_insn "*floatsisf2_mixed" - [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m")))] - "TARGET_MIX_SSE_I387 - && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) - || optimize_size)" - "@ - fild%z1\t%1 - # - cvtsi2ss\t{%1, %0|%0, %1} - cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sseicvt,sseicvt") - (set_attr "mode" "SF") - (set_attr "unit" "*,i387,*,*") - (set_attr "athlon_decode" "*,*,vector,double") - (set_attr "amdfam10_decode" "*,*,vector,double") +(define_insn "*floathi2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF (match_operand:HI 1 "memory_operand" "m")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "fild%z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "") (set_attr "fp_int_src" "true")]) -(define_insn "*floatsisf2_mixed_memory" - [(set (match_operand:SF 0 "register_operand" "=f,x") - (float:SF (match_operand:SI 1 "memory_operand" "m,m")))] - "TARGET_MIX_SSE_I387 - && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" - "@ - fild%z1\t%1 - cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" 
"*,double") - (set_attr "amdfam10_decode" "*,double") - (set_attr "fp_int_src" "true")]) - -(define_insn "*floatsisf2_sse_vector_nointernunit" - [(set (match_operand:SF 0 "register_operand" "=x") - (float:SF (match_operand:SI 1 "memory_operand" "m")))] - "TARGET_SSE_MATH && flag_trapping_math - && TARGET_USE_VECTOR_CONVERTS && !optimize_size - && !TARGET_INTER_UNIT_MOVES" - "#" - [(set_attr "type" "multi")]) - -(define_insn "*floatsisf2_sse_vector_internunit" - [(set (match_operand:SF 0 "register_operand" "=x,x") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "rm,x")))] - "TARGET_SSE_MATH && flag_trapping_math - && TARGET_USE_VECTOR_CONVERTS && !optimize_size - && TARGET_INTER_UNIT_MOVES" - "#" - [(set_attr "type" "multi")]) - (define_split - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] - "flag_trapping_math - && TARGET_USE_VECTOR_CONVERTS && reload_completed - && (TARGET_INTER_UNIT_MOVES || MEM_P (operands[1])) - && !SSE_REG_P (operands[1]) && SSE_REG_P (operands[0])" - [(set (match_dup 0) - (float:V4SF (match_dup 2)))] -{ - operands[2] = simplify_gen_subreg (V4SImode, operands[0], SFmode, 0); - operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); - emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), operands[1])); -}) + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "register_operand" ""))) + (clobber (match_operand:HI 2 "memory_operand" ""))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] + "") (define_split - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:SI 1 "register_operand" "")))] - "flag_trapping_math - && TARGET_USE_VECTOR_CONVERTS && reload_completed - && SSE_REG_P (operands[1]) && SSE_REG_P (operands[0])" - [(set 
(match_dup 2) (vec_duplicate:V4SI (match_dup 1))) - (set (match_dup 0) - (float:V4SF (match_dup 2)))] -{ - operands[2] = simplify_gen_subreg (V4SImode, operands[0], SFmode, 0); - operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); -}) + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "memory_operand" ""))) + (clobber (match_operand:HI 2 "memory_operand" ""))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && reload_completed" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] + "") -(define_insn "*floatsisf2_sse_vector" - [(set (match_operand:SF 0 "register_operand" "=x") - (float:SF (match_operand:SI 1 "register_operand" "x")))] - "TARGET_SSE_MATH && !flag_trapping_math - && TARGET_USE_VECTOR_CONVERTS && !optimize_size - && !TARGET_INTER_UNIT_MOVES" - "cvtdq2ps\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "double") - (set_attr "amdfam10_decode" "double") - (set_attr "fp_int_src" "true")]) +(define_expand "float2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))] + "TARGET_80387 + || ((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "") -(define_insn "*floatsisf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x,x") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,m")))] - "TARGET_SSE_MATH - && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) - || optimize_size)" - "cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "vector,double") - (set_attr "amdfam10_decode" "vector,double") - (set_attr "fp_int_src" "true")]) +;; Pre-reload splitter to add memory clobber to the pattern. 
+(define_insn_and_split "*float2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))] + "((TARGET_80387 + && (!((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)) + || ((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && ((mode == SImode + && TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && flag_trapping_math) + || !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)))) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1))) + (clobber (match_dup 2))])] + "operands[2] = assign_386_stack_local (mode, SLOT_TEMP);") -(define_insn "*floatsisf2_sse_memory" - [(set (match_operand:SF 0 "register_operand" "=x") - (float:SF (match_operand:SI 1 "memory_operand" "m")))] - "TARGET_SSE_MATH - && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" - "cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "double") - (set_attr "amdfam10_decode" "double") - (set_attr "fp_int_src" "true")]) - -(define_insn "*floatsidf2_mixed_vector" - [(set (match_operand:DF 0 "register_operand" "=x,f,f") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))] +(define_insn "*floatsi2_vector_mixed_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x") + (float:MODEF + (match_operand:SI 1 "nonimmediate_operand" "m,?r,r,m,!x"))) + (clobber (match_operand:SI 2 "memory_operand" "=m,m,m,m,m"))] "TARGET_SSE2 && TARGET_MIX_SSE_I387 && TARGET_USE_VECTOR_CONVERTS && !optimize_size" - "@ - cvtdq2pd\t{%1, %0|%0, %1} - fild%z1\t%1 - #" - [(set_attr "type" "sseicvt,fmov,multi") - (set_attr "mode" "V2DF,DF,DF") - (set_attr "unit" "*,*,i387") - (set_attr "athlon_decode" "double,*,*") - (set_attr "amdfam10_decode" "double,*,*") - (set_attr "fp_int_src" "false,true,true")]) - 
-(define_insn "*floatsidf2_mixed" - [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x,!x") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m,x")))] - "TARGET_SSE2 && TARGET_MIX_SSE_I387 - && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) - || optimize_size)" - "@ - fild%z1\t%1 - # - cvtsi2sd\t{%1, %0|%0, %1} - cvtsi2sd\t{%1, %0|%0, %1} - cvtdq2pd\t{%1, %0|%0, %1}" + "#" [(set_attr "type" "fmov,multi,sseicvt,sseicvt,sseicvt") - (set_attr "mode" "DF,DF,DF,DF,V2DF") + (set_attr "mode" ",,,,") (set_attr "unit" "*,i387,*,*,*") (set_attr "athlon_decode" "*,*,double,direct,double") (set_attr "amdfam10_decode" "*,*,vector,double,double") - (set_attr "fp_int_src" "true,true,true,true,false")]) + (set_attr "fp_int_src" "true")]) -(define_insn "*floatsidf2_mixed_memory" - [(set (match_operand:DF 0 "register_operand" "=f,x") - (float:DF (match_operand:SI 1 "memory_operand" "m,m")))] +(define_insn "*floatsi2_vector_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (float:MODEF (match_operand:SI 1 "memory_operand" "m,m")))] "TARGET_SSE2 && TARGET_MIX_SSE_I387 - && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" + && TARGET_USE_VECTOR_CONVERTS && !optimize_size" "@ fild%z1\t%1 - cvtsi2sd\t{%1, %0|%0, %1}" + #" [(set_attr "type" "fmov,sseicvt") - (set_attr "mode" "DF") + (set_attr "mode" ",") + (set_attr "unit" "i387,*") (set_attr "athlon_decode" "*,direct") (set_attr "amdfam10_decode" "*,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatsidf2_sse_vector" - [(set (match_operand:DF 0 "register_operand" "=x") - (float:DF (match_operand:SI 1 "register_operand" "x")))] - "TARGET_SSE2 && TARGET_SSE_MATH - && TARGET_USE_VECTOR_CONVERTS && !optimize_size" - "cvtdq2pd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "V2DF") - (set_attr "athlon_decode" "double") - (set_attr "amdfam10_decode" "double") - (set_attr "fp_int_src" "true")]) - -(define_split - [(set (match_operand:DF 0 "register_operand" "") 
- (float:DF (match_operand:SI 1 "memory_operand" "")))] - "TARGET_USE_VECTOR_CONVERTS && reload_completed - && SSE_REG_P (operands[0])" - [(set (match_dup 0) - (float:V2DF - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 0) (const_int 1)]))))] -{ - operands[2] = simplify_gen_subreg (V4SImode, operands[0], DFmode, 0); - operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); - emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), operands[1])); -}) - -(define_insn "*floatsidf2_sse" - [(set (match_operand:DF 0 "register_operand" "=x,x,!x") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,m,x")))] - "TARGET_SSE2 && TARGET_SSE_MATH - && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) - || optimize_size)" - "@ - cvtsi2sd\t{%1, %0|%0, %1} - cvtsi2sd\t{%1, %0|%0, %1} - cvtdq2pd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF,DF,V2DF") - (set_attr "athlon_decode" "double,direct,double") - (set_attr "amdfam10_decode" "vector,double,double") - (set_attr "fp_int_src" "true")]) - -(define_insn "*floatsidf2_memory" - [(set (match_operand:DF 0 "register_operand" "=x") - (float:DF (match_operand:SI 1 "memory_operand" "x")))] - "TARGET_SSE2 && TARGET_SSE_MATH - && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS) - || optimize_size)" - "cvtsi2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "direct") - (set_attr "amdfam10_decode" "double") - (set_attr "fp_int_src" "true")]) - -(define_insn "*floatsi2_i387" - [(set (match_operand:MODEF 0 "register_operand" "=f,f") +(define_insn "*float2_mixed_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x") (float:MODEF - (match_operand:SI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387 - && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "") - (set_attr "unit" "*,i387") - 
(set_attr "fp_int_src" "true")]) - -(define_expand "floatdisf2" - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)" -{ - if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT - && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (SFmode) - && !optimize_size - && !MEM_P (operands[1])) - { - int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; - rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), slot); - emit_move_insn (tmp, operands[1]); - operands[1] = tmp; - } -}) - -(define_insn "*floatdisf2_mixed" - [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))] - "TARGET_64BIT && TARGET_MIX_SSE_I387 - && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" - "@ - fild%z1\t%1 - # - cvtsi2ss{q}\t{%1, %0|%0, %1} - cvtsi2ss{q}\t{%1, %0|%0, %1}" + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r,r,m"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,m,m,m"))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387" + "#" [(set_attr "type" "fmov,multi,sseicvt,sseicvt") - (set_attr "mode" "SF") - (set_attr "unit" "*,i387,*,*") - (set_attr "athlon_decode" "*,*,vector,double") - (set_attr "amdfam10_decode" "*,*,vector,double") - (set_attr "fp_int_src" "true")]) - -(define_insn "*floatdisf2_mixed" - [(set (match_operand:SF 0 "register_operand" "=f,x") - (float:SF (match_operand:DI 1 "memory_operand" "m,m")))] - "TARGET_64BIT && TARGET_MIX_SSE_I387 - && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" - "@ - fild%z1\t%1 - cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "*,double") - (set_attr "amdfam10_decode" "*,double") - (set_attr "fp_int_src" "true")]) - -(define_insn "*floatdisf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x,x") - (float:SF (match_operand:DI 1 
"nonimmediate_operand" "r,m")))] - "TARGET_64BIT && TARGET_SSE_MATH - && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" - "cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "vector,double") - (set_attr "amdfam10_decode" "vector,double") - (set_attr "fp_int_src" "true")]) - -(define_insn "*floatdisf2_memory" - [(set (match_operand:SF 0 "register_operand" "=x") - (float:SF (match_operand:DI 1 "memory_operand" "m")))] - "TARGET_64BIT && TARGET_SSE_MATH - && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" - "cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "double") - (set_attr "amdfam10_decode" "double") - (set_attr "fp_int_src" "true")]) - -(define_expand "floatdidf2" - [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" -{ - if (!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH) - { - ix86_expand_convert_sign_didf_sse (operands[0], operands[1]); - DONE; - } - if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT - && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (DFmode) - && !optimize_size - && !MEM_P (operands[1])) - { - int slot = virtuals_instantiated ? 
SLOT_TEMP : SLOT_VIRTUAL; - rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), slot); - emit_move_insn (tmp, operands[1]); - operands[1] = tmp; - } -}) - -(define_insn "*floatdidf2_mixed" - [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))] - "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387 - && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" - "@ - fild%z1\t%1 - # - cvtsi2sd{q}\t{%1, %0|%0, %1} - cvtsi2sd{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sseicvt,sseicvt") - (set_attr "mode" "DF") + (set_attr "mode" "") (set_attr "unit" "*,i387,*,*") (set_attr "athlon_decode" "*,*,double,direct") (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdidf2_mixed_memory" - [(set (match_operand:DF 0 "register_operand" "=f,x") - (float:DF (match_operand:DI 1 "memory_operand" "m,m")))] - "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387 - && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && TARGET_INTER_UNIT_CONVERSIONS + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P 
(operands[0])))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:MODEF (match_dup 2)))] + "") + +(define_insn "*float2_mixed_interunit" + [(set (match_operand:MODEF 0 "register_operand" "=f,x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "register_operand" "m,r,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" "@ fild%z1\t%1 - cvtsi2sd{q}\t{%1, %0|%0, %1}" + cvtsi2s\t{%1, %0|%0, %1} + cvtsi2s\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,sseicvt,sseicvt") + (set_attr "mode" "") + (set_attr "unit" "i387,*,*") + (set_attr "athlon_decode" "*,double,direct") + (set_attr "amdfam10_decode" "*,vector,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*float2_mixed_nointerunit" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" + "@ + fild%z1\t%1 + cvtsi2s\t{%1, %0|%0, %1}" [(set_attr "type" "fmov,sseicvt") - (set_attr "mode" "DF") + (set_attr "mode" "") (set_attr "athlon_decode" "*,direct") (set_attr "amdfam10_decode" "*,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdidf2_sse" - [(set (match_operand:DF 0 "register_operand" "=x,x") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,m")))] - "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH - && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" - "cvtsi2sd{q}\t{%1, %0|%0, %1}" +(define_insn "*floatsi2_vector_sse_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,x") + (float:MODEF + (match_operand:SI 1 "nonimmediate_operand" "r,m,!x"))) + (clobber (match_operand:SI 2 "memory_operand" "=m,m,m"))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size" + "#" [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - 
(set_attr "athlon_decode" "double,direct") - (set_attr "amdfam10_decode" "vector,double") + (set_attr "mode" ",,") + (set_attr "athlon_decode" "double,direct,double") + (set_attr "amdfam10_decode" "vector,double,double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdidf2_sse_memory" - [(set (match_operand:DF 0 "register_operand" "=x") - (float:DF (match_operand:DI 1 "memory_operand" "m")))] - "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH - && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size" - "cvtsi2sd{q}\t{%1, %0|%0, %1}" +(define_insn "*floatsi2_vector_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (float:MODEF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size" + "#" [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") + (set_attr "mode" "") (set_attr "athlon_decode" "direct") (set_attr "amdfam10_decode" "double") (set_attr "fp_int_src" "true")]) -(define_insn "*floatdi2_i387" - [(set (match_operand:MODEF 0 "register_operand" "=f,f") +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:SI 2 "memory_operand" ""))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + rtx op1 = operands[1]; + + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + + if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES) + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + } + /* We can ignore possible trapping value in the + high part of SSE register for non-trapping math. 
*/ + else if (SSE_REG_P (op1) && !flag_trapping_math) + operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0); + else + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[2])); + } + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "memory_operand" ""))) + (clobber (match_operand:SI 2 "memory_operand" ""))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "register_operand" "")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + rtx op1 = operands[1]; + + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + + if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES) + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + } + /* We can ignore possible trapping value in the + high part of SSE register for non-trapping math. 
*/ + else if (SSE_REG_P (op1) && !flag_trapping_math) + operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0); + else + gcc_unreachable (); + /* operands[4] is set up on every reachable path above; emit the conversion itself and finish the split, as the sibling splitters do. Without this the splitter falls through to its (const_int 0) template and the conversion is lost. */ + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "memory_operand" "")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && !optimize_size + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_insn "*float2_sse_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") (float:MODEF - (match_operand:DI 1 "nonimmediate_operand" "m,?r")))] - "TARGET_80387 - && (!TARGET_SSE_MATH || !TARGET_64BIT - || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "") - (set_attr "unit" "*,i387") + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,m"))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,direct") + (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "floatxf2" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (float:XF 
(match_operand:X87MODEI 1 "nonimmediate_operand" "m,?r")))] +(define_insn "*float2_sse_interunit" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && 
TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" + "@ + cvtsi2s\t{%1, %0|%0, %1} + cvtsi2s\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,direct") + (set_attr "amdfam10_decode" "vector,double") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "nonimmediate_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") + +(define_insn "*float2_sse_nointerunit" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (float:MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)" + "cvtsi2s\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:MODEF (match_dup 2)))] + "") + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF 
(match_operand:SSEMODEI24 1 "memory_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") + +(define_insn "*float2_i387_with_temp" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,m"))] "TARGET_80387" "@ fild%z1\t%1 #" [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") + (set_attr "mode" "") (set_attr "unit" "*,i387") (set_attr "fp_int_src" "true")]) -;; %%% Kill these when reload knows how to do it. +(define_insn "*float2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m")))] + "TARGET_80387" + "fild%z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "") + (set_attr "fp_int_src" "true")]) + (define_split - [(set (match_operand 0 "fp_register_operand" "") - (float (match_operand 1 "register_operand" "")))] - "reload_completed - && X87_FLOAT_MODE_P (GET_MODE (operands[0]))" - [(const_int 0)] -{ - operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); - operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[2]); - emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2])); - ix86_free_from_memory (GET_MODE (operands[1])); - DONE; -}) + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "TARGET_80387 + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] + "") + +(define_split + [(set 
(match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "memory_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "TARGET_80387 + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] + "") ;; Avoid store forwarding (partial memory) stall penalty by extending ;; SImode value to DImode through XMM register instead of pushing two