diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e9caaeee917..71453bb1268 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +Fri Dec 14 12:05:14 CET 2001 Jan Hubicka + + * i386.c (ix86_fpmath, ix86_fpmath_string): New. + (override_options): Set ix86_fpmath. + * i386.h (MASK_MIX_SSE_I387): Remove. + (TARGET_SSE_MATH): New. + (TARGET_MIX_SSE_I387): Use ix86_fpmath. + (TARGET_SWITCHES): Remove "mix-sse-i387". + (fpmath_unit): New enum. + (ix86_fpmath, ix86_fpmath_string): Declare. + * i386.md (swapsf): Fix condition. + (add?f, sub?f, mul?f, div?f, sqrt?f, min?f): Use TARGET_SSE_MATH. + (fop_?f_*_nosse): New. + (fop_*): Use TARGET_SSE_MATH. + * invoke.texi (-mfpmath): Document. + (-msse2): Add. + 2001-12-14 Jason Merrill * dwarf2out.c (output_die): Print the string in the comment for diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b9a5f293b09..55113a5e67a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -578,12 +578,16 @@ enum cmodel ix86_cmodel; /* which cpu are we scheduling for */ enum processor_type ix86_cpu; +/* which unit we are generating floating point math for */ +enum fpmath_unit ix86_fpmath; + /* which instruction set architecture to use. */ int ix86_arch; /* Strings to hold which cpu and instruction set architecture to use. */ const char *ix86_cpu_string; /* for -mcpu= */ const char *ix86_arch_string; /* for -march= */ +const char *ix86_fpmath_string; /* for -mfpmath= */ /* # of registers to use to pass arguments. */ const char *ix86_regparm_string; @@ -1066,8 +1070,45 @@ override_options () if (TARGET_RTD) error ("-mrtd calling convention not supported in the 64bit mode"); /* Enable by default the SSE and MMX builtins. */ - target_flags |= MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE; + target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE); + ix86_fpmath = FPMATH_SSE; } + else + ix86_fpmath = FPMATH_387; + + if (ix86_fpmath_string != 0) + { + if (! 
strcmp (ix86_fpmath_string, "387")) + ix86_fpmath = FPMATH_387; + else if (! strcmp (ix86_fpmath_string, "sse")) + { + if (!TARGET_SSE) + { + warning ("SSE instruction set disabled, using 387 arithmetics"); + ix86_fpmath = FPMATH_387; + } + else + ix86_fpmath = FPMATH_SSE; + } + else if (! strcmp (ix86_fpmath_string, "387,sse") + || ! strcmp (ix86_fpmath_string, "sse,387")) + { + if (!TARGET_SSE) + { + warning ("SSE instruction set disabled, using 387 arithmetics"); + ix86_fpmath = FPMATH_387; + } + else if (!TARGET_80387) + { + warning ("387 instruction set disabled, using SSE arithmetics"); + ix86_fpmath = FPMATH_SSE; + } + else + ix86_fpmath = FPMATH_SSE | FPMATH_387; + } + else + error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); + } /* It makes no sense to ask for just SSE builtins, so MMX is also turned on by -msse. */ @@ -8117,8 +8158,8 @@ ix86_expand_fp_movcc (operands) /* For SF/DFmode conditional moves based on comparisons in same mode, we may want to use SSE min/max instructions. */ - if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode) - || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode)) + if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode) + || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode)) && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0]) /* The SSE comparisons does not support the LTGT/UNEQ pair. 
*/ && (!TARGET_IEEE_FP diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 678e9d1eeca..b6e567ee1d7 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -272,7 +272,9 @@ extern const int x86_epilogue_using_move, x86_decompose_lea; #define TARGET_SSE ((target_flags & (MASK_SSE | MASK_SSE2)) != 0) #define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0) -#define TARGET_MIX_SSE_I387 ((target_flags & MASK_MIX_SSE_I387) != 0) +#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0) +#define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \ + && (ix86_fpmath & FPMATH_387)) #define TARGET_MMX ((target_flags & MASK_MMX) != 0) #define TARGET_3DNOW ((target_flags & MASK_3DNOW) != 0) #define TARGET_3DNOW_A ((target_flags & MASK_3DNOW_A) != 0) @@ -365,10 +367,6 @@ extern const int x86_epilogue_using_move, x86_decompose_lea; { "no-sse2", -MASK_SSE2, N_("") }, \ { "no-sse2", MASK_SSE2_SET, \ N_("Do not support MMX, SSE and SSE2 builtins and code generation") }, \ - { "mix-sse-i387", MASK_MIX_SSE_I387, \ - N_("Use both SSE and i387 instruction sets for floating point arithmetics") },\ - { "no-mix-sse-i387", -MASK_MIX_SSE_I387, \ - N_("Do not use both SSE and i387 instruction sets for floating point arithmetics") },\ { "128bit-long-double", MASK_128BIT_LONG_DOUBLE, \ N_("sizeof(long double) is 16") }, \ { "96bit-long-double", -MASK_128BIT_LONG_DOUBLE, \ @@ -404,8 +402,14 @@ enum processor_type PROCESSOR_PENTIUM4, PROCESSOR_max }; +enum fpmath_unit +{ + FPMATH_387 = 1, + FPMATH_SSE = 2 +}; extern enum processor_type ix86_cpu; +extern enum fpmath_unit ix86_fpmath; extern int ix86_arch; @@ -421,6 +425,8 @@ extern int ix86_arch; #define TARGET_OPTIONS \ { { "cpu=", &ix86_cpu_string, \ N_("Schedule code for given CPU")}, \ + { "fpmath=", &ix86_fpmath_string, \ + N_("Generate floating point mathematics using given instruction set")},\ { "arch=", &ix86_arch_string, \ N_("Generate code for given CPU")}, \ { "regparm=", &ix86_regparm_string, \ @@ -1271,7 
+1277,7 @@ enum reg_class #define SSE_REG_P(n) (REG_P (n) && SSE_REGNO_P (REGNO (n))) #define SSE_FLOAT_MODE_P(m) \ - ((TARGET_SSE && (m) == SFmode) || (TARGET_SSE2 && (m) == DFmode)) + ((TARGET_SSE_MATH && (m) == SFmode) || (TARGET_SSE2 && (m) == DFmode)) #define MMX_REGNO_P(n) ((n) >= FIRST_MMX_REG && (n) <= LAST_MMX_REG) #define MMX_REG_P(xop) (REG_P (xop) && MMX_REGNO_P (REGNO (xop))) @@ -3112,6 +3118,7 @@ extern enum cmodel ix86_cmodel; /* Variables in i386.c */ extern const char *ix86_cpu_string; /* for -mcpu= */ extern const char *ix86_arch_string; /* for -march= */ +extern const char *ix86_fpmath_string; /* for -mfpmath= */ extern const char *ix86_regparm_string; /* # registers to use to pass args */ extern const char *ix86_align_loops_string; /* power of two alignment for loops */ extern const char *ix86_align_jumps_string; /* power of two alignment for non-loop jumps */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 0c5407fb00e..edd3f039e5b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2776,7 +2776,7 @@ (match_operand:SF 1 "register_operand" "+f")) (set (match_dup 1) (match_dup 0))] - "reload_completed || !TARGET_SSE2" + "reload_completed || !TARGET_SSE" { if (STACK_TOP_P (operands[0])) return "fxch\t%1"; @@ -6890,14 +6890,14 @@ [(set (match_operand:DF 0 "register_operand" "") (plus:DF (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE2" + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "") (define_expand "addsf3" [(set (match_operand:SF 0 "register_operand" "") (plus:SF (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" "") ;; Subtract instructions @@ -7207,14 +7207,14 @@ [(set (match_operand:DF 0 "register_operand" "") (minus:DF (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "nonimmediate_operand" "")))] - 
"TARGET_80387 || TARGET_SSE2" + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "") (define_expand "subsf3" [(set (match_operand:SF 0 "register_operand" "") (minus:SF (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" "") ;; Multiply instructions @@ -7533,14 +7533,14 @@ [(set (match_operand:DF 0 "register_operand" "") (mult:DF (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE2" + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "") (define_expand "mulsf3" [(set (match_operand:SF 0 "register_operand" "") (mult:SF (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" "") ;; Divide instructions @@ -7587,14 +7587,14 @@ [(set (match_operand:DF 0 "register_operand" "") (div:DF (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE2" + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "") (define_expand "divsf3" [(set (match_operand:SF 0 "register_operand" "") (div:SF (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387 || TARGET_SSE" + "TARGET_80387 || TARGET_SSE_MATH" "") ;; Remainder instructions. @@ -13849,12 +13849,26 @@ ;; Gcc is slightly more smart about handling normal two address instructions ;; so use special patterns for add and mull. 
+(define_insn "*fop_sf_comm_nosse" + [(set (match_operand:SF 0 "register_operand" "=f") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "register_operand" "%0") + (match_operand:SF 2 "nonimmediate_operand" "fm")]))] + "TARGET_80387 && !TARGET_SSE_MATH + && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "SF")]) + (define_insn "*fop_sf_comm" [(set (match_operand:SF 0 "register_operand" "=f#x,x#f") (match_operator:SF 3 "binary_fp_operator" [(match_operand:SF 1 "register_operand" "%0,0") (match_operand:SF 2 "nonimmediate_operand" "fm#x,xm#f")]))] - "TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387) + "TARGET_80387 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -13870,17 +13884,31 @@ (match_operator:SF 3 "binary_fp_operator" [(match_operand:SF 1 "register_operand" "%0") (match_operand:SF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" + "TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" "* return output_387_binary_op (insn, operands);" [(set_attr "type" "sse") (set_attr "mode" "SF")]) +(define_insn "*fop_df_comm_nosse" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "%0") + (match_operand:DF 2 "nonimmediate_operand" "fm")]))] + "TARGET_80387 && (!TARGET_SSE2 || !TARGET_SSE_MATH) + && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "DF")]) + (define_insn "*fop_df_comm" [(set 
(match_operand:DF 0 "register_operand" "=f#Y,Y#f") (match_operator:DF 3 "binary_fp_operator" [(match_operand:DF 1 "register_operand" "%0,0") (match_operand:DF 2 "nonimmediate_operand" "fm#Y,Ym#f")]))] - "TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387) + "TARGET_80387 && TARGET_SSE_MATH && TARGET_SSE2 && TARGET_MIX_SSE_I387 && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -13896,7 +13924,7 @@ (match_operator:DF 3 "binary_fp_operator" [(match_operand:DF 1 "register_operand" "%0") (match_operand:DF 2 "nonimmediate_operand" "Ym")]))] - "TARGET_SSE2 + "TARGET_SSE2 && TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" "* return output_387_binary_op (insn, operands);" [(set_attr "type" "sse") @@ -13929,12 +13957,30 @@ (const_string "fop"))) (set_attr "mode" "XF")]) +(define_insn "*fop_sf_1_nosse" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "nonimmediate_operand" "0,fm") + (match_operand:SF 2 "nonimmediate_operand" "fm,0")]))] + "TARGET_80387 && !TARGET_SSE_MATH + && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:SF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + (define_insn "*fop_sf_1" [(set (match_operand:SF 0 "register_operand" "=f,f,x") (match_operator:SF 3 "binary_fp_operator" [(match_operand:SF 1 "nonimmediate_operand" "0,fm,0") (match_operand:SF 2 "nonimmediate_operand" "fm,0,xm#f")]))] - "TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387) + "TARGET_80387 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != 
MEM)" "* return output_387_binary_op (insn, operands);" @@ -13954,7 +14000,7 @@ (match_operator:SF 3 "binary_fp_operator" [(match_operand:SF 1 "register_operand" "0") (match_operand:SF 2 "nonimmediate_operand" "xm")]))] - "TARGET_SSE + "TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" "* return output_387_binary_op (insn, operands);" [(set_attr "type" "sse") @@ -13966,7 +14012,7 @@ (match_operator:SF 3 "binary_fp_operator" [(float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) (match_operand:SF 2 "register_operand" "0,0")]))] - "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE" + "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:SF 3 "mult_operator" "") @@ -13984,7 +14030,7 @@ (match_operator:SF 3 "binary_fp_operator" [(match_operand:SF 1 "register_operand" "0,0") (float:SF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE" + "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH" "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:SF 3 "mult_operator" "") @@ -13997,12 +14043,31 @@ (set_attr "ppro_uops" "many") (set_attr "mode" "SI")]) +(define_insn "*fop_df_1_nosse" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "nonimmediate_operand" "0,fm") + (match_operand:DF 2 "nonimmediate_operand" "fm,0")]))] + "TARGET_80387 && (!TARGET_SSE2 || !TARGET_SSE_MATH) + && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "DF")]) + + (define_insn "*fop_df_1" [(set (match_operand:DF 0 "register_operand" "=f#Y,f#Y,Y#f") (match_operator:DF 3 "binary_fp_operator" [(match_operand:DF 1 "nonimmediate_operand" "0,fm,0") (match_operand:DF 2 "nonimmediate_operand" "fm,0,Ym#f")]))] - "TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387) + "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" @@ -14022,7 +14087,7 @@ (match_operator:DF 3 "binary_fp_operator" [(match_operand:DF 1 "register_operand" "0") (match_operand:DF 2 "nonimmediate_operand" "Ym")]))] - "TARGET_SSE + "TARGET_SSE2 && TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" "* return output_387_binary_op (insn, operands);" [(set_attr "type" "sse")]) @@ -14033,7 +14098,7 @@ (match_operator:DF 3 "binary_fp_operator" [(float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) (match_operand:DF 2 "register_operand" "0,0")]))] - "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE2" + 
"TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:DF 3 "mult_operator" "") @@ -14051,7 +14116,7 @@ (match_operator:DF 3 "binary_fp_operator" [(match_operand:DF 1 "register_operand" "0,0") (float:DF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE2" + "TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:DF 3 "mult_operator" "") @@ -14087,7 +14152,7 @@ [(match_operand:DF 1 "register_operand" "0,f") (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] - "TARGET_80387 && !TARGET_SSE2" + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:DF 3 "mult_operator" "") @@ -14421,9 +14486,10 @@ (define_expand "sqrtdf2" [(set (match_operand:DF 0 "register_operand" "") (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "")))] - "(! TARGET_NO_FANCY_MATH_387 && TARGET_80387) || TARGET_SSE2" + "(! TARGET_NO_FANCY_MATH_387 && TARGET_80387) + || (TARGET_SSE2 && TARGET_SSE_MATH)" { - if (!TARGET_SSE2) + if (!TARGET_SSE2 || !TARGET_SSE_MATH) operands[1] = force_reg (DFmode, operands[1]); }) @@ -14431,7 +14497,7 @@ [(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f") (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0#Y,Ym#f")))] "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && (TARGET_SSE2 && TARGET_MIX_SSE_I387)" + && (TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387)" "@ fsqrt sqrtsd\t{%1, %0|%0, %1}" @@ -14442,7 +14508,7 @@ (define_insn "sqrtdf2_1_sse_only" [(set (match_operand:DF 0 "register_operand" "=Y") (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2 && (!TARGET_80387 || !TARGET_MIX_SSE_I387)" + "TARGET_SSE2 && TARGET_SSE_MATH && (!TARGET_80387 || !TARGET_MIX_SSE_I387)" "sqrtsd\t{%1, %0|%0, %1}" [(set_attr "type" "sse") (set_attr "mode" "DF") @@ -14452,7 +14518,7 @@ [(set (match_operand:DF 0 "register_operand" "=f") (sqrt:DF (match_operand:DF 1 "register_operand" "0")))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && (!TARGET_SSE2 && !TARGET_MIX_SSE_I387)" + && (!TARGET_SSE2 || !TARGET_SSE_MATH)" "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "DF") @@ -14462,7 +14528,8 @@ [(set (match_operand:DF 0 "register_operand" "=f") (sqrt:DF (float_extend:DF (match_operand:SF 1 "register_operand" "0"))))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && !TARGET_SSE2" + "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && !(TARGET_SSE2 && TARGET_SSE_MATH)" "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "DF") @@ -15963,7 +16030,7 @@ (match_dup 1) (match_dup 2))) (clobber (reg:CC 17))])] - "TARGET_SSE2" + "TARGET_SSE2 && TARGET_SSE_MATH" "#") (define_insn "*mindf" @@ -15973,7 +16040,7 @@ (match_dup 1) (match_dup 2))) (clobber (reg:CC 17))] - "TARGET_SSE2 && TARGET_IEEE_FP" + "TARGET_SSE2 && TARGET_IEEE_FP && TARGET_SSE_MATH" "#") (define_insn "*mindf_nonieee" @@ -15983,7 +16050,7 @@ (match_dup 1) (match_dup 2))) (clobber (reg:CC 17))] - "TARGET_SSE2 && !TARGET_IEEE_FP" + "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP" "#") (define_split @@ -16031,7 +16098,7 @@ (match_operand:DF 2 "nonimmediate_operand" "Ym")) (match_dup 1) (match_dup 2)))] - "TARGET_SSE2 && reload_completed" + "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed" "minsd\t{%2, %0|%0, %2}" [(set_attr "type" "sse") (set_attr "mode" "DF")]) @@ -16124,7 +16191,7 @@ (match_dup 1) (match_dup 2))) (clobber (reg:CC 17))])] - "TARGET_SSE2" + "TARGET_SSE2 && TARGET_SSE_MATH" "#") (define_insn "*maxdf" @@ -16134,7 +16201,7 @@ (match_dup 1) (match_dup 2))) (clobber (reg:CC 17))] - "TARGET_SSE2 && TARGET_IEEE_FP" + "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_IEEE_FP" "#") (define_insn "*maxdf_nonieee" @@ -16144,7 +16211,7 @@ (match_dup 1) (match_dup 2))) (clobber (reg:CC 17))] - "TARGET_SSE2 && !TARGET_IEEE_FP" + "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP" "#") (define_split @@ -16191,7 +16258,7 @@ (match_operand:DF 2 "nonimmediate_operand" "Ym")) (match_dup 1) (match_dup 2)))] - "TARGET_SSE2 && reload_completed" + "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed" "maxsd\t{%2, %0|%0, %2}" [(set_attr "type" "sse") (set_attr "mode" "DF")]) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 91fe9bc5cab..03def553eb1 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -473,12 +473,12 @@ in the following sections. 
@emph{i386 and x86-64 Options} @gccoptlist{ --mcpu=@var{cpu-type} -march=@var{cpu-type} @gol +-mcpu=@var{cpu-type} -march=@var{cpu-type} -mfpmath=@var{unit} @gol -mintel-syntax -mieee-fp -mno-fancy-math-387 @gol -mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol -mno-wide-multiply -mrtd -malign-double @gol -mpreferred-stack-boundary=@var{num} @gol --mmmx -msse -m3dnow @gol +-mmmx -msse -msse2 -msse-math -m3dnow @gol -mthreads -mno-align-stringops -minline-all-stringops @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol -m96bit-long-double -mregparm=@var{num} -momit-leaf-frame-pointer @gol @@ -7513,6 +7513,48 @@ These options are synonyms for @option{-mcpu=i386}, @option{-mcpu=i486}, @option{-mcpu=pentium}, and @option{-mcpu=pentiumpro} respectively. These synonyms are deprecated. +@item -mfpmath=@var{unit} +@opindex mfpmath +Generate floating point arithmetics for selected unit @var{unit}. The choices +for @var{unit} are: + +@table @samp +@item 387 +Use the standard 387 floating point coprocessor present on majority of chips and +emulated otherwise. Code compiled with this option will run almost everywhere. +The temporary results are computed in 80bit precision instead of the precision +specified by the type, resulting in slightly different results compared to most +of other chips. See @option{-ffloat-store} for more detailed description. + +This is the default choice for i386 compiler. + +@item sse +Use scalar floating point instructions present in the SSE instruction set. +This instruction set is supported by Pentium3 and newer chips, in the AMD line +by Athlon-4, Athlon-xp and Athlon-mp chips. The earlier version of SSE +instruction set supports only single precision arithmetics, thus the double and +extended precision arithmetics is still done using 387. A later version, present +only in Pentium4 and the future AMD x86-64 chips, supports double precision +arithmetics too. 
+ +For the i386 compiler, you need to use @option{-march=@var{cpu-type}}, @option{-msse} or +@option{-msse2} switches to enable SSE extensions and make this option +effective. For x86-64 compiler, these extensions are enabled by default. + +The resulting code should be considerably faster in majority of cases and avoid +the numerical instability problems of 387 code, but may break some existing +code that expects temporaries to be 80bit. + +This is the default choice for x86-64 compiler. + +@item sse,387 +Attempt to utilize both instruction sets at once. This effectively doubles the +amount of available registers and on chips with separate execution units for +387 and SSE the execution resources too. Use this option with care, as it is +still experimental, because the GCC register allocator does not model separate +functional units well, resulting in unstable performance. +@end table + @item -mintel-syntax @opindex mintel-syntax Emit assembly using Intel syntax opcodes instead of AT&T syntax. @@ -7663,6 +7705,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mno-mmx @item -msse @itemx -mno-sse +@item -msse2 +@itemx -mno-sse2 @item -m3dnow @itemx -mno-3dnow @opindex mmmx