diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 64f97946581..839b9789dbd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,64 @@ +2007-05-31 H.J. Lu + + * config.gcc (i[34567]86-*-*): Add nmmintrin.h to + extra_headers. + (x86_64-*-*): Likewise. + + * config/i386/i386.c (OPTION_MASK_ISA_MMX_UNSET): New. + (OPTION_MASK_ISA_3DNOW_UNSET): Likewise. + (OPTION_MASK_ISA_SSE_UNSET): Likewise. + (OPTION_MASK_ISA_SSE2_UNSET): Likewise. + (OPTION_MASK_ISA_SSE3_UNSET): Likewise. + (OPTION_MASK_ISA_SSSE3_UNSET): Likewise. + (OPTION_MASK_ISA_SSE4_1_UNSET): Likewise. + (OPTION_MASK_ISA_SSE4_2_UNSET): Likewise. + (OPTION_MASK_ISA_SSE4): Likewise. + (OPTION_MASK_ISA_SSE4_UNSET): Likewise. + (OPTION_MASK_ISA_SSE4A_UNSET): Likewise. + (ix86_handle_option): Use OPTION_MASK_ISA_*_UNSET. Handle + SSE4.2. + (override_options): Support SSE4.2. + (ix86_build_const_vector): Support SImode and DImode. + (ix86_build_signbit_mask): Likewise. + (ix86_expand_int_vcond): Support V2DImode. + (IX86_BUILTIN_CRC32QI): New for SSE4.2. + (IX86_BUILTIN_CRC32HI): Likewise. + (IX86_BUILTIN_CRC32SI): Likewise. + (IX86_BUILTIN_CRC32DI): Likewise. + (IX86_BUILTIN_PCMPGTQ): Likewise. + (bdesc_crc32): Likewise. + (bdesc_sse_3arg): Likewise. + (ix86_expand_crc32): Likewise. + (ix86_init_mmx_sse_builtins): Support SSE4.2. + (ix86_expand_builtin): Likewise. + + * config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Define + __SSE4_2__ for -msse4.2. + + * config/i386/i386.md (UNSPEC_CRC32): New for SSE4.2. + (CRC32MODE): Likewise. + (crc32modesuffix): Likewise. + (crc32modeconstraint): Likewise. + (sse4_2_crc32): Likewise. + (sse4_2_crc32di): Likewise. + + * config/i386/i386.opt (msse4.2): New for SSE4.2. + (msse4): Likewise. + + * config/i386/nmmintrin.h: New. The dummy SSE4.2 intrinsic header + file. + + * config/i386/smmintrin.h: Add SSE4.2 intrinsics. + + * config/i386/sse.md (sse4_2_gtv2di3): New pattern for + SSE4.2. + (vcond): Use SSEMODEI instead of SSEMODE124. + (vcondu): Likewise. 
+ + * doc/extend.texi: Document SSE4.2 built-in functions. + + * doc/invoke.texi: Document -msse4.2/-msse4. + 2007-05-31 Zdenek Dvorak PR tree-optimization/32160 diff --git a/gcc/config.gcc b/gcc/config.gcc index 1b7caa86238..c4a47f9a587 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -276,12 +276,14 @@ xscale-*-*) i[34567]86-*-*) cpu_type=i386 extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h - pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h" + pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h + nmmintrin.h" ;; x86_64-*-*) cpu_type=i386 extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h - pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h" + pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h + nmmintrin.h" need_64bit_hwint=yes ;; ia64-*-*) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7a351f55a1d..73261b0a733 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1551,6 +1551,33 @@ int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT; was set or cleared on the command line. */ static int ix86_isa_flags_explicit; +/* Define a set of ISAs which aren't available for a given ISA. MMX + and SSE ISAs are handled separately. */ + +#define OPTION_MASK_ISA_MMX_UNSET \ + (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET) +#define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A + +#define OPTION_MASK_ISA_SSE_UNSET \ + (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET) +#define OPTION_MASK_ISA_SSE2_UNSET \ + (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET) +#define OPTION_MASK_ISA_SSE3_UNSET \ + (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET) +#define OPTION_MASK_ISA_SSSE3_UNSET \ + (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET) +#define OPTION_MASK_ISA_SSE4_1_UNSET \ + (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET) +#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A + +/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same + as -msse4.1 -msse4.2. 
-mno-sse4 should be the same as -mno-sse4.1. */ +#define OPTION_MASK_ISA_SSE4 \ + (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2) +#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET + +#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4 + /* Implement TARGET_HANDLE_OPTION. */ static bool @@ -1562,10 +1589,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX; if (!value) { - ix86_isa_flags - &= ~(OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A); - ix86_isa_flags_explicit - |= OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A; + ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET; } return true; @@ -1573,8 +1598,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW; if (!value) { - ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_A; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_A; + ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET; } return true; @@ -1585,14 +1610,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE; if (!value) { - ix86_isa_flags - &= ~(OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3 - | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1 - | OPTION_MASK_ISA_SSE4A); - ix86_isa_flags_explicit - |= (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3 - | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1 - | OPTION_MASK_ISA_SSE4A); + ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET; } return true; @@ -1600,12 +1619,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2; if (!value) { - ix86_isa_flags - &= ~(OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3 - | OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A); - 
ix86_isa_flags_explicit - |= (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3 - | OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A); + ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET; } return true; @@ -1613,12 +1628,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3; if (!value) { - ix86_isa_flags - &= ~(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1 - | OPTION_MASK_ISA_SSE4A); - ix86_isa_flags_explicit - |= (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1 - | OPTION_MASK_ISA_SSE4A); + ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET; } return true; @@ -1626,10 +1637,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3; if (!value) { - ix86_isa_flags - &= ~(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A); - ix86_isa_flags_explicit - |= OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A; + ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET; } return true; @@ -1637,17 +1646,36 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1; if (!value) { - ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A; + ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET; } return true; + case OPT_msse4_2: + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2; + if (!value) + { + ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET; + } + return true; + + case OPT_msse4: + ix86_isa_flags |= OPTION_MASK_ISA_SSE4; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4; + return true; + + case OPT_mno_sse4: + ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET; + 
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET; + return true; + case OPT_msse4a: ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A; if (!value) { - ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1; + ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET; } return true; @@ -1723,7 +1751,8 @@ override_options (void) PTA_ABM = 1 << 11, PTA_SSE4A = 1 << 12, PTA_NO_SAHF = 1 << 13, - PTA_SSE4_1 = 1 << 14 + PTA_SSE4_1 = 1 << 14, + PTA_SSE4_2 = 1 << 15 } flags; } const processor_alias_table[] = @@ -2001,6 +2030,9 @@ override_options (void) if (processor_alias_table[i].flags & PTA_SSE4_1 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1)) ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1; + if (processor_alias_table[i].flags & PTA_SSE4_2 + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2)) + ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2; if (processor_alias_table[i].flags & PTA_SSE4A && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A)) ix86_isa_flags |= OPTION_MASK_ISA_SSE4A; @@ -2202,6 +2234,13 @@ override_options (void) if (!TARGET_80387) target_flags |= MASK_NO_FANCY_MATH_387; + /* Turn on SSE4.1 builtins and popcnt instruction for -msse4.2. */ + if (TARGET_SSE4_2) + { + ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1; + x86_popcnt = true; + } + /* Turn on SSSE3 builtins for -msse4.1. 
*/ if (TARGET_SSE4_1) ix86_isa_flags |= OPTION_MASK_ISA_SSSE3; @@ -10481,6 +10520,16 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value) rtvec v; switch (mode) { + case SImode: + gcc_assert (vect); + v = gen_rtvec (4, value, value, value, value); + return gen_rtx_CONST_VECTOR (V4SImode, v); + + case DImode: + gcc_assert (vect); + v = gen_rtvec (2, value, value); + return gen_rtx_CONST_VECTOR (V2DImode, v); + case SFmode: if (vect) v = gen_rtvec (4, value, value, value, value); @@ -10501,37 +10550,53 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value) } } -/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders. - Create a mask for the sign bit in MODE for an SSE register. If VECT is - true, then replicate the mask for all elements of the vector register. - If INVERT is true, then create a mask excluding the sign bit. */ +/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders + and ix86_expand_int_vcond. Create a mask for the sign bit in MODE + for an SSE register. If VECT is true, then replicate the mask for + all elements of the vector register. If INVERT is true, then create + a mask excluding the sign bit. */ rtx ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert) { - enum machine_mode vec_mode; + enum machine_mode vec_mode, imode; HOST_WIDE_INT hi, lo; int shift = 63; rtx v; rtx mask; /* Find the sign bit, sign extended to 2*HWI. */ - if (mode == SFmode) - lo = 0x80000000, hi = lo < 0; - else if (HOST_BITS_PER_WIDE_INT >= 64) - lo = (HOST_WIDE_INT)1 << shift, hi = -1; - else - lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT); + switch (mode) + { + case SImode: + case SFmode: + imode = SImode; + vec_mode = (mode == SImode) ? V4SImode : V4SFmode; + lo = 0x80000000, hi = lo < 0; + break; + + case DImode: + case DFmode: + imode = DImode; + vec_mode = (mode == DImode) ? 
V2DImode : V2DFmode; + if (HOST_BITS_PER_WIDE_INT >= 64) + lo = (HOST_WIDE_INT)1 << shift, hi = -1; + else + lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT); + break; + + default: + gcc_unreachable (); + } if (invert) lo = ~lo, hi = ~hi; /* Force this value into the low part of a fp vector constant. */ - mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode); + mask = immed_double_const (lo, hi, imode); mask = gen_lowpart (mode, mask); v = ix86_build_const_vector (mode, vect, mask); - vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode; return force_reg (vec_mode, v); } @@ -12739,7 +12804,7 @@ ix86_expand_fp_vcond (rtx operands[]) return true; } -/* Expand a signed integral vector conditional move. */ +/* Expand a signed/unsigned integral vector conditional move. */ bool ix86_expand_int_vcond (rtx operands[]) @@ -12783,6 +12848,29 @@ ix86_expand_int_vcond (rtx operands[]) gcc_unreachable (); } + /* Only SSE4.1/SSE4.2 supports V2DImode. */ + if (mode == V2DImode) + { + switch (code) + { + case EQ: + /* SSE4.1 supports EQ. */ + if (!TARGET_SSE4_1) + return false; + break; + + case GT: + case GTU: + /* SSE4.2 supports GT/GTU. */ + if (!TARGET_SSE4_2) + return false; + break; + + default: + gcc_unreachable (); + } + } + /* Unsigned parallel compare is not supported by the hardware. Play some tricks to turn this into a signed comparison against 0. */ if (code == GTU) @@ -12792,25 +12880,30 @@ ix86_expand_int_vcond (rtx operands[]) switch (mode) { case V4SImode: + case V2DImode: { rtx t1, t2, mask; /* Perform a parallel modulo subtraction. */ t1 = gen_reg_rtx (mode); - emit_insn (gen_subv4si3 (t1, cop0, cop1)); + emit_insn ((mode == V4SImode + ? gen_subv4si3 + : gen_subv2di3) (t1, cop0, cop1)); /* Extract the original sign bit of op0. 
*/ - mask = GEN_INT (-0x80000000); - mask = gen_rtx_CONST_VECTOR (mode, - gen_rtvec (4, mask, mask, mask, mask)); - mask = force_reg (mode, mask); + mask = ix86_build_signbit_mask (GET_MODE_INNER (mode), + true, false); t2 = gen_reg_rtx (mode); - emit_insn (gen_andv4si3 (t2, cop0, mask)); + emit_insn ((mode == V4SImode + ? gen_andv4si3 + : gen_andv2di3) (t2, cop0, mask)); /* XOR it back into the result of the subtraction. This results in the sign bit set iff we saw unsigned underflow. */ x = gen_reg_rtx (mode); - emit_insn (gen_xorv4si3 (x, t1, t2)); + emit_insn ((mode == V4SImode + ? gen_xorv4si3 + : gen_xorv2di3) (x, t1, t2)); code = GT; } @@ -16637,6 +16730,14 @@ enum ix86_builtins IX86_BUILTIN_VEC_SET_V4HI, IX86_BUILTIN_VEC_SET_V16QI, + /* SSE4.2. */ + IX86_BUILTIN_CRC32QI, + IX86_BUILTIN_CRC32HI, + IX86_BUILTIN_CRC32SI, + IX86_BUILTIN_CRC32DI, + + IX86_BUILTIN_PCMPGTQ, + IX86_BUILTIN_MAX }; @@ -16728,6 +16829,15 @@ static const struct builtin_description bdesc_ptest[] = { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 }, }; +static const struct builtin_description bdesc_crc32[] = +{ + /* SSE4.2 */ + { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, 0, 0 }, + { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, 0, 0 }, + { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, 0, 0 }, + { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, 0, 0 }, +}; + /* SSE builtins with 3 arguments and the last argument must be a 8 bit constant or xmm0. 
*/ static const struct builtin_description bdesc_sse_3arg[] = @@ -17050,6 +17160,9 @@ static const struct builtin_description bdesc_2arg[] = { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, 0, 0 }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, 0, 0 }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, 0, 0 }, + + /* SSE4.2 */ + { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, 0, 0 }, }; static const struct builtin_description bdesc_1arg[] = @@ -17881,6 +17994,28 @@ ix86_init_mmx_sse_builtins (void) def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD); def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS); + /* SSE4.2. */ + ftype = build_function_type_list (unsigned_type_node, + unsigned_type_node, + unsigned_char_type_node, + NULL_TREE); + def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI); + ftype = build_function_type_list (unsigned_type_node, + unsigned_type_node, + short_unsigned_type_node, + NULL_TREE); + def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI); + ftype = build_function_type_list (unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI); + ftype = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + long_long_unsigned_type_node, + NULL_TREE); + def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI); + /* AMDFAM10 SSE4A New built-ins */ def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD); def_builtin (OPTION_MASK_ISA_SSE4A, 
"__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS); @@ -18059,6 +18194,41 @@ ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp, return target; } +/* Subroutine of ix86_expand_builtin to take care of crc32 insns. */ + +static rtx +ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (optimize + || !target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + { + op1 = copy_to_reg (op1); + op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0); + } + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + /* Subroutine of ix86_expand_builtin to take care of binop insns. */ static rtx @@ -19218,6 +19388,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if (d->code == fcode) return ix86_expand_sse_ptest (d, exp, target); + for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++) + if (d->code == fcode) + return ix86_expand_crc32 (d->icode, exp, target); + gcc_unreachable (); } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 9fed7279bba..9bdb3882623 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -46,6 +46,7 @@ Boston, MA 02110-1301, USA. 
*/ #define TARGET_SSE3 OPTION_ISA_SSE3 #define TARGET_SSSE3 OPTION_ISA_SSSE3 #define TARGET_SSE4_1 OPTION_ISA_SSE4_1 +#define TARGET_SSE4_2 OPTION_ISA_SSE4_2 #define TARGET_SSE4A OPTION_ISA_SSE4A #include "config/vxworks-dummy.h" @@ -568,6 +569,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); builtin_define ("__SSSE3__"); \ if (TARGET_SSE4_1) \ builtin_define ("__SSE4_1__"); \ + if (TARGET_SSE4_2) \ + builtin_define ("__SSE4_2__"); \ if (TARGET_SSE4A) \ builtin_define ("__SSE4A__"); \ if (TARGET_SSE_MATH && TARGET_SSE) \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3912fb6e4ab..38e41bccb08 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -173,6 +173,9 @@ (UNSPEC_PTEST 140) (UNSPEC_ROUNDP 141) (UNSPEC_ROUNDS 142) + + ; For SSE4.2 support + (UNSPEC_CRC32 143) ]) (define_constants @@ -20895,6 +20898,36 @@ } [(set_attr "type" "multi")]) +(define_mode_macro CRC32MODE [QI HI SI]) +(define_mode_attr crc32modesuffix [(QI "b") (HI "w") (SI "l")]) +(define_mode_attr crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")]) + +(define_insn "sse4_2_crc32<mode>" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "0") + (match_operand:CRC32MODE 2 "nonimmediate_operand" "<crc32modeconstraint>")] + UNSPEC_CRC32))] + "TARGET_SSE4_2" + "crc32<crc32modesuffix>\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "SI")]) + +(define_insn "sse4_2_crc32di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI + [(match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm")] + UNSPEC_CRC32))] + "TARGET_SSE4_2 && TARGET_64BIT" + "crc32q\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "DI")]) + (include "mmx.md") (include "sse.md") (include "sync.md") diff --git a/gcc/config/i386/i386.opt 
b/gcc/config/i386/i386.opt index 9257ae1ca4d..72b40c93987 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -225,6 +225,18 @@ msse4.1 Target Report Mask(ISA_SSE4_1) Var(ix86_isa_flags) VarExists Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation +msse4.2 +Target Report Mask(ISA_SSE4_2) Var(ix86_isa_flags) VarExists +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation + +msse4 +Target RejectNegative Report Mask(ISA_SSE4_2) MaskExists Var(ix86_isa_flags) VarExists +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation + +mno-sse4 +Target RejectNegative Report InverseMask(ISA_SSE4_1) MaskExists Var(ix86_isa_flags) VarExists +Do not support SSE4.1 and SSE4.2 built-in functions and code generation + msse4a Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) VarExists Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation diff --git a/gcc/config/i386/nmmintrin.h b/gcc/config/i386/nmmintrin.h new file mode 100644 index 00000000000..4b9614e6aee --- /dev/null +++ b/gcc/config/i386/nmmintrin.h @@ -0,0 +1,40 @@ +/* Copyright (C) 2007 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
*/ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 10.0. */ + +#ifndef _NMMINTRIN_H_INCLUDED +#define _NMMINTRIN_H_INCLUDED + +#ifndef __SSE4_2__ +# error "SSE4.2 instruction set not enabled" +#else +/* We just include SSE4.1 header file. */ +#include <smmintrin.h> +#endif /* __SSE4_2__ */ + +#endif /* _NMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index d57e2e6640d..d0f365b1f84 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -573,6 +573,246 @@ _mm_stream_load_si128 (__m128i *__X) return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X); } +#ifdef __SSE4_2__ + +/* These macros specify the source data format. */ +#define SIDD_UBYTE_OPS 0x00 +#define SIDD_UWORD_OPS 0x01 +#define SIDD_SBYTE_OPS 0x02 +#define SIDD_SWORD_OPS 0x03 + +/* These macros specify the comparison operation. */ +#define SIDD_CMP_EQUAL_ANY 0x00 +#define SIDD_CMP_RANGES 0x04 +#define SIDD_CMP_EQUAL_EACH 0x08 +#define SIDD_CMP_EQUAL_ORDERED 0x0c + +/* These macros specify the polarity. */ +#define SIDD_POSITIVE_POLARITY 0x00 +#define SIDD_NEGATIVE_POLARITY 0x10 +#define SIDD_MASKED_POSITIVE_POLARITY 0x20 +#define SIDD_MASKED_NEGATIVE_POLARITY 0x30 + +/* These macros specify the output selection in _mm_cmpXstri (). */ +#define SIDD_LEAST_SIGNIFICANT 0x00 +#define SIDD_MOST_SIGNIFICANT 0x40 + +/* These macros specify the output selection in _mm_cmpXstrm (). */ +#define SIDD_BIT_MASK 0x00 +#define SIDD_UNIT_MASK 0x40 + +/* Intrinsics for text/string processing. 
*/ + +#if 0 +static __inline __m128i __attribute__((__always_inline__)) +_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M) +{ + return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +static __inline int __attribute__((__always_inline__)) +_mm_cmpistri (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistri128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} + +static __inline int __attribute__((__always_inline__)) +_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} +#else +#define _mm_cmpistrm(X, Y, M) \ + ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(X), (__v16qi)(Y), (M))) +#define _mm_cmpistri(X, Y, M) \ + __builtin_ia32_pcmpistri128 ((__v16qi)(X), (__v16qi)(Y), (M)) + +#define _mm_cmpestrm(X, LX, Y, LY, M) \ + ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M))) +#define _mm_cmpestri(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestri128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#endif + +/* Intrinsics for text/string processing and reading values of + EFlags. 
*/ + +#if 0 +static __inline int __attribute__((__always_inline__)) +_mm_cmpistra (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistria128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +static __inline int __attribute__((__always_inline__)) +_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistric128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +static __inline int __attribute__((__always_inline__)) +_mm_cmpistro (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistrio128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +static __inline int __attribute__((__always_inline__)) +_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistris128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +static __inline int __attribute__((__always_inline__)) +_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M) +{ + return __builtin_ia32_pcmpistriz128 ((__v16qi)__X, + (__v16qi)__Y, + __M); +} + +static __inline int __attribute__((__always_inline__)) +_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} + +static __inline int __attribute__((__always_inline__)) +_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} + +static __inline int __attribute__((__always_inline__)) +_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} + +static __inline int __attribute__((__always_inline__)) +_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} + +static __inline int __attribute__((__always_inline__)) +_mm_cmpestrz (__m128i __X, 
int __LX, __m128i __Y, int __LY, const int __M) +{ + return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX, + (__v16qi)__Y, __LY, + __M); +} +#else +#define _mm_cmpistra(X, Y, M) \ + __builtin_ia32_pcmpistria128 ((__v16qi)(X), (__v16qi)(Y), (M)) +#define _mm_cmpistrc(X, Y, M) \ + __builtin_ia32_pcmpistric128 ((__v16qi)(X), (__v16qi)(Y), (M)) +#define _mm_cmpistro(X, Y, M) \ + __builtin_ia32_pcmpistrio128 ((__v16qi)(X), (__v16qi)(Y), (M)) +#define _mm_cmpistrs(X, Y, M) \ + __builtin_ia32_pcmpistris128 ((__v16qi)(X), (__v16qi)(Y), (M)) +#define _mm_cmpistrz(X, Y, M) \ + __builtin_ia32_pcmpistriz128 ((__v16qi)(X), (__v16qi)(Y), (M)) + +#define _mm_cmpestra(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestria128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#define _mm_cmpestrc(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestric128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#define _mm_cmpestro(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestrio128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#define _mm_cmpestrs(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestris128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#define _mm_cmpestrz(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestriz128 ((__v16qi)(X), (int)(LX), \ + (__v16qi)(Y), (int)(LY), (M)) +#endif + +/* Packed integer 64-bit comparison, zeroing or filling with ones + corresponding parts of result. */ +static __inline __m128i __attribute__((__always_inline__)) +_mm_cmpgt_epi64 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y); +} + +/* Calculate a number of bits set to 1. 
*/ +static __inline int __attribute__((__always_inline__)) +_mm_popcnt_u32 (unsigned int __X) +{ + return __builtin_popcount (__X); +} + +#ifdef __x86_64__ +static __inline long long __attribute__((__always_inline__)) +_mm_popcnt_u64 (unsigned long long __X) +{ + return __builtin_popcountll (__X); +} +#endif + +/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */ +static __inline unsigned int __attribute__((__always_inline__)) +_mm_crc32_u8 (unsigned int __C, unsigned char __V) +{ + return __builtin_ia32_crc32qi (__C, __V); +} + +static __inline unsigned int __attribute__((__always_inline__)) +_mm_crc32_u16 (unsigned int __C, unsigned short __V) +{ + return __builtin_ia32_crc32hi (__C, __V); +} + +static __inline unsigned int __attribute__((__always_inline__)) +_mm_crc32_u32 (unsigned int __C, unsigned int __V) +{ + return __builtin_ia32_crc32si (__C, __V); +} + +#ifdef __x86_64__ +static __inline unsigned long long __attribute__((__always_inline__)) +_mm_crc32_u64 (unsigned long long __C, unsigned long long __V) +{ + return __builtin_ia32_crc32di (__C, __V); +} +#endif + +#endif /* __SSE4_2__ */ + #endif /* __SSE4_1__ */ #endif /* _SMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 81ff9258ef9..76351926127 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3633,14 +3633,24 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) +(define_insn "sse4_2_gtv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (gt:V2DI + (match_operand:V2DI 1 "nonimmediate_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE4_2" + "pcmpgtq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + (define_expand "vcond" - [(set (match_operand:SSEMODE124 0 "register_operand" "") - (if_then_else:SSEMODE124 + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (if_then_else:SSEMODEI (match_operator 3 "" - [(match_operand:SSEMODE124 4 "nonimmediate_operand" "") - 
(match_operand:SSEMODE124 5 "nonimmediate_operand" "")]) - (match_operand:SSEMODE124 1 "general_operand" "") - (match_operand:SSEMODE124 2 "general_operand" "")))] + [(match_operand:SSEMODEI 4 "nonimmediate_operand" "") + (match_operand:SSEMODEI 5 "nonimmediate_operand" "")]) + (match_operand:SSEMODEI 1 "general_operand" "") + (match_operand:SSEMODEI 2 "general_operand" "")))] "TARGET_SSE2" { if (ix86_expand_int_vcond (operands)) @@ -3650,13 +3660,13 @@ }) (define_expand "vcondu" - [(set (match_operand:SSEMODE124 0 "register_operand" "") - (if_then_else:SSEMODE124 + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (if_then_else:SSEMODEI (match_operator 3 "" - [(match_operand:SSEMODE124 4 "nonimmediate_operand" "") - (match_operand:SSEMODE124 5 "nonimmediate_operand" "")]) - (match_operand:SSEMODE124 1 "general_operand" "") - (match_operand:SSEMODE124 2 "general_operand" "")))] + [(match_operand:SSEMODEI 4 "nonimmediate_operand" "") + (match_operand:SSEMODEI 5 "nonimmediate_operand" "")]) + (match_operand:SSEMODEI 1 "general_operand" "") + (match_operand:SSEMODEI 2 "general_operand" "")))] "TARGET_SSE2" { if (ix86_expand_int_vcond (operands)) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index e1104dcc474..4217eaadb48 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -7502,6 +7502,54 @@ Generates the @code{pextrd} machine instruction. Generates the @code{pextrq} machine instruction in 64bit mode. @end table +The following built-in functions are available when @option{-msse4.2} is +used. All of them generate the machine instruction that is part of the +name. 
+
+@smallexample
+v16qi __builtin_ia32_pcmpestrm128 (v16qi, int, v16qi, int, const int)
+int __builtin_ia32_pcmpestri128 (v16qi, int, v16qi, int, const int)
+int __builtin_ia32_pcmpestria128 (v16qi, int, v16qi, int, const int)
+int __builtin_ia32_pcmpestric128 (v16qi, int, v16qi, int, const int)
+int __builtin_ia32_pcmpestrio128 (v16qi, int, v16qi, int, const int)
+int __builtin_ia32_pcmpestris128 (v16qi, int, v16qi, int, const int)
+int __builtin_ia32_pcmpestriz128 (v16qi, int, v16qi, int, const int)
+v16qi __builtin_ia32_pcmpistrm128 (v16qi, v16qi, const int)
+int __builtin_ia32_pcmpistri128 (v16qi, v16qi, const int)
+int __builtin_ia32_pcmpistria128 (v16qi, v16qi, const int)
+int __builtin_ia32_pcmpistric128 (v16qi, v16qi, const int)
+int __builtin_ia32_pcmpistrio128 (v16qi, v16qi, const int)
+int __builtin_ia32_pcmpistris128 (v16qi, v16qi, const int)
+int __builtin_ia32_pcmpistriz128 (v16qi, v16qi, const int)
+__v2di __builtin_ia32_pcmpgtq (__v2di, __v2di)
+@end smallexample
+
+The following built-in functions are available when @option{-msse4.2} is
+used.
+
+@table @code
+@item unsigned int __builtin_ia32_crc32qi (unsigned int, unsigned char)
+Generates the @code{crc32b} machine instruction.
+@item unsigned int __builtin_ia32_crc32hi (unsigned int, unsigned short)
+Generates the @code{crc32w} machine instruction.
+@item unsigned int __builtin_ia32_crc32si (unsigned int, unsigned int)
+Generates the @code{crc32l} machine instruction.
+@item unsigned long long __builtin_ia32_crc32di (unsigned long long, unsigned long long)
+@end table
+
+The following built-in functions are changed to generate new SSE4.2
+instructions when @option{-msse4.2} is used.
+
+@table @code
+@item int __builtin_popcount (unsigned int)
+Generates the @code{popcntl} machine instruction.
+@item int __builtin_popcountl (unsigned long)
+Generates the @code{popcntl} or @code{popcntq} machine instruction,
+depending on the size of @code{unsigned long}.
+int __builtin_popcountll (unsigned long long) +Generates the @code{popcntq} machine instruction. +@end table + The following built-in functions are available when @option{-msse4a} is used. @smallexample diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index f93aeb1697c..4aedf300184 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -548,7 +548,7 @@ Objective-C and Objective-C++ Dialects}. -mno-fp-ret-in-387 -msoft-float @gol -mno-wide-multiply -mrtd -malign-double @gol -mpreferred-stack-boundary=@var{num} -mcx16 -msahf @gol --mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 @gol +-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol -msse4a -m3dnow -mpopcnt -mabm @gol -mthreads -mno-align-stringops -minline-all-stringops @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol @@ -10273,6 +10273,10 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mno-ssse3 @item -msse4.1 @itemx -mno-sse4.1 +@item -msse4.2 +@itemx -mno-sse4.2 +@item -msse4 +@itemx -mno-sse4 @item -msse4a @item -mno-sse4a @item -m3dnow