config.gcc (i[34567]86-*-*): Add nmmintrin.h to extra_headers.

2007-05-31  H.J. Lu  <hongjiu.lu@intel.com>

	* config.gcc (i[34567]86-*-*): Add nmmintrin.h to
	extra_headers.
	(x86_64-*-*): Likewise.

	* config/i386/i386.c (OPTION_MASK_ISA_MMX_UNSET): New.
	(OPTION_MASK_ISA_3DNOW_UNSET): Likewise.
	(OPTION_MASK_ISA_SSE_UNSET): Likewise.
	(OPTION_MASK_ISA_SSE2_UNSET): Likewise.
	(OPTION_MASK_ISA_SSE3_UNSET): Likewise.
	(OPTION_MASK_ISA_SSSE3_UNSET): Likewise.
	(OPTION_MASK_ISA_SSE4_1_UNSET): Likewise.
	(OPTION_MASK_ISA_SSE4_2_UNSET): Likewise.
	(OPTION_MASK_ISA_SSE4): Likewise.
	(OPTION_MASK_ISA_SSE4_UNSET): Likewise.
	(OPTION_MASK_ISA_SSE4A_UNSET): Likewise.
	(ix86_handle_option): Use OPTION_MASK_ISA_*_UNSET.  Handle
	SSE4.2.
	(override_options): Support SSE4.2.
	(ix86_build_const_vector): Support SImode and DImode.
	(ix86_build_signbit_mask): Likewise.
	(ix86_expand_int_vcond): Support V2DImode.
	(IX86_BUILTIN_CRC32QI): New for SSE4.2.
	(IX86_BUILTIN_CRC32HI): Likewise.
	(IX86_BUILTIN_CRC32SI): Likewise.
	(IX86_BUILTIN_CRC32DI): Likewise.
	(IX86_BUILTIN_PCMPGTQ): Likewise.
	(bdesc_crc32): Likewise.
	(bdesc_sse_3arg): Likewise.
	(ix86_expand_crc32): Likewise.
	(ix86_init_mmx_sse_builtins): Support SSE4.2.
	(ix86_expand_builtin): Likewise.

	* config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Define
	__SSE4_2__ for -msse4.2.

	* config/i386/i386.md (UNSPEC_CRC32): New for SSE4.2.
	(CRC32MODE): Likewise.
	(crc32modesuffix): Likewise.
	(crc32modeconstraint): Likewise.
	(sse4_2_crc32<mode>): Likewise.
	(sse4_2_crc32di): Likewise.

	* config/i386/i386.opt (msse4.2): New for SSE4.2.
	(msse4): Likewise.

	* config/i386/nmmintrin.h: New dummy SSE4.2 intrinsic header file.

	* config/i386/smmintrin.h: Add SSE4.2 intrinsics.

	* config/i386/sse.md (sse4_2_gtv2di3): New pattern for
	SSE4.2.
	(vcond<mode>): Use SSEMODEI instead of SSEMODE124.
	(vcondu<mode>): Likewise.

	* doc/extend.texi: Document SSE4.2 built-in functions.

	* doc/invoke.texi: Document -msse4.2/-msse4.

From-SVN: r125236
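
For orientation, here is a minimal user-level sketch of what this change enables; it is not part of the commit, the file and function names are made up, and it assumes a compiler built with this patch, invoked as gcc -msse4.2 crc32_example.c:

/* crc32_example.c -- illustrative only.  */
#include <nmmintrin.h>   /* the new header; pulls in smmintrin.h */
#include <stdio.h>
#include <string.h>

/* Accumulate the SSE4.2 CRC32 (polynomial 0x11EDC6F41) over a buffer,
   one byte at a time via _mm_crc32_u8.  */
static unsigned int
buffer_crc32 (const unsigned char *buf, size_t len)
{
  unsigned int crc = 0xffffffffu;
  size_t i;
  for (i = 0; i < len; i++)
    crc = _mm_crc32_u8 (crc, buf[i]);
  return crc;
}

int
main (void)
{
  const char *msg = "SSE4.2";
  printf ("crc32 = %08x\n",
          buffer_crc32 ((const unsigned char *) msg, strlen (msg)));
  return 0;
}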

gcc/config.gcc

@ -276,12 +276,14 @@ xscale-*-*)
i[34567]86-*-*)
cpu_type=i386
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h"
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h"
;;
x86_64-*-*)
cpu_type=i386
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h"
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h"
need_64bit_hwint=yes
;;
ia64-*-*)

gcc/config/i386/i386.c

@ -1551,6 +1551,33 @@ int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
was set or cleared on the command line. */
static int ix86_isa_flags_explicit;
/* Define a set of ISAs which aren't available for a given ISA. MMX
and SSE ISAs are handled separately. */
#define OPTION_MASK_ISA_MMX_UNSET \
(OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
#define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
#define OPTION_MASK_ISA_SSE_UNSET \
(OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
(OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
#define OPTION_MASK_ISA_SSSE3_UNSET \
(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
(OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
as -msse4.1 -msse4.2.  -mno-sse4 should be the same as -mno-sse4.1.  */
#define OPTION_MASK_ISA_SSE4 \
(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
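
The point of the *_UNSET chain is that switching one ISA level off also switches off every level that depends on it. Below is a standalone sketch of the same technique; the bit values are invented for illustration and are not GCC's real masks:

/* Standalone sketch of the chained UNSET-mask technique.  */
#include <stdio.h>

#define ISA_SSE2    (1 << 0)
#define ISA_SSE3    (1 << 1)
#define ISA_SSSE3   (1 << 2)
#define ISA_SSE4_1  (1 << 3)
#define ISA_SSE4_2  (1 << 4)
#define ISA_SSE4A   (1 << 5)

/* Each *_UNSET mask covers the next level plus everything that level
   unsets, mirroring the OPTION_MASK_ISA_*_UNSET chain above.  */
#define ISA_SSE4_2_UNSET  ISA_SSE4A
#define ISA_SSE4_1_UNSET  (ISA_SSE4_2 | ISA_SSE4_2_UNSET)
#define ISA_SSSE3_UNSET   (ISA_SSE4_1 | ISA_SSE4_1_UNSET)
#define ISA_SSE3_UNSET    (ISA_SSSE3 | ISA_SSSE3_UNSET)
#define ISA_SSE2_UNSET    (ISA_SSE3 | ISA_SSE3_UNSET)

int
main (void)
{
  int isa_flags = ISA_SSE2 | ISA_SSE3 | ISA_SSSE3 | ISA_SSE4_1
                  | ISA_SSE4_2 | ISA_SSE4A;

  /* Net effect of -mno-sse2: clear SSE2 itself and its UNSET chain.  */
  isa_flags &= ~(ISA_SSE2 | ISA_SSE2_UNSET);

  printf ("remaining flags: %#x\n", isa_flags);   /* prints 0 */
  return 0;
}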
/* Implement TARGET_HANDLE_OPTION. */
static bool
@ -1562,10 +1589,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
if (!value)
{
ix86_isa_flags
&= ~(OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A);
ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A;
ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
}
return true;
@ -1573,8 +1598,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
if (!value)
{
ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_A;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_A;
ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
}
return true;
@ -1585,14 +1610,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
if (!value)
{
ix86_isa_flags
&= ~(OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3
| OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_SSE4A);
ix86_isa_flags_explicit
|= (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3
| OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_SSE4A);
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
}
return true;
@ -1600,12 +1619,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
if (!value)
{
ix86_isa_flags
&= ~(OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3
| OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A);
ix86_isa_flags_explicit
|= (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3
| OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A);
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
}
return true;
@ -1613,12 +1628,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
if (!value)
{
ix86_isa_flags
&= ~(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_SSE4A);
ix86_isa_flags_explicit
|= (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_SSE4A);
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
}
return true;
@ -1626,10 +1637,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
if (!value)
{
ix86_isa_flags
&= ~(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A);
ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
}
return true;
@ -1637,17 +1646,36 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
if (!value)
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
}
return true;
case OPT_msse4_2:
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
if (!value)
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
}
return true;
case OPT_msse4:
ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
return true;
case OPT_mno_sse4:
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
return true;
case OPT_msse4a:
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
if (!value)
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
}
return true;
@ -1723,7 +1751,8 @@ override_options (void)
PTA_ABM = 1 << 11,
PTA_SSE4A = 1 << 12,
PTA_NO_SAHF = 1 << 13,
PTA_SSE4_1 = 1 << 14
PTA_SSE4_1 = 1 << 14,
PTA_SSE4_2 = 1 << 15
} flags;
}
const processor_alias_table[] =
@ -2001,6 +2030,9 @@ override_options (void)
if (processor_alias_table[i].flags & PTA_SSE4_1
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
if (processor_alias_table[i].flags & PTA_SSE4_2
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
if (processor_alias_table[i].flags & PTA_SSE4A
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
@ -2202,6 +2234,13 @@ override_options (void)
if (!TARGET_80387)
target_flags |= MASK_NO_FANCY_MATH_387;
/* Turn on SSE4.1 builtins and popcnt instruction for -msse4.2. */
if (TARGET_SSE4_2)
{
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
x86_popcnt = true;
}
/* Turn on SSSE3 builtins for -msse4.1. */
if (TARGET_SSE4_1)
ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
@ -10481,6 +10520,16 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
rtvec v;
switch (mode)
{
case SImode:
gcc_assert (vect);
v = gen_rtvec (4, value, value, value, value);
return gen_rtx_CONST_VECTOR (V4SImode, v);
case DImode:
gcc_assert (vect);
v = gen_rtvec (2, value, value);
return gen_rtx_CONST_VECTOR (V2DImode, v);
case SFmode:
if (vect)
v = gen_rtvec (4, value, value, value, value);
@ -10501,37 +10550,53 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
}
}
/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
Create a mask for the sign bit in MODE for an SSE register. If VECT is
true, then replicate the mask for all elements of the vector register.
If INVERT is true, then create a mask excluding the sign bit. */
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
for an SSE register. If VECT is true, then replicate the mask for
all elements of the vector register. If INVERT is true, then create
a mask excluding the sign bit. */
rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
enum machine_mode vec_mode;
enum machine_mode vec_mode, imode;
HOST_WIDE_INT hi, lo;
int shift = 63;
rtx v;
rtx mask;
/* Find the sign bit, sign extended to 2*HWI. */
if (mode == SFmode)
lo = 0x80000000, hi = lo < 0;
else if (HOST_BITS_PER_WIDE_INT >= 64)
lo = (HOST_WIDE_INT)1 << shift, hi = -1;
else
lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
switch (mode)
{
case SImode:
case SFmode:
imode = SImode;
vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
lo = 0x80000000, hi = lo < 0;
break;
case DImode:
case DFmode:
imode = DImode;
vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
if (HOST_BITS_PER_WIDE_INT >= 64)
lo = (HOST_WIDE_INT)1 << shift, hi = -1;
else
lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
break;
default:
gcc_unreachable ();
}
if (invert)
lo = ~lo, hi = ~hi;
/* Force this value into the low part of a fp vector constant. */
mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
mask = immed_double_const (lo, hi, imode);
mask = gen_lowpart (mode, mask);
v = ix86_build_const_vector (mode, vect, mask);
vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
return force_reg (vec_mode, v);
}
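
For reference, a small host-side sketch, in plain C with nothing GCC-specific, of the sign-bit constants the rewritten function produces for each mode; the inverted form corresponds to INVERT being true:

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  /* SImode/SFmode element: sign bit of a 32-bit lane, replicated 4x.  */
  uint32_t m32 = UINT32_C (1) << 31;        /* 0x80000000 */
  /* DImode/DFmode element: sign bit of a 64-bit lane, replicated 2x.  */
  uint64_t m64 = UINT64_C (1) << 63;        /* 0x8000000000000000 */

  printf ("V4SI/V4SF mask element: %#x (inverted %#x)\n", m32, ~m32);
  printf ("V2DI/V2DF mask element: %#llx (inverted %#llx)\n",
          (unsigned long long) m64, (unsigned long long) ~m64);
  return 0;
}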
@ -12739,7 +12804,7 @@ ix86_expand_fp_vcond (rtx operands[])
return true;
}
/* Expand a signed integral vector conditional move. */
/* Expand a signed/unsigned integral vector conditional move. */
bool
ix86_expand_int_vcond (rtx operands[])
@ -12783,6 +12848,29 @@ ix86_expand_int_vcond (rtx operands[])
gcc_unreachable ();
}
/* Only SSE4.1/SSE4.2 supports V2DImode. */
if (mode == V2DImode)
{
switch (code)
{
case EQ:
/* SSE4.1 supports EQ. */
if (!TARGET_SSE4_1)
return false;
break;
case GT:
case GTU:
/* SSE4.2 supports GT/GTU. */
if (!TARGET_SSE4_2)
return false;
break;
default:
gcc_unreachable ();
}
}
/* Unsigned parallel compare is not supported by the hardware. Play some
tricks to turn this into a signed comparison against 0. */
if (code == GTU)
@ -12792,25 +12880,30 @@ ix86_expand_int_vcond (rtx operands[])
switch (mode)
{
case V4SImode:
case V2DImode:
{
rtx t1, t2, mask;
/* Perform a parallel modulo subtraction. */
t1 = gen_reg_rtx (mode);
emit_insn (gen_subv4si3 (t1, cop0, cop1));
emit_insn ((mode == V4SImode
? gen_subv4si3
: gen_subv2di3) (t1, cop0, cop1));
/* Extract the original sign bit of op0. */
mask = GEN_INT (-0x80000000);
mask = gen_rtx_CONST_VECTOR (mode,
gen_rtvec (4, mask, mask, mask, mask));
mask = force_reg (mode, mask);
mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
true, false);
t2 = gen_reg_rtx (mode);
emit_insn (gen_andv4si3 (t2, cop0, mask));
emit_insn ((mode == V4SImode
? gen_andv4si3
: gen_andv2di3) (t2, cop0, mask));
/* XOR it back into the result of the subtraction. This results
in the sign bit set iff we saw unsigned underflow. */
x = gen_reg_rtx (mode);
emit_insn (gen_xorv4si3 (x, t1, t2));
emit_insn ((mode == V4SImode
? gen_xorv4si3
: gen_xorv2di3) (x, t1, t2));
code = GT;
}
@ -16637,6 +16730,14 @@ enum ix86_builtins
IX86_BUILTIN_VEC_SET_V4HI,
IX86_BUILTIN_VEC_SET_V16QI,
/* SSE4.2. */
IX86_BUILTIN_CRC32QI,
IX86_BUILTIN_CRC32HI,
IX86_BUILTIN_CRC32SI,
IX86_BUILTIN_CRC32DI,
IX86_BUILTIN_PCMPGTQ,
IX86_BUILTIN_MAX
};
@ -16728,6 +16829,15 @@ static const struct builtin_description bdesc_ptest[] =
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
};
static const struct builtin_description bdesc_crc32[] =
{
/* SSE4.2 */
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, 0, 0 },
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, 0, 0 },
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, 0, 0 },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, 0, 0 },
};
/* SSE builtins with 3 arguments and the last argument must be an 8 bit
constant or xmm0. */
static const struct builtin_description bdesc_sse_3arg[] =
@ -17050,6 +17160,9 @@ static const struct builtin_description bdesc_2arg[] =
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, 0, 0 },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, 0, 0 },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, 0, 0 },
/* SSE4.2 */
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, 0, 0 },
};
static const struct builtin_description bdesc_1arg[] =
@ -17881,6 +17994,28 @@ ix86_init_mmx_sse_builtins (void)
def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
/* SSE4.2. */
ftype = build_function_type_list (unsigned_type_node,
unsigned_type_node,
unsigned_char_type_node,
NULL_TREE);
def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
ftype = build_function_type_list (unsigned_type_node,
unsigned_type_node,
short_unsigned_type_node,
NULL_TREE);
def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
ftype = build_function_type_list (unsigned_type_node,
unsigned_type_node,
unsigned_type_node,
NULL_TREE);
def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
ftype = build_function_type_list (long_long_unsigned_type_node,
long_long_unsigned_type_node,
long_long_unsigned_type_node,
NULL_TREE);
def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
/* AMDFAM10 SSE4A New built-ins */
def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
@ -18059,6 +18194,41 @@ ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
return target;
}
/* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
static rtx
ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
{
rtx pat;
tree arg0 = CALL_EXPR_ARG (exp, 0);
tree arg1 = CALL_EXPR_ARG (exp, 1);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode0 = insn_data[icode].operand[1].mode;
enum machine_mode mode1 = insn_data[icode].operand[2].mode;
if (optimize
|| !target
|| GET_MODE (target) != tmode
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
{
op1 = copy_to_reg (op1);
op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
}
pat = GEN_FCN (icode) (target, op0, op1);
if (! pat)
return 0;
emit_insn (pat);
return target;
}
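
A hedged sketch of the C-level calls this expander ends up servicing; the wrapper names are made up, the __builtin_ia32_crc32* functions are the ones registered in ix86_init_mmx_sse_builtins above, and all of this assumes -msse4.2:

/* Illustrative only; each call expands through ix86_expand_crc32.  */
unsigned int
crc_step_byte (unsigned int crc, unsigned char byte)
{
  return __builtin_ia32_crc32qi (crc, byte);    /* crc32b */
}

unsigned int
crc_step_word (unsigned int crc, unsigned int word)
{
  return __builtin_ia32_crc32si (crc, word);    /* crc32l */
}

#ifdef __x86_64__
unsigned long long
crc_step_quad (unsigned long long crc, unsigned long long quad)
{
  return __builtin_ia32_crc32di (crc, quad);    /* crc32q, 64-bit only */
}
#endif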
/* Subroutine of ix86_expand_builtin to take care of binop insns. */
static rtx
@ -19218,6 +19388,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if (d->code == fcode)
return ix86_expand_sse_ptest (d, exp, target);
for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
if (d->code == fcode)
return ix86_expand_crc32 (d->icode, exp, target);
gcc_unreachable ();
}

gcc/config/i386/i386.h

@ -46,6 +46,7 @@ Boston, MA 02110-1301, USA. */
#define TARGET_SSE3 OPTION_ISA_SSE3
#define TARGET_SSSE3 OPTION_ISA_SSSE3
#define TARGET_SSE4_1 OPTION_ISA_SSE4_1
#define TARGET_SSE4_2 OPTION_ISA_SSE4_2
#define TARGET_SSE4A OPTION_ISA_SSE4A
#include "config/vxworks-dummy.h"
@ -568,6 +569,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
builtin_define ("__SSSE3__"); \
if (TARGET_SSE4_1) \
builtin_define ("__SSE4_1__"); \
if (TARGET_SSE4_2) \
builtin_define ("__SSE4_2__"); \
if (TARGET_SSE4A) \
builtin_define ("__SSE4A__"); \
if (TARGET_SSE_MATH && TARGET_SSE) \
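
With __SSE4_2__ now defined for -msse4.2 (and, via -msse4, together with __SSE4_1__), user code can detect the ISA level at preprocessing time. A small sketch, illustrative only:

/* Reports which SSE feature macros the compiler defined; compiled with
   -msse4.2 (or -msse4) the __SSE4_2__ line should appear.  */
#include <stdio.h>

int
main (void)
{
#ifdef __SSE4_1__
  puts ("__SSE4_1__ defined");
#endif
#ifdef __SSE4_2__
  puts ("__SSE4_2__ defined");
#endif
#ifdef __SSE4A__
  puts ("__SSE4A__ defined");
#endif
  return 0;
}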

gcc/config/i386/i386.md

@ -173,6 +173,9 @@
(UNSPEC_PTEST 140)
(UNSPEC_ROUNDP 141)
(UNSPEC_ROUNDS 142)
; For SSE4.2 support
(UNSPEC_CRC32 143)
])
(define_constants
@ -20895,6 +20898,36 @@
}
[(set_attr "type" "multi")])
(define_mode_macro CRC32MODE [QI HI SI])
(define_mode_attr crc32modesuffix [(QI "b") (HI "w") (SI "l")])
(define_mode_attr crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")])
(define_insn "sse4_2_crc32<mode>"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
[(match_operand:SI 1 "register_operand" "0")
(match_operand:CRC32MODE 2 "nonimmediate_operand" "<crc32modeconstraint>")]
UNSPEC_CRC32))]
"TARGET_SSE4_2"
"crc32<crc32modesuffix>\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog1")
(set_attr "prefix_rep" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "SI")])
(define_insn "sse4_2_crc32di"
[(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI
[(match_operand:DI 1 "register_operand" "0")
(match_operand:DI 2 "nonimmediate_operand" "rm")]
UNSPEC_CRC32))]
"TARGET_SSE4_2 && TARGET_64BIT"
"crc32q\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog1")
(set_attr "prefix_rep" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(include "mmx.md")
(include "sse.md")
(include "sync.md")
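
The SImode/HImode/QImode forms share one pattern through the CRC32MODE mode macro, while the DImode form is separate because it also requires TARGET_64BIT. A hedged user-level sketch of code expected to map onto these patterns (assumes -msse4.2; not a guaranteed codegen outcome):

#include <nmmintrin.h>

unsigned int
crc_u32 (unsigned int crc, unsigned int v)
{
  return _mm_crc32_u32 (crc, v);        /* sse4_2_crc32si -> crc32l */
}

#ifdef __x86_64__
unsigned long long
crc_u64 (unsigned long long crc, unsigned long long v)
{
  return _mm_crc32_u64 (crc, v);        /* sse4_2_crc32di -> crc32q */
}
#endif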

gcc/config/i386/i386.opt

@ -225,6 +225,18 @@ msse4.1
Target Report Mask(ISA_SSE4_1) Var(ix86_isa_flags) VarExists
Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation
msse4.2
Target Report Mask(ISA_SSE4_2) Var(ix86_isa_flags) VarExists
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation
msse4
Target RejectNegative Report Mask(ISA_SSE4_2) MaskExists Var(ix86_isa_flags) VarExists
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation
mno-sse4
Target RejectNegative Report InverseMask(ISA_SSE4_1) MaskExists Var(ix86_isa_flags) VarExists
Do not support SSE4.1 and SSE4.2 built-in functions and code generation
msse4a
Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) VarExists
Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation

gcc/config/i386/nmmintrin.h

@ -0,0 +1,40 @@
/* Copyright (C) 2007 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* As a special exception, if you include this header file into source
files compiled by GCC, this header file does not by itself cause
the resulting executable to be covered by the GNU General Public
License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General
Public License. */
/* Implemented from the specification included in the Intel C++ Compiler
User Guide and Reference, version 10.0. */
#ifndef _NMMINTRIN_H_INCLUDED
#define _NMMINTRIN_H_INCLUDED
#ifndef __SSE4_2__
# error "SSE4.2 instruction set not enabled"
#else
/* We just include SSE4.1 header file. */
#include <smmintrin.h>
#endif /* __SSE4_2__ */
#endif /* _NMMINTRIN_H_INCLUDED */
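
The header is deliberately just a guard plus an include. A sketch of a translation unit that relies on it (illustrative; built with -msse4.2, otherwise the #error above, "SSE4.2 instruction set not enabled", stops the compile):

#include <nmmintrin.h>

int
bits_set (unsigned int x)
{
  /* _mm_popcnt_u32 comes from the new SSE4.2 block in smmintrin.h,
     which nmmintrin.h makes visible.  */
  return _mm_popcnt_u32 (x);
}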

gcc/config/i386/smmintrin.h

@ -573,6 +573,246 @@ _mm_stream_load_si128 (__m128i *__X)
return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
}
#ifdef __SSE4_2__
/* These macros specify the source data format. */
#define SIDD_UBYTE_OPS 0x00
#define SIDD_UWORD_OPS 0x01
#define SIDD_SBYTE_OPS 0x02
#define SIDD_SWORD_OPS 0x03
/* These macros specify the comparison operation. */
#define SIDD_CMP_EQUAL_ANY 0x00
#define SIDD_CMP_RANGES 0x04
#define SIDD_CMP_EQUAL_EACH 0x08
#define SIDD_CMP_EQUAL_ORDERED 0x0c
/* These macros specify the polarity. */
#define SIDD_POSITIVE_POLARITY 0x00
#define SIDD_NEGATIVE_POLARITY 0x10
#define SIDD_MASKED_POSITIVE_POLARITY 0x20
#define SIDD_MASKED_NEGATIVE_POLARITY 0x30
/* These macros specify the output selection in _mm_cmpXstri (). */
#define SIDD_LEAST_SIGNIFICANT 0x00
#define SIDD_MOST_SIGNIFICANT 0x40
/* These macros specify the output selection in _mm_cmpXstrm (). */
#define SIDD_BIT_MASK 0x00
#define SIDD_UNIT_MASK 0x40
/* Intrinsics for text/string processing. */
#if 0
static __inline __m128i __attribute__((__always_inline__))
_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
{
return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
(__v16qi)__Y,
__M);
}
static __inline int __attribute__((__always_inline__))
_mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
(__v16qi)__Y,
__M);
}
static __inline __m128i __attribute__((__always_inline__))
_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
(__v16qi)__Y, __LY,
__M);
}
static __inline int __attribute__((__always_inline__))
_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
(__v16qi)__Y, __LY,
__M);
}
#else
#define _mm_cmpistrm(X, Y, M) \
((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(X), (__v16qi)(Y), (M)))
#define _mm_cmpistri(X, Y, M) \
__builtin_ia32_pcmpistri128 ((__v16qi)(X), (__v16qi)(Y), (M))
#define _mm_cmpestrm(X, LX, Y, LY, M) \
((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(X), (int)(LX), \
(__v16qi)(Y), (int)(LY), (M)))
#define _mm_cmpestri(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestri128 ((__v16qi)(X), (int)(LX), \
(__v16qi)(Y), (int)(LY), (M))
#endif
/* Intrinsics for text/string processing and reading values of
EFlags. */
#if 0
static __inline int __attribute__((__always_inline__))
_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
(__v16qi)__Y,
__M);
}
static __inline int __attribute__((__always_inline__))
_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
(__v16qi)__Y,
__M);
}
static __inline int __attribute__((__always_inline__))
_mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
(__v16qi)__Y,
__M);
}
static __inline int __attribute__((__always_inline__))
_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
(__v16qi)__Y,
__M);
}
static __inline int __attribute__((__always_inline__))
_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
(__v16qi)__Y,
__M);
}
static __inline int __attribute__((__always_inline__))
_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
(__v16qi)__Y, __LY,
__M);
}
static __inline int __attribute__((__always_inline__))
_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
(__v16qi)__Y, __LY,
__M);
}
static __inline int __attribute__((__always_inline__))
_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
(__v16qi)__Y, __LY,
__M);
}
static __inline int __attribute__((__always_inline__))
_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
(__v16qi)__Y, __LY,
__M);
}
static __inline int __attribute__((__always_inline__))
_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
(__v16qi)__Y, __LY,
__M);
}
#else
#define _mm_cmpistra(X, Y, M) \
__builtin_ia32_pcmpistria128 ((__v16qi)(X), (__v16qi)(Y), (M))
#define _mm_cmpistrc(X, Y, M) \
__builtin_ia32_pcmpistric128 ((__v16qi)(X), (__v16qi)(Y), (M))
#define _mm_cmpistro(X, Y, M) \
__builtin_ia32_pcmpistrio128 ((__v16qi)(X), (__v16qi)(Y), (M))
#define _mm_cmpistrs(X, Y, M) \
__builtin_ia32_pcmpistris128 ((__v16qi)(X), (__v16qi)(Y), (M))
#define _mm_cmpistrz(X, Y, M) \
__builtin_ia32_pcmpistriz128 ((__v16qi)(X), (__v16qi)(Y), (M))
#define _mm_cmpestra(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestria128 ((__v16qi)(X), (int)(LX), \
(__v16qi)(Y), (int)(LY), (M))
#define _mm_cmpestrc(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestric128 ((__v16qi)(X), (int)(LX), \
(__v16qi)(Y), (int)(LY), (M))
#define _mm_cmpestro(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestrio128 ((__v16qi)(X), (int)(LX), \
(__v16qi)(Y), (int)(LY), (M))
#define _mm_cmpestrs(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestris128 ((__v16qi)(X), (int)(LX), \
(__v16qi)(Y), (int)(LY), (M))
#define _mm_cmpestrz(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestriz128 ((__v16qi)(X), (int)(LX), \
(__v16qi)(Y), (int)(LY), (M))
#endif
/* Packed integer 64-bit comparison, zeroing or filling with ones the
corresponding parts of the result. */
static __inline __m128i __attribute__((__always_inline__))
_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
}
/* Calculate a number of bits set to 1. */
static __inline int __attribute__((__always_inline__))
_mm_popcnt_u32 (unsigned int __X)
{
return __builtin_popcount (__X);
}
#ifdef __x86_64__
static __inline long long __attribute__((__always_inline__))
_mm_popcnt_u64 (unsigned long long __X)
{
return __builtin_popcountll (__X);
}
#endif
/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
static __inline unsigned int __attribute__((__always_inline__))
_mm_crc32_u8 (unsigned int __C, unsigned char __V)
{
return __builtin_ia32_crc32qi (__C, __V);
}
static __inline unsigned int __attribute__((__always_inline__))
_mm_crc32_u16 (unsigned int __C, unsigned short __V)
{
return __builtin_ia32_crc32hi (__C, __V);
}
static __inline unsigned int __attribute__((__always_inline__))
_mm_crc32_u32 (unsigned int __C, unsigned int __V)
{
return __builtin_ia32_crc32si (__C, __V);
}
#ifdef __x86_64__
static __inline unsigned long long __attribute__((__always_inline__))
_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
{
return __builtin_ia32_crc32di (__C, __V);
}
#endif
#endif /* __SSE4_2__ */
#endif /* __SSE4_1__ */
#endif /* _SMMINTRIN_H_INCLUDED */
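
A usage sketch of the new string-compare macros and the 64-bit compare intrinsic; the buffers and expected results are illustrative, and it assumes -msse4.2:

/* Illustrative use of the SSE4.2 additions; requires -msse4.2.  */
#include <smmintrin.h>
#include <stdio.h>

int
main (void)
{
  /* SIDD_CMP_EQUAL_ANY treats the first operand as a set of bytes to look
     for; _mm_cmpistri returns the index of the first match in the second
     operand, or 16 if there is none.  Here 'a' is at index 6.  */
  char set_buf[16]  = "aeiou";
  char data_buf[17] = "crc32 and pcmpgt";
  __m128i set  = _mm_loadu_si128 ((__m128i *) set_buf);
  __m128i data = _mm_loadu_si128 ((__m128i *) data_buf);
  int idx = _mm_cmpistri (set, data,
                          SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_ANY
                          | SIDD_LEAST_SIGNIFICANT);

  /* Packed signed 64-bit greater-than, new with pcmpgtq.  */
  __m128i a  = _mm_set_epi32 (0, 5, 0, 7);   /* lanes: 5, 7 */
  __m128i b  = _mm_set_epi32 (0, 3, 0, 2);   /* lanes: 3, 2 */
  __m128i gt = _mm_cmpgt_epi64 (a, b);       /* both lanes all-ones */

  printf ("first vowel at index %d\n", idx);
  printf ("pcmpgtq byte mask: %#x\n", _mm_movemask_epi8 (gt));
  return 0;
}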

gcc/config/i386/sse.md

@ -3633,14 +3633,24 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse4_2_gtv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(gt:V2DI
(match_operand:V2DI 1 "nonimmediate_operand" "0")
(match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE4_2"
"pcmpgtq\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
(set_attr "mode" "TI")])
(define_expand "vcond<mode>"
[(set (match_operand:SSEMODE124 0 "register_operand" "")
(if_then_else:SSEMODE124
[(set (match_operand:SSEMODEI 0 "register_operand" "")
(if_then_else:SSEMODEI
(match_operator 3 ""
[(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
(match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
(match_operand:SSEMODE124 1 "general_operand" "")
(match_operand:SSEMODE124 2 "general_operand" "")))]
[(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
(match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
(match_operand:SSEMODEI 1 "general_operand" "")
(match_operand:SSEMODEI 2 "general_operand" "")))]
"TARGET_SSE2"
{
if (ix86_expand_int_vcond (operands))
@ -3650,13 +3660,13 @@
})
(define_expand "vcondu<mode>"
[(set (match_operand:SSEMODE124 0 "register_operand" "")
(if_then_else:SSEMODE124
[(set (match_operand:SSEMODEI 0 "register_operand" "")
(if_then_else:SSEMODEI
(match_operator 3 ""
[(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
(match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
(match_operand:SSEMODE124 1 "general_operand" "")
(match_operand:SSEMODE124 2 "general_operand" "")))]
[(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
(match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
(match_operand:SSEMODEI 1 "general_operand" "")
(match_operand:SSEMODEI 2 "general_operand" "")))]
"TARGET_SSE2"
{
if (ix86_expand_int_vcond (operands))
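
Moving these expanders from SSEMODE124 to SSEMODEI adds the V2DI case, so loops over 64-bit elements like the one below become, in principle, candidates for pcmpgtq-based vectorization with -msse4.2 -ftree-vectorize; whether the vectorizer actually takes it depends on the rest of the pipeline. Illustrative only:

#define N 1024

long long a[N], b[N], c[N];

void
vmax64 (void)
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = a[i] > b[i] ? a[i] : b[i];
}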

gcc/doc/extend.texi

@ -7502,6 +7502,54 @@ Generates the @code{pextrd} machine instruction.
Generates the @code{pextrq} machine instruction in 64bit mode.
@end table
The following built-in functions are available when @option{-msse4.2} is
used. All of them generate the machine instruction that is part of the
name.
@smallexample
v16qi __builtin_ia32_pcmpestrm128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestri128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestria128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestric128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestrio128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestris128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestriz128 (v16qi, int, v16qi, int, const int)
v16qi __builtin_ia32_pcmpistrm128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistri128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistria128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistric128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistrio128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistris128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistriz128 (v16qi, v16qi, const int)
__v2di __builtin_ia32_pcmpgtq (__v2di, __v2di)
@end smallexample
The following built-in functions are available when @option{-msse4.2} is
used.
@table @code
@item unsigned int __builtin_ia32_crc32qi (unsigned int, unsigned char)
Generates the @code{crc32b} machine instruction.
@item unsigned int __builtin_ia32_crc32hi (unsigned int, unsigned short)
Generates the @code{crc32w} machine instruction.
@item unsigned int __builtin_ia32_crc32si (unsigned int, unsigned int)
Generates the @code{crc32l} machine instruction.
@item unsigned long long __builtin_ia32_crc32di (unsigned long long, unsigned long long)
Generates the @code{crc32q} machine instruction in 64bit mode.
@end table
The following built-in functions are changed to generate new SSE4.2
instructions when @option{-msse4.2} is used.
@table @code
@item int __builtin_popcount (unsigned int)
Generates the @code{popcntl} machine instruction.
@item int __builtin_popcountl (unsigned long)
Generates the @code{popcntl} or @code{popcntq} machine instruction,
depending on the size of @code{unsigned long}.
@item int __builtin_popcountll (unsigned long long)
Generates the @code{popcntq} machine instruction.
@end table
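
In other words, existing calls to the popcount builtins can start emitting the new instruction simply by recompiling with the option; a small sketch:

/* With -msse4.2 these should use popcnt; without it they behave as before.  */
int
bits32 (unsigned int x)
{
  return __builtin_popcount (x);        /* popcntl */
}

int
bits64 (unsigned long long x)
{
  return __builtin_popcountll (x);      /* popcntq on x86_64 */
}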
The following built-in functions are available when @option{-msse4a} is used.
@smallexample

gcc/doc/invoke.texi

@ -548,7 +548,7 @@ Objective-C and Objective-C++ Dialects}.
-mno-fp-ret-in-387 -msoft-float @gol
-mno-wide-multiply -mrtd -malign-double @gol
-mpreferred-stack-boundary=@var{num} -mcx16 -msahf @gol
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 @gol
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol
-msse4a -m3dnow -mpopcnt -mabm @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
@ -10273,6 +10273,10 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -mno-ssse3
@item -msse4.1
@itemx -mno-sse4.1
@item -msse4.2
@itemx -mno-sse4.2
@item -msse4
@itemx -mno-sse4
@item -msse4a
@item -mno-sse4a
@item -m3dnow