bmmintrin.h: Replace by #error.

* config/i386/bmmintrin.h: Replace by #error.

	Revert:
	Michael Meissner  <michael.meissner@amd.com>
	Dwarakanath Rajagopal  <dwarak.rajagopal@amd.com>
	Tony Linthicum  <tony.linthicum@amd.com>

	* config/i386/i386.h (TARGET_SSE5): New macro for SSE5.
	(TARGET_ROUND): New macro for the round/ptest instructions which
	are shared between SSE4.1 and SSE5.
	(OPTION_MASK_ISA_ROUND): Ditto.
	(OPTION_ISA_ROUND): Ditto.
	(TARGET_FUSED_MADD): New macro for -mfused-madd switch.
	(TARGET_CPU_CPP_BUILTINS): Add SSE5 support.

	* config/i386/i386.opt (-msse5): New switch for SSE5 support.
	(-mfused-madd): New switch to give users control over whether the
	compiler optimizes to use the multiply/add SSE5 instructions.

	* config/i386/i386.c (enum pta_flags): Add PTA_SSE5.
	(ix86_handle_option): Turn off 3dnow if -msse5.
	(override_options): Add SSE5 support.
	(print_operand): %Y prints comparison codes for SSE5 com/pcom
	instructions.
	(ix86_expand_sse_movcc): Add SSE5 support.
	(ix86_expand_sse5_unpack): New function to use pperm to unpack a
	vector type to the next largest size.
	(ix86_expand_sse5_pack): New function to use pperm to pack a
	vector type to the next smallest size.
	(IX86_BUILTIN_FMADDSS): New for SSE5 intrinsic.
	(IX86_BUILTIN_FMADDSD): Ditto.
	(IX86_BUILTIN_FMADDPS): Ditto.
	(IX86_BUILTIN_FMADDPD): Ditto.
	(IX86_BUILTIN_FMSUBSS): Ditto.
	(IX86_BUILTIN_FMSUBSD): Ditto.
	(IX86_BUILTIN_FMSUBPS): Ditto.
	(IX86_BUILTIN_FMSUBPD): Ditto.
	(IX86_BUILTIN_FNMADDSS): Ditto.
	(IX86_BUILTIN_FNMADDSD): Ditto.
	(IX86_BUILTIN_FNMADDPS): Ditto.
	(IX86_BUILTIN_FNMADDPD): Ditto.
	(IX86_BUILTIN_FNMSUBSS): Ditto.
	(IX86_BUILTIN_FNMSUBSD): Ditto.
	(IX86_BUILTIN_FNMSUBPS): Ditto.
	(IX86_BUILTIN_FNMSUBPD): Ditto.
	(IX86_BUILTIN_PCMOV_V2DI): Ditto.
	(IX86_BUILTIN_PCMOV_V4SI): Ditto.
	(IX86_BUILTIN_PCMOV_V8HI): Ditto.
	(IX86_BUILTIN_PCMOV_V16QI): Ditto.
	(IX86_BUILTIN_PCMOV_V4SF): Ditto.
	(IX86_BUILTIN_PCMOV_V2DF): Ditto.
	(IX86_BUILTIN_PPERM): Ditto.
	(IX86_BUILTIN_PERMPS): Ditto.
	(IX86_BUILTIN_PERMPD): Ditto.
	(IX86_BUILTIN_PMACSSWW): Ditto.
	(IX86_BUILTIN_PMACSWW): Ditto.
	(IX86_BUILTIN_PMACSSWD): Ditto.
	(IX86_BUILTIN_PMACSWD): Ditto.
	(IX86_BUILTIN_PMACSSDD): Ditto.
	(IX86_BUILTIN_PMACSDD): Ditto.
	(IX86_BUILTIN_PMACSSDQL): Ditto.
	(IX86_BUILTIN_PMACSSDQH): Ditto.
	(IX86_BUILTIN_PMACSDQL): Ditto.
	(IX86_BUILTIN_PMACSDQH): Ditto.
	(IX86_BUILTIN_PMADCSSWD): Ditto.
	(IX86_BUILTIN_PMADCSWD): Ditto.
	(IX86_BUILTIN_PHADDBW): Ditto.
	(IX86_BUILTIN_PHADDBD): Ditto.
	(IX86_BUILTIN_PHADDBQ): Ditto.
	(IX86_BUILTIN_PHADDWD): Ditto.
	(IX86_BUILTIN_PHADDWQ): Ditto.
	(IX86_BUILTIN_PHADDDQ): Ditto.
	(IX86_BUILTIN_PHADDUBW): Ditto.
	(IX86_BUILTIN_PHADDUBD): Ditto.
	(IX86_BUILTIN_PHADDUBQ): Ditto.
	(IX86_BUILTIN_PHADDUWD): Ditto.
	(IX86_BUILTIN_PHADDUWQ): Ditto.
	(IX86_BUILTIN_PHADDUDQ): Ditto.
	(IX86_BUILTIN_PHSUBBW): Ditto.
	(IX86_BUILTIN_PHSUBWD): Ditto.
	(IX86_BUILTIN_PHSUBDQ): Ditto.
	(IX86_BUILTIN_PROTB): Ditto.
	(IX86_BUILTIN_PROTW): Ditto.
	(IX86_BUILTIN_PROTD): Ditto.
	(IX86_BUILTIN_PROTQ): Ditto.
	(IX86_BUILTIN_PROTB_IMM): Ditto.
	(IX86_BUILTIN_PROTW_IMM): Ditto.
	(IX86_BUILTIN_PROTD_IMM): Ditto.
	(IX86_BUILTIN_PROTQ_IMM): Ditto.
	(IX86_BUILTIN_PSHLB): Ditto.
	(IX86_BUILTIN_PSHLW): Ditto.
	(IX86_BUILTIN_PSHLD): Ditto.
	(IX86_BUILTIN_PSHLQ): Ditto.
	(IX86_BUILTIN_PSHAB): Ditto.
	(IX86_BUILTIN_PSHAW): Ditto.
	(IX86_BUILTIN_PSHAD): Ditto.
	(IX86_BUILTIN_PSHAQ): Ditto.
	(IX86_BUILTIN_FRCZSS): Ditto.
	(IX86_BUILTIN_FRCZSD): Ditto.
	(IX86_BUILTIN_FRCZPS): Ditto.
	(IX86_BUILTIN_FRCZPD): Ditto.
	(IX86_BUILTIN_CVTPH2PS): Ditto.
	(IX86_BUILTIN_CVTPS2PH): Ditto.
	(IX86_BUILTIN_COMEQSS): Ditto.
	(IX86_BUILTIN_COMNESS): Ditto.
	(IX86_BUILTIN_COMLTSS): Ditto.
	(IX86_BUILTIN_COMLESS): Ditto.
	(IX86_BUILTIN_COMGTSS): Ditto.
	(IX86_BUILTIN_COMGESS): Ditto.
	(IX86_BUILTIN_COMUEQSS): Ditto.
	(IX86_BUILTIN_COMUNESS): Ditto.
	(IX86_BUILTIN_COMULTSS): Ditto.
	(IX86_BUILTIN_COMULESS): Ditto.
	(IX86_BUILTIN_COMUGTSS): Ditto.
	(IX86_BUILTIN_COMUGESS): Ditto.
	(IX86_BUILTIN_COMORDSS): Ditto.
	(IX86_BUILTIN_COMUNORDSS): Ditto.
	(IX86_BUILTIN_COMFALSESS): Ditto.
	(IX86_BUILTIN_COMTRUESS): Ditto.
	(IX86_BUILTIN_COMEQSD): Ditto.
	(IX86_BUILTIN_COMNESD): Ditto.
	(IX86_BUILTIN_COMLTSD): Ditto.
	(IX86_BUILTIN_COMLESD): Ditto.
	(IX86_BUILTIN_COMGTSD): Ditto.
	(IX86_BUILTIN_COMGESD): Ditto.
	(IX86_BUILTIN_COMUEQSD): Ditto.
	(IX86_BUILTIN_COMUNESD): Ditto.
	(IX86_BUILTIN_COMULTSD): Ditto.
	(IX86_BUILTIN_COMULESD): Ditto.
	(IX86_BUILTIN_COMUGTSD): Ditto.
	(IX86_BUILTIN_COMUGESD): Ditto.
	(IX86_BUILTIN_COMORDSD): Ditto.
	(IX86_BUILTIN_COMUNORDSD): Ditto.
	(IX86_BUILTIN_COMFALSESD): Ditto.
	(IX86_BUILTIN_COMTRUESD): Ditto.
	(IX86_BUILTIN_COMEQPS): Ditto.
	(IX86_BUILTIN_COMNEPS): Ditto.
	(IX86_BUILTIN_COMLTPS): Ditto.
	(IX86_BUILTIN_COMLEPS): Ditto.
	(IX86_BUILTIN_COMGTPS): Ditto.
	(IX86_BUILTIN_COMGEPS): Ditto.
	(IX86_BUILTIN_COMUEQPS): Ditto.
	(IX86_BUILTIN_COMUNEPS): Ditto.
	(IX86_BUILTIN_COMULTPS): Ditto.
	(IX86_BUILTIN_COMULEPS): Ditto.
	(IX86_BUILTIN_COMUGTPS): Ditto.
	(IX86_BUILTIN_COMUGEPS): Ditto.
	(IX86_BUILTIN_COMORDPS): Ditto.
	(IX86_BUILTIN_COMUNORDPS): Ditto.
	(IX86_BUILTIN_COMFALSEPS): Ditto.
	(IX86_BUILTIN_COMTRUEPS): Ditto.
	(IX86_BUILTIN_COMEQPD): Ditto.
	(IX86_BUILTIN_COMNEPD): Ditto.
	(IX86_BUILTIN_COMLTPD): Ditto.
	(IX86_BUILTIN_COMLEPD): Ditto.
	(IX86_BUILTIN_COMGTPD): Ditto.
	(IX86_BUILTIN_COMGEPD): Ditto.
	(IX86_BUILTIN_COMUEQPD): Ditto.
	(IX86_BUILTIN_COMUNEPD): Ditto.
	(IX86_BUILTIN_COMULTPD): Ditto.
	(IX86_BUILTIN_COMULEPD): Ditto.
	(IX86_BUILTIN_COMUGTPD): Ditto.
	(IX86_BUILTIN_COMUGEPD): Ditto.
	(IX86_BUILTIN_COMORDPD): Ditto.
	(IX86_BUILTIN_COMUNORDPD): Ditto.
	(IX86_BUILTIN_COMFALSEPD): Ditto.
	(IX86_BUILTIN_COMTRUEPD): Ditto.
	(IX86_BUILTIN_PCOMEQUB): Ditto.
	(IX86_BUILTIN_PCOMNEUB): Ditto.
	(IX86_BUILTIN_PCOMLTUB): Ditto.
	(IX86_BUILTIN_PCOMLEUB): Ditto.
	(IX86_BUILTIN_PCOMGTUB): Ditto.
	(IX86_BUILTIN_PCOMGEUB): Ditto.
	(IX86_BUILTIN_PCOMFALSEUB): Ditto.
	(IX86_BUILTIN_PCOMTRUEUB): Ditto.
	(IX86_BUILTIN_PCOMEQUW): Ditto.
	(IX86_BUILTIN_PCOMNEUW): Ditto.
	(IX86_BUILTIN_PCOMLTUW): Ditto.
	(IX86_BUILTIN_PCOMLEUW): Ditto.
	(IX86_BUILTIN_PCOMGTUW): Ditto.
	(IX86_BUILTIN_PCOMGEUW): Ditto.
	(IX86_BUILTIN_PCOMFALSEUW): Ditto.
	(IX86_BUILTIN_PCOMTRUEUW): Ditto.
	(IX86_BUILTIN_PCOMEQUD): Ditto.
	(IX86_BUILTIN_PCOMNEUD): Ditto.
	(IX86_BUILTIN_PCOMLTUD): Ditto.
	(IX86_BUILTIN_PCOMLEUD): Ditto.
	(IX86_BUILTIN_PCOMGTUD): Ditto.
	(IX86_BUILTIN_PCOMGEUD): Ditto.
	(IX86_BUILTIN_PCOMFALSEUD): Ditto.
	(IX86_BUILTIN_PCOMTRUEUD): Ditto.
	(IX86_BUILTIN_PCOMEQUQ): Ditto.
	(IX86_BUILTIN_PCOMNEUQ): Ditto.
	(IX86_BUILTIN_PCOMLTUQ): Ditto.
	(IX86_BUILTIN_PCOMLEUQ): Ditto.
	(IX86_BUILTIN_PCOMGTUQ): Ditto.
	(IX86_BUILTIN_PCOMGEUQ): Ditto.
	(IX86_BUILTIN_PCOMFALSEUQ): Ditto.
	(IX86_BUILTIN_PCOMTRUEUQ): Ditto.
	(IX86_BUILTIN_PCOMEQB): Ditto.
	(IX86_BUILTIN_PCOMNEB): Ditto.
	(IX86_BUILTIN_PCOMLTB): Ditto.
	(IX86_BUILTIN_PCOMLEB): Ditto.
	(IX86_BUILTIN_PCOMGTB): Ditto.
	(IX86_BUILTIN_PCOMGEB): Ditto.
	(IX86_BUILTIN_PCOMFALSEB): Ditto.
	(IX86_BUILTIN_PCOMTRUEB): Ditto.
	(IX86_BUILTIN_PCOMEQW): Ditto.
	(IX86_BUILTIN_PCOMNEW): Ditto.
	(IX86_BUILTIN_PCOMLTW): Ditto.
	(IX86_BUILTIN_PCOMLEW): Ditto.
	(IX86_BUILTIN_PCOMGTW): Ditto.
	(IX86_BUILTIN_PCOMGEW): Ditto.
	(IX86_BUILTIN_PCOMFALSEW): Ditto.
	(IX86_BUILTIN_PCOMTRUEW): Ditto.
	(IX86_BUILTIN_PCOMEQD): Ditto.
	(IX86_BUILTIN_PCOMNED): Ditto.
	(IX86_BUILTIN_PCOMLTD): Ditto.
	(IX86_BUILTIN_PCOMLED): Ditto.
	(IX86_BUILTIN_PCOMGTD): Ditto.
	(IX86_BUILTIN_PCOMGED): Ditto.
	(IX86_BUILTIN_PCOMFALSED): Ditto.
	(IX86_BUILTIN_PCOMTRUED): Ditto.
	(IX86_BUILTIN_PCOMEQQ): Ditto.
	(IX86_BUILTIN_PCOMNEQ): Ditto.
	(IX86_BUILTIN_PCOMLTQ): Ditto.
	(IX86_BUILTIN_PCOMLEQ): Ditto.
	(IX86_BUILTIN_PCOMGTQ): Ditto.
	(IX86_BUILTIN_PCOMGEQ): Ditto.
	(IX86_BUILTIN_PCOMFALSEQ): Ditto.
	(IX86_BUILTIN_PCOMTRUEQ): Ditto.
	(enum multi_arg_type): New enum for describing the various SSE5
	intrinsic argument types.
	(bdesc_multi_arg): New table for SSE5 intrinsics.
	(ix86_init_mmx_sse_builtins): Add SSE5 intrinsic support.
	(ix86_expand_multi_arg_builtin): New function for creating SSE5
	intrinsics.
	(ix86_expand_builtin): Add SSE5 intrinsic support.
	(ix86_sse5_valid_op_p): New function to validate SSE5 3 and 4
	operand instructions.
	(ix86_expand_sse5_multiple_memory): New function to split the
	second memory reference from SSE5 instructions.
	(type_has_variadic_args_p): Delete in favor of stdarg_p.
	(ix86_return_pops_args): Use stdarg_p to determine if the function
	has variable arguments.
	(ix86_setup_incoming_varargs): Ditto.
	(x86_this_parameter): Ditto.

	* config/i386/i386-protos.h (ix86_expand_sse5_unpack): Add
	declaration.
	(ix86_expand_sse5_pack): Ditto.
	(ix86_sse5_valid_op_p): Ditto.
	(ix86_expand_sse5_multiple_memory): Ditto.

	* config/i386/i386.md (UNSPEC_SSE5_INTRINSIC): Add new UNSPEC
	constant for SSE5 support.
	(UNSPEC_SSE5_UNSIGNED_CMP): Ditto.
	(UNSPEC_SSE5_TRUEFALSE): Ditto.
	(UNSPEC_SSE5_PERMUTE): Ditto.
	(UNSPEC_SSE5_ASHIFT): Ditto.
	(UNSPEC_SSE5_LSHIFT): Ditto.
	(UNSPEC_FRCZ): Ditto.
	(UNSPEC_CVTPH2PS): Ditto.
	(UNSPEC_CVTPS2PH): Ditto.
	(PCOM_FALSE): Add new constant for true/false SSE5 comparisons.
	(PCOM_TRUE): Ditto.
	(COM_FALSE_S): Ditto.
	(COM_FALSE_P): Ditto.
	(COM_TRUE_S): Ditto.
	(COM_TRUE_P): Ditto.
	(type attribute): Add ssemuladd, sseiadd1, ssecvt1, sse4arg types.
	(unit attribute): Add support for ssemuladd, ssecvt1, sseiadd1, sse4arg
	types.
	(memory attribute): Ditto.
	(sse4_1_round<mode>2): Use TARGET_ROUND instead of TARGET_SSE4_1.
	Use SSE4_1_ROUND_* constants instead of hard coded numbers.
	(rint<mode>2): Use TARGET_ROUND instead of TARGET_SSE4_1.
	(floor<mode>2): Ditto.
	(ceil<mode>2): Ditto.
	(btrunc<mode>2): Ditto.
	(nearbyintdf2): Ditto.
	(nearbyintsf2): Ditto.
	(sse_setccsf): Disable if SSE5.
	(sse_setccdf): Ditto.
	(sse5_setcc<mode>): New support for SSE5 conditional move.
	(sse5_pcmov_<mode>): Ditto.

	* config/i386/sse.md (SSEMODE1248): New mode iterator for SSE5.
	(SSEMODEF4): Ditto.
	(SSEMODEF2P): Ditto.
	(ssemodesuffixf4): New mode attribute for SSE5.
	(ssemodesuffixf2s): Ditto.
	(ssemodesuffixf2c): Ditto.
	(sserotatemax): Ditto.
	(ssescalarmode): Ditto.
	(sse_maskcmpv4sf3): Disable if SSE5.
	(sse_maskcmpv2df3): Ditto.
	(sse_vmmaskcmpv4sf3): Ditto.
	(sse5_fmadd<mode>4): Add SSE5 floating point multiply/add
	instructions.
	(sse5_vmfmadd<mode>4): Ditto.
	(sse5_fmsub<mode>4): Ditto.
	(sse5_vmfmsub<mode>4): Ditto.
	(sse5_fnmadd<mode>4): Ditto.
	(sse5_vmfnmadd<mode>4): Ditto.
	(sse5_fnmsub<mode>4): Ditto.
	(sse5_vmfnmsub<mode>4): Ditto.
	(sse5i_fmadd<mode>4): Ditto.
	(sse5i_fmsub<mode>4): Ditto.
	(sse5i_fnmadd<mode>4): Ditto.
	(sse5i_fnmsub<mode>4): Ditto.
	(sse5i_vmfmadd<mode>4): Ditto.
	(sse5i_vmfmsub<mode>4): Ditto.
	(sse5i_vmfnmadd<mode>4): Ditto.
	(sse5i_vmfnmsub<mode>4): Ditto.
	(mulv16qi3): Add SSE5 support.
	(mulv4si3): Ditto.
	(sse5_mulv4si3): New insn for 32-bit multiply support on SSE5.
	(sse2_mulv4si3): Disable if SSE5.
	(sse4_1_roundpd): Use TARGET_ROUND instead of TARGET_SSE4_1.
	(sse4_1_roundps): Ditto.
	(sse4_1_roundsd): Ditto.
	(sse4_1_roundss): Ditto.
	(sse_maskcmpv4sf3): Disable if SSE5 so the SSE5 instruction will
	be generated.
	(sse_maskcmpsf3): Ditto.
	(sse_vmmaskcmpv4sf3): Ditto.
	(sse2_maskcmpv2df3): Ditto.
	(sse2_maskcmpdf3): Ditto.
	(sse2_vmmaskcmpv2df3): Ditto.
	(sse2_eq<mode>3): Ditto.
	(sse2_gt<mode>3): Ditto.
	(sse5_pcmov_<mode>): Add SSE5 support.
	(vec_unpacku_hi_v16qi): Ditto.
	(vec_unpacks_hi_v16qi): Ditto.
	(vec_unpacku_lo_v16qi): Ditto.
	(vec_unpacks_lo_v16qi): Ditto.
	(vec_unpacku_hi_v8hi): Ditto.
	(vec_unpacks_hi_v8hi): Ditto.
	(vec_unpacku_lo_v8hi): Ditto.
	(vec_unpacks_lo_v8hi): Ditto.
	(vec_unpacku_hi_v4si): Ditto.
	(vec_unpacks_hi_v4si): Ditto.
	(vec_unpacku_lo_v4si): Ditto.
	(vec_unpacks_lo_v4si): Ditto.
	(sse5_pmacsww): New SSE5 intrinsic insn.
	(sse5_pmacssww): Ditto.
	(sse5_pmacsdd): Ditto.
	(sse5_pmacssdd): Ditto.
	(sse5_pmacssdql): Ditto.
	(sse5_pmacssdqh): Ditto.
	(sse5_pmacsdqh): Ditto.
	(sse5_pmacsswd): Ditto.
	(sse5_pmacswd): Ditto.
	(sse5_pmadcsswd): Ditto.
	(sse5_pmadcswd): Ditto.
	(sse5_pcmov_<mode>): Conditional move support on SSE5.
	(sse5_phaddbw): New SSE5 intrinsic insn.
	(sse5_phaddbd): Ditto.
	(sse5_phaddbq): Ditto.
	(sse5_phaddwd): Ditto.
	(sse5_phaddwq): Ditto.
	(sse5_phadddq): Ditto.
	(sse5_phaddubw): Ditto.
	(sse5_phaddubd): Ditto.
	(sse5_phaddubq): Ditto.
	(sse5_phadduwd): Ditto.
	(sse5_phadduwq): Ditto.
	(sse5_phaddudq): Ditto.
	(sse5_phsubbw): Ditto.
	(sse5_phsubwd): Ditto.
	(sse5_phsubdq): Ditto.
	(sse5_pperm): Ditto.
	(sse5_pperm_sign_v16qi_v8hi): New insns for pack/unpack with SSE5.
	(sse5_pperm_zero_v16qi_v8hi): Ditto.
	(sse5_pperm_sign_v8hi_v4si): Ditto.
	(sse5_pperm_zero_v8hi_v4si): Ditto.
	(sse5_pperm_sign_v4si_v2di): Ditto.
	(sse5_pperm_zero_v4si_v2di): Ditto.
	(sse5_pperm_pack_v2di_v4si): Ditto.
	(sse5_pperm_pack_v4si_v8hi): Ditto.
	(sse5_pperm_pack_v8hi_v16qi): Ditto.
	(sse5_perm<mode>): New SSE5 intrinsic insn.
	(rotl<mode>3): Ditto.
	(sse5_rotl<mode>3): Ditto.
	(sse5_ashl<mode>3): Ditto.
	(sse5_lshl<mode>3): Ditto.
	(sse5_frcz<mode>2): Ditto.
	(sse5s_frcz<mode>2): Ditto.
	(sse5_cvtph2ps): Ditto.
	(sse5_cvtps2ph): Ditto.
	(sse5_vmmaskcmp<mode>3): Ditto.
	(sse5_com_tf<mode>3): Ditto.
	(sse5_maskcmp<mode>3): Ditto.
	(sse5_maskcmp_uns<mode>3): Ditto.
	(sse5_maskcmp_uns2<mode>3): Ditto.
	(sse5_pcom_tf<mode>3): Ditto.
	
	* config/i386/predicates.md (sse5_comparison_float_operator): New predicate to match the
	comparison operators supported by the SSE5 com instruction.
	(ix86_comparison_int_operator): New predicate to match just the
	signed int comparisons.
	(ix86_comparison_uns_operator): New predicate to match just the
	unsigned int comparisons.

	* doc/invoke.texi (-msse5): Add documentation.
	(-mfused-madd): Ditto.

	* doc/extend.texi (x86 intrinsics): Document new SSE5 intrinsics.

	* config.gcc (i[34567]86-*-*): Include bmmintrin.h and
	mmintrin-common.h.
	(x86_64-*-*): Ditto.

	* config/i386/cpuid.h (bit_SSE5): Define SSE5 bit.

	* config/i386/bmmintrin.h: New file, provide common x86 compiler
	intrinsics for SSE5.

	* config/i386/smmintrin.h: Move instructions shared with SSE5 to
	mmintrin-common.h.

	* config/i386/mmintrin-common.h: New file, to contain common
	instructions between SSE4.1 and SSE5.

	* config/i386/netware.c (gen_stdcall_or_fastcall_decoration): Use
	FOREACH_FUNCTION_ARGS to iterate over the argument list.
	(gen_regparm_prefix): Ditto.

	* config/i386/winnt.c (gen_stdcall_or_fastcall_suffix): Use
	FOREACH_FUNCTION_ARGS to iterate over the argument list.  Use
	prototype_p to determine if a function is prototyped.

	* gcc.target/i386/sse5-shift1-vector.c
	* gcc.target/i386/isa-12.c
	* gcc.target/i386/isa-12.c
	* gcc.target/i386/sse5-pcmov2.c
	* gcc.target/i386/isa-3.c
	* gcc.target/i386/sse5-shift2-vector.c
	* gcc.target/i386/isa-7.c
	* gcc.target/i386/funcspec-2.c
	* gcc.target/i386/sse5-haddX.c
	* gcc.target/i386/sse5-hadduX.c
	* gcc.target/i386/isa-9.c
	* gcc.target/i386/sse5-maccXX.c
	* gcc.target/i386/sse5-shift3-vector.c
	* gcc.target/i386/sse5-msubXX.c
	* gcc.target/i386/sse5-permpX.c
	* gcc.target/i386/sse5-check.h
	* gcc.target/i386/sse-12.c
	* gcc.target/i386/sse-11.c
	* gcc.target/i386/sse-10.c
	* gcc.target/i386/sse-13.c
	* gcc.target/i386/sse-14.c
	* gcc.target/i386/sse-22.c
	* gcc.target/i386/sse-2.c
	* gcc.target/i386/sse-13.c
	* gcc.target/i386/avx-2.c
	* gcc.target/i386/sse5-rotate1-vector.c
	* gcc.target/i386/isa-4.c
	* gcc.target/i386/sse5-hsubX.c
	* gcc.target/i386/sse5-pcmov.c
	* gcc.target/i386/sse5-fma.c
	* gcc.target/i386/isa-8.c
	* gcc.target/i386/sse5-rotate2-vector.c
	* gcc.target/i386/sse5-nmaccXX.c
	* gcc.target/i386/sse5-imul64-vector.c
	* gcc.target/i386/sse5-nmsubXX.c
	* gcc.target/i386/sse5-rotate3-vector.c
	* gcc.target/i386/sse5-fma-vector.c
	* gcc.target/i386/sse5-imul32widen-vector.c: Remove SSE5 related testcases
	* gcc.target/i386/sse5-ima-vector.c

	* gcc.target/i386/funcspec-8.c: Replace SSE5 by SSE4.
	* gcc.target/i386/funcspec-5.c: Remove SSE5.
	* gcc.target/i386/funcspec-6.c: Remove fused-add test.
	* gcc.target/i386/avx-1.c: Remove SSE5.
	* gcc.target/i386/avx-2.c: Remove SSE5.
	* g++.dg/other/i386-2.C: Replace SSE5 by SSE4A.
	* g++.dg/other/i386-3.C: Replace SSE5 by SSE4A.
	* g++.dg/other/i386-6.C: Replace SSE5 by SSE4A.
	* g++.dg/other/i386-5.C: Replace SSE5 by SSE4A.

From-SVN: r151099
This commit is contained in:
Jan Hubicka 2009-08-25 23:44:20 +02:00 committed by Jan Hubicka
parent 027c625ced
commit 5c1a2bb1fc
64 changed files with 635 additions and 7913 deletions

View File

@ -1,3 +1,438 @@
2009-08-25 Jan Hubicka <jh@suse.cz>
* config/i386/bmmintrin.h: Replace by #error.
Revert:
Michael Meissner <michael.meissner@amd.com>
Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>
Tony Linthicum <tony.linthicum@amd.com>
* config/i386/i386.h (TARGET_SSE5): New macro for SSE5.
(TARGET_ROUND): New macro for the round/ptest instructions which
are shared between SSE4.1 and SSE5.
(OPTION_MASK_ISA_ROUND): Ditto.
(OPTION_ISA_ROUND): Ditto.
(TARGET_FUSED_MADD): New macro for -mfused-madd switch.
(TARGET_CPU_CPP_BUILTINS): Add SSE5 support.
* config/i386/i386.opt (-msse5): New switch for SSE5 support.
(-mfused-madd): New switch to give users control over whether the
compiler optimizes to use the multiply/add SSE5 instructions.
* config/i386/i386.c (enum pta_flags): Add PTA_SSE5.
(ix86_handle_option): Turn off 3dnow if -msse5.
(override_options): Add SSE5 support.
(print_operand): %Y prints comparison codes for SSE5 com/pcom
instructions.
(ix86_expand_sse_movcc): Add SSE5 support.
(ix86_expand_sse5_unpack): New function to use pperm to unpack a
vector type to the next largest size.
(ix86_expand_sse5_pack): New function to use pperm to pack a
vector type to the next smallest size.
(IX86_BUILTIN_FMADDSS): New for SSE5 intrinsic.
(IX86_BUILTIN_FMADDSD): Ditto.
(IX86_BUILTIN_FMADDPS): Ditto.
(IX86_BUILTIN_FMADDPD): Ditto.
(IX86_BUILTIN_FMSUBSS): Ditto.
(IX86_BUILTIN_FMSUBSD): Ditto.
(IX86_BUILTIN_FMSUBPS): Ditto.
(IX86_BUILTIN_FMSUBPD): Ditto.
(IX86_BUILTIN_FNMADDSS): Ditto.
(IX86_BUILTIN_FNMADDSD): Ditto.
(IX86_BUILTIN_FNMADDPS): Ditto.
(IX86_BUILTIN_FNMADDPD): Ditto.
(IX86_BUILTIN_FNMSUBSS): Ditto.
(IX86_BUILTIN_FNMSUBSD): Ditto.
(IX86_BUILTIN_FNMSUBPS): Ditto.
(IX86_BUILTIN_FNMSUBPD): Ditto.
(IX86_BUILTIN_PCMOV_V2DI): Ditto.
(IX86_BUILTIN_PCMOV_V4SI): Ditto.
(IX86_BUILTIN_PCMOV_V8HI): Ditto.
(IX86_BUILTIN_PCMOV_V16QI): Ditto.
(IX86_BUILTIN_PCMOV_V4SF): Ditto.
(IX86_BUILTIN_PCMOV_V2DF): Ditto.
(IX86_BUILTIN_PPERM): Ditto.
(IX86_BUILTIN_PERMPS): Ditto.
(IX86_BUILTIN_PERMPD): Ditto.
(IX86_BUILTIN_PMACSSWW): Ditto.
(IX86_BUILTIN_PMACSWW): Ditto.
(IX86_BUILTIN_PMACSSWD): Ditto.
(IX86_BUILTIN_PMACSWD): Ditto.
(IX86_BUILTIN_PMACSSDD): Ditto.
(IX86_BUILTIN_PMACSDD): Ditto.
(IX86_BUILTIN_PMACSSDQL): Ditto.
(IX86_BUILTIN_PMACSSDQH): Ditto.
(IX86_BUILTIN_PMACSDQL): Ditto.
(IX86_BUILTIN_PMACSDQH): Ditto.
(IX86_BUILTIN_PMADCSSWD): Ditto.
(IX86_BUILTIN_PMADCSWD): Ditto.
(IX86_BUILTIN_PHADDBW): Ditto.
(IX86_BUILTIN_PHADDBD): Ditto.
(IX86_BUILTIN_PHADDBQ): Ditto.
(IX86_BUILTIN_PHADDWD): Ditto.
(IX86_BUILTIN_PHADDWQ): Ditto.
(IX86_BUILTIN_PHADDDQ): Ditto.
(IX86_BUILTIN_PHADDUBW): Ditto.
(IX86_BUILTIN_PHADDUBD): Ditto.
(IX86_BUILTIN_PHADDUBQ): Ditto.
(IX86_BUILTIN_PHADDUWD): Ditto.
(IX86_BUILTIN_PHADDUWQ): Ditto.
(IX86_BUILTIN_PHADDUDQ): Ditto.
(IX86_BUILTIN_PHSUBBW): Ditto.
(IX86_BUILTIN_PHSUBWD): Ditto.
(IX86_BUILTIN_PHSUBDQ): Ditto.
(IX86_BUILTIN_PROTB): Ditto.
(IX86_BUILTIN_PROTW): Ditto.
(IX86_BUILTIN_PROTD): Ditto.
(IX86_BUILTIN_PROTQ): Ditto.
(IX86_BUILTIN_PROTB_IMM): Ditto.
(IX86_BUILTIN_PROTW_IMM): Ditto.
(IX86_BUILTIN_PROTD_IMM): Ditto.
(IX86_BUILTIN_PROTQ_IMM): Ditto.
(IX86_BUILTIN_PSHLB): Ditto.
(IX86_BUILTIN_PSHLW): Ditto.
(IX86_BUILTIN_PSHLD): Ditto.
(IX86_BUILTIN_PSHLQ): Ditto.
(IX86_BUILTIN_PSHAB): Ditto.
(IX86_BUILTIN_PSHAW): Ditto.
(IX86_BUILTIN_PSHAD): Ditto.
(IX86_BUILTIN_PSHAQ): Ditto.
(IX86_BUILTIN_FRCZSS): Ditto.
(IX86_BUILTIN_FRCZSD): Ditto.
(IX86_BUILTIN_FRCZPS): Ditto.
(IX86_BUILTIN_FRCZPD): Ditto.
(IX86_BUILTIN_CVTPH2PS): Ditto.
(IX86_BUILTIN_CVTPS2PH): Ditto.
(IX86_BUILTIN_COMEQSS): Ditto.
(IX86_BUILTIN_COMNESS): Ditto.
(IX86_BUILTIN_COMLTSS): Ditto.
(IX86_BUILTIN_COMLESS): Ditto.
(IX86_BUILTIN_COMGTSS): Ditto.
(IX86_BUILTIN_COMGESS): Ditto.
(IX86_BUILTIN_COMUEQSS): Ditto.
(IX86_BUILTIN_COMUNESS): Ditto.
(IX86_BUILTIN_COMULTSS): Ditto.
(IX86_BUILTIN_COMULESS): Ditto.
(IX86_BUILTIN_COMUGTSS): Ditto.
(IX86_BUILTIN_COMUGESS): Ditto.
(IX86_BUILTIN_COMORDSS): Ditto.
(IX86_BUILTIN_COMUNORDSS): Ditto.
(IX86_BUILTIN_COMFALSESS): Ditto.
(IX86_BUILTIN_COMTRUESS): Ditto.
(IX86_BUILTIN_COMEQSD): Ditto.
(IX86_BUILTIN_COMNESD): Ditto.
(IX86_BUILTIN_COMLTSD): Ditto.
(IX86_BUILTIN_COMLESD): Ditto.
(IX86_BUILTIN_COMGTSD): Ditto.
(IX86_BUILTIN_COMGESD): Ditto.
(IX86_BUILTIN_COMUEQSD): Ditto.
(IX86_BUILTIN_COMUNESD): Ditto.
(IX86_BUILTIN_COMULTSD): Ditto.
(IX86_BUILTIN_COMULESD): Ditto.
(IX86_BUILTIN_COMUGTSD): Ditto.
(IX86_BUILTIN_COMUGESD): Ditto.
(IX86_BUILTIN_COMORDSD): Ditto.
(IX86_BUILTIN_COMUNORDSD): Ditto.
(IX86_BUILTIN_COMFALSESD): Ditto.
(IX86_BUILTIN_COMTRUESD): Ditto.
(IX86_BUILTIN_COMEQPS): Ditto.
(IX86_BUILTIN_COMNEPS): Ditto.
(IX86_BUILTIN_COMLTPS): Ditto.
(IX86_BUILTIN_COMLEPS): Ditto.
(IX86_BUILTIN_COMGTPS): Ditto.
(IX86_BUILTIN_COMGEPS): Ditto.
(IX86_BUILTIN_COMUEQPS): Ditto.
(IX86_BUILTIN_COMUNEPS): Ditto.
(IX86_BUILTIN_COMULTPS): Ditto.
(IX86_BUILTIN_COMULEPS): Ditto.
(IX86_BUILTIN_COMUGTPS): Ditto.
(IX86_BUILTIN_COMUGEPS): Ditto.
(IX86_BUILTIN_COMORDPS): Ditto.
(IX86_BUILTIN_COMUNORDPS): Ditto.
(IX86_BUILTIN_COMFALSEPS): Ditto.
(IX86_BUILTIN_COMTRUEPS): Ditto.
(IX86_BUILTIN_COMEQPD): Ditto.
(IX86_BUILTIN_COMNEPD): Ditto.
(IX86_BUILTIN_COMLTPD): Ditto.
(IX86_BUILTIN_COMLEPD): Ditto.
(IX86_BUILTIN_COMGTPD): Ditto.
(IX86_BUILTIN_COMGEPD): Ditto.
(IX86_BUILTIN_COMUEQPD): Ditto.
(IX86_BUILTIN_COMUNEPD): Ditto.
(IX86_BUILTIN_COMULTPD): Ditto.
(IX86_BUILTIN_COMULEPD): Ditto.
(IX86_BUILTIN_COMUGTPD): Ditto.
(IX86_BUILTIN_COMUGEPD): Ditto.
(IX86_BUILTIN_COMORDPD): Ditto.
(IX86_BUILTIN_COMUNORDPD): Ditto.
(IX86_BUILTIN_COMFALSEPD): Ditto.
(IX86_BUILTIN_COMTRUEPD): Ditto.
(IX86_BUILTIN_PCOMEQUB): Ditto.
(IX86_BUILTIN_PCOMNEUB): Ditto.
(IX86_BUILTIN_PCOMLTUB): Ditto.
(IX86_BUILTIN_PCOMLEUB): Ditto.
(IX86_BUILTIN_PCOMGTUB): Ditto.
(IX86_BUILTIN_PCOMGEUB): Ditto.
(IX86_BUILTIN_PCOMFALSEUB): Ditto.
(IX86_BUILTIN_PCOMTRUEUB): Ditto.
(IX86_BUILTIN_PCOMEQUW): Ditto.
(IX86_BUILTIN_PCOMNEUW): Ditto.
(IX86_BUILTIN_PCOMLTUW): Ditto.
(IX86_BUILTIN_PCOMLEUW): Ditto.
(IX86_BUILTIN_PCOMGTUW): Ditto.
(IX86_BUILTIN_PCOMGEUW): Ditto.
(IX86_BUILTIN_PCOMFALSEUW): Ditto.
(IX86_BUILTIN_PCOMTRUEUW): Ditto.
(IX86_BUILTIN_PCOMEQUD): Ditto.
(IX86_BUILTIN_PCOMNEUD): Ditto.
(IX86_BUILTIN_PCOMLTUD): Ditto.
(IX86_BUILTIN_PCOMLEUD): Ditto.
(IX86_BUILTIN_PCOMGTUD): Ditto.
(IX86_BUILTIN_PCOMGEUD): Ditto.
(IX86_BUILTIN_PCOMFALSEUD): Ditto.
(IX86_BUILTIN_PCOMTRUEUD): Ditto.
(IX86_BUILTIN_PCOMEQUQ): Ditto.
(IX86_BUILTIN_PCOMNEUQ): Ditto.
(IX86_BUILTIN_PCOMLTUQ): Ditto.
(IX86_BUILTIN_PCOMLEUQ): Ditto.
(IX86_BUILTIN_PCOMGTUQ): Ditto.
(IX86_BUILTIN_PCOMGEUQ): Ditto.
(IX86_BUILTIN_PCOMFALSEUQ): Ditto.
(IX86_BUILTIN_PCOMTRUEUQ): Ditto.
(IX86_BUILTIN_PCOMEQB): Ditto.
(IX86_BUILTIN_PCOMNEB): Ditto.
(IX86_BUILTIN_PCOMLTB): Ditto.
(IX86_BUILTIN_PCOMLEB): Ditto.
(IX86_BUILTIN_PCOMGTB): Ditto.
(IX86_BUILTIN_PCOMGEB): Ditto.
(IX86_BUILTIN_PCOMFALSEB): Ditto.
(IX86_BUILTIN_PCOMTRUEB): Ditto.
(IX86_BUILTIN_PCOMEQW): Ditto.
(IX86_BUILTIN_PCOMNEW): Ditto.
(IX86_BUILTIN_PCOMLTW): Ditto.
(IX86_BUILTIN_PCOMLEW): Ditto.
(IX86_BUILTIN_PCOMGTW): Ditto.
(IX86_BUILTIN_PCOMGEW): Ditto.
(IX86_BUILTIN_PCOMFALSEW): Ditto.
(IX86_BUILTIN_PCOMTRUEW): Ditto.
(IX86_BUILTIN_PCOMEQD): Ditto.
(IX86_BUILTIN_PCOMNED): Ditto.
(IX86_BUILTIN_PCOMLTD): Ditto.
(IX86_BUILTIN_PCOMLED): Ditto.
(IX86_BUILTIN_PCOMGTD): Ditto.
(IX86_BUILTIN_PCOMGED): Ditto.
(IX86_BUILTIN_PCOMFALSED): Ditto.
(IX86_BUILTIN_PCOMTRUED): Ditto.
(IX86_BUILTIN_PCOMEQQ): Ditto.
(IX86_BUILTIN_PCOMNEQ): Ditto.
(IX86_BUILTIN_PCOMLTQ): Ditto.
(IX86_BUILTIN_PCOMLEQ): Ditto.
(IX86_BUILTIN_PCOMGTQ): Ditto.
(IX86_BUILTIN_PCOMGEQ): Ditto.
(IX86_BUILTIN_PCOMFALSEQ): Ditto.
(IX86_BUILTIN_PCOMTRUEQ): Ditto.
(enum multi_arg_type): New enum for describing the various SSE5
intrinsic argument types.
(bdesc_multi_arg): New table for SSE5 intrinsics.
(ix86_init_mmx_sse_builtins): Add SSE5 intrinsic support.
(ix86_expand_multi_arg_builtin): New function for creating SSE5
intrinsics.
(ix86_expand_builtin): Add SSE5 intrinsic support.
(ix86_sse5_valid_op_p): New function to validate SSE5 3 and 4
operand instructions.
(ix86_expand_sse5_multiple_memory): New function to split the
second memory reference from SSE5 instructions.
(type_has_variadic_args_p): Delete in favor of stdarg_p.
(ix86_return_pops_args): Use stdarg_p to determine if the function
has variable arguments.
(ix86_setup_incoming_varargs): Ditto.
(x86_this_parameter): Ditto.
* config/i386/i386-protos.h (ix86_expand_sse5_unpack): Add
declaration.
(ix86_expand_sse5_pack): Ditto.
(ix86_sse5_valid_op_p): Ditto.
(ix86_expand_sse5_multiple_memory): Ditto.
* config/i386/i386.md (UNSPEC_SSE5_INTRINSIC): Add new UNSPEC
constant for SSE5 support.
(UNSPEC_SSE5_UNSIGNED_CMP): Ditto.
(UNSPEC_SSE5_TRUEFALSE): Ditto.
(UNSPEC_SSE5_PERMUTE): Ditto.
(UNSPEC_SSE5_ASHIFT): Ditto.
(UNSPEC_SSE5_LSHIFT): Ditto.
(UNSPEC_FRCZ): Ditto.
(UNSPEC_CVTPH2PS): Ditto.
(UNSPEC_CVTPS2PH): Ditto.
(PCOM_FALSE): Add new constant for true/false SSE5 comparisons.
(PCOM_TRUE): Ditto.
(COM_FALSE_S): Ditto.
(COM_FALSE_P): Ditto.
(COM_TRUE_S): Ditto.
(COM_TRUE_P): Ditto.
(type attribute): Add ssemuladd, sseiadd1, ssecvt1, sse4arg types.
(unit attribute): Add support for ssemuladd, ssecvt1, sseiadd1, sse4arg
types.
(memory attribute): Ditto.
(sse4_1_round<mode>2): Use TARGET_ROUND instead of TARGET_SSE4_1.
Use SSE4_1_ROUND_* constants instead of hard coded numbers.
(rint<mode>2): Use TARGET_ROUND instead of TARGET_SSE4_1.
(floor<mode>2): Ditto.
(ceil<mode>2): Ditto.
(btrunc<mode>2): Ditto.
(nearbyintdf2): Ditto.
(nearbyintsf2): Ditto.
(sse_setccsf): Disable if SSE5.
(sse_setccdf): Ditto.
(sse5_setcc<mode>): New support for SSE5 conditional move.
(sse5_pcmov_<mode>): Ditto.
* config/i386/sse.md (SSEMODE1248): New mode iterator for SSE5.
(SSEMODEF4): Ditto.
(SSEMODEF2P): Ditto.
(ssemodesuffixf4): New mode attribute for SSE5.
(ssemodesuffixf2s): Ditto.
(ssemodesuffixf2c): Ditto.
(sserotatemax): Ditto.
(ssescalarmode): Ditto.
(sse_maskcmpv4sf3): Disable if SSE5.
(sse_maskcmpv2df3): Ditto.
(sse_vmmaskcmpv4sf3): Ditto.
(sse5_fmadd<mode>4): Add SSE5 floating point multiply/add
instructions.
(sse5_vmfmadd<mode>4): Ditto.
(sse5_fmsub<mode>4): Ditto.
(sse5_vmfmsub<mode>4): Ditto.
(sse5_fnmadd<mode>4): Ditto.
(sse5_vmfnmadd<mode>4): Ditto.
(sse5_fnmsub<mode>4): Ditto.
(sse5_vmfnmsub<mode>4): Ditto.
(sse5i_fmadd<mode>4): Ditto.
(sse5i_fmsub<mode>4): Ditto.
(sse5i_fnmadd<mode>4): Ditto.
(sse5i_fnmsub<mode>4): Ditto.
(sse5i_vmfmadd<mode>4): Ditto.
(sse5i_vmfmsub<mode>4): Ditto.
(sse5i_vmfnmadd<mode>4): Ditto.
(sse5i_vmfnmsub<mode>4): Ditto.
(mulv16qi3): Add SSE5 support.
(mulv4si3): Ditto.
(sse5_mulv4si3): New insn for 32-bit multiply support on SSE5.
(sse2_mulv4si3): Disable if SSE5.
(sse4_1_roundpd): Use TARGET_ROUND instead of TARGET_SSE4_1.
(sse4_1_roundps): Ditto.
(sse4_1_roundsd): Ditto.
(sse4_1_roundss): Ditto.
(sse_maskcmpv4sf3): Disable if SSE5 so the SSE5 instruction will
be generated.
(sse_maskcmpsf3): Ditto.
(sse_vmmaskcmpv4sf3): Ditto.
(sse2_maskcmpv2df3): Ditto.
(sse2_maskcmpdf3): Ditto.
(sse2_vmmaskcmpv2df3): Ditto.
(sse2_eq<mode>3): Ditto.
(sse2_gt<mode>3): Ditto.
(sse5_pcmov_<mode>): Add SSE5 support.
(vec_unpacku_hi_v16qi): Ditto.
(vec_unpacks_hi_v16qi): Ditto.
(vec_unpacku_lo_v16qi): Ditto.
(vec_unpacks_lo_v16qi): Ditto.
(vec_unpacku_hi_v8hi): Ditto.
(vec_unpacks_hi_v8hi): Ditto.
(vec_unpacku_lo_v8hi): Ditto.
(vec_unpacks_lo_v8hi): Ditto.
(vec_unpacku_hi_v4si): Ditto.
(vec_unpacks_hi_v4si): Ditto.
(vec_unpacku_lo_v4si): Ditto.
(vec_unpacks_lo_v4si): Ditto.
(sse5_pmacsww): New SSE5 intrinsic insn.
(sse5_pmacssww): Ditto.
(sse5_pmacsdd): Ditto.
(sse5_pmacssdd): Ditto.
(sse5_pmacssdql): Ditto.
(sse5_pmacssdqh): Ditto.
(sse5_pmacsdqh): Ditto.
(sse5_pmacsswd): Ditto.
(sse5_pmacswd): Ditto.
(sse5_pmadcsswd): Ditto.
(sse5_pmadcswd): Ditto.
(sse5_pcmov_<mode>): Conditional move support on SSE5.
(sse5_phaddbw): New SSE5 intrinsic insn.
(sse5_phaddbd): Ditto.
(sse5_phaddbq): Ditto.
(sse5_phaddwd): Ditto.
(sse5_phaddwq): Ditto.
(sse5_phadddq): Ditto.
(sse5_phaddubw): Ditto.
(sse5_phaddubd): Ditto.
(sse5_phaddubq): Ditto.
(sse5_phadduwd): Ditto.
(sse5_phadduwq): Ditto.
(sse5_phaddudq): Ditto.
(sse5_phsubbw): Ditto.
(sse5_phsubwd): Ditto.
(sse5_phsubdq): Ditto.
(sse5_pperm): Ditto.
(sse5_pperm_sign_v16qi_v8hi): New insns for pack/unpack with SSE5.
(sse5_pperm_zero_v16qi_v8hi): Ditto.
(sse5_pperm_sign_v8hi_v4si): Ditto.
(sse5_pperm_zero_v8hi_v4si): Ditto.
(sse5_pperm_sign_v4si_v2di): Ditto.
(sse5_pperm_zero_v4si_v2di): Ditto.
(sse5_pperm_pack_v2di_v4si): Ditto.
(sse5_pperm_pack_v4si_v8hi): Ditto.
(sse5_pperm_pack_v8hi_v16qi): Ditto.
(sse5_perm<mode>): New SSE5 intrinsic insn.
(rotl<mode>3): Ditto.
(sse5_rotl<mode>3): Ditto.
(sse5_ashl<mode>3): Ditto.
(sse5_lshl<mode>3): Ditto.
(sse5_frcz<mode>2): Ditto.
(sse5s_frcz<mode>2): Ditto.
(sse5_cvtph2ps): Ditto.
(sse5_cvtps2ph): Ditto.
(sse5_vmmaskcmp<mode>3): Ditto.
(sse5_com_tf<mode>3): Ditto.
(sse5_maskcmp<mode>3): Ditto.
(sse5_maskcmp_uns<mode>3): Ditto.
(sse5_maskcmp_uns2<mode>3): Ditto.
(sse5_pcom_tf<mode>3): Ditto.
* config/i386/predicates.md (sse5_comparison_float_operator): New predicate to match the
comparison operators supported by the SSE5 com instruction.
(ix86_comparison_int_operator): New predicate to match just the
signed int comparisons.
(ix86_comparison_uns_operator): New predicate to match just the
unsigned int comparisons.
* doc/invoke.texi (-msse5): Add documentation.
(-mfused-madd): Ditto.
* doc/extend.texi (x86 intrinsics): Document new SSE5 intrinsics.
* config.gcc (i[34567]86-*-*): Include bmmintrin.h and
mmintrin-common.h.
(x86_64-*-*): Ditto.
* config/i386/cpuid.h (bit_SSE5): Define SSE5 bit.
* config/i386/bmmintrin.h: New file, provide common x86 compiler
intrinsics for SSE5.
* config/i386/smmintrin.h: Move instructions shared with SSE5 to
mmintrin-common.h.
* config/i386/mmintrin-common.h: New file, to contain common
instructions between SSE4.1 and SSE5.
* config/i386/netware.c (gen_stdcall_or_fastcall_decoration): Use
FOREACH_FUNCTION_ARGS to iterate over the argument list.
(gen_regparm_prefix): Ditto.
* config/i386/winnt.c (gen_stdcall_or_fastcall_suffix): Use
FOREACH_FUNCTION_ARGS to iterate over the argument list. Use
prototype_p to determine if a function is prototyped.
2009-08-25 Ville Voutilainen <ville.voutilainen@gmail.com>
* c-common.c (c_common_reswords) add the alignof keyword,

File diff suppressed because it is too large Load Diff

View File

@ -230,8 +230,6 @@ ix86_target_macros_internal (int isa_flag,
def_or_undef (parse_in, "__FMA__");
if (isa_flag & OPTION_MASK_ISA_SSE4A)
def_or_undef (parse_in, "__SSE4A__");
if (isa_flag & OPTION_MASK_ISA_SSE5)
def_or_undef (parse_in, "__SSE5__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
def_or_undef (parse_in, "__SSE_MATH__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))

View File

@ -113,8 +113,6 @@ extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
extern void ix86_expand_sse_unpack (rtx[], bool, bool);
extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
extern void ix86_expand_sse5_unpack (rtx[], bool, bool);
extern void ix86_expand_sse5_pack (rtx[]);
extern int ix86_expand_int_addcc (rtx[]);
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void x86_initialize_trampoline (rtx, rtx, rtx);
@ -216,9 +214,6 @@ extern void ix86_expand_vector_set (bool, rtx, rtx, int);
extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
extern bool ix86_sse5_valid_op_p (rtx [], rtx, int, bool, int, bool);
extern void ix86_expand_sse5_multiple_memory (rtx [], int, enum machine_mode);
/* In i386-c.c */
extern void ix86_target_macros (void);
extern void ix86_register_pragmas (void);

File diff suppressed because it is too large Load Diff

View File

@ -54,7 +54,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_AVX OPTION_ISA_AVX
#define TARGET_FMA OPTION_ISA_FMA
#define TARGET_SSE4A OPTION_ISA_SSE4A
#define TARGET_SSE5 OPTION_ISA_SSE5
#define TARGET_ROUND OPTION_ISA_ROUND
#define TARGET_ABM OPTION_ISA_ABM
#define TARGET_POPCNT OPTION_ISA_POPCNT
@ -66,8 +65,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_CMPXCHG16B OPTION_ISA_CX16
/* SSE5 and SSE4.1 define the same round instructions */
#define OPTION_MASK_ISA_ROUND (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE5)
/* SSE4.1 define round instructions */
#define OPTION_MASK_ISA_ROUND (OPTION_MASK_ISA_SSE4_1)
#define OPTION_ISA_ROUND ((ix86_isa_flags & OPTION_MASK_ISA_ROUND) != 0)
#include "config/vxworks-dummy.h"
@ -542,6 +541,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
%<mcpu=* \
%{mintel-syntax:-masm=intel \
%n`-mintel-syntax' is deprecated. Use `-masm=intel' instead.\n} \
%{msse5:-mavx \
%n'-msse5' was removed.\n} \
%{mfused-madd:-mavx \
%n'-mfused-madd' was removed.\n} \
%{mno-intel-syntax:-masm=att \
%n`-mno-intel-syntax' is deprecated. Use `-masm=att' instead.\n}"

View File

@ -57,7 +57,6 @@
;; X -- don't print any sort of PIC '@' suffix for a symbol.
;; & -- print some in-use local-dynamic symbol name.
;; H -- print a memory address offset by 8; used for sse high-parts
;; Y -- print condition for SSE5 com* instruction.
;; + -- print a branch hint as 'cs' or 'ds' prefix
;; ; -- print a semicolon (after prefixes due to bug in older gas).
@ -196,15 +195,6 @@
(UNSPEC_PCMPESTR 144)
(UNSPEC_PCMPISTR 145)
;; For SSE5
(UNSPEC_SSE5_INTRINSIC 150)
(UNSPEC_SSE5_UNSIGNED_CMP 151)
(UNSPEC_SSE5_TRUEFALSE 152)
(UNSPEC_SSE5_PERMUTE 153)
(UNSPEC_FRCZ 154)
(UNSPEC_CVTPH2PS 155)
(UNSPEC_CVTPS2PH 156)
; For AES support
(UNSPEC_AESENC 159)
(UNSPEC_AESENCLAST 160)
@ -259,20 +249,6 @@
(COM_TRUE_P 5)
])
;; Constants used in the SSE5 pperm instruction.
;; The first eight entries name an operation and take values that are
;; multiples of 0x20; the last two select a source byte (0x00 or 0x10).
;; Note that PPERM_SRC and PPERM_SRC1 share the value 0x00.
;; NOTE(review): presumably one operation value and one source value are
;; OR-ed together to build a selector byte -- confirm against the users.
(define_constants
[(PPERM_SRC 0x00) /* copy source */
(PPERM_INVERT 0x20) /* invert source */
(PPERM_REVERSE 0x40) /* bit reverse source */
(PPERM_REV_INV 0x60) /* bit reverse & invert src */
(PPERM_ZERO 0x80) /* all 0's */
(PPERM_ONES 0xa0) /* all 1's */
(PPERM_SIGN 0xc0) /* propagate sign bit */
(PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
(PPERM_SRC1 0x00) /* use first source byte */
(PPERM_SRC2 0x10) /* use second source byte */
])
;; Registers by name.
(define_constants
[(AX_REG 0)
@ -465,7 +441,7 @@
]
(const_int 0)))
;; There are also additional prefixes in 3DNOW, SSSE3 or SSE5.
;; There are also additional prefixes in 3DNOW, SSSE3.
;; ssemuladd,sse4arg default to 0f24/0f25 and DREX byte,
;; sseiadd1,ssecvt1 to 0f7a with no DREX byte.
;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a.
@ -8879,8 +8855,6 @@
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"")
;; SSE5 scalar multiply/add instructions are defined in sse.md.
;; Divide instructions
@ -14826,23 +14800,11 @@
(match_operator:MODEF 1 "sse_comparison_operator"
[(match_operand:MODEF 2 "register_operand" "0")
(match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
"SSE_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
"SSE_FLOAT_MODE_P (<MODE>mode)"
"cmp%D1s<ssemodefsuffix>\t{%3, %0|%0, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "mode" "<MODE>")])
;; Set a MODEF register (operand 0) to the result of a floating-point
;; comparison.  The comparison operator is operand 1 and must satisfy
;; sse5_comparison_float_operator; it compares register operand 2 with
;; register-or-memory operand 3.  Enabled only when TARGET_SSE5 holds.
;; The output template prints the condition of operand 1 with the %Y
;; operand code as part of the "com" mnemonic.
(define_insn "*sse5_setcc<mode>"
[(set (match_operand:MODEF 0 "register_operand" "=x")
(match_operator:MODEF 1 "sse5_comparison_float_operator"
[(match_operand:MODEF 2 "register_operand" "x")
(match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
"TARGET_SSE5"
"com%Y1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "sse4arg")
(set_attr "length_immediate" "1")
(set_attr "mode" "<MODE>")])
;; Basic conditional jump instructions.
;; We ignore the overflow flag for signed branch instructions.
@ -20643,20 +20605,6 @@
[(set_attr "type" "fcmov")
(set_attr "mode" "XF")])
;; All moves in SSE5 pcmov instructions are 128 bits and hence we restrict
;; the scalar versions to have only XMM registers as operands.
;; SSE5 conditional move: operand 0 receives an if_then_else whose condition
;; is operand 1 and whose two arms are operands 2 and 3.  There are two
;; constraint alternatives; in the first operand 2 is tied to the
;; destination ("0"), in the second operand 1 is.  Besides TARGET_SSE5, the
;; insn condition requires ix86_sse5_valid_op_p to accept the operands.
(define_insn "*sse5_pcmov_<mode>"
[(set (match_operand:MODEF 0 "register_operand" "=x,x")
(if_then_else:MODEF
(match_operand:MODEF 1 "register_operand" "x,0")
(match_operand:MODEF 2 "register_operand" "0,x")
(match_operand:MODEF 3 "register_operand" "x,x")))]
"TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
"pcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
[(set_attr "type" "sse4arg")])
;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator

View File

@ -244,15 +244,6 @@ mcld
Target Report Mask(CLD) Save
Generate cld instruction in the function prologue.
mno-fused-madd
Target RejectNegative Report Mask(NO_FUSED_MADD) Undocumented Save
mfused-madd
Target Report InverseMask(NO_FUSED_MADD, FUSED_MADD) Save
Enable automatic generation of fused floating point multiply-add instructions
if the ISA supports such instructions. The -mfused-madd option is on by
default.
;; ISA support
m32
@ -319,10 +310,6 @@ msse4a
Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) VarExists Save
Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation
msse5
Target Report Mask(ISA_SSE5) Var(ix86_isa_flags) VarExists Save
Support SSE5 built-in functions and code generation
mabm
Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save
Support code generation of Advanced Bit Manipulation (ABM) instructions.

View File

@ -21,14 +21,13 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Common definition of the ROUND and PTEST intrinsics that are shared
between SSE4.1 and SSE5. */
/* Common definition of the ROUND and PTEST intrinsics, SSE4.1. */
#ifndef _MMINTRIN_COMMON_H_INCLUDED
#define _MMINTRIN_COMMON_H_INCLUDED
#if !defined(__SSE5__) && !defined(__SSE4_1__)
# error "SSE5 or SSE4.1 instruction set not enabled"
#if !defined(__SSE4_1__)
# error "SSE4.1 instruction set not enabled"
#else
/* Rounding mode macros. */
@ -150,6 +149,6 @@ _mm_round_ss (__m128 __D, __m128 __V, const int __M)
#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR)
#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
#endif /* __SSE5__/__SSE4_1__ */
#endif /* __SSE4_1__ */
#endif /* _MMINTRIN_COMMON_H_INCLUDED */

View File

@ -988,12 +988,6 @@
(define_predicate "avx_comparison_float_operator"
(match_code "ne,eq,ge,gt,le,lt,unordered,ordered,uneq,unge,ungt,unle,unlt,ltgt"))
;; Return 1 if OP is a comparison operator that can be issued by sse predicate
;; generation instructions.  The predicate only matches when TARGET_SSE5 is
;; enabled; the accepted codes cover the ordered comparisons (ne, eq, ge, gt,
;; le, lt, ltgt), the ordered/unordered tests, and the un* variants.
(define_predicate "sse5_comparison_float_operator"
(and (match_test "TARGET_SSE5")
(match_code "ne,eq,ge,gt,le,lt,unordered,ordered,uneq,unge,ungt,unle,unlt,ltgt")))
(define_predicate "ix86_comparison_int_operator"
(match_code "ne,eq,ge,gt,le,lt"))

File diff suppressed because it is too large Load Diff

View File

@ -590,7 +590,7 @@ Objective-C and Objective-C++ Dialects}.
-mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip @gol
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
-maes -mpclmul @gol
-msse4a -m3dnow -mpopcnt -mabm -msse5 @gol
-msse4a -m3dnow -mpopcnt -mabm @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
@ -599,7 +599,7 @@ Objective-C and Objective-C++ Dialects}.
-momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol
-mcmodel=@var{code-model} -mabi=@var{name} @gol
-m32 -m64 -mlarge-data-threshold=@var{num} @gol
-mfused-madd -mno-fused-madd -msse2avx}
-msse2avx}
@emph{IA-64 Options}
@gccoptlist{-mbig-endian -mlittle-endian -mgnu-as -mgnu-ld -mno-pic @gol
@ -11641,8 +11641,6 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -mno-pclmul
@itemx -msse4a
@itemx -mno-sse4a
@itemx -msse5
@itemx -mno-sse5
@itemx -m3dnow
@itemx -mno-3dnow
@itemx -mpopcnt
@ -11656,7 +11654,7 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@opindex m3dnow
@opindex mno-3dnow
These switches enable or disable the use of instructions in the MMX,
SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, SSE4A, SSE5, ABM or
SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, SSE4A, ABM or
3DNow!@: extended instruction sets.
These extensions are also available as built-in functions: see
@ref{X86 Built-in Functions}, for details of the functions enabled and
@ -11834,14 +11832,6 @@ segment to cover the entire TLS area.
For systems that use GNU libc, the default is on.
@item -mfused-madd
@itemx -mno-fused-madd
@opindex mfused-madd
Enable automatic generation of fused floating point multiply-add instructions
if the ISA supports such instructions. The -mfused-madd option is on by
default. The fused multiply-add instructions have a different
rounding behavior compared to executing a multiply followed by an add.
@item -msse2avx
@itemx -mno-sse2avx
@opindex msse2avx

View File

@ -1,3 +1,55 @@
2009-08-25 Jan Hubicka <jh@suse.cz>
* gcc.target/i386/sse5-shift1-vector.c
* gcc.target/i386/isa-12.c
* gcc.target/i386/isa-12.cgcc.target/i386/isa-12.c
* gcc.target/i386/sse5-pcmov2.c
* gcc.target/i386/isa-3.c
* gcc.target/i386/sse5-shift2-vector.c
* gcc.target/i386/isa-7.c
* gcc.target/i386/funcspec-2.c
* gcc.target/i386/sse5-haddX.c
* gcc.target/i386/sse5-hadduX.c
* gcc.target/i386/isa-9.c
* gcc.target/i386/sse5-maccXX.c
* gcc.target/i386/sse5-shift3-vector.c
* gcc.target/i386/sse5-msubXX.c
* gcc.target/i386/sse5-permpX.c
* gcc.target/i386/sse5-check.h
* gcc.target/i386/sse-12.c
* gcc.target/i386/sse-11.c
* gcc.target/i386/sse-10.c
* gcc.target/i386/sse-13.c
* gcc.target/i386/sse-14.c
* gcc.target/i386/sse-22.c
* gcc.target/i386/sse-2.c
* gcc.target/i386/sse-13.c
* gcc.target/i386/avx-2.c
* gcc.target/i386/sse5-rotate1-vector.c
* gcc.target/i386/isa-4.c
* gcc.target/i386/sse5-hsubX.c
* gcc.target/i386/sse5-pcmov.c
* gcc.target/i386/sse5-fma.c
* gcc.target/i386/isa-8.c
* gcc.target/i386/sse5-rotate2-vector.c
* gcc.target/i386/sse5-nmaccXX.c
* gcc.target/i386/sse5-imul64-vector.c
* gcc.target/i386/sse5-nmsubXX.c
* gcc.target/i386/sse5-rotate3-vector.c
* gcc.target/i386/sse5-fma-vector.c
* gcc.target/i386/sse5-imul32widen-vector.c: Remove SSE5 related testcases
* gcc.target/i386/sse5-ima-vector.c
* gcc.target/i386/funcspec-8.c: Replace SSE5 by SSE4.
* gcc.target/i386/funcspec-5.c: Remove SSE5.
* gcc.target/i386/funcspec-6.c: Remove fused-add test.
* gcc.target/i386/avx-1.c: Remove SSE5.
* gcc.target/i386/avx-2.c: Remove SSE5.
* g++.dg/other/i386-2.C: Replace SSE5 by SSE4A.
* g++.dg/other/i386-3.C: Replace SSE5 by SSE4A.
* g++.dg/other/i386-6.C: Replace SSE5 by SSE4A.
* g++.dg/other/i386-5.C: Replace SSE5 by SSE4A.
2009-08-25 Uros Bizjak <ubizjak@gmail.com>
* gcc.c-torture/compile/limits-fndefn.c: Add dg-timeout-factor.

View File

@ -1,7 +1,7 @@
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h and mm_malloc.h are
usable with -O -pedantic-errors. */
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse5 -maes -mpclmul" } */
/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse4a -maes -mpclmul" } */
#include <x86intrin.h>

View File

@ -1,6 +1,6 @@
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h and mm_malloc.h are
usable with -O -fkeep-inline-functions. */
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse5 -maes -mpclmul" } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse4a -maes -mpclmul" } */
#include <x86intrin.h>

View File

@ -1,6 +1,6 @@
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h and mm_malloc.h are
usable with -O -fkeep-inline-functions. */
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse5 -maes -mpclmul" } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse4a -maes -mpclmul" } */
#include <x86intrin.h>

View File

@ -1,7 +1,7 @@
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h and mm_malloc.h are
usable with -O -pedantic-errors. */
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse5 -maes -mpclmul" } */
/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse4a -maes -mpclmul" } */
#include <x86intrin.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -msse5 -maes -mpclmul" } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -maes -mpclmul" } */
#include <mm_malloc.h>
@ -127,13 +127,6 @@
#define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0)
#define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0)
/* bmmintrin.h */
#define __builtin_ia32_protbi(A, B) __builtin_ia32_protbi(A,1)
#define __builtin_ia32_protwi(A, B) __builtin_ia32_protwi(A,1)
#define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1)
#define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1)
#include <wmmintrin.h>
#include <bmmintrin.h>
#include <immintrin.h>
#include <mm3dnow.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -msse5 -maes -mpclmul" } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -msse4a -maes -mpclmul" } */
#include <mm_malloc.h>
@ -13,8 +13,8 @@
#define __inline
#include <wmmintrin.h>
#include <bmmintrin.h>
#include <immintrin.h>
#include <ammintrin.h>
#include <mm3dnow.h>
#define _CONCAT(x,y) x ## y
@ -161,8 +161,3 @@ test_1 (_mm_shuffle_pi16, __m64, __m64, 1)
test_1 (_m_pshufw, __m64, __m64, 1)
test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA)
/* bmmintrin.h */
test_1 (_mm_roti_epi8, __m128i, __m128i, 1)
test_1 (_mm_roti_epi16, __m128i, __m128i, 1)
test_1 (_mm_roti_epi32, __m128i, __m128i, 1)
test_1 (_mm_roti_epi64, __m128i, __m128i, 1)

View File

@ -1,99 +0,0 @@
/* Test whether using target specific options, we can generate SSE5 code. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -march=k8" } */
/* Declared by hand; the test includes no headers.  */
extern void exit (int);
/* Per-function target attribute enabling the "sse5" and "fused-madd" target
   options.  The dg-options line of this test only passes -O2 -march=k8, so
   these options come from the attribute alone.  */
#define SSE5_ATTR __attribute__((__target__("sse5,fused-madd")))
/* The attribute is attached to the prototypes; the definitions that follow
   do not repeat it.  */
extern float flt_mul_add (float a, float b, float c) SSE5_ATTR;
extern float flt_mul_sub (float a, float b, float c) SSE5_ATTR;
extern float flt_neg_mul_add (float a, float b, float c) SSE5_ATTR;
extern float flt_neg_mul_sub (float a, float b, float c) SSE5_ATTR;
extern double dbl_mul_add (double a, double b, double c) SSE5_ATTR;
extern double dbl_mul_sub (double a, double b, double c) SSE5_ATTR;
extern double dbl_neg_mul_add (double a, double b, double c) SSE5_ATTR;
extern double dbl_neg_mul_sub (double a, double b, double c) SSE5_ATTR;
/* Return (A * B) + C in single precision.
   NOTE(review): presumably the function expected to produce the "fmaddss"
   instruction that the dg-final directives of this test scan for -- confirm.  */
float
flt_mul_add (float a, float b, float c)
{
return (a * b) + c;
}
/* Return (A * B) + C in double precision.
   NOTE(review): presumably the function expected to produce the "fmaddsd"
   instruction that the dg-final directives of this test scan for -- confirm.  */
double
dbl_mul_add (double a, double b, double c)
{
return (a * b) + c;
}
/* Return (A * B) - C in single precision.
   NOTE(review): presumably the function expected to produce the "fmsubss"
   instruction that the dg-final directives of this test scan for -- confirm.  */
float
flt_mul_sub (float a, float b, float c)
{
return (a * b) - c;
}
/* Return (A * B) - C in double precision.
   NOTE(review): presumably the function expected to produce the "fmsubsd"
   instruction that the dg-final directives of this test scan for -- confirm.  */
double
dbl_mul_sub (double a, double b, double c)
{
return (a * b) - c;
}
/* Return (-(A * B)) + C in single precision; the product is negated before
   the addition.
   NOTE(review): presumably the function expected to produce the "fnmaddss"
   instruction that the dg-final directives of this test scan for -- confirm.  */
float
flt_neg_mul_add (float a, float b, float c)
{
return (-(a * b)) + c;
}
/* Return (-(A * B)) + C in double precision; the product is negated before
   the addition.
   NOTE(review): presumably the function expected to produce the "fnmaddsd"
   instruction that the dg-final directives of this test scan for -- confirm.  */
double
dbl_neg_mul_add (double a, double b, double c)
{
return (-(a * b)) + c;
}
/* Return (-(A * B)) - C in single precision; the product is negated before
   the subtraction.
   NOTE(review): presumably the function expected to produce the "fnmsubss"
   instruction that the dg-final directives of this test scan for -- confirm.  */
float
flt_neg_mul_sub (float a, float b, float c)
{
return (-(a * b)) - c;
}
/* Return (-(A * B)) - C in double precision; the product is negated before
   the subtraction.
   NOTE(review): presumably the function expected to produce the "fnmsubsd"
   instruction that the dg-final directives of this test scan for -- confirm.  */
double
dbl_neg_mul_sub (double a, double b, double c)
{
return (-(a * b)) - c;
}
/* Test data: elements 0..2 hold the inputs (2, 3, 4); main stores results
   into elements 3..6.  The remaining elements are zero-initialized.  */
float f[10] = { 2, 3, 4 };
double d[10] = { 2, 3, 4 };
/* Call each of the eight test functions once with the same three inputs,
   keep the results in the global arrays, and exit with status 0.  The
   dg-final directives of this test also scan the assembly for a call to
   each of these functions.  */
int main ()
{
f[3] = flt_mul_add (f[0], f[1], f[2]);
f[4] = flt_mul_sub (f[0], f[1], f[2]);
f[5] = flt_neg_mul_add (f[0], f[1], f[2]);
f[6] = flt_neg_mul_sub (f[0], f[1], f[2]);
d[3] = dbl_mul_add (d[0], d[1], d[2]);
d[4] = dbl_mul_sub (d[0], d[1], d[2]);
d[5] = dbl_neg_mul_add (d[0], d[1], d[2]);
d[6] = dbl_neg_mul_sub (d[0], d[1], d[2]);
exit (0);
}
/* { dg-final { scan-assembler "fmaddss" } } */
/* { dg-final { scan-assembler "fmaddsd" } } */
/* { dg-final { scan-assembler "fmsubss" } } */
/* { dg-final { scan-assembler "fmsubsd" } } */
/* { dg-final { scan-assembler "fnmaddss" } } */
/* { dg-final { scan-assembler "fnmaddsd" } } */
/* { dg-final { scan-assembler "fnmsubss" } } */
/* { dg-final { scan-assembler "fnmsubsd" } } */
/* { dg-final { scan-assembler "call\t(.*)flt_mul_add" } } */
/* { dg-final { scan-assembler "call\t(.*)flt_mul_sub" } } */
/* { dg-final { scan-assembler "call\t(.*)flt_neg_mul_add" } } */
/* { dg-final { scan-assembler "call\t(.*)flt_neg_mul_sub" } } */
/* { dg-final { scan-assembler "call\t(.*)dbl_mul_add" } } */
/* { dg-final { scan-assembler "call\t(.*)dbl_mul_sub" } } */
/* { dg-final { scan-assembler "call\t(.*)dbl_neg_mul_add" } } */
/* { dg-final { scan-assembler "call\t(.*)dbl_neg_mul_sub" } } */

View File

@ -17,7 +17,6 @@ extern void test_sse4 (void) __attribute__((__target__("sse4")));
extern void test_sse4_1 (void) __attribute__((__target__("sse4.1")));
extern void test_sse4_2 (void) __attribute__((__target__("sse4.2")));
extern void test_sse4a (void) __attribute__((__target__("sse4a")));
extern void test_sse5 (void) __attribute__((__target__("sse5")));
extern void test_ssse3 (void) __attribute__((__target__("ssse3")));
extern void test_no_abm (void) __attribute__((__target__("no-abm")));
@ -34,7 +33,6 @@ extern void test_no_sse4 (void) __attribute__((__target__("no-sse4")));
extern void test_no_sse4_1 (void) __attribute__((__target__("no-sse4.1")));
extern void test_no_sse4_2 (void) __attribute__((__target__("no-sse4.2")));
extern void test_no_sse4a (void) __attribute__((__target__("no-sse4a")));
extern void test_no_sse5 (void) __attribute__((__target__("no-sse5")));
extern void test_no_ssse3 (void) __attribute__((__target__("no-ssse3")));
extern void test_arch_i386 (void) __attribute__((__target__("arch=i386")));

View File

@ -5,7 +5,6 @@
extern void test_abm (void) __attribute__((__target__("abm")));
extern void test_aes (void) __attribute__((__target__("aes")));
extern void test_fused_madd (void) __attribute__((__target__("fused-madd")));
extern void test_mmx (void) __attribute__((__target__("mmx")));
extern void test_pclmul (void) __attribute__((__target__("pclmul")));
extern void test_popcnt (void) __attribute__((__target__("popcnt")));
@ -17,12 +16,10 @@ extern void test_sse4 (void) __attribute__((__target__("sse4")));
extern void test_sse4_1 (void) __attribute__((__target__("sse4.1")));
extern void test_sse4_2 (void) __attribute__((__target__("sse4.2")));
extern void test_sse4a (void) __attribute__((__target__("sse4a")));
extern void test_sse5 (void) __attribute__((__target__("sse5")));
extern void test_ssse3 (void) __attribute__((__target__("ssse3")));
extern void test_no_abm (void) __attribute__((__target__("no-abm")));
extern void test_no_aes (void) __attribute__((__target__("no-aes")));
extern void test_no_fused_madd (void) __attribute__((__target__("no-fused-madd")));
extern void test_no_mmx (void) __attribute__((__target__("no-mmx")));
extern void test_no_pclmul (void) __attribute__((__target__("no-pclmul")));
extern void test_no_popcnt (void) __attribute__((__target__("no-popcnt")));
@ -34,7 +31,6 @@ extern void test_no_sse4 (void) __attribute__((__target__("no-sse4")));
extern void test_no_sse4_1 (void) __attribute__((__target__("no-sse4.1")));
extern void test_no_sse4_2 (void) __attribute__((__target__("no-sse4.2")));
extern void test_no_sse4a (void) __attribute__((__target__("no-sse4a")));
extern void test_no_sse5 (void) __attribute__((__target__("no-sse5")));
extern void test_no_ssse3 (void) __attribute__((__target__("no-ssse3")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));

View File

@ -103,25 +103,6 @@ generic_insertq (__m128i a, __m128i b)
return __builtin_ia32_insertq (a, b); /* { dg-error "needs isa option" } */
}
#ifdef __SSE5__
#error "-msse5 should not be set for this test"
#endif
__m128d sse5_fmaddpd (__m128d a, __m128d b, __m128d c) __attribute__((__target__("sse5")));
__m128d generic_fmaddpd (__m128d a, __m128d b, __m128d c);
/* Call the fmaddpd builtin from a function whose prototype carries
   __target__("sse5").  No dg-error directive is attached, so no diagnostic
   is expected here.  */
__m128d
sse5_fmaddpd (__m128d a, __m128d b, __m128d c)
{
return __builtin_ia32_fmaddpd (a, b, c);
}
/* Make the same builtin call from a function declared without any target
   attribute; the dg-error directive on the call expects the
   "needs isa option" diagnostic.  */
__m128d
generic_fmaddpd (__m128d a, __m128d b, __m128d c)
{
return __builtin_ia32_fmaddpd (a, b, c); /* { dg-error "needs isa option" } */
}
#ifdef __AES__
#error "-maes should not be set for this test"
#endif

View File

@ -4,15 +4,15 @@
extern void exit (int);
#ifdef __SSE5__
#warning "__SSE5__ should not be defined before #pragma GCC target."
#ifdef __SSE4A__
#warning "__SSE4A__ should not be defined before #pragma GCC target."
#endif
#pragma GCC push_options
#pragma GCC target ("sse5,fused-madd")
#pragma GCC target ("sse4a")
#ifndef __SSE5__
#warning "__SSE5__ should have be defined after #pragma GCC target."
#ifndef __SSE4A__
#warning "__SSE4A__ should have be defined after #pragma GCC target."
#endif
float
@ -22,8 +22,8 @@ flt_mul_add (float a, float b, float c)
}
#pragma GCC pop_options
#ifdef __SSE5__
#warning "__SSE5__ should not be defined after #pragma GCC pop target."
#ifdef __SSE4A__
#warning "__SSE4A__ should not be defined after #pragma GCC pop target."
#endif
double
@ -32,5 +32,5 @@ dbl_mul_add (double a, double b, double c)
return (a * b) + c;
}
/* { dg-final { scan-assembler "fmaddss" } } */
/* We used to generate fused-madd with SSE5 support, but don't do that anymore. */
/* { dg-final { scan-assembler "addsd" } } */

View File

@ -1,34 +0,0 @@
/* { dg-do run } */
/* { dg-options "-march=x86-64 -msse5 -mno-sse4" } */
extern void abort (void);
/* Verify the predefined ISA macros for this test's dg-options: __SSE__,
   __SSE2__, __SSE3__, __SSE4A__ and __SSE5__ must be defined, while
   __SSSE3__, __SSE4_1__ and __SSE4_2__ must not be.  Any mismatch aborts;
   otherwise the program exits with status 0.  */
int
main ()
{
#if !defined __SSE__ || !defined __SSE2__ || !defined __SSE3__ \
    || defined __SSSE3__ || defined __SSE4_1__ || defined __SSE4_2__ \
    || !defined __SSE4A__ || !defined __SSE5__
  abort ();
#endif
  return 0;
}

View File

@ -1,34 +0,0 @@
/* { dg-do run } */
/* { dg-options "-march=x86-64 -msse5 -mno-ssse3" } */
extern void abort (void);
/* Verify the predefined ISA macros for this test's dg-options: __SSE__,
   __SSE2__, __SSE3__, __SSE4A__ and __SSE5__ must be defined, while
   __SSSE3__, __SSE4_1__ and __SSE4_2__ must not be.  Any mismatch aborts;
   otherwise the program exits with status 0.  */
int
main ()
{
#if !defined __SSE__ || !defined __SSE2__ || !defined __SSE3__ \
    || defined __SSSE3__ || defined __SSE4_1__ || defined __SSE4_2__ \
    || !defined __SSE4A__ || !defined __SSE5__
  abort ();
#endif
  return 0;
}

View File

@ -1,34 +0,0 @@
/* { dg-do run } */
/* { dg-options "-march=x86-64 -msse5 -mno-sse3" } */
extern void abort (void);
/* Verify the predefined ISA macros for this test's dg-options: only
   __SSE__ and __SSE2__ must be defined; __SSE3__, __SSSE3__, __SSE4_1__,
   __SSE4_2__, __SSE4A__ and __SSE5__ must all be undefined.  Any mismatch
   aborts; otherwise the program exits with status 0.  */
int
main ()
{
#if !defined __SSE__ || !defined __SSE2__ \
    || defined __SSE3__ || defined __SSSE3__ || defined __SSE4_1__ \
    || defined __SSE4_2__ || defined __SSE4A__ || defined __SSE5__
  abort ();
#endif
  return 0;
}

View File

@ -1,34 +0,0 @@
/* { dg-do run } */
/* { dg-options "-march=x86-64 -msse5 -mno-sse2" } */
extern void abort (void);
/* Verify the predefined ISA macros for this test's dg-options: only
   __SSE__ must be defined; __SSE2__, __SSE3__, __SSSE3__, __SSE4_1__,
   __SSE4_2__, __SSE4A__ and __SSE5__ must all be undefined.  Any mismatch
   aborts; otherwise the program exits with status 0.  */
int
main ()
{
#if !defined __SSE__ \
    || defined __SSE2__ || defined __SSE3__ || defined __SSSE3__ \
    || defined __SSE4_1__ || defined __SSE4_2__ || defined __SSE4A__ \
    || defined __SSE5__
  abort ();
#endif
  return 0;
}

View File

@ -1,5 +1,5 @@
/* { dg-do run } */
/* { dg-options "-march=x86-64 -msse5 -mno-sse" } */
/* { dg-options "-march=x86-64 -msse4a -mno-sse" } */
extern void abort (void);
@ -26,9 +26,6 @@ main ()
#endif
#if defined __SSE4A__
abort ();
#endif
#if defined __SSE5__
abort ();
#endif
return 0;
}

View File

@ -1,34 +0,0 @@
/* { dg-do run } */
/* { dg-options "-march=x86-64 -msse4 -msse5" } */
extern void abort (void);
/* Verify the predefined ISA macros for this test's dg-options: every one of
   __SSE__, __SSE2__, __SSE3__, __SSSE3__, __SSE4_1__, __SSE4_2__,
   __SSE4A__ and __SSE5__ must be defined.  A missing macro aborts;
   otherwise the program exits with status 0.  */
int
main ()
{
#if !defined __SSE__ || !defined __SSE2__ || !defined __SSE3__ \
    || !defined __SSSE3__ || !defined __SSE4_1__ || !defined __SSE4_2__ \
    || !defined __SSE4A__ || !defined __SSE5__
  abort ();
#endif
  return 0;
}

View File

@ -1,34 +0,0 @@
/* { dg-do run } */
/* { dg-options "-march=x86-64 -msse4 -msse5 -msse4a" } */
extern void abort (void);
/* Verify the predefined ISA macros for this test's dg-options: every one of
   __SSE__, __SSE2__, __SSE3__, __SSSE3__, __SSE4_1__, __SSE4_2__,
   __SSE4A__ and __SSE5__ must be defined.  A missing macro aborts;
   otherwise the program exits with status 0.  */
int
main ()
{
#if !defined __SSE__ || !defined __SSE2__ || !defined __SSE3__ \
    || !defined __SSSE3__ || !defined __SSE4_1__ || !defined __SSE4_2__ \
    || !defined __SSE4A__ || !defined __SSE5__
  abort ();
#endif
  return 0;
}

View File

@ -1,34 +0,0 @@
/* { dg-do run } */
/* { dg-options "-march=core2 -msse5 -mno-sse4" } */
extern void abort (void);
/* Verify the predefined ISA macros for this test's dg-options: __SSE__,
   __SSE2__, __SSE3__, __SSSE3__, __SSE4A__ and __SSE5__ must be defined,
   while __SSE4_1__ and __SSE4_2__ must not be.  Any mismatch aborts;
   otherwise the program exits with status 0.  */
int
main ()
{
#if !defined __SSE__ || !defined __SSE2__ || !defined __SSE3__ \
    || !defined __SSSE3__ \
    || defined __SSE4_1__ || defined __SSE4_2__ \
    || !defined __SSE4A__ || !defined __SSE5__
  abort ();
#endif
  return 0;
}

View File

@ -1,34 +0,0 @@
/* { dg-do run } */
/* { dg-options "-march=amdfam10 -msse5 -mno-sse4" } */
extern void abort (void);
/* Verify the predefined ISA macros for this test's dg-options: __SSE__,
   __SSE2__, __SSE3__, __SSE4A__ and __SSE5__ must be defined, while
   __SSSE3__, __SSE4_1__ and __SSE4_2__ must not be.  Any mismatch aborts;
   otherwise the program exits with status 0.  */
int
main ()
{
#if !defined __SSE__ || !defined __SSE2__ || !defined __SSE3__ \
    || defined __SSSE3__ || defined __SSE4_1__ || defined __SSE4_2__ \
    || !defined __SSE4A__ || !defined __SSE5__
  abort ();
#endif
  return 0;
}

View File

@ -1,34 +0,0 @@
/* { dg-do run } */
/* { dg-options "-march=amdfam10 -msse5 -mno-sse4a" } */
extern void abort (void);
/* Verify the predefined ISA macros for this test's dg-options: __SSE__,
   __SSE2__ and __SSE3__ must be defined, while __SSSE3__, __SSE4_1__,
   __SSE4_2__, __SSE4A__ and __SSE5__ must not be.  Any mismatch aborts;
   otherwise the program exits with status 0.  */
int
main ()
{
#if !defined __SSE__ || !defined __SSE2__ || !defined __SSE3__ \
    || defined __SSSE3__ || defined __SSE4_1__ || defined __SSE4_2__ \
    || defined __SSE4A__ || defined __SSE5__
  abort ();
#endif
  return 0;
}

View File

@ -1,34 +0,0 @@
/* { dg-do run } */
/* { dg-options "-march=amdfam10 -mno-sse5" } */
extern void abort (void);
/* Verify the predefined ISA macros for this test's dg-options: __SSE__,
   __SSE2__, __SSE3__ and __SSE4A__ must be defined, while __SSSE3__,
   __SSE4_1__, __SSE4_2__ and __SSE5__ must not be.  Any mismatch aborts;
   otherwise the program exits with status 0.  */
int
main ()
{
#if !defined __SSE__ || !defined __SSE2__ || !defined __SSE3__ \
    || defined __SSSE3__ || defined __SSE4_1__ || defined __SSE4_2__ \
    || !defined __SSE4A__ \
    || defined __SSE5__
  abort ();
#endif
  return 0;
}

View File

@ -1,25 +0,0 @@
/* Test that the compiler properly optimizes vector SI->DI conversions. This
was a bug in the initial SSE5 code. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -ftree-vectorize" } */
/* Local 16-byte vector type; the test includes no intrinsic headers.  */
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
/* Number of elements in each array of the unions below.  */
#define SIZE 10240
/* Two objects, each overlaying an int array, a long array and a vector.
   NOTE(review): the __m128i member is named `align' and is never accessed
   in this test, so it presumably exists only to force vector alignment --
   confirm.  */
union {
signed int si[SIZE];
signed long sl[SIZE];
__m128i align;
} a, b;
/* Convert each signed int element of b.si to signed long and store it in
   the matching element of a.sl.  Per the comment at the top of this test,
   this is the vector SI->DI conversion being checked; the dg-final
   directive scans the assembly for "pperm".  */
void conv_sign_int_sign_long (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.sl[i] = b.si[i];
}
/* { dg-final { scan-assembler "pperm" } } */

View File

@ -1,8 +0,0 @@
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h and mm_malloc.h are
usable with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -msse5 -maes -mpclmul" } */
#include <x86intrin.h>
int dummy;

View File

@ -1,135 +0,0 @@
/* { dg-do compile } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -msse5 -maes -mpclmul" } */
#include <mm_malloc.h>
/* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h and mm3dnow.h
that reference the proper builtin functions. Defining away "extern" and
"__inline" results in all of them being compiled as proper functions. */
#define extern
#define __inline
/* Following intrinsics require immediate arguments. */
/* ammintrin.h */
#define __builtin_ia32_extrqi(X, I, L) __builtin_ia32_extrqi(X, 1, 1)
#define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1)
/* immintrin.h */
#define __builtin_ia32_blendpd256(X, Y, M) __builtin_ia32_blendpd256(X, Y, 1)
#define __builtin_ia32_blendps256(X, Y, M) __builtin_ia32_blendps256(X, Y, 1)
#define __builtin_ia32_dpps256(X, Y, M) __builtin_ia32_dpps256(X, Y, 1)
#define __builtin_ia32_shufpd256(X, Y, M) __builtin_ia32_shufpd256(X, Y, 1)
#define __builtin_ia32_shufps256(X, Y, M) __builtin_ia32_shufps256(X, Y, 1)
#define __builtin_ia32_cmpsd(X, Y, O) __builtin_ia32_cmpsd(X, Y, 1)
#define __builtin_ia32_cmpss(X, Y, O) __builtin_ia32_cmpss(X, Y, 1)
#define __builtin_ia32_cmppd(X, Y, O) __builtin_ia32_cmppd(X, Y, 1)
#define __builtin_ia32_cmpps(X, Y, O) __builtin_ia32_cmpps(X, Y, 1)
#define __builtin_ia32_cmppd256(X, Y, O) __builtin_ia32_cmppd256(X, Y, 1)
#define __builtin_ia32_cmpps256(X, Y, O) __builtin_ia32_cmpps256(X, Y, 1)
#define __builtin_ia32_vextractf128_pd256(X, N) __builtin_ia32_vextractf128_pd256(X, 1)
#define __builtin_ia32_vextractf128_ps256(X, N) __builtin_ia32_vextractf128_ps256(X, 1)
#define __builtin_ia32_vextractf128_si256(X, N) __builtin_ia32_vextractf128_si256(X, 1)
#define __builtin_ia32_vpermilpd(X, N) __builtin_ia32_vpermilpd(X, 1)
#define __builtin_ia32_vpermilpd256(X, N) __builtin_ia32_vpermilpd256(X, 1)
#define __builtin_ia32_vpermilps(X, N) __builtin_ia32_vpermilps(X, 1)
#define __builtin_ia32_vpermilps256(X, N) __builtin_ia32_vpermilps256(X, 1)
#define __builtin_ia32_vpermil2pd(X, Y, C, I) __builtin_ia32_vpermil2pd(X, Y, C, 1)
#define __builtin_ia32_vpermil2pd256(X, Y, C, I) __builtin_ia32_vpermil2pd256(X, Y, C, 1)
#define __builtin_ia32_vpermil2ps(X, Y, C, I) __builtin_ia32_vpermil2ps(X, Y, C, 1)
#define __builtin_ia32_vpermil2ps256(X, Y, C, I) __builtin_ia32_vpermil2ps256(X, Y, C, 1)
#define __builtin_ia32_vperm2f128_pd256(X, Y, C) __builtin_ia32_vperm2f128_pd256(X, Y, 1)
#define __builtin_ia32_vperm2f128_ps256(X, Y, C) __builtin_ia32_vperm2f128_ps256(X, Y, 1)
#define __builtin_ia32_vperm2f128_si256(X, Y, C) __builtin_ia32_vperm2f128_si256(X, Y, 1)
#define __builtin_ia32_vinsertf128_pd256(X, Y, C) __builtin_ia32_vinsertf128_pd256(X, Y, 1)
#define __builtin_ia32_vinsertf128_ps256(X, Y, C) __builtin_ia32_vinsertf128_ps256(X, Y, 1)
#define __builtin_ia32_vinsertf128_si256(X, Y, C) __builtin_ia32_vinsertf128_si256(X, Y, 1)
#define __builtin_ia32_roundpd256(V, M) __builtin_ia32_roundpd256(V, 1)
#define __builtin_ia32_roundps256(V, M) __builtin_ia32_roundps256(V, 1)
/* wmmintrin.h */
#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
#define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1)
/* mmintrin-common.h */
#define __builtin_ia32_roundpd(V, M) __builtin_ia32_roundpd(V, 1)
#define __builtin_ia32_roundsd(D, V, M) __builtin_ia32_roundsd(D, V, 1)
#define __builtin_ia32_roundps(V, M) __builtin_ia32_roundps(V, 1)
#define __builtin_ia32_roundss(D, V, M) __builtin_ia32_roundss(D, V, 1)
/* smmintrin.h */
#define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1)
#define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1)
#define __builtin_ia32_blendpd(X, Y, M) __builtin_ia32_blendpd(X, Y, 1)
#define __builtin_ia32_dpps(X, Y, M) __builtin_ia32_dpps(X, Y, 1)
#define __builtin_ia32_dppd(X, Y, M) __builtin_ia32_dppd(X, Y, 1)
#define __builtin_ia32_insertps128(D, S, N) __builtin_ia32_insertps128(D, S, 1)
#define __builtin_ia32_vec_ext_v4sf(X, N) __builtin_ia32_vec_ext_v4sf(X, 1)
#define __builtin_ia32_vec_set_v16qi(D, S, N) __builtin_ia32_vec_set_v16qi(D, S, 1)
#define __builtin_ia32_vec_set_v4si(D, S, N) __builtin_ia32_vec_set_v4si(D, S, 1)
#define __builtin_ia32_vec_set_v2di(D, S, N) __builtin_ia32_vec_set_v2di(D, S, 1)
#define __builtin_ia32_vec_ext_v16qi(X, N) __builtin_ia32_vec_ext_v16qi(X, 1)
#define __builtin_ia32_vec_ext_v4si(X, N) __builtin_ia32_vec_ext_v4si(X, 1)
#define __builtin_ia32_vec_ext_v2di(X, N) __builtin_ia32_vec_ext_v2di(X, 1)
#define __builtin_ia32_mpsadbw128(X, Y, M) __builtin_ia32_mpsadbw128(X, Y, 1)
#define __builtin_ia32_pcmpistrm128(X, Y, M) \
__builtin_ia32_pcmpistrm128(X, Y, 1)
#define __builtin_ia32_pcmpistri128(X, Y, M) \
__builtin_ia32_pcmpistri128(X, Y, 1)
#define __builtin_ia32_pcmpestrm128(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestrm128(X, LX, Y, LY, 1)
#define __builtin_ia32_pcmpestri128(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestri128(X, LX, Y, LY, 1)
#define __builtin_ia32_pcmpistria128(X, Y, M) \
__builtin_ia32_pcmpistria128(X, Y, 1)
#define __builtin_ia32_pcmpistric128(X, Y, M) \
__builtin_ia32_pcmpistric128(X, Y, 1)
#define __builtin_ia32_pcmpistrio128(X, Y, M) \
__builtin_ia32_pcmpistrio128(X, Y, 1)
#define __builtin_ia32_pcmpistris128(X, Y, M) \
__builtin_ia32_pcmpistris128(X, Y, 1)
#define __builtin_ia32_pcmpistriz128(X, Y, M) \
__builtin_ia32_pcmpistriz128(X, Y, 1)
#define __builtin_ia32_pcmpestria128(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestria128(X, LX, Y, LY, 1)
#define __builtin_ia32_pcmpestric128(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestric128(X, LX, Y, LY, 1)
#define __builtin_ia32_pcmpestrio128(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestrio128(X, LX, Y, LY, 1)
#define __builtin_ia32_pcmpestris128(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestris128(X, LX, Y, LY, 1)
#define __builtin_ia32_pcmpestriz128(X, LX, Y, LY, M) \
__builtin_ia32_pcmpestriz128(X, LX, Y, LY, 1)
/* tmmintrin.h */
#define __builtin_ia32_palignr128(X, Y, N) __builtin_ia32_palignr128(X, Y, 8)
#define __builtin_ia32_palignr(X, Y, N) __builtin_ia32_palignr(X, Y, 8)
/* emmintrin.h */
#define __builtin_ia32_psrldqi128(A, B) __builtin_ia32_psrldqi128(A, 8)
#define __builtin_ia32_pslldqi128(A, B) __builtin_ia32_pslldqi128(A, 8)
#define __builtin_ia32_pshufhw(A, N) __builtin_ia32_pshufhw(A, 0)
#define __builtin_ia32_pshuflw(A, N) __builtin_ia32_pshuflw(A, 0)
#define __builtin_ia32_pshufd(A, N) __builtin_ia32_pshufd(A, 0)
#define __builtin_ia32_vec_set_v8hi(A, D, N) \
__builtin_ia32_vec_set_v8hi(A, D, 0)
#define __builtin_ia32_vec_ext_v8hi(A, N) __builtin_ia32_vec_ext_v8hi(A, 0)
#define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0)
/* xmmintrin.h */
#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, A, _MM_HINT_NTA)
#define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0)
#define __builtin_ia32_vec_set_v4hi(A, D, N) \
__builtin_ia32_vec_set_v4hi(A, D, 0)
#define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0)
#define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0)
/* bmmintrin.h */
#define __builtin_ia32_protbi(A, B) __builtin_ia32_protbi(A,1)
#define __builtin_ia32_protwi(A, B) __builtin_ia32_protwi(A,1)
#define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1)
#define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1)
#include <x86intrin.h>

View File

@ -1,164 +0,0 @@
/* { dg-do compile } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -msse5 -maes -mpclmul" } */
#include <mm_malloc.h>
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h
that reference the proper builtin functions. Defining away "extern" and
"__inline" results in all of them being compiled as proper functions. */
#define extern
#define __inline
#include <x86intrin.h>
/* Paste two tokens together; used below to derive each wrapper's name.  */
#define _CONCAT(x,y) x ## y
/* Each test_* macro defines a wrapper function named "_" followed by FUNC.
   The wrapper takes the listed operands plus trailing "int const"
   parameters, but it calls FUNC with the literal immediate(s) passed to the
   macro, not with those parameters.  */
/* One operand, one immediate.  */
#define test_1(func, type, op1_type, imm) \
type _CONCAT(_,func) (op1_type A, int const I) \
{ return func (A, imm); }
/* One operand, two immediates.  */
#define test_1x(func, type, op1_type, imm1, imm2) \
type _CONCAT(_,func) (op1_type A, int const I, int const L) \
{ return func (A, imm1, imm2); }
/* Two operands, one immediate.  */
#define test_2(func, type, op1_type, op2_type, imm) \
type _CONCAT(_,func) (op1_type A, op2_type B, int const I) \
{ return func (A, B, imm); }
/* Two operands, two immediates.  */
#define test_2x(func, type, op1_type, op2_type, imm1, imm2) \
type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
{ return func (A, B, imm1, imm2); }
/* Three operands, one immediate.  */
#define test_3(func, type, op1_type, op2_type, op3_type, imm) \
type _CONCAT(_,func) (op1_type A, op2_type B, \
op3_type C, int const I) \
{ return func (A, B, C, imm); }
/* Four operands, one immediate.  */
#define test_4(func, type, op1_type, op2_type, op3_type, op4_type, imm) \
type _CONCAT(_,func) (op1_type A, op2_type B, \
op3_type C, op4_type D, int const I) \
{ return func (A, B, C, D, imm); }
/* Following intrinsics require immediate arguments. They
are defined as macros for non-optimized compilations. */
/* ammintrin.h */
test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1)
test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1)
/* immintrin.h */
test_2 (_mm256_blend_pd, __m256d, __m256d, __m256d, 1)
test_2 (_mm256_blend_ps, __m256, __m256, __m256, 1)
test_2 (_mm256_dp_ps, __m256, __m256, __m256, 1)
test_2 (_mm256_shuffle_pd, __m256d, __m256d, __m256d, 1)
test_2 (_mm256_shuffle_ps, __m256, __m256, __m256, 1)
test_2 (_mm_cmp_sd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_cmp_ss, __m128, __m128, __m128, 1)
test_2 (_mm_cmp_pd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_cmp_ps, __m128, __m128, __m128, 1)
test_2 (_mm256_cmp_pd, __m256d, __m256d, __m256d, 1)
test_2 (_mm256_cmp_ps, __m256, __m256, __m256, 1)
test_1 (_mm256_extractf128_pd, __m128d, __m256d, 1)
test_1 (_mm256_extractf128_ps, __m128, __m256, 1)
test_1 (_mm256_extractf128_si256, __m128i, __m256i, 1)
test_1 (_mm256_extract_epi8, int, __m256i, 20)
test_1 (_mm256_extract_epi16, int, __m256i, 13)
test_1 (_mm256_extract_epi32, int, __m256i, 6)
#ifdef __x86_64__
test_1 (_mm256_extract_epi64, long long, __m256i, 2)
#endif
test_1 (_mm_permute_pd, __m128d, __m128d, 1)
test_1 (_mm256_permute_pd, __m256d, __m256d, 1)
test_1 (_mm_permute_ps, __m128, __m128, 1)
test_1 (_mm256_permute_ps, __m256, __m256, 1)
test_2 (_mm256_permute2f128_pd, __m256d, __m256d, __m256d, 1)
test_2 (_mm256_permute2f128_ps, __m256, __m256, __m256, 1)
test_2 (_mm256_permute2f128_si256, __m256i, __m256i, __m256i, 1)
test_2 (_mm256_insertf128_pd, __m256d, __m256d, __m128d, 1)
test_2 (_mm256_insertf128_ps, __m256, __m256, __m128, 1)
test_2 (_mm256_insertf128_si256, __m256i, __m256i, __m128i, 1)
test_2 (_mm256_insert_epi8, __m256i, __m256i, int, 30)
test_2 (_mm256_insert_epi16, __m256i, __m256i, int, 7)
test_2 (_mm256_insert_epi32, __m256i, __m256i, int, 3)
#ifdef __x86_64__
test_2 (_mm256_insert_epi64, __m256i, __m256i, long long, 1)
#endif
test_1 (_mm256_round_pd, __m256d, __m256d, 1)
test_1 (_mm256_round_ps, __m256, __m256, 1)
/* wmmintrin.h */
test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1)
test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
/* mmintrin-common.h */
test_1 (_mm_round_pd, __m128d, __m128d, 1)
test_1 (_mm_round_ps, __m128, __m128, 1)
test_2 (_mm_round_sd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_round_ss, __m128, __m128, __m128, 1)
/* smmintrin.h */
test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1)
test_2 (_mm_blend_ps, __m128, __m128, __m128, 1)
test_2 (_mm_blend_pd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_dp_ps, __m128, __m128, __m128, 1)
test_2 (_mm_dp_pd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_insert_ps, __m128, __m128, __m128, 1)
test_1 (_mm_extract_ps, int, __m128, 1)
test_2 (_mm_insert_epi8, __m128i, __m128i, int, 1)
test_2 (_mm_insert_epi32, __m128i, __m128i, int, 1)
#ifdef __x86_64__
test_2 (_mm_insert_epi64, __m128i, __m128i, long long, 1)
#endif
test_1 (_mm_extract_epi8, int, __m128i, 1)
test_1 (_mm_extract_epi32, int, __m128i, 1)
#ifdef __x86_64__
test_1 (_mm_extract_epi64, long long, __m128i, 1)
#endif
test_2 (_mm_mpsadbw_epu8, __m128i, __m128i, __m128i, 1)
test_2 (_mm_cmpistrm, __m128i, __m128i, __m128i, 1)
test_2 (_mm_cmpistri, int, __m128i, __m128i, 1)
test_4 (_mm_cmpestrm, __m128i, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestri, int, __m128i, int, __m128i, int, 1)
test_2 (_mm_cmpistra, int, __m128i, __m128i, 1)
test_2 (_mm_cmpistrc, int, __m128i, __m128i, 1)
test_2 (_mm_cmpistro, int, __m128i, __m128i, 1)
test_2 (_mm_cmpistrs, int, __m128i, __m128i, 1)
test_2 (_mm_cmpistrz, int, __m128i, __m128i, 1)
test_4 (_mm_cmpestra, int, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestrc, int, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestro, int, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestrs, int, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* tmmintrin.h */
test_2 (_mm_alignr_epi8, __m128i, __m128i, __m128i, 1)
test_2 (_mm_alignr_pi8, __m64, __m64, __m64, 1)
/* emmintrin.h */
test_2 (_mm_shuffle_pd, __m128d, __m128d, __m128d, 1)
test_1 (_mm_srli_si128, __m128i, __m128i, 1)
test_1 (_mm_slli_si128, __m128i, __m128i, 1)
test_1 (_mm_extract_epi16, int, __m128i, 1)
test_2 (_mm_insert_epi16, __m128i, __m128i, int, 1)
test_1 (_mm_shufflehi_epi16, __m128i, __m128i, 1)
test_1 (_mm_shufflelo_epi16, __m128i, __m128i, 1)
test_1 (_mm_shuffle_epi32, __m128i, __m128i, 1)
/* xmmintrin.h */
test_2 (_mm_shuffle_ps, __m128, __m128, __m128, 1)
test_1 (_mm_extract_pi16, int, __m64, 1)
test_1 (_m_pextrw, int, __m64, 1)
test_2 (_mm_insert_pi16, __m64, __m64, int, 1)
test_2 (_m_pinsrw, __m64, __m64, int, 1)
test_1 (_mm_shuffle_pi16, __m64, __m64, 1)
test_1 (_m_pshufw, __m64, __m64, 1)
test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA)
/* bmmintrin.h */
test_1 (_mm_roti_epi8, __m128i, __m128i, 1)
test_1 (_mm_roti_epi16, __m128i, __m128i, 1)
test_1 (_mm_roti_epi32, __m128i, __m128i, 1)
test_1 (_mm_roti_epi64, __m128i, __m128i, 1)

View File

@ -1,171 +0,0 @@
/* Same as sse-14, except converted to use #pragma GCC target instead of command-line ISA options. */
/* { dg-do compile } */
/* { dg-options "-O0 -Werror-implicit-function-declaration" } */
#include <mm_malloc.h>
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h
that reference the proper builtin functions. Defining away "extern" and
"__inline" results in all of them being compiled as proper functions. */
#define extern
#define __inline
/* Paste two tokens together; used below to derive each wrapper's name.  */
#define _CONCAT(x,y) x ## y
/* Each test_* macro defines a wrapper function named "_" followed by FUNC.
   The wrapper takes the listed operands plus trailing "int const"
   parameters, but it calls FUNC with the literal immediate(s) passed to the
   macro, not with those parameters.  */
/* One operand, one immediate.  */
#define test_1(func, type, op1_type, imm) \
type _CONCAT(_,func) (op1_type A, int const I) \
{ return func (A, imm); }
/* One operand, two immediates.  */
#define test_1x(func, type, op1_type, imm1, imm2) \
type _CONCAT(_,func) (op1_type A, int const I, int const L) \
{ return func (A, imm1, imm2); }
/* Two operands, one immediate.  */
#define test_2(func, type, op1_type, op2_type, imm) \
type _CONCAT(_,func) (op1_type A, op2_type B, int const I) \
{ return func (A, B, imm); }
/* Two operands, two immediates.  */
#define test_2x(func, type, op1_type, op2_type, imm1, imm2) \
type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
{ return func (A, B, imm1, imm2); }
/* Four operands, one immediate.  */
#define test_4(func, type, op1_type, op2_type, op3_type, op4_type, imm) \
type _CONCAT(_,func) (op1_type A, op2_type B, \
op3_type C, op4_type D, int const I) \
{ return func (A, B, C, D, imm); }
#ifndef DIFFERENT_PRAGMAS
#pragma GCC target ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse5,aes,pclmul")
#endif
/* Following intrinsics require immediate arguments. They
are defined as macros for non-optimized compilations. */
/* mmintrin.h (MMX). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("mmx")
#endif
#include <mmintrin.h>
/* mm3dnow.h (3DNOW). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("3dnow")
#endif
#include <mm3dnow.h>
/* xmmintrin.h (SSE). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("sse")
#endif
#include <xmmintrin.h>
test_2 (_mm_shuffle_ps, __m128, __m128, __m128, 1)
test_1 (_mm_extract_pi16, int, __m64, 1)
test_1 (_m_pextrw, int, __m64, 1)
test_2 (_mm_insert_pi16, __m64, __m64, int, 1)
test_2 (_m_pinsrw, __m64, __m64, int, 1)
test_1 (_mm_shuffle_pi16, __m64, __m64, 1)
test_1 (_m_pshufw, __m64, __m64, 1)
test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA)
/* emmintrin.h (SSE2). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("sse2")
#endif
#include <emmintrin.h>
test_2 (_mm_shuffle_pd, __m128d, __m128d, __m128d, 1)
test_1 (_mm_srli_si128, __m128i, __m128i, 1)
test_1 (_mm_slli_si128, __m128i, __m128i, 1)
test_1 (_mm_extract_epi16, int, __m128i, 1)
test_2 (_mm_insert_epi16, __m128i, __m128i, int, 1)
test_1 (_mm_shufflehi_epi16, __m128i, __m128i, 1)
test_1 (_mm_shufflelo_epi16, __m128i, __m128i, 1)
test_1 (_mm_shuffle_epi32, __m128i, __m128i, 1)
/* pmmintrin.h (SSE3). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("sse3")
#endif
#include <pmmintrin.h>
/* tmmintrin.h (SSSE3). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("ssse3")
#endif
#include <tmmintrin.h>
test_2 (_mm_alignr_epi8, __m128i, __m128i, __m128i, 1)
test_2 (_mm_alignr_pi8, __m64, __m64, __m64, 1)
/* ammintrin.h (SSE4A). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("sse4a")
#endif
#include <ammintrin.h>
test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1)
test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1)
/* smmintrin.h (SSE4.1). */
/* nmmintrin.h (SSE4.2). */
/* Note, nmmintrin.h includes smmintrin.h, and smmintrin.h checks for the
#ifdef. So just set the option to SSE4.2. */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("sse4.2")
#endif
#include <nmmintrin.h>
test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1)
test_2 (_mm_blend_ps, __m128, __m128, __m128, 1)
test_2 (_mm_blend_pd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_dp_ps, __m128, __m128, __m128, 1)
test_2 (_mm_dp_pd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_insert_ps, __m128, __m128, __m128, 1)
test_1 (_mm_extract_ps, int, __m128, 1)
test_2 (_mm_insert_epi8, __m128i, __m128i, int, 1)
test_2 (_mm_insert_epi32, __m128i, __m128i, int, 1)
#ifdef __x86_64__
test_2 (_mm_insert_epi64, __m128i, __m128i, long long, 1)
#endif
test_1 (_mm_extract_epi8, int, __m128i, 1)
test_1 (_mm_extract_epi32, int, __m128i, 1)
#ifdef __x86_64__
test_1 (_mm_extract_epi64, long long, __m128i, 1)
#endif
test_2 (_mm_mpsadbw_epu8, __m128i, __m128i, __m128i, 1)
test_2 (_mm_cmpistrm, __m128i, __m128i, __m128i, 1)
test_2 (_mm_cmpistri, int, __m128i, __m128i, 1)
test_4 (_mm_cmpestrm, __m128i, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestri, int, __m128i, int, __m128i, int, 1)
test_2 (_mm_cmpistra, int, __m128i, __m128i, 1)
test_2 (_mm_cmpistrc, int, __m128i, __m128i, 1)
test_2 (_mm_cmpistro, int, __m128i, __m128i, 1)
test_2 (_mm_cmpistrs, int, __m128i, __m128i, 1)
test_2 (_mm_cmpistrz, int, __m128i, __m128i, 1)
test_4 (_mm_cmpestra, int, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestrc, int, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestro, int, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestrs, int, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* bmmintrin.h (SSE5). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("sse5")
#endif
#include <bmmintrin.h>
test_1 (_mm_roti_epi8, __m128i, __m128i, 1)
test_1 (_mm_roti_epi16, __m128i, __m128i, 1)
test_1 (_mm_roti_epi32, __m128i, __m128i, 1)
test_1 (_mm_roti_epi64, __m128i, __m128i, 1)
/* wmmintrin.h (AES/PCLMUL). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("aes,pclmul")
#endif
#include <wmmintrin.h>
test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1)
test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
/* mmintrin-common.h */
test_1 (_mm_round_pd, __m128d, __m128d, 1)
test_1 (_mm_round_ps, __m128, __m128, 1)
test_2 (_mm_round_sd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_round_ss, __m128, __m128, __m128, 1)

View File

@ -101,8 +101,7 @@
#define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1)
#pragma GCC target ("3dnow,sse4,sse5,aes,pclmul")
#pragma GCC target ("3dnow,sse4,sse4a,aes,pclmul")
#include <wmmintrin.h>
#include <bmmintrin.h>
#include <smmintrin.h>
#include <mm3dnow.h>

View File

@ -1,20 +0,0 @@
#include <stdlib.h>
#include "cpuid.h"
static void sse5_test (void);
int
main ()
{
  unsigned int eax, ebx, ecx, edx;

  /* Query extended CPUID leaf 0x80000001.  When the query succeeds, run the
     test body only if the SSE5 bit is set in ECX, then exit with status 0
     either way.  */
  if (__get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx))
    {
      if ((ecx & bit_SSE5) != 0)
        sse5_test ();

      exit (0);
    }

  /* The CPUID leaf is unavailable: nothing to test.  */
  return 0;
}

View File

@ -1,93 +0,0 @@
/* Test that the compiler vectorizes floating point multiply/add,
multiply/subtract and negated multiply/add loops into the packed fused instructions (fmaddps, fmaddpd, fmsubps, fmsubpd, fnmaddps, fnmaddpd) on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -mfused-madd -ftree-vectorize" } */
extern void exit (int);
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
/* Number of elements processed by each loop below.  */
#define SIZE 10240
/* Four global operands: a is the destination, b, c and d are the sources.
   Each overlays a float array and a double array of SIZE elements.  The
   vector-typed members are not referenced by the visible code
   (NOTE(review): presumably present only to give the union vector
   alignment -- confirm).  */
union {
__m128 f_align;
__m128d d_align;
float f[SIZE];
double d[SIZE];
} a, b, c, d;
/* Single-precision loop: a.f[i] = (b.f[i] * c.f[i]) + d.f[i] for every i
   below SIZE.  The expression is the input of this code-generation test, so
   its shape is kept exactly as written.  */
void
flt_mul_add (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.f[i] = (b.f[i] * c.f[i]) + d.f[i];
}
/* Double-precision loop: a.d[i] = (b.d[i] * c.d[i]) + d.d[i] for every i
   below SIZE.  */
void
dbl_mul_add (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.d[i] = (b.d[i] * c.d[i]) + d.d[i];
}
/* Single-precision loop: a.f[i] = (b.f[i] * c.f[i]) - d.f[i] for every i
   below SIZE.  */
void
flt_mul_sub (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.f[i] = (b.f[i] * c.f[i]) - d.f[i];
}
/* Double-precision loop: a.d[i] = (b.d[i] * c.d[i]) - d.d[i] for every i
   below SIZE.  */
void
dbl_mul_sub (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.d[i] = (b.d[i] * c.d[i]) - d.d[i];
}
/* Single-precision loop with a negated product:
   a.f[i] = (-(b.f[i] * c.f[i])) + d.f[i] for every i below SIZE.  */
void
flt_neg_mul_add (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.f[i] = (-(b.f[i] * c.f[i])) + d.f[i];
}
/* Double-precision loop with a negated product:
   a.d[i] = (-(b.d[i] * c.d[i])) + d.d[i] for every i below SIZE.  */
void
dbl_neg_mul_add (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.d[i] = (-(b.d[i] * c.d[i])) + d.d[i];
}
/* Call each of the six loop functions once, then exit with status 0.  No
   results are checked here; the file is a compile-only test (dg-do compile)
   judged by the scan-assembler directives that follow.  */
int main ()
{
flt_mul_add ();
flt_mul_sub ();
flt_neg_mul_add ();
dbl_mul_add ();
dbl_mul_sub ();
dbl_neg_mul_add ();
exit (0);
}
/* { dg-final { scan-assembler "fmaddps" } } */
/* { dg-final { scan-assembler "fmaddpd" } } */
/* { dg-final { scan-assembler "fmsubps" } } */
/* { dg-final { scan-assembler "fmsubpd" } } */
/* { dg-final { scan-assembler "fnmaddps" } } */
/* { dg-final { scan-assembler "fnmaddpd" } } */

View File

@ -1,82 +0,0 @@
/* Test that the compiler properly optimizes floating point multiply and add
instructions into fmaddss, fmsubss, fnmaddss, fnmsubss on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -mfused-madd" } */
extern void exit (int);
/* Return (a * b) + c in single precision.  Kept as one expression: the
   expression shape is the input of this code-generation test.  */
float
flt_mul_add (float a, float b, float c)
{
return (a * b) + c;
}
/* Return (a * b) + c in double precision.  */
double
dbl_mul_add (double a, double b, double c)
{
return (a * b) + c;
}
/* Return (a * b) - c in single precision.  */
float
flt_mul_sub (float a, float b, float c)
{
return (a * b) - c;
}
/* Return (a * b) - c in double precision.  */
double
dbl_mul_sub (double a, double b, double c)
{
return (a * b) - c;
}
/* Return (-(a * b)) + c in single precision.  */
float
flt_neg_mul_add (float a, float b, float c)
{
return (-(a * b)) + c;
}
/* Return (-(a * b)) + c in double precision.  */
double
dbl_neg_mul_add (double a, double b, double c)
{
return (-(a * b)) + c;
}
/* Return (-(a * b)) - c in single precision.  */
float
flt_neg_mul_sub (float a, float b, float c)
{
return (-(a * b)) - c;
}
/* Return (-(a * b)) - c in double precision.  */
double
dbl_neg_mul_sub (double a, double b, double c)
{
return (-(a * b)) - c;
}
/* Operand/result arrays: elements 0..2 hold 2, 3 and 4; the remaining
   elements are zero-initialized and elements 3..6 receive the results.  */
float f[10] = { 2, 3, 4 };
double d[10] = { 2, 3, 4 };
/* Call each of the eight helpers once with elements 0..2 as operands and
   store each result in its own array slot, then exit with status 0.  The
   stored values are not checked; this is a compile-only test.  */
int main ()
{
f[3] = flt_mul_add (f[0], f[1], f[2]);
f[4] = flt_mul_sub (f[0], f[1], f[2]);
f[5] = flt_neg_mul_add (f[0], f[1], f[2]);
f[6] = flt_neg_mul_sub (f[0], f[1], f[2]);
d[3] = dbl_mul_add (d[0], d[1], d[2]);
d[4] = dbl_mul_sub (d[0], d[1], d[2]);
d[5] = dbl_neg_mul_add (d[0], d[1], d[2]);
d[6] = dbl_neg_mul_sub (d[0], d[1], d[2]);
exit (0);
}
/* { dg-final { scan-assembler "fmaddss" } } */
/* { dg-final { scan-assembler "fmaddsd" } } */
/* { dg-final { scan-assembler "fmsubss" } } */
/* { dg-final { scan-assembler "fmsubsd" } } */
/* { dg-final { scan-assembler "fnmaddss" } } */
/* { dg-final { scan-assembler "fnmaddsd" } } */
/* { dg-final { scan-assembler "fnmsubss" } } */
/* { dg-final { scan-assembler "fnmsubsd" } } */

View File

@ -1,208 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target sse5 } */
/* { dg-options "-O2 -msse5" } */
#include "sse5-check.h"
#include <bmmintrin.h>
#include <string.h>
/* Number of 128-bit vectors in each buffer.  */
#define NUM 10
/* Test buffers.  Every member views the same storage: x[] is the vector view
   passed to and returned from the intrinsics, while the integer arrays give
   per-lane access (16 chars, 8 shorts, 4 ints or 2 long longs per vector).
   src1 holds the input, dst the intrinsic results and res the reference
   results computed by the check_* functions.  */
union
{
__m128i x[NUM];
signed char ssi[NUM * 16];
short si[NUM * 8];
int li[NUM * 4];
long long lli[NUM * 2];
} dst, res, src1;
/* Store each byte lane's own index into src1, viewed as signed chars.  */
static void
init_sbyte ()
{
  int lane = 0;

  while (lane < NUM * 16)
    {
      src1.ssi[lane] = lane;
      lane++;
    }
}
/* Store each word lane's own index into src1, viewed as shorts.  */
static void
init_sword ()
{
  int lane = 0;

  while (lane < NUM * 8)
    {
      src1.si[lane] = lane;
      lane++;
    }
}
/* Store each dword lane's own index into src1, viewed as ints.  */
static void
init_sdword ()
{
  int lane = 0;

  while (lane < NUM * 4)
    {
      src1.li[lane] = lane;
      lane++;
    }
}
/* Compute the reference result for a pairwise horizontal add of signed bytes
   into words (res.si[]) and compare it against dst.si[].

   Returns the number of mismatching word lanes; zero means dst matches.
   The original fell off the end without returning a value even though the
   caller tests the result, which is undefined behaviour.  */
static int
check_sbyte2word ()
{
  int i, j, s, t, check_fails = 0;

  /* i walks the first byte of each vector, j the 8 output words in it.  */
  for (i = 0; i < NUM * 16; i = i + 16)
    {
      for (j = 0; j < 8; j++)
        {
          t = i + (2 * j);      /* First source byte of the pair.  */
          s = (i / 2) + j;      /* Destination word lane.  */
          res.si[s] = src1.ssi[t] + src1.ssi[t + 1];
          if (res.si[s] != dst.si[s])
            check_fails++;
        }
    }

  return check_fails;
}
/* Compute the reference result for a horizontal add of four signed bytes
   into each dword (res.li[]) and compare it against dst.li[].  Returns the
   number of mismatching dword lanes.  */
static int
check_sbyte2dword ()
{
  int out, mismatches = 0;

  /* Output dword OUT sums the four source bytes starting at 4 * OUT, so a
     single linear walk visits the lanes in the same order as a per-vector
     nested loop.  */
  for (out = 0; out < NUM * 4; out++)
    {
      const int in = 4 * out;

      res.li[out] = (src1.ssi[in] + src1.ssi[in + 1])
                    + (src1.ssi[in + 2] + src1.ssi[in + 3]);
      if (res.li[out] != dst.li[out])
        mismatches++;
    }

  return mismatches;
}
/* Compute the reference result for a horizontal add of eight signed bytes
   into each qword (res.lli[]) and compare it against dst.lli[].  Returns the
   number of mismatching qword lanes.  */
static int
check_sbyte2qword ()
{
  int out, mismatches = 0;

  /* Output qword OUT sums the eight source bytes starting at 8 * OUT.  */
  for (out = 0; out < NUM * 2; out++)
    {
      const int in = 8 * out;
      const int low_half = (src1.ssi[in] + src1.ssi[in + 1])
                           + (src1.ssi[in + 2] + src1.ssi[in + 3]);
      const int high_half = (src1.ssi[in + 4] + src1.ssi[in + 5])
                            + (src1.ssi[in + 6] + src1.ssi[in + 7]);

      res.lli[out] = low_half + high_half;
      if (res.lli[out] != dst.lli[out])
        mismatches++;
    }

  return mismatches;
}
/* Compute the reference result for a pairwise horizontal add of signed words
   into dwords (res.li[]) and compare it against dst.li[].

   Returns the number of mismatching dword lanes; zero means dst matches.
   The original fell off the end without returning a value even though the
   caller tests the result, which is undefined behaviour.  */
static int
check_sword2dword ()
{
  int i, j, s, t, check_fails = 0;

  /* i walks the first word of each vector, j the 4 output dwords in it.  */
  for (i = 0; i < (NUM * 8); i = i + 8)
    {
      for (j = 0; j < 4; j++)
        {
          t = i + (2 * j);      /* First source word of the pair.  */
          s = (i / 2) + j;      /* Destination dword lane.  */
          res.li[s] = src1.si[t] + src1.si[t + 1];
          if (res.li[s] != dst.li[s])
            check_fails++;
        }
    }

  return check_fails;
}
/* Compute the reference result for a horizontal add of four signed words
   into each qword (res.lli[]) and compare it against dst.lli[].  Returns the
   number of mismatching qword lanes.  */
static int
check_sword2qword ()
{
  int out, mismatches = 0;

  /* Output qword OUT sums the four source words starting at 4 * OUT.  */
  for (out = 0; out < NUM * 2; out++)
    {
      const int in = 4 * out;

      res.lli[out] = (src1.si[in] + src1.si[in + 1])
                     + (src1.si[in + 2] + src1.si[in + 3]);
      if (res.lli[out] != dst.lli[out])
        mismatches++;
    }

  return mismatches;
}
/* Compute the reference result for a pairwise horizontal add of signed
   dwords into qwords (res.lli[]) and compare it against dst.lli[].

   Returns the number of mismatching qword lanes; zero means dst matches.
   The original fell off the end without returning a value even though the
   caller tests the result, which is undefined behaviour.  */
static int
check_dword2qword ()
{
  int i, j, s, t, check_fails = 0;

  /* i walks the first dword of each vector, j the 2 output qwords in it.  */
  for (i = 0; i < (NUM * 4); i = i + 4)
    {
      for (j = 0; j < 2; j++)
        {
          t = i + (2 * j);      /* First source dword of the pair.  */
          s = (i / 2) + j;      /* Destination qword lane.  */
          res.lli[s] = src1.li[t] + src1.li[t + 1];
          if (res.lli[s] != dst.lli[s])
            check_fails++;
        }
    }

  return check_fails;
}
/* Run each signed horizontal-add intrinsic over all NUM source vectors and
   abort as soon as the matching check function reports a non-zero result.  */
static void
sse5_test (void)
{
  int v;

  /* Signed bytes -> words.  */
  init_sbyte ();
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddw_epi8 (src1.x[v]);
    }
  if (check_sbyte2word () != 0)
    {
      abort ();
    }

  /* Signed bytes -> dwords (same byte input).  */
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddd_epi8 (src1.x[v]);
    }
  if (check_sbyte2dword () != 0)
    {
      abort ();
    }

  /* Signed bytes -> qwords (same byte input).  */
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddq_epi8 (src1.x[v]);
    }
  if (check_sbyte2qword () != 0)
    {
      abort ();
    }

  /* Signed words -> dwords.  */
  init_sword ();
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddd_epi16 (src1.x[v]);
    }
  if (check_sword2dword () != 0)
    {
      abort ();
    }

  /* Signed words -> qwords (same word input).  */
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddq_epi16 (src1.x[v]);
    }
  if (check_sword2qword () != 0)
    {
      abort ();
    }

  /* Signed dwords -> qwords.  */
  init_sdword ();
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddq_epi32 (src1.x[v]);
    }
  if (check_dword2qword () != 0)
    {
      abort ();
    }
}

View File

@ -1,207 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target sse5 } */
/* { dg-options "-O2 -msse5" } */
#include "sse5-check.h"
#include <bmmintrin.h>
#include <string.h>
/* Number of 128-bit vectors in each buffer.  */
#define NUM 10
/* Test buffers.  Every member views the same storage: x[] is the vector view
   passed to and returned from the intrinsics, while the unsigned integer
   arrays give per-lane access (16 chars, 8 shorts, 4 ints or 2 long longs
   per vector).  src1 holds the input, dst the intrinsic results and res the
   reference results computed by the check_* functions.  */
union
{
__m128i x[NUM];
unsigned char ssi[NUM * 16];
unsigned short si[NUM * 8];
unsigned int li[NUM * 4];
unsigned long long lli[NUM * 2];
} dst, res, src1;
/* Store each byte lane's own index into src1, viewed as unsigned chars.  */
static void
init_byte ()
{
  int lane = 0;

  while (lane < NUM * 16)
    {
      src1.ssi[lane] = lane;
      lane++;
    }
}
/* Store each word lane's own index into src1, viewed as unsigned shorts.  */
static void
init_word ()
{
  int lane = 0;

  while (lane < NUM * 8)
    {
      src1.si[lane] = lane;
      lane++;
    }
}
/* Store each dword lane's own index into src1, viewed as unsigned ints.  */
static void
init_dword ()
{
  int lane = 0;

  while (lane < NUM * 4)
    {
      src1.li[lane] = lane;
      lane++;
    }
}
/* Compute the reference result for a pairwise horizontal add of unsigned
   bytes into words (res.si[]) and compare it against dst.si[].

   Returns the number of mismatching word lanes; zero means dst matches.
   The original fell off the end without returning a value even though the
   caller tests the result, which is undefined behaviour.  */
static int
check_byte2word ()
{
  int i, j, s, t, check_fails = 0;

  /* i walks the first byte of each vector, j the 8 output words in it.  */
  for (i = 0; i < NUM * 16; i = i + 16)
    {
      for (j = 0; j < 8; j++)
        {
          t = i + (2 * j);      /* First source byte of the pair.  */
          s = (i / 2) + j;      /* Destination word lane.  */
          res.si[s] = src1.ssi[t] + src1.ssi[t + 1];
          if (res.si[s] != dst.si[s])
            check_fails++;
        }
    }

  return check_fails;
}
/* Compute the reference result for a horizontal add of four unsigned bytes
   into each dword (res.li[]) and compare it against dst.li[].  Returns the
   number of mismatching dword lanes.  */
static int
check_byte2dword ()
{
  int out, mismatches = 0;

  /* Output dword OUT sums the four source bytes starting at 4 * OUT, so a
     single linear walk visits the lanes in the same order as a per-vector
     nested loop.  */
  for (out = 0; out < NUM * 4; out++)
    {
      const int in = 4 * out;

      res.li[out] = (src1.ssi[in] + src1.ssi[in + 1])
                    + (src1.ssi[in + 2] + src1.ssi[in + 3]);
      if (res.li[out] != dst.li[out])
        mismatches++;
    }

  return mismatches;
}
/* Compute the reference result for a horizontal add of eight unsigned bytes
   into each qword (res.lli[]) and compare it against dst.lli[].  Returns the
   number of mismatching qword lanes.  */
static int
check_byte2qword ()
{
  int out, mismatches = 0;

  /* Output qword OUT sums the eight source bytes starting at 8 * OUT.  */
  for (out = 0; out < NUM * 2; out++)
    {
      const int in = 8 * out;
      const int low_half = (src1.ssi[in] + src1.ssi[in + 1])
                           + (src1.ssi[in + 2] + src1.ssi[in + 3]);
      const int high_half = (src1.ssi[in + 4] + src1.ssi[in + 5])
                            + (src1.ssi[in + 6] + src1.ssi[in + 7]);

      res.lli[out] = low_half + high_half;
      if (res.lli[out] != dst.lli[out])
        mismatches++;
    }

  return mismatches;
}
/* Compute the reference result for a pairwise horizontal add of unsigned
   words into dwords (res.li[]) and compare it against dst.li[].

   Returns the number of mismatching dword lanes; zero means dst matches.
   The original fell off the end without returning a value even though the
   caller tests the result, which is undefined behaviour.  */
static int
check_word2dword ()
{
  int i, j, s, t, check_fails = 0;

  /* i walks the first word of each vector, j the 4 output dwords in it.  */
  for (i = 0; i < (NUM * 8); i = i + 8)
    {
      for (j = 0; j < 4; j++)
        {
          t = i + (2 * j);      /* First source word of the pair.  */
          s = (i / 2) + j;      /* Destination dword lane.  */
          res.li[s] = src1.si[t] + src1.si[t + 1];
          if (res.li[s] != dst.li[s])
            check_fails++;
        }
    }

  return check_fails;
}
/* Compute the reference result for a horizontal add of four unsigned words
   into each qword (res.lli[]) and compare it against dst.lli[].  Returns the
   number of mismatching qword lanes.  */
static int
check_word2qword ()
{
  int out, mismatches = 0;

  /* Output qword OUT sums the four source words starting at 4 * OUT.  */
  for (out = 0; out < NUM * 2; out++)
    {
      const int in = 4 * out;

      res.lli[out] = (src1.si[in] + src1.si[in + 1])
                     + (src1.si[in + 2] + src1.si[in + 3]);
      if (res.lli[out] != dst.lli[out])
        mismatches++;
    }

  return mismatches;
}
/* Compute the reference result for a pairwise horizontal add of unsigned
   dwords into qwords (res.lli[]) and compare it against dst.lli[].

   Returns the number of mismatching qword lanes; zero means dst matches.
   The original fell off the end without returning a value even though the
   caller tests the result, which is undefined behaviour.  */
static int
check_dword2qword ()
{
  int i, j, s, t, check_fails = 0;

  /* i walks the first dword of each vector, j the 2 output qwords in it.  */
  for (i = 0; i < (NUM * 4); i = i + 4)
    {
      for (j = 0; j < 2; j++)
        {
          t = i + (2 * j);      /* First source dword of the pair.  */
          s = (i / 2) + j;      /* Destination qword lane.  */
          res.lli[s] = src1.li[t] + src1.li[t + 1];
          if (res.lli[s] != dst.lli[s])
            check_fails++;
        }
    }

  return check_fails;
}
/* Run each unsigned horizontal-add intrinsic over all NUM source vectors and
   abort as soon as the matching check function reports a non-zero result.  */
static void
sse5_test (void)
{
  int v;

  /* Unsigned bytes -> words.  */
  init_byte ();
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddw_epu8 (src1.x[v]);
    }
  if (check_byte2word () != 0)
    {
      abort ();
    }

  /* Unsigned bytes -> dwords (same byte input).  */
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddd_epu8 (src1.x[v]);
    }
  if (check_byte2dword () != 0)
    {
      abort ();
    }

  /* Unsigned bytes -> qwords (same byte input).  */
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddq_epu8 (src1.x[v]);
    }
  if (check_byte2qword () != 0)
    {
      abort ();
    }

  /* Unsigned words -> dwords.  */
  init_word ();
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddd_epu16 (src1.x[v]);
    }
  if (check_word2dword () != 0)
    {
      abort ();
    }

  /* Unsigned words -> qwords (same word input).  */
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddq_epu16 (src1.x[v]);
    }
  if (check_word2qword () != 0)
    {
      abort ();
    }

  /* Unsigned dwords -> qwords.  */
  init_dword ();
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_haddq_epu32 (src1.x[v]);
    }
  if (check_dword2qword () != 0)
    {
      abort ();
    }
}

View File

@ -1,128 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target sse5 } */
/* { dg-options "-O2 -msse5" } */
#include "sse5-check.h"
#include <bmmintrin.h>
#include <string.h>
/* Number of 128-bit vectors in each buffer.  */
#define NUM 10
/* Test buffers.  Every member views the same storage: x[] is the vector view
   passed to and returned from the intrinsics, while the integer arrays give
   per-lane access (16 chars, 8 shorts, 4 ints or 2 long longs per vector).
   src1 holds the input, dst the intrinsic results and res the reference
   results computed by the check_* functions.  */
union
{
__m128i x[NUM];
signed char ssi[NUM * 16];
short si[NUM * 8];
int li[NUM * 4];
long long lli[NUM * 2];
} dst, res, src1;
/* Store each byte lane's own index into src1, viewed as signed chars.  */
static void
init_sbyte ()
{
  const int lanes = NUM * 16;
  int lane;

  for (lane = 0; lane < lanes; ++lane)
    {
      src1.ssi[lane] = lane;
    }
}
/* Store each word lane's own index into src1, viewed as shorts.  */
static void
init_sword ()
{
  const int lanes = NUM * 8;
  int lane;

  for (lane = 0; lane < lanes; ++lane)
    {
      src1.si[lane] = lane;
    }
}
/* Store each dword lane's own index into src1, viewed as ints.  */
static void
init_sdword ()
{
  const int lanes = NUM * 4;
  int lane;

  for (lane = 0; lane < lanes; ++lane)
    {
      src1.li[lane] = lane;
    }
}
/* Compute the reference result for a pairwise horizontal subtract of signed
   bytes into words (res.si[]) and compare it against dst.si[].  Each word
   is the even-indexed byte minus the following odd-indexed byte.

   Returns the number of mismatching word lanes; zero means dst matches.
   The original fell off the end without returning a value even though the
   caller tests the result, which is undefined behaviour.  */
static int
check_sbyte2word ()
{
  int i, j, s, t, check_fails = 0;

  /* i walks the first byte of each vector, j the 8 output words in it.  */
  for (i = 0; i < NUM * 16; i = i + 16)
    {
      for (j = 0; j < 8; j++)
        {
          t = i + (2 * j);      /* First source byte of the pair.  */
          s = (i / 2) + j;      /* Destination word lane.  */
          res.si[s] = src1.ssi[t] - src1.ssi[t + 1];
          if (res.si[s] != dst.si[s])
            check_fails++;
        }
    }

  return check_fails;
}
/* Compute the reference result for a pairwise horizontal subtract of signed
   words into dwords (res.li[]) and compare it against dst.li[].  Each dword
   is the even-indexed word minus the following odd-indexed word.

   Returns the number of mismatching dword lanes; zero means dst matches.
   The original fell off the end without returning a value even though the
   caller tests the result, which is undefined behaviour.  */
static int
check_sword2dword ()
{
  int i, j, s, t, check_fails = 0;

  /* i walks the first word of each vector, j the 4 output dwords in it.  */
  for (i = 0; i < (NUM * 8); i = i + 8)
    {
      for (j = 0; j < 4; j++)
        {
          t = i + (2 * j);      /* First source word of the pair.  */
          s = (i / 2) + j;      /* Destination dword lane.  */
          res.li[s] = src1.si[t] - src1.si[t + 1];
          if (res.li[s] != dst.li[s])
            check_fails++;
        }
    }

  return check_fails;
}
/* Compute the reference result for a pairwise horizontal subtract of signed
   dwords into qwords (res.lli[]) and compare it against dst.lli[].  Each
   qword is the even-indexed dword minus the following odd-indexed dword.

   Returns the number of mismatching qword lanes; zero means dst matches.
   The original fell off the end without returning a value even though the
   caller tests the result, which is undefined behaviour.  */
static int
check_dword2qword ()
{
  int i, j, s, t, check_fails = 0;

  /* i walks the first dword of each vector, j the 2 output qwords in it.  */
  for (i = 0; i < (NUM * 4); i = i + 4)
    {
      for (j = 0; j < 2; j++)
        {
          t = i + (2 * j);      /* First source dword of the pair.  */
          s = (i / 2) + j;      /* Destination qword lane.  */
          res.lli[s] = src1.li[t] - src1.li[t + 1];
          if (res.lli[s] != dst.lli[s])
            check_fails++;
        }
    }

  return check_fails;
}
/* Run each signed horizontal-subtract intrinsic over all NUM source vectors
   and abort as soon as the matching check function reports a non-zero
   result.  */
static void
sse5_test (void)
{
  int v;

  /* Signed bytes -> words.  */
  init_sbyte ();
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_hsubw_epi8 (src1.x[v]);
    }
  if (check_sbyte2word () != 0)
    {
      abort ();
    }

  /* Signed words -> dwords.  */
  init_sword ();
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_hsubd_epi16 (src1.x[v]);
    }
  if (check_sword2dword () != 0)
    {
      abort ();
    }

  /* Signed dwords -> qwords.  */
  init_sdword ();
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_hsubq_epi32 (src1.x[v]);
    }
  if (check_dword2qword () != 0)
    {
      abort ();
    }
}

View File

@ -1,34 +0,0 @@
/* Test that the compiler properly optimizes vector 32-bit integer
multiply and add operations into pmacsdd on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -ftree-vectorize" } */
extern void exit (int);
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
/* Number of elements processed by the loop below.  */
#define SIZE 10240
/* Four global operands: a is the destination, b, c and d are the sources.
   The vector-typed member is not referenced by the visible code
   (NOTE(review): presumably present only to give the union vector
   alignment -- confirm).  */
union {
__m128i align;
int i[SIZE];
} a, b, c, d;
/* Integer loop: a.i[i] = (b.i[i] * c.i[i]) + d.i[i] for every i below SIZE.
   The expression is the input of this code-generation test, so its shape is
   kept exactly as written.  */
void
int_mul_add (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.i[i] = (b.i[i] * c.i[i]) + d.i[i];
}
/* Call the loop function once and exit with status 0; nothing is checked at
   run time (compile-only test).  */
int main ()
{
int_mul_add ();
exit (0);
}
/* { dg-final { scan-assembler "pmacsdd" } } */

View File

@ -1,36 +0,0 @@
/* Test that the compiler properly vectorizes a widening int to long integer
multiply into pmacsdql/pmacsdqh on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -ftree-vectorize" } */
extern void exit (int);
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
/* Number of elements processed by the loop below.  */
#define SIZE 10240
/* Global operands: a is the destination (read through i64), b and c are the
   sources (read through i32); d is declared but not used by the visible
   code.  The vector-typed member is not referenced either
   (NOTE(review): presumably present only for alignment -- confirm).  */
union {
__m128i i_align;
int i32[SIZE];
long i64[SIZE];
} a, b, c, d;
/* Widening multiply loop: each pair of int elements is converted to long
   before multiplying, and the long product is stored in a.i64[i].  The
   expression is the input of this code-generation test, so its shape is
   kept exactly as written.  */
void
imul32_to_64 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.i64[i] = ((long)b.i32[i]) * ((long)c.i32[i]);
}
/* Call the loop function once and exit with status 0; nothing is checked at
   run time (compile-only test).  */
int main ()
{
imul32_to_64 ();
exit (0);
}
/* { dg-final { scan-assembler "pmacsdql" } } */
/* { dg-final { scan-assembler "pmacsdqh" } } */

View File

@ -1,36 +0,0 @@
/* Test that the compiler properly vectorizes a long integer multiply
using pmacsdd, phadddq and pmacsdql on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -ftree-vectorize" } */
extern void exit (int);
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
/* Number of elements processed by the loop below.  */
#define SIZE 10240
/* Global operands: a is the destination, b and c are the sources; d is
   declared but not used by the visible code.  The vector-typed member is
   not referenced either (NOTE(review): presumably present only for
   alignment -- confirm).  */
union {
__m128i i_align;
long i64[SIZE];
} a, b, c, d;
/* Long multiply loop: a.i64[i] = b.i64[i] * c.i64[i] for every i below
   SIZE.  The expression is the input of this code-generation test, so its
   shape is kept exactly as written.  */
void
imul64 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.i64[i] = b.i64[i] * c.i64[i];
}
/* Call the loop function once and exit with status 0; nothing is checked at
   run time (compile-only test).  */
int main ()
{
imul64 ();
exit (0);
}
/* { dg-final { scan-assembler "pmacsdd" } } */
/* { dg-final { scan-assembler "phadddq" } } */
/* { dg-final { scan-assembler "pmacsdql" } } */

View File

@ -1,140 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target sse5 } */
/* { dg-options "-O2 -msse5" } */
#include "sse5-check.h"
#include <bmmintrin.h>
#include <string.h>
/* Number of 128-bit vectors in each buffer.  */
#define NUM 20
/* Test buffers.  Every member views the same storage: x[]/f[] are the
   single-precision vector and per-lane views (NUM * 4 floats), y[]/d[] the
   double-precision ones (NUM * 2 doubles).  src1..src3 hold the operands,
   dst the intrinsic results and res the reference results computed by the
   check_* functions.  */
union
{
__m128 x[NUM];
float f[NUM * 4];
__m128d y[NUM];
double d[NUM * 2];
} dst, res, src1, src2, src3;
/* Note that in macc*,msub*,mnmacc* and mnsub* instructions, the intermediate
product is not rounded, only the addition is rounded. */
/* Fill the three single-precision operands: for each float lane k,
   src1 = k, src2 = k + 10 and src3 = k + 20.  */
static void
init_maccps ()
{
  int lane = 0;

  while (lane < NUM * 4)
    {
      src1.f[lane] = lane;
      src2.f[lane] = lane + 10;
      src3.f[lane] = lane + 20;
      lane++;
    }
}
/* Fill the three double-precision operands: for each double lane i,
   src1 = i, src2 = i + 10 and src3 = i + 20.  */
static void
init_maccpd ()
{
  int i;

  /* The d[] view holds NUM * 2 doubles.  The original bound of NUM * 4 (the
     float lane count) stored past the end of d[] and of the union itself.  */
  for (i = 0; i < NUM * 2; i++)
    {
      src1.d[i] = i;
      src2.d[i] = i + 10;
      src3.d[i] = i + 20;
    }
}
/* Compute the reference (src1 * src2) + src3 for every float lane into
   res.f[] and compare it against dst.f[].  Returns the number of
   mismatching lanes.  */
static int
check_maccps ()
{
  int lane, mismatches = 0;

  /* A single linear walk covers the lanes in the same order as iterating
     vector by vector and then over the four lanes of each vector.  */
  for (lane = 0; lane < NUM * 4; lane++)
    {
      res.f[lane] = (src1.f[lane] * src2.f[lane]) + src3.f[lane];
      if (dst.f[lane] != res.f[lane])
        mismatches++;
    }

  return mismatches;
}
/* Compute the reference (src1 * src2) + src3 for every double lane into
   res.d[] and compare it against dst.d[].  Returns the number of
   mismatching lanes.  */
static int
check_maccpd ()
{
  int lane, mismatches = 0;

  /* A single linear walk covers the lanes in the same order as iterating
     vector by vector and then over the two lanes of each vector.  */
  for (lane = 0; lane < NUM * 2; lane++)
    {
      res.d[lane] = (src1.d[lane] * src2.d[lane]) + src3.d[lane];
      if (dst.d[lane] != res.d[lane])
        mismatches++;
    }

  return mismatches;
}
/* Scalar single-precision check: only the first float lane of each vector
   is recomputed as (src1 * src2) + src3 and compared against dst.f[].
   Returns the number of mismatching vectors.  */
static int
check_maccss ()
{
  int vec, mismatches = 0;

  for (vec = 0; vec < NUM; vec++)
    {
      const int lane = vec * 4;   /* Lane 0 of vector VEC.  */

      res.f[lane] = (src1.f[lane] * src2.f[lane]) + src3.f[lane];
      if (dst.f[lane] != res.f[lane])
        mismatches++;
    }

  return mismatches;
}
/* Scalar double-precision check: only the first double lane of each vector
   is recomputed as (src1 * src2) + src3 and compared against dst.d[].
   Returns the number of mismatching vectors.  */
static int
check_maccsd ()
{
  int vec, mismatches = 0;

  for (vec = 0; vec < NUM; vec++)
    {
      const int lane = vec * 2;   /* Lane 0 of vector VEC.  */

      res.d[lane] = (src1.d[lane] * src2.d[lane]) + src3.d[lane];
      if (dst.d[lane] != res.d[lane])
        mismatches++;
    }

  return mismatches;
}
/* Run the packed and scalar multiply-accumulate intrinsics over all NUM
   vectors, in single and then double precision, and abort as soon as the
   matching check function reports a mismatch.  */
static void
sse5_test (void)
{
  int v;

  /* Packed single precision.  */
  init_maccps ();
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_macc_ps (src1.x[v], src2.x[v], src3.x[v]);
    }
  if (check_maccps () != 0)
    {
      abort ();
    }

  /* Scalar single precision (same float operands).  */
  for (v = 0; v < NUM; v++)
    {
      dst.x[v] = _mm_macc_ss (src1.x[v], src2.x[v], src3.x[v]);
    }
  if (check_maccss () != 0)
    {
      abort ();
    }

  /* Packed double precision.  */
  init_maccpd ();
  for (v = 0; v < NUM; v++)
    {
      dst.y[v] = _mm_macc_pd (src1.y[v], src2.y[v], src3.y[v]);
    }
  if (check_maccpd () != 0)
    {
      abort ();
    }

  /* Scalar double precision (same double operands).  */
  for (v = 0; v < NUM; v++)
    {
      dst.y[v] = _mm_macc_sd (src1.y[v], src2.y[v], src3.y[v]);
    }
  if (check_maccsd () != 0)
    {
      abort ();
    }
}

View File

@ -1,139 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target sse5 } */
/* { dg-options "-O2 -msse5" } */
#include "sse5-check.h"
#include <bmmintrin.h>
#include <string.h>
/* Number of __m128 / __m128d vectors held by each test buffer.  */
#define NUM 20
/* Test buffers.  Each one can be viewed as NUM packed-float vectors (x),
   NUM * 4 floats (f), NUM packed-double vectors (y) or NUM * 2 doubles (d).  */
union
{
__m128 x[NUM];
float f[NUM * 4];
__m128d y[NUM];
double d[NUM * 2];
} dst, res, src1, src2, src3;
/* Note that in the macc*, msub*, nmacc* and nmsub* instructions, the
intermediate product is not rounded; only the final addition is rounded. */
/* Fill the float view of the three source buffers: element k receives
   k, k + 10 and k + 20 in src1, src2 and src3 respectively.  */
static void
init_msubps ()
{
  int k = 0;

  while (k < NUM * 4)
    {
      src1.f[k] = k;
      src2.f[k] = k + 10;
      src3.f[k] = k + 20;
      k++;
    }
}
/* Fill the double view of the three source buffers: element i receives
   i, i + 10 and i + 20 in src1, src2 and src3 respectively.

   The d[] member holds NUM * 2 doubles, so that is the loop bound; running
   to NUM * 4 would store past the end of the unions.  */
static void
init_msubpd ()
{
  int i;

  for (i = 0; i < NUM * 2; i++)
    {
      src1.d[i] = i;
      src2.d[i] = i + 10;
      src3.d[i] = i + 20;
    }
}
/* Verify the packed-float multiply-subtract results.  For each of the
   NUM * 4 float lanes, store src1 * src2 - src3 into res and compare it
   with dst.  Returns the number of lanes that differ (0 on success).  */
static int
check_msubps ()
{
  int lane;
  int mismatches = 0;

  for (lane = 0; lane < NUM * 4; lane++)
    {
      res.f[lane] = (src1.f[lane] * src2.f[lane]) - src3.f[lane];
      if (res.f[lane] != dst.f[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Verify the packed-double multiply-subtract results.  For each of the
   NUM * 2 double lanes, store src1 * src2 - src3 into res and compare it
   with dst.  Returns the number of lanes that differ (0 on success).  */
static int
check_msubpd ()
{
  int lane;
  int mismatches = 0;

  for (lane = 0; lane < NUM * 2; lane++)
    {
      res.d[lane] = (src1.d[lane] * src2.d[lane]) - src3.d[lane];
      if (res.d[lane] != dst.d[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Verify the scalar-float multiply-subtract results.  Only the first float
   of each vector (index vec * 4) is recomputed into res and compared with
   dst.  Returns the number of mismatches (0 on success).  */
static int
check_msubss ()
{
  int vec;
  int mismatches = 0;

  for (vec = 0; vec < NUM; vec++)
    {
      const int lane = vec * 4;

      res.f[lane] = (src1.f[lane] * src2.f[lane]) - src3.f[lane];
      if (res.f[lane] != dst.f[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Verify the scalar-double multiply-subtract results.  Only the first double
   of each vector (index vec * 2) is recomputed into res and compared with
   dst.  Returns the number of mismatches (0 on success).  */
static int
check_msubsd ()
{
  int vec;
  int mismatches = 0;

  for (vec = 0; vec < NUM; vec++)
    {
      const int lane = vec * 2;

      res.d[lane] = (src1.d[lane] * src2.d[lane]) - src3.d[lane];
      if (res.d[lane] != dst.d[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Exercise the four multiply-subtract intrinsics on the shared buffers and
   abort on the first variant whose results do not match the reference.  */
static void
sse5_test (void)
{
  int v;

  /* Packed single precision (msubps).  */
  init_msubps ();
  for (v = 0; v < NUM; v++)
    dst.x[v] = _mm_msub_ps (src1.x[v], src2.x[v], src3.x[v]);
  if (check_msubps () != 0)
    abort ();

  /* Scalar single precision (msubss); reuses the float inputs.  */
  for (v = 0; v < NUM; v++)
    dst.x[v] = _mm_msub_ss (src1.x[v], src2.x[v], src3.x[v]);
  if (check_msubss () != 0)
    abort ();

  /* Packed double precision (msubpd).  */
  init_msubpd ();
  for (v = 0; v < NUM; v++)
    dst.y[v] = _mm_msub_pd (src1.y[v], src2.y[v], src3.y[v]);
  if (check_msubpd () != 0)
    abort ();

  /* Scalar double precision (msubsd); reuses the double inputs.  */
  for (v = 0; v < NUM; v++)
    dst.y[v] = _mm_msub_sd (src1.y[v], src2.y[v], src3.y[v]);
  if (check_msubsd () != 0)
    abort ();
}

View File

@ -1,139 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target sse5 } */
/* { dg-options "-O2 -msse5" } */
#include "sse5-check.h"
#include <bmmintrin.h>
#include <string.h>
/* Number of __m128 / __m128d vectors held by each test buffer.  */
#define NUM 20
/* Test buffers.  Each one can be viewed as NUM packed-float vectors (x),
   NUM * 4 floats (f), NUM packed-double vectors (y) or NUM * 2 doubles (d).  */
union
{
__m128 x[NUM];
float f[NUM * 4];
__m128d y[NUM];
double d[NUM * 2];
} dst, res, src1, src2, src3;
/* Note that in the macc*, msub*, nmacc* and nmsub* instructions, the
intermediate product is not rounded; only the final addition is rounded. */
/* Fill the float view of the three source buffers: element k receives
   k, k + 10 and k + 20 in src1, src2 and src3 respectively.  */
static void
init_nmaccps ()
{
  int k = 0;

  while (k < NUM * 4)
    {
      src1.f[k] = k;
      src2.f[k] = k + 10;
      src3.f[k] = k + 20;
      k++;
    }
}
/* Fill the double view of the three source buffers: element i receives
   i, i + 10 and i + 20 in src1, src2 and src3 respectively.

   The d[] member holds NUM * 2 doubles, so that is the loop bound; running
   to NUM * 4 would store past the end of the unions.  */
static void
init_nmaccpd ()
{
  int i;

  for (i = 0; i < NUM * 2; i++)
    {
      src1.d[i] = i;
      src2.d[i] = i + 10;
      src3.d[i] = i + 20;
    }
}
/* Verify the packed-float negated multiply-accumulate results.  For each of
   the NUM * 4 float lanes, store -(src1 * src2) + src3 into res and compare
   it with dst.  Returns the number of lanes that differ (0 on success).  */
static int
check_nmaccps ()
{
  int lane;
  int mismatches = 0;

  for (lane = 0; lane < NUM * 4; lane++)
    {
      res.f[lane] = - (src1.f[lane] * src2.f[lane]) + src3.f[lane];
      if (res.f[lane] != dst.f[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Verify the packed-double negated multiply-accumulate results.  For each of
   the NUM * 2 double lanes, store -(src1 * src2) + src3 into res and compare
   it with dst.  Returns the number of lanes that differ (0 on success).  */
static int
check_nmaccpd ()
{
  int lane;
  int mismatches = 0;

  for (lane = 0; lane < NUM * 2; lane++)
    {
      res.d[lane] = - (src1.d[lane] * src2.d[lane]) + src3.d[lane];
      if (res.d[lane] != dst.d[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Verify the scalar-float negated multiply-accumulate results.  Only the
   first float of each vector (index vec * 4) is recomputed into res and
   compared with dst.  Returns the number of mismatches (0 on success).  */
static int
check_nmaccss ()
{
  int vec;
  int mismatches = 0;

  for (vec = 0; vec < NUM; vec++)
    {
      const int lane = vec * 4;

      res.f[lane] = - (src1.f[lane] * src2.f[lane]) + src3.f[lane];
      if (res.f[lane] != dst.f[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Verify the scalar-double negated multiply-accumulate results.  Only the
   first double of each vector (index vec * 2) is recomputed into res and
   compared with dst.  Returns the number of mismatches (0 on success).  */
static int
check_nmaccsd ()
{
  int vec;
  int mismatches = 0;

  for (vec = 0; vec < NUM; vec++)
    {
      const int lane = vec * 2;

      res.d[lane] = - (src1.d[lane] * src2.d[lane]) + src3.d[lane];
      if (res.d[lane] != dst.d[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Exercise the four negated multiply-accumulate intrinsics on the shared
   buffers and abort on the first variant whose results do not match the
   reference.  */
static void
sse5_test (void)
{
  int v;

  /* Packed single precision (nmaccps).  */
  init_nmaccps ();
  for (v = 0; v < NUM; v++)
    dst.x[v] = _mm_nmacc_ps (src1.x[v], src2.x[v], src3.x[v]);
  if (check_nmaccps () != 0)
    abort ();

  /* Scalar single precision (nmaccss); reuses the float inputs.  */
  for (v = 0; v < NUM; v++)
    dst.x[v] = _mm_nmacc_ss (src1.x[v], src2.x[v], src3.x[v]);
  if (check_nmaccss () != 0)
    abort ();

  /* Packed double precision (nmaccpd).  */
  init_nmaccpd ();
  for (v = 0; v < NUM; v++)
    dst.y[v] = _mm_nmacc_pd (src1.y[v], src2.y[v], src3.y[v]);
  if (check_nmaccpd () != 0)
    abort ();

  /* Scalar double precision (nmaccsd); reuses the double inputs.  */
  for (v = 0; v < NUM; v++)
    dst.y[v] = _mm_nmacc_sd (src1.y[v], src2.y[v], src3.y[v]);
  if (check_nmaccsd () != 0)
    abort ();
}

View File

@ -1,139 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target sse5 } */
/* { dg-options "-O2 -msse5" } */
#include "sse5-check.h"
#include <bmmintrin.h>
#include <string.h>
/* Number of __m128 / __m128d vectors held by each test buffer.  */
#define NUM 20
/* Test buffers.  Each one can be viewed as NUM packed-float vectors (x),
   NUM * 4 floats (f), NUM packed-double vectors (y) or NUM * 2 doubles (d).  */
union
{
__m128 x[NUM];
float f[NUM * 4];
__m128d y[NUM];
double d[NUM * 2];
} dst, res, src1, src2, src3;
/* Note that in the macc*, msub*, nmacc* and nmsub* instructions, the
intermediate product is not rounded; only the final addition is rounded. */
/* Fill the float view of the three source buffers: element k receives
   k, k + 10 and k + 20 in src1, src2 and src3 respectively.  */
static void
init_nmsubps ()
{
  int k = 0;

  while (k < NUM * 4)
    {
      src1.f[k] = k;
      src2.f[k] = k + 10;
      src3.f[k] = k + 20;
      k++;
    }
}
/* Fill the double view of the three source buffers: element i receives
   i, i + 10 and i + 20 in src1, src2 and src3 respectively.

   The d[] member holds NUM * 2 doubles, so that is the loop bound; running
   to NUM * 4 would store past the end of the unions.  */
static void
init_nmsubpd ()
{
  int i;

  for (i = 0; i < NUM * 2; i++)
    {
      src1.d[i] = i;
      src2.d[i] = i + 10;
      src3.d[i] = i + 20;
    }
}
/* Verify the packed-float negated multiply-subtract results.  For each of
   the NUM * 4 float lanes, store -(src1 * src2) - src3 into res and compare
   it with dst.  Returns the number of lanes that differ (0 on success).  */
static int
check_nmsubps ()
{
  int lane;
  int mismatches = 0;

  for (lane = 0; lane < NUM * 4; lane++)
    {
      res.f[lane] = - (src1.f[lane] * src2.f[lane]) - src3.f[lane];
      if (res.f[lane] != dst.f[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Verify the packed-double negated multiply-subtract results.  For each of
   the NUM * 2 double lanes, store -(src1 * src2) - src3 into res and compare
   it with dst.  Returns the number of lanes that differ (0 on success).  */
static int
check_nmsubpd ()
{
  int lane;
  int mismatches = 0;

  for (lane = 0; lane < NUM * 2; lane++)
    {
      res.d[lane] = - (src1.d[lane] * src2.d[lane]) - src3.d[lane];
      if (res.d[lane] != dst.d[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Verify the scalar-float negated multiply-subtract results.  Only the first
   float of each vector (index vec * 4) is recomputed into res and compared
   with dst.  Returns the number of mismatches (0 on success).  */
static int
check_nmsubss ()
{
  int vec;
  int mismatches = 0;

  for (vec = 0; vec < NUM; vec++)
    {
      const int lane = vec * 4;

      res.f[lane] = - (src1.f[lane] * src2.f[lane]) - src3.f[lane];
      if (res.f[lane] != dst.f[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Verify the scalar-double negated multiply-subtract results.  Only the
   first double of each vector (index vec * 2) is recomputed into res and
   compared with dst.  Returns the number of mismatches (0 on success).  */
static int
check_nmsubsd ()
{
  int vec;
  int mismatches = 0;

  for (vec = 0; vec < NUM; vec++)
    {
      const int lane = vec * 2;

      res.d[lane] = - (src1.d[lane] * src2.d[lane]) - src3.d[lane];
      if (res.d[lane] != dst.d[lane])
        mismatches += 1;
    }

  return mismatches;
}
/* Exercise the four negated multiply-subtract intrinsics on the shared
   buffers and abort on the first variant whose results do not match the
   reference.

   The check_nmsub* helpers are defined without parameters and read the
   file-scope unions directly, so they are called with no arguments.  */
static void
sse5_test (void)
{
  int i;

  /* Check nmsubps */
  init_nmsubps ();
  for (i = 0; i < NUM; i++)
    dst.x[i] = _mm_nmsub_ps (src1.x[i], src2.x[i], src3.x[i]);
  if (check_nmsubps ())
    abort ();

  /* Check nmsubss */
  for (i = 0; i < NUM; i++)
    dst.x[i] = _mm_nmsub_ss (src1.x[i], src2.x[i], src3.x[i]);
  if (check_nmsubss ())
    abort ();

  /* Check nmsubpd */
  init_nmsubpd ();
  for (i = 0; i < NUM; i++)
    dst.y[i] = _mm_nmsub_pd (src1.y[i], src2.y[i], src3.y[i]);
  if (check_nmsubpd ())
    abort ();

  /* Check nmsubsd */
  for (i = 0; i < NUM; i++)
    dst.y[i] = _mm_nmsub_sd (src1.y[i], src2.y[i], src3.y[i]);
  if (check_nmsubsd ())
    abort ();
}

View File

@ -1,23 +0,0 @@
/* Test that the compiler properly optimizes conditional floating point moves
into the pcmov instruction on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5" } */
extern void exit (int);
/* Return C when A > B, otherwise D.  Kept as a single conditional expression
   because the dg-final directive of this test scans the assembly generated
   for it.  */
double dbl_test (double a, double b, double c, double d)
{
return (a > b) ? c : d;
}
/* Operands that main passes to dbl_test, and dbl_e, which receives the
   result.  */
double dbl_a = 1, dbl_b = 2, dbl_c = 3, dbl_d = 4, dbl_e;
/* Driver: evaluate dbl_test on the global operands, keep the result in dbl_e
   and terminate successfully.  */
int
main()
{
  dbl_e = dbl_test (dbl_a, dbl_b, dbl_c, dbl_d);
  exit (0);
}
/* { dg-final { scan-assembler "pcmov" } } */

View File

@ -1,23 +0,0 @@
/* Test that the compiler properly optimizes conditional floating point moves
into the pcmov instruction on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5" } */
extern void exit (int);
/* Return C when A > B, otherwise D.  Kept as a single conditional expression
   because the dg-final directive of this test scans the assembly generated
   for it.  */
float flt_test (float a, float b, float c, float d)
{
return (a > b) ? c : d;
}
/* Operands that main passes to flt_test, and flt_e, which receives the
   result.  */
float flt_a = 1, flt_b = 2, flt_c = 3, flt_d = 4, flt_e;
/* Driver: evaluate flt_test on the global operands, keep the result in flt_e
   and terminate successfully.  */
int
main()
{
  flt_e = flt_test (flt_a, flt_b, flt_c, flt_d);
  exit (0);
}
/* { dg-final { scan-assembler "pcmov" } } */

View File

@ -1,120 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target sse5 } */
/* { dg-options "-O2 -msse5" } */
#include "sse5-check.h"
#include <bmmintrin.h>
#include <string.h>
/* Test buffers holding two vectors each.  The same storage can be viewed as
   packed float (x), packed double (y) or integer (z) vectors, or as the
   scalar arrays f, d, i and li.
   NOTE(review): li[] is declared `long', so it spans both z[] vectors only
   where long is 64 bits wide -- confirm this test is restricted to such
   targets.  */
union
{
__m128 x[2];
__m128d y[2];
__m128i z[2];
float f[8];
double d[4];
int i[8];
long li[4];
} dst, res, src1, src2, src3;
/* Set up the double-precision permute test: src1.d[k] = k, src2.d[k] = k + 2,
   the selector words in src3.li, and the expected output in res.d.  */
static void
init_ddata ()
{
  static const long selectors[4] = { 3, 0, 1, 2 };
  static const double expected[4] = { 3.0, 0.0, 3.0, 4.0 };
  int k;

  for (k = 0; k < 4; k++)
    {
      src1.d[k] = k;
      src2.d[k] = k + 2;
      src3.li[k] = selectors[k];
      res.d[k] = expected[k];
    }
}
/* Set up the single-precision permute test: src1.f[k] = k, src2.f[k] = k + 2,
   the selector words in src3.i, and the expected output in res.f.  */
static void
init_fdata ()
{
  static const int selectors[8] = { 7, 5, 1, 2, 0, 4, 3, 6 };
  static const float expected[8] =
    { 5.0, 3.0, 1.0, 2.0, 4.0, 6.0, 7.0, 8.0 };
  int k;

  for (k = 0; k < 8; k++)
    {
      src1.f[k] = k;
      src2.f[k] = k + 2;
      src3.i[k] = selectors[k];
      res.f[k] = expected[k];
    }
}
/* Compare the four doubles in dst against the expected values in res.
   Returns the number of elements that differ (0 on success).  */
static int
check_permpd ()
{
  int k = 0;
  int mismatches = 0;

  while (k < 4)
    {
      if (dst.d[k] != res.d[k])
        mismatches += 1;
      k++;
    }

  return mismatches;
}
/* Compare the eight floats in dst against the expected values in res.
   Returns the number of elements that differ (0 on success).  */
static int
check_permps ()
{
  int k = 0;
  int mismatches = 0;

  while (k < 8)
    {
      if (dst.f[k] != res.f[k])
        mismatches += 1;
      k++;
    }

  return mismatches;
}
/* Run the double- and single-precision permute intrinsics over both vectors
   of the test buffers and abort if either result differs from res.  */
static void
sse5_test (void)
{
  int v;

  /* Double-precision permute.  */
  init_ddata();
  for (v = 0; v < 2; v++)
    dst.y[v] = _mm_perm_pd (src1.y[v], src2.y[v], src3.z[v]);
  if (check_permpd () != 0)
    abort ();

  /* Single-precision permute.  */
  init_fdata();
  for (v = 0; v < 2; v++)
    dst.x[v] = _mm_perm_ps (src1.x[v], src2.x[v], src3.z[v]);
  if (check_permps () != 0)
    abort ();
}

View File

@ -1,35 +0,0 @@
/* Test that the compiler properly optimizes vector rotate operations into
the prot instruction on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -ftree-vectorize" } */
extern void exit (int);
/* Local 16-byte vector type; this test includes no intrinsic header.  */
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
/* Number of elements in each test array.  */
#define SIZE 10240
/* Input/output arrays for the rotate kernel.  The __m128i member is never
   accessed; presumably it only gives the union vector alignment.  */
union {
__m128i i_align;
unsigned u32[SIZE];
} a, b, c;
/* For every element, combine b.u32[i] shifted left by
   (sizeof (int) * 8) - 4 bits with b.u32[i] shifted right by 4 bits, i.e. a
   rotate by a constant amount, and store it in a.u32[i].  The expression is
   kept in this shift/or form because the dg-final directive scans the
   generated assembly.  */
void
left_rotate32 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.u32[i] = (b.u32[i] << ((sizeof (int) * 8) - 4)) | (b.u32[i] >> 4);
}
/* Driver: run the rotate kernel once and terminate successfully.  */
int
main ()
{
  left_rotate32 ();
  exit (0);
}
/* { dg-final { scan-assembler "protd" } } */

View File

@ -1,35 +0,0 @@
/* Test that the compiler properly optimizes vector rotate operations into
the prot instruction on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -ftree-vectorize" } */
extern void exit (int);
/* Local 16-byte vector type; this test includes no intrinsic header.  */
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
/* Number of elements in each test array.  */
#define SIZE 10240
/* Input/output arrays for the rotate kernel.  The __m128i member is never
   accessed; presumably it only gives the union vector alignment.  */
union {
__m128i i_align;
unsigned u32[SIZE];
} a, b, c;
/* For every element, combine b.u32[i] shifted right by
   (sizeof (int) * 8) - 4 bits with b.u32[i] shifted left by 4 bits, i.e. a
   rotate by a constant amount, and store it in a.u32[i].  The expression is
   kept in this shift/or form because the dg-final directive scans the
   generated assembly.  */
void
right_rotate32_b (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.u32[i] = (b.u32[i] >> ((sizeof (int) * 8) - 4)) | (b.u32[i] << 4);
}
/* Driver: run the rotate kernel once and terminate successfully.  The call
   names right_rotate32_b, the kernel this file defines; no function called
   right_rotate exists in this test.  */
int
main ()
{
  right_rotate32_b ();
  exit (0);
}
/* { dg-final { scan-assembler "prot" } } */

View File

@ -1,34 +0,0 @@
/* Test that the compiler properly optimizes vector rotate operations into
the prot instruction on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -ftree-vectorize" } */
extern void exit (int);
/* Local 16-byte vector type; this test includes no intrinsic header.  */
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
/* Number of elements in each test array.  */
#define SIZE 10240
/* Result (a), value (b) and rotate-count (c) arrays for the rotate kernel.
   The __m128i member is never accessed; presumably it only gives the union
   vector alignment.  */
union {
__m128i i_align;
unsigned u32[SIZE];
} a, b, c;
/* Per-element variable rotate: a.u32[i] is b.u32[i] shifted left by
   c.u32[i] bits, or-ed with b.u32[i] shifted right by
   (sizeof (int) * 8) - c.u32[i] bits.  The expression is kept in this
   shift/or form because the dg-final directive scans the generated assembly.
   NOTE(review): with a count of 0 the right-shift amount equals the type
   width; this looks harmless only because the test is compile-only --
   confirm before turning it into a run test.  */
void
vector_rotate32 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.u32[i] = (b.u32[i] >> ((sizeof (int) * 8) - c.u32[i])) | (b.u32[i] << c.u32[i]);
}
/* Driver: run the variable-rotate kernel once and terminate successfully.  */
int
main ()
{
  vector_rotate32 ();
  exit (0);
}
/* { dg-final { scan-assembler "protd" } } */

View File

@ -1,35 +0,0 @@
/* Test that the compiler properly optimizes vector shift instructions into
psha/pshl on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -ftree-vectorize" } */
extern void exit (int);
/* Local 16-byte vector type; this test includes no intrinsic header.  */
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
/* Number of elements in each test array.  */
#define SIZE 10240
/* Result (a), value (b) and shift-count (c) arrays, viewable as signed (i32)
   or unsigned (u32) elements.  The __m128i member is never accessed;
   presumably it only gives the union vector alignment.  */
union {
__m128i i_align;
int i32[SIZE];
unsigned u32[SIZE];
} a, b, c;
/* Per-element variable left shift of signed values:
   a.i32[i] = b.i32[i] << c.i32[i].  The loop is kept in this exact form
   because the dg-final directive scans the generated assembly.  */
void
left_shift32 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.i32[i] = b.i32[i] << c.i32[i];
}
/* Driver: run the left-shift kernel once and terminate successfully.  The
   call names left_shift32, the kernel this file defines (the previous
   spelling `left_shfit32' referred to no function).  */
int
main ()
{
  left_shift32 ();
  exit (0);
}
/* { dg-final { scan-assembler "pshad" } } */

View File

@ -1,35 +0,0 @@
/* Test that the compiler properly optimizes vector shift instructions into
psha/pshl on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -ftree-vectorize" } */
extern void exit (int);
/* Local 16-byte vector type; this test includes no intrinsic header.  */
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
/* Number of elements in each test array.  */
#define SIZE 10240
/* Result (a), value (b) and shift-count (c) arrays, viewable as signed (i32)
   or unsigned (u32) elements.  The __m128i member is never accessed;
   presumably it only gives the union vector alignment.  */
union {
__m128i i_align;
int i32[SIZE];
unsigned u32[SIZE];
} a, b, c;
/* Per-element variable right shift of signed values:
   a.i32[i] = b.i32[i] >> c.i32[i].  The loop is kept in this exact form
   because the dg-final directive scans the generated assembly.  */
void
right_sign_shift32 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.i32[i] = b.i32[i] >> c.i32[i];
}
/* Driver: run the signed right-shift kernel once and terminate successfully.
   The call names right_sign_shift32, the kernel this file defines (the
   previous spelling `right_sign_shfit32' referred to no function).  */
int
main ()
{
  right_sign_shift32 ();
  exit (0);
}
/* { dg-final { scan-assembler "pshad" } } */

View File

@ -1,35 +0,0 @@
/* Test that the compiler properly optimizes vector shift instructions into
psha/pshl on SSE5 systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -msse5 -ftree-vectorize" } */
extern void exit (int);
/* Local 16-byte vector type; this test includes no intrinsic header.  */
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
/* Number of elements in each test array.  */
#define SIZE 10240
/* Result (a), value (b) and shift-count (c) arrays, viewable as signed (i32)
   or unsigned (u32) elements.  The __m128i member is never accessed;
   presumably it only gives the union vector alignment.  */
union {
__m128i i_align;
int i32[SIZE];
unsigned u32[SIZE];
} a, b, c;
/* Per-element variable right shift of unsigned values by a signed count:
   a.u32[i] = b.u32[i] >> c.i32[i].  The loop is kept in this exact form
   because the dg-final directive scans the generated assembly.  */
void
right_uns_shift32 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.u32[i] = b.u32[i] >> c.i32[i];
}
/* Driver: run the unsigned right-shift kernel once and terminate
   successfully.  The call names right_uns_shift32, the kernel this file
   defines (the previous spelling `right_uns_shfit32' referred to no
   function).  */
int
main ()
{
  right_uns_shift32 ();
  exit (0);
}
/* { dg-final { scan-assembler "pshld" } } */

View File

@ -2296,6 +2296,7 @@ remove_bb (basic_block bb)
if (gimple_in_ssa_p (cfun))
release_defs (stmt);
unlink_stmt_vdef (stmt);
gsi_remove (&i, true);
}