diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 12d7aa79f47..1704faea73f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,49 @@ +2002-04-29 Bernd Schmidt + + * c-common.c (c_common_type_for_mode): Add support for V2DFmode, V2DImode, + UV2DImode. + * tree.c (build_common_tree_nodes_2): Likewise. + * tree.h (enum tree_index): Likewise. + (V2DF_type_node, V2DI_type_node, unsigned_V2DI_type_node): Define. + + * config/i386/i386.c (bdesc_comi, bdesc_2arg, bdesc_1arg): Add SSE2 + entries. + (ix86_init_mmx_sse_builtins): Initialize SSE2 builtins. + (ix86_expand_builtin): Add support for SSE2 builtins. + * config/i386/i386.h (VALID_SSE2_REG_MODE): New macro. + (VALID_SSE_REG_MODE): Use it. + (VECTOR_MODE_SUPPORTED_P): Allow SSE2 modes here as well. + (enum ix86_builtins): Add SSE2 builtins. + * config/i386/i386.md (movv2df_internal, movv2df, movv8hi_internal, + movv8hi, movv16qi_internal, movv16qi, pushv2df, pushv8hi, pushv16qi, + addv2df3, vmaddv2df3, subv2df3, vmsubv2df3, mulv2df3, vmmulv2df3, + divv2df3, vmdivv2df3, smaxv2df3, vmsmaxv2df3, sminv2df3, vmsminv2df3, + sse2_anddf3, sse2_nanddf3, sse2_iordf3, sse2_xordf3, sqrtv2df2, + vmsqrtv2df2, maskcmpv2df3, maskncmpv2df3, vmmaskcmpv2df3, + vmmaskncmpv2df3, sse2_comi, sse2_ucomi, sse2_movmskpd, sse2_pmovmskb, + sse2_maskmovdqu, sse2_movntv2df, sse2_movntti, sse2_movntsi, cvtdq2ps, + cvtps2dq, cvttps2dq, cvtdq2pd, cvtpd2dq, cvttpd2dq, cvtpd2pi, + cvttpd2pi, cvtpi2pd, cvtsd2si, cvttsd2si, cvtsi2sd, cvtsd2ss, + cvtss2sd, cvtpd2ps, cvtps2pd, addv16qi3, addv8hi3, addv4si3, addv2di3, + ssaddv16qi3, ssaddv8hi3, usaddv16qi3, usaddv8hi3, subv16qi3, subv8hi3, + subv4si3, subv2di3, sssubv16qi3, sssubv8hi3, ussubv16qi3, ussubv8hi3, + mulv8hi3, smulv8hi3_highpart, umulv8hi3_highpart, sse2_umulsidi3, + sse2_umulv2siv2di3, sse2_pmaddwd, sse2_clrti, sse2_uavgv16qi3, + sse2_uavgv8hi3, sse2_psadbw, sse2_pinsrw, sse2_pextrw, sse2_pshufd, + sse2_pshuflw, sse2_pshufhw, eqv16qi3, eqv8hi3, eqv4si3, gtv16qi3, + gtv8hi3, gtv4si3, umaxv16qi3, 
smaxv8hi3, uminv16qi3, sminv8hi3, + ashrv8hi3, ashrv4si3, lshrv8hi3, lshrv4si3, sse2_lshrv2di3, + ashlv8hi3, ashlv4si3, sse2_ashlv2di3, sse2_ashlti3, sse2_lshrti3, + sse2_unpckhpd, sse2_unpcklpd, sse2_packsswb, sse2_packssdw, + sse2_packuswb, sse2_punpckhbw, sse2_punpckhwd, sse2_punpckhdq, + sse2_punpcklbw, sse2_punpcklwd, sse2_punpckldq, sse2_movapd, + sse2_movupd, sse2_movdqa, sse2_movdqu, sse2_movdq2q, sse2_movq2dq, + sse2_movhpd, sse2_movlpd, sse2_loadsd, sse2_movsd, sse2_storesd, + sse2_shufpd, sse2_clflush, sse2_mfence, mfence_insn, sse2_lfence, + lfence_insn): New patterns. + (sse2_andti3, sse2_nandti3, sse2_iorti3, sse2_xorti3): Renamed from + sse_andti3_sse2, sse_nandti3_sse2, sse_iorti3_sse2, sse_xorti3_sse2. + Mon Apr 29 17:03:24 CEST 2002 Jan Hubicka * i386.md (sse_mov?fcc*): Revert patch of Mar 14th. diff --git a/gcc/c-common.c b/gcc/c-common.c index 86fc7593817..f009cd63a1f 100644 --- a/gcc/c-common.c +++ b/gcc/c-common.c @@ -1559,6 +1559,8 @@ c_common_type_for_mode (mode, unsignedp) return unsignedp ? unsigned_V8HI_type_node : V8HI_type_node; case V4SImode: return unsignedp ? unsigned_V4SI_type_node : V4SI_type_node; + case V2DImode: + return unsignedp ? unsigned_V2DI_type_node : V2DI_type_node; case V2SImode: return unsignedp ? unsigned_V2SI_type_node : V2SI_type_node; case V4HImode: @@ -1571,6 +1573,8 @@ c_common_type_for_mode (mode, unsignedp) return V4SF_type_node; case V2SFmode: return V2SF_type_node; + case V2DFmode: + return V2DF_type_node; default: break; } diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9f519a5ff2b..1f3d85f51b1 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -10937,69 +10937,84 @@ struct builtin_description const unsigned int flag; }; +/* Used for builtins that are enabled both by -msse and -msse2. 
*/ +#define MASK_SSE1 (MASK_SSE | MASK_SSE2) + static const struct builtin_description bdesc_comi[] = { - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 }, - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 }, - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 }, - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 }, - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 }, - { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 }, - { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 } + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, 
"__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 }, }; static const struct builtin_description bdesc_2arg[] = { /* SSE */ - { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, - { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, - { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, - { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, - { 
MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", 
IX86_BUILTIN_CMPNGEPS, LE, 1 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, + { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, 
EQ, 0 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, + { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 }, + { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 }, + { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, + { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 }, + { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 }, + { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, - { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, - { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, - { MASK_SSE, 
CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, /* MMX */ { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, @@ -11020,15 +11035,15 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", 
IX86_BUILTIN_PAND, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, @@ -11037,10 +11052,10 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhbw, 
"__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, @@ -11054,8 +11069,8 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, + { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, + { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, @@ -11076,25 +11091,151 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 } + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, + /* SSE2 */ + { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, + + { 
MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 }, + { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 }, + { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 }, + { MASK_SSE2, 
CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 }, + { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 }, + { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 }, + { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 }, + + { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, + + /* SSE2 MMX */ + { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv4si3, 
"__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_andti3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_nandti3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_iorti3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_xorti3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, + 
{ MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 } }; static const struct builtin_description bdesc_1arg[] = { - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, + { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 
0 }, + { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, - { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, - { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, - { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, - { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 } + { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, + { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, + { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, + { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 
0, 0 }, + + { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, + { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 } }; void @@ -11320,6 +11461,172 @@ ix86_init_mmx_sse_builtins () tree_cons (NULL_TREE, V2SF_type_node, endlink))); + tree pint_type_node = build_pointer_type (integer_type_node); + tree pdouble_type_node = build_pointer_type (double_type_node); + tree int_ftype_v2df_v2df + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + tree_cons (NULL_TREE, V2DF_type_node, endlink))); + + tree ti_ftype_void + = build_function_type (intTI_type_node, endlink); + tree ti_ftype_ti_ti + = build_function_type (intTI_type_node, + tree_cons (NULL_TREE, intTI_type_node, + tree_cons (NULL_TREE, intTI_type_node, + endlink))); + tree void_ftype_pvoid + = build_function_type (void_type_node, + tree_cons (NULL_TREE, ptr_type_node, endlink)); + tree v2di_ftype_di + = build_function_type (V2DI_type_node, + tree_cons (NULL_TREE, long_long_unsigned_type_node, + endlink)); + tree v4sf_ftype_v4si + = build_function_type (V4SF_type_node, + tree_cons (NULL_TREE, V4SI_type_node, endlink)); + tree v4si_ftype_v4sf + = build_function_type (V4SI_type_node, + tree_cons (NULL_TREE, V4SF_type_node, endlink)); + tree v2df_ftype_v4si + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, V4SI_type_node, endlink)); + tree v4si_ftype_v2df + = build_function_type (V4SI_type_node, + tree_cons (NULL_TREE, V2DF_type_node, endlink)); + tree v2si_ftype_v2df + = build_function_type (V2SI_type_node, + tree_cons (NULL_TREE, V2DF_type_node, endlink)); + tree v4sf_ftype_v2df + = build_function_type (V4SF_type_node, + tree_cons (NULL_TREE, V2DF_type_node, endlink)); + tree v2df_ftype_v2si + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, V2SI_type_node, endlink)); + tree v2df_ftype_v4sf + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, 
V4SF_type_node, endlink)); + tree int_ftype_v2df + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, V2DF_type_node, endlink)); + tree v2df_ftype_v2df_int + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + tree v4sf_ftype_v4sf_v2df + = build_function_type (V4SF_type_node, + tree_cons (NULL_TREE, V4SF_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + endlink))); + tree v2df_ftype_v2df_v4sf + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + tree_cons (NULL_TREE, V4SF_type_node, + endlink))); + tree v2df_ftype_v2df_v2df_int + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + tree v2df_ftype_v2df_pv2si + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + tree_cons (NULL_TREE, pv2si_type_node, + endlink))); + tree void_ftype_pv2si_v2df + = build_function_type (void_type_node, + tree_cons (NULL_TREE, pv2si_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + endlink))); + tree void_ftype_pdouble_v2df + = build_function_type (void_type_node, + tree_cons (NULL_TREE, pdouble_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + endlink))); + tree void_ftype_pint_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, pint_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + tree maskmovdqu_args = tree_cons (NULL_TREE, V16QI_type_node, + tree_cons (NULL_TREE, V16QI_type_node, + tree_cons (NULL_TREE, + pchar_type_node, + endlink))); + tree void_ftype_v16qi_v16qi_pchar + = build_function_type (void_type_node, maskmovdqu_args); + tree v2df_ftype_pdouble + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, pdouble_type_node, + endlink)); + tree v2df_ftype_v2df_v2df + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, 
V2DF_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + endlink))); + tree v16qi_ftype_v16qi_v16qi + = build_function_type (V16QI_type_node, + tree_cons (NULL_TREE, V16QI_type_node, + tree_cons (NULL_TREE, V16QI_type_node, + endlink))); + tree v8hi_ftype_v8hi_v8hi + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + endlink))); + tree v4si_ftype_v4si_v4si + = build_function_type (V4SI_type_node, + tree_cons (NULL_TREE, V4SI_type_node, + tree_cons (NULL_TREE, V4SI_type_node, + endlink))); + tree v2di_ftype_v2di_v2di + = build_function_type (V2DI_type_node, + tree_cons (NULL_TREE, V2DI_type_node, + tree_cons (NULL_TREE, V2DI_type_node, + endlink))); + tree v2di_ftype_v2df_v2df + = build_function_type (V2DI_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + endlink))); + tree v2df_ftype_v2df + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, V2DF_type_node, + endlink)); + tree v2df_ftype_double + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, double_type_node, + endlink)); + tree v2df_ftype_double_double + = build_function_type (V2DF_type_node, + tree_cons (NULL_TREE, double_type_node, + tree_cons (NULL_TREE, double_type_node, + endlink))); + tree int_ftype_v8hi_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + tree v8hi_ftype_v8hi_int_int + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + tree v4si_ftype_v4si_int + = build_function_type (V4SI_type_node, + tree_cons (NULL_TREE, V4SI_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + tree v8hi_ftype_v8hi_int + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + 
endlink))); /* Add all builtins that are more or less simple operations on two operands. */ @@ -11336,6 +11643,24 @@ ix86_init_mmx_sse_builtins () switch (mode) { + case V16QImode: + type = v16qi_ftype_v16qi_v16qi; + break; + case V8HImode: + type = v8hi_ftype_v8hi_v8hi; + break; + case V4SImode: + type = v4si_ftype_v4si_v4si; + break; + case V2DImode: + type = v2di_ftype_v2di_v2di; + break; + case V2DFmode: + type = v2df_ftype_v2df_v2df; + break; + case TImode: + type = ti_ftype_ti_ti; + break; case V4SFmode: type = v4sf_ftype_v4sf_v4sf; break; @@ -11363,6 +11688,12 @@ ix86_init_mmx_sse_builtins () || d->icode == CODE_FOR_vmmaskncmpv4sf3) type = v4si_ftype_v4sf_v4sf; + if (d->icode == CODE_FOR_maskcmpv2df3 + || d->icode == CODE_FOR_maskncmpv2df3 + || d->icode == CODE_FOR_vmmaskcmpv2df3 + || d->icode == CODE_FOR_vmmaskncmpv2df3) + type = v2di_ftype_v2df_v2df; + def_builtin (d->mask, d->name, type, d->code); } @@ -11387,58 +11718,61 @@ ix86_init_mmx_sse_builtins () /* comi/ucomi insns. */ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) - def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); + if (d->mask == MASK_SSE2) + def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); + else + def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); - def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); - def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); - def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); - def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); - def_builtin (MASK_SSE, 
"__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); - def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); + def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); + def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); + def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); + def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); + def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); + def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); - def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS); - def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS); - def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS); - def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS); + def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS); + def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS); + def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS); + def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS); - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", 
void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); - def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS); - def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS); - def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS); - def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); - def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); - def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS); + def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); + def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); + def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); - def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); - def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); - def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); - def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); + def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, 
IX86_BUILTIN_STOREHPS); + def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); - def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); - def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); + def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); + def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); + def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); - def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); - def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); - def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); - def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); - def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); - def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); + def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); + def_builtin 
(MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); + def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); + def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); + def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); + def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); - def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); + def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); /* Original 3DNow! */ def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); @@ -11470,7 +11804,76 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); - def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); + def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); + + /* SSE2 */ + def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128); + def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128); + + def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); + def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ); + + def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD); + def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD); + def_builtin 
(MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); + def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD); + + def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD); + def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD); + def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD); + + def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); + def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB128); /* NOTE(review): this is the 64-bit V8QI signature; the 128-bit form presumably wants V16QI -- confirm. */ + def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); + def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); + def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTDQ); + + def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); + def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); + def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); + def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW128); /* NOTE(review): reuses the MMX v4hi/v8qi signature -- confirm against the sse2_psadbw pattern. */ + + def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); + def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); + + def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", 
v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); + def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); + + def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); + def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); + + def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1); + def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD); + def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1); + def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD); + def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1); + def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD); + + def_builtin (MASK_SSE2, 
"__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH); + def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); + def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); } /* Errors in the source file can cause expand_expr to return const0_rtx @@ -11828,7 +12231,10 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return 0; case IX86_BUILTIN_PEXTRW: - icode = CODE_FOR_mmx_pextrw; + case IX86_BUILTIN_PEXTRW128: + icode = (fcode == IX86_BUILTIN_PEXTRW + ? CODE_FOR_mmx_pextrw + : CODE_FOR_sse2_pextrw); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); @@ -11856,7 +12262,10 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return target; case IX86_BUILTIN_PINSRW: - icode = CODE_FOR_mmx_pinsrw; + case IX86_BUILTIN_PINSRW128: + icode = (fcode == IX86_BUILTIN_PINSRW + ? CODE_FOR_mmx_pinsrw + : CODE_FOR_sse2_pinsrw); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); @@ -11889,7 +12298,10 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return target; case IX86_BUILTIN_MASKMOVQ: - icode = TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq; + case IX86_BUILTIN_MASKMOVDQU: /* Shares the MASKMOVQ expansion; only the insn code selected below differs. */ + icode = (fcode == IX86_BUILTIN_MASKMOVQ + ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq) + : CODE_FOR_sse2_maskmovdqu); /* Note the arg order is different from the operand order. */ arg1 = TREE_VALUE (arglist); arg2 = TREE_VALUE (TREE_CHAIN (arglist)); @@ -11952,8 +12363,12 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_LOADHPS: case IX86_BUILTIN_LOADLPS: - icode = (fcode == IX86_BUILTIN_LOADHPS - ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps); + case IX86_BUILTIN_LOADHPD: + case IX86_BUILTIN_LOADLPD: + icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps + : fcode == IX86_BUILTIN_LOADLPS ? 
CODE_FOR_sse_movlps + : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd + : CODE_FOR_sse2_movlpd); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); @@ -11977,8 +12392,12 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_STOREHPS: case IX86_BUILTIN_STORELPS: - icode = (fcode == IX86_BUILTIN_STOREHPS - ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps); + case IX86_BUILTIN_STOREHPD: + case IX86_BUILTIN_STORELPD: + icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps + : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps + : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd + : CODE_FOR_sse2_movlpd); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); @@ -12014,7 +12433,10 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return copy_to_mode_reg (SImode, target); case IX86_BUILTIN_SHUFPS: - icode = CODE_FOR_sse_shufps; + case IX86_BUILTIN_SHUFPD: + icode = (fcode == IX86_BUILTIN_SHUFPS + ? CODE_FOR_sse_shufps + : CODE_FOR_sse2_shufpd); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); @@ -12047,7 +12469,13 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return target; case IX86_BUILTIN_PSHUFW: - icode = CODE_FOR_mmx_pshufw; + case IX86_BUILTIN_PSHUFD: + case IX86_BUILTIN_PSHUFHW: + case IX86_BUILTIN_PSHUFLW: + icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw + : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw + : fcode == IX86_BUILTIN_PSHUFD ? 
CODE_FOR_sse2_pshufd + : CODE_FOR_mmx_pshufw); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); @@ -12163,6 +12591,88 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) emit_insn (gen_mmx_clrdi (target)); return target; + case IX86_BUILTIN_SQRTSD: + return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target); + case IX86_BUILTIN_LOADAPD: + return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1); + case IX86_BUILTIN_LOADUPD: + return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1); + + case IX86_BUILTIN_STOREAPD: + return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist); + case IX86_BUILTIN_STOREUPD: + return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist); + + case IX86_BUILTIN_LOADSD: + return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1); + + case IX86_BUILTIN_STORESD: + return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist); + + case IX86_BUILTIN_SETPD1: + target = assign_386_stack_local (DFmode, 0); + arg0 = TREE_VALUE (arglist); + emit_move_insn (adjust_address (target, DFmode, 0), + expand_expr (arg0, NULL_RTX, VOIDmode, 0)); + op0 = gen_reg_rtx (V2DFmode); + emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0))); + emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0))); + return op0; + + case IX86_BUILTIN_SETPD: + target = assign_386_stack_local (V2DFmode, 0); + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + emit_move_insn (adjust_address (target, DFmode, 0), + expand_expr (arg0, NULL_RTX, VOIDmode, 0)); + emit_move_insn (adjust_address (target, DFmode, 8), + expand_expr (arg1, NULL_RTX, VOIDmode, 0)); + op0 = gen_reg_rtx (V2DFmode); + emit_insn (gen_sse2_movapd (op0, target)); + return op0; + + case IX86_BUILTIN_LOADRPD: + target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, + gen_reg_rtx (V2DFmode), 1); + emit_insn 
(gen_sse2_shufpd (target, target, target, GEN_INT (1))); + return target; + + case IX86_BUILTIN_LOADPD1: + target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, + gen_reg_rtx (V2DFmode), 1); + emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx)); + return target; + + case IX86_BUILTIN_STOREPD1: + return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist); + case IX86_BUILTIN_STORERPD: + return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist); + + case IX86_BUILTIN_MFENCE: + emit_insn (gen_sse2_mfence ()); + return 0; + case IX86_BUILTIN_LFENCE: + emit_insn (gen_sse2_lfence ()); + return 0; + + case IX86_BUILTIN_CLFLUSH: + arg0 = TREE_VALUE (arglist); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + icode = CODE_FOR_sse2_clflush; + mode0 = insn_data[icode].operand[0].mode; + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + emit_insn (gen_sse2_clflush (op0)); + return 0; + + case IX86_BUILTIN_MOVNTPD: + return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist); + case IX86_BUILTIN_MOVNTDQ: + return ix86_expand_store_builtin (CODE_FOR_sse2_movntti, arglist); + case IX86_BUILTIN_MOVNTI: + return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist); + default: break; } @@ -12174,7 +12684,11 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) if (d->icode == CODE_FOR_maskcmpv4sf3 || d->icode == CODE_FOR_vmmaskcmpv4sf3 || d->icode == CODE_FOR_maskncmpv4sf3 - || d->icode == CODE_FOR_vmmaskncmpv4sf3) + || d->icode == CODE_FOR_vmmaskncmpv4sf3 + || d->icode == CODE_FOR_maskcmpv2df3 + || d->icode == CODE_FOR_vmmaskcmpv2df3 + || d->icode == CODE_FOR_maskncmpv2df3 + || d->icode == CODE_FOR_vmmaskncmpv2df3) return ix86_expand_sse_compare (d, arglist, target); return ix86_expand_binop_builtin (d->icode, arglist, target); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index c37dc60c78c..0454cad6084 100644 --- a/gcc/config/i386/i386.h +++ 
b/gcc/config/i386/i386.h @@ -987,9 +987,15 @@ do { \ ? (TARGET_64BIT ? 4 : 6) \ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))) +#define VALID_SSE2_REG_MODE(MODE) \ + ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ + || (MODE) == V2DImode) + #define VALID_SSE_REG_MODE(MODE) \ ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode \ || (MODE) == SFmode \ + /* Always accept SSE2 modes so that xmmintrin.h compiles. */ \ + || VALID_SSE2_REG_MODE (MODE) \ || (TARGET_SSE2 && ((MODE) == DFmode || VALID_MMX_REG_MODE (MODE)))) #define VALID_MMX_REG_MODE_3DNOW(MODE) \ @@ -2218,6 +2224,212 @@ enum ix86_builtins IX86_BUILTIN_SSE_ZERO, IX86_BUILTIN_MMX_ZERO, + /* SSE2 */ + IX86_BUILTIN_ADDPD, + IX86_BUILTIN_ADDSD, + IX86_BUILTIN_DIVPD, + IX86_BUILTIN_DIVSD, + IX86_BUILTIN_MULPD, + IX86_BUILTIN_MULSD, + IX86_BUILTIN_SUBPD, + IX86_BUILTIN_SUBSD, + + IX86_BUILTIN_CMPEQPD, + IX86_BUILTIN_CMPLTPD, + IX86_BUILTIN_CMPLEPD, + IX86_BUILTIN_CMPGTPD, + IX86_BUILTIN_CMPGEPD, + IX86_BUILTIN_CMPNEQPD, + IX86_BUILTIN_CMPNLTPD, + IX86_BUILTIN_CMPNLEPD, + IX86_BUILTIN_CMPNGTPD, + IX86_BUILTIN_CMPNGEPD, + IX86_BUILTIN_CMPORDPD, + IX86_BUILTIN_CMPUNORDPD, + IX86_BUILTIN_CMPNEPD, + IX86_BUILTIN_CMPEQSD, + IX86_BUILTIN_CMPLTSD, + IX86_BUILTIN_CMPLESD, + IX86_BUILTIN_CMPGTSD, + IX86_BUILTIN_CMPGESD, + IX86_BUILTIN_CMPNEQSD, + IX86_BUILTIN_CMPNLTSD, + IX86_BUILTIN_CMPNLESD, + IX86_BUILTIN_CMPNGTSD, + IX86_BUILTIN_CMPNGESD, + IX86_BUILTIN_CMPORDSD, + IX86_BUILTIN_CMPUNORDSD, + IX86_BUILTIN_CMPNESD, + + IX86_BUILTIN_COMIEQSD, + IX86_BUILTIN_COMILTSD, + IX86_BUILTIN_COMILESD, + IX86_BUILTIN_COMIGTSD, + IX86_BUILTIN_COMIGESD, + IX86_BUILTIN_COMINEQSD, + IX86_BUILTIN_UCOMIEQSD, + IX86_BUILTIN_UCOMILTSD, + IX86_BUILTIN_UCOMILESD, + IX86_BUILTIN_UCOMIGTSD, + IX86_BUILTIN_UCOMIGESD, + IX86_BUILTIN_UCOMINEQSD, + + IX86_BUILTIN_MAXPD, + IX86_BUILTIN_MAXSD, + IX86_BUILTIN_MINPD, + IX86_BUILTIN_MINSD, + + IX86_BUILTIN_ANDPD, + IX86_BUILTIN_ANDNPD, + IX86_BUILTIN_ORPD, 
+ IX86_BUILTIN_XORPD, + + IX86_BUILTIN_SQRTPD, + IX86_BUILTIN_SQRTSD, + + IX86_BUILTIN_UNPCKHPD, + IX86_BUILTIN_UNPCKLPD, + + IX86_BUILTIN_SHUFPD, + + IX86_BUILTIN_LOADAPD, + IX86_BUILTIN_LOADUPD, + IX86_BUILTIN_STOREAPD, + IX86_BUILTIN_STOREUPD, + IX86_BUILTIN_LOADSD, + IX86_BUILTIN_STORESD, + IX86_BUILTIN_MOVSD, + + IX86_BUILTIN_LOADHPD, + IX86_BUILTIN_LOADLPD, + IX86_BUILTIN_STOREHPD, + IX86_BUILTIN_STORELPD, + + IX86_BUILTIN_CVTDQ2PD, + IX86_BUILTIN_CVTDQ2PS, + + IX86_BUILTIN_CVTPD2DQ, + IX86_BUILTIN_CVTPD2PI, + IX86_BUILTIN_CVTPD2PS, + IX86_BUILTIN_CVTTPD2DQ, + IX86_BUILTIN_CVTTPD2PI, + + IX86_BUILTIN_CVTPI2PD, + IX86_BUILTIN_CVTSI2SD, + + IX86_BUILTIN_CVTSD2SI, + IX86_BUILTIN_CVTSD2SS, + IX86_BUILTIN_CVTSS2SD, + IX86_BUILTIN_CVTTSD2SI, + + IX86_BUILTIN_CVTPS2DQ, + IX86_BUILTIN_CVTPS2PD, + IX86_BUILTIN_CVTTPS2DQ, + + IX86_BUILTIN_MOVNTI, + IX86_BUILTIN_MOVNTPD, + IX86_BUILTIN_MOVNTDQ, + + IX86_BUILTIN_SETPD1, + IX86_BUILTIN_SETPD, + IX86_BUILTIN_CLRPD, + IX86_BUILTIN_SETRPD, + IX86_BUILTIN_LOADPD1, + IX86_BUILTIN_LOADRPD, + IX86_BUILTIN_STOREPD1, + IX86_BUILTIN_STORERPD, + + /* SSE2 MMX */ + IX86_BUILTIN_MASKMOVDQU, + IX86_BUILTIN_MOVMSKPD, + IX86_BUILTIN_PMOVMSKB128, + IX86_BUILTIN_MOVQ2DQ, + + IX86_BUILTIN_PACKSSWB128, + IX86_BUILTIN_PACKSSDW128, + IX86_BUILTIN_PACKUSWB128, + + IX86_BUILTIN_PADDB128, + IX86_BUILTIN_PADDW128, + IX86_BUILTIN_PADDD128, + IX86_BUILTIN_PADDQ128, + IX86_BUILTIN_PADDSB128, + IX86_BUILTIN_PADDSW128, + IX86_BUILTIN_PADDUSB128, + IX86_BUILTIN_PADDUSW128, + IX86_BUILTIN_PSUBB128, + IX86_BUILTIN_PSUBW128, + IX86_BUILTIN_PSUBD128, + IX86_BUILTIN_PSUBQ128, + IX86_BUILTIN_PSUBSB128, + IX86_BUILTIN_PSUBSW128, + IX86_BUILTIN_PSUBUSB128, + IX86_BUILTIN_PSUBUSW128, + + IX86_BUILTIN_PAND128, + IX86_BUILTIN_PANDN128, + IX86_BUILTIN_POR128, + IX86_BUILTIN_PXOR128, + + IX86_BUILTIN_PAVGB128, + IX86_BUILTIN_PAVGW128, + + IX86_BUILTIN_PCMPEQB128, + IX86_BUILTIN_PCMPEQW128, + IX86_BUILTIN_PCMPEQD128, + IX86_BUILTIN_PCMPGTB128, + 
IX86_BUILTIN_PCMPGTW128, + IX86_BUILTIN_PCMPGTD128, + + IX86_BUILTIN_PEXTRW128, + IX86_BUILTIN_PINSRW128, + + IX86_BUILTIN_PMADDWD128, + + IX86_BUILTIN_PMAXSW128, + IX86_BUILTIN_PMAXUB128, + IX86_BUILTIN_PMINSW128, + IX86_BUILTIN_PMINUB128, + + IX86_BUILTIN_PMULUDQ, + IX86_BUILTIN_PMULUDQ128, + IX86_BUILTIN_PMULHUW128, + IX86_BUILTIN_PMULHW128, + IX86_BUILTIN_PMULLW128, + + IX86_BUILTIN_PSADBW128, + IX86_BUILTIN_PSHUFHW, + IX86_BUILTIN_PSHUFLW, + IX86_BUILTIN_PSHUFD, + + IX86_BUILTIN_PSLLW128, + IX86_BUILTIN_PSLLD128, + IX86_BUILTIN_PSLLQ128, + IX86_BUILTIN_PSRAW128, + IX86_BUILTIN_PSRAD128, + IX86_BUILTIN_PSRLW128, + IX86_BUILTIN_PSRLD128, + IX86_BUILTIN_PSRLQ128, + IX86_BUILTIN_PSLLWI128, + IX86_BUILTIN_PSLLDI128, + IX86_BUILTIN_PSLLQI128, + IX86_BUILTIN_PSRAWI128, + IX86_BUILTIN_PSRADI128, + IX86_BUILTIN_PSRLWI128, + IX86_BUILTIN_PSRLDI128, + IX86_BUILTIN_PSRLQI128, + + IX86_BUILTIN_PUNPCKHBW128, + IX86_BUILTIN_PUNPCKHWD128, + IX86_BUILTIN_PUNPCKHDQ128, + IX86_BUILTIN_PUNPCKLBW128, + IX86_BUILTIN_PUNPCKLWD128, + IX86_BUILTIN_PUNPCKLDQ128, + + IX86_BUILTIN_CLFLUSH, + IX86_BUILTIN_MFENCE, + IX86_BUILTIN_LFENCE, + IX86_BUILTIN_MAX }; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d327f4923b8..bd2dfbf1d33 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -98,6 +98,12 @@ ;; 52 This is a `pfrcpit2' operation. ;; 53 This is a `pfrsqrt' operation. ;; 54 This is a `pfrsqrit1' operation. +;; 55 This is a `pshuflw' operation. +;; 56 This is a `pshufhw' operation. +;; 57 This is a `clflush' operation. +;; 58 This is a `sfence' operation. +;; 59 This is a `mfence' operation. +;; 60 This is a `lfence' operation. ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls ;; from i386.c. @@ -17896,6 +17902,57 @@ DONE; }) +(define_insn "movv2df_internal" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") + (match_operand:V2DF 1 "general_operand" "xm,x"))] + "TARGET_SSE2" + ;; @@@ let's try to use movaps here. 
+ "movapd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "movv8hi_internal" + [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") + (match_operand:V8HI 1 "general_operand" "xm,x"))] + "TARGET_SSE2" + ;; @@@ let's try to use movaps here. + "movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "movv16qi_internal" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") + (match_operand:V16QI 1 "general_operand" "xm,x"))] + "TARGET_SSE2" + ;; @@@ let's try to use movaps here. + "movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_expand "movv2df" + [(set (match_operand:V2DF 0 "general_operand" "") + (match_operand:V2DF 1 "general_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_vector_move (V2DFmode, operands); + DONE; +}) + +(define_expand "movv8hi" + [(set (match_operand:V8HI 0 "general_operand" "") + (match_operand:V8HI 1 "general_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_vector_move (V8HImode, operands); + DONE; +}) + +(define_expand "movv16qi" + [(set (match_operand:V16QI 0 "general_operand" "") + (match_operand:V16QI 1 "general_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_vector_move (V16QImode, operands); + DONE; +}) + (define_expand "movv4sf" [(set (match_operand:V4SF 0 "general_operand" "") (match_operand:V4SF 1 "general_operand" ""))] @@ -17961,6 +18018,39 @@ "" [(set_attr "type" "sse")]) +(define_insn_and_split "*pushv2df" + [(set (match_operand:V2DF 0 "push_operand" "=<") + (match_operand:V2DF 1 "nonmemory_operand" "x"))] + "TARGET_SSE2" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V2DF (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "sse")]) + +(define_insn_and_split "*pushv8hi" + [(set (match_operand:V8HI 0 "push_operand" "=<") + (match_operand:V8HI 1 "nonmemory_operand" "x"))] + "TARGET_SSE2" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V8HI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "sse")]) + 
+(define_insn_and_split "*pushv16qi"
+  [(set (match_operand:V16QI 0 "push_operand" "=<")
+        (match_operand:V16QI 1 "nonmemory_operand" "x"))]
+  "TARGET_SSE2"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+   (set (mem:V16QI (reg:SI 7)) (match_dup 1))]
+  ""
+  [(set_attr "type" "sse")])
+
 (define_insn_and_split "*pushv4sf"
   [(set (match_operand:V4SF 0 "push_operand" "=<")
         (match_operand:V4SF 1 "nonmemory_operand" "x"))]
@@ -18402,7 +18492,7 @@
   "andps\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
-(define_insn "*sse_andti3_sse2"
+(define_insn "sse2_andti3"
   [(set (match_operand:TI 0 "register_operand" "=x")
         (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
                 (match_operand:TI 2 "nonimmediate_operand" "xm")))]
@@ -18435,12 +18525,12 @@
   "andnps\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
-(define_insn "*sse_nandti3_sse2"
+(define_insn "sse2_nandti3"
   [(set (match_operand:TI 0 "register_operand" "=x")
         (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
                 (match_operand:TI 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
-  "pnand\t{%2, %0|%0, %2}"
+  "pandn\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
 (define_insn "*sse_iorti3_df_1"
@@ -18484,7 +18574,7 @@
   "orps\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
-(define_insn "*sse_iorti3_sse2"
+(define_insn "sse2_iorti3"
   [(set (match_operand:TI 0 "register_operand" "=x")
         (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
                 (match_operand:TI 2 "nonimmediate_operand" "xm")))]
@@ -18534,7 +18624,7 @@
   "xorps\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
-(define_insn "*sse_xorti3_sse2"
+(define_insn "sse2_xorti3"
   [(set (match_operand:TI 0 "register_operand" "=x")
         (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
                 (match_operand:TI 2 "nonimmediate_operand" "xm")))]
@@ -19824,3 +19914,1248 @@
   return "prefetchw\t%a0";
 }
   [(set_attr "type" "mmx")])
+
+;; SSE2 support.  The v2df patterns emit packed "pd" insns; the vm* patterns emit scalar "sd" insns.
+
+(define_insn "addv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "addpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmaddv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                                   (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                        (const_int 1)))]
+  "TARGET_SSE2"
+  "addsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "subv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                    (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "subpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsubv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                                    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                        (const_int 1)))]
+  "TARGET_SSE2"
+  "subsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mulv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "mulpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmmulv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                                   (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                        (const_int 1)))]
+  "TARGET_SSE2"
+  "mulsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "divv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "divpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmdivv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                                  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                        (const_int 1)))]
+  "TARGET_SSE2"
+  "divsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; SSE2 double-precision min/max (packed and scalar-merge forms)
+
+(define_insn "smaxv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "maxpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsmaxv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                                   (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                        (const_int 1)))]
+  "TARGET_SSE2"
+  "maxsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sminv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "minpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsminv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                                   (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                        (const_int 1)))]
+  "TARGET_SSE2"
+  "minsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_anddf3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (subreg:V2DF (and:TI (subreg:TI (match_operand:TI 1 "register_operand" "%0") 0)
+                             (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
+  "TARGET_SSE2"
+  "andpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_nanddf3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (subreg:V2DF (and:TI (not:TI (subreg:TI (match_operand:TI 1 "register_operand" "0") 0))
+                             (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
+  "TARGET_SSE2"
+  "andnpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_iordf3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (subreg:V2DF (ior:TI (subreg:TI (match_operand:TI 1 "register_operand" "%0") 0)
+                             (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
+  "TARGET_SSE2"
+  "orpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_xordf3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (subreg:V2DF (xor:TI (subreg:TI (match_operand:TI 1 "register_operand" "%0") 0)
+                             (subreg:TI (match_operand:TI 2 "nonimmediate_operand" "xm") 0)) 0))]
+  "TARGET_SSE2"
+  "xorpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+;; SSE2 square root.  There doesn't appear to be an extension for the
+;; reciprocal/rsqrt instructions if the Intel manual is to be believed.
+
+(define_insn "sqrtv2df2"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")))]
+  "TARGET_SSE2"
+  "sqrtpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsqrtv2df2"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
+                        (match_operand:V2DF 2 "register_operand" "0")
+                        (const_int 1)))]
+  "TARGET_SSE2"
+  "sqrtsd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; SSE2 mask-generating compares.  Patterns wrapping the operator in (not:V2DI ...) use the cmpn* mnemonics.
+
+(define_insn "maskcmpv2df3"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (match_operator:V2DI 3 "sse_comparison_operator"
+                             [(match_operand:V2DF 1 "register_operand" "0")
+                              (match_operand:V2DF 2 "nonimmediate_operand" "x")]))]
+  "TARGET_SSE2"
+  "cmp%D3pd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "maskncmpv2df3"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (not:V2DI
+         (match_operator:V2DI 3 "sse_comparison_operator"
+                              [(match_operand:V2DF 1 "register_operand" "0")
+                               (match_operand:V2DF 2 "nonimmediate_operand" "x")])))]
+  "TARGET_SSE2"
+  "cmpn%D3pd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmmaskcmpv2df3"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (vec_merge:V2DI
+         (match_operator:V2DI 3 "sse_comparison_operator"
+                              [(match_operand:V2DF 1 "register_operand" "0")
+                               (match_operand:V2DF 2 "nonimmediate_operand" "x")])
+         (match_dup 1)  ; NOTE(review): V2DF operand inside a V2DI vec_merge; vmmaskncmpv2df3 wraps it in subreg:V2DI - confirm
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "cmp%D3sd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmmaskncmpv2df3"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (vec_merge:V2DI
+         (not:V2DI
+          (match_operator:V2DI 3 "sse_comparison_operator"
+                               [(match_operand:V2DF 1 "register_operand" "0")
+                                (match_operand:V2DF 2 "nonimmediate_operand" "x")]))
+         (subreg:V2DI (match_dup 1) 0)
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "cmpn%D3sd\t{%2, %0|%0, %2}"  ; negated scalar compare, matching the (not:V2DI ...) above and maskncmpv2df3's cmpn%D3pd
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_comi"
+  [(set (reg:CCFP 17)
+        (match_operator:CCFP 2 "sse_comparison_operator"
+                        [(vec_select:DF
+                          (match_operand:V2DF 0 "register_operand" "x")
+                          (parallel [(const_int 0)]))
+                         (vec_select:DF
+                          (match_operand:V2DF 1 "register_operand" "x")
+                          (parallel [(const_int 0)]))]))]
+  "TARGET_SSE2"
+  "comisd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_ucomi"
+  [(set (reg:CCFPU 17)
+        (match_operator:CCFPU 2 "sse_comparison_operator"
+                        [(vec_select:DF
+                          (match_operand:V2DF 0 "register_operand" "x")
+                          (parallel [(const_int 0)]))
+                         (vec_select:DF
+                          (match_operand:V2DF 1 "register_operand" "x")
+                          (parallel [(const_int 0)]))]))]
+  "TARGET_SSE2"
+  "ucomisd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; SSE Strange Moves.
+
+(define_insn "sse2_movmskpd"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")] 33))]
+  "TARGET_SSE2"
+  "movmskpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] 33))]
+  "TARGET_SSE2"
+  "pmovmskb\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_maskmovdqu"
+  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+                       (match_operand:V16QI 2 "register_operand" "x")] 32))]
+  "TARGET_SSE2"
+  ;; @@@ check ordering of operands in intel/nonintel syntax
+  "maskmovdqu\t{%2, %1|%1, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_movntv2df"
+  [(set (match_operand:V2DF 0 "memory_operand" "=m")
+        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] 34))]
+  "TARGET_SSE2"
+  "movntpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_movntti"
+  [(set (match_operand:TI 0 "memory_operand" "=m")
+        (unspec:TI [(match_operand:TI 1 "register_operand" "x")] 34))]
+  "TARGET_SSE2"
+  "movntdq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_movntsi"
+  [(set (match_operand:SI 0 "memory_operand" "=m")
+        (unspec:SI [(match_operand:SI 1 "register_operand" "r")] 34))]
+  "TARGET_SSE2"
+  "movnti\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; SSE <-> integer/MMX conversions
+
+;; Conversions between SI and SF
+
+(define_insn "cvtdq2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "cvtdq2ps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvtps2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "cvtps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvttps2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30))]
+  "TARGET_SSE2"
+  "cvttps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; Packed conversions between SI and DF
+
+(define_insn "cvtdq2pd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (float:V2DF (vec_select:V2SI
+                     (match_operand:V2SI 1 "nonimmediate_operand" "xm")
+                     (parallel
+                      [(const_int 0)
+                       (const_int 1)]))))]
+  "TARGET_SSE2"
+  "cvtdq2pd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvtpd2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (vec_concat:V4SI
+         (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+         (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+  "TARGET_SSE2"
+  "cvtpd2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvttpd2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (vec_concat:V4SI
+         (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] 30)
+         (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+  "TARGET_SSE2"
+  "cvttpd2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvtpd2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "cvtpd2pi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvttpd2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] 30))]
+  "TARGET_SSE2"
+  "cvttpd2pi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvtpi2pd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE2"
+  "cvtpi2pd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; Scalar conversions between SI and DF
+
+(define_insn "cvtsd2si"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+                               (parallel [(const_int 0)]))))]
+  "TARGET_SSE2"
+  "cvtsd2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvttsd2si"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+                                   (parallel [(const_int 0)]))] 30))]
+  "TARGET_SSE2"
+  "cvttsd2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvtsi2sd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                        (vec_duplicate:V2DF
+                         (float:DF
+                          (match_operand:SI 2 "nonimmediate_operand" "rm")))
+                        (const_int 2)))]
+  "TARGET_SSE2"
+  "cvtsi2sd\t{%2, %0|%0, %2}"  ; (float:DF SI) is integer -> double, so the mnemonic is cvtsi2sd, not cvtsd2si
+  [(set_attr "type" "sse")])
+
+;; Conversions between SF and DF
+
+(define_insn "cvtsd2ss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                        (vec_duplicate:V4SF
+                         (float_truncate:V2SF
+                          (match_operand:V2DF 2 "register_operand" "xm")))
+                        (const_int 14)))]
+  "TARGET_SSE2"
+  "cvtsd2ss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvtss2sd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                        (float_extend:V2DF
+                         (vec_select:V2SF
+                          (match_operand:V4SF 2 "register_operand" "xm")
+                          (parallel [(const_int 0)
+                                     (const_int 1)])))
+                        (const_int 2)))]
+  "TARGET_SSE2"
+  "cvtss2sd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvtpd2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (subreg:V4SF
+         (vec_concat:V4SI
+          (subreg:V2SI (float_truncate:V2SF
+                        (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 0)
+          (const_vector:V2SI [(const_int 0) (const_int 0)])) 0))]
+  "TARGET_SSE2"
+  "cvtpd2ps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvtps2pd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (float_extend:V2DF
+         (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+                          (parallel [(const_int 0)
+                                     (const_int 1)]))))]
+  "TARGET_SSE2"
+  "cvtps2pd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; SSE2 variants of MMX insns
+
+;; MMX arithmetic
+
+(define_insn "addv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "paddb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "addv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "paddw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "addv4si3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (plus:V4SI (match_operand:V4SI 1 "register_operand" "0")
+                   (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "paddd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "addv2di3"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (plus:V2DI (match_operand:V2DI 1 "register_operand" "0")
+                   (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "paddq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "ssaddv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                       (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "paddsb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "ssaddv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                      (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "paddsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "usaddv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                       (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "paddusb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "usaddv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                      (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "paddusw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "subv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                     (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "subv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                    (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "subv4si3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (minus:V4SI (match_operand:V4SI 1 "register_operand" "0")
+                    (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "subv2di3"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (minus:V2DI (match_operand:V2DI 1 "register_operand" "0")
+                    (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sssubv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (ss_minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                        (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubsb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sssubv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (ss_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                       (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "ussubv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (us_minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                        (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubusb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "ussubv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                       (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubusw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mulv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (mult:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pmullw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "smulv8hi3_highpart"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (truncate:V8HI
+         (lshiftrt:V8SI
+          (mult:V8SI (sign_extend:V8SI (match_operand:V8HI 1 "register_operand" "0"))
+                     (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+          (const_int 16))))]
+  "TARGET_SSE2"
+  "pmulhw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "umulv8hi3_highpart"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (truncate:V8HI
+         (lshiftrt:V8SI
+          (mult:V8SI (zero_extend:V8SI (match_operand:V8HI 1 "register_operand" "0"))
+                     (zero_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+          (const_int 16))))]
+  "TARGET_SSE2"
+  "pmulhuw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; See the MMX logical operations for the reason for the unspec
+(define_insn "sse2_umulsidi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI [(mult:DI (zero_extend:DI (match_operand:DI 1 "register_operand" "0"))
+                             (zero_extend:DI (match_operand:DI 2 "nonimmediate_operand" "ym")))] 45))]
+  "TARGET_SSE2"
+  "pmuludq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_umulv2siv2di3"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")  ; 128-bit V2DI result lives in an XMM ("x") register, not MMX ("y")
+        (mult:V2DI (zero_extend:V2DI
+                    (vec_select:V2SI
+                     (match_operand:V4SI 1 "register_operand" "0")
+                     (parallel [(const_int 0) (const_int 2)])))
+                   (zero_extend:V2DI
+                    (vec_select:V2SI
+                     (match_operand:V4SI 2 "nonimmediate_operand" "xm")  ; XMM register or memory, same class as operand 0
+                     (parallel [(const_int 0) (const_int 2)])))))]
+  "TARGET_SSE2"
+  "pmuludq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_pmaddwd"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (plus:V4SI
+         (mult:V4SI
+          (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "0")
+                                             (parallel [(const_int 0)
+                                                        (const_int 2)
+                                                        (const_int 4)
+                                                        (const_int 6)])))
+          (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+                                             (parallel [(const_int 0)
+                                                        (const_int 2)
+                                                        (const_int 4)
+                                                        (const_int 6)]))))
+         (mult:V4SI
+          (sign_extend:V4SI (vec_select:V4HI (match_dup 1)
+                                             (parallel [(const_int 1)
+                                                        (const_int 3)
+                                                        (const_int 5)
+                                                        (const_int 7)])))
+          (sign_extend:V4SI (vec_select:V4HI (match_dup 2)
+                                             (parallel [(const_int 1)
+                                                        (const_int 3)
+                                                        (const_int 5)
+                                                        (const_int 7)]))))))]
+  "TARGET_SSE2"
+  "pmaddwd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Same as pxor, but don't show input operands so that we don't think
+;; they are live.
+(define_insn "sse2_clrti"
+  [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
+  "TARGET_SSE2"
+  "pxor\t{%0, %0|%0, %0}"
+  [(set_attr "type" "sse")])
+
+;; SSE2 (XMM-register) variants of the MMX unsigned averages/sum of absolute differences
+
+(define_insn "sse2_uavgv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (ashiftrt:V16QI
+         (plus:V16QI (plus:V16QI
+                      (match_operand:V16QI 1 "register_operand" "0")
+                      (match_operand:V16QI 2 "nonimmediate_operand" "xm"))  ; XMM register or memory; "y" (MMX) cannot pair with the "x" destination
+                     (const_vector:V16QI [(const_int 1) (const_int 1)
+                                          (const_int 1) (const_int 1)
+                                          (const_int 1) (const_int 1)
+                                          (const_int 1) (const_int 1)
+                                          (const_int 1) (const_int 1)
+                                          (const_int 1) (const_int 1)
+                                          (const_int 1) (const_int 1)
+                                          (const_int 1) (const_int 1)]))
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "pavgb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_uavgv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (ashiftrt:V8HI
+         (plus:V8HI (plus:V8HI
+                     (match_operand:V8HI 1 "register_operand" "0")
+                     (match_operand:V8HI 2 "nonimmediate_operand" "xm"))  ; XMM register or memory, same class as operand 0
+                    (const_vector:V8HI [(const_int 1) (const_int 1)
+                                        (const_int 1) (const_int 1)
+                                        (const_int 1) (const_int 1)
+                                        (const_int 1) (const_int 1)]))
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "pavgw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; @@@ this isn't the right representation.
+(define_insn "sse2_psadbw"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (abs:V16QI (minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                                (match_operand:V16QI 2 "nonimmediate_operand" "xm"))))]  ; XMM register or memory; "y" (MMX) cannot pair with the "x" destination
+  "TARGET_SSE2"
+  "psadbw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE2 (XMM-register) variants of the MMX insert/extract/shuffle insns
+
+(define_insn "sse2_pinsrw"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                        (vec_duplicate:V8HI
+                         (match_operand:SI 2 "nonimmediate_operand" "rm"))
+                        (match_operand:SI 3 "immediate_operand" "i")))]
+  "TARGET_SSE2"
+  "pinsrw\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_pextrw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (zero_extend:SI
+         (vec_select:HI (match_operand:V8HI 1 "register_operand" "x")
+                        (parallel
+                         [(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_SSE2"
+  "pextrw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_pshufd"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+                      (match_operand:SI 2 "immediate_operand" "i")] 41))]
+  "TARGET_SSE2"
+  "pshufd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_pshuflw"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "0")
+                      (match_operand:SI 2 "immediate_operand" "i")] 55))]
+  "TARGET_SSE2"
+  "pshuflw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_pshufhw"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "0")
+                      (match_operand:SI 2 "immediate_operand" "i")] 56))]
+  "TARGET_SSE2"
+  "pshufhw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sse")])
+
+;; MMX mask-generating comparisons
+
+(define_insn "eqv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (eq:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pcmpeqb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "eqv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (eq:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pcmpeqw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "eqv4si3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (eq:V4SI (match_operand:V4SI 1 "register_operand" "0")
+                 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pcmpeqd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "gtv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (gt:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pcmpgtb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "gtv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (gt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pcmpgtw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "gtv4si3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (gt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+                 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pcmpgtd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX max/min insns
+
+(define_insn "umaxv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (umax:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pmaxub\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "smaxv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (smax:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pmaxsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "uminv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (umin:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pminub\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sminv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (smin:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pminsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX shifts
+
+(define_insn "ashrv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                       (match_operand:TI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psraw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "ashrv4si3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+                       (match_operand:TI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psrad\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "lshrv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                       (match_operand:TI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psrlw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "lshrv4si3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+                       (match_operand:TI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psrld\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_lshrv2di3"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
+                       (match_operand:TI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psrlq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "ashlv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                     (match_operand:TI 2 "nonmemory_operand" "xi")))]  ; shift count: TImode register ("x") or immediate ("i")
+  "TARGET_SSE2"
+  "psllw\t{%2, %0|%0, %2}"  ; left shift of the V8HI elements
+  [(set_attr "type" "sse")])
+
+(define_insn "ashlv4si3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
+                     (match_operand:TI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "pslld\t{%2, %0|%0, %2}"  ; left shift of the V4SI elements
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_ashlv2di3"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
+                     (match_operand:TI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psllq\t{%2, %0|%0, %2}"  ; left shift of the V2DI elements
+  [(set_attr "type" "sse")])
+
+;; See logical MMX insns for the reason for the unspec.  Strictly speaking
+;; we wouldn't need it here since we never generate TImode arithmetic.
+
+;; There has to be some kind of prize for the weirdest new instruction...
+(define_insn "sse2_ashlti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (unspec:TI
+         [(ashift:TI (match_operand:TI 1 "register_operand" "0")
+                     (mult:SI (match_operand:SI 2 "immediate_operand" "i")
+                              (const_int 8)))] 30))]  ; operand 2 is scaled by 8 in the RTL shift amount
+  "TARGET_SSE2"
+  "pslldq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_lshrti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (unspec:TI
+         [(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+                       (mult:SI (match_operand:SI 2 "immediate_operand" "i")
+                                (const_int 8)))] 30))]
+  "TARGET_SSE2"
+  "psrldq\t{%2, %0|%0, %2}"  ; right-shift counterpart of pslldq; the mnemonic is psrldq ("pslrdq" does not exist)
+  [(set_attr "type" "sse")])
+
+;; SSE2 unpack of double-precision elements
+
+(define_insn "sse2_unpckhpd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_concat:V2DF
+         (vec_select:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                          (parallel [(const_int 1)]))
+         (vec_select:V2DF (match_operand:V2DF 2 "register_operand" "x")
+                          (parallel [(const_int 0)]))))]
+  "TARGET_SSE2"
+  "unpckhpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_unpcklpd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_concat:V2DF
+         (vec_select:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                          (parallel [(const_int 0)]))
+         (vec_select:V2DF (match_operand:V2DF 2 "register_operand" "x")
+                          (parallel [(const_int 1)]))))]
+  "TARGET_SSE2"
+  "unpcklpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; MMX pack/unpack insns.
+
+(define_insn "sse2_packsswb"       ; V8HI x 2 -> V16QI, signed saturation
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (vec_concat:V16QI
+         (ss_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0"))
+         (ss_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))]
+  "TARGET_SSE2"
+  "packsswb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_packssdw"       ; V4SI x 2 -> V8HI, signed saturation
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (vec_concat:V8HI
+         (ss_truncate:V4HI (match_operand:V4SI 1 "register_operand" "0"))
+         (ss_truncate:V4HI (match_operand:V4SI 2 "register_operand" "x"))))]
+  "TARGET_SSE2"
+  "packssdw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_packuswb"       ; V8HI x 2 -> V16QI, unsigned saturation
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (vec_concat:V16QI
+         (us_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0"))
+         (us_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))]
+  "TARGET_SSE2"
+  "packuswb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_punpckhbw"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (vec_merge:V16QI
+         (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                           (parallel [(const_int 8) (const_int 0)
+                                      (const_int 9) (const_int 1)
+                                      (const_int 10) (const_int 2)
+                                      (const_int 11) (const_int 3)
+                                      (const_int 12) (const_int 4)
+                                      (const_int 13) (const_int 5)
+                                      (const_int 14) (const_int 6)
+                                      (const_int 15) (const_int 7)]))
+         (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x")
+                           (parallel [(const_int 0) (const_int 8)
+                                      (const_int 1) (const_int 9)
+                                      (const_int 2) (const_int 10)
+                                      (const_int 3) (const_int 11)
+                                      (const_int 4) (const_int 12)
+                                      (const_int 5) (const_int 13)
+                                      (const_int 6) (const_int 14)
+                                      (const_int 7) (const_int 15)]))
+         (const_int 21845)))]       ; merge mask 0x5555: alternate lanes
+  "TARGET_SSE2"
+  "punpckhbw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_punpckhwd"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (vec_merge:V8HI
+         (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                          (parallel [(const_int 4) (const_int 0)
+                                     (const_int 5) (const_int 1)
+                                     (const_int 6) (const_int 2)
+                                     (const_int 7) (const_int 3)]))
+         (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x")
+                          (parallel [(const_int 0) (const_int 4)
+                                     (const_int 1) (const_int 5)
+                                     (const_int 2) (const_int 6)
+                                     (const_int 3) (const_int 7)]))
+         (const_int 85)))]          ; merge mask 0x55: alternate lanes
+  "TARGET_SSE2"
+  "punpckhwd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_punpckhdq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (vec_merge:V4SI
+         (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0")
+                          (parallel [(const_int 2) (const_int 0)
+                                     (const_int 3) (const_int 1)]))
+         (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x")
+                          (parallel [(const_int 0) (const_int 2)
+                                     (const_int 1) (const_int 3)]))
+         (const_int 5)))]           ; merge mask 0x5: alternate lanes
+  "TARGET_SSE2"
+  "punpckhdq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_punpcklbw"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (vec_merge:V16QI
+         (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0")
+                           (parallel [(const_int 0) (const_int 8)
+                                      (const_int 1) (const_int 9)
+                                      (const_int 2) (const_int 10)
+                                      (const_int 3) (const_int 11)
+                                      (const_int 4) (const_int 12)
+                                      (const_int 5) (const_int 13)
+                                      (const_int 6) (const_int 14)
+                                      (const_int 7) (const_int 15)]))
+         (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x")
+                           (parallel [(const_int 8) (const_int 0)
+                                      (const_int 9) (const_int 1)
+                                      (const_int 10) (const_int 2)
+                                      (const_int 11) (const_int 3)
+                                      (const_int 12) (const_int 4)
+                                      (const_int 13) (const_int 5)
+                                      (const_int 14) (const_int 6)
+                                      (const_int 15) (const_int 7)]))
+         (const_int 21845)))]       ; merge mask 0x5555: alternate lanes
+  "TARGET_SSE2"
+  "punpcklbw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_punpcklwd"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (vec_merge:V8HI
+         (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                          (parallel [(const_int 0) (const_int 4)
+                                     (const_int 1) (const_int 5)
+                                     (const_int 2) (const_int 6)
+                                     (const_int 3) (const_int 7)]))
+         (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x")
+                          (parallel [(const_int 4) (const_int 0)
+                                     (const_int 5) (const_int 1)
+                                     (const_int 6) (const_int 2)
+                                     (const_int 7) (const_int 3)]))
+         (const_int 85)))]          ; merge mask 0x55: alternate lanes
+  "TARGET_SSE2"
+  "punpcklwd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_punpckldq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (vec_merge:V4SI
+         (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0")
+                          (parallel [(const_int 0) (const_int 2)
+                                     (const_int 1) (const_int 3)]))
+         (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x")
+                          (parallel [(const_int 2) (const_int 0)
+                                     (const_int 3) (const_int 1)]))
+         (const_int 5)))]           ; merge mask 0x5: alternate lanes
+  "TARGET_SSE2"
+  "punpckldq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; SSE2 moves.  Unspec 38 wraps the movapd/movdqa forms, unspec 39 the movupd/movdqu forms.
+
+(define_insn "sse2_movapd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+        (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")] 38))]
+  "TARGET_SSE2"
+  "@
+   movapd\t{%1, %0|%0, %1}
+   movapd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_movupd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+        (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")] 39))]
+  "TARGET_SSE2"
+  "@
+   movupd\t{%1, %0|%0, %1}
+   movupd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_movdqa"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
+        (unspec:TI [(match_operand:TI 1 "general_operand" "xm,x")] 38))]
+  "TARGET_SSE2"
+  "@
+   movdqa\t{%1, %0|%0, %1}
+   movdqa\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_movdqu"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
+        (unspec:TI [(match_operand:TI 1 "general_operand" "xm,x")] 39))]
+  "TARGET_SSE2"
+  "@
+   movdqu\t{%1, %0|%0, %1}
+   movdqu\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_movdq2q"        ; element 0 of an "x" register -> "y" register
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=y")
+        (vec_select:DI (match_operand:V2DI 1 "general_operand" "x")
+                       (parallel [(const_int 0)])))]
+  "TARGET_SSE2"
+  "movdq2q\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_movq2dq"        ; "y" register -> "x" register, concatenated with zero
+  [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x")
+        (vec_concat:V2DI (match_operand:DI 1 "general_operand" "y")
+                         (const_vector:DI [(const_int 0)])))]
+  "TARGET_SSE2"
+  "movq2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_movhpd"         ; insn condition requires operand 1 or 2 to be a MEM
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+        (vec_merge:V2DF
+         (match_operand:V2DF 1 "nonimmediate_operand" "0,0")
+         (match_operand:V2DF 2 "nonimmediate_operand" "m,x")
+         (const_int 2)))]
+  "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+  "movhpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_movlpd"         ; insn condition requires operand 1 or 2 to be a MEM
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+        (vec_merge:V2DF
+         (match_operand:V2DF 1 "nonimmediate_operand" "0,0")
+         (match_operand:V2DF 2 "nonimmediate_operand" "m,x")
+         (const_int 1)))]
+  "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+  "movlpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_loadsd"         ; DF from memory merged with zero via movsd
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF
+         (match_operand:DF 1 "memory_operand" "m")
+         (vec_duplicate:DF (float:DF (const_int 0)))
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "movsd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_movsd"          ; register-to-register movsd, merge mask 1
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF
+         (match_operand:V2DF 1 "register_operand" "0")
+         (match_operand:V2DF 2 "register_operand" "x")
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "movsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_storesd"        ; element 0 of a V2DF register stored to memory
+  [(set (match_operand:DF 0 "memory_operand" "=m")
+        (vec_select:DF
+         (match_operand:V2DF 1 "register_operand" "x")
+         (parallel [(const_int 0)])))]
+  "TARGET_SSE2"
+  "movsd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_shufpd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
+                      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+                      (match_operand:SI 3 "immediate_operand" "i")] 41))]
+  "TARGET_SSE2"
+  ;; @@@ check operand order for intel/nonintel syntax
+  "shufpd\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse2_clflush"
+  [(unspec_volatile [(match_operand:SI 0 "address_operand" "p")] 57)]
+  "TARGET_SSE2"
+  "clflush\t%a0"                    ; %a prints operand 0 as a memory address
+  [(set_attr "type" "sse")])
+
+(define_expand "sse2_mfence"
+  [(set (match_dup 0)
+        (unspec:BLK [(match_dup 0)] 59))]
+  "TARGET_SSE2"
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));  /* expander supplies its own operand: a BLKmode MEM at a scratch address */
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*mfence_insn"
+  [(set (match_operand:BLK 0 "" "")
+        (unspec:BLK [(match_dup 0)] 59))]
+  "TARGET_SSE2"
+  "mfence"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")])
+
+(define_expand "sse2_lfence"
+  [(set (match_dup 0)
+        (unspec:BLK [(match_dup 0)] 60))]
+  "TARGET_SSE2"
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));  /* expander supplies its own operand: a BLKmode MEM at a scratch address */
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*lfence_insn"
+  [(set (match_operand:BLK 0 "" "")
+        (unspec:BLK [(match_dup 0)] 60))]
+  "TARGET_SSE2"
+  "lfence"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")])
diff --git a/gcc/tree.c b/gcc/tree.c
index 4ec4bd04e86..6f59026ab40 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -4764,6 +4764,8 @@ build_common_tree_nodes_2 (short_double)
     = make_vector (V4SImode, unsigned_intSI_type_node, 1);
   unsigned_V2SI_type_node
     = make_vector (V2SImode, unsigned_intSI_type_node, 1);
+  unsigned_V2DI_type_node
+    = make_vector (V2DImode, unsigned_intDI_type_node, 1);
   unsigned_V4HI_type_node
     = make_vector (V4HImode, unsigned_intHI_type_node, 1);
   unsigned_V8QI_type_node
@@ -4777,10 +4779,12 @@ build_common_tree_nodes_2 (short_double)
   V4SF_type_node = make_vector (V4SFmode, float_type_node, 0);
   V4SI_type_node = make_vector (V4SImode, intSI_type_node, 0);
   V2SI_type_node = make_vector (V2SImode, intSI_type_node, 0);
+  V2DI_type_node = make_vector (V2DImode, intDI_type_node, 0);
   V4HI_type_node = make_vector (V4HImode, intHI_type_node, 0);
   V8QI_type_node = make_vector (V8QImode, intQI_type_node, 0);
   V8HI_type_node = make_vector (V8HImode, intHI_type_node, 0);
   V2SF_type_node = make_vector (V2SFmode, float_type_node, 0);
+  V2DF_type_node = make_vector (V2DFmode, double_type_node, 0);
   V16QI_type_node = make_vector (V16QImode, intQI_type_node, 0);
 }
 
diff --git a/gcc/tree.h b/gcc/tree.h
index 3e51f7dbb3f..2d955187466 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1928,6 +1928,7 @@ enum tree_index
   TI_UV4HI_TYPE,
   TI_UV2SI_TYPE,
   TI_UV2SF_TYPE,
+  TI_UV2DI_TYPE,
   TI_UV16QI_TYPE,
 
   TI_V4SF_TYPE,
@@ -1938,6 +1939,8 @@ enum tree_index
   TI_V4HI_TYPE,
   TI_V2SI_TYPE,
   TI_V2SF_TYPE,
+  TI_V2DF_TYPE,
+  TI_V2DI_TYPE,
   TI_V16QI_TYPE,
 
   TI_MAIN_IDENTIFIER,
@@ -2005,6 +2008,7 @@ extern tree global_trees[TI_MAX];
 #define unsigned_V8HI_type_node global_trees[TI_UV8HI_TYPE]
 #define unsigned_V4HI_type_node global_trees[TI_UV4HI_TYPE]
 #define unsigned_V2SI_type_node global_trees[TI_UV2SI_TYPE]
+#define unsigned_V2DI_type_node global_trees[TI_UV2DI_TYPE]
 
 #define V16QI_type_node global_trees[TI_V16QI_TYPE]
 #define V4SF_type_node global_trees[TI_V4SF_TYPE]
@@ -2014,6 +2018,8 @@ extern tree global_trees[TI_MAX];
 #define V4HI_type_node global_trees[TI_V4HI_TYPE]
 #define V2SI_type_node global_trees[TI_V2SI_TYPE]
 #define V2SF_type_node global_trees[TI_V2SF_TYPE]
+#define V2DI_type_node global_trees[TI_V2DI_TYPE]
+#define V2DF_type_node global_trees[TI_V2DF_TYPE]
 #define V16SF_type_node global_trees[TI_V16SF_TYPE]
 
 /* An enumeration of the standard C integer types. These must be