Add support for vpermil2p* in XOP.
2010-02-13 Sebastian Pop <sebastian.pop@amd.com> * config/i386/i386-builtin-types.def (V2DF_FTYPE_V2DF_V2DF_V2DI_INT): Declared. (V4DF_FTYPE_V4DF_V4DF_V4DI_INT): Declared. (V4SF_FTYPE_V4SF_V4SF_V4SI_INT): Declared. (V8SF_FTYPE_V8SF_V8SF_V8SI_INT): Declared. * config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_VPERMIL2PD, IX86_BUILTIN_VPERMIL2PS, IX86_BUILTIN_VPERMIL2PD256, and IX86_BUILTIN_VPERMIL2PS256. (MULTI_ARG_4_DF2_DI_I): Defined. (MULTI_ARG_4_DF2_DI_I1): Defined. (MULTI_ARG_4_SF2_SI_I): Defined. (MULTI_ARG_4_SF2_SI_I1): Defined. (bdesc_multi_arg): Add __builtin_ia32_vpermil2pd, __builtin_ia32_vpermil2ps, __builtin_ia32_vpermil2pd256, and __builtin_ia32_vpermil2ps256. (ix86_expand_multi_arg_builtin): Handle MULTI_ARG_4_DF2_DI_I, MULTI_ARG_4_DF2_DI_I1, MULTI_ARG_4_SF2_SI_I, and MULTI_ARG_4_SF2_SI_I1. Handle builtins with 4 arguments. (ix86_expand_args_builtin): Handle MULTI_ARG_4_DF2_DI_I, MULTI_ARG_4_DF2_DI_I1, MULTI_ARG_4_SF2_SI_I, and MULTI_ARG_4_SF2_SI_I1. Handle CODE_FOR_xop_vpermil2v2df3, CODE_FOR_xop_vpermil2v4sf3, CODE_FOR_xop_vpermil2v4df3, and CODE_FOR_xop_vpermil2v8sf3. * config/i386/i386.md (UNSPEC_VPERMIL2): Declared. * config/i386/sse.md (xop_vpermil2<mode>3): New insn pattern. * config/i386/xopintrin.h (_mm_permute2_pd): New. (_mm256_permute2_pd): New. (_mm_permute2_ps): New. (_mm256_permute2_ps): New. * gcc.target/i386/sse-14.c: Add tests for _mm_permute2_pd, _mm256_permute2_pd, _mm_permute2_ps, and _mm256_permute2_ps. * gcc.target/i386/xop-vpermil2pd-1.c: New. * gcc.target/i386/xop-vpermil2pd-256-1.c: New. * gcc.target/i386/xop-vpermil2ps-1.c: New. * gcc.target/i386/xop-vpermil2ps-256-1.c: New. From-SVN: r156778
This commit is contained in:
parent
d5c5922408
commit
02edd2f6e9
@ -1,3 +1,35 @@
|
||||
2010-02-15 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
* config/i386/i386-builtin-types.def
|
||||
(V2DF_FTYPE_V2DF_V2DF_V2DI_INT): Declared.
|
||||
(V4DF_FTYPE_V4DF_V4DF_V4DI_INT): Declared.
|
||||
(V4SF_FTYPE_V4SF_V4SF_V4SI_INT): Declared.
|
||||
(V8SF_FTYPE_V8SF_V8SF_V8SI_INT): Declared.
|
||||
* config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_VPERMIL2PD,
|
||||
IX86_BUILTIN_VPERMIL2PS, IX86_BUILTIN_VPERMIL2PD256, and
|
||||
IX86_BUILTIN_VPERMIL2PS256.
|
||||
(MULTI_ARG_4_DF2_DI_I): Defined.
|
||||
(MULTI_ARG_4_DF2_DI_I1): Defined.
|
||||
(MULTI_ARG_4_SF2_SI_I): Defined.
|
||||
(MULTI_ARG_4_SF2_SI_I1): Defined.
|
||||
(bdesc_multi_arg): Add __builtin_ia32_vpermil2pd,
|
||||
__builtin_ia32_vpermil2ps, __builtin_ia32_vpermil2pd256, and
|
||||
__builtin_ia32_vpermil2ps256.
|
||||
(ix86_expand_multi_arg_builtin): Handle MULTI_ARG_4_DF2_DI_I,
|
||||
MULTI_ARG_4_DF2_DI_I1, MULTI_ARG_4_SF2_SI_I, and
|
||||
MULTI_ARG_4_SF2_SI_I1. Handle builtins with 4 arguments.
|
||||
(ix86_expand_args_builtin): Handle MULTI_ARG_4_DF2_DI_I,
|
||||
MULTI_ARG_4_DF2_DI_I1, MULTI_ARG_4_SF2_SI_I, and
|
||||
MULTI_ARG_4_SF2_SI_I1. Handle CODE_FOR_xop_vpermil2v2df3,
|
||||
CODE_FOR_xop_vpermil2v4sf3, CODE_FOR_xop_vpermil2v4df3, and
|
||||
CODE_FOR_xop_vpermil2v8sf3.
|
||||
* config/i386/i386.md (UNSPEC_VPERMIL2): Declared.
|
||||
* config/i386/sse.md (xop_vpermil2<mode>3): New insn pattern.
|
||||
* config/i386/xopintrin.h (_mm_permute2_pd): New.
|
||||
(_mm256_permute2_pd): New.
|
||||
(_mm_permute2_ps): New.
|
||||
(_mm256_permute2_ps): New.
|
||||
|
||||
2010-02-15 Nick Clifton <nickc@redhat.com>
|
||||
|
||||
* config/h8300/h8300.c: (h8300_push_pop): Use bool type for
|
||||
|
@ -311,6 +311,7 @@ DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, V16QI)
|
||||
DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI, INT)
|
||||
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT)
|
||||
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF)
|
||||
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT)
|
||||
DEF_FUNCTION_TYPE (V2DI, V2DI, DI, INT)
|
||||
DEF_FUNCTION_TYPE (V2DI, V2DI, UINT, UINT)
|
||||
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT)
|
||||
@ -319,11 +320,13 @@ DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, V32QI)
|
||||
DEF_FUNCTION_TYPE (V4DF, V4DF, V2DF, INT)
|
||||
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, INT)
|
||||
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DF)
|
||||
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI, INT)
|
||||
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI)
|
||||
DEF_FUNCTION_TYPE (V4HI, V4HI, HI, INT)
|
||||
DEF_FUNCTION_TYPE (V4SF, V4SF, FLOAT, INT)
|
||||
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT)
|
||||
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF)
|
||||
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT)
|
||||
DEF_FUNCTION_TYPE (V4SI, V4SI, SI, INT)
|
||||
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, INT)
|
||||
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V2DI)
|
||||
@ -335,6 +338,7 @@ DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V8HI)
|
||||
DEF_FUNCTION_TYPE (V8SF, V8SF, V4SF, INT)
|
||||
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, INT)
|
||||
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SF)
|
||||
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI, INT)
|
||||
DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, INT)
|
||||
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, INT)
|
||||
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI)
|
||||
|
@ -20958,6 +20958,10 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_VPERMILPS,
|
||||
IX86_BUILTIN_VPERMILPD256,
|
||||
IX86_BUILTIN_VPERMILPS256,
|
||||
IX86_BUILTIN_VPERMIL2PD,
|
||||
IX86_BUILTIN_VPERMIL2PS,
|
||||
IX86_BUILTIN_VPERMIL2PD256,
|
||||
IX86_BUILTIN_VPERMIL2PS256,
|
||||
IX86_BUILTIN_VPERM2F128PD256,
|
||||
IX86_BUILTIN_VPERM2F128PS256,
|
||||
IX86_BUILTIN_VPERM2F128SI256,
|
||||
@ -22147,6 +22151,10 @@ static const struct builtin_description bdesc_args[] =
|
||||
};
|
||||
|
||||
/* FMA4 and XOP. */
|
||||
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
|
||||
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
|
||||
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
|
||||
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
|
||||
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
|
||||
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
|
||||
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
|
||||
@ -22389,6 +22397,11 @@ static const struct builtin_description bdesc_multi_arg[] =
|
||||
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
|
||||
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
|
||||
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
|
||||
|
||||
};
|
||||
|
||||
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
|
||||
@ -22769,6 +22782,14 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
|
||||
|
||||
switch (m_type)
|
||||
{
|
||||
case MULTI_ARG_4_DF2_DI_I:
|
||||
case MULTI_ARG_4_DF2_DI_I1:
|
||||
case MULTI_ARG_4_SF2_SI_I:
|
||||
case MULTI_ARG_4_SF2_SI_I1:
|
||||
nargs = 4;
|
||||
last_arg_constant = true;
|
||||
break;
|
||||
|
||||
case MULTI_ARG_3_SF:
|
||||
case MULTI_ARG_3_DF:
|
||||
case MULTI_ARG_3_SF2:
|
||||
@ -22912,6 +22933,10 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
|
||||
pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
@ -23530,6 +23555,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
|
||||
nargs = 3;
|
||||
nargs_constant = 2;
|
||||
break;
|
||||
case MULTI_ARG_4_DF2_DI_I:
|
||||
case MULTI_ARG_4_DF2_DI_I1:
|
||||
case MULTI_ARG_4_SF2_SI_I:
|
||||
case MULTI_ARG_4_SF2_SI_I1:
|
||||
nargs = 4;
|
||||
nargs_constant = 1;
|
||||
break;
|
||||
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
|
||||
nargs = 4;
|
||||
nargs_constant = 2;
|
||||
@ -23599,6 +23631,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
|
||||
|
||||
case CODE_FOR_sse4_1_blendpd:
|
||||
case CODE_FOR_avx_vpermilv2df:
|
||||
case CODE_FOR_xop_vpermil2v2df3:
|
||||
case CODE_FOR_xop_vpermil2v4sf3:
|
||||
case CODE_FOR_xop_vpermil2v4df3:
|
||||
case CODE_FOR_xop_vpermil2v8sf3:
|
||||
error ("the last argument must be a 2-bit immediate");
|
||||
return const0_rtx;
|
||||
|
||||
|
@ -219,11 +219,12 @@
|
||||
; For AVX support
|
||||
(UNSPEC_PCMP 166)
|
||||
(UNSPEC_VPERMIL 167)
|
||||
(UNSPEC_VPERMIL2F128 168)
|
||||
(UNSPEC_MASKLOAD 169)
|
||||
(UNSPEC_MASKSTORE 170)
|
||||
(UNSPEC_CAST 171)
|
||||
(UNSPEC_VTESTP 172)
|
||||
(UNSPEC_VPERMIL2 168)
|
||||
(UNSPEC_VPERMIL2F128 169)
|
||||
(UNSPEC_MASKLOAD 170)
|
||||
(UNSPEC_MASKSTORE 171)
|
||||
(UNSPEC_CAST 172)
|
||||
(UNSPEC_VTESTP 173)
|
||||
])
|
||||
|
||||
(define_constants
|
||||
|
@ -11539,6 +11539,20 @@
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "xop_vpermil2<mode>3"
|
||||
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
|
||||
(unspec:AVXMODEF2P
|
||||
[(match_operand:AVXMODEF2P 1 "register_operand" "x")
|
||||
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
|
||||
(match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
|
||||
(match_operand:SI 4 "const_0_to_3_operand" "n")]
|
||||
UNSPEC_VPERMIL2))]
|
||||
"TARGET_XOP"
|
||||
"vpermil2p<xopmodesuffixf2c>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
|
||||
[(set_attr "type" "sse4arg")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(define_insn "*avx_aesenc"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=x")
|
||||
|
@ -766,6 +766,70 @@ _mm256_frcz_pd (__m256d __A)
|
||||
return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
|
||||
}
|
||||
|
||||
/* PERMIL2 */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
|
||||
(__v2df)__Y,
|
||||
(__v2di)__C,
|
||||
__I);
|
||||
}
|
||||
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
|
||||
(__v4df)__Y,
|
||||
(__v4di)__C,
|
||||
__I);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
|
||||
(__v4sf)__Y,
|
||||
(__v4si)__C,
|
||||
__I);
|
||||
}
|
||||
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
|
||||
(__v8sf)__Y,
|
||||
(__v8si)__C,
|
||||
__I);
|
||||
}
|
||||
#else
|
||||
#define _mm_permute2_pd(X, Y, C, I) \
|
||||
((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \
|
||||
(__v2df)(__m128d)(Y), \
|
||||
(__v2di)(__m128d)(C), \
|
||||
(int)(I)))
|
||||
|
||||
#define _mm256_permute2_pd(X, Y, C, I) \
|
||||
((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \
|
||||
(__v4df)(__m256d)(Y), \
|
||||
(__v4di)(__m256d)(C), \
|
||||
(int)(I)))
|
||||
|
||||
#define _mm_permute2_ps(X, Y, C, I) \
|
||||
((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \
|
||||
(__v4sf)(__m128)(Y), \
|
||||
(__v4si)(__m128)(C), \
|
||||
(int)(I)))
|
||||
|
||||
#define _mm256_permute2_ps(X, Y, C, I) \
|
||||
((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \
|
||||
(__v8sf)(__m256)(Y), \
|
||||
(__v8si)(__m256)(C), \
|
||||
(int)(I)))
|
||||
#endif /* __OPTIMIZE__ */
|
||||
|
||||
#endif /* __XOP__ */
|
||||
|
||||
#endif /* _XOPMMINTRIN_H_INCLUDED */
|
||||
|
@ -1,3 +1,12 @@
|
||||
2010-02-15 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
* gcc.target/i386/sse-14.c: Add tests for _mm_permute2_pd,
|
||||
_mm256_permute2_pd, _mm_permute2_ps, and _mm256_permute2_ps.
|
||||
* gcc.target/i386/xop-vpermil2pd-1.c: New.
|
||||
* gcc.target/i386/xop-vpermil2pd-256-1.c: New.
|
||||
* gcc.target/i386/xop-vpermil2ps-1.c: New.
|
||||
* gcc.target/i386/xop-vpermil2ps-256-1.c: New.
|
||||
|
||||
2010-02-15 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR middle-end/43068
|
||||
|
@ -162,6 +162,10 @@ test_1 ( _mm_roti_epi8, __m128i, __m128i, 1)
|
||||
test_1 ( _mm_roti_epi16, __m128i, __m128i, 1)
|
||||
test_1 ( _mm_roti_epi32, __m128i, __m128i, 1)
|
||||
test_1 ( _mm_roti_epi64, __m128i, __m128i, 1)
|
||||
test_3 (_mm_permute2_pd, __m128d, __m128d, __m128d, __m128d, 1)
|
||||
test_3 (_mm256_permute2_pd, __m256d, __m256d, __m256d, __m256d, 1)
|
||||
test_3 (_mm_permute2_ps, __m128, __m128, __m128, __m128, 1)
|
||||
test_3 (_mm256_permute2_ps, __m256, __m256, __m256, __m256, 1)
|
||||
|
||||
/* lwpintrin.h */
|
||||
test_2 ( __lwpval32, void, unsigned int, unsigned int, 1)
|
||||
|
55
gcc/testsuite/gcc.target/i386/xop-vpermil2pd-1.c
Normal file
55
gcc/testsuite/gcc.target/i386/xop-vpermil2pd-1.c
Normal file
@ -0,0 +1,55 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target xop } */
|
||||
/* { dg-options "-O2 -mxop" } */
|
||||
|
||||
#include "xop-check.h"
|
||||
|
||||
#ifndef ZERO_MATCH
|
||||
#define ZERO_MATCH 2
|
||||
#endif
|
||||
|
||||
static double
|
||||
select2dp(double *src1, double *src2, long long sel)
|
||||
{
|
||||
double tmp = 0.0;
|
||||
|
||||
if ((sel & 0x3) == 0) tmp = src1[0];
|
||||
if ((sel & 0x3) == 1) tmp = src1[1];
|
||||
if ((sel & 0x3) == 2) tmp = src2[0];
|
||||
if ((sel & 0x3) == 3) tmp = src2[1];
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static double
|
||||
sel_and_condzerodp(double *src1, double *src2, long long sel, int imm8)
|
||||
{
|
||||
double tmp;
|
||||
|
||||
tmp = select2dp(src1, src2, sel & 0x3);
|
||||
|
||||
if (((imm8 & 0x3) == 2) && ((sel & 0x4) == 0x4)) tmp = 0;
|
||||
if (((imm8 & 0x3) == 3) && ((sel & 0x4) == 0x0)) tmp = 0;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void static
|
||||
xop_test ()
|
||||
{
|
||||
union128d s1, s2, u;
|
||||
union128i_q s3;
|
||||
double e[2];
|
||||
|
||||
s1.x = _mm_set_pd (1, 2);
|
||||
s2.x = _mm_set_pd (3, 4);
|
||||
s3.x = _mm_set_epi64x (1, 2);
|
||||
u.x = _mm_permute2_pd(s1.x, s2.x, s3.x, ZERO_MATCH);
|
||||
|
||||
e[0] = sel_and_condzerodp (s1.a, s2.a, (s3.a[0] & 0xe)>>1, ZERO_MATCH);
|
||||
e[1] = sel_and_condzerodp (s1.a, s2.a, (s3.a[1] & 0xe)>>1, ZERO_MATCH);
|
||||
|
||||
if (check_union128d (u, e))
|
||||
abort ();
|
||||
}
|
||||
|
56
gcc/testsuite/gcc.target/i386/xop-vpermil2pd-256-1.c
Normal file
56
gcc/testsuite/gcc.target/i386/xop-vpermil2pd-256-1.c
Normal file
@ -0,0 +1,56 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target xop } */
|
||||
/* { dg-options "-O2 -mxop" } */
|
||||
|
||||
#include "xop-check.h"
|
||||
|
||||
#ifndef ZERO_MATCH
|
||||
#define ZERO_MATCH 1
|
||||
#endif
|
||||
|
||||
static double
|
||||
select2dp(double *src1, double *src2, long long sel)
|
||||
{
|
||||
double tmp = 3.414;
|
||||
|
||||
if ((sel & 0x3) == 0) tmp = src1[0];
|
||||
if ((sel & 0x3) == 1) tmp = src1[1];
|
||||
if ((sel & 0x3) == 2) tmp = src2[0];
|
||||
if ((sel & 0x3) == 3) tmp = src2[1];
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static double
|
||||
sel_and_condzerodp(double *src1, double *src2, long long sel, int imm8)
|
||||
{
|
||||
double tmp;
|
||||
|
||||
tmp = select2dp(src1, src2, sel);
|
||||
|
||||
if (((imm8 & 0x3) == 2) && ((sel & 0x4) == 0x4)) tmp = 0;
|
||||
if (((imm8 & 0x3) == 3) && ((sel & 0x4) == 0x0)) tmp = 0;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void static
|
||||
xop_test ()
|
||||
{
|
||||
union256d u, s1, s2;
|
||||
double e[4] = {0.0};
|
||||
union256i_q s3;
|
||||
|
||||
s1.x = _mm256_set_pd (1, 2, 3, 4);
|
||||
s2.x = _mm256_set_pd (5, 6, 7, 8);
|
||||
s3.x = _mm256_set_epi64x (0, 1, 2, 3);
|
||||
u.x = _mm256_permute2_pd(s1.x, s2.x, s3.x, ZERO_MATCH);
|
||||
|
||||
e[0] = sel_and_condzerodp (s1.a, s2.a, (s3.a[0] & 0xe)>>1, ZERO_MATCH);
|
||||
e[1] = sel_and_condzerodp (s1.a, s2.a, (s3.a[1] & 0xe)>>1, ZERO_MATCH);
|
||||
e[2] = sel_and_condzerodp (s1.a + 2, s2.a + 2, (s3.a[2] & 0xe)>>1, ZERO_MATCH);
|
||||
e[3] = sel_and_condzerodp (s1.a + 2, s2.a + 2, (s3.a[3] & 0xe)>>1, ZERO_MATCH);
|
||||
|
||||
if (check_union256d (u, e))
|
||||
abort ();
|
||||
}
|
62
gcc/testsuite/gcc.target/i386/xop-vpermil2ps-1.c
Normal file
62
gcc/testsuite/gcc.target/i386/xop-vpermil2ps-1.c
Normal file
@ -0,0 +1,62 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target xop } */
|
||||
/* { dg-options "-O2 -mxop" } */
|
||||
|
||||
#include "xop-check.h"
|
||||
|
||||
#ifndef ZERO_MATCH
|
||||
#define ZERO_MATCH 1
|
||||
#endif
|
||||
|
||||
static float
|
||||
select2sp(float *src1, float *src2, int sel)
|
||||
{
|
||||
float tmp;
|
||||
|
||||
if ((sel & 0x7) == 0) tmp = src1[0];
|
||||
if ((sel & 0x7) == 1) tmp = src1[1];
|
||||
if ((sel & 0x7) == 2) tmp = src1[2];
|
||||
if ((sel & 0x7) == 3) tmp = src1[3];
|
||||
if ((sel & 0x7) == 4) tmp = src2[0];
|
||||
if ((sel & 0x7) == 5) tmp = src2[1];
|
||||
if ((sel & 0x7) == 6) tmp = src2[2];
|
||||
if ((sel & 0x7) == 7) tmp = src2[3];
|
||||
|
||||
return tmp;
|
||||
}
|
||||
static float
|
||||
sel_and_condzerosp(float *src1, float *src2, int sel, int imm8)
|
||||
{
|
||||
float tmp;
|
||||
|
||||
tmp = select2sp(src1, src2, sel & 0x7);
|
||||
|
||||
if (((imm8 & 0x3) == 2) && ((sel & 0x8) == 0x8)) tmp = 0;
|
||||
if (((imm8 & 0x3) == 3) && ((sel & 0x8) == 0x0)) tmp = 0;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void static
|
||||
xop_test ()
|
||||
{
|
||||
int i;
|
||||
union128 source1, source2, u;
|
||||
union128i_d source3;
|
||||
float s1[4] = {1, 2, 3, 4};
|
||||
float s2[4] = {5, 6, 7, 8};
|
||||
int s3[4] = {0, 1, 0, 1};
|
||||
float e[4];
|
||||
|
||||
source1.x = _mm_loadu_ps(s1);
|
||||
source2.x = _mm_loadu_ps(s2);
|
||||
source3.x = _mm_loadu_si128((__m128i*) s3);
|
||||
u.x = _mm_permute2_ps(source1.x, source2.x, source3.x, ZERO_MATCH);
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
e[i] = sel_and_condzerosp(&s1[i & 0x4], &s2[i & 0x4], s3[i] & 0xf, ZERO_MATCH & 0x3);
|
||||
}
|
||||
|
||||
if (check_union128 (u, e))
|
||||
abort ();
|
||||
}
|
62
gcc/testsuite/gcc.target/i386/xop-vpermil2ps-256-1.c
Normal file
62
gcc/testsuite/gcc.target/i386/xop-vpermil2ps-256-1.c
Normal file
@ -0,0 +1,62 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target xop } */
|
||||
/* { dg-options "-O2 -mxop" } */
|
||||
|
||||
#include "xop-check.h"
|
||||
|
||||
#ifndef ZERO_MATCH
|
||||
#define ZERO_MATCH 3
|
||||
#endif
|
||||
|
||||
static float
|
||||
select2sp(float *src1, float *src2, int sel)
|
||||
{
|
||||
float tmp;
|
||||
|
||||
if ((sel & 0x7) == 0) tmp = src1[0];
|
||||
if ((sel & 0x7) == 1) tmp = src1[1];
|
||||
if ((sel & 0x7) == 2) tmp = src1[2];
|
||||
if ((sel & 0x7) == 3) tmp = src1[3];
|
||||
if ((sel & 0x7) == 4) tmp = src2[0];
|
||||
if ((sel & 0x7) == 5) tmp = src2[1];
|
||||
if ((sel & 0x7) == 6) tmp = src2[2];
|
||||
if ((sel & 0x7) == 7) tmp = src2[3];
|
||||
|
||||
return tmp;
|
||||
}
|
||||
static float
|
||||
sel_and_condzerosp(float *src1, float *src2, int sel, int imm8)
|
||||
{
|
||||
float tmp;
|
||||
|
||||
tmp = select2sp(src1, src2, sel & 0x7);
|
||||
|
||||
if (((imm8 & 0x3) == 2) && ((sel & 0x8) == 0x8)) tmp = 0;
|
||||
if (((imm8 & 0x3) == 3) && ((sel & 0x8) == 0x0)) tmp = 0;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void static
|
||||
xop_test ()
|
||||
{
|
||||
int i;
|
||||
union256 source1, source2, u;
|
||||
union256i_d source3;
|
||||
float s1[8]={1, 2, 3, 4, 5, 6, 7, 8};
|
||||
float s2[8]={9, 10, 11, 12, 13, 14, 15, 16};
|
||||
int s3[8]={11, 2, 3, 15, 5, 12, 7, 8};
|
||||
float e[8];
|
||||
|
||||
source1.x = _mm256_loadu_ps(s1);
|
||||
source2.x = _mm256_loadu_ps(s2);
|
||||
source3.x = _mm256_loadu_si256((__m256i*) s3);
|
||||
u.x = _mm256_permute2_ps(source1.x, source2.x, source3.x, ZERO_MATCH);
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
e[i] = sel_and_condzerosp(&s1[i & 0x4], &s2[i & 0x4], s3[i] & 0xf, ZERO_MATCH & 0x3);
|
||||
}
|
||||
|
||||
if (check_union256(u, e))
|
||||
abort ();
|
||||
}
|
Loading…
Reference in New Issue
Block a user