Add support for vpermil2p* in XOP.

2010-02-13  Sebastian Pop  <sebastian.pop@amd.com>

	* config/i386/i386-builtin-types.def
	(V2DF_FTYPE_V2DF_V2DF_V2DI_INT): Declared.
	(V4DF_FTYPE_V4DF_V4DF_V4DI_INT): Declared.
	(V4SF_FTYPE_V4SF_V4SF_V4SI_INT): Declared.
	(V8SF_FTYPE_V8SF_V8SF_V8SI_INT): Declared.
	* config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_VPERMIL2PD,
	IX86_BUILTIN_VPERMIL2PS, IX86_BUILTIN_VPERMIL2PD256, and
	IX86_BUILTIN_VPERMIL2PS256.
	(MULTI_ARG_4_DF2_DI_I): Defined.
	(MULTI_ARG_4_DF2_DI_I1): Defined.
	(MULTI_ARG_4_SF2_SI_I): Defined.
	(MULTI_ARG_4_SF2_SI_I1): Defined.
	(bdesc_multi_arg): Add __builtin_ia32_vpermil2pd,
	__builtin_ia32_vpermil2ps, __builtin_ia32_vpermil2pd256, and
	__builtin_ia32_vpermil2ps256.
	(ix86_expand_multi_arg_builtin): Handle MULTI_ARG_4_DF2_DI_I,
	MULTI_ARG_4_DF2_DI_I1, MULTI_ARG_4_SF2_SI_I, and
	MULTI_ARG_4_SF2_SI_I1.  Handle builtins with 4 arguments.
	(ix86_expand_args_builtin): Handle MULTI_ARG_4_DF2_DI_I,
	MULTI_ARG_4_DF2_DI_I1, MULTI_ARG_4_SF2_SI_I, and
	MULTI_ARG_4_SF2_SI_I1.  Handle CODE_FOR_xop_vpermil2v2df3,
	CODE_FOR_xop_vpermil2v4sf3, CODE_FOR_xop_vpermil2v4df3, and
	CODE_FOR_xop_vpermil2v8sf3.
	* config/i386/i386.md (UNSPEC_VPERMIL2): Declared.
	* config/i386/sse.md (xop_vpermil2<mode>3): New insn pattern.
	* config/i386/xopintrin.h (_mm_permute2_pd): New.
	(_mm256_permute2_pd): New.
	(_mm_permute2_ps): New.
	(_mm256_permute2_ps): New.

	* gcc.target/i386/sse-14.c: Add tests for _mm_permute2_pd,
	_mm256_permute2_pd, _mm_permute2_ps, and _mm256_permute2_ps.
	* gcc.target/i386/xop-vpermil2pd-1.c: New.
	* gcc.target/i386/xop-vpermil2pd-256-1.c: New.
	* gcc.target/i386/xop-vpermil2ps-1.c: New.
	* gcc.target/i386/xop-vpermil2ps-256-1.c: New.

From-SVN: r156778
This commit is contained in:
Sebastian Pop 2010-02-15 17:21:21 +00:00 committed by Sebastian Pop
parent d5c5922408
commit 02edd2f6e9
12 changed files with 404 additions and 5 deletions

View File

@ -1,3 +1,35 @@
2010-02-15 Sebastian Pop <sebastian.pop@amd.com>
* config/i386/i386-builtin-types.def
(V2DF_FTYPE_V2DF_V2DF_V2DI_INT): Declared.
(V4DF_FTYPE_V4DF_V4DF_V4DI_INT): Declared.
(V4SF_FTYPE_V4SF_V4SF_V4SI_INT): Declared.
(V8SF_FTYPE_V8SF_V8SF_V8SI_INT): Declared.
* config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_VPERMIL2PD,
IX86_BUILTIN_VPERMIL2PS, IX86_BUILTIN_VPERMIL2PD256, and
IX86_BUILTIN_VPERMIL2PS256.
(MULTI_ARG_4_DF2_DI_I): Defined.
(MULTI_ARG_4_DF2_DI_I1): Defined.
(MULTI_ARG_4_SF2_SI_I): Defined.
(MULTI_ARG_4_SF2_SI_I1): Defined.
(bdesc_multi_arg): Add __builtin_ia32_vpermil2pd,
__builtin_ia32_vpermil2ps, __builtin_ia32_vpermil2pd256, and
__builtin_ia32_vpermil2ps256.
(ix86_expand_multi_arg_builtin): Handle MULTI_ARG_4_DF2_DI_I,
MULTI_ARG_4_DF2_DI_I1, MULTI_ARG_4_SF2_SI_I, and
MULTI_ARG_4_SF2_SI_I1. Handle builtins with 4 arguments.
(ix86_expand_args_builtin): Handle MULTI_ARG_4_DF2_DI_I,
MULTI_ARG_4_DF2_DI_I1, MULTI_ARG_4_SF2_SI_I, and
MULTI_ARG_4_SF2_SI_I1. Handle CODE_FOR_xop_vpermil2v2df3,
CODE_FOR_xop_vpermil2v4sf3, CODE_FOR_xop_vpermil2v4df3, and
CODE_FOR_xop_vpermil2v8sf3.
* config/i386/i386.md (UNSPEC_VPERMIL2): Declared.
* config/i386/sse.md (xop_vpermil2<mode>3): New insn pattern.
* config/i386/xopintrin.h (_mm_permute2_pd): New.
(_mm256_permute2_pd): New.
(_mm_permute2_ps): New.
(_mm256_permute2_ps): New.
2010-02-15 Nick Clifton <nickc@redhat.com>
* config/h8300/h8300.c: (h8300_push_pop): Use bool type for

View File

@ -311,6 +311,7 @@ DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, V16QI)
DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, DI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, UINT, UINT)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT)
@ -319,11 +320,13 @@ DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, V32QI)
DEF_FUNCTION_TYPE (V4DF, V4DF, V2DF, INT)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, INT)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DF)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI, INT)
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI)
DEF_FUNCTION_TYPE (V4HI, V4HI, HI, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, FLOAT, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT)
DEF_FUNCTION_TYPE (V4SI, V4SI, SI, INT)
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, INT)
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V2DI)
@ -335,6 +338,7 @@ DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V8HI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V4SF, INT)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, INT)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SF)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI)

View File

@ -20958,6 +20958,10 @@ enum ix86_builtins
IX86_BUILTIN_VPERMILPS,
IX86_BUILTIN_VPERMILPD256,
IX86_BUILTIN_VPERMILPS256,
IX86_BUILTIN_VPERMIL2PD,
IX86_BUILTIN_VPERMIL2PS,
IX86_BUILTIN_VPERMIL2PD256,
IX86_BUILTIN_VPERMIL2PS256,
IX86_BUILTIN_VPERM2F128PD256,
IX86_BUILTIN_VPERM2F128PS256,
IX86_BUILTIN_VPERM2F128SI256,
@ -22147,6 +22151,10 @@ static const struct builtin_description bdesc_args[] =
};
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
@ -22389,6 +22397,11 @@ static const struct builtin_description bdesc_multi_arg[] =
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
@ -22769,6 +22782,14 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
switch (m_type)
{
case MULTI_ARG_4_DF2_DI_I:
case MULTI_ARG_4_DF2_DI_I1:
case MULTI_ARG_4_SF2_SI_I:
case MULTI_ARG_4_SF2_SI_I1:
nargs = 4;
last_arg_constant = true;
break;
case MULTI_ARG_3_SF:
case MULTI_ARG_3_DF:
case MULTI_ARG_3_SF2:
@ -22912,6 +22933,10 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
break;
case 4:
pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
break;
default:
gcc_unreachable ();
}
@ -23530,6 +23555,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 3;
nargs_constant = 2;
break;
case MULTI_ARG_4_DF2_DI_I:
case MULTI_ARG_4_DF2_DI_I1:
case MULTI_ARG_4_SF2_SI_I:
case MULTI_ARG_4_SF2_SI_I1:
nargs = 4;
nargs_constant = 1;
break;
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
nargs = 4;
nargs_constant = 2;
@ -23599,6 +23631,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_sse4_1_blendpd:
case CODE_FOR_avx_vpermilv2df:
case CODE_FOR_xop_vpermil2v2df3:
case CODE_FOR_xop_vpermil2v4sf3:
case CODE_FOR_xop_vpermil2v4df3:
case CODE_FOR_xop_vpermil2v8sf3:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;

View File

@ -219,11 +219,12 @@
; For AVX support
(UNSPEC_PCMP 166)
(UNSPEC_VPERMIL 167)
(UNSPEC_VPERMIL2F128 168)
(UNSPEC_MASKLOAD 169)
(UNSPEC_MASKSTORE 170)
(UNSPEC_CAST 171)
(UNSPEC_VTESTP 172)
(UNSPEC_VPERMIL2 168)
(UNSPEC_VPERMIL2F128 169)
(UNSPEC_MASKLOAD 170)
(UNSPEC_MASKSTORE 171)
(UNSPEC_CAST 172)
(UNSPEC_VTESTP 173)
])
(define_constants

View File

@ -11539,6 +11539,20 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
(define_insn "xop_vpermil2<mode>3"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "register_operand" "x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
(match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
(match_operand:SI 4 "const_0_to_3_operand" "n")]
UNSPEC_VPERMIL2))]
"TARGET_XOP"
"vpermil2p<xopmodesuffixf2c>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
[(set_attr "type" "sse4arg")
(set_attr "length_immediate" "1")
(set_attr "mode" "<MODE>")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "*avx_aesenc"
[(set (match_operand:V2DI 0 "register_operand" "=x")

View File

@ -766,6 +766,70 @@ _mm256_frcz_pd (__m256d __A)
return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
}
/* PERMIL2 */
#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
{
return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
(__v2df)__Y,
(__v2di)__C,
__I);
}
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
{
return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
(__v4df)__Y,
(__v4di)__C,
__I);
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
{
return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
(__v4sf)__Y,
(__v4si)__C,
__I);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
{
return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
(__v8sf)__Y,
(__v8si)__C,
__I);
}
#else
#define _mm_permute2_pd(X, Y, C, I) \
((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), \
(__v2di)(__m128d)(C), \
(int)(I)))
#define _mm256_permute2_pd(X, Y, C, I) \
((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \
(__v4df)(__m256d)(Y), \
(__v4di)(__m256d)(C), \
(int)(I)))
#define _mm_permute2_ps(X, Y, C, I) \
((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), \
(__v4si)(__m128)(C), \
(int)(I)))
#define _mm256_permute2_ps(X, Y, C, I) \
((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \
(__v8sf)(__m256)(Y), \
(__v8si)(__m256)(C), \
(int)(I)))
#endif /* __OPTIMIZE__ */
#endif /* __XOP__ */
#endif /* _XOPMMINTRIN_H_INCLUDED */

View File

@ -1,3 +1,12 @@
2010-02-15 Sebastian Pop <sebastian.pop@amd.com>
* gcc.target/i386/sse-14.c: Add tests for _mm_permute2_pd,
_mm256_permute2_pd, _mm_permute2_ps, and _mm256_permute2_ps.
* gcc.target/i386/xop-vpermil2pd-1.c: New.
* gcc.target/i386/xop-vpermil2pd-256-1.c: New.
* gcc.target/i386/xop-vpermil2ps-1.c: New.
* gcc.target/i386/xop-vpermil2ps-256-1.c: New.
2010-02-15 Richard Guenther <rguenther@suse.de>
PR middle-end/43068

View File

@ -162,6 +162,10 @@ test_1 ( _mm_roti_epi8, __m128i, __m128i, 1)
test_1 ( _mm_roti_epi16, __m128i, __m128i, 1)
test_1 ( _mm_roti_epi32, __m128i, __m128i, 1)
test_1 ( _mm_roti_epi64, __m128i, __m128i, 1)
test_3 (_mm_permute2_pd, __m128d, __m128d, __m128d, __m128d, 1)
test_3 (_mm256_permute2_pd, __m256d, __m256d, __m256d, __m256d, 1)
test_3 (_mm_permute2_ps, __m128, __m128, __m128, __m128, 1)
test_3 (_mm256_permute2_ps, __m256, __m256, __m256, __m256, 1)
/* lwpintrin.h */
test_2 ( __lwpval32, void, unsigned int, unsigned int, 1)

View File

@ -0,0 +1,55 @@
/* { dg-do run } */
/* { dg-require-effective-target xop } */
/* { dg-options "-O2 -mxop" } */
#include "xop-check.h"
#ifndef ZERO_MATCH
#define ZERO_MATCH 2
#endif
static double
select2dp(double *src1, double *src2, long long sel)
{
double tmp = 0.0;
if ((sel & 0x3) == 0) tmp = src1[0];
if ((sel & 0x3) == 1) tmp = src1[1];
if ((sel & 0x3) == 2) tmp = src2[0];
if ((sel & 0x3) == 3) tmp = src2[1];
return tmp;
}
static double
sel_and_condzerodp(double *src1, double *src2, long long sel, int imm8)
{
double tmp;
tmp = select2dp(src1, src2, sel & 0x3);
if (((imm8 & 0x3) == 2) && ((sel & 0x4) == 0x4)) tmp = 0;
if (((imm8 & 0x3) == 3) && ((sel & 0x4) == 0x0)) tmp = 0;
return tmp;
}
void static
xop_test ()
{
union128d s1, s2, u;
union128i_q s3;
double e[2];
s1.x = _mm_set_pd (1, 2);
s2.x = _mm_set_pd (3, 4);
s3.x = _mm_set_epi64x (1, 2);
u.x = _mm_permute2_pd(s1.x, s2.x, s3.x, ZERO_MATCH);
e[0] = sel_and_condzerodp (s1.a, s2.a, (s3.a[0] & 0xe)>>1, ZERO_MATCH);
e[1] = sel_and_condzerodp (s1.a, s2.a, (s3.a[1] & 0xe)>>1, ZERO_MATCH);
if (check_union128d (u, e))
abort ();
}

View File

@ -0,0 +1,56 @@
/* { dg-do run } */
/* { dg-require-effective-target xop } */
/* { dg-options "-O2 -mxop" } */
#include "xop-check.h"
#ifndef ZERO_MATCH
#define ZERO_MATCH 1
#endif
static double
select2dp(double *src1, double *src2, long long sel)
{
double tmp = 3.414;
if ((sel & 0x3) == 0) tmp = src1[0];
if ((sel & 0x3) == 1) tmp = src1[1];
if ((sel & 0x3) == 2) tmp = src2[0];
if ((sel & 0x3) == 3) tmp = src2[1];
return tmp;
}
static double
sel_and_condzerodp(double *src1, double *src2, long long sel, int imm8)
{
double tmp;
tmp = select2dp(src1, src2, sel);
if (((imm8 & 0x3) == 2) && ((sel & 0x4) == 0x4)) tmp = 0;
if (((imm8 & 0x3) == 3) && ((sel & 0x4) == 0x0)) tmp = 0;
return tmp;
}
void static
xop_test ()
{
union256d u, s1, s2;
double e[4] = {0.0};
union256i_q s3;
s1.x = _mm256_set_pd (1, 2, 3, 4);
s2.x = _mm256_set_pd (5, 6, 7, 8);
s3.x = _mm256_set_epi64x (0, 1, 2, 3);
u.x = _mm256_permute2_pd(s1.x, s2.x, s3.x, ZERO_MATCH);
e[0] = sel_and_condzerodp (s1.a, s2.a, (s3.a[0] & 0xe)>>1, ZERO_MATCH);
e[1] = sel_and_condzerodp (s1.a, s2.a, (s3.a[1] & 0xe)>>1, ZERO_MATCH);
e[2] = sel_and_condzerodp (s1.a + 2, s2.a + 2, (s3.a[2] & 0xe)>>1, ZERO_MATCH);
e[3] = sel_and_condzerodp (s1.a + 2, s2.a + 2, (s3.a[3] & 0xe)>>1, ZERO_MATCH);
if (check_union256d (u, e))
abort ();
}

View File

@ -0,0 +1,62 @@
/* { dg-do run } */
/* { dg-require-effective-target xop } */
/* { dg-options "-O2 -mxop" } */
#include "xop-check.h"
#ifndef ZERO_MATCH
#define ZERO_MATCH 1
#endif
static float
select2sp(float *src1, float *src2, int sel)
{
float tmp;
if ((sel & 0x7) == 0) tmp = src1[0];
if ((sel & 0x7) == 1) tmp = src1[1];
if ((sel & 0x7) == 2) tmp = src1[2];
if ((sel & 0x7) == 3) tmp = src1[3];
if ((sel & 0x7) == 4) tmp = src2[0];
if ((sel & 0x7) == 5) tmp = src2[1];
if ((sel & 0x7) == 6) tmp = src2[2];
if ((sel & 0x7) == 7) tmp = src2[3];
return tmp;
}
static float
sel_and_condzerosp(float *src1, float *src2, int sel, int imm8)
{
float tmp;
tmp = select2sp(src1, src2, sel & 0x7);
if (((imm8 & 0x3) == 2) && ((sel & 0x8) == 0x8)) tmp = 0;
if (((imm8 & 0x3) == 3) && ((sel & 0x8) == 0x0)) tmp = 0;
return tmp;
}
void static
xop_test ()
{
int i;
union128 source1, source2, u;
union128i_d source3;
float s1[4] = {1, 2, 3, 4};
float s2[4] = {5, 6, 7, 8};
int s3[4] = {0, 1, 0, 1};
float e[4];
source1.x = _mm_loadu_ps(s1);
source2.x = _mm_loadu_ps(s2);
source3.x = _mm_loadu_si128((__m128i*) s3);
u.x = _mm_permute2_ps(source1.x, source2.x, source3.x, ZERO_MATCH);
for (i = 0; i < 4; ++i) {
e[i] = sel_and_condzerosp(&s1[i & 0x4], &s2[i & 0x4], s3[i] & 0xf, ZERO_MATCH & 0x3);
}
if (check_union128 (u, e))
abort ();
}

View File

@ -0,0 +1,62 @@
/* { dg-do run } */
/* { dg-require-effective-target xop } */
/* { dg-options "-O2 -mxop" } */
#include "xop-check.h"
#ifndef ZERO_MATCH
#define ZERO_MATCH 3
#endif
static float
select2sp(float *src1, float *src2, int sel)
{
float tmp;
if ((sel & 0x7) == 0) tmp = src1[0];
if ((sel & 0x7) == 1) tmp = src1[1];
if ((sel & 0x7) == 2) tmp = src1[2];
if ((sel & 0x7) == 3) tmp = src1[3];
if ((sel & 0x7) == 4) tmp = src2[0];
if ((sel & 0x7) == 5) tmp = src2[1];
if ((sel & 0x7) == 6) tmp = src2[2];
if ((sel & 0x7) == 7) tmp = src2[3];
return tmp;
}
static float
sel_and_condzerosp(float *src1, float *src2, int sel, int imm8)
{
float tmp;
tmp = select2sp(src1, src2, sel & 0x7);
if (((imm8 & 0x3) == 2) && ((sel & 0x8) == 0x8)) tmp = 0;
if (((imm8 & 0x3) == 3) && ((sel & 0x8) == 0x0)) tmp = 0;
return tmp;
}
void static
xop_test ()
{
int i;
union256 source1, source2, u;
union256i_d source3;
float s1[8]={1, 2, 3, 4, 5, 6, 7, 8};
float s2[8]={9, 10, 11, 12, 13, 14, 15, 16};
int s3[8]={11, 2, 3, 15, 5, 12, 7, 8};
float e[8];
source1.x = _mm256_loadu_ps(s1);
source2.x = _mm256_loadu_ps(s2);
source3.x = _mm256_loadu_si256((__m256i*) s3);
u.x = _mm256_permute2_ps(source1.x, source2.x, source3.x, ZERO_MATCH);
for (i = 0; i < 8; ++i) {
e[i] = sel_and_condzerosp(&s1[i & 0x4], &s2[i & 0x4], s3[i] & 0xf, ZERO_MATCH & 0x3);
}
if (check_union256(u, e))
abort ();
}