backport: re PR target/52932 (AVX2 intrinsic _mm256_permutevar8x32_ps has wrong parameter type)
2012-04-16  Uros Bizjak  <ubizjak@gmail.com>

	Backport from mainline
	2012-04-12  Uros Bizjak  <ubizjak@gmail.com>

	PR target/52932
	* config/i386/avx2intrin.h (_mm256_permutevar8x32_ps): Change second
	argument type to __m256i.  Update call to __builtin_ia32_permvarsf256.
	* config/i386/sse.md (avx2_permvarv8sf): Change operand 1 to V8SI.
	(avx2_permvarv8sf, avx2_permvarv8si): Switch operands 1 and 2.
	* config/i386/i386.c (bdesc_args) <__builtin_ia32_permvarsf256>:
	Update builtin type to V8SF_FTYPE_V8SF_V8SI.
	(ix86_expand_vec_perm): Update calls to gen_avx2_permvarv8si and
	gen_avx2_permvarv8sf.

testsuite/ChangeLog:

2012-04-16  Uros Bizjak  <ubizjak@gmail.com>

	Backport from mainline
	2012-04-12  Uros Bizjak  <ubizjak@gmail.com>

	PR target/52932
	* gcc.target/i386/avx2-vpermps-1.c (avx2_test): Use __m256i type for
	second function argument.
	* gcc.target/i386/avx2-vpermps-2.c (init_permps): Update declaration.
	(calc_permps): Update declaration.  Calculate result correctly.
	(avx2_test): Change src2 type to union256i_d.
	* gcc.target/i386/avx2-vpermd-2.c (calc_permd): Calculate result
	correctly.

From-SVN: r186500
This commit is contained in:
parent
a5a5cf332c
commit
95c275d99b
|
@ -1,3 +1,18 @@
|
|||
2012-04-16 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
Backport from mainline
|
||||
2012-04-12 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/52932
|
||||
* config/i386/avx2intrin.h (_mm256_permutevar8x32_ps): Change second
|
||||
argument type to __m256i. Update call to __builtin_ia32_permvarsf256.
|
||||
* config/i386/sse.md (avx2_permvarv8sf): Change operand 1 to V8SI.
|
||||
(avx2_permvarv8sf, avx2_permvarv8si): Switch operands 1 and 2.
|
||||
* config/i386/i386.c (bdesc_args) <__builtin_ia32_permvarsf256>:
|
||||
Update builtin type to V8SF_FTYPE_V8SF_V8SI.
|
||||
(ix86_expand_vec_perm): Update calls to gen_avx2_permvarv8si and
|
||||
gen_avx2_permvarv8sf.
|
||||
|
||||
2012-04-16 Martin Jambor <mjambor@suse.cz>
|
||||
|
||||
Backported from mainline
|
||||
|
|
|
@ -1034,9 +1034,9 @@ _mm256_permute4x64_pd (__m256d __X, const int __M)
|
|||
|
||||
extern __inline __m256
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_permutevar8x32_ps (__m256 __X, __m256 __Y)
|
||||
_mm256_permutevar8x32_ps (__m256 __X, __m256i __Y)
|
||||
{
|
||||
return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X,(__v8sf)__Y);
|
||||
return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X, (__v8si)__Y);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
|
|
|
@ -19763,7 +19763,7 @@ ix86_expand_vec_perm (rtx operands[])
|
|||
vt = force_reg (maskmode, vt);
|
||||
mask = gen_lowpart (maskmode, mask);
|
||||
if (maskmode == V8SImode)
|
||||
emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
|
||||
emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
|
||||
else
|
||||
emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
|
||||
|
||||
|
@ -19797,13 +19797,13 @@ ix86_expand_vec_perm (rtx operands[])
|
|||
the high bits of the shuffle elements. No need for us to
|
||||
perform an AND ourselves. */
|
||||
if (one_operand_shuffle)
|
||||
emit_insn (gen_avx2_permvarv8si (target, mask, op0));
|
||||
emit_insn (gen_avx2_permvarv8si (target, op0, mask));
|
||||
else
|
||||
{
|
||||
t1 = gen_reg_rtx (V8SImode);
|
||||
t2 = gen_reg_rtx (V8SImode);
|
||||
emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
|
||||
emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
|
||||
emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
|
||||
emit_insn (gen_avx2_permvarv8si (t2, op0, mask));
|
||||
goto merge_two;
|
||||
}
|
||||
return;
|
||||
|
@ -19811,13 +19811,13 @@ ix86_expand_vec_perm (rtx operands[])
|
|||
case V8SFmode:
|
||||
mask = gen_lowpart (V8SFmode, mask);
|
||||
if (one_operand_shuffle)
|
||||
emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
|
||||
emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
|
||||
else
|
||||
{
|
||||
t1 = gen_reg_rtx (V8SFmode);
|
||||
t2 = gen_reg_rtx (V8SFmode);
|
||||
emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
|
||||
emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
|
||||
emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
|
||||
emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
|
||||
goto merge_two;
|
||||
}
|
||||
return;
|
||||
|
@ -19830,7 +19830,7 @@ ix86_expand_vec_perm (rtx operands[])
|
|||
t2 = gen_reg_rtx (V8SImode);
|
||||
emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
|
||||
emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
|
||||
emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
|
||||
emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
|
||||
emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
|
||||
return;
|
||||
|
||||
|
@ -19840,7 +19840,7 @@ ix86_expand_vec_perm (rtx operands[])
|
|||
mask = gen_lowpart (V4SFmode, mask);
|
||||
emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
|
||||
emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
|
||||
emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
|
||||
emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
|
||||
emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
|
||||
return;
|
||||
|
||||
|
@ -26763,7 +26763,7 @@ static const struct builtin_description bdesc_args[] =
|
|||
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
|
||||
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
|
||||
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
|
||||
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
|
||||
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
|
||||
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
|
||||
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
|
||||
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
|
||||
|
|
|
@ -11880,11 +11880,11 @@
|
|||
(define_insn "avx2_permvarv8si"
|
||||
[(set (match_operand:V8SI 0 "register_operand" "=x")
|
||||
(unspec:V8SI
|
||||
[(match_operand:V8SI 1 "register_operand" "x")
|
||||
(match_operand:V8SI 2 "nonimmediate_operand" "xm")]
|
||||
[(match_operand:V8SI 1 "nonimmediate_operand" "xm")
|
||||
(match_operand:V8SI 2 "register_operand" "x")]
|
||||
UNSPEC_VPERMSI))]
|
||||
"TARGET_AVX2"
|
||||
"vpermd\t{%2, %1, %0|%0, %1, %2}"
|
||||
"vpermd\t{%1, %2, %0|%0, %2, %1}"
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "OI")])
|
||||
|
@ -11905,11 +11905,11 @@
|
|||
(define_insn "avx2_permvarv8sf"
|
||||
[(set (match_operand:V8SF 0 "register_operand" "=x")
|
||||
(unspec:V8SF
|
||||
[(match_operand:V8SF 1 "register_operand" "x")
|
||||
(match_operand:V8SF 2 "nonimmediate_operand" "xm")]
|
||||
[(match_operand:V8SF 1 "nonimmediate_operand" "xm")
|
||||
(match_operand:V8SI 2 "register_operand" "x")]
|
||||
UNSPEC_VPERMSF))]
|
||||
"TARGET_AVX2"
|
||||
"vpermps\t{%2, %1, %0|%0, %1, %2}"
|
||||
"vpermps\t{%1, %2, %0|%0, %2, %1}"
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "OI")])
|
||||
|
|
|
@ -1,3 +1,17 @@
|
|||
2012-04-16 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
Backport from mainline
|
||||
2012-04-12 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/52932
|
||||
* gcc.target/i386/avx2-vpermps-1.c (avx2_test): Use __m256i type for
|
||||
second function argument.
|
||||
* gcc.target/i386/avx2-vpermps-2.c (init_permps): Update declaration.
|
||||
(calc_permps): Update declaration. Calculate result correctly.
|
||||
(avx2_test): Change src2 type to union256i_d.
|
||||
* gcc.target/i386/avx2-vpermd-2.c (calc_permd): Calculate result
|
||||
correctly.
|
||||
|
||||
2012-04-16 Martin Jambor <mjambor@suse.cz>
|
||||
|
||||
Backported from mainline
|
||||
|
|
|
@ -29,8 +29,8 @@ calc_permd (int *src1, int *src2, int *dst)
|
|||
memcpy (dst, src1, 32);
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
temp = src1[i];
|
||||
dst[i] = src2[temp & 7];
|
||||
temp = src2[i];
|
||||
dst[i] = src1[temp & 7];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,9 +5,10 @@
|
|||
#include <immintrin.h>
|
||||
|
||||
__m256 x;
|
||||
__m256i y;
|
||||
|
||||
void extern
|
||||
avx2_test (void)
|
||||
{
|
||||
x = _mm256_permutevar8x32_ps (x, x);
|
||||
x = _mm256_permutevar8x32_ps (x, y);
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#define NUM 10
|
||||
|
||||
static void
|
||||
init_permps (float *src1, float *src2, int seed)
|
||||
init_permps (float *src1, int *src2, int seed)
|
||||
{
|
||||
int i, sign = 1;
|
||||
|
||||
|
@ -21,24 +21,24 @@ init_permps (float *src1, float *src2, int seed)
|
|||
}
|
||||
|
||||
static void
|
||||
calc_permps (float *src1, float *src2, float *dst)
|
||||
calc_permps (float *src1, int *src2, float *dst)
|
||||
{
|
||||
int i;
|
||||
unsigned temp;
|
||||
unsigned *idx = (int *) src1;
|
||||
|
||||
memcpy (dst, src1, 32);
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
temp = idx[i];
|
||||
dst[i] = src2[temp & 7];
|
||||
temp = src2[i];
|
||||
dst[i] = src1[temp & 7];
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
avx2_test (void)
|
||||
{
|
||||
union256 src1, src2, dst;
|
||||
union256 src1, dst;
|
||||
union256i_d src2;
|
||||
float dst_ref[8];
|
||||
int i;
|
||||
|
||||
|
|
Loading…
Reference in New Issue