sse.md (vec_widen_smult_hi_v8hi, [...]): Macroize using VI2_AVX2 mode iterator and any_extend code iterator.
* config/i386/sse.md (vec_widen_smult_hi_v8hi, vec_widen_smult_lo_v8hi, vec_widen_umult_hi_v8hi, vec_widen_umult_lo_v8hi): Macroize using VI2_AVX2 mode iterator and any_extend code iterator. (vec_widen_<s>mult_hi_v8si, vec_widen_<s>mult_lo_v8si): New expanders. (vec_widen_smult_hi_v4si, vec_widen_smult_lo_v4si): Enable also for TARGET_SSE4_1 using pmuldq insn. (sdot_prodv8hi): Macroize using VI2_AVX2 iterator. (sse2_sse4_1): New code attr. (udot_prodv4si): Macroize using any_extend code iterator. (<s>dot_prodv8si): New expander. * gcc.target/i386/sse2-mul-1.c: New test. * gcc.target/i386/sse4_1-mul-1.c: New test. * gcc.target/i386/avx-mul-1.c: New test. * gcc.target/i386/xop-mul-1.c: New test. * gcc.target/i386/avx2-mul-1.c: New test. From-SVN: r180005
This commit is contained in:
parent
2ec5455527
commit
1c4153dd02
|
@ -1,3 +1,18 @@
|
|||
2011-10-14 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* config/i386/sse.md (vec_widen_smult_hi_v8hi,
|
||||
vec_widen_smult_lo_v8hi, vec_widen_umult_hi_v8hi,
|
||||
vec_widen_umult_lo_v8hi): Macroize using VI2_AVX2
|
||||
mode iterator and any_extend code iterator.
|
||||
(vec_widen_<s>mult_hi_v8si, vec_widen_<s>mult_lo_v8si): New
|
||||
expanders.
|
||||
(vec_widen_smult_hi_v4si, vec_widen_smult_lo_v4si): Enable
|
||||
also for TARGET_SSE4_1 using pmuldq insn.
|
||||
(sdot_prodv8hi): Macroize using VI2_AVX2 iterator.
|
||||
(sse2_sse4_1): New code attr.
|
||||
(udot_prodv4si): Macroize using any_extend code iterator.
|
||||
(<s>dot_prodv8si): New expander.
|
||||
|
||||
2011-10-14 Yakovlev Vladimir <vladimir.b.yakovlev@intel.com>
|
||||
|
||||
* config/i386/i386.c (atom_cost): Changed cost for loading
|
||||
|
|
|
@ -5507,83 +5507,97 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_smult_hi_v8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")]
|
||||
(define_expand "vec_widen_<s>mult_hi_<mode>"
|
||||
[(match_operand:<sseunpackmode> 0 "register_operand" "")
|
||||
(any_extend:<sseunpackmode>
|
||||
(match_operand:VI2_AVX2 1 "register_operand" ""))
|
||||
(match_operand:VI2_AVX2 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, t1, t2, dest;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V8HImode);
|
||||
t2 = gen_reg_rtx (V8HImode);
|
||||
dest = gen_lowpart (V8HImode, operands[0]);
|
||||
t1 = gen_reg_rtx (<MODE>mode);
|
||||
t2 = gen_reg_rtx (<MODE>mode);
|
||||
dest = gen_lowpart (<MODE>mode, operands[0]);
|
||||
|
||||
emit_insn (gen_mulv8hi3 (t1, op1, op2));
|
||||
emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
|
||||
emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
|
||||
emit_insn (gen_mul<mode>3 (t1, op1, op2));
|
||||
emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
|
||||
emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_smult_lo_v8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")]
|
||||
(define_expand "vec_widen_<s>mult_lo_<mode>"
|
||||
[(match_operand:<sseunpackmode> 0 "register_operand" "")
|
||||
(any_extend:<sseunpackmode>
|
||||
(match_operand:VI2_AVX2 1 "register_operand" ""))
|
||||
(match_operand:VI2_AVX2 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, t1, t2, dest;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V8HImode);
|
||||
t2 = gen_reg_rtx (V8HImode);
|
||||
dest = gen_lowpart (V8HImode, operands[0]);
|
||||
t1 = gen_reg_rtx (<MODE>mode);
|
||||
t2 = gen_reg_rtx (<MODE>mode);
|
||||
dest = gen_lowpart (<MODE>mode, operands[0]);
|
||||
|
||||
emit_insn (gen_mulv8hi3 (t1, op1, op2));
|
||||
emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
|
||||
emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
|
||||
emit_insn (gen_mul<mode>3 (t1, op1, op2));
|
||||
emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
|
||||
emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_umult_hi_v8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
(define_expand "vec_widen_<s>mult_hi_v8si"
|
||||
[(match_operand:V4DI 0 "register_operand" "")
|
||||
(any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
|
||||
(match_operand:V8SI 2 "nonimmediate_operand" "")]
|
||||
"TARGET_AVX2"
|
||||
{
|
||||
rtx op1, op2, t1, t2, dest;
|
||||
rtx t1, t2, t3, t4;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V8HImode);
|
||||
t2 = gen_reg_rtx (V8HImode);
|
||||
dest = gen_lowpart (V8HImode, operands[0]);
|
||||
|
||||
emit_insn (gen_mulv8hi3 (t1, op1, op2));
|
||||
emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
|
||||
emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
|
||||
t1 = gen_reg_rtx (V4DImode);
|
||||
t2 = gen_reg_rtx (V4DImode);
|
||||
t3 = gen_reg_rtx (V8SImode);
|
||||
t4 = gen_reg_rtx (V8SImode);
|
||||
emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
|
||||
const0_rtx, const2_rtx,
|
||||
const1_rtx, GEN_INT (3)));
|
||||
emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
|
||||
const0_rtx, const2_rtx,
|
||||
const1_rtx, GEN_INT (3)));
|
||||
emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
|
||||
GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
|
||||
emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
|
||||
GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
|
||||
emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_umult_lo_v8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
(define_expand "vec_widen_<s>mult_lo_v8si"
|
||||
[(match_operand:V4DI 0 "register_operand" "")
|
||||
(any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
|
||||
(match_operand:V8SI 2 "nonimmediate_operand" "")]
|
||||
"TARGET_AVX2"
|
||||
{
|
||||
rtx op1, op2, t1, t2, dest;
|
||||
rtx t1, t2, t3, t4;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V8HImode);
|
||||
t2 = gen_reg_rtx (V8HImode);
|
||||
dest = gen_lowpart (V8HImode, operands[0]);
|
||||
|
||||
emit_insn (gen_mulv8hi3 (t1, op1, op2));
|
||||
emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
|
||||
emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
|
||||
t1 = gen_reg_rtx (V4DImode);
|
||||
t2 = gen_reg_rtx (V4DImode);
|
||||
t3 = gen_reg_rtx (V8SImode);
|
||||
t4 = gen_reg_rtx (V8SImode);
|
||||
emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
|
||||
const0_rtx, const2_rtx,
|
||||
const1_rtx, GEN_INT (3)));
|
||||
emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
|
||||
const0_rtx, const2_rtx,
|
||||
const1_rtx, GEN_INT (3)));
|
||||
emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
|
||||
GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
|
||||
emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
|
||||
GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
|
||||
emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -5591,24 +5605,28 @@
|
|||
[(match_operand:V2DI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")
|
||||
(match_operand:V4SI 2 "register_operand" "")]
|
||||
"TARGET_XOP"
|
||||
"TARGET_SSE4_1"
|
||||
{
|
||||
rtx t1, t2;
|
||||
rtx op1, op2, t1, t2;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V4SImode);
|
||||
t2 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
|
||||
GEN_INT (0),
|
||||
GEN_INT (2),
|
||||
GEN_INT (1),
|
||||
GEN_INT (3)));
|
||||
emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
|
||||
GEN_INT (0),
|
||||
GEN_INT (2),
|
||||
GEN_INT (1),
|
||||
GEN_INT (3)));
|
||||
emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
|
||||
if (TARGET_XOP)
|
||||
{
|
||||
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
|
||||
GEN_INT (1), GEN_INT (3)));
|
||||
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
|
||||
GEN_INT (1), GEN_INT (3)));
|
||||
emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
|
||||
DONE;
|
||||
}
|
||||
|
||||
emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
|
||||
emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
|
||||
emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -5616,24 +5634,28 @@
|
|||
[(match_operand:V2DI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")
|
||||
(match_operand:V4SI 2 "register_operand" "")]
|
||||
"TARGET_XOP"
|
||||
"TARGET_SSE4_1"
|
||||
{
|
||||
rtx t1, t2;
|
||||
rtx op1, op2, t1, t2;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V4SImode);
|
||||
t2 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
|
||||
GEN_INT (0),
|
||||
GEN_INT (2),
|
||||
GEN_INT (1),
|
||||
GEN_INT (3)));
|
||||
emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
|
||||
GEN_INT (0),
|
||||
GEN_INT (2),
|
||||
GEN_INT (1),
|
||||
GEN_INT (3)));
|
||||
emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
|
||||
if (TARGET_XOP)
|
||||
{
|
||||
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
|
||||
GEN_INT (1), GEN_INT (3)));
|
||||
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
|
||||
GEN_INT (1), GEN_INT (3)));
|
||||
emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
|
||||
DONE;
|
||||
}
|
||||
|
||||
emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
|
||||
emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
|
||||
emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -5675,30 +5697,35 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "sdot_prodv8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")
|
||||
(match_operand:V4SI 3 "register_operand" "")]
|
||||
(define_expand "sdot_prod<mode>"
|
||||
[(match_operand:<sseunpackmode> 0 "register_operand" "")
|
||||
(match_operand:VI2_AVX2 1 "register_operand" "")
|
||||
(match_operand:VI2_AVX2 2 "register_operand" "")
|
||||
(match_operand:<sseunpackmode> 3 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx t = gen_reg_rtx (V4SImode);
|
||||
emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
|
||||
emit_insn (gen_addv4si3 (operands[0], operands[3], t));
|
||||
rtx t = gen_reg_rtx (<sseunpackmode>mode);
|
||||
emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
|
||||
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
|
||||
gen_rtx_PLUS (<sseunpackmode>mode,
|
||||
operands[3], t)));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "udot_prodv4si"
|
||||
(define_code_attr sse2_sse4_1
|
||||
[(zero_extend "sse2") (sign_extend "sse4_1")])
|
||||
|
||||
(define_expand "<s>dot_prodv4si"
|
||||
[(match_operand:V2DI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")
|
||||
(any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
|
||||
(match_operand:V4SI 2 "register_operand" "")
|
||||
(match_operand:V2DI 3 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
"<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
|
||||
{
|
||||
rtx t1, t2, t3, t4;
|
||||
|
||||
t1 = gen_reg_rtx (V2DImode);
|
||||
emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
|
||||
emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
|
||||
emit_insn (gen_addv2di3 (t1, t1, operands[3]));
|
||||
|
||||
t2 = gen_reg_rtx (V4SImode);
|
||||
|
@ -5711,12 +5738,41 @@
|
|||
GEN_INT (32)));
|
||||
|
||||
t4 = gen_reg_rtx (V2DImode);
|
||||
emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
|
||||
emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
|
||||
|
||||
emit_insn (gen_addv2di3 (operands[0], t1, t4));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "<s>dot_prodv8si"
|
||||
[(match_operand:V4DI 0 "register_operand" "")
|
||||
(any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
|
||||
(match_operand:V8SI 2 "register_operand" "")
|
||||
(match_operand:V4DI 3 "register_operand" "")]
|
||||
"TARGET_AVX2"
|
||||
{
|
||||
rtx t1, t2, t3, t4;
|
||||
|
||||
t1 = gen_reg_rtx (V4DImode);
|
||||
emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
|
||||
emit_insn (gen_addv4di3 (t1, t1, operands[3]));
|
||||
|
||||
t2 = gen_reg_rtx (V8SImode);
|
||||
t3 = gen_reg_rtx (V8SImode);
|
||||
emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
|
||||
gen_lowpart (V2TImode, operands[1]),
|
||||
GEN_INT (32)));
|
||||
emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
|
||||
gen_lowpart (V2TImode, operands[2]),
|
||||
GEN_INT (32)));
|
||||
|
||||
t4 = gen_reg_rtx (V4DImode);
|
||||
emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
|
||||
|
||||
emit_insn (gen_addv4di3 (operands[0], t1, t4));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "ashr<mode>3"
|
||||
[(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
|
||||
(ashiftrt:VI24_AVX2
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
2011-10-14 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* gcc.target/i386/sse2-mul-1.c: New test.
|
||||
* gcc.target/i386/sse4_1-mul-1.c: New test.
|
||||
* gcc.target/i386/avx-mul-1.c: New test.
|
||||
* gcc.target/i386/xop-mul-1.c: New test.
|
||||
* gcc.target/i386/avx2-mul-1.c: New test.
|
||||
|
||||
2011-10-14 Jason Merrill <jason@redhat.com>
|
||||
|
||||
PR c++/50563
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O3 -mavx" } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "avx-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx_test
|
||||
#endif
|
||||
|
||||
#include "sse2-mul-1.c"
|
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx2 } */
|
||||
/* { dg-options "-O3 -mavx2" } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "avx2-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx2_test
|
||||
#endif
|
||||
|
||||
#include "sse2-mul-1.c"
|
|
@ -0,0 +1,209 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target sse2 } */
|
||||
/* { dg-options "-O3 -msse2" } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse2-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse2_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Number of elements in every array below; all loops in this file run
   from 0 to N - 1.  */
#define N 512
|
||||
/* One array triple per element type.  In the functions below the "1"
   array is the destination and the "2"/"3" arrays are the multiplication
   operands.  */
static short a1[N], a2[N], a3[N];
|
||||
static unsigned short b1[N], b2[N], b3[N];
|
||||
static int c1[N], c2[N], c3[N];
|
||||
static unsigned int d1[N], d2[N], d3[N];
|
||||
static long long e1[N], e2[N], e3[N];
|
||||
static unsigned long long g1[N], g2[N], g3[N];
|
||||
|
||||
/* Element-wise multiply of the short arrays: a1[i] = a2[i] * a3[i] for
   every i in [0, N).  The product is narrowed back to short by the
   assignment.  Marked noinline/noclone so it stays a separate function.  */
__attribute__((noinline, noclone)) void
|
||||
f1 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; ++i)
|
||||
a1[i] = a2[i] * a3[i];
|
||||
}
|
||||
|
||||
/* Element-wise multiply of the unsigned short arrays:
   b1[i] = b2[i] * b3[i] for every i in [0, N); the result is truncated to
   unsigned short by the assignment.
   NOTE(review): the operands are promoted to int before the multiply, so
   with a 32-bit int large inputs can exceed INT_MAX before truncation --
   confirm this is acceptable for the test.  */
__attribute__((noinline, noclone)) void
|
||||
f2 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; ++i)
|
||||
b1[i] = b2[i] * b3[i];
|
||||
}
|
||||
|
||||
/* Element-wise multiply of the int arrays: c1[i] = c2[i] * c3[i] for
   every i in [0, N).  Same-width multiply, no widening.  */
__attribute__((noinline, noclone)) void
|
||||
f3 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; ++i)
|
||||
c1[i] = c2[i] * c3[i];
|
||||
}
|
||||
|
||||
/* Element-wise multiply of the unsigned int arrays:
   d1[i] = d2[i] * d3[i] for every i in [0, N).  Same-width multiply.  */
__attribute__((noinline, noclone)) void
|
||||
f4 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; ++i)
|
||||
d1[i] = d2[i] * d3[i];
|
||||
}
|
||||
|
||||
/* Element-wise multiply of the long long arrays:
   e1[i] = e2[i] * e3[i] for every i in [0, N).  */
__attribute__((noinline, noclone)) void
|
||||
f5 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; ++i)
|
||||
e1[i] = e2[i] * e3[i];
|
||||
}
|
||||
|
||||
/* Element-wise multiply of the unsigned long long arrays:
   g1[i] = g2[i] * g3[i] for every i in [0, N).  */
__attribute__((noinline, noclone)) void
|
||||
f6 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; ++i)
|
||||
g1[i] = g2[i] * g3[i];
|
||||
}
|
||||
|
||||
/* Widening multiply: the product of the short elements a2[i] and a3[i]
   is stored, without narrowing, into the int array c1 for every i in
   [0, N).  */
__attribute__((noinline, noclone)) void
|
||||
f7 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; ++i)
|
||||
c1[i] = a2[i] * a3[i];
|
||||
}
|
||||
|
||||
/* Unsigned widening multiply: b2[i] is converted to unsigned int before
   being multiplied by b3[i], and the unsigned int product is stored into
   d1[i] for every i in [0, N).  */
__attribute__((noinline, noclone)) void
|
||||
f8 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; ++i)
|
||||
d1[i] = (unsigned int) b2[i] * b3[i];
|
||||
}
|
||||
|
||||
/* Signed widening multiply: both int operands c2[i] and c3[i] are
   converted to long long before the multiply, and the long long product
   is stored into e1[i] for every i in [0, N).  */
__attribute__((noinline, noclone)) void
|
||||
f9 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; ++i)
|
||||
e1[i] = (long long) c2[i] * (long long) c3[i];
|
||||
}
|
||||
|
||||
/* Unsigned widening multiply: both unsigned int operands d2[i] and d3[i]
   are converted to unsigned long long before the multiply, and the
   product is stored into g1[i] for every i in [0, N).  */
__attribute__((noinline, noclone)) void
|
||||
f10 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; ++i)
|
||||
g1[i] = (unsigned long long) d2[i] * (unsigned long long) d3[i];
|
||||
}
|
||||
|
||||
/* Dot product of the short arrays a2 and a3, accumulated in an int.
   Returns the sum over i in [0, N) of a2[i] * a3[i].  */
__attribute__((noinline, noclone)) int
|
||||
f11 (void)
|
||||
{
|
||||
int i, r = 0;
|
||||
for (i = 0; i < N; ++i)
|
||||
r += a2[i] * a3[i];
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Dot product of the unsigned short arrays b2 and b3, accumulated in an
   unsigned int.  b2[i] is converted to unsigned int before each multiply.
   Returns the sum over i in [0, N).  */
__attribute__((noinline, noclone)) unsigned int
|
||||
f12 (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
unsigned r = 0;
|
||||
for (i = 0; i < N; ++i)
|
||||
r += (unsigned int) b2[i] * b3[i];
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Dot product of the int arrays c2 and c3, accumulated in a long long.
   Both operands are converted to long long before each multiply.
   Returns the sum over i in [0, N).  */
__attribute__((noinline, noclone)) long long
|
||||
f13 (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
long long r = 0;
|
||||
for (i = 0; i < N; ++i)
|
||||
r += (long long) c2[i] * (long long) c3[i];
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Dot product of the unsigned int arrays d2 and d3, accumulated in an
   unsigned long long.  Both operands are converted to unsigned long long
   before each multiply.  Returns the sum over i in [0, N).  */
__attribute__((noinline, noclone)) unsigned long long
|
||||
f14 (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
unsigned long long r = 0;
|
||||
for (i = 0; i < N; ++i)
|
||||
r += (unsigned long long) d2[i] * (unsigned long long) d3[i];
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Test driver (its name is supplied by the TEST macro).  It fills the
   operand arrays with values derived from random (), computes reference
   dot products s1..s4 in the same loop, then runs f1..f14 and calls
   abort () on the first result that differs from a recomputed reference
   expression.  */
static void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
/* Reference sums compared against f11 (), f12 (), f13 () and f14 ().  */
int s1 = 0;
|
||||
unsigned int s2 = 0;
|
||||
long long s3 = 0;
|
||||
unsigned long long s4 = 0;
|
||||
for (i = 0; i < N; ++i)
|
||||
{
|
||||
/* Empty asm statements that take the address of each accumulator and
   clobber memory; presumably they keep this reference loop from being
   optimised the same way as the functions under test -- TODO confirm.  */
asm volatile ("" : : "r" (&s1) : "memory");
|
||||
asm volatile ("" : : "r" (&s2) : "memory");
|
||||
asm volatile ("" : : "r" (&s3) : "memory");
|
||||
asm volatile ("" : : "r" (&s4) : "memory");
|
||||
/* 16-bit operands: b2/b3 get the random value truncated to unsigned
   short, and a2/a3 receive the same values converted to short.  */
b2[i] = (int) random ();
|
||||
b3[i] = (int) random ();
|
||||
a2[i] = b2[i];
|
||||
a3[i] = b3[i];
|
||||
/* 32-bit operands: a fresh random value shifted left by 16, OR-ed with
   the 16-bit operand; c2/c3 receive the same values converted to int.
   NOTE(review): the shift is done on an int and can reach the sign bit --
   confirm this is acceptable for the test.  */
d2[i] = (((int) random ()) << 16) | b2[i];
|
||||
d3[i] = (((int) random ()) << 16) | b3[i];
|
||||
c2[i] = d2[i];
|
||||
c3[i] = d3[i];
|
||||
/* Accumulate the reference dot products with the same expressions that
   f11..f14 use.  */
s1 += a2[i] * a3[i];
|
||||
s2 += (unsigned int) b2[i] * b3[i];
|
||||
s3 += (long long) c2[i] * (long long) c3[i];
|
||||
s4 += (unsigned long long) d2[i] * (unsigned long long) d3[i];
|
||||
}
|
||||
/* Same-width multiplies.  */
f1 ();
|
||||
f2 ();
|
||||
f3 ();
|
||||
f4 ();
|
||||
f5 ();
|
||||
f6 ();
|
||||
for (i = 0; i < N; ++i)
|
||||
{
|
||||
if (a1[i] != (short) (a2[i] * a3[i]))
|
||||
abort ();
|
||||
/* NOTE(review): b2[i] * b3[i] is evaluated in int after promotion and
   can exceed INT_MAX with a 32-bit int -- confirm intended.  */
if (b1[i] != (unsigned short) (b2[i] * b3[i]))
|
||||
abort ();
|
||||
if (c1[i] != c2[i] * c3[i])
|
||||
abort ();
|
||||
if (d1[i] != d2[i] * d3[i])
|
||||
abort ();
|
||||
/* NOTE(review): e2/e3 and g2/g3 are never written in this function, so
   as static arrays these two checks appear to multiply zeros -- verify
   whether the 64-bit operands were meant to be initialised.  */
if (e1[i] != e2[i] * e3[i])
|
||||
abort ();
|
||||
if (g1[i] != g2[i] * g3[i])
|
||||
abort ();
|
||||
}
|
||||
/* Widening multiplies; these overwrite c1, d1, e1 and g1.  */
f7 ();
|
||||
f8 ();
|
||||
f9 ();
|
||||
f10 ();
|
||||
for (i = 0; i < N; ++i)
|
||||
{
|
||||
if (c1[i] != a2[i] * a3[i])
|
||||
abort ();
|
||||
/* NOTE(review): f8 converts b2[i] to unsigned int before multiplying,
   while this reference multiplies in int after promotion -- confirm the
   difference is intended.  */
if (d1[i] != b2[i] * b3[i])
|
||||
abort ();
|
||||
if (e1[i] != (long long) c2[i] * (long long) c3[i])
|
||||
abort ();
|
||||
if (g1[i] != (unsigned long long) d2[i] * (unsigned long long) d3[i])
|
||||
abort ();
|
||||
}
|
||||
/* Dot products against the reference sums accumulated above.  */
if (f11 () != s1 || f12 () != s2 || f13 () != s3 || f14 () != s4)
|
||||
abort ();
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O3 -msse4.1" } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse4_1_test
|
||||
#endif
|
||||
|
||||
#include "sse2-mul-1.c"
|
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target xop } */
|
||||
/* { dg-options "-O3 -mxop" } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "xop-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST xop_test
|
||||
#endif
|
||||
|
||||
#include "sse2-mul-1.c"
|
Loading…
Reference in New Issue