i386: Add vashlm3/vashrm3/vlshrm3 to enable vectorization of vector shift vector. [PR98434]
Add expanders for vashl<VI12_AVX512BW>, vlshr<VI12_AVX512BW>, vashr<VI1_AVX512BW> and vashr<v32hi,v16hi,v4di,v8di>. In addition, expand_mult_const assumes that mul and add are available at the same time, but on i386 addv8qi is restricted to TARGET_64BIT while mulv8qi is not, which could cause an ICE. So restrict mulv8qi and shiftv8qi to TARGET_64BIT as well.

gcc/ChangeLog:

	PR target/98434
	* config/i386/i386-expand.c (ix86_expand_vec_interleave):
	Adjust comments for ix86_expand_vecop_qihi2.
	(ix86_expand_vecmul_qihi): Renamed to ...
	(ix86_expand_vecop_qihi2): Adjust function prototype to
	support shift operation, add static to definition.
	(ix86_expand_vec_shift_qihi_constant): Add static to definition.
	(ix86_expand_vecop_qihi): Call ix86_expand_vecop_qihi2 and
	ix86_expand_vec_shift_qihi_constant.
	* config/i386/i386-protos.h (ix86_expand_vecmul_qihi): Deleted.
	(ix86_expand_vec_shift_qihi_constant): Deleted.
	* config/i386/sse.md (VI12_256_512_AVX512VL): New mode iterator.
	(mulv8qi3): Call ix86_expand_vecop_qihi directly, add condition
	TARGET_64BIT.
	(mul<mode>3): Ditto.
	(<insn><mode>3): Ditto.
	(vlshr<mode>3): Extend to support avx512 vlshr.
	(v<insn><mode>3): New expander for vashr/vlshr/vashl.
	(v<insn>v8qi3): Ditto.
	(vashrv8hi3<mask_name>): Renamed to ...
	(vashr<mode>3): And extend to support V16QImode for avx512.
	(vashrv16qi3): Deleted.
	(vashrv2di3<mask_name>): Extend expander to support avx512
	instruction.

gcc/testsuite/ChangeLog:

	PR target/98434
	* gcc.target/i386/pr98434-1.c: New test.
	* gcc.target/i386/pr98434-2.c: New test.
	* gcc.target/i386/avx512vl-pr95488-1.c: Adjust testcase.
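For reference, this is the kind of source the new expanders let the vectorizer handle — a minimal sketch in the style of the new pr98434 tests (compile with -O2 -mavx512bw -mavx512vl; the function name follows the tests' FOO macro naming):

typedef char v16qi __attribute__ ((vector_size (16)));

/* Each lane of A is shifted right arithmetically by the count held in
   the corresponding lane of B.  With the new vashrv16qi3 expander this
   maps to the widening sequence vpmovsxbw/vpsravw/vpmovwb instead of
   staying scalar.  */
v16qi
foo_v16qi_vashr (v16qi a, v16qi b)
{
  return a >> b;
}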
parent fcf617f0d2
commit 3bd86940c4
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -20705,8 +20705,9 @@ ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
   gcc_assert (ok);
 }
 
-/* Optimize vector MUL generation for V8QI, V16QI and V32QI
-   under TARGET_AVX512BW. i.e. for v16qi a * b, it has
+/* This function is similar as ix86_expand_vecop_qihi,
+   but optimized under AVX512BW by using vpmovwb.
+   For example, optimize vector MUL generation like
 
    vpmovzxbw ymm2, xmm0
    vpmovzxbw ymm3, xmm1
@@ -20716,13 +20717,14 @@ ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
    it would take less instructions than ix86_expand_vecop_qihi.
    Return true if success.  */
 
-bool
-ix86_expand_vecmul_qihi (rtx dest, rtx op1, rtx op2)
+static bool
+ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2)
 {
   machine_mode himode, qimode = GET_MODE (dest);
   rtx hop1, hop2, hdest;
   rtx (*gen_extend)(rtx, rtx);
   rtx (*gen_truncate)(rtx, rtx);
+  bool uns_p = (code == ASHIFTRT) ? false : true;
 
   /* There's no V64HImode multiplication instruction.  */
   if (qimode == E_V64QImode)
@@ -20743,17 +20745,17 @@ ix86_expand_vecmul_qihi (rtx dest, rtx op1, rtx op2)
     {
     case E_V8QImode:
       himode = V8HImode;
-      gen_extend = gen_zero_extendv8qiv8hi2;
+      gen_extend = uns_p ? gen_zero_extendv8qiv8hi2 : gen_extendv8qiv8hi2;
       gen_truncate = gen_truncv8hiv8qi2;
       break;
     case E_V16QImode:
       himode = V16HImode;
-      gen_extend = gen_zero_extendv16qiv16hi2;
+      gen_extend = uns_p ? gen_zero_extendv16qiv16hi2 : gen_extendv16qiv16hi2;
       gen_truncate = gen_truncv16hiv16qi2;
       break;
     case E_V32QImode:
       himode = V32HImode;
-      gen_extend = gen_zero_extendv32qiv32hi2;
+      gen_extend = uns_p ? gen_zero_extendv32qiv32hi2 : gen_extendv32qiv32hi2;
       gen_truncate = gen_truncv32hiv32qi2;
       break;
     default:
@@ -20765,7 +20767,7 @@ ix86_expand_vecmul_qihi (rtx dest, rtx op1, rtx op2)
   hdest = gen_reg_rtx (himode);
   emit_insn (gen_extend (hop1, op1));
   emit_insn (gen_extend (hop2, op2));
-  emit_insn (gen_rtx_SET (hdest, simplify_gen_binary (MULT, himode,
+  emit_insn (gen_rtx_SET (hdest, simplify_gen_binary (code, himode,
                                                       hop1, hop2)));
   emit_insn (gen_truncate (dest, hdest));
   return true;
@@ -20773,8 +20775,9 @@ ix86_expand_vecmul_qihi (rtx dest, rtx op1, rtx op2)
 
 /* Expand a vector operation shift by constant for a V*QImode in terms of the
    same operation on V*HImode.  Return true if success.  */
-bool
-ix86_expand_vec_shift_qihi_constant (enum rtx_code code, rtx dest, rtx op1, rtx op2)
+static bool
+ix86_expand_vec_shift_qihi_constant (enum rtx_code code,
+                                     rtx dest, rtx op1, rtx op2)
 {
   machine_mode qimode, himode;
   HOST_WIDE_INT and_constant, xor_constant;
@@ -20886,6 +20889,16 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
   bool uns_p = false;
   int i;
 
+  if (CONST_INT_P (op2)
+      && (code == ASHIFT || code == LSHIFTRT || code == ASHIFTRT)
+      && ix86_expand_vec_shift_qihi_constant (code, dest, op1, op2))
+    return;
+
+  if (TARGET_AVX512BW
+      && VECTOR_MODE_P (GET_MODE (op2))
+      && ix86_expand_vecop_qihi2 (code, dest, op1, op2))
+    return;
+
   switch (qimode)
     {
     case E_V16QImode:
@@ -20907,7 +20920,6 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
       gcc_unreachable ();
     }
 
-  op2_l = op2_h = op2;
   switch (code)
     {
     case MULT:
@@ -20936,17 +20948,46 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
       op1_h = gen_reg_rtx (himode);
       ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
       ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
+      /* vashr/vlshr/vashl  */
+      if (GET_MODE_CLASS (GET_MODE (op2)) == MODE_VECTOR_INT)
+        {
+          rtx tmp = force_reg (qimode, op2);
+          op2_l = gen_reg_rtx (himode);
+          op2_h = gen_reg_rtx (himode);
+          ix86_expand_sse_unpack (op2_l, tmp, uns_p, false);
+          ix86_expand_sse_unpack (op2_h, tmp, uns_p, true);
+        }
+      else
+        op2_l = op2_h = op2;
+
       full_interleave = true;
       break;
     default:
       gcc_unreachable ();
     }
 
-  /* Perform the operation.  */
-  res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
-                               1, OPTAB_DIRECT);
-  res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
-                               1, OPTAB_DIRECT);
+  /* Perform vashr/vlshr/vashl.  */
+  if (code != MULT
+      && GET_MODE_CLASS (GET_MODE (op2)) == MODE_VECTOR_INT)
+    {
+      res_l = gen_reg_rtx (himode);
+      res_h = gen_reg_rtx (himode);
+      emit_insn (gen_rtx_SET (res_l,
+                              simplify_gen_binary (code, himode,
+                                                   op1_l, op2_l)));
+      emit_insn (gen_rtx_SET (res_h,
+                              simplify_gen_binary (code, himode,
+                                                   op1_h, op2_h)));
+    }
+  /* Performance mult/ashr/lshr/ashl.  */
+  else
+    {
+      res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
+                                   1, OPTAB_DIRECT);
+      res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
+                                   1, OPTAB_DIRECT);
+    }
 
   gcc_assert (res_l && res_h);
 
   /* Merge the data back into the right place.  */
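In scalar terms, the qihi2 path above computes each QImode lane by widening to HImode, operating there, and truncating back; a per-lane model (illustrative only, not code from the commit — the function name is made up):

/* One byte lane of v16qi a >> b under ix86_expand_vecop_qihi2:
   extend to 16 bits (sign extension only for ASHIFTRT, hence uns_p;
   vpmovsxbw), do the operation in 16 bits (vpsravw), then truncate
   the result back to 8 bits (vpmovwb).  */
static signed char
qihi2_lane (signed char a, signed char b)
{
  short wa = a;
  short wb = b;
  return (signed char) (wa >> wb);
}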
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -208,10 +208,7 @@ extern void ix86_expand_round (rtx, rtx);
 extern void ix86_expand_rounddf_32 (rtx, rtx);
 extern void ix86_expand_round_sse4 (rtx, rtx);
 
-extern bool ix86_expand_vecmul_qihi (rtx, rtx, rtx);
 extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
-extern bool ix86_expand_vec_shift_qihi_constant (enum rtx_code, rtx, rtx, rtx);
-
 extern rtx ix86_split_stack_guard (void);
 
 extern void ix86_move_vector_high_sse_to_mmx (rtx);
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -397,6 +397,10 @@
 (define_mode_iterator VI1_AVX512F
   [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
 
+(define_mode_iterator VI12_256_512_AVX512VL
+  [V64QI (V32QI "TARGET_AVX512VL")
+   V32HI (V16HI "TARGET_AVX512VL")])
+
 (define_mode_iterator VI2_AVX2
   [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
 
@@ -11780,9 +11784,9 @@
   [(set (match_operand:V8QI 0 "register_operand")
         (mult:V8QI (match_operand:V8QI 1 "register_operand")
                    (match_operand:V8QI 2 "register_operand")))]
-  "TARGET_AVX512VL && TARGET_AVX512BW"
+  "TARGET_AVX512VL && TARGET_AVX512BW && TARGET_64BIT"
 {
-  gcc_assert (ix86_expand_vecmul_qihi (operands[0], operands[1], operands[2]));
+  ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
   DONE;
 })
 
@@ -11792,8 +11796,6 @@
            (match_operand:VI1_AVX512 2 "register_operand")))]
   "TARGET_SSE2"
 {
-  if (ix86_expand_vecmul_qihi (operands[0], operands[1], operands[2]))
-    DONE;
   ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
   DONE;
 })
@@ -20239,12 +20241,20 @@
         (lshiftrt:VI12_128
           (match_operand:VI12_128 1 "register_operand")
           (match_operand:VI12_128 2 "nonimmediate_operand")))]
-  "TARGET_XOP"
+  "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
 {
-  rtx neg = gen_reg_rtx (<MODE>mode);
-  emit_insn (gen_neg<mode>2 (neg, operands[2]));
-  emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
-  DONE;
+  if (TARGET_XOP)
+    {
+      rtx neg = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_neg<mode>2 (neg, operands[2]));
+      emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
+      DONE;
+    }
+  else if (<MODE>mode == V16QImode)
+    {
+      ix86_expand_vecop_qihi (LSHIFTRT, operands[0], operands[1], operands[2]);
+      DONE;
+    }
 })
 
 (define_expand "vlshr<mode>3"
@@ -20263,6 +20273,31 @@
     }
 })
 
+(define_expand "v<insn><mode>3"
+  [(set (match_operand:VI12_256_512_AVX512VL 0 "register_operand")
+        (any_shift:VI12_256_512_AVX512VL
+          (match_operand:VI12_256_512_AVX512VL 1 "register_operand")
+          (match_operand:VI12_256_512_AVX512VL 2 "nonimmediate_operand")))]
+  "TARGET_AVX512BW"
+{
+  if (<MODE>mode == V32QImode || <MODE>mode == V64QImode)
+    {
+      ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
+      DONE;
+    }
+})
+
+(define_expand "v<insn>v8qi3"
+  [(set (match_operand:V8QI 0 "register_operand")
+        (any_shift:V8QI
+          (match_operand:V8QI 1 "register_operand")
+          (match_operand:V8QI 2 "nonimmediate_operand")))]
+  "TARGET_AVX512BW && TARGET_AVX512VL && TARGET_64BIT"
+{
+  ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
+  DONE;
+})
+
 (define_expand "vlshr<mode>3"
   [(set (match_operand:VI48_512 0 "register_operand")
         (lshiftrt:VI48_512
@@ -20277,33 +20312,32 @@
           (match_operand:VI48_256 2 "nonimmediate_operand")))]
   "TARGET_AVX2")
 
-(define_expand "vashrv8hi3<mask_name>"
-  [(set (match_operand:V8HI 0 "register_operand")
-        (ashiftrt:V8HI
-          (match_operand:V8HI 1 "register_operand")
-          (match_operand:V8HI 2 "nonimmediate_operand")))]
+(define_expand "vashr<mode>3"
+  [(set (match_operand:VI8_256_512 0 "register_operand")
+        (ashiftrt:VI8_256_512
+          (match_operand:VI8_256_512 1 "register_operand")
+          (match_operand:VI8_256_512 2 "nonimmediate_operand")))]
+  "TARGET_AVX512F")
+
+(define_expand "vashr<mode>3"
+  [(set (match_operand:VI12_128 0 "register_operand")
+        (ashiftrt:VI12_128
+          (match_operand:VI12_128 1 "register_operand")
+          (match_operand:VI12_128 2 "nonimmediate_operand")))]
   "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
 {
   if (TARGET_XOP)
     {
-      rtx neg = gen_reg_rtx (V8HImode);
-      emit_insn (gen_negv8hi2 (neg, operands[2]));
-      emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
+      rtx neg = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_neg<mode>2 (neg, operands[2]));
+      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
       DONE;
     }
+  else if(<MODE>mode == V16QImode)
+    {
+      ix86_expand_vecop_qihi (ASHIFTRT, operands[0],operands[1], operands[2]);
+      DONE;
+    }
 })
 
-(define_expand "vashrv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand")
-        (ashiftrt:V16QI
-          (match_operand:V16QI 1 "register_operand")
-          (match_operand:V16QI 2 "nonimmediate_operand")))]
-  "TARGET_XOP"
-{
-  rtx neg = gen_reg_rtx (V16QImode);
-  emit_insn (gen_negv16qi2 (neg, operands[2]));
-  emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
-  DONE;
-})
-
 (define_expand "vashrv2di3<mask_name>"
@@ -20354,10 +20388,18 @@
         (ashift:VI12_128
           (match_operand:VI12_128 1 "register_operand")
           (match_operand:VI12_128 2 "nonimmediate_operand")))]
-  "TARGET_XOP"
+  "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
 {
-  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
-  DONE;
+  if (TARGET_XOP)
+    {
+      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else if (<MODE>mode == V16QImode)
+    {
+      ix86_expand_vecop_qihi (ASHIFT, operands[0], operands[1], operands[2]);
+      DONE;
+    }
 })
 
 (define_expand "vashl<mode>3"
@@ -20461,8 +20503,7 @@
       gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
       emit_insn (gen (operands[0], operands[1], tmp));
     }
-  else if (!ix86_expand_vec_shift_qihi_constant (<CODE>, operands[0],
-                                                 operands[1], operands[2]))
+  else
     ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
   DONE;
 })
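Note the TARGET_64BIT guard on the v8qi patterns above: it mirrors the restriction described in the commit message (addv8qi is 64-bit-only, and expand_mult_const assumes mul and add are available together), so 64-bit vectors keep scalar code on ia32. A quick sanity check (illustrative, consistent with the { target { ! ia32 } } selectors in the adjusted tests below):

typedef char v8qi __attribute__ ((vector_size (8)));

/* On x86-64 with -O2 -mavx512bw -mavx512vl this goes through
   v<insn>v8qi3 -> ix86_expand_vecop_qihi; on ia32 the expander's
   TARGET_64BIT condition keeps it scalar.  */
v8qi
foo_v8qi_vashr (v8qi a, v8qi b)
{
  return a >> b;
}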
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr95488-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr95488-1.c
@@ -1,10 +1,10 @@
 /* PR target/pr95488 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512bw -mavx512vl" } */
-/* { dg-final { scan-assembler-times "vpmovzxbw" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbw" 8 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmullw\[^\n\]*ymm" 2 } } */
-/* { dg-final { scan-assembler-times "vpmullw\[^\n\]*xmm" 2 } } */
-/* { dg-final { scan-assembler-times "vpmovwb" 4 } } */
+/* { dg-final { scan-assembler-times "vpmullw\[^\n\]*xmm" 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 4 { target { ! ia32 } } } } */
 
 typedef char v16qi __attribute__ ((vector_size (16)));
 typedef char v8qi __attribute__ ((vector_size (8)));
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98434-1.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2 -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times {vpsravw[\t ]*%xmm} 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {vpsrlvw[\t ]*%ymm} 2 } } */
+/* { dg-final { scan-assembler-times {vpsllvw[\t ]*%zmm} 2 } } */
+/* { dg-final { scan-assembler-times {vpsllvq[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {vpsravq[\t ]*%ymm} 1 } } */
+/* { dg-final { scan-assembler-times {vpsrlvq[\t ]*%zmm} 1 } } */
+
+int n;
+
+typedef char v8qi __attribute__((vector_size (8)));
+typedef char v16qi __attribute__((vector_size (16)));
+typedef char v32qi __attribute__((vector_size (32)));
+typedef short v8hi __attribute__((vector_size (16)));
+typedef short v16hi __attribute__((vector_size (32)));
+typedef short v32hi __attribute__((vector_size (64)));
+typedef long long v2di __attribute__((vector_size (16)));
+typedef long long v4di __attribute__((vector_size (32)));
+typedef long long v8di __attribute__((vector_size (64)));
+typedef unsigned char v8uqi __attribute__((vector_size (8)));
+typedef unsigned char v16uqi __attribute__((vector_size (16)));
+typedef unsigned char v32uqi __attribute__((vector_size (32)));
+typedef unsigned short v8uhi __attribute__((vector_size (16)));
+typedef unsigned short v16uhi __attribute__((vector_size (32)));
+typedef unsigned short v32uhi __attribute__((vector_size (64)));
+typedef unsigned long long v2udi __attribute__((vector_size (16)));
+typedef unsigned long long v4udi __attribute__((vector_size (32)));
+typedef unsigned long long v8udi __attribute__((vector_size (64)));
+
+#define FOO(TYPE, OP, NAME)             \
+__attribute__((noipa)) TYPE             \
+foo_##TYPE##_##NAME (TYPE a, TYPE b)    \
+{                                       \
+  return a OP b;                        \
+}                                       \
+
+FOO (v8qi, <<, vashl);
+FOO (v8qi, >>, vashr);
+FOO (v8uqi, >>, vlshr);
+FOO (v16qi, <<, vashl);
+FOO (v16qi, >>, vashr);
+FOO (v16uqi, >>, vlshr);
+FOO (v32qi, <<, vashl);
+FOO (v32qi, >>, vashr);
+FOO (v32uqi, >>, vlshr);
+FOO (v8hi, <<, vashl);
+FOO (v8hi, >>, vashr);
+FOO (v8uhi, >>, vlshr);
+FOO (v16hi, <<, vashl);
+FOO (v16hi, >>, vashr);
+FOO (v16uhi, >>, vlshr);
+FOO (v32hi, <<, vashl);
+FOO (v32hi, >>, vashr);
+FOO (v32uhi, >>, vlshr);
+FOO (v2di, <<, vashl);
+FOO (v2di, >>, vashr);
+FOO (v2udi, >>, vlshr);
+FOO (v4di, <<, vashl);
+FOO (v4di, >>, vashr);
+FOO (v4udi, >>, vlshr);
+FOO (v8di, <<, vashl);
+FOO (v8di, >>, vashr);
+FOO (v8udi, >>, vlshr);
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98434-2.c
@@ -0,0 +1,129 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mprefer-vector-width=512 -mavx512vl -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+
+#include "pr98434-1.c"
+void test (void);
+#define DO_TEST test
+#define AVX512VL
+#define AVX512BW
+#include "avx512-check.h"
+
+
+typedef char int8;
+typedef unsigned char uint8;
+typedef short int16;
+typedef unsigned short uint16;
+typedef long long int64;
+typedef unsigned long long uint64;
+
+#define F_EMULATE(TYPE, SIZE, OP, NAME)                         \
+__attribute__((noipa, optimize("-fno-tree-vectorize"))) void    \
+emulate_##SIZE##_##TYPE##_##NAME (TYPE *a,                      \
+                                  TYPE *b,                      \
+                                  TYPE *c)                      \
+{                                                               \
+  int i;                                                        \
+  for (i = 0; i < SIZE; i++)                                    \
+    {                                                           \
+      a[i] = b[i] OP c[i];                                      \
+    }                                                           \
+}
+
+F_EMULATE (int8, 8, <<, vashl);
+F_EMULATE (int8, 8, >>, vashr);
+F_EMULATE (uint8, 8, >>, vlshr);
+F_EMULATE (int8, 16, <<, vashl);
+F_EMULATE (int8, 16, >>, vashr);
+F_EMULATE (uint8, 16, >>, vlshr);
+F_EMULATE (int8, 32, <<, vashl);
+F_EMULATE (int8, 32, >>, vashr);
+F_EMULATE (uint8, 32, >>, vlshr);
+F_EMULATE (int16, 8, <<, vashl);
+F_EMULATE (int16, 8, >>, vashr);
+F_EMULATE (uint16, 8, >>, vlshr);
+F_EMULATE (int16, 16, <<, vashl);
+F_EMULATE (int16, 16, >>, vashr);
+F_EMULATE (uint16, 16, >>, vlshr);
+F_EMULATE (int16, 32, <<, vashl);
+F_EMULATE (int16, 32, >>, vashr);
+F_EMULATE (uint16, 32, >>, vlshr);
+F_EMULATE (int64, 2, <<, vashl);
+F_EMULATE (int64, 2, >>, vashr);
+F_EMULATE (uint64, 2, >>, vlshr);
+F_EMULATE (int64, 4, <<, vashl);
+F_EMULATE (int64, 4, >>, vashr);
+F_EMULATE (uint64, 4, >>, vlshr);
+F_EMULATE (int64, 8, <<, vashl);
+F_EMULATE (int64, 8, >>, vashr);
+F_EMULATE (uint64, 8, >>, vlshr);
+
+#define VSHIFT(VTYPE, NAME, src1, src2) \
+  foo_##VTYPE##_##NAME (src1, src2)
+
+#define EMULATE(SIZE, TYPE, NAME, dst, src1, src2) \
+  emulate_##SIZE##_##TYPE##_##NAME (dst, src1, src2)
+
+#define F_TEST_SHIFT(VTYPE, VTYPEU, TYPE, TYPEU, SIZE) \
+__attribute__((noipa, optimize("-fno-tree-vectorize"))) void \
+test_##VTYPE ()\
+{\
+  TYPE src1[SIZE], src2[SIZE], ref[SIZE]; \
+  TYPEU usrc1[SIZE], usrc2[SIZE], uref[SIZE]; \
+  VTYPE dst; \
+  VTYPEU udst; \
+  int i;\
+  for (i = 0; i < SIZE; i++)\
+    {\
+      dst[i] = ref[i] = -i; \
+      src1[i] = -(i + SIZE); \
+      src2[i] = i % 8; \
+      udst[i] = uref[i] = i; \
+      usrc1[i] = (i + SIZE); \
+      usrc2[i] = (i % 8); \
+    }\
+  EMULATE(SIZE, TYPE, vashl, ref, src1, src2); \
+  dst = VSHIFT(VTYPE, vashl, *((VTYPE* )&src1[0]), *((VTYPE*) &src2[0])); \
+  for (i = 0; i < SIZE; i++)\
+    {\
+      if(dst[i] != ref[i]) __builtin_abort();\
+    }\
+  EMULATE(SIZE, TYPE, vashr, ref, src1, src2); \
+  dst = VSHIFT(VTYPE, vashr, *((VTYPE* )&src1[0]), *((VTYPE*) &src2[0])); \
+  for (i = 0; i < SIZE; i++)\
+    {\
+      if(dst[i] != ref[i]) __builtin_abort();\
+    }\
+  EMULATE(SIZE, TYPEU, vlshr, uref, usrc1, usrc2); \
+  udst = VSHIFT(VTYPEU, vlshr, *((VTYPEU* )&usrc1[0]), *((VTYPEU*) &usrc2[0])); \
+  for (i = 0; i < SIZE; i++)\
+    {\
+      if(udst[i] != uref[i]) __builtin_abort();\
+    }\
+}
+
+F_TEST_SHIFT (v8qi, v8uqi, int8, uint8, 8);
+F_TEST_SHIFT (v16qi, v16uqi, int8, uint8, 16);
+F_TEST_SHIFT (v32qi, v32uqi, int8, uint8, 32);
+F_TEST_SHIFT (v8hi, v8uhi, int16, uint16, 8);
+F_TEST_SHIFT (v16hi, v16uhi, int16, uint16, 16);
+F_TEST_SHIFT (v32hi, v32uhi, int16, uint16, 32);
+F_TEST_SHIFT (v2di, v2udi, int64, uint64, 2);
+F_TEST_SHIFT (v4di, v4udi, int64, uint64, 4);
+F_TEST_SHIFT (v8di, v8udi, int64, uint64, 8);
+
+
+void
+test (void)
+{
+  test_v8qi ();
+  test_v16qi ();
+  test_v32qi ();
+  test_v8hi ();
+  test_v16hi ();
+  test_v32hi ();
+  test_v2di ();
+  test_v4di ();
+  test_v8di ();
+}