diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 47f8b18b82e..a63df0d0b1f 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -985,6 +985,15 @@ (V32HI "V32HI") (V64QI "V64QI") (V32QI "V32QI") (V16QI "V16QI")]) +;; Mapping of vector modes to an V*HImode of the same size +(define_mode_attr ssewvecmode + [(V8DI "V32HI") (V4DI "V16HI") (V2DI "V8HI") + (V16SI "V32HI") (V8SI "V16HI") (V4SI "V8HI")]) + +(define_mode_attr ssewvecmodelower + [(V8DI "v32hi") (V4DI "v16hi") (V2DI "v8hi") + (V16SI "v32hi") (V8SI "v16hi") (V4SI "v8hi")]) + (define_mode_attr sseintvecmode2 [(V8DF "XI") (V4DF "OI") (V2DF "TI") (V8SF "OI") (V4SF "TI") @@ -1194,6 +1203,11 @@ (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI") (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")]) +(define_mode_attr ssepackmodelower + [(V8HI "v16qi") (V4SI "v8hi") (V2DI "v4si") + (V16HI "v32qi") (V8SI "v16hi") (V4DI "v8si") + (V32HI "v64qi") (V16SI "v32hi") (V8DI "v16si")]) + ;; Mapping of the max integer size for xop rotate immediate constraint (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) @@ -10681,6 +10695,46 @@ (set_attr "prefix" "evex") (set_attr "mode" "HF")]) +(define_insn_and_split "*vec_set_0_zero_extendhi" + [(set (match_operand:VI48_AVX512F 0 "register_operand") + (vec_merge:VI48_AVX512F + (vec_duplicate:VI48_AVX512F + (zero_extend: + (match_operand:HI 1 "nonimmediate_operand"))) + (match_operand:VI48_AVX512F 2 "const0_operand") + (const_int 1)))] + "TARGET_AVX512FP16 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx dest = gen_reg_rtx (mode); + emit_insn (gen_vec_set_0 (dest, + CONST0_RTX (mode), + operands[1])); + emit_move_insn (operands[0], + lowpart_subreg (mode, dest, mode)); + DONE; +}) + +(define_insn_and_split "*vec_setv2di_0_zero_extendhi_1" + [(set (match_operand:V2DI 0 "register_operand") + (vec_concat:V2DI + (zero_extend:DI + (match_operand:HI 1 "nonimmediate_operand")) + (const_int 0)))] + "TARGET_AVX512FP16 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx dest = gen_reg_rtx (V8HImode); + emit_insn (gen_vec_setv8hi_0 (dest, CONST0_RTX (V8HImode), operands[1])); + emit_move_insn (operands[0], + lowpart_subreg (V2DImode, dest, V8HImode)); + DONE; +}) + (define_insn "avx512fp16_movsh" [(set (match_operand:V8HF 0 "register_operand" "=v") (vec_merge:V8HF @@ -10750,6 +10804,46 @@ ] (symbol_ref "true")))]) +(define_insn_and_split "*vec_set_0_zero_extendsi" + [(set (match_operand:VI8 0 "register_operand") + (vec_merge:VI8 + (vec_duplicate:VI8 + (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand"))) + (match_operand:VI8 2 "const0_operand") + (const_int 1)))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx dest = gen_reg_rtx (mode); + emit_insn (gen_vec_set_0 (dest, + CONST0_RTX (mode), + operands[1])); + emit_move_insn (operands[0], + lowpart_subreg (mode, dest, mode)); + DONE; +}) + +(define_insn_and_split "*vec_setv2di_0_zero_extendsi_1" + [(set (match_operand:V2DI 0 "register_operand") + (vec_concat:V2DI + (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand")) + (const_int 0)))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx dest = gen_reg_rtx (V4SImode); + emit_insn (gen_vec_setv4si_0 (dest, CONST0_RTX (V4SImode), operands[1])); + emit_move_insn (operands[0], + lowpart_subreg (V2DImode, dest, V4SImode)); + DONE; +}) + (define_insn "sse4_1_insertps" [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v") (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm") diff --git a/gcc/testsuite/gcc.target/i386/pr104915-vmovd.c b/gcc/testsuite/gcc.target/i386/pr104915-vmovd.c new file mode 100644 index 00000000000..913ff8806f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr104915-vmovd.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times {(?n)vmovd[ \t]+} 3 } } */ +/* { dg-final { scan-assembler-not {(?n)movq[ \t]+} } } */ + +#include + +__m128i +foo1 (int* p) +{ + return _mm_set_epi64x (0, (unsigned int) ((*(__m32_u *)p)[0])); +} + +__m256i +foo3 (int* p) +{ + return _mm256_set_epi64x (0, 0, 0, (unsigned int) ((*(__m32_u *)p)[0])); +} + +__m512i +foo5 (int* p) +{ + return _mm512_set_epi64 (0, 0, 0, 0, 0, 0, 0, + (unsigned int) ((*(__m32_u *)p)[0])); +} diff --git a/gcc/testsuite/gcc.target/i386/pr104915-vmovw.c b/gcc/testsuite/gcc.target/i386/pr104915-vmovw.c new file mode 100644 index 00000000000..ac47865d17a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr104915-vmovw.c @@ -0,0 +1,45 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times {(?n)vmovw[ \t]+} 6 } } */ +/* { dg-final { scan-assembler-not {(?n)mov[dq][ \t]+} } } */ + +#include +__m128i +foo (short* p) +{ + return _mm_set_epi32 (0, 0, 0, (unsigned short) ((*(__m16_u *)p)[0])); +} + +__m128i +foo1 (short* p) +{ + return _mm_set_epi64x (0, (unsigned short) ((*(__m16_u *)p)[0])); +} + +__m256i +foo2 (short* p) +{ + return _mm256_set_epi32 (0, 0, 0, 0, 0, 0, 0, + (unsigned short) ((*(__m16_u *)p)[0])); +} + +__m256i +foo3 (short* p) +{ + return _mm256_set_epi64x (0, 0, 0, (unsigned short) ((*(__m16_u *)p)[0])); +} + +__m512i +foo4 (short* p) +{ + return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + (unsigned short) ((*(__m16_u *)p)[0])); +} + +__m512i +foo5 (short* p) +{ + return _mm512_set_epi64 (0, 0, 0, 0, 0, 0, 0, + (unsigned short) ((*(__m16_u *)p)[0])); +}