i386: Introduce V2QImode vectorized logic [PR103861]

Add V2QImode logic operations with SSE and GP registers and split
them to V4QImode SSE instructions or SImode GP instructions.

The patch also fixes PR target/103900.

2022-01-04  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

	PR target/103861
	* config/i386/mmx.md (one_cmplv2qi2): New insn pattern.
	(one_cmplv2qi2 splitters): New post-reload splitters.
	(*andnotv2qi3): New insn pattern.
	(andnotv2qi3 splitters): New post-reload splitters.
	(<any_logic:code>v2qi3): New insn pattern.
	(<any_logic:code>v2qi3 splitters): New post-reload splitters.

gcc/testsuite/ChangeLog:

	PR target/103861
	* gcc.target/i386/warn-vect-op-2.c: Adjust warnings.
	* gcc.target/i386/pr103900.c: New test.
This commit is contained in:
Uros Bizjak 2022-01-04 19:41:47 +01:00
parent dd80200324
commit 708b87dcb6
3 changed files with 172 additions and 1 deletions

View File

@ -2745,6 +2745,45 @@
"TARGET_SSE2"
"operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));")
;; One's complement (bitwise NOT) of a V2QImode register operand.
;; Three alternatives, per the "isa" attribute: a general register with the
;; input tied to the output ("0"), and two SSE-register alternatives
;; (sse2 "x", avx512vl "v") whose output is marked earlyclobber ("&").
;; The output template is "#", i.e. the insn is meant to be split; the
;; V2QImode NOT define_splits in this file rewrite it once reload has
;; completed.  The SSE split writes operand 0 before it reads operand 1,
;; which is why the SSE outputs must not overlap the input.
(define_insn "one_cmplv2qi2"
[(set (match_operand:V2QI 0 "register_operand" "=r,&x,&v")
(not:V2QI
(match_operand:V2QI 1 "register_operand" "0,x,v")))]
""
"#"
[(set_attr "isa" "*,sse2,avx512vl")
(set_attr "type" "negnot,sselog,sselog")
(set_attr "mode" "SI,TI,TI")])
;; Post-reload split of a V2QImode NOT whose operands are both general
;; registers: replace it with an SImode NOT on the SImode lowparts of the
;; same two operands.
(define_split
[(set (match_operand:V2QI 0 "general_reg_operand")
(not:V2QI
(match_operand:V2QI 1 "general_reg_operand")))]
"reload_completed"
[(set (match_dup 0)
(not:SI (match_dup 1)))]
{
/* Re-express both operands in SImode for the replacement pattern.  */
operands[1] = gen_lowpart (SImode, operands[1]);
operands[0] = gen_lowpart (SImode, operands[0]);
})
;; Post-reload split of a V2QImode NOT whose operands are both SSE
;; registers (TARGET_SSE2 only).  The NOT is computed as an XOR with a
;; minus-one constant: the preparation code emits a set of the V16QImode
;; register of operand 0 to CONSTM1_RTX, and the replacement pattern then
;; XORs operand 0 with operand 1 in V4QImode.
(define_split
[(set (match_operand:V2QI 0 "sse_reg_operand")
(not:V2QI
(match_operand:V2QI 1 "sse_reg_operand")))]
"TARGET_SSE2 && reload_completed"
[(set (match_dup 0)
(xor:V4QI
(match_dup 0) (match_dup 1)))]
{
/* Extra insn emitted from the preparation code: fill the full V16QImode
   hard register of the destination with the minus-one constant.  This
   writes operand 0 before operand 1 is read by the XOR.  */
emit_insn
(gen_rtx_SET (gen_rtx_REG (V16QImode, REGNO (operands[0])),
CONSTM1_RTX (V16QImode)));
/* Re-express both operands in V4QImode for the XOR pattern.  */
operands[1] = gen_lowpart (V4QImode, operands[1]);
operands[0] = gen_lowpart (V4QImode, operands[0]);
})
(define_insn "mmx_andnot<mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
(and:MMXMODEI
@ -2775,6 +2814,69 @@
(set_attr "type" "sselog")
(set_attr "mode" "TI")])
;; V2QImode and-not: operand 0 = (~operand 1) & operand 2, with a clobber
;; of the flags register.
;; Five alternatives, per the "isa" attribute:
;;   0: general registers, operand 1 tied to the output, output earlyclobber
;;   1: general registers, BMI
;;   2: SSE registers (sse2_noavx), operand 1 tied to the output
;;   3: SSE registers (avx)
;;   4: SSE registers (avx512vl)
;; The output template is "#", i.e. the insn is meant to be split; the
;; and-not define_splits in this file rewrite it once reload has completed.
;; The non-BMI general-register split writes operand 0 before it reads
;; operand 2, which is why alternative 0 is earlyclobber.
(define_insn "*andnotv2qi3"
[(set (match_operand:V2QI 0 "register_operand" "=&r,r,x,x,v")
(and:V2QI
(not:V2QI (match_operand:V2QI 1 "register_operand" "0,r,0,x,v"))
(match_operand:V2QI 2 "register_operand" "r,r,x,x,v")))
(clobber (reg:CC FLAGS_REG))]
""
"#"
[(set_attr "isa" "*,bmi,sse2_noavx,avx,avx512vl")
(set_attr "type" "alu,bitmanip,sselog,sselog,sselog")
(set_attr "mode" "SI,SI,TI,TI,TI")])
;; Post-reload split of the V2QImode and-not when all three operands are
;; general registers and TARGET_BMI is enabled: replace it with a single
;; SImode and-not (still clobbering the flags register) on the SImode
;; lowparts of the operands.
(define_split
[(set (match_operand:V2QI 0 "general_reg_operand")
(and:V2QI
(not:V2QI (match_operand:V2QI 1 "general_reg_operand"))
(match_operand:V2QI 2 "general_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_BMI && reload_completed"
[(parallel
[(set (match_dup 0)
(and:SI (not:SI (match_dup 1)) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
{
/* Re-express all operands in SImode for the replacement pattern.  */
operands[2] = gen_lowpart (SImode, operands[2]);
operands[1] = gen_lowpart (SImode, operands[1]);
operands[0] = gen_lowpart (SImode, operands[0]);
})
;; Post-reload split of the V2QImode and-not when all three operands are
;; general registers and TARGET_BMI is not enabled.  The operation is
;; done in two SImode steps on the lowparts of the operands:
;;   1. operand 0 = ~operand 1
;;   2. operand 0 = operand 0 & operand 2   (clobbers the flags register)
;; Step 1 writes operand 0 before operand 2 is read in step 2.
(define_split
[(set (match_operand:V2QI 0 "general_reg_operand")
(and:V2QI
(not:V2QI (match_operand:V2QI 1 "general_reg_operand"))
(match_operand:V2QI 2 "general_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_BMI && reload_completed"
[(set (match_dup 0)
(not:SI (match_dup 1)))
(parallel
[(set (match_dup 0)
(and:SI (match_dup 0) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
{
/* Re-express all operands in SImode for the replacement patterns.  */
operands[2] = gen_lowpart (SImode, operands[2]);
operands[1] = gen_lowpart (SImode, operands[1]);
operands[0] = gen_lowpart (SImode, operands[0]);
})
;; Post-reload split of the V2QImode and-not when all three operands are
;; SSE registers (TARGET_SSE2 only): replace it with the same and-not in
;; V4QImode on the lowparts of the operands.  The replacement pattern
;; carries no flags-register clobber, unlike the insn being split.
(define_split
[(set (match_operand:V2QI 0 "sse_reg_operand")
(and:V2QI
(not:V2QI (match_operand:V2QI 1 "sse_reg_operand"))
(match_operand:V2QI 2 "sse_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_SSE2 && reload_completed"
[(set (match_dup 0)
(and:V4QI (not:V4QI (match_dup 1)) (match_dup 2)))]
{
/* Re-express all operands in V4QImode for the replacement pattern.  */
operands[2] = gen_lowpart (V4QImode, operands[2]);
operands[1] = gen_lowpart (V4QImode, operands[1]);
operands[0] = gen_lowpart (V4QImode, operands[0]);
})
(define_expand "mmx_<code><mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand")
(any_logic:MMXMODEI
@ -2821,6 +2923,50 @@
(set_attr "type" "sselog")
(set_attr "mode" "TI")])
;; V2QImode two-operand logic operation, instantiated once per code of the
;; any_logic iterator, with a clobber of the flags register.
;; Four alternatives, per the "isa" attribute:
;;   0: general registers, operand 1 tied to the output
;;   1: SSE registers (sse2_noavx), operand 1 tied to the output
;;   2: SSE registers (avx)
;;   3: SSE registers (avx512vl)
;; The leading "%" in operand 1's constraint marks operands 1 and 2 as
;; commutative.  The output template is "#", i.e. the insn is meant to be
;; split; the any_logic define_splits in this file rewrite it once reload
;; has completed.
(define_insn "<code>v2qi3"
[(set (match_operand:V2QI 0 "register_operand" "=r,x,x,v")
(any_logic:V2QI
(match_operand:V2QI 1 "register_operand" "%0,0,x,v")
(match_operand:V2QI 2 "register_operand" "r,x,x,v")))
(clobber (reg:CC FLAGS_REG))]
""
"#"
[(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
(set_attr "type" "alu,sselog,sselog,sselog")
(set_attr "mode" "SI,TI,TI,TI")])
;; Post-reload split of a V2QImode any_logic operation whose operands are
;; all general registers: replace it with the same operation in SImode
;; (still clobbering the flags register) on the SImode lowparts of the
;; operands.
(define_split
[(set (match_operand:V2QI 0 "general_reg_operand")
(any_logic:V2QI
(match_operand:V2QI 1 "general_reg_operand")
(match_operand:V2QI 2 "general_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
"reload_completed"
[(parallel
[(set (match_dup 0)
(any_logic:SI (match_dup 1) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
{
/* Re-express all operands in SImode for the replacement pattern.  */
operands[2] = gen_lowpart (SImode, operands[2]);
operands[1] = gen_lowpart (SImode, operands[1]);
operands[0] = gen_lowpart (SImode, operands[0]);
})
;; Post-reload split of a V2QImode any_logic operation whose operands are
;; all SSE registers (TARGET_SSE2 only): replace it with the same
;; operation in V4QImode on the lowparts of the operands.  The replacement
;; pattern carries no flags-register clobber, unlike the insn being split.
(define_split
[(set (match_operand:V2QI 0 "sse_reg_operand")
(any_logic:V2QI
(match_operand:V2QI 1 "sse_reg_operand")
(match_operand:V2QI 2 "sse_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_SSE2 && reload_completed"
[(set (match_dup 0)
(any_logic:V4QI (match_dup 1) (match_dup 2)))]
{
/* Re-express all operands in V4QImode for the replacement pattern.  */
operands[2] = gen_lowpart (V4QImode, operands[2]);
operands[1] = gen_lowpart (V4QImode, operands[1]);
operands[0] = gen_lowpart (V4QImode, operands[0]);
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral element swizzling

View File

@ -0,0 +1,25 @@
/* PR target/103900 */
/* { dg-do compile } */
/* { dg-options "-O -fno-tree-dce -fno-tree-dse" } */
/* Vector types used by the reproducer: T is a 2-byte vector of unsigned
   char, U a 32-byte vector of unsigned char, V a 64-byte vector of int and
   W a 32-byte vector of int.  */
typedef unsigned char __attribute__((__vector_size__(2))) T;
typedef unsigned char __attribute__((__vector_size__(32))) U;
typedef int __attribute__((__vector_size__(64))) V;
typedef int __attribute__((__vector_size__(32))) W;
/* File-scope inputs read by foo0.  The names do not match the declared
   types (e.g. foo0_v16u16_0 has type T); presumably left over from
   automatic test reduction -- keep as is.  */
T foo0_v128u8_0;
U foo0_v256u8_0;
T foo0_v16u16_0;
int foo0_v128u64_0, foo0_v512u64_0;
/* Reproducer body for PR target/103900.  The test is compile-only, so the
   function is never executed.  Several locals (v512u128_0, v16u8_0,
   v256u128_1) are read without being initialized and most results are
   unused; NOTE(review): this looks like reduced fuzzer output, so do not
   tidy it up -- the exact statements are presumably what triggers the
   original failure.  */
void
foo0() {
V v512u128_0;
T v16u8_0;
foo0_v128u64_0 += (short)v16u8_0;
/* Bitwise NOT applied to a 2-byte vector built by shuffling a T and a U.  */
T v16u8_1 = ~__builtin_shufflevector(foo0_v128u8_0, foo0_v256u8_0, 0, 5);
W v256u128_1;
V v512u8_r =
foo0_v512u64_0 + v512u128_0;
/* Expression statement whose value is discarded.  */
(union {U b;}){}.b + (U)v256u128_1;
/* Arithmetic on 2-byte vectors, including the NOT result above.  */
T v16u8_r = v16u8_0 + v16u8_1 + foo0_v16u16_0;
}

View File

@ -14,7 +14,7 @@ int main (int argc, char *argv[])
v0 + v1, /* { dg-warning "expanded piecewise" } */
v0 - v1, /* { dg-warning "expanded piecewise" } */
v0 > v1, /* { dg-warning "expanded piecewise" } */
v0 & v1, /* { dg-warning "expanded in parallel" } */
v0 & v1, /* { dg-warning "expanded piecewise" } */
__builtin_shuffle (v0, v1), /* { dg-warning "expanded piecewise" } */
__builtin_shuffle (v0, v1, v1) /* { dg-warning "expanded piecewise" } */
};