i386: Introduce V2QImode vectorized logic [PR103861]
Add V2QImode logic operations with SSE and GP registers and split
them to V4QImode SSE instructions or SImode GP instructions.

The patch also fixes PR target/103900.

2022-01-04  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

	PR target/103861
	* config/i386/mmx.md (one_cmplv2qi2): New insn pattern.
	(one_cmplv2qi2 splitters): New post-reload splitters.
	(*andnotv2qi3): New insn pattern.
	(andnotv2qi3 splitters): New post-reload splitters.
	(<any_logic:code>v2qi3): New insn pattern.
	(<any_logic:insn>v2qi3 splitters): New post-reload splitters.

gcc/testsuite/ChangeLog:

	PR target/103861
	* gcc.target/i386/warn-vect-op-2.c: Adjust warnings.
	* gcc.target/i386/pr103900.c: New test.
This commit is contained in:
parent
dd80200324
commit
708b87dcb6
@ -2745,6 +2745,45 @@
|
||||
"TARGET_SSE2"
|
||||
"operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));")
|
||||
|
||||
;; V2QImode one's complement (bitwise NOT of a 2-byte vector).
;; Alternative 0 uses a general register with the input tied to the
;; output; alternatives 1 and 2 use SSE registers and are gated by the
;; "isa" attribute (sse2, avx512vl).  The output template is "#", so no
;; assembly is printed for this pattern itself; the post-reload
;; define_splits for (not:V2QI ...) rewrite it.
;; NOTE(review): the "&" earlyclobber on the SSE alternatives presumably
;; exists because the SSE splitter writes operand 0 before it reads
;; operand 1 -- confirm.
(define_insn "one_cmplv2qi2"
|
||||
[(set (match_operand:V2QI 0 "register_operand" "=r,&x,&v")
|
||||
(not:V2QI
|
||||
(match_operand:V2QI 1 "register_operand" "0,x,v")))]
|
||||
""
|
||||
"#"
|
||||
[(set_attr "isa" "*,sse2,avx512vl")
|
||||
(set_attr "type" "negnot,sselog,sselog")
|
||||
(set_attr "mode" "SI,TI,TI")])
|
||||
|
||||
;; Post-reload splitter for V2QImode NOT when both operands are general
;; registers: the operation is re-expressed as an SImode NOT on the
;; SImode lowparts of the same operands.
;; NOTE(review): the SImode NOT inverts all 32 bits of the register;
;; this assumes the bits above the 2-byte vector are don't-care --
;; confirm.
(define_split
|
||||
[(set (match_operand:V2QI 0 "general_reg_operand")
|
||||
(not:V2QI
|
||||
(match_operand:V2QI 1 "general_reg_operand")))]
|
||||
"reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(not:SI (match_dup 1)))]
|
||||
{
|
||||
operands[1] = gen_lowpart (SImode, operands[1]);
|
||||
operands[0] = gen_lowpart (SImode, operands[0]);
|
||||
})
|
||||
|
||||
;; Post-reload splitter for V2QImode NOT in SSE registers (requires
;; TARGET_SSE2).  The preparation code first emits a set of the
;; V16QImode view of operand 0's hard register to all-ones
;; (CONSTM1_RTX); the replacement pattern then XORs operand 0 with
;; operand 1 in V4QImode, which yields the complement of operand 1.
;; Operand 0 is overwritten before operand 1 is read, so the two must
;; not share a register.
(define_split
|
||||
[(set (match_operand:V2QI 0 "sse_reg_operand")
|
||||
(not:V2QI
|
||||
(match_operand:V2QI 1 "sse_reg_operand")))]
|
||||
"TARGET_SSE2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(xor:V4QI
|
||||
(match_dup 0) (match_dup 1)))]
|
||||
{
|
||||
emit_insn
|
||||
(gen_rtx_SET (gen_rtx_REG (V16QImode, REGNO (operands[0])),
|
||||
CONSTM1_RTX (V16QImode)));
|
||||
|
||||
operands[1] = gen_lowpart (V4QImode, operands[1]);
|
||||
operands[0] = gen_lowpart (V4QImode, operands[0]);
|
||||
})
|
||||
|
||||
(define_insn "mmx_andnot<mode>3"
|
||||
[(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
|
||||
(and:MMXMODEI
|
||||
@ -2775,6 +2814,69 @@
|
||||
(set_attr "type" "sselog")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; V2QImode AND-NOT: operand 0 = (~operand 1) & operand 2, with a
;; clobber of the flags register.  Five alternatives: two
;; general-register forms (the second gated on isa "bmi") and three SSE
;; forms (sse2_noavx, avx, avx512vl).  The output template is "#", so
;; the insn is rewritten by the post-reload splitters for this RTL
;; shape.
;; NOTE(review): alternative 0 ties operand 1 to an earlyclobbered
;; output; the non-BMI splitter writes operand 0 before it reads
;; operand 2, which is presumably why the output is earlyclobber --
;; confirm.
(define_insn "*andnotv2qi3"
|
||||
[(set (match_operand:V2QI 0 "register_operand" "=&r,r,x,x,v")
|
||||
(and:V2QI
|
||||
(not:V2QI (match_operand:V2QI 1 "register_operand" "0,r,0,x,v"))
|
||||
(match_operand:V2QI 2 "register_operand" "r,r,x,x,v")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
""
|
||||
"#"
|
||||
[(set_attr "isa" "*,bmi,sse2_noavx,avx,avx512vl")
|
||||
(set_attr "type" "alu,bitmanip,sselog,sselog,sselog")
|
||||
(set_attr "mode" "SI,SI,TI,TI,TI")])
|
||||
|
||||
;; Post-reload splitter for V2QImode AND-NOT on general registers when
;; TARGET_BMI is set: rewrites to a single SImode
;; (and (not op1) op2) on the SImode lowparts of the operands, keeping
;; the flags-register clobber.
;; NOTE(review): presumably intended to match the BMI and-not
;; instruction pattern defined elsewhere -- confirm.
(define_split
|
||||
[(set (match_operand:V2QI 0 "general_reg_operand")
|
||||
(and:V2QI
|
||||
(not:V2QI (match_operand:V2QI 1 "general_reg_operand"))
|
||||
(match_operand:V2QI 2 "general_reg_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_BMI && reload_completed"
|
||||
[(parallel
|
||||
[(set (match_dup 0)
|
||||
(and:SI (not:SI (match_dup 1)) (match_dup 2)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
{
|
||||
operands[2] = gen_lowpart (SImode, operands[2]);
|
||||
operands[1] = gen_lowpart (SImode, operands[1]);
|
||||
operands[0] = gen_lowpart (SImode, operands[0]);
|
||||
})
|
||||
|
||||
;; Post-reload splitter for V2QImode AND-NOT on general registers when
;; TARGET_BMI is not set.  Produces two SImode insns on the lowparts:
;; first operand 0 = ~operand 1, then operand 0 = operand 0 & operand 2
;; (the second one with the flags-register clobber).
;; Operand 0 is written before operand 2 is read, so the two must not
;; share a register.
(define_split
|
||||
[(set (match_operand:V2QI 0 "general_reg_operand")
|
||||
(and:V2QI
|
||||
(not:V2QI (match_operand:V2QI 1 "general_reg_operand"))
|
||||
(match_operand:V2QI 2 "general_reg_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"!TARGET_BMI && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(not:SI (match_dup 1)))
|
||||
(parallel
|
||||
[(set (match_dup 0)
|
||||
(and:SI (match_dup 0) (match_dup 2)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
{
|
||||
operands[2] = gen_lowpart (SImode, operands[2]);
|
||||
operands[1] = gen_lowpart (SImode, operands[1]);
|
||||
operands[0] = gen_lowpart (SImode, operands[0]);
|
||||
})
|
||||
|
||||
;; Post-reload splitter for V2QImode AND-NOT on SSE registers (requires
;; TARGET_SSE2): rewrites to a V4QImode (and (not op1) op2) on the
;; V4QImode lowparts of the operands.  Unlike the matched pattern, the
;; replacement carries no flags-register clobber.
(define_split
|
||||
[(set (match_operand:V2QI 0 "sse_reg_operand")
|
||||
(and:V2QI
|
||||
(not:V2QI (match_operand:V2QI 1 "sse_reg_operand"))
|
||||
(match_operand:V2QI 2 "sse_reg_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_SSE2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(and:V4QI (not:V4QI (match_dup 1)) (match_dup 2)))]
|
||||
{
|
||||
operands[2] = gen_lowpart (V4QImode, operands[2]);
|
||||
operands[1] = gen_lowpart (V4QImode, operands[1]);
|
||||
operands[0] = gen_lowpart (V4QImode, operands[0]);
|
||||
})
|
||||
|
||||
(define_expand "mmx_<code><mode>3"
|
||||
[(set (match_operand:MMXMODEI 0 "register_operand")
|
||||
(any_logic:MMXMODEI
|
||||
@ -2821,6 +2923,50 @@
|
||||
(set_attr "type" "sselog")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; V2QImode two-operand logic operation, instantiated once per code of
;; the any_logic iterator (the iterator is defined elsewhere).  The
;; pattern clobbers the flags register.  Four alternatives: one
;; general-register form with operand 1 tied to the output, and three
;; SSE forms gated by isa (sse2_noavx, avx, avx512vl).  The "%" on
;; operand 1 marks operands 1 and 2 as commutative.  The output
;; template is "#", so the insn is rewritten by the post-reload
;; splitters for this RTL shape.
(define_insn "<code>v2qi3"
|
||||
[(set (match_operand:V2QI 0 "register_operand" "=r,x,x,v")
|
||||
(any_logic:V2QI
|
||||
(match_operand:V2QI 1 "register_operand" "%0,0,x,v")
|
||||
(match_operand:V2QI 2 "register_operand" "r,x,x,v")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
""
|
||||
"#"
|
||||
[(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
|
||||
(set_attr "type" "alu,sselog,sselog,sselog")
|
||||
(set_attr "mode" "SI,TI,TI,TI")])
|
||||
|
||||
;; Post-reload splitter for a V2QImode any_logic operation on general
;; registers: rewrites to the same logic code in SImode on the SImode
;; lowparts of all three operands, keeping the flags-register clobber.
(define_split
|
||||
[(set (match_operand:V2QI 0 "general_reg_operand")
|
||||
(any_logic:V2QI
|
||||
(match_operand:V2QI 1 "general_reg_operand")
|
||||
(match_operand:V2QI 2 "general_reg_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"reload_completed"
|
||||
[(parallel
|
||||
[(set (match_dup 0)
|
||||
(any_logic:SI (match_dup 1) (match_dup 2)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
{
|
||||
operands[2] = gen_lowpart (SImode, operands[2]);
|
||||
operands[1] = gen_lowpart (SImode, operands[1]);
|
||||
operands[0] = gen_lowpart (SImode, operands[0]);
|
||||
})
|
||||
|
||||
;; Post-reload splitter for a V2QImode any_logic operation on SSE
;; registers (requires TARGET_SSE2): rewrites to the same logic code in
;; V4QImode on the V4QImode lowparts of all three operands.  Unlike the
;; matched pattern, the replacement carries no flags-register clobber.
(define_split
|
||||
[(set (match_operand:V2QI 0 "sse_reg_operand")
|
||||
(any_logic:V2QI
|
||||
(match_operand:V2QI 1 "sse_reg_operand")
|
||||
(match_operand:V2QI 2 "sse_reg_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_SSE2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(any_logic:V4QI (match_dup 1) (match_dup 2)))]
|
||||
{
|
||||
operands[2] = gen_lowpart (V4QImode, operands[2]);
|
||||
operands[1] = gen_lowpart (V4QImode, operands[1]);
|
||||
operands[0] = gen_lowpart (V4QImode, operands[0]);
|
||||
})
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; Parallel integral element swizzling
|
||||
|
25
gcc/testsuite/gcc.target/i386/pr103900.c
Normal file
25
gcc/testsuite/gcc.target/i386/pr103900.c
Normal file
@ -0,0 +1,25 @@
|
||||
/* PR target/103900 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -fno-tree-dce -fno-tree-dse" } */
|
||||
|
||||
/* Vector types used by the reproducer:
   T - 2-byte vector of unsigned char,
   U - 32-byte vector of unsigned char,
   V - 64-byte vector of int,
   W - 32-byte vector of int.
   The global names below do not describe their declared types (for
   example foo0_v128u8_0 has the 2-byte type T); keep them as they
   are.  */
typedef unsigned char __attribute__((__vector_size__(2))) T;
|
||||
typedef unsigned char __attribute__((__vector_size__(32))) U;
|
||||
typedef int __attribute__((__vector_size__(64))) V;
|
||||
typedef int __attribute__((__vector_size__(32))) W;
|
||||
T foo0_v128u8_0;
|
||||
U foo0_v256u8_0;
|
||||
T foo0_v16u16_0;
|
||||
int foo0_v128u64_0, foo0_v512u64_0;
|
||||
|
||||
/* Compile-only reproducer body for PR target/103900.  Several locals
   are read without being initialized and most results are discarded,
   so the function is only meaningful as compiler input; do not
   simplify or reorder it.  The initializer of v16u8_1 applies ~ to a
   2-byte vector (type T) built by __builtin_shufflevector.
   NOTE(review): the test's -fno-tree-dce -fno-tree-dse options
   presumably keep the otherwise dead statements alive -- confirm.  */
void
|
||||
foo0() {
|
||||
V v512u128_0;
|
||||
T v16u8_0;
|
||||
foo0_v128u64_0 += (short)v16u8_0;
|
||||
T v16u8_1 = ~__builtin_shufflevector(foo0_v128u8_0, foo0_v256u8_0, 0, 5);
|
||||
W v256u128_1;
|
||||
V v512u8_r =
|
||||
foo0_v512u64_0 + v512u128_0;
|
||||
(union {U b;}){}.b + (U)v256u128_1;
|
||||
T v16u8_r = v16u8_0 + v16u8_1 + foo0_v16u16_0;
|
||||
}
|
@ -14,7 +14,7 @@ int main (int argc, char *argv[])
|
||||
v0 + v1, /* { dg-warning "expanded piecewise" } */
|
||||
v0 - v1, /* { dg-warning "expanded piecewise" } */
|
||||
v0 > v1, /* { dg-warning "expanded piecewise" } */
|
||||
v0 & v1, /* { dg-warning "expanded in parallel" } */
|
||||
v0 & v1, /* { dg-warning "expanded piecewise" } */
|
||||
__builtin_shuffle (v0, v1), /* { dg-warning "expanded piecewise" } */
|
||||
__builtin_shuffle (v0, v1, v1) /* { dg-warning "expanded piecewise" } */
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user