From ac173024954b4e620c80ab81715bc5d50907369a Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 20 Jun 2019 15:30:54 +0000 Subject: [PATCH] i386: Generate standard floating point scalar operation patterns Standard floating point scalar operation patterns for combiner, which preserve the rest of the vector, look like (vec_merge:V2DF (vec_duplicate:V2DF (reg:DF 87)) (reg/v:V2DF 85 [ x ]) (const_int 1 [0x1])])) and (vec_merge:V2DF (vec_duplicate:V2DF (op:DF (vec_select:DF (reg/v:V2DF 85 [ x ]) (parallel [ (const_int 0 [0])])) (reg:DF 87)) (reg/v:V2DF 85 [ x ]) (const_int 1 [0x1])])) This patch adds and generates such standard floating point scalar operation patterns for +, -, *, /, > and <. Tested on x86-64. gcc/ PR target/54855 * config/i386/i386-expand.c (ix86_expand_vector_set): Generate standard scalar operation pattern for V2DF. * config/i386/sse.md (*<sse>_vm<plusminus_insn><mode>3): New. (*<sse>_vm<multdiv_mnemonic><mode>3): Likewise. (*ieee_<ieee_maxmin><mode>3): Likewise. (vec_setv2df_0): Likewise. gcc/testsuite/ PR target/54855 * gcc.target/i386/pr54855-1.c: New test. * gcc.target/i386/pr54855-2.c: Likewise. * gcc.target/i386/pr54855-3.c: Likewise. * gcc.target/i386/pr54855-4.c: Likewise. * gcc.target/i386/pr54855-5.c: Likewise. * gcc.target/i386/pr54855-6.c: Likewise. * gcc.target/i386/pr54855-7.c: Likewise. * gcc.target/i386/pr54855-8.c: Likewise. * gcc.target/i386/pr54855-9.c: Likewise. * gcc.target/i386/pr54855-10.c: Likewise. 
From-SVN: r272511 --- gcc/ChangeLog | 10 +++ gcc/config/i386/i386-expand.c | 12 +++ gcc/config/i386/sse.md | 88 ++++++++++++++++++++++ gcc/testsuite/ChangeLog | 14 ++++ gcc/testsuite/gcc.target/i386/pr54855-1.c | 16 ++++ gcc/testsuite/gcc.target/i386/pr54855-10.c | 13 ++++ gcc/testsuite/gcc.target/i386/pr54855-2.c | 15 ++++ gcc/testsuite/gcc.target/i386/pr54855-3.c | 14 ++++ gcc/testsuite/gcc.target/i386/pr54855-4.c | 14 ++++ gcc/testsuite/gcc.target/i386/pr54855-5.c | 16 ++++ gcc/testsuite/gcc.target/i386/pr54855-6.c | 14 ++++ gcc/testsuite/gcc.target/i386/pr54855-7.c | 14 ++++ gcc/testsuite/gcc.target/i386/pr54855-8.c | 14 ++++ gcc/testsuite/gcc.target/i386/pr54855-9.c | 14 ++++ 14 files changed, 268 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-10.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-6.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-7.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-8.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-9.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 06e4ef45b66..d4c32201b81 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2019-06-20 H.J. Lu + + PR target/54855 + * config/i386/i386-expand.c (ix86_expand_vector_set): Generate + standard scalar operation pattern for V2DF. + * config/i386/sse.md (*<sse>_vm<plusminus_insn><mode>3): New. + (*<sse>_vm<multdiv_mnemonic><mode>3): Likewise. + (*ieee_<ieee_maxmin><mode>3): Likewise. + (vec_setv2df_0): Likewise. 
+ 2019-06-20 Jan Hubicka * tree-ssa-alias.c (aliasing_component_refs_p): Remove ref2_is_decl diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 4acd7621cf2..72be1df0dac 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -14214,6 +14214,17 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) return; case E_V2DFmode: + /* NB: For ELT == 0, use standard scalar operation patterns which + preserve the rest of the vector for combiner: + + (vec_merge:V2DF + (vec_duplicate:V2DF (reg:DF)) + (reg:V2DF) + (const_int 1)) + */ + if (elt == 0) + goto do_vec_merge; + { rtx op0, op1; @@ -14511,6 +14522,7 @@ quarter: } else if (use_vec_merge) { +do_vec_merge: tmp = gen_rtx_VEC_DUPLICATE (mode, val); tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (HOST_WIDE_INT_1U << elt)); diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 26309aef5c9..a8d1fbf1fdd 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1826,6 +1826,28 @@ (set_attr "type" "sseadd") (set_attr "mode" "")]) +;; Standard scalar operation patterns which preserve the rest of the +;; vector for combiner. +(define_insn "*_vm3" + [(set (match_operand:VF_128 0 "register_operand" "=x,v") + (vec_merge:VF_128 + (vec_duplicate:VF_128 + (plusminus: + (vec_select: + (match_operand:VF_128 1 "register_operand" "0,v") + (parallel [(const_int 0)])) + (match_operand: 2 "nonimmediate_operand" "xm,vm"))) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "@ + \t{%2, %0|%0, %2} + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "")]) + (define_insn "_vm3" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 @@ -1880,6 +1902,29 @@ (set_attr "type" "ssemul") (set_attr "mode" "")]) +;; Standard scalar operation patterns which preserve the rest of the +;; vector for combiner. 
+(define_insn "*_vm3" + [(set (match_operand:VF_128 0 "register_operand" "=x,v") + (vec_merge:VF_128 + (vec_duplicate:VF_128 + (multdiv: + (vec_select: + (match_operand:VF_128 1 "register_operand" "0,v") + (parallel [(const_int 0)])) + (match_operand: 2 "nonimmediate_operand" "xm,vm"))) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "@ + \t{%2, %0|%0, %2} + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sse") + (set_attr "prefix" "orig,vex") + (set_attr "btver2_decode" "direct,double") + (set_attr "mode" "")]) + (define_insn "_vm3" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 @@ -2229,6 +2274,30 @@ (set_attr "prefix" "") (set_attr "mode" "")]) +;; Standard scalar operation patterns which preserve the rest of the +;; vector for combiner. +(define_insn "*ieee_3" + [(set (match_operand:VF_128 0 "register_operand" "=x,v") + (vec_merge:VF_128 + (vec_duplicate:VF_128 + (unspec: + [(vec_select: + (match_operand:VF_128 1 "register_operand" "0,v") + (parallel [(const_int 0)])) + (match_operand: 2 "nonimmediate_operand" "xm,vm")] + IEEE_MAXMIN)) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "@ + \t{%2, %0|%0, %2} + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "btver2_sse_attr" "maxmin") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "")]) + (define_insn "_vm3" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 @@ -7911,6 +7980,25 @@ [(set (match_dup 0) (match_dup 1))] "operands[0] = adjust_address (operands[0], mode, 0);") +;; Standard scalar operation patterns which preserve the rest of the +;; vector for combiner. 
+(define_insn "vec_setv2df_0" + [(set (match_operand:V2DF 0 "register_operand" "=x,v,x,v") + (vec_merge:V2DF + (vec_duplicate:V2DF + (match_operand:DF 2 "nonimmediate_operand" " x,v,m,m")) + (match_operand:V2DF 1 "register_operand" " 0,v,0,v") + (const_int 1)))] + "TARGET_SSE2" + "@ + movsd\t{%2, %0|%0, %2} + vmovsd\t{%2, %1, %0|%0, %1, %2} + movlpd\t{%2, %0|%0, %2} + vmovlpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx,noavx,avx") + (set_attr "type" "ssemov") + (set_attr "mode" "DF")]) + (define_expand "vec_set" [(match_operand:V 0 "register_operand") (match_operand: 1 "register_operand") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c1e948d862c..0fd1ce07210 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,17 @@ +2019-06-20 H.J. Lu + + PR target/54855 + * gcc.target/i386/pr54855-1.c: New test. + * gcc.target/i386/pr54855-2.c: Likewise. + * gcc.target/i386/pr54855-3.c: Likewise. + * gcc.target/i386/pr54855-4.c: Likewise. + * gcc.target/i386/pr54855-5.c: Likewise. + * gcc.target/i386/pr54855-6.c: Likewise. + * gcc.target/i386/pr54855-7.c: Likewise. + * gcc.target/i386/pr54855-8.c: Likewise. + * gcc.target/i386/pr54855-9.c: Likewise. + * gcc.target/i386/pr54855-10.c: Likewise. + 2019-06-20 Jan Hubicka * gcc.c-torture/execute/alias-access-path-1.c: New testcase. 
diff --git a/gcc/testsuite/gcc.target/i386/pr54855-1.c b/gcc/testsuite/gcc.target/i386/pr54855-1.c new file mode 100644 index 00000000000..693aafa09ab --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "addsd" 1 } } */ +/* { dg-final { scan-assembler-not "movapd" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double __v2df __attribute__ ((__vector_size__ (16))); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + +__m128d +_mm_add_sd (__m128d x, __m128d y) +{ + __m128d z = __extension__ (__m128d)(__v2df) + { (((__v2df) x)[0] + ((__v2df) y)[0]), ((__v2df) x)[1] }; + return z; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-10.c b/gcc/testsuite/gcc.target/i386/pr54855-10.c new file mode 100644 index 00000000000..9e08a85723e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-10.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "movlpd" 1 } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double vec __attribute__((vector_size(16))); + +vec +foo (vec x, double *a) +{ + x[0] = *a; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-2.c b/gcc/testsuite/gcc.target/i386/pr54855-2.c new file mode 100644 index 00000000000..20c6f8eb529 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "mulsd" 1 } } */ +/* { dg-final { scan-assembler-not "movapd" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double __v2df __attribute__ ((__vector_size__ (16))); + +__v2df +_mm_mul_sd (__v2df x, __v2df y) +{ + __v2df z = x; + z[0] = x[0] * y[0]; + return z; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-3.c 
b/gcc/testsuite/gcc.target/i386/pr54855-3.c new file mode 100644 index 00000000000..3c15dfc93d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-3.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "subsd" 1 } } */ +/* { dg-final { scan-assembler-not "movapd" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double vec __attribute__((vector_size(16))); + +vec +foo (vec x) +{ + x[0] -= 1.; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-4.c b/gcc/testsuite/gcc.target/i386/pr54855-4.c new file mode 100644 index 00000000000..32eb28e852a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-4.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "subsd" 1 } } */ +/* { dg-final { scan-assembler-not "movapd" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double vec __attribute__((vector_size(16))); + +vec +foo (vec x, double a) +{ + x[0] -= a; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-5.c b/gcc/testsuite/gcc.target/i386/pr54855-5.c new file mode 100644 index 00000000000..e06999074e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-5.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "subsd" 1 } } */ +/* { dg-final { scan-assembler-times "mulpd" 1 } } */ +/* { dg-final { scan-assembler-not "movapd" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double __v2df __attribute__ ((__vector_size__ (16))); + +__v2df +foo (__v2df x, __v2df y) +{ + x[0] -= y[0]; + x *= y; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-6.c b/gcc/testsuite/gcc.target/i386/pr54855-6.c new file mode 100644 index 00000000000..8f44d17b6d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-6.c @@ -0,0 +1,14 @@ +/* { dg-do 
compile } */ +/* { dg-options "-O2 -msse -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "divss" 1 } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movss" } } */ + +typedef float vec __attribute__((vector_size(16))); + +vec +foo (vec x, float f) +{ + x[0] /= f; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-7.c b/gcc/testsuite/gcc.target/i386/pr54855-7.c new file mode 100644 index 00000000000..a551bd5c92f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-7.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "divss" 1 } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movss" } } */ + +typedef float vec __attribute__((vector_size(16))); + +vec +foo (vec x) +{ + x[0] /= 2.1f; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-8.c b/gcc/testsuite/gcc.target/i386/pr54855-8.c new file mode 100644 index 00000000000..7602dc293a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-8.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "maxsd" 1 } } */ +/* { dg-final { scan-assembler-not "movapd" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ + +typedef double vec __attribute__((vector_size(16))); + +vec +foo (vec x, double a) +{ + x[0] = x[0] > a ? 
x[0] : a; + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr54855-9.c b/gcc/testsuite/gcc.target/i386/pr54855-9.c new file mode 100644 index 00000000000..40add5f6763 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54855-9.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "minss" 1 } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movss" } } */ + +typedef float vec __attribute__((vector_size(16))); + +vec +foo (vec x, float a) +{ + x[0] = x[0] < a ? x[0] : a; + return x; +}