x86: Mark scratch operand in ssse3_pshufbv8qi3 as earlyclobber

commit 16ed2601ad
Author: H.J. Lu <hongjiu.lu@intel.com>
Date:   Wed May 15 15:26:19 2019 +0000

    i386: Emulate MMX pshufb with SSE version

has

+(define_insn_and_split "ssse3_pshufbv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+  (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+           (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
+          UNSPEC_PSHUFB))
+   (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
                                       ^^^  These should be earlyclobber.
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pshufb\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 3)
+  (and:V4SI (match_dup 3) (match_dup 2)))
+   (set (match_dup 0)
+  (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]

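If input register operand 2 is dead after this insn, RA may choose its
register as the scratch operand 3.  Since the scratch isn't marked as
earlyclobber, the split sequence first overwrites operand 3 (and thus
operand 2) before the AND reads operand 2, so the original value of
operand 2 becomes unused after the split and gets optimized out.
Marking the scratch operand as earlyclobber fixes the issue.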

gcc/

	PR target/94467
	* config/i386/sse.md (ssse3_pshufbv8qi3): Mark scratch operand
	as earlyclobber.

gcc/testsuite/

	PR target/94467
	* gcc.target/i386/pr94467-1.c: New test.
	* gcc.target/i386/pr94467-2.c: Likewise.
This commit is contained in:
H.J. Lu 2020-04-03 11:49:10 -07:00
parent b949f8e2ac
commit bbcdf9bb3f
5 changed files with 101 additions and 1 deletions

View File

@ -1,3 +1,9 @@
2020-04-03 H.J. Lu <hongjiu.lu@intel.com>
PR target/94467
* config/i386/sse.md (ssse3_pshufbv8qi3): Mark scratch operand
as earlyclobber.
2020-04-03 Jeff Law <law@redhat.com>
PR rtl-optimization/92264

View File

@ -16695,7 +16695,7 @@
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
(match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
UNSPEC_PSHUFB))
-(clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
+(clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
"@
pshufb\t{%2, %0|%0, %2}

View File

@ -1,3 +1,9 @@
2020-04-03 H.J. Lu <hongjiu.lu@intel.com>
PR target/94467
* gcc.target/i386/pr94467-1.c: New test.
* gcc.target/i386/pr94467-2.c: Likewise.
2020-04-03 Jakub Jelinek <jakub@redhat.com>
PR target/94460

View File

@ -0,0 +1,40 @@
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O -mavx" } */
#include "avx-check.h"
/* 8-byte GNU vector types (and a 64-bit scalar) used by this test.  */
typedef char __attribute__ ((__vector_size__ (8))) v8qi;
typedef short __attribute__ ((__vector_size__ (8))) v4hi;
typedef int __attribute__ ((__vector_size__ (8))) v2si;
typedef long long __attribute__ ((__vector_size__ (8))) v1di;
typedef unsigned long long u64;
/* File-scope inputs and result sinks for foo.  None of them has an
   initializer, so they all start out as zero.  */
u64 k, c;
v8qi g, h, p, q;
v4hi d, e, f, l, n, o;
v2si j;
/* Test body: run a chain of MMX builtins over R and the file-scope
   globals, store every intermediate result to a global, and return
   the sum of G, H and J, each reinterpreted as a 64-bit integer.
   The statement order is deliberate; do not reorder.  */
u64
foo (v4hi r)
{
v8qi s;
f = (v4hi) j;
e = __builtin_ia32_psrlwi ((v4hi) k, c);
/* S is used only as the second argument of the pshufb below, so it is
   dead after that call.  NOTE(review): presumably this is the "input
   operand 2 is dead" situation in which the register allocator could
   pick the same register for the scratch operand -- confirm.  */
s = __builtin_ia32_pavgb (h, h);
n = __builtin_ia32_pabsw (f);
o = __builtin_ia32_psubusw (n, l);
p = __builtin_ia32_packsswb (r, o);
/* The shuffle under test.  */
q = __builtin_ia32_pshufb (p, s);
g = __builtin_ia32_punpcklbw (q, (v8qi) r);
d = r;
return (u64) g + (u64) h + (u64) j;
}
/* Call foo with a vector whose first element is 5 (the remaining
   elements are zero) and abort unless the result equals the expected
   constant.  NOTE(review): presumably invoked by the harness in
   avx-check.h -- confirm.  */
static void
avx_test (void)
{
u64 x = foo ((v4hi) { 5 });
if (x != 0x0005000500050505)
__builtin_abort ();
}

View File

@ -0,0 +1,48 @@
/* { dg-do run } */
/* { dg-require-effective-target ssse3 } */
/* { dg-options "-O -mssse3" } */
/* Default check header and test entry-point name.  Both are only
   defined here if not already defined before this point.
   NOTE(review): presumably this lets another test reuse this file with
   a different ISA check header -- confirm.  */
#ifndef CHECK_H
#define CHECK_H "ssse3-check.h"
#endif
#ifndef TEST
#define TEST ssse3_test
#endif
#include CHECK_H
/* 8-byte GNU vector types (and a 64-bit scalar) used by this test.  */
typedef char __attribute__ ((__vector_size__ (8))) v8qi;
typedef short __attribute__ ((__vector_size__ (8))) v4hi;
typedef int __attribute__ ((__vector_size__ (8))) v2si;
typedef long long __attribute__ ((__vector_size__ (8))) v1di;
typedef unsigned long long u64;
/* File-scope inputs and result sinks for foo.  None of them has an
   initializer, so they all start out as zero.  */
u64 k, c;
v8qi g, h, p, q;
v4hi d, e, f, l, n, o;
v2si j;
/* Test body: run a chain of MMX builtins over R and the file-scope
   globals, store every intermediate result to a global, and return
   the sum of G, H and J, each reinterpreted as a 64-bit integer.
   The statement order is deliberate; do not reorder.  */
u64
foo (v4hi r)
{
v8qi s;
f = (v4hi) j;
e = __builtin_ia32_psrlwi ((v4hi) k, c);
/* S is used only as the second argument of the pshufb below, so it is
   dead after that call.  NOTE(review): presumably this is the "input
   operand 2 is dead" situation in which the register allocator could
   pick the same register for the scratch operand -- confirm.  */
s = __builtin_ia32_pavgb (h, h);
n = __builtin_ia32_pabsw (f);
o = __builtin_ia32_psubusw (n, l);
p = __builtin_ia32_packsswb (r, o);
/* The shuffle under test.  */
q = __builtin_ia32_pshufb (p, s);
g = __builtin_ia32_punpcklbw (q, (v8qi) r);
d = r;
return (u64) g + (u64) h + (u64) j;
}
/* Call foo with a vector whose first element is 5 (the remaining
   elements are zero) and abort unless the result equals the expected
   constant.  NOTE(review): presumably invoked by the harness in
   CHECK_H through the TEST macro -- confirm.  */
static void
ssse3_test (void)
{
u64 x = foo ((v4hi) { 5 });
if (x != 0x0005000500050505)
__builtin_abort ();
}