[gcc]
2017-11-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * config/rs6000/altivec.md (*p9_vadu<mode>3): Rename to p9_vadu<mode>3. (usadv16qi): New define_expand. (usadv8hi): New define_expand. [gcc/testsuite] 2017-11-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * gcc.target/powerpc/sad-vectorize-1.c: New file. * gcc.target/powerpc/sad-vectorize-2.c: New file. * gcc.target/powerpc/sad-vectorize-3.c: New file. * gcc.target/powerpc/sad-vectorize-4.c: New file. From-SVN: r254453
This commit is contained in:
parent
8e7d1486f6
commit
962b966886
@ -1,3 +1,10 @@
|
||||
2017-11-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||
|
||||
* config/rs6000/altivec.md (*p9_vadu<mode>3): Rename to
|
||||
p9_vadu<mode>3.
|
||||
(usadv16qi): New define_expand.
|
||||
(usadv8hi): New define_expand.
|
||||
|
||||
2017-11-06 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
PR bootstrap/82832
|
||||
|
@ -4020,7 +4020,7 @@
|
||||
"TARGET_P9_VECTOR")
|
||||
|
||||
;; Vector absolute difference unsigned
|
||||
(define_insn "*p9_vadu<mode>3"
|
||||
(define_insn "p9_vadu<mode>3"
|
||||
[(set (match_operand:VI 0 "register_operand" "=v")
|
||||
(unspec:VI [(match_operand:VI 1 "register_operand" "v")
|
||||
(match_operand:VI 2 "register_operand" "v")]
|
||||
@ -4184,6 +4184,49 @@
|
||||
"vbpermd %0,%1,%2"
|
||||
[(set_attr "type" "vecsimple")])
|
||||
|
||||
;; Support for SAD (sum of absolute differences).
|
||||
|
||||
;; Due to saturating semantics, we can't combine the sum-across
|
||||
;; with the vector accumulate in vsum4ubs. A vadduwm is needed.
|
||||
(define_expand "usadv16qi"
|
||||
[(use (match_operand:V4SI 0 "register_operand"))
|
||||
(use (match_operand:V16QI 1 "register_operand"))
|
||||
(use (match_operand:V16QI 2 "register_operand"))
|
||||
(use (match_operand:V4SI 3 "register_operand"))]
|
||||
"TARGET_P9_VECTOR"
|
||||
{
|
||||
rtx absd = gen_reg_rtx (V16QImode);
|
||||
rtx zero = gen_reg_rtx (V4SImode);
|
||||
rtx psum = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_p9_vaduv16qi3 (absd, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
|
||||
emit_insn (gen_altivec_vsum4ubs (psum, absd, zero));
|
||||
emit_insn (gen_addv4si3 (operands[0], psum, operands[3]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Since vsum4shs is saturating and further performs signed
|
||||
;; arithmetic, we can't combine the sum-across with the vector
|
||||
;; accumulate in vsum4shs. A vadduwm is needed.
|
||||
(define_expand "usadv8hi"
|
||||
[(use (match_operand:V4SI 0 "register_operand"))
|
||||
(use (match_operand:V8HI 1 "register_operand"))
|
||||
(use (match_operand:V8HI 2 "register_operand"))
|
||||
(use (match_operand:V4SI 3 "register_operand"))]
|
||||
"TARGET_P9_VECTOR"
|
||||
{
|
||||
rtx absd = gen_reg_rtx (V8HImode);
|
||||
rtx zero = gen_reg_rtx (V4SImode);
|
||||
rtx psum = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_p9_vaduv8hi3 (absd, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
|
||||
emit_insn (gen_altivec_vsum4shs (psum, absd, zero));
|
||||
emit_insn (gen_addv4si3 (operands[0], psum, operands[3]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Decimal Integer operations
|
||||
(define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB])
|
||||
|
||||
|
@ -1,3 +1,10 @@
|
||||
2017-11-06 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/powerpc/sad-vectorize-1.c: New file.
|
||||
* gcc.target/powerpc/sad-vectorize-2.c: New file.
|
||||
* gcc.target/powerpc/sad-vectorize-3.c: New file.
|
||||
* gcc.target/powerpc/sad-vectorize-4.c: New file.
|
||||
|
||||
2017-11-06 Martin Liska <mliska@suse.cz>
|
||||
|
||||
* c-c++-common/cilk-plus/AN/pr57541-2.c (foo1): Return a value
|
||||
|
36
gcc/testsuite/gcc.target/powerpc/sad-vectorize-1.c
Normal file
36
gcc/testsuite/gcc.target/powerpc/sad-vectorize-1.c
Normal file
@ -0,0 +1,36 @@
|
||||
/* { dg-do compile { target { powerpc*-*-* } } } */
|
||||
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
|
||||
/* { dg-require-effective-target powerpc_p9vector_ok } */
|
||||
/* { dg-skip-if "" { powerpc*-*-aix* } } */
|
||||
/* { dg-options "-O3 -mcpu=power9" } */
|
||||
|
||||
/* Verify that we vectorize this SAD loop using vabsdub. */
|
||||
|
||||
extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__));
|
||||
|
||||
static int
|
||||
foo (unsigned char *w, int i, unsigned char *x, int j)
|
||||
{
|
||||
int tot = 0;
|
||||
for (int a = 0; a < 16; a++)
|
||||
{
|
||||
for (int b = 0; b < 16; b++)
|
||||
tot += abs (w[b] - x[b]);
|
||||
w += i;
|
||||
x += j;
|
||||
}
|
||||
return tot;
|
||||
}
|
||||
|
||||
void
|
||||
bar (unsigned char *w, unsigned char *x, int i, int *result)
|
||||
{
|
||||
*result = foo (w, 16, x, i);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vabsdub" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vsum4ubs" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vadduwm" 17 } } */
|
||||
|
||||
/* Note: One of the 16 adds is optimized out (add with zero),
|
||||
leaving 15. The extra two adds are for the final reduction. */
|
36
gcc/testsuite/gcc.target/powerpc/sad-vectorize-2.c
Normal file
36
gcc/testsuite/gcc.target/powerpc/sad-vectorize-2.c
Normal file
@ -0,0 +1,36 @@
|
||||
/* { dg-do compile { target { powerpc*-*-* } } } */
|
||||
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
|
||||
/* { dg-require-effective-target powerpc_p9vector_ok } */
|
||||
/* { dg-skip-if "" { powerpc*-*-aix* } } */
|
||||
/* { dg-options "-O3 -mcpu=power9" } */
|
||||
|
||||
/* Verify that we vectorize this SAD loop using vabsduh. */
|
||||
|
||||
extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__));
|
||||
|
||||
static int
|
||||
foo (unsigned short *w, int i, unsigned short *x, int j)
|
||||
{
|
||||
int tot = 0;
|
||||
for (int a = 0; a < 16; a++)
|
||||
{
|
||||
for (int b = 0; b < 8; b++)
|
||||
tot += abs (w[b] - x[b]);
|
||||
w += i;
|
||||
x += j;
|
||||
}
|
||||
return tot;
|
||||
}
|
||||
|
||||
void
|
||||
bar (unsigned short *w, unsigned short *x, int i, int *result)
|
||||
{
|
||||
*result = foo (w, 8, x, i);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vabsduh" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vsum4shs" 16 } } */
|
||||
/* { dg-final { scan-assembler-times "vadduwm" 17 } } */
|
||||
|
||||
/* Note: One of the 16 adds is optimized out (add with zero),
|
||||
leaving 15. The extra two adds are for the final reduction. */
|
57
gcc/testsuite/gcc.target/powerpc/sad-vectorize-3.c
Normal file
57
gcc/testsuite/gcc.target/powerpc/sad-vectorize-3.c
Normal file
@ -0,0 +1,57 @@
|
||||
/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */
|
||||
/* { dg-require-effective-target powerpc_p9vector_ok } */
|
||||
/* { dg-options "-O3 -mcpu=power9" } */
|
||||
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
|
||||
|
||||
/* Verify that we get correct code when we vectorize this SAD loop using
|
||||
vabsdub. */
|
||||
|
||||
extern void abort ();
|
||||
extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__));
|
||||
|
||||
static int
|
||||
foo (unsigned char *w, int i, unsigned char *x, int j)
|
||||
{
|
||||
int tot = 0;
|
||||
for (int a = 0; a < 16; a++)
|
||||
{
|
||||
for (int b = 0; b < 16; b++)
|
||||
tot += abs (w[b] - x[b]);
|
||||
w += i;
|
||||
x += j;
|
||||
}
|
||||
return tot;
|
||||
}
|
||||
|
||||
void
|
||||
bar (unsigned char *w, unsigned char *x, int i, int *result)
|
||||
{
|
||||
*result = foo (w, 16, x, i);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
unsigned char m[256];
|
||||
unsigned char n[256];
|
||||
int sum, i;
|
||||
|
||||
for (i = 0; i < 256; ++i)
|
||||
if (i % 2 == 0)
|
||||
{
|
||||
m[i] = (i % 8) * 2 + 1;
|
||||
n[i] = -(i % 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
m[i] = -((i % 8) * 2 + 2);
|
||||
n[i] = -((i % 8) >> 1);
|
||||
}
|
||||
|
||||
bar (m, n, 16, &sum);
|
||||
|
||||
if (sum != 32384)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
57
gcc/testsuite/gcc.target/powerpc/sad-vectorize-4.c
Normal file
57
gcc/testsuite/gcc.target/powerpc/sad-vectorize-4.c
Normal file
@ -0,0 +1,57 @@
|
||||
/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */
|
||||
/* { dg-require-effective-target powerpc_p9vector_ok } */
|
||||
/* { dg-options "-O3 -mcpu=power9" } */
|
||||
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
|
||||
|
||||
/* Verify that we get correct code when we vectorize this SAD loop using
|
||||
vabsduh. */
|
||||
|
||||
extern void abort ();
|
||||
extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__));
|
||||
|
||||
static int
|
||||
foo (unsigned short *w, int i, unsigned short *x, int j)
|
||||
{
|
||||
int tot = 0;
|
||||
for (int a = 0; a < 16; a++)
|
||||
{
|
||||
for (int b = 0; b < 8; b++)
|
||||
tot += abs (w[b] - x[b]);
|
||||
w += i;
|
||||
x += j;
|
||||
}
|
||||
return tot;
|
||||
}
|
||||
|
||||
void
|
||||
bar (unsigned short *w, unsigned short *x, int i, int *result)
|
||||
{
|
||||
*result = foo (w, 8, x, i);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
unsigned short m[128];
|
||||
unsigned short n[128];
|
||||
int sum, i;
|
||||
|
||||
for (i = 0; i < 128; ++i)
|
||||
if (i % 2 == 0)
|
||||
{
|
||||
m[i] = (i % 8) * 2 + 1;
|
||||
n[i] = i % 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
m[i] = (i % 8) * 4 - 3;
|
||||
n[i] = (i % 8) >> 1;
|
||||
}
|
||||
|
||||
bar (m, n, 8, &sum);
|
||||
|
||||
if (sum != 992)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user