S/390: arch12: New vector popcount variants
arch12 provides pop count vector instructions for bigger elements than just chars. gcc/testsuite/ChangeLog: 2017-03-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com> * gcc.target/s390/vxe/popcount-1.c: New test. gcc/ChangeLog: 2017-03-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com> * config/s390/vector.md ("popcountv16qi2", "popcountv8hi2") ("popcountv4si2", "popcountv2di2"): Rename to ... ("popcount<mode>2", "popcountv8hi2_vx", "popcountv4si2_vx") ("popcountv2di2_vx"): ... these and add !TARGET_VXE to the condition. ("popcount<mode>2_vxe"): New pattern. From-SVN: r246454
This commit is contained in:
parent
9ec988605d
commit
6c7774d15e
|
@ -1,3 +1,12 @@
|
|||
2017-03-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
|
||||
|
||||
* config/s390/vector.md ("popcountv16qi2", "popcountv8hi2")
|
||||
("popcountv4si2", "popcountv2di2"): Rename to ...
|
||||
("popcount<mode>2", "popcountv8hi2_vx", "popcountv4si2_vx")
|
||||
("popcountv2di2_vx"): ... these and add !TARGET_VXE to the
|
||||
condition.
|
||||
("popcount<mode>2_vxe"): New pattern.
|
||||
|
||||
2017-03-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
|
||||
|
||||
* common/config/s390/s390-common.c (processor_flags_table): Add
|
||||
|
|
|
@ -715,11 +715,33 @@
|
|||
|
||||
; Vector population count
|
||||
|
||||
(define_insn "popcountv16qi2"
|
||||
; Expander for the vector population count on every VI_HW mode.
; It never emits its own template: when TARGET_VXE is set it emits the
; popcount<mode>2_vxe pattern, otherwise the popcount<mode>2_vx pattern
; for the same mode.
(define_expand "popcount<mode>2"
|
||||
[(set (match_operand:VI_HW 0 "register_operand" "=v")
|
||||
(unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v")]
|
||||
UNSPEC_POPCNT))]
|
||||
"TARGET_VX"
|
||||
{
|
||||
if (TARGET_VXE)
|
||||
emit_insn (gen_popcount<mode>2_vxe (operands[0], operands[1]));
|
||||
else
|
||||
emit_insn (gen_popcount<mode>2_vx (operands[0], operands[1]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
; vpopctb, vpopcth, vpopctf, vpopctg
|
||||
; Population count of vector operand 1 into vector operand 0 as a single
; instruction.  Only enabled when TARGET_VXE holds; the mnemonic suffix
; is supplied by the <bhfgq> mode attribute.
(define_insn "popcount<mode>2_vxe"
|
||||
[(set (match_operand:VI_HW 0 "register_operand" "=v")
|
||||
(unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v")]
|
||||
UNSPEC_POPCNT))]
|
||||
"TARGET_VXE"
|
||||
"vpopct<bhfgq>\t%v0,%v1"
|
||||
[(set_attr "op_type" "VRR")])
|
||||
|
||||
(define_insn "popcountv16qi2_vx"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=v")
|
||||
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")]
|
||||
UNSPEC_POPCNT))]
|
||||
"TARGET_VX"
|
||||
"TARGET_VX && !TARGET_VXE"
|
||||
"vpopct\t%v0,%v1,0"
|
||||
[(set_attr "op_type" "VRR")])
|
||||
|
||||
|
@ -729,7 +751,7 @@
|
|||
; of the result, add it to the result and extend it to halfword
|
||||
; element size (unpack).
|
||||
|
||||
(define_expand "popcountv8hi2"
|
||||
(define_expand "popcountv8hi2_vx"
|
||||
[(set (match_dup 2)
|
||||
(unspec:V16QI [(subreg:V16QI (match_operand:V8HI 1 "register_operand" "v") 0)]
|
||||
UNSPEC_POPCNT))
|
||||
|
@ -761,7 +783,7 @@
|
|||
(and:V8HI (subreg:V8HI (match_dup 2) 0)
|
||||
(subreg:V8HI (match_dup 3) 0)))
|
||||
]
|
||||
"TARGET_VX"
|
||||
"TARGET_VX && !TARGET_VXE"
|
||||
{
|
||||
operands[2] = gen_reg_rtx (V16QImode);
|
||||
operands[3] = gen_reg_rtx (V16QImode);
|
||||
|
@ -769,20 +791,20 @@
|
|||
operands[5] = CONST0_RTX (V16QImode);
|
||||
})
|
||||
|
||||
(define_expand "popcountv4si2"
|
||||
(define_expand "popcountv4si2_vx"
|
||||
[(set (match_dup 2)
|
||||
(unspec:V16QI [(subreg:V16QI (match_operand:V4SI 1 "register_operand" "v") 0)]
|
||||
UNSPEC_POPCNT))
|
||||
(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_dup 2) (match_dup 3)]
|
||||
UNSPEC_VEC_VSUM))]
|
||||
"TARGET_VX"
|
||||
"TARGET_VX && !TARGET_VXE"
|
||||
{
|
||||
operands[2] = gen_reg_rtx (V16QImode);
|
||||
operands[3] = force_reg (V16QImode, CONST0_RTX (V16QImode));
|
||||
})
|
||||
|
||||
(define_expand "popcountv2di2"
|
||||
(define_expand "popcountv2di2_vx"
|
||||
[(set (match_dup 2)
|
||||
(unspec:V16QI [(subreg:V16QI (match_operand:V2DI 1 "register_operand" "v") 0)]
|
||||
UNSPEC_POPCNT))
|
||||
|
@ -792,7 +814,7 @@
|
|||
(set (match_operand:V2DI 0 "register_operand" "=v")
|
||||
(unspec:V2DI [(match_dup 3) (match_dup 5)]
|
||||
UNSPEC_VEC_VSUMG))]
|
||||
"TARGET_VX"
|
||||
"TARGET_VX && !TARGET_VXE"
|
||||
{
|
||||
operands[2] = gen_reg_rtx (V16QImode);
|
||||
operands[3] = gen_reg_rtx (V4SImode);
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2017-03-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/s390/vxe/popcount-1.c: New test.
|
||||
|
||||
2017-03-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/s390/vxe/bitops-1.c: New test.
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O3 -mzarch -march=arch12 --save-temps" } */
|
||||
/* { dg-require-effective-target s390_vxe } */
|
||||
|
||||
/* Vectorization currently only works for v4si. v8hi at least uses 2x
|
||||
vpopctf but no vpopcth. */
|
||||
|
||||
/* 16-byte vector types, one per unsigned element type used by the
   test functions below: char, short, int and long long.  */
typedef unsigned char uv16qi __attribute__((vector_size(16)));
|
||||
typedef unsigned short uv8hi __attribute__((vector_size(16)));
|
||||
typedef unsigned int uv4si __attribute__((vector_size(16)));
|
||||
typedef unsigned long long uv2di __attribute__((vector_size(16)));
|
||||
|
||||
uv16qi __attribute__((noinline))
|
||||
vpopctb (uv16qi a)
|
||||
{
|
||||
uv16qi r;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
r[i] = __builtin_popcount (a[i]);
|
||||
|
||||
return r;
|
||||
}
|
||||
/* { dg-final { scan-assembler "vpopctb\t%v24,%v24" { xfail *-*-* } } } */
|
||||
|
||||
uv8hi __attribute__((noinline))
|
||||
vpopcth (uv8hi a)
|
||||
{
|
||||
uv8hi r;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
r[i] = __builtin_popcount (a[i]);
|
||||
|
||||
return r;
|
||||
}
|
||||
/* { dg-final { scan-assembler "vpopcth\t%v24,%v24" { xfail *-*-* } } } */
|
||||
|
||||
uv4si __attribute__((noinline))
|
||||
vpopctf (uv4si a)
|
||||
{
|
||||
uv4si r;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
r[i] = __builtin_popcount (a[i]);
|
||||
|
||||
return r;
|
||||
}
|
||||
/* { dg-final { scan-assembler "vpopctf\t%v24,%v24" } } */
|
||||
|
||||
uv2di __attribute__((noinline))
|
||||
vpopctg (uv2di a)
|
||||
{
|
||||
uv2di r;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
r[i] = __builtin_popcount (a[i]);
|
||||
|
||||
return r;
|
||||
}
|
||||
/* { dg-final { scan-assembler "vpopctg\t%v24,%v24" { xfail *-*-* } } } */
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
uv16qi a = (uv16qi){ 42, 1, ~0, 2, 42, 1, ~0, 2, 42, 1, ~0, 2, 42, 1, ~0, 2 };
|
||||
if (__builtin_s390_vec_any_ne (vpopctb (a),
|
||||
(uv16qi){ 3, 1, 8, 1, 3, 1, 8, 1,
|
||||
3, 1, 8, 1, 3, 1, 8, 1 }))
|
||||
__builtin_abort ();
|
||||
|
||||
if (__builtin_s390_vec_any_ne (vpopcth ((uv8hi){ 42, 1, ~0, 2, 42, 1, ~0, 2 }),
|
||||
(uv8hi){ 3, 1, 16, 1, 3, 1, 16, 1 }))
|
||||
__builtin_abort ();
|
||||
|
||||
if (__builtin_s390_vec_any_ne (vpopctf ((uv4si){ 42, 1, ~0, 2 }),
|
||||
(uv4si){ 3, 1, 32, 1 }))
|
||||
__builtin_abort ();
|
||||
|
||||
if (__builtin_s390_vec_any_ne (vpopctg ((uv2di){ 42, 1 }),
|
||||
(uv2di){ 3, 1 }))
|
||||
__builtin_abort ();
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue