Add popcount<mode>2 expanders to enable popcount auto-vectorization under AVX512BITALG/AVX512VPOPCNTDQ targets.
gcc/ChangeLog

PR target/97770
* config/i386/sse.md (popcount<mode>2): New expander for SI/DI vector modes.
(popcount<mode>2): Likewise for QI/HI vector modes.

gcc/testsuite/ChangeLog

PR target/97770
* gcc.target/i386/avx512bitalg-pr97770-1.c: New test.
* gcc.target/i386/avx512vpopcntdq-pr97770-1.c: Likewise.
* gcc.target/i386/avx512vpopcntdq-pr97770-2.c: Likewise.
* gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c: Likewise.
This commit is contained in:
parent
c05ece92c6
commit
81d590760c
@ -22702,6 +22702,12 @@
|
|||||||
(set_attr ("prefix") ("evex"))
|
(set_attr ("prefix") ("evex"))
|
||||||
(set_attr ("mode") ("TI"))])
|
(set_attr ("mode") ("TI"))])
|
||||||
|
|
||||||
|
(define_expand "popcount<mode>2"
|
||||||
|
[(set (match_operand:VI48_AVX512VL 0 "register_operand")
|
||||||
|
(popcount:VI48_AVX512VL
|
||||||
|
(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
|
||||||
|
"TARGET_AVX512VPOPCNTDQ")
|
||||||
|
|
||||||
(define_insn "vpopcount<mode><mask_name>"
|
(define_insn "vpopcount<mode><mask_name>"
|
||||||
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
|
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
|
||||||
(popcount:VI48_AVX512VL
|
(popcount:VI48_AVX512VL
|
||||||
@ -22746,6 +22752,12 @@
|
|||||||
"TARGET_SSE && TARGET_64BIT"
|
"TARGET_SSE && TARGET_64BIT"
|
||||||
"jmp\t%P1")
|
"jmp\t%P1")
|
||||||
|
|
||||||
|
(define_expand "popcount<mode>2"
|
||||||
|
[(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
|
||||||
|
(popcount:VI12_AVX512VL
|
||||||
|
(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
|
||||||
|
"TARGET_AVX512BITALG")
|
||||||
|
|
||||||
(define_insn "vpopcount<mode><mask_name>"
|
(define_insn "vpopcount<mode><mask_name>"
|
||||||
[(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
|
[(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
|
||||||
(popcount:VI12_AVX512VL
|
(popcount:VI12_AVX512VL
|
||||||
|
60
gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
Normal file
60
gcc/testsuite/gcc.target/i386/avx512bitalg-pr97770-1.c
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
/* PR target/97770 */
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -mavx512bitalg -mavx512vl -mprefer-vector-width=512" } */
|
||||||
|
/* The scans below are xfailed because there is no IFN for QImode/HImode popcount.  */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
|
||||||
|
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountb_128 (char * __restrict dest, char* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 16; i++)
|
||||||
|
dest[i] = __builtin_popcount (src[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountw_128 (short* __restrict dest, short* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 8; i++)
|
||||||
|
dest[i] = __builtin_popcount (src[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountb_256 (char * __restrict dest, char* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 32; i++)
|
||||||
|
dest[i] = __builtin_popcount (src[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountw_256 (short* __restrict dest, short* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 16; i++)
|
||||||
|
dest[i] = __builtin_popcount (src[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountb_512 (char * __restrict dest, char* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 64; i++)
|
||||||
|
dest[i] = __builtin_popcount (src[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountw_512 (short* __restrict dest, short* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 32; i++)
|
||||||
|
dest[i] = __builtin_popcount (src[i]);
|
||||||
|
}
|
63
gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
Normal file
63
gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-1.c
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
/* PR target/97770 */
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl -mprefer-vector-width=512" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
|
||||||
|
/* Add xfail since the current vectorizer cannot generate the expected code for DImode popcount.  */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 { xfail *-*-* } } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 { xfail *-*-* } } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 { xfail *-*-* } } } */
|
||||||
|
#ifndef AVX512VPOPCNTQ_H_INCLUDED
|
||||||
|
#define AVX512VPOPCNTQ_H_INCLUDED
|
||||||
|
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountd_128 (int* __restrict dest, int* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 4; i++)
|
||||||
|
dest[i] = __builtin_popcount (src[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountq_128 (long long* __restrict dest, long long* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 2; i++)
|
||||||
|
dest[i] = __builtin_popcountll (src[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountd_256 (int* __restrict dest, int* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 8; i++)
|
||||||
|
dest[i] = __builtin_popcount (src[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountq_256 (long long* __restrict dest, long long* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 4; i++)
|
||||||
|
dest[i] = __builtin_popcountll (src[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountd_512 (int* __restrict dest, int* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 16; i++)
|
||||||
|
dest[i] = __builtin_popcount (src[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
__attribute__ ((noipa, optimize("-O3")))
|
||||||
|
popcountq_512 (long long* __restrict dest, long long* src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i != 8; i++)
|
||||||
|
dest[i] = __builtin_popcountll (src[i]);
|
||||||
|
}
|
||||||
|
#endif
|
39
gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-2.c
Normal file
39
gcc/testsuite/gcc.target/i386/avx512vpopcntdq-pr97770-2.c
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
/* { dg-do run } */
|
||||||
|
/* { dg-options "-O2 -mavx512vpopcntdq" } */
|
||||||
|
|
||||||
|
#define AVX512VPOPCNTDQ
|
||||||
|
|
||||||
|
#include "avx512f-helper.h"
|
||||||
|
#include "avx512vpopcntdq-pr97770-1.c"
|
||||||
|
|
||||||
|
#define SIZE_D AVX512F_LEN / 32
|
||||||
|
#define SIZE_Q AVX512F_LEN / 64
|
||||||
|
|
||||||
|
|
||||||
|
#define RTEST(TYPE, LEN, SIZE, MODE) \
|
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
TYPE res[SIZE], src[SIZE], res_ref[SIZE], v; \
|
||||||
|
int i, j, ret; \
|
||||||
|
for (i = 0; i < SIZE; i++) \
|
||||||
|
{ \
|
||||||
|
v = src[i] = i * 2 + 3; \
|
||||||
|
ret = 0; \
|
||||||
|
for (j = 0; j < sizeof(v) * 8; j++) \
|
||||||
|
if ((v & ((TYPE)1 << (TYPE) j))) \
|
||||||
|
ret++; \
|
||||||
|
res_ref[i] = ret; \
|
||||||
|
} \
|
||||||
|
EVAL(popcount, MODE, LEN) (res, src); \
|
||||||
|
for (i = 0; i < SIZE; i++) \
|
||||||
|
if (res[i] != res_ref[i]) \
|
||||||
|
abort (); \
|
||||||
|
} \
|
||||||
|
while (0)
|
||||||
|
|
||||||
|
void
|
||||||
|
TEST (void)
|
||||||
|
{
|
||||||
|
RTEST (long long, AVX512F_LEN, SIZE_Q, q_);
|
||||||
|
RTEST (int, AVX512F_LEN, SIZE_D, d_);
|
||||||
|
}
|
14
gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c
Normal file
14
gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
/* { dg-do run } */
|
||||||
|
/* { dg-options "-O3 -mavx512vpopcntdq -mavx512vl" } */
|
||||||
|
|
||||||
|
#define AVX512VL
|
||||||
|
#define AVX512F_LEN 256
|
||||||
|
#define AVX512F_LEN_HALF 128
|
||||||
|
#include "avx512vpopcntdq-pr97770-2.c"
|
||||||
|
|
||||||
|
#undef AVX512F_LEN
|
||||||
|
#undef AVX512F_LEN_HALF
|
||||||
|
|
||||||
|
#define AVX512F_LEN 128
|
||||||
|
#define AVX512F_LEN_HALF 128
|
||||||
|
#include "avx512vpopcntdq-pr97770-2.c"
|
Loading…
Reference in New Issue
Block a user