Add popcount<mode> expander to enable popcount auto vectorization under AVX512BITALG/AVX512POPCNTDQ target.

gcc/ChangeLog

	PR target/97770
	* config/i386/sse.md (popcount<mode>2): New expander
	for SI/DI vector modes.
	(popcount<mode>2): Likewise for QI/HI vector modes.

gcc/testsuite/ChangeLog

	PR target/97770
	* gcc.target/i386/avx512bitalg-pr97770-1.c: New test.
	* gcc.target/i386/avx512vpopcntdq-pr97770-1.c: Likewise.
	* gcc.target/i386/avx512vpopcntdq-pr97770-2.c: Likewise.
	* gcc.target/i386/avx512vpopcntdqvl-pr97770-1.c: Likewise.
This commit is contained in:
Hongyu Wang 2020-11-11 09:41:13 +08:00 committed by liuhongt
parent c05ece92c6
commit 81d590760c
5 changed files with 188 additions and 0 deletions

View File

@ -22702,6 +22702,12 @@
(set_attr ("prefix") ("evex"))
(set_attr ("mode") ("TI"))])
(define_expand "popcount<mode>2"
[(set (match_operand:VI48_AVX512VL 0 "register_operand")
(popcount:VI48_AVX512VL
(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
"TARGET_AVX512VPOPCNTDQ")
(define_insn "vpopcount<mode><mask_name>"
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
(popcount:VI48_AVX512VL
@ -22746,6 +22752,12 @@
"TARGET_SSE && TARGET_64BIT"
"jmp\t%P1")
(define_expand "popcount<mode>2"
[(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
(popcount:VI12_AVX512VL
(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512BITALG")
(define_insn "vpopcount<mode><mask_name>"
[(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
(popcount:VI12_AVX512VL

View File

@ -0,0 +1,60 @@
/* PR target/97770 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512bitalg -mavx512vl -mprefer-vector-width=512" } */
/* Add xfail since no IFN for QI/HImode popcount */
/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
#include <immintrin.h>
void
__attribute__ ((noipa, optimize("-O3")))
popcountb_128 (char * __restrict dest, char* src)
{
for (int i = 0; i != 16; i++)
dest[i] = __builtin_popcount (src[i]);
}
void
__attribute__ ((noipa, optimize("-O3")))
popcountw_128 (short* __restrict dest, short* src)
{
for (int i = 0; i != 8; i++)
dest[i] = __builtin_popcount (src[i]);
}
void
__attribute__ ((noipa, optimize("-O3")))
popcountb_256 (char * __restrict dest, char* src)
{
for (int i = 0; i != 32; i++)
dest[i] = __builtin_popcount (src[i]);
}
void
__attribute__ ((noipa, optimize("-O3")))
popcountw_256 (short* __restrict dest, short* src)
{
for (int i = 0; i != 16; i++)
dest[i] = __builtin_popcount (src[i]);
}
void
__attribute__ ((noipa, optimize("-O3")))
popcountb_512 (char * __restrict dest, char* src)
{
for (int i = 0; i != 64; i++)
dest[i] = __builtin_popcount (src[i]);
}
void
__attribute__ ((noipa, optimize("-O3")))
popcountw_512 (short* __restrict dest, short* src)
{
for (int i = 0; i != 32; i++)
dest[i] = __builtin_popcount (src[i]);
}

View File

@ -0,0 +1,63 @@
/* PR target/97770 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl -mprefer-vector-width=512" } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
/* Add xfail since current vectorizor cannot generate expected code for DImode popcount */
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 { xfail *-*-* } } } */
#ifndef AVX512VPOPCNTQ_H_INCLUDED
#define AVX512VPOPCNTQ_H_INCLUDED
#include <immintrin.h>
void
__attribute__ ((noipa, optimize("-O3")))
popcountd_128 (int* __restrict dest, int* src)
{
for (int i = 0; i != 4; i++)
dest[i] = __builtin_popcount (src[i]);
}
void
__attribute__ ((noipa, optimize("-O3")))
popcountq_128 (long long* __restrict dest, long long* src)
{
for (int i = 0; i != 2; i++)
dest[i] = __builtin_popcountll (src[i]);
}
void
__attribute__ ((noipa, optimize("-O3")))
popcountd_256 (int* __restrict dest, int* src)
{
for (int i = 0; i != 8; i++)
dest[i] = __builtin_popcount (src[i]);
}
void
__attribute__ ((noipa, optimize("-O3")))
popcountq_256 (long long* __restrict dest, long long* src)
{
for (int i = 0; i != 4; i++)
dest[i] = __builtin_popcountll (src[i]);
}
void
__attribute__ ((noipa, optimize("-O3")))
popcountd_512 (int* __restrict dest, int* src)
{
for (int i = 0; i != 16; i++)
dest[i] = __builtin_popcount (src[i]);
}
void
__attribute__ ((noipa, optimize("-O3")))
popcountq_512 (long long* __restrict dest, long long* src)
{
for (int i = 0; i != 8; i++)
dest[i] = __builtin_popcountll (src[i]);
}
#endif

View File

@ -0,0 +1,39 @@
/* { dg-do run } */
/* { dg-options "-O2 -mavx512vpopcntdq" } */
#define AVX512VPOPCNTDQ
#include "avx512f-helper.h"
#include "avx512vpopcntdq-pr97770-1.c"
#define SIZE_D AVX512F_LEN / 32
#define SIZE_Q AVX512F_LEN / 64
#define RTEST(TYPE, LEN, SIZE, MODE) \
do \
{ \
TYPE res[SIZE], src[SIZE], res_ref[SIZE], v; \
int i, j, ret; \
for (i = 0; i < SIZE; i++) \
{ \
v = src[i] = i * 2 + 3; \
ret = 0; \
for (j = 0; j < sizeof(v) * 8; j++) \
if ((v & ((TYPE)1 << (TYPE) j))) \
ret++; \
res_ref[i] = ret; \
} \
EVAL(popcount, MODE, LEN) (res, src); \
for (i = 0; i < SIZE; i++) \
if (res[i] != res_ref[i]) \
abort (); \
} \
while (0)
void
TEST (void)
{
RTEST (long long, AVX512F_LEN, SIZE_Q, q_);
RTEST (int, AVX512F_LEN, SIZE_D, d_);
}

View File

@ -0,0 +1,14 @@
/* { dg-do run } */
/* { dg-options "-O3 -mavx512vpopcntdq -mavx512vl" } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512vpopcntdq-pr97770-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512vpopcntdq-pr97770-2.c"