Enhance vec_pack_trunc for integral mode mask.
For the testcase in the PR, the patch supports packing QI:4 -> HI:16 in multiple steps (first pack QI:4 -> QI:8 through vec_pack_sbool_trunc_qi, then pack QI:8 -> HI:16 through vec_pack_trunc_hi). The same applies to QI:2 -> HI:16, which is test4 in mask-pack-prefer128.c. gcc/ChangeLog: PR target/103771 * tree-vect-stmts.cc (supportable_narrowing_operation): Enhance integral mode mask pack with multiple steps, starting with vec_pack_sbool_trunc_optab when the number of elements is less than BITS_PER_UNIT. gcc/testsuite/ChangeLog: * gcc.target/i386/mask-pack-prefer128.c: New test. * gcc.target/i386/mask-pack-prefer256.c: New test. * gcc.target/i386/pr103771.c: New test.
This commit is contained in:
parent
ad4188f838
commit
8bc700f4c3
8
gcc/testsuite/gcc.target/i386/mask-pack-prefer128.c
Normal file
8
gcc/testsuite/gcc.target/i386/mask-pack-prefer128.c
Normal file
@ -0,0 +1,8 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=skylake-avx512 -O3 -fopenmp-simd -fdump-tree-vect-details -mprefer-vector-width=128" } */
|
||||
/* Disabling epilogues until we find a better way to deal with scans. */
|
||||
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */
|
||||
/* { dg-final { scan-assembler-not "maskmov" } } */
|
||||
|
||||
#include "mask-pack.c"
|
8
gcc/testsuite/gcc.target/i386/mask-pack-prefer256.c
Normal file
8
gcc/testsuite/gcc.target/i386/mask-pack-prefer256.c
Normal file
@ -0,0 +1,8 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=skylake-avx512 -O3 -fopenmp-simd -fdump-tree-vect-details -mprefer-vector-width=256" } */
|
||||
/* Disabling epilogues until we find a better way to deal with scans. */
|
||||
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */
|
||||
/* { dg-final { scan-assembler-not "maskmov" } } */
|
||||
|
||||
#include "mask-pack.c"
|
18
gcc/testsuite/gcc.target/i386/pr103771.c
Normal file
18
gcc/testsuite/gcc.target/i386/pr103771.c
Normal file
@ -0,0 +1,18 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=cascadelake -O3 -fdump-tree-vect-details -mprefer-vector-width=128" } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
|
||||
/* Narrow X to uint8_t.  When no bit outside the low 8 is set
   (x & ~255 == 0) X is returned unchanged; otherwise the result is
   (-x) >> 31 converted to uint8_t.
   NOTE(review): presumably this saturates to 0 for negative X and to 255
   for X > 255; that relies on a 32-bit int and on right shift of a
   negative value being arithmetic, which ISO C leaves
   implementation-defined -- confirm for the target.  */
static uint8_t x264_clip_uint8 (int x)
|
||||
{
|
||||
return x & (~255) ? (-x) >> 31 : x;
|
||||
}
|
||||
|
||||
/* For each index in [0, I_WIDTH), multiply the SRC byte by I_SCALE, pass
   the int product through x264_clip_uint8 and store the result in DST.
   DST and SRC are __restrict-qualified, so the two buffers are declared
   not to alias.  Nothing is stored when I_WIDTH <= 0.  */
void
|
||||
mc_weight (uint8_t* __restrict dst, uint8_t* __restrict src,
|
||||
int i_width,int i_scale)
|
||||
{
|
||||
/* This is the only loop in the file, so it is the one counted by the
   "vectorized 1 loops" dump scan in the dg-final directive.  */
for(int x = 0; x < i_width; x++)
|
||||
dst[x] = x264_clip_uint8 (src[x] * i_scale);
|
||||
}
|
@ -12124,6 +12124,7 @@ supportable_narrowing_operation (enum tree_code code,
|
||||
tree intermediate_type, prev_type;
|
||||
machine_mode intermediate_mode, prev_mode;
|
||||
int i;
|
||||
unsigned HOST_WIDE_INT n_elts;
|
||||
bool uns;
|
||||
|
||||
*multi_step_cvt = 0;
|
||||
@ -12133,8 +12134,9 @@ supportable_narrowing_operation (enum tree_code code,
|
||||
c1 = VEC_PACK_TRUNC_EXPR;
|
||||
if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
|
||||
&& VECTOR_BOOLEAN_TYPE_P (vectype)
|
||||
&& TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
|
||||
&& SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
|
||||
&& SCALAR_INT_MODE_P (TYPE_MODE (vectype))
|
||||
&& TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
|
||||
&& n_elts < BITS_PER_UNIT)
|
||||
optab1 = vec_pack_sbool_trunc_optab;
|
||||
else
|
||||
optab1 = optab_for_tree_code (c1, vectype, optab_default);
|
||||
@ -12225,8 +12227,9 @@ supportable_narrowing_operation (enum tree_code code,
|
||||
= lang_hooks.types.type_for_mode (intermediate_mode, uns);
|
||||
if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
|
||||
&& VECTOR_BOOLEAN_TYPE_P (prev_type)
|
||||
&& intermediate_mode == prev_mode
|
||||
&& SCALAR_INT_MODE_P (prev_mode))
|
||||
&& SCALAR_INT_MODE_P (prev_mode)
|
||||
&& TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
|
||||
&& n_elts < BITS_PER_UNIT)
|
||||
interm_optab = vec_pack_sbool_trunc_optab;
|
||||
else
|
||||
interm_optab
|
||||
|
Loading…
x
Reference in New Issue
Block a user