Add single_use to simplification (uncond_op + vec_cond -> cond_op).
gcc/ChangeLog:

	PR tree-optimization/104479
	* match.pd (uncond_op + vec_cond -> cond_op): Add single_use
	for the dest of uncond_op.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr104479.c: New test.
	* gcc.target/i386/cond_op_shift_w-1.c: Adjust testcase.
This commit is contained in:
parent
7cc6a8d00a
commit
165947fecf
12
gcc/match.pd
12
gcc/match.pd
@@ -7384,13 +7384,15 @@ and,
|
||||
(vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
|
||||
(with { tree op_type = TREE_TYPE (@4); }
|
||||
(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
|
||||
&& is_truth_type_for (op_type, TREE_TYPE (@0)))
|
||||
&& is_truth_type_for (op_type, TREE_TYPE (@0))
|
||||
&& single_use (@4))
|
||||
(view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
|
||||
(simplify
|
||||
(vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
|
||||
(with { tree op_type = TREE_TYPE (@4); }
|
||||
(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
|
||||
&& is_truth_type_for (op_type, TREE_TYPE (@0)))
|
||||
&& is_truth_type_for (op_type, TREE_TYPE (@0))
|
||||
&& single_use (@4))
|
||||
(view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
|
||||
|
||||
/* Same for ternary operations. */
|
||||
@@ -7400,13 +7402,15 @@ and,
|
||||
(vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
|
||||
(with { tree op_type = TREE_TYPE (@5); }
|
||||
(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
|
||||
&& is_truth_type_for (op_type, TREE_TYPE (@0)))
|
||||
&& is_truth_type_for (op_type, TREE_TYPE (@0))
|
||||
&& single_use (@5))
|
||||
(view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
|
||||
(simplify
|
||||
(vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
|
||||
(with { tree op_type = TREE_TYPE (@5); }
|
||||
(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
|
||||
&& is_truth_type_for (op_type, TREE_TYPE (@0)))
|
||||
&& is_truth_type_for (op_type, TREE_TYPE (@0))
|
||||
&& single_use (@5))
|
||||
(view_convert (cond_op (bit_not @0) @2 @3 @4
|
||||
(view_convert:op_type @1)))))))
|
||||
#endif
|
||||
|
@@ -1,7 +1,6 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized -DTYPE=int16" } */
|
||||
/* { dg-final { scan-tree-dump-times ".COND_SHR" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times ".COND_SHL" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "\.COND_" 4 "optimized" } } */
|
||||
/* { dg-final { scan-assembler-times "vpsraw" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpsllw" 1 } } */
|
||||
|
||||
|
33
gcc/testsuite/gcc.target/i386/pr104479.c
Normal file
33
gcc/testsuite/gcc.target/i386/pr104479.c
Normal file
@@ -0,0 +1,33 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=icelake-server -Ofast -fdump-tree-optimized" } */
|
||||
/* { dg-final { scan-tree-dump-not "\.COND_SHR" "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-not "\.COND_FMA" "optimized" } } */
|
||||
|
||||
/* Test kernel for PR tree-optimization/104479 (shift case).

   For each x in [0, i_width), shifts src[x] right by 3 and stores the
   shifted value to dst[x] when it is greater than 255; otherwise stores
   y[x].  The shifted value `temp` is used twice (in the comparison and as
   the selected value), i.e. it is not single-use.  The dg-final directive
   at the top of this file expects no ".COND_SHR" in the optimized dump for
   this shape.  */
void
|
||||
cond_shr (unsigned int* __restrict dst,
|
||||
unsigned int* __restrict src,
|
||||
unsigned int* __restrict y,
|
||||
int i_width)
|
||||
{
|
||||
for(int x = 0; x < i_width; x++)
|
||||
{
|
||||
/* Unconditional shift whose result feeds both the compare and the select.  */
unsigned int temp = src[x] >> 3;
|
||||
/* Select between the shift result and the fallback value y[x].  */
dst[x] = temp > 255 ? temp : y[x];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Test kernel for PR tree-optimization/104479 (ternary/FMA case).

   For each x in [0, i_width), computes
   __builtin_fmaf (src1[x], src2[x], src3[x]) and stores it to dst[x] when
   it is greater than 0.0f; otherwise stores y[x] (an unsigned int, converted
   to float).  The FMA result `temp` is used twice (in the comparison and as
   the selected value), i.e. it is not single-use.  The dg-final directive
   at the top of this file expects no ".COND_FMA" in the optimized dump for
   this shape.  */
void
|
||||
cond_fma (float* __restrict dst,
|
||||
float* __restrict src1,
|
||||
float* __restrict src2,
|
||||
float* __restrict src3,
|
||||
unsigned int* __restrict y,
|
||||
int i_width)
|
||||
{
|
||||
for(int x = 0; x < i_width; x++)
|
||||
{
|
||||
/* Unconditional FMA whose result feeds both the compare and the select.  */
float temp = __builtin_fmaf (src1[x], src2[x], src3[x]);
|
||||
/* Select between the FMA result and the fallback value y[x].  */
dst[x] = temp > 0.0f ? temp : y[x];
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user