[16/n] PR85694: Add detection of averaging operations

This patch adds detection of average instructions:

       a = (((wide) b + (wide) c) >> 1);
   --> a = (wide) .AVG_FLOOR (b, c);

       a = (((wide) b + (wide) c + 1) >> 1);
   --> a = (wide) .AVG_CEIL (b, c);

in cases where users of "a" need only the low half of the result,
making the cast to (wide) redundant.  The heavy lifting was done by
earlier patches.

This showed up another problem in vectorizable_call: if the call is a
pattern definition statement rather than the main pattern statement,
the type of the vectorised call might be different from the type of the
original statement.
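
For example (an illustrative sketch following the notation used in the
comment above vect_recog_average_pattern, not actual dump output), a
statement

        TYPE avg = ((TYPE) a + (TYPE) b) >> 1;

is replaced by the pattern sequence

        NTYPE avg' = .AVG_FLOOR (a, b);     <-- pattern definition statement
        TYPE  avg  = (TYPE) avg';           <-- main pattern statement

so the lhs of the vectorised call has type NTYPE rather than TYPE, and the
dummy scalar assignment built after the call has been vectorised must take
its zero constant from the call's own lhs instead of from the lhs of the
original statement.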

2018-07-03  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/85694
	* doc/md.texi (avgM3_floor, uavgM3_floor, avgM3_ceil)
	(uavgM3_ceil): Document new optabs.
	* doc/sourcebuild.texi (vect_avg_qi): Document new target selector.
	* internal-fn.def (IFN_AVG_FLOOR, IFN_AVG_CEIL): New internal
	functions.
	* optabs.def (savg_floor_optab, uavg_floor_optab, savg_ceil_optab)
	(uavg_ceil_optab): New optabs.
	* tree-vect-patterns.c (vect_recog_average_pattern): New function.
	(vect_vect_recog_func_ptrs): Add it.
	* tree-vect-stmts.c (vectorizable_call): Get the type of the zero
	constant directly from the associated lhs.

gcc/testsuite/
	PR tree-optimization/85694
	* lib/target-supports.exp (check_effective_target_vect_avg_qi): New
	proc.
	* gcc.dg/vect/vect-avg-1.c: New test.
	* gcc.dg/vect/vect-avg-2.c: Likewise.
	* gcc.dg/vect/vect-avg-3.c: Likewise.
	* gcc.dg/vect/vect-avg-4.c: Likewise.
	* gcc.dg/vect/vect-avg-5.c: Likewise.
	* gcc.dg/vect/vect-avg-6.c: Likewise.
	* gcc.dg/vect/vect-avg-7.c: Likewise.
	* gcc.dg/vect/vect-avg-8.c: Likewise.
	* gcc.dg/vect/vect-avg-9.c: Likewise.
	* gcc.dg/vect/vect-avg-10.c: Likewise.
	* gcc.dg/vect/vect-avg-11.c: Likewise.
	* gcc.dg/vect/vect-avg-12.c: Likewise.
	* gcc.dg/vect/vect-avg-13.c: Likewise.
	* gcc.dg/vect/vect-avg-14.c: Likewise.

From-SVN: r262335
Author: Richard Sandiford, 2018-07-03 10:03:44 +00:00 (committed by Richard Sandiford)
Commit: 0267732bae (parent 4ef79c960a)
23 changed files with 502 additions and 3 deletions

gcc/ChangeLog

@@ -1,3 +1,18 @@
2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
PR tree-optimization/85694
* doc/md.texi (avgM3_floor, uavgM3_floor, avgM3_ceil)
(uavgM3_ceil): Document new optabs.
* doc/sourcebuild.texi (vect_avg_qi): Document new target selector.
* internal-fn.def (IFN_AVG_FLOOR, IFN_AVG_CEIL): New internal
functions.
* optabs.def (savg_floor_optab, uavg_floor_optab, savg_ceil_optab)
(uavg_ceil_optab): New optabs.
* tree-vect-patterns.c (vect_recog_average_pattern): New function.
(vect_vect_recog_func_ptrs): Add it.
* tree-vect-stmts.c (vectorizable_call): Get the type of the zero
constant directly from the associated lhs.
2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
* tree-vect-patterns.c (vect_split_statement): New function.

gcc/doc/md.texi

@@ -5599,6 +5599,34 @@ Other shift and rotate instructions, analogous to the
Vector shift and rotate instructions that take vectors as operand 2
instead of a scalar type.
@cindex @code{avg@var{m}3_floor} instruction pattern
@cindex @code{uavg@var{m}3_floor} instruction pattern
@item @samp{avg@var{m}3_floor}
@itemx @samp{uavg@var{m}3_floor}
Signed and unsigned average instructions. These instructions add
operands 1 and 2 without truncation, divide the result by 2,
round towards -Inf, and store the result in operand 0. This is
equivalent to the C code:
@smallexample
narrow op0, op1, op2;
@dots{}
op0 = (narrow) (((wide) op1 + (wide) op2) >> 1);
@end smallexample
where the sign of @samp{narrow} determines whether this is a signed
or unsigned operation.
@cindex @code{avg@var{m}3_ceil} instruction pattern
@cindex @code{uavg@var{m}3_ceil} instruction pattern
@item @samp{avg@var{m}3_ceil}
@itemx @samp{uavg@var{m}3_ceil}
Like @samp{avg@var{m}3_floor} and @samp{uavg@var{m}3_floor}, but round
towards +Inf. This is equivalent to the C code:
@smallexample
narrow op0, op1, op2;
@dots{}
op0 = (narrow) (((wide) op1 + (wide) op2 + 1) >> 1);
@end smallexample
@cindex @code{bswap@var{m}2} instruction pattern
@item @samp{bswap@var{m}2}
Reverse the order of bytes of operand 1 and store the result in operand 0.

gcc/doc/sourcebuild.texi

@@ -1417,6 +1417,10 @@ Target supports Fortran @code{real} kinds larger than @code{real(8)}.
The target's ABI allows stack variables to be aligned to the preferred
vector alignment.
@item vect_avg_qi
Target supports both signed and unsigned averaging operations on vectors
of bytes.
@item vect_condition
Target supports vector conditional operations.

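(For reference, the new tests guard their tree-dump scans with this
selector; e.g. from vect-avg-1.c:)

    /* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
    /* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
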
gcc/internal-fn.def

@@ -143,6 +143,11 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
savg_floor, uavg_floor, binary)
DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
savg_ceil, uavg_ceil, binary)
DEF_INTERNAL_OPTAB_FN (COND_ADD, ECF_CONST, cond_add, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_SUB, ECF_CONST, cond_sub, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_MUL, ECF_CONST, cond_smul, cond_binary)

gcc/optabs.def

@@ -316,6 +316,10 @@ OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a")
OPTAB_D (extract_last_optab, "extract_last_$a")
OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
OPTAB_D (savg_floor_optab, "avg$a3_floor")
OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil")
OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
OPTAB_D (udot_prod_optab, "udot_prod$I$a")

gcc/testsuite/ChangeLog

@@ -1,3 +1,23 @@
2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
PR tree-optimization/85694
* lib/target-supports.exp (check_effective_target_vect_avg_qi): New
proc.
* gcc.dg/vect/vect-avg-1.c: New test.
* gcc.dg/vect/vect-avg-2.c: Likewise.
* gcc.dg/vect/vect-avg-3.c: Likewise.
* gcc.dg/vect/vect-avg-4.c: Likewise.
* gcc.dg/vect/vect-avg-5.c: Likewise.
* gcc.dg/vect/vect-avg-6.c: Likewise.
* gcc.dg/vect/vect-avg-7.c: Likewise.
* gcc.dg/vect/vect-avg-8.c: Likewise.
* gcc.dg/vect/vect-avg-9.c: Likewise.
* gcc.dg/vect/vect-avg-10.c: Likewise.
* gcc.dg/vect/vect-avg-11.c: Likewise.
* gcc.dg/vect/vect-avg-12.c: Likewise.
* gcc.dg/vect/vect-avg-13.c: Likewise.
* gcc.dg/vect/vect-avg-14.c: Likewise.
2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
* gcc.dg/vect/vect-over-widen-5.c: Test that the extensions

gcc/testsuite/gcc.dg/vect/vect-avg-1.c

@@ -0,0 +1,47 @@
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
#define N 50
#ifndef SIGNEDNESS
#define SIGNEDNESS unsigned
#endif
#ifndef BIAS
#define BIAS 0
#endif
void __attribute__ ((noipa))
f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
SIGNEDNESS char *restrict c)
{
for (__INTPTR_TYPE__ i = 0; i < N; ++i)
a[i] = (b[i] + c[i] + BIAS) >> 1;
}
#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
int
main (void)
{
check_vect ();
SIGNEDNESS char a[N], b[N], c[N];
for (int i = 0; i < N; ++i)
{
b[i] = BASE1 + i * 5;
c[i] = BASE2 + i * 4;
asm volatile ("" ::: "memory");
}
f (a, b, c);
for (int i = 0; i < N; ++i)
if (a[i] != ((BASE1 + BASE2 + i * 9 + BIAS) >> 1))
__builtin_abort ();
return 0;
}
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-10.c

@@ -0,0 +1,8 @@
/* { dg-require-effective-target vect_int } */
#define SIGNEDNESS signed
#define BIAS 2
#include "vect-avg-5.c"
/* { dg-final { scan-tree-dump-not "vect_recog_average_pattern: detected" "vect" } } */

gcc/testsuite/gcc.dg/vect/vect-avg-11.c

@@ -0,0 +1,57 @@
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
#define N 50
#ifndef SIGNEDNESS
#define SIGNEDNESS unsigned
#endif
#ifndef BIAS
#define BIAS 0
#endif
void __attribute__ ((noipa))
f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
SIGNEDNESS char *restrict c)
{
for (__INTPTR_TYPE__ i = 0; i < N; ++i)
{
int tmp = b[i];
tmp ^= 0x55;
tmp += BIAS;
tmp += c[i];
tmp >>= 1;
tmp |= 0x40;
a[i] = tmp;
}
}
#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
int
main (void)
{
check_vect ();
SIGNEDNESS char a[N], b[N], c[N];
for (int i = 0; i < N; ++i)
{
b[i] = BASE1 + i * 5;
c[i] = BASE2 + i * 4;
asm volatile ("" ::: "memory");
}
f (a, b, c);
for (int i = 0; i < N; ++i)
if (a[i] != (((((BASE1 + i * 5) ^ 0x55)
+ (BASE2 + i * 4)
+ BIAS) >> 1) | 0x40))
__builtin_abort ();
return 0;
}
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-12.c

@@ -0,0 +1,10 @@
/* { dg-require-effective-target vect_int } */
#define SIGNEDNESS signed
#include "vect-avg-11.c"
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-13.c

@@ -0,0 +1,11 @@
/* { dg-require-effective-target vect_int } */
#define SIGNEDNESS unsigned
#define BIAS 1
#include "vect-avg-11.c"
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-14.c

@@ -0,0 +1,11 @@
/* { dg-require-effective-target vect_int } */
#define SIGNEDNESS signed
#define BIAS 1
#include "vect-avg-11.c"
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-2.c

@@ -0,0 +1,10 @@
/* { dg-require-effective-target vect_int } */
#define SIGNEDNESS signed
#include "vect-avg-1.c"
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-3.c

@@ -0,0 +1,11 @@
/* { dg-require-effective-target vect_int } */
#define SIGNEDNESS unsigned
#define BIAS 1
#include "vect-avg-1.c"
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-4.c

@@ -0,0 +1,11 @@
/* { dg-require-effective-target vect_int } */
#define SIGNEDNESS signed
#define BIAS 1
#include "vect-avg-1.c"
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-5.c

@@ -0,0 +1,51 @@
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
#define N 50
#ifndef SIGNEDNESS
#define SIGNEDNESS unsigned
#endif
#ifndef BIAS
#define BIAS 0
#endif
void __attribute__ ((noipa))
f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
SIGNEDNESS char *restrict c)
{
for (__INTPTR_TYPE__ i = 0; i < N; ++i)
{
int tmp1 = b[i] + BIAS;
int tmp2 = tmp1 + c[i];
a[i] = tmp2 >> 1;
}
}
#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
int
main (void)
{
check_vect ();
SIGNEDNESS char a[N], b[N], c[N];
for (int i = 0; i < N; ++i)
{
b[i] = BASE1 + i * 5;
c[i] = BASE2 + i * 4;
asm volatile ("" ::: "memory");
}
f (a, b, c);
for (int i = 0; i < N; ++i)
if (a[i] != ((BASE1 + BASE2 + i * 9 + BIAS) >> 1))
__builtin_abort ();
return 0;
}
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-6.c

@@ -0,0 +1,10 @@
/* { dg-require-effective-target vect_int } */
#define SIGNEDNESS signed
#include "vect-avg-5.c"
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-7.c

@@ -0,0 +1,11 @@
/* { dg-require-effective-target vect_int } */
#define SIGNEDNESS unsigned
#define BIAS 1
#include "vect-avg-5.c"
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-8.c

@@ -0,0 +1,11 @@
/* { dg-require-effective-target vect_int } */
#define SIGNEDNESS signed
#define BIAS 1
#include "vect-avg-5.c"
/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */

gcc/testsuite/gcc.dg/vect/vect-avg-9.c

@@ -0,0 +1,8 @@
/* { dg-require-effective-target vect_int } */
#define SIGNEDNESS unsigned
#define BIAS 2
#include "vect-avg-5.c"
/* { dg-final { scan-tree-dump-not "vect_recog_average_pattern: detected" "vect" } } */

gcc/testsuite/lib/target-supports.exp

@@ -6313,6 +6313,13 @@ proc check_effective_target_vect_usad_char { } {
return $et_vect_usad_char_saved($et_index)
}
# Return 1 if the target plus current options supports both signed
# and unsigned average operations on vectors of bytes.
proc check_effective_target_vect_avg_qi {} {
return 0
}
# Return 1 if the target plus current options supports a vector
# demotion (packing) of shorts (to chars) and ints (to shorts)
# using modulo arithmetic, 0 otherwise.

gcc/tree-vect-patterns.c

@@ -1721,6 +1721,153 @@ vect_recog_over_widening_pattern (vec<gimple *> *stmts, tree *type_out)
return pattern_stmt;
}
/* Recognize the patterns:
ATYPE a; // narrower than TYPE
BTYPE b; // narrower than TYPE
(1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
where only the bottom half of avg is used. Try to transform them into:
(1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
followed by:
TYPE avg = (TYPE) avg';
where NTYPE is no wider than half of TYPE. Since only the bottom half
of avg is used, all or part of the cast of avg' should become redundant. */
static gimple *
vect_recog_average_pattern (vec<gimple *> *stmts, tree *type_out)
{
/* Check for a shift right by one bit. */
gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
if (!last_stmt
|| gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
|| !integer_onep (gimple_assign_rhs2 (last_stmt)))
return NULL;
stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
vec_info *vinfo = last_stmt_info->vinfo;
/* Check that the shift result is wider than the users of the
result need (i.e. that narrowing would be a natural choice). */
tree lhs = gimple_assign_lhs (last_stmt);
tree type = TREE_TYPE (lhs);
unsigned int target_precision
= vect_element_precision (last_stmt_info->min_output_precision);
if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
return NULL;
/* Get the definition of the shift input. */
tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
if (!plus_stmt_info)
return NULL;
/* Check whether the shift input can be seen as a tree of additions on
2 or 3 widened inputs.
Note that the pattern should be a win even if the result of one or
more additions is reused elsewhere: if the pattern matches, we'd be
replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
internal_fn ifn = IFN_AVG_FLOOR;
vect_unpromoted_value unprom[3];
tree new_type;
unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
PLUS_EXPR, false, 3,
unprom, &new_type);
if (nops == 0)
return NULL;
if (nops == 3)
{
/* Check that one operand is 1. */
unsigned int i;
for (i = 0; i < 3; ++i)
if (integer_onep (unprom[i].op))
break;
if (i == 3)
return NULL;
/* Throw away the 1 operand and keep the other two. */
if (i < 2)
unprom[i] = unprom[2];
ifn = IFN_AVG_CEIL;
}
vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
/* We know that:
(a) the operation can be viewed as:
TYPE widened0 = (TYPE) UNPROM[0];
TYPE widened1 = (TYPE) UNPROM[1];
TYPE tmp1 = widened0 + widened1 {+ 1};
TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
(b) the first two statements are equivalent to:
TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
(c) vect_recog_over_widening_pattern has already tried to narrow TYPE
where sensible;
(d) all the operations can be performed correctly at twice the width of
NEW_TYPE, due to the nature of the average operation; and
(e) users of the result of the right shift need only TARGET_PRECISION
bits, where TARGET_PRECISION is no more than half of TYPE's
precision.
Under these circumstances, the only situation in which NEW_TYPE
could be narrower than TARGET_PRECISION is if widened0, widened1
and an addition result are all used more than once. Thus we can
treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
as "free", whereas widening the result of the average instruction
from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
therefore better not to go narrower than TARGET_PRECISION. */
if (TYPE_PRECISION (new_type) < target_precision)
new_type = build_nonstandard_integer_type (target_precision,
TYPE_UNSIGNED (new_type));
/* Check for target support. */
tree new_vectype = get_vectype_for_scalar_type (new_type);
if (!new_vectype
|| !direct_internal_fn_supported_p (ifn, new_vectype,
OPTIMIZE_FOR_SPEED))
return NULL;
/* The IR requires a valid vector type for the cast result, even though
it's likely to be discarded. */
*type_out = get_vectype_for_scalar_type (type);
if (!*type_out)
return NULL;
/* Generate the IFN_AVG* call. */
tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
tree new_ops[2];
vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
unprom, new_vectype);
gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
new_ops[1]);
gimple_call_set_lhs (average_stmt, new_var);
gimple_set_location (average_stmt, gimple_location (last_stmt));
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"created pattern stmt: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
}
stmts->safe_push (last_stmt);
return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
}
/* Recognize cases in which the input to a cast is wider than its
output, and the input is fed by a widening operation. Fold this
by removing the unnecessary intermediate widening. E.g.:
@@ -4670,6 +4817,9 @@ struct vect_recog_func
less complex ones (widen_sum only after dot_prod or sad for example).
static vect_recog_func vect_vect_recog_func_ptrs[] = {
{ vect_recog_over_widening_pattern, "over_widening" },
/* Must come after over_widening, which narrows the shift as much as
possible beforehand. */
{ vect_recog_average_pattern, "average" },
{ vect_recog_cast_forwprop_pattern, "cast_forwprop" },
{ vect_recog_widen_mult_pattern, "widen_mult" },
{ vect_recog_dot_prod_pattern, "dot_prod" },

gcc/tree-vect-stmts.c

@@ -3116,7 +3116,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
gcall *stmt;
tree vec_dest;
tree scalar_dest;
tree op, type;
tree op;
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
tree vectype_out, vectype_in;
@@ -3592,12 +3592,11 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
if (slp_node)
return true;
type = TREE_TYPE (scalar_dest);
if (is_pattern_stmt_p (stmt_info))
stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
lhs = gimple_get_lhs (stmt_info->stmt);
new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
set_vinfo_for_stmt (new_stmt, stmt_info);
set_vinfo_for_stmt (stmt_info->stmt, NULL);
STMT_VINFO_STMT (stmt_info) = new_stmt;