diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index fd928b0a1fd..52b5e29bd26 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2018-07-03  Richard Sandiford
+
+	PR tree-optimization/85694
+	* doc/md.texi (avgM3_floor, uavgM3_floor, avgM3_ceil)
+	(uavgM3_ceil): Document new optabs.
+	* doc/sourcebuild.texi (vect_avg_qi): Document new target selector.
+	* internal-fn.def (IFN_AVG_FLOOR, IFN_AVG_CEIL): New internal
+	functions.
+	* optabs.def (savg_floor_optab, uavg_floor_optab, savg_ceil_optab)
+	(uavg_ceil_optab): New optabs.
+	* tree-vect-patterns.c (vect_recog_average_pattern): New function.
+	(vect_vect_recog_func_ptrs): Add it.
+	* tree-vect-stmts.c (vectorizable_call): Get the type of the zero
+	constant directly from the associated lhs.
+
 2018-07-03  Richard Sandiford
 
 	* tree-vect-patterns.c (vect_split_statement): New function.
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 22919e4310c..09d6e307c24 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5599,6 +5599,34 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{avg@var{m}3_floor} instruction pattern
+@cindex @code{uavg@var{m}3_floor} instruction pattern
+@item @samp{avg@var{m}3_floor}
+@itemx @samp{uavg@var{m}3_floor}
+Signed and unsigned average instructions.  These instructions add
+operands 1 and 2 without truncation, divide the result by 2,
+round towards -Inf, and store the result in operand 0.  This is
+equivalent to the C code:
+@smallexample
+narrow op0, op1, op2;
+@dots{}
+op0 = (narrow) (((wide) op1 + (wide) op2) >> 1);
+@end smallexample
+where the signedness of @samp{narrow} determines whether this is a signed
+or unsigned operation.
+
+@cindex @code{avg@var{m}3_ceil} instruction pattern
+@cindex @code{uavg@var{m}3_ceil} instruction pattern
+@item @samp{avg@var{m}3_ceil}
+@itemx @samp{uavg@var{m}3_ceil}
+Like @samp{avg@var{m}3_floor} and @samp{uavg@var{m}3_floor}, but round
+towards +Inf.  This is equivalent to the C code:
+@smallexample
+narrow op0, op1, op2;
+@dots{}
+op0 = (narrow) (((wide) op1 + (wide) op2 + 1) >> 1);
+@end smallexample
+
 @cindex @code{bswap@var{m}2} instruction pattern
 @item @samp{bswap@var{m}2}
 Reverse the order of bytes of operand 1 and store the result in operand 0.
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index d52183d9c60..89157079ffb 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1417,6 +1417,10 @@ Target supports Fortran @code{real} kinds larger than @code{real(8)}.
 The target's ABI allows stack variables to be aligned to the preferred
 vector alignment.
 
+@item vect_avg_qi
+Target supports both signed and unsigned averaging operations on vectors
+of bytes.
+
 @item vect_condition
 Target supports vector conditional operations.
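For reference, the kind of source loop these optabs are meant to describe looks like the sketch below.  It is illustrative only, in the style of the new vect-avg-*.c tests further down, and not part of the patch; the function and variable names are invented.  The sum of two byte elements is formed in int (so it cannot wrap), halved, and only the low byte is stored, which is exactly the shape that maps onto uavgM3_floor.

#include <stdio.h>

#define N 50

/* Rounding-down average of two byte arrays: the sum is computed in int
   (no truncation), halved, and only the low byte is kept.  */
static void
avg_floor (unsigned char *restrict a, const unsigned char *restrict b,
           const unsigned char *restrict c)
{
  for (int i = 0; i < N; ++i)
    a[i] = (b[i] + c[i]) >> 1;
}

int
main (void)
{
  unsigned char a[N], b[N], c[N];
  for (int i = 0; i < N; ++i)
    {
      b[i] = 4 + i * 5;
      c[i] = 26 + i * 4;
    }
  avg_floor (a, b, c);
  printf ("a[1] = %d\n", a[1]);  /* (9 + 30) >> 1 == 19 */
  return 0;
}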
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 66336d8062b..6293ab36dc9 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -143,6 +143,11 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary) DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary) DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary) +DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first, + savg_floor, uavg_floor, binary) +DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first, + savg_ceil, uavg_ceil, binary) + DEF_INTERNAL_OPTAB_FN (COND_ADD, ECF_CONST, cond_add, cond_binary) DEF_INTERNAL_OPTAB_FN (COND_SUB, ECF_CONST, cond_sub, cond_binary) DEF_INTERNAL_OPTAB_FN (COND_MUL, ECF_CONST, cond_smul, cond_binary) diff --git a/gcc/optabs.def b/gcc/optabs.def index 11af7aaeb15..707d9696b4c 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -316,6 +316,10 @@ OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a") OPTAB_D (extract_last_optab, "extract_last_$a") OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a") +OPTAB_D (savg_floor_optab, "avg$a3_floor") +OPTAB_D (uavg_floor_optab, "uavg$a3_floor") +OPTAB_D (savg_ceil_optab, "avg$a3_ceil") +OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil") OPTAB_D (sdot_prod_optab, "sdot_prod$I$a") OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3") OPTAB_D (udot_prod_optab, "udot_prod$I$a") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 90aa4d7e22a..0ed116fd209 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,23 @@ +2018-07-03 Richard Sandiford + + PR tree-optimization/85694 + * lib/target-supports.exp (check_effective_target_vect_avg_qi): New + proc. + * gcc.dg/vect/vect-avg-1.c: New test. + * gcc.dg/vect/vect-avg-2.c: Likewise. + * gcc.dg/vect/vect-avg-3.c: Likewise. + * gcc.dg/vect/vect-avg-4.c: Likewise. + * gcc.dg/vect/vect-avg-5.c: Likewise. + * gcc.dg/vect/vect-avg-6.c: Likewise. + * gcc.dg/vect/vect-avg-7.c: Likewise. + * gcc.dg/vect/vect-avg-8.c: Likewise. + * gcc.dg/vect/vect-avg-9.c: Likewise. + * gcc.dg/vect/vect-avg-10.c: Likewise. + * gcc.dg/vect/vect-avg-11.c: Likewise. + * gcc.dg/vect/vect-avg-12.c: Likewise. + * gcc.dg/vect/vect-avg-13.c: Likewise. + * gcc.dg/vect/vect-avg-14.c: Likewise. + 2018-07-03 Richard Sandiford * gcc.dg/vect/vect-over-widen-5.c: Test that the extensions diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-1.c b/gcc/testsuite/gcc.dg/vect/vect-avg-1.c new file mode 100644 index 00000000000..a7bc7cc9096 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-1.c @@ -0,0 +1,47 @@ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS +#define SIGNEDNESS unsigned +#endif +#ifndef BIAS +#define BIAS 0 +#endif + +void __attribute__ ((noipa)) +f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b, + SIGNEDNESS char *restrict c) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + a[i] = (b[i] + c[i] + BIAS) >> 1; +} + +#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4) +#define BASE2 ((SIGNEDNESS int) -1 < 0 ? 
-101 : 26) + +int +main (void) +{ + check_vect (); + + SIGNEDNESS char a[N], b[N], c[N]; + for (int i = 0; i < N; ++i) + { + b[i] = BASE1 + i * 5; + c[i] = BASE2 + i * 4; + asm volatile ("" ::: "memory"); + } + f (a, b, c); + for (int i = 0; i < N; ++i) + if (a[i] != ((BASE1 + BASE2 + i * 9 + BIAS) >> 1)) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-10.c b/gcc/testsuite/gcc.dg/vect/vect-avg-10.c new file mode 100644 index 00000000000..2630aeab913 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-10.c @@ -0,0 +1,8 @@ +/* { dg-require-effective-target vect_int } */ + +#define SIGNEDNESS signed +#define BIAS 2 + +#include "vect-avg-5.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_average_pattern: detected" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-11.c b/gcc/testsuite/gcc.dg/vect/vect-avg-11.c new file mode 100644 index 00000000000..85292f1b824 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-11.c @@ -0,0 +1,57 @@ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS +#define SIGNEDNESS unsigned +#endif +#ifndef BIAS +#define BIAS 0 +#endif + +void __attribute__ ((noipa)) +f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b, + SIGNEDNESS char *restrict c) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int tmp = b[i]; + tmp ^= 0x55; + tmp += BIAS; + tmp += c[i]; + tmp >>= 1; + tmp |= 0x40; + a[i] = tmp; + } +} + +#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4) +#define BASE2 ((SIGNEDNESS int) -1 < 0 ? 
-101 : 26) + +int +main (void) +{ + check_vect (); + + SIGNEDNESS char a[N], b[N], c[N]; + for (int i = 0; i < N; ++i) + { + b[i] = BASE1 + i * 5; + c[i] = BASE2 + i * 4; + asm volatile ("" ::: "memory"); + } + f (a, b, c); + for (int i = 0; i < N; ++i) + if (a[i] != (((((BASE1 + i * 5) ^ 0x55) + + (BASE2 + i * 4) + + BIAS) >> 1) | 0x40)) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-12.c b/gcc/testsuite/gcc.dg/vect/vect-avg-12.c new file mode 100644 index 00000000000..f40331ea1bf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-12.c @@ -0,0 +1,10 @@ +/* { dg-require-effective-target vect_int } */ + +#define SIGNEDNESS signed + +#include "vect-avg-11.c" + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-13.c b/gcc/testsuite/gcc.dg/vect/vect-avg-13.c new file mode 100644 index 00000000000..7957c0e4adc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-13.c @@ -0,0 +1,11 @@ +/* { dg-require-effective-target vect_int } */ + +#define SIGNEDNESS unsigned +#define BIAS 1 + +#include "vect-avg-11.c" + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-14.c b/gcc/testsuite/gcc.dg/vect/vect-avg-14.c new file mode 100644 index 00000000000..8ab11f74e94 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-14.c @@ -0,0 +1,11 @@ +/* { dg-require-effective-target vect_int } */ + +#define SIGNEDNESS signed +#define BIAS 1 + +#include "vect-avg-11.c" + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-2.c b/gcc/testsuite/gcc.dg/vect/vect-avg-2.c new file mode 100644 index 00000000000..b5586b5f013 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-2.c @@ -0,0 +1,10 @@ +/* { dg-require-effective-target vect_int } */ + +#define SIGNEDNESS signed + +#include "vect-avg-1.c" + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git 
a/gcc/testsuite/gcc.dg/vect/vect-avg-3.c b/gcc/testsuite/gcc.dg/vect/vect-avg-3.c new file mode 100644 index 00000000000..104fe961393 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-3.c @@ -0,0 +1,11 @@ +/* { dg-require-effective-target vect_int } */ + +#define SIGNEDNESS unsigned +#define BIAS 1 + +#include "vect-avg-1.c" + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-4.c b/gcc/testsuite/gcc.dg/vect/vect-avg-4.c new file mode 100644 index 00000000000..92181d7fc3d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-4.c @@ -0,0 +1,11 @@ +/* { dg-require-effective-target vect_int } */ + +#define SIGNEDNESS signed +#define BIAS 1 + +#include "vect-avg-1.c" + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-5.c b/gcc/testsuite/gcc.dg/vect/vect-avg-5.c new file mode 100644 index 00000000000..6c43575f448 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-5.c @@ -0,0 +1,51 @@ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS +#define SIGNEDNESS unsigned +#endif +#ifndef BIAS +#define BIAS 0 +#endif + +void __attribute__ ((noipa)) +f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b, + SIGNEDNESS char *restrict c) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int tmp1 = b[i] + BIAS; + int tmp2 = tmp1 + c[i]; + a[i] = tmp2 >> 1; + } +} + +#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4) +#define BASE2 ((SIGNEDNESS int) -1 < 0 ? 
-101 : 26) + +int +main (void) +{ + check_vect (); + + SIGNEDNESS char a[N], b[N], c[N]; + for (int i = 0; i < N; ++i) + { + b[i] = BASE1 + i * 5; + c[i] = BASE2 + i * 4; + asm volatile ("" ::: "memory"); + } + f (a, b, c); + for (int i = 0; i < N; ++i) + if (a[i] != ((BASE1 + BASE2 + i * 9 + BIAS) >> 1)) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-6.c b/gcc/testsuite/gcc.dg/vect/vect-avg-6.c new file mode 100644 index 00000000000..efe97b8a5f1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-6.c @@ -0,0 +1,10 @@ +/* { dg-require-effective-target vect_int } */ + +#define SIGNEDNESS signed + +#include "vect-avg-5.c" + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-7.c b/gcc/testsuite/gcc.dg/vect/vect-avg-7.c new file mode 100644 index 00000000000..62a8474f690 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-7.c @@ -0,0 +1,11 @@ +/* { dg-require-effective-target vect_int } */ + +#define SIGNEDNESS unsigned +#define BIAS 1 + +#include "vect-avg-5.c" + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-8.c b/gcc/testsuite/gcc.dg/vect/vect-avg-8.c new file mode 100644 index 00000000000..cc7c4cde6b0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-8.c @@ -0,0 +1,11 @@ +/* { dg-require-effective-target vect_int } */ + +#define SIGNEDNESS signed +#define BIAS 1 + +#include "vect-avg-5.c" + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-9.c b/gcc/testsuite/gcc.dg/vect/vect-avg-9.c new file mode 100644 index 00000000000..80865b6661a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-9.c @@ -0,0 +1,8 @@ +/* { dg-require-effective-target vect_int } */ + +#define SIGNEDNESS unsigned +#define BIAS 2 + +#include "vect-avg-5.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_average_pattern: detected" "vect" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index ffbc882b07d..fc189f31b71 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -6313,6 +6313,13 @@ proc check_effective_target_vect_usad_char { } { return 
$et_vect_usad_char_saved($et_index)
 }
 
+# Return 1 if the target plus current options supports both signed
+# and unsigned average operations on vectors of bytes.
+
+proc check_effective_target_vect_avg_qi {} {
+    return 0
+}
+
 # Return 1 if the target plus current options supports a vector
 # demotion (packing) of shorts (to chars) and ints (to shorts)
 # using modulo arithmetic, 0 otherwise.
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index a1649d8b0fe..51defa08627 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1721,6 +1721,153 @@ vect_recog_over_widening_pattern (vec<gimple *> *stmts, tree *type_out)
   return pattern_stmt;
 }
 
+/* Recognize the patterns:
+
+        ATYPE a;  // narrower than TYPE
+        BTYPE b;  // narrower than TYPE
+     (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
+  or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
+
+   where only the bottom half of avg is used.  Try to transform them into:
+
+     (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
+  or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
+
+   followed by:
+
+        TYPE avg = (TYPE) avg';
+
+   where NTYPE is no wider than half of TYPE.  Since only the bottom half
+   of avg is used, all or part of the cast of avg' should become redundant.  */
+
+static gimple *
+vect_recog_average_pattern (vec<gimple *> *stmts, tree *type_out)
+{
+  /* Check for a shift right by one bit.  */
+  gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
+  if (!last_stmt
+      || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
+      || !integer_onep (gimple_assign_rhs2 (last_stmt)))
+    return NULL;
+
+  stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
+  vec_info *vinfo = last_stmt_info->vinfo;
+
+  /* Check that the shift result is wider than the users of the
+     result need (i.e. that narrowing would be a natural choice).  */
+  tree lhs = gimple_assign_lhs (last_stmt);
+  tree type = TREE_TYPE (lhs);
+  unsigned int target_precision
+    = vect_element_precision (last_stmt_info->min_output_precision);
+  if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
+    return NULL;
+
+  /* Get the definition of the shift input.  */
+  tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
+  stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
+  if (!plus_stmt_info)
+    return NULL;
+
+  /* Check whether the shift input can be seen as a tree of additions on
+     2 or 3 widened inputs.
+
+     Note that the pattern should be a win even if the result of one or
+     more additions is reused elsewhere: if the pattern matches, we'd be
+     replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
+  internal_fn ifn = IFN_AVG_FLOOR;
+  vect_unpromoted_value unprom[3];
+  tree new_type;
+  unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
+                                            PLUS_EXPR, false, 3,
+                                            unprom, &new_type);
+  if (nops == 0)
+    return NULL;
+  if (nops == 3)
+    {
+      /* Check that one operand is 1.  */
+      unsigned int i;
+      for (i = 0; i < 3; ++i)
+        if (integer_onep (unprom[i].op))
+          break;
+      if (i == 3)
+        return NULL;
+      /* Throw away the 1 operand and keep the other two.
*/ + if (i < 2) + unprom[i] = unprom[2]; + ifn = IFN_AVG_CEIL; + } + + vect_pattern_detected ("vect_recog_average_pattern", last_stmt); + + /* We know that: + + (a) the operation can be viewed as: + + TYPE widened0 = (TYPE) UNPROM[0]; + TYPE widened1 = (TYPE) UNPROM[1]; + TYPE tmp1 = widened0 + widened1 {+ 1}; + TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO + + (b) the first two statements are equivalent to: + + TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0]; + TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1]; + + (c) vect_recog_over_widening_pattern has already tried to narrow TYPE + where sensible; + + (d) all the operations can be performed correctly at twice the width of + NEW_TYPE, due to the nature of the average operation; and + + (e) users of the result of the right shift need only TARGET_PRECISION + bits, where TARGET_PRECISION is no more than half of TYPE's + precision. + + Under these circumstances, the only situation in which NEW_TYPE + could be narrower than TARGET_PRECISION is if widened0, widened1 + and an addition result are all used more than once. Thus we can + treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION + as "free", whereas widening the result of the average instruction + from NEW_TYPE to TARGET_PRECISION would be a new operation. It's + therefore better not to go narrower than TARGET_PRECISION. */ + if (TYPE_PRECISION (new_type) < target_precision) + new_type = build_nonstandard_integer_type (target_precision, + TYPE_UNSIGNED (new_type)); + + /* Check for target support. */ + tree new_vectype = get_vectype_for_scalar_type (new_type); + if (!new_vectype + || !direct_internal_fn_supported_p (ifn, new_vectype, + OPTIMIZE_FOR_SPEED)) + return NULL; + + /* The IR requires a valid vector type for the cast result, even though + it's likely to be discarded. */ + *type_out = get_vectype_for_scalar_type (type); + if (!*type_out) + return NULL; + + /* Generate the IFN_AVG* call. */ + tree new_var = vect_recog_temp_ssa_var (new_type, NULL); + tree new_ops[2]; + vect_convert_inputs (last_stmt_info, 2, new_ops, new_type, + unprom, new_vectype); + gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0], + new_ops[1]); + gimple_call_set_lhs (average_stmt, new_var); + gimple_set_location (average_stmt, gimple_location (last_stmt)); + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "created pattern stmt: "); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0); + } + + stmts->safe_push (last_stmt); + return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype); +} + /* Recognize cases in which the input to a cast is wider than its output, and the input is fed by a widening operation. Fold this by removing the unnecessary intermediate widening. E.g.: @@ -4670,6 +4817,9 @@ struct vect_recog_func less comples onex (widen_sum only after dot_prod or sad for example). */ static vect_recog_func vect_vect_recog_func_ptrs[] = { { vect_recog_over_widening_pattern, "over_widening" }, + /* Must come after over_widening, which narrows the shift as much as + possible beforehand. 
*/ + { vect_recog_average_pattern, "average" }, { vect_recog_cast_forwprop_pattern, "cast_forwprop" }, { vect_recog_widen_mult_pattern, "widen_mult" }, { vect_recog_dot_prod_pattern, "dot_prod" }, diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index ae62fc36401..ea303bd7023 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -3116,7 +3116,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, gcall *stmt; tree vec_dest; tree scalar_dest; - tree op, type; + tree op; tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info; tree vectype_out, vectype_in; @@ -3592,12 +3592,11 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, if (slp_node) return true; - type = TREE_TYPE (scalar_dest); if (is_pattern_stmt_p (stmt_info)) stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); lhs = gimple_get_lhs (stmt_info->stmt); - new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); + new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); set_vinfo_for_stmt (new_stmt, stmt_info); set_vinfo_for_stmt (stmt_info->stmt, NULL); STMT_VINFO_STMT (stmt_info) = new_stmt;
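For reference, point (d) in the vect_recog_average_pattern comment above -- that the whole operation can be performed correctly at twice the width of NEW_TYPE -- can be checked exhaustively for byte inputs with the standalone program below.  It is not part of the patch, just an illustrative sketch: for 8-bit operands the biased sum is at most 511, so it fits in 16 bits, and the halved result has the same low byte whether the arithmetic is done in int or in a 16-bit type.

#include <stdio.h>

int
main (void)
{
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b)
      for (int bias = 0; bias < 2; ++bias)
        {
          /* Average as written in the source: sum and shift in int.  */
          unsigned char in_int = (unsigned char) ((a + b + bias) >> 1);
          /* Average at twice the input width: truncate the sum to 16 bits
             before shifting.  The maximum sum is 255 + 255 + 1 = 511,
             so the truncation loses nothing.  */
          unsigned short sum16 = (unsigned short) (a + b + bias);
          unsigned char in_short = (unsigned char) (sum16 >> 1);
          if (in_int != in_short)
            {
              printf ("mismatch: a=%d b=%d bias=%d\n", a, b, bias);
              return 1;
            }
        }
  printf ("all %d cases agree\n", 256 * 256 * 2);
  return 0;
}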