backport: rs6000.c (rs6000_vect_nonmem): New static var.

[gcc]

2017-05-13  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	Backport from mainline
	2017-05-05  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var.
	(rs6000_init_cost): Initialize rs6000_vect_nonmem.
	(rs6000_add_stmt_cost): Update rs6000_vect_nonmem.
	(rs6000_finish_cost): Avoid vectorizing simple copy loops with
	VF=2 that require versioning.

[gcc/testsuite]

2017-05-13  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	Backport from mainline
	2017-05-05  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* gcc.target/powerpc/versioned-copy-loop.c: New file.

From-SVN: r248010
This commit is contained in:
Bill Schmidt 2017-05-13 21:35:44 +00:00 committed by William Schmidt
parent 80fb7eb063
commit 7b5a39f069
4 changed files with 75 additions and 0 deletions

View File

@ -1,3 +1,14 @@
2017-05-13 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Backport from mainline
2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var.
(rs6000_init_cost): Initialize rs6000_vect_nonmem.
(rs6000_add_stmt_cost): Update rs6000_vect_nonmem.
(rs6000_finish_cost): Avoid vectorizing simple copy loops with
VF=2 that require versioning.
2017-05-12 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Backport from mainline

View File

@ -5873,6 +5873,10 @@ rs6000_density_test (rs6000_cost_data *data)
/* Implement targetm.vectorize.init_cost. */
/* For each vectorized loop, this var holds TRUE iff a non-memory vector
instruction is needed by the vectorization. It is reset to false by
rs6000_init_cost, set to true by rs6000_add_stmt_cost when a
non-copy statement kind is costed, and consulted by
rs6000_finish_cost to recognize simple copy loops. */
static bool rs6000_vect_nonmem;
static void *
rs6000_init_cost (struct loop *loop_info)
{
@ -5881,6 +5885,7 @@ rs6000_init_cost (struct loop *loop_info)
data->cost[vect_prologue] = 0;
data->cost[vect_body] = 0;
data->cost[vect_epilogue] = 0;
rs6000_vect_nonmem = false;
return data;
}
@ -5907,6 +5912,15 @@ rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
retval = (unsigned) (count * stmt_cost);
cost_data->cost[where] += retval;
/* Check whether we're doing something other than just a copy loop.
Not all such loops may be profitably vectorized; see
rs6000_finish_cost. */
if ((kind == vec_to_scalar || kind == vec_perm
|| kind == vec_promote_demote || kind == vec_construct
|| kind == scalar_to_vec)
|| (where == vect_body && kind == vector_stmt))
rs6000_vect_nonmem = true;
}
return retval;
@ -5923,6 +5937,19 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost,
if (cost_data->loop_info)
rs6000_density_test (cost_data);
/* Don't vectorize minimum-vectorization-factor, simple copy loops
that require versioning for any reason. The vectorization is at
best a wash inside the loop, and the versioning checks make
profitability highly unlikely and potentially quite harmful. */
if (cost_data->loop_info)
{
loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
if (!rs6000_vect_nonmem
&& LOOP_VINFO_VECT_FACTOR (vec_info) == 2
&& LOOP_REQUIRES_VERSIONING (vec_info))
cost_data->cost[vect_body] += 10000;
}
*prologue_cost = cost_data->cost[vect_prologue];
*body_cost = cost_data->cost[vect_body];
*epilogue_cost = cost_data->cost[vect_epilogue];

View File

@ -1,3 +1,10 @@
2017-05-13 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Backport from mainline
2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gcc.target/powerpc/versioned-copy-loop.c: New file.
2017-05-12 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Backport from mainline

View File

@ -0,0 +1,30 @@
/* { dg-do compile } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-options "-O3 -fdump-tree-vect-details" } */
/* Verify that a pure copy loop with a vectorization factor of two
that requires versioning (e.g. for alignment) will not be vectorized.
See the cost model hooks in rs6000.c. */
/* The types and the memcpy prototype used by the test are declared
here directly instead of being pulled in from system headers. */
typedef long unsigned int size_t;
typedef unsigned char uint8_t;
extern void *memcpy (void *__restrict __dest, const void *__restrict __src,
size_t __n) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2)));
/* Test kernel: copy bytes from SRCPTR to DSTPTR in 8-byte chunks until
the destination cursor reaches DSTEND. Because the loop is a do/while,
one 8-byte chunk is always copied before the bound is checked. The
dg-final directive at the end of this file expects the vectorizer to
report zero vectorized loops for this function. */
void foo (void *dstPtr, const void *srcPtr, void *dstEnd)
{
/* Byte-typed cursors over the destination and source, plus the
destination end bound. */
uint8_t *d = (uint8_t*)dstPtr;
const uint8_t *s = (const uint8_t*)srcPtr;
uint8_t* const e = (uint8_t*)dstEnd;
do
{
/* Fixed-size 8-byte copy; both cursors then advance by the same
amount. */
memcpy (d, s, 8);
d += 8;
s += 8;
}
/* Only the destination cursor is compared against the bound. */
while (d < e);
}
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */