rs6000.c (rs6000_vect_nonmem): New static var.
[gcc] 2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var. (rs6000_init_cost): Initialize rs6000_vect_nonmem. (rs6000_add_stmt_cost): Update rs6000_vect_nonmem. (rs6000_finish_cost): Avoid vectorizing simple copy loops with VF=2 that require versioning. [gcc/testsuite] 2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com> * gcc.target/powerpc/versioned-copy-loop.c: New file. From-SVN: r247671
This commit is contained in:
parent
ba82e6b5c8
commit
9945596cef
|
@ -1,3 +1,11 @@
|
|||
2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||
|
||||
* config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var.
|
||||
(rs6000_init_cost): Initialize rs6000_vect_nonmem.
|
||||
(rs6000_add_stmt_cost): Update rs6000_vect_nonmem.
|
||||
(rs6000_finish_cost): Avoid vectorizing simple copy loops with
|
||||
VF=2 that require versioning.
|
||||
|
||||
2017-05-05 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* diagnostic.h (CARET_LINE_MARGIN): Convert from macro to const
|
||||
|
|
|
@ -5873,6 +5873,10 @@ rs6000_density_test (rs6000_cost_data *data)
|
|||
|
||||
/* Implement targetm.vectorize.init_cost. */
|
||||
|
||||
/* For each vectorized loop, this var holds TRUE iff a non-memory vector
|
||||
instruction is needed by the vectorization. */
|
||||
static bool rs6000_vect_nonmem;
|
||||
|
||||
static void *
|
||||
rs6000_init_cost (struct loop *loop_info)
|
||||
{
|
||||
|
@ -5881,6 +5885,7 @@ rs6000_init_cost (struct loop *loop_info)
|
|||
data->cost[vect_prologue] = 0;
|
||||
data->cost[vect_body] = 0;
|
||||
data->cost[vect_epilogue] = 0;
|
||||
rs6000_vect_nonmem = false;
|
||||
return data;
|
||||
}
|
||||
|
||||
|
@ -5907,6 +5912,15 @@ rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
|
|||
|
||||
retval = (unsigned) (count * stmt_cost);
|
||||
cost_data->cost[where] += retval;
|
||||
|
||||
/* Check whether we're doing something other than just a copy loop.
|
||||
Not all copy loops may be profitably vectorized; see
|
||||
rs6000_finish_cost. */
|
||||
if ((kind == vec_to_scalar || kind == vec_perm
|
||||
|| kind == vec_promote_demote || kind == vec_construct
|
||||
|| kind == scalar_to_vec)
|
||||
|| (where == vect_body && kind == vector_stmt))
|
||||
rs6000_vect_nonmem = true;
|
||||
}
|
||||
|
||||
return retval;
|
||||
|
@ -5923,6 +5937,19 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost,
|
|||
if (cost_data->loop_info)
|
||||
rs6000_density_test (cost_data);
|
||||
|
||||
/* Don't vectorize minimum-vectorization-factor, simple copy loops
|
||||
that require versioning for any reason. The vectorization is at
|
||||
best a wash inside the loop, and the versioning checks make
|
||||
profitability highly unlikely and potentially quite harmful. */
|
||||
if (cost_data->loop_info)
|
||||
{
|
||||
loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
|
||||
if (!rs6000_vect_nonmem
|
||||
&& LOOP_VINFO_VECT_FACTOR (vec_info) == 2
|
||||
&& LOOP_REQUIRES_VERSIONING (vec_info))
|
||||
cost_data->cost[vect_body] += 10000;
|
||||
}
|
||||
|
||||
*prologue_cost = cost_data->cost[vect_prologue];
|
||||
*body_cost = cost_data->cost[vect_body];
|
||||
*epilogue_cost = cost_data->cost[vect_epilogue];
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/powerpc/versioned-copy-loop.c: New file.
|
||||
|
||||
2017-05-05 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
PR target/79038
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_p8vector_ok } */
|
||||
/* { dg-options "-O3 -fdump-tree-vect-details" } */
|
||||
|
||||
/* Verify that a pure copy loop with a vectorization factor of two
|
||||
that requires versioning (for alignment) will not be vectorized. See the cost
|
||||
model hooks in rs6000.c. */
|
||||
|
||||
typedef long unsigned int size_t;
|
||||
typedef unsigned char uint8_t;
|
||||
|
||||
extern void *memcpy (void *__restrict __dest, const void *__restrict __src,
|
||||
size_t __n) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2)));
|
||||
|
||||
/* Copy 8 bytes at a time from srcPtr to dstPtr with memcpy, advancing
both pointers by 8 after each copy, until the destination pointer is
no longer below dstEnd. Because the loop is a do-while, one 8-byte
copy is always performed, even when dstPtr >= dstEnd on entry.
This is the copy loop the test expects to remain unvectorized. */
void foo (void *dstPtr, const void *srcPtr, void *dstEnd)
|
||||
{
|
||||
uint8_t *d = (uint8_t*)dstPtr;
|
||||
const uint8_t *s = (const uint8_t*)srcPtr;
|
||||
uint8_t* const e = (uint8_t*)dstEnd;
|
||||
|
||||
do
|
||||
{
|
||||
memcpy (d, s, 8);
|
||||
d += 8;
|
||||
|
||||
s += 8;
|
||||
}
|
||||
while (d < e);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
|
Loading…
Reference in New Issue