tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p to strided_p.
* tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p to strided_p. (STMT_VINFO_STRIDE_LOAD_P): Rename to ... (STMT_VINFO_STRIDED_P): ... this. * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Adjust. (vect_verify_datarefs_alignment): Likewise. (vect_enhance_data_refs_alignment): Likewise. (vect_analyze_data_ref_access): Likewise. (vect_analyze_data_refs): Accept strided stores. * tree-vect-stmts.c (vect_model_store_cost): Count strided stores. (vect_model_load_cost): Adjust for macro rename. (vectorizable_mask_load_store): Likewise. (vectorizable_load): Likewise. (vectorizable_store): Open code strided stores. testsuite/ * gcc.dg/vect/vect-strided-store.c: New test. * gfortran.dg/vect/fast-math-pr37021.f90: Adjust. * gfortran.dg/vect/fast-math-rnflow-trs2a2.f90: Adjust. From-SVN: r223486
This commit is contained in:
parent
1e43cc9461
commit
f2e2a98542
@ -1,3 +1,20 @@
|
||||
2015-05-08 Michael Matz <matz@suse.de>
|
||||
|
||||
* tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p
|
||||
to strided_p.
|
||||
(STMT_VINFO_STRIDE_LOAD_P): Rename to ...
|
||||
(STMT_VINFO_STRIDED_P): ... this.
|
||||
* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Adjust.
|
||||
(vect_verify_datarefs_alignment): Likewise.
|
||||
(vect_enhance_data_refs_alignment): Likewise.
|
||||
(vect_analyze_data_ref_access): Likewise.
|
||||
(vect_analyze_data_refs): Accept strided stores.
|
||||
* tree-vect-stmts.c (vect_model_store_cost): Count strided stores.
|
||||
(vect_model_load_cost): Adjust for macro rename.
|
||||
(vectorizable_mask_load_store): Likewise.
|
||||
(vectorizable_load): Likewise.
|
||||
(vectorizable_store): Open code strided stores.
|
||||
|
||||
2015-05-21 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
* doc/sourcebuild.texi (7.2.3.9 Other hardware attributes):
|
||||
|
@ -1,3 +1,9 @@
|
||||
2015-05-08 Michael Matz <matz@suse.de>
|
||||
|
||||
* gcc.dg/vect/vect-strided-store.c: New test.
|
||||
* gfortran.dg/vect/fast-math-pr37021.f90: Adjust.
|
||||
* gfortran.dg/vect/fast-math-rnflow-trs2a2.f90: Adjust.
|
||||
|
||||
2015-05-21 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
* lib/target-supports.exp (check_effective_target_sqrt_insn): New check.
|
||||
|
36
gcc/testsuite/gcc.dg/vect/vect-strided-store.c
Normal file
36
gcc/testsuite/gcc.dg/vect/vect-strided-store.c
Normal file
@ -0,0 +1,36 @@
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
/* Writes src[i] + src2[i] to dest[i*stride] for each i in [0, n): two
   unit-stride loads feeding a store whose stride is only known at run
   time.  */
void __attribute__((noinline))
|
||||
sumit (float * __restrict dest,
|
||||
float * __restrict src, float * __restrict src2,
|
||||
int stride, int n)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < n; i++)
|
||||
dest[i*stride] = src[i] + src2[i];
|
||||
}
|
||||
|
||||
/* Calls sumit with src as both inputs for every stride in [0, 8).  With
   stride 0 all eight stores land on dest[0], so only the last sum
   (8 + 8 == 16) is checked; for any other stride each dest[i*stride] must
   equal 2*src[i].  Any mismatch aborts.  */
int main()
|
||||
{
|
||||
int i, stride;
|
||||
float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
|
||||
float dest[64];
|
||||
check_vect ();
|
||||
for (stride = 0; stride < 8; stride++)
|
||||
{
|
||||
sumit (dest, src, src, stride, 8);
|
||||
if (!stride && dest[0] != 16)
|
||||
abort();
|
||||
else if (stride)
|
||||
for (i = 0; i < 8; i++)
|
||||
if (2*src[i] != dest[i*stride])
|
||||
abort ();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
@ -14,5 +14,5 @@ subroutine to_product_of(self,a,b,a1,a2)
|
||||
end do
|
||||
end subroutine
|
||||
|
||||
! { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } }
|
||||
! { dg-final { scan-tree-dump "vectorized 2 loops" "vect" } }
|
||||
! { dg-final { cleanup-tree-dump "vect" } }
|
||||
|
@ -29,5 +29,5 @@
|
||||
return
|
||||
end function trs2a2
|
||||
|
||||
! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } }
|
||||
! { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } }
|
||||
! { dg-final { cleanup-tree-dump "vect" } }
|
||||
|
@ -663,9 +663,9 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
|
||||
/* Initialize misalignment to unknown. */
|
||||
SET_DR_MISALIGNMENT (dr, -1);
|
||||
|
||||
/* Strided loads perform only component accesses, misalignment information
|
||||
/* Strided accesses perform only component accesses, misalignment information
|
||||
is irrelevant for them. */
|
||||
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
|
||||
if (STMT_VINFO_STRIDED_P (stmt_info)
|
||||
&& !STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
return true;
|
||||
|
||||
@ -942,9 +942,9 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
|
||||
|| !STMT_VINFO_VECTORIZABLE (stmt_info))
|
||||
continue;
|
||||
|
||||
/* Strided loads perform only component accesses, alignment is
|
||||
/* Strided accesses perform only component accesses, alignment is
|
||||
irrelevant for them. */
|
||||
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
|
||||
if (STMT_VINFO_STRIDED_P (stmt_info)
|
||||
&& !STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
continue;
|
||||
|
||||
@ -1410,9 +1410,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
if (integer_zerop (DR_STEP (dr)))
|
||||
continue;
|
||||
|
||||
/* Strided loads perform only component accesses, alignment is
|
||||
/* Strided accesses perform only component accesses, alignment is
|
||||
irrelevant for them. */
|
||||
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
|
||||
if (STMT_VINFO_STRIDED_P (stmt_info)
|
||||
&& !STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
continue;
|
||||
|
||||
@ -1703,9 +1703,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
&& GROUP_FIRST_ELEMENT (stmt_info) != stmt)
|
||||
continue;
|
||||
|
||||
/* Strided loads perform only component accesses, alignment is
|
||||
/* Strided accesses perform only component accesses, alignment is
|
||||
irrelevant for them. */
|
||||
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
|
||||
if (STMT_VINFO_STRIDED_P (stmt_info)
|
||||
&& !STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
continue;
|
||||
|
||||
@ -1824,7 +1824,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
&& GROUP_FIRST_ELEMENT (stmt_info) != stmt))
|
||||
continue;
|
||||
|
||||
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
|
||||
if (STMT_VINFO_STRIDED_P (stmt_info))
|
||||
{
|
||||
/* Strided accesses perform only component accesses, alignment is
|
||||
irrelevant for them. */
|
||||
@ -2346,7 +2346,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
|
||||
|
||||
/* Assume this is a DR handled by the non-constant strided access case. */
|
||||
if (TREE_CODE (step) != INTEGER_CST)
|
||||
return (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
|
||||
return (STMT_VINFO_STRIDED_P (stmt_info)
|
||||
&& (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
|
||||
|| vect_analyze_group_access (dr)));
|
||||
|
||||
@ -3758,8 +3758,7 @@ again:
|
||||
else if (loop_vinfo
|
||||
&& TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
|
||||
{
|
||||
if (nested_in_vect_loop_p (loop, stmt)
|
||||
|| !DR_IS_READ (dr))
|
||||
if (nested_in_vect_loop_p (loop, stmt))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
@ -3771,7 +3770,7 @@ again:
|
||||
}
|
||||
return false;
|
||||
}
|
||||
STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
|
||||
STMT_VINFO_STRIDED_P (stmt_info) = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1014,6 +1014,18 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
}
|
||||
|
||||
/* Costs of the stores. */
|
||||
if (STMT_VINFO_STRIDED_P (stmt_info))
|
||||
{
|
||||
/* N scalar stores plus extracting the elements. */
|
||||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||
inside_cost += record_stmt_cost (body_cost_vec,
|
||||
ncopies * TYPE_VECTOR_SUBPARTS (vectype),
|
||||
scalar_store, stmt_info, 0, vect_body);
|
||||
inside_cost += record_stmt_cost (body_cost_vec,
|
||||
ncopies * TYPE_VECTOR_SUBPARTS (vectype),
|
||||
vec_to_scalar, stmt_info, 0, vect_body);
|
||||
}
|
||||
else
|
||||
vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
@ -1113,7 +1125,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
access is instead being provided by a load-and-permute operation,
|
||||
include the cost of the permutes. */
|
||||
if (!load_lanes_p && group_size > 1
|
||||
&& !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
|
||||
&& !STMT_VINFO_STRIDED_P (stmt_info))
|
||||
{
|
||||
/* Uses an even and odd extract operations or shuffle operations
|
||||
for each needed permute. */
|
||||
@ -1128,7 +1140,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
}
|
||||
|
||||
/* The loads themselves. */
|
||||
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
|
||||
if (STMT_VINFO_STRIDED_P (stmt_info)
|
||||
&& !STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
{
|
||||
/* N scalar loads plus gathering them into a vector. */
|
||||
@ -1143,7 +1155,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
|
||||
|| group_size > 1 || slp_node),
|
||||
&inside_cost, &prologue_cost,
|
||||
prologue_cost_vec, body_cost_vec, true);
|
||||
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
|
||||
if (STMT_VINFO_STRIDED_P (stmt_info))
|
||||
inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
|
||||
stmt_info, 0, vect_body);
|
||||
|
||||
@ -1823,7 +1835,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
return false;
|
||||
|
||||
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
|
||||
if (STMT_VINFO_STRIDED_P (stmt_info))
|
||||
return false;
|
||||
|
||||
if (STMT_VINFO_GATHER_P (stmt_info))
|
||||
@ -5016,7 +5028,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||
tree dataref_ptr = NULL_TREE;
|
||||
tree dataref_offset = NULL_TREE;
|
||||
gimple ptr_incr = NULL;
|
||||
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
int ncopies;
|
||||
int j;
|
||||
gimple next_stmt, first_stmt = NULL;
|
||||
@ -5103,6 +5115,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||
if (!STMT_VINFO_DATA_REF (stmt_info))
|
||||
return false;
|
||||
|
||||
if (!STMT_VINFO_STRIDED_P (stmt_info))
|
||||
{
|
||||
negative =
|
||||
tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
|
||||
? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
|
||||
@ -5114,7 +5128,6 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||
"multiple types with negative step.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (negative)
|
||||
{
|
||||
gcc_assert (!grouped_store);
|
||||
@ -5137,6 +5150,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||
{
|
||||
@ -5233,6 +5247,113 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"transform store. ncopies = %d\n", ncopies);
|
||||
|
||||
if (STMT_VINFO_STRIDED_P (stmt_info))
|
||||
{
|
||||
gimple_stmt_iterator incr_gsi;
|
||||
bool insert_after;
|
||||
gimple incr;
|
||||
tree offvar;
|
||||
tree ivstep;
|
||||
tree running_off;
|
||||
gimple_seq stmts = NULL;
|
||||
tree stride_base, stride_step, alias_off;
|
||||
tree vec_oprnd;
|
||||
|
||||
gcc_assert (!nested_in_vect_loop_p (loop, stmt));
|
||||
|
||||
stride_base
|
||||
= fold_build_pointer_plus
|
||||
(unshare_expr (DR_BASE_ADDRESS (dr)),
|
||||
size_binop (PLUS_EXPR,
|
||||
convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
|
||||
convert_to_ptrofftype (DR_INIT(dr))));
|
||||
stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
|
||||
|
||||
/* For a store with loop-invariant (but other than power-of-2)
|
||||
stride (i.e. not a grouped access) like so:
|
||||
|
||||
for (i = 0; i < n; i += stride)
|
||||
array[i] = ...;
|
||||
|
||||
we generate a new induction variable and new stores from
|
||||
the components of the (vectorized) rhs:
|
||||
|
||||
for (j = 0; ; j += VF*stride)
|
||||
vectemp = ...;
|
||||
tmp1 = vectemp[0];
|
||||
array[j] = tmp1;
|
||||
tmp2 = vectemp[1];
|
||||
array[j + stride] = tmp2;
|
||||
...
|
||||
*/
|
||||
|
||||
ivstep = stride_step;
|
||||
ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
|
||||
build_int_cst (TREE_TYPE (ivstep),
|
||||
ncopies * nunits));
|
||||
|
||||
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
|
||||
|
||||
create_iv (stride_base, ivstep, NULL,
|
||||
loop, &incr_gsi, insert_after,
|
||||
&offvar, NULL);
|
||||
incr = gsi_stmt (incr_gsi);
|
||||
set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
|
||||
|
||||
stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
|
||||
if (stmts)
|
||||
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
|
||||
|
||||
prev_stmt_info = NULL;
|
||||
running_off = offvar;
|
||||
alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
|
||||
for (j = 0; j < ncopies; j++)
|
||||
{
|
||||
/* We've set op and dt above, from gimple_assign_rhs1(stmt),
|
||||
and first_stmt == stmt. */
|
||||
if (j == 0)
|
||||
vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
|
||||
else
|
||||
vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
|
||||
|
||||
for (i = 0; i < nunits; i++)
|
||||
{
|
||||
tree newref, newoff;
|
||||
gimple incr, assign;
|
||||
tree size = TYPE_SIZE (elem_type);
|
||||
/* Extract the i'th component. */
|
||||
tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
|
||||
size);
|
||||
tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
|
||||
size, pos);
|
||||
|
||||
elem = force_gimple_operand_gsi (gsi, elem, true,
|
||||
NULL_TREE, true,
|
||||
GSI_SAME_STMT);
|
||||
|
||||
newref = build2 (MEM_REF, TREE_TYPE (vectype),
|
||||
running_off, alias_off);
|
||||
|
||||
/* And store it to *running_off. */
|
||||
assign = gimple_build_assign (newref, elem);
|
||||
vect_finish_stmt_generation (stmt, assign, gsi);
|
||||
|
||||
newoff = copy_ssa_name (running_off, NULL);
|
||||
incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
|
||||
running_off, stride_step);
|
||||
vect_finish_stmt_generation (stmt, incr, gsi);
|
||||
|
||||
running_off = newoff;
|
||||
/* Only the first scalar store of the first copy (j == 0, i == 0) is
   recorded as the vectorized stmt; every later store is chained to its
   predecessor through STMT_VINFO_RELATED_STMT.  The previous test
   `i == i' was always true, so each store of the first copy overwrote
   the recorded stmt and none of them was chained.  */
if (j == 0 && i == 0)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
|
||||
else
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
|
||||
prev_stmt_info = vinfo_for_stmt (assign);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
dr_chain.create (group_size);
|
||||
oprnds.create (group_size);
|
||||
|
||||
@ -5796,7 +5917,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||
group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
|
||||
if (!slp
|
||||
&& !PURE_SLP_STMT (stmt_info)
|
||||
&& !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
|
||||
&& !STMT_VINFO_STRIDED_P (stmt_info))
|
||||
{
|
||||
if (vect_load_lanes_supported (vectype, group_size))
|
||||
load_lanes_p = true;
|
||||
@ -5851,7 +5972,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
|
||||
else if (STMT_VINFO_STRIDED_P (stmt_info))
|
||||
{
|
||||
if ((grouped_load
|
||||
&& (slp || PURE_SLP_STMT (stmt_info)))
|
||||
@ -6099,7 +6220,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
|
||||
else if (STMT_VINFO_STRIDED_P (stmt_info))
|
||||
{
|
||||
gimple_stmt_iterator incr_gsi;
|
||||
bool insert_after;
|
||||
|
@ -646,7 +646,9 @@ typedef struct _stmt_vec_info {
|
||||
|
||||
/* For loads only, true if this is a gather load. */
|
||||
bool gather_p;
|
||||
bool stride_load_p;
|
||||
|
||||
/* True if this is an access with loop-invariant stride. */
|
||||
bool strided_p;
|
||||
|
||||
/* For both loads and stores. */
|
||||
bool simd_lane_access_p;
|
||||
@ -664,7 +666,7 @@ typedef struct _stmt_vec_info {
|
||||
#define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable
|
||||
#define STMT_VINFO_DATA_REF(S) (S)->data_ref_info
|
||||
#define STMT_VINFO_GATHER_P(S) (S)->gather_p
|
||||
#define STMT_VINFO_STRIDE_LOAD_P(S) (S)->stride_load_p
|
||||
#define STMT_VINFO_STRIDED_P(S) (S)->strided_p
|
||||
#define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
|
||||
|
||||
#define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_base_address
|
||||
|
Loading…
Reference in New Issue
Block a user