From 7b5fc413c14b6134b55a06c906f9ac2d9aff0628 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 8 May 2015 15:13:55 +0000 Subject: [PATCH] re PR tree-optimization/66036 (strided group loads are not vectorized) 2015-05-08 Richard Biener PR tree-optimization/66036 * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Handle strided group loads. (vect_verify_datarefs_alignment): Likewise. (vect_enhance_data_refs_alignment): Likewise. (vect_analyze_group_access): Likewise. (vect_analyze_data_ref_access): Likewise. (vect_analyze_data_ref_accesses): Likewise. * tree-vect-stmts.c (vect_model_load_cost): Likewise. (vectorizable_load): Likewise. * gcc.dg/vect/slp-41.c: New testcase. From-SVN: r222914 --- gcc/ChangeLog | 13 +++ gcc/testsuite/ChangeLog | 5 ++ gcc/testsuite/gcc.dg/vect/slp-41.c | 69 ++++++++++++++++ gcc/tree-vect-data-refs.c | 123 +++++++++++++---------------- gcc/tree-vect-stmts.c | 99 +++++++++++++++++------ 5 files changed, 216 insertions(+), 93 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/slp-41.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5f1575504f9..f7f03b3b8e3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2015-05-08 Richard Biener + + PR tree-optimization/66036 + * tree-vect-data-refs.c (vect_compute_data_ref_alignment): + Handle strided group loads. + (vect_verify_datarefs_alignment): Likewise. + (vect_enhance_data_refs_alignment): Likewise. + (vect_analyze_group_access): Likewise. + (vect_analyze_data_ref_access): Likewise. + (vect_analyze_data_ref_accesses): Likewise. + * tree-vect-stmts.c (vect_model_load_cost): Likewise. + (vectorizable_load): Likewise. + 2015-05-08 Segher Boessenkool * config/rs6000/rs6000.md: Require operand inequality in one diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c904070b6ac..2b6f663cc01 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-05-08 Richard Biener + + PR tree-optimization/66036 + * gcc.dg/vect/slp-41.c: New testcase. + 2015-05-08 Mikael Morin * gfortran.dg/elemental_optional_args_7.f90: New. diff --git a/gcc/testsuite/gcc.dg/vect/slp-41.c b/gcc/testsuite/gcc.dg/vect/slp-41.c new file mode 100644 index 00000000000..7d487b4dad9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-41.c @@ -0,0 +1,69 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_pack_trunc } */ +/* { dg-require-effective-target vect_unpack } */ +/* { dg-require-effective-target vect_hw_misalign } */ + +#include "tree-vect.h" + +void __attribute__((noinline,noclone)) +testi (int *p, short *q, int stride, int n) +{ + int i; + for (i = 0; i < n; ++i) + { + q[i*4+0] = p[i*stride+0]; + q[i*4+1] = p[i*stride+1]; + q[i*4+2] = p[i*stride+2]; + q[i*4+3] = p[i*stride+3]; + } +} + +void __attribute__((noinline,noclone)) +testi2 (int *q, short *p, int stride, int n) +{ + int i; + for (i = 0; i < n; ++i) + { + q[i*4+0] = p[i*stride+0]; + q[i*4+1] = p[i*stride+1]; + q[i*4+2] = p[i*stride+2]; + q[i*4+3] = p[i*stride+3]; + } +} + +int ia[256]; +short sa[256]; + +extern void abort (void); + +int main() +{ + int i; + + check_vect (); + + for (i = 0; i < 256; ++i) + { + ia[i] = sa[i] = i; + __asm__ volatile (""); + } + testi (ia, sa, 8, 32); + for (i = 0; i < 128; ++i) + if (sa[i] != ia[(i / 4) * 8 + i % 4]) + abort (); + + for (i = 0; i < 256; ++i) + { + ia[i] = sa[i] = i; + __asm__ volatile (""); + } + testi2 (ia, sa, 8, 32); + for (i = 0; i < 128; ++i) + if (ia[i] != sa[(i / 4) * 8 + i % 4]) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 0992d6ce1d7..7e938996866 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -649,7 +649,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr) tree vectype; tree base, base_addr; bool base_aligned; - tree misalign; + tree misalign = NULL_TREE; tree aligned_to; unsigned HOST_WIDE_INT alignment; @@ -665,10 +665,12 @@ vect_compute_data_ref_alignment (struct data_reference *dr) /* Strided loads perform only component accesses, misalignment information is irrelevant for them. */ - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) + if (STMT_VINFO_STRIDE_LOAD_P (stmt_info) + && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) return true; - misalign = DR_INIT (dr); + if (tree_fits_shwi_p (DR_STEP (dr))) + misalign = DR_INIT (dr); aligned_to = DR_ALIGNED_TO (dr); base_addr = DR_BASE_ADDRESS (dr); vectype = STMT_VINFO_VECTYPE (stmt_info); @@ -682,9 +684,9 @@ vect_compute_data_ref_alignment (struct data_reference *dr) if (loop && nested_in_vect_loop_p (loop, stmt)) { tree step = DR_STEP (dr); - HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); - if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0) + if (tree_fits_shwi_p (step) + && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -710,9 +712,9 @@ vect_compute_data_ref_alignment (struct data_reference *dr) if (!loop) { tree step = DR_STEP (dr); - HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); - if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0) + if (tree_fits_shwi_p (step) + && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -942,7 +944,8 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) /* Strided loads perform only component accesses, alignment is irrelevant for them. */ - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) + if (STMT_VINFO_STRIDE_LOAD_P (stmt_info) + && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) continue; supportable_dr_alignment = vect_supportable_dr_alignment (dr, false); @@ -1409,7 +1412,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) /* Strided loads perform only component accesses, alignment is irrelevant for them. */ - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) + if (STMT_VINFO_STRIDE_LOAD_P (stmt_info) + && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) continue; supportable_dr_alignment = vect_supportable_dr_alignment (dr, true); @@ -1701,7 +1705,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) /* Strided loads perform only component accesses, alignment is irrelevant for them. */ - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) + if (STMT_VINFO_STRIDE_LOAD_P (stmt_info) + && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) continue; save_misalignment = DR_MISALIGNMENT (dr); @@ -1819,10 +1824,15 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) && GROUP_FIRST_ELEMENT (stmt_info) != stmt)) continue; - /* Strided loads perform only component accesses, alignment is - irrelevant for them. */ if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) - continue; + { + /* Strided loads perform only component accesses, alignment is + irrelevant for them. */ + if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) + continue; + do_versioning = false; + break; + } supportable_dr_alignment = vect_supportable_dr_alignment (dr, false); @@ -2035,7 +2045,7 @@ vect_analyze_group_access (struct data_reference *dr) stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); - HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); + HOST_WIDE_INT dr_step = -1; HOST_WIDE_INT groupsize, last_accessed_element = 1; bool slp_impossible = false; struct loop *loop = NULL; @@ -2045,7 +2055,13 @@ vect_analyze_group_access (struct data_reference *dr) /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the size of the interleaving group (including gaps). */ - groupsize = absu_hwi (dr_step) / type_size; + if (tree_fits_shwi_p (step)) + { + dr_step = tree_to_shwi (step); + groupsize = absu_hwi (dr_step) / type_size; + } + else + groupsize = 0; /* Not consecutive access is possible only if it is a part of interleaving. */ if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) @@ -2120,7 +2136,6 @@ vect_analyze_group_access (struct data_reference *dr) tree prev_init = DR_INIT (data_ref); gimple prev = stmt; HOST_WIDE_INT diff, gaps = 0; - unsigned HOST_WIDE_INT count_in_bytes; while (next) { @@ -2185,30 +2200,12 @@ vect_analyze_group_access (struct data_reference *dr) count++; } - /* COUNT is the number of accesses found, we multiply it by the size of - the type to get COUNT_IN_BYTES. */ - count_in_bytes = type_size * count; + if (groupsize == 0) + groupsize = count + gaps; - /* Check that the size of the interleaving (including gaps) is not - greater than STEP. */ - if (dr_step != 0 - && absu_hwi (dr_step) < count_in_bytes + gaps * type_size) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "interleaving size is greater than step for "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - DR_REF (dr)); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - return false; - } - - /* Check that the size of the interleaving is equal to STEP for stores, + /* Check that the size of the interleaving is equal to count for stores, i.e., that there are no gaps. */ - if (dr_step != 0 - && absu_hwi (dr_step) != count_in_bytes) + if (groupsize != count) { if (DR_IS_READ (dr)) { @@ -2227,26 +2224,6 @@ vect_analyze_group_access (struct data_reference *dr) } } - /* Check that STEP is a multiple of type size. */ - if (dr_step != 0 - && (dr_step % type_size) != 0) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "step is not a multiple of type size: step "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step); - dump_printf (MSG_MISSED_OPTIMIZATION, " size "); - dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - TYPE_SIZE_UNIT (scalar_type)); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - return false; - } - - if (groupsize == 0) - groupsize = count + gaps; - GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -2366,9 +2343,12 @@ vect_analyze_data_ref_access (struct data_reference *dr) return false; } + /* Assume this is a DR handled by non-constant strided load case. */ if (TREE_CODE (step) != INTEGER_CST) - return STMT_VINFO_STRIDE_LOAD_P (stmt_info); + return (STMT_VINFO_STRIDE_LOAD_P (stmt_info) + && (!STMT_VINFO_GROUPED_ACCESS (stmt_info) + || vect_analyze_group_access (dr))); /* Not consecutive access - check if it's a part of interleaving group. */ return vect_analyze_group_access (dr); @@ -2570,15 +2550,16 @@ vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) || !gimple_assign_single_p (DR_STMT (drb))) break; - /* Check that the data-refs have the same constant size and step. */ + /* Check that the data-refs have the same constant size. */ tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))); tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))); if (!tree_fits_uhwi_p (sza) || !tree_fits_uhwi_p (szb) - || !tree_int_cst_equal (sza, szb) - || !tree_fits_shwi_p (DR_STEP (dra)) - || !tree_fits_shwi_p (DR_STEP (drb)) - || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb))) + || !tree_int_cst_equal (sza, szb)) + break; + + /* Check that the data-refs have the same step. */ + if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) break; /* Do not place the same access in the interleaving chain twice. */ @@ -2611,11 +2592,15 @@ vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) != type_size_a)) break; - /* The step (if not zero) is greater than the difference between - data-refs' inits. This splits groups into suitable sizes. */ - HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra)); - if (step != 0 && step <= (init_b - init_a)) - break; + /* If the step (if not zero or non-constant) is greater than the + difference between data-refs' inits this splits groups into + suitable sizes. */ + if (tree_fits_shwi_p (DR_STEP (dra))) + { + HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra)); + if (step != 0 && step <= (init_b - init_a)) + break; + } if (dump_enabled_p ()) { diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 31f26e794e6..f82decb798e 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1112,7 +1112,8 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, equivalent to the cost of GROUP_SIZE separate loads. If a grouped access is instead being provided by a load-and-permute operation, include the cost of the permutes. */ - if (!load_lanes_p && group_size > 1) + if (!load_lanes_p && group_size > 1 + && !STMT_VINFO_STRIDE_LOAD_P (stmt_info)) { /* Uses an even and odd extract operations or shuffle operations for each needed permute. */ @@ -1127,15 +1128,14 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, } /* The loads themselves. */ - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) + if (STMT_VINFO_STRIDE_LOAD_P (stmt_info) + && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) { /* N scalar loads plus gathering them into a vector. */ tree vectype = STMT_VINFO_VECTYPE (stmt_info); inside_cost += record_stmt_cost (body_cost_vec, ncopies * TYPE_VECTOR_SUBPARTS (vectype), scalar_load, stmt_info, 0, vect_body); - inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct, - stmt_info, 0, vect_body); } else vect_get_load_cost (first_dr, ncopies, @@ -1143,6 +1143,9 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, || group_size > 1 || slp_node), &inside_cost, &prologue_cost, prologue_cost_vec, body_cost_vec, true); + if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) + inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct, + stmt_info, 0, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -5657,7 +5660,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, gimple ptr_incr = NULL; int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies; - int i, j, group_size, group_gap; + int i, j, group_size = -1, group_gap; tree msq = NULL_TREE, lsq; tree offset = NULL_TREE; tree byte_offset = NULL_TREE; @@ -5790,9 +5793,11 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, return false; } - if (!slp && !PURE_SLP_STMT (stmt_info)) + group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); + if (!slp + && !PURE_SLP_STMT (stmt_info) + && !STMT_VINFO_STRIDE_LOAD_P (stmt_info)) { - group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); if (vect_load_lanes_supported (vectype, group_size)) load_lanes_p = true; else if (!vect_grouped_load_supported (vectype, group_size)) @@ -5847,7 +5852,22 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, } } else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) - ; + { + if ((grouped_load + && (slp || PURE_SLP_STMT (stmt_info))) + && (group_size > nunits + || nunits % group_size != 0 + /* ??? During analysis phase we are not called with the + slp node/instance we are in so whether we'll end up + with a permutation we don't know. Still we don't + support load permutations. */ + || slp_perm)) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "unhandled strided group load\n"); + return false; + } + } else { negative = tree_int_cst_compare (nested_in_vect_loop @@ -6136,34 +6156,65 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, prev_stmt_info = NULL; running_off = offvar; alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0); + int nloads = nunits; + tree ltype = TREE_TYPE (vectype); + if (slp) + { + nloads = nunits / group_size; + if (group_size < nunits) + ltype = build_vector_type (TREE_TYPE (vectype), group_size); + else + ltype = vectype; + ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype))); + ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + gcc_assert (!slp_perm); + } for (j = 0; j < ncopies; j++) { tree vec_inv; - vec_alloc (v, nunits); - for (i = 0; i < nunits; i++) + if (nloads > 1) { - tree newref, newoff; - gimple incr; - newref = build2 (MEM_REF, TREE_TYPE (vectype), - running_off, alias_off); + vec_alloc (v, nloads); + for (i = 0; i < nloads; i++) + { + tree newref, newoff; + gimple incr; + newref = build2 (MEM_REF, ltype, running_off, alias_off); - newref = force_gimple_operand_gsi (gsi, newref, true, - NULL_TREE, true, - GSI_SAME_STMT); - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref); - newoff = copy_ssa_name (running_off); - incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, + newref = force_gimple_operand_gsi (gsi, newref, true, + NULL_TREE, true, + GSI_SAME_STMT); + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref); + newoff = copy_ssa_name (running_off); + incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, + running_off, stride_step); + vect_finish_stmt_generation (stmt, incr, gsi); + + running_off = newoff; + } + + vec_inv = build_constructor (vectype, v); + new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); + new_stmt = SSA_NAME_DEF_STMT (new_temp); + } + else + { + new_stmt = gimple_build_assign (make_ssa_name (ltype), + build2 (MEM_REF, ltype, + running_off, alias_off)); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + tree newoff = copy_ssa_name (running_off); + gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, running_off, stride_step); vect_finish_stmt_generation (stmt, incr, gsi); running_off = newoff; } - vec_inv = build_constructor (vectype, v); - new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); - new_stmt = SSA_NAME_DEF_STMT (new_temp); - + if (slp) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); if (j == 0) STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; else