From 5c9669a0e6cbf477a03024522943197bdb2682d4 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Thu, 2 Jul 2020 10:14:33 +0100 Subject: [PATCH] vect: Fix an ICE in exact_div [PR95961] In the test case for PR95961, the vectorization factor computed by vect_determine_vectorization_factor is [8,8]. But this is updated to [1,1] later by vect_update_vf_for_slp. When we call vect_get_num_vectors in vect_enhance_data_refs_alignment, the number of scalars, which is based on the vectorization factor, is not a multiple of the number of elements in the vector type. This leads to the ICE. This isn't a simple stream of contiguous vector accesses. It's hard to predict from the available information how many vector accesses we'll actually need per iteration. As discussed, here we should use the number of scalars instead of the number of vectors as an upper bound for the loop saving info about DR in the hash table. 2020-07-02 Felix Yang gcc/ PR tree-optimization/95961 * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Use the number of scalars instead of the number of vectors as an upper bound for the loop saving info about DR in the hash table. Remove unused local variables. gcc/testsuite/ PR tree-optimization/95961 * gcc.target/aarch64/sve/pr95961.c: New test. 
--- .../gcc.target/aarch64/sve/pr95961.c | 16 +++++++++++++ gcc/tree-vect-data-refs.c | 23 +++++-------------- 2 files changed, 22 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr95961.c diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr95961.c b/gcc/testsuite/gcc.target/aarch64/sve/pr95961.c new file mode 100644 index 00000000000..b9802c8d0cd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr95961.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8.2-a+sve -fno-vect-cost-model" } */ + +typedef struct { + unsigned short mprr_2[5][16][16]; +} ImageParameters; +int s[16][2]; +void intrapred_luma_16x16(ImageParameters *img, int s0) +{ + for (int j=0; j < 16; j++) + for (int i=0; i < 16; i++) + { + img->mprr_2[1 ][j][i]=s[j][1]; + img->mprr_2[2 ][j][i]=s0; + } +} diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index eb8288e7a85..2462276e7c2 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -1722,7 +1722,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) dr_vec_info *first_store = NULL; dr_vec_info *dr0_info = NULL; struct data_reference *dr; - unsigned int i, j; + unsigned int i; bool do_peeling = false; bool do_versioning = false; unsigned int npeel = 0; @@ -1730,9 +1730,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) bool one_misalignment_unknown = false; bool one_dr_unsupportable = false; dr_vec_info *unsupportable_dr_info = NULL; - poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - unsigned possible_npeel_number = 1; - tree vectype; unsigned int mis, same_align_drs_max = 0; hash_table peeling_htab (1); @@ -1792,7 +1789,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0; - vectype = STMT_VINFO_VECTYPE (stmt_info); /* If known_alignment_for_access_p then we have set DR_MISALIGNMENT which is only done if we know it at compiler 
time, so it is safe to assume target alignment is constant. @@ -1819,22 +1815,17 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) vectorization factor. We do this automatically for cost model, since we calculate cost for every peeling option. */ + poly_uint64 nscalars = npeel_tmp; if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) { - poly_uint64 nscalars = (STMT_SLP_TYPE (stmt_info) - ? vf * DR_GROUP_SIZE (stmt_info) : vf); - possible_npeel_number - = vect_get_num_vectors (nscalars, vectype); - - /* NPEEL_TMP is 0 when there is no misalignment, but also - allow peeling NELEMENTS. */ - if (DR_MISALIGNMENT (dr_info) == 0) - possible_npeel_number++; + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + nscalars = (STMT_SLP_TYPE (stmt_info) + ? vf * DR_GROUP_SIZE (stmt_info) : vf); } /* Save info about DR in the hash table. Also include peeling amounts according to the explanation above. */ - for (j = 0; j < possible_npeel_number; j++) + while (known_le (npeel_tmp, nscalars)) { vect_peeling_hash_insert (&peeling_htab, loop_vinfo, dr_info, npeel_tmp); @@ -2059,8 +2050,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) if (do_peeling) { stmt_vec_info stmt_info = dr0_info->stmt; - vectype = STMT_VINFO_VECTYPE (stmt_info); - if (known_alignment_for_access_p (dr0_info)) { bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),