Fix SLP downward group access classification (PR92420)

This PR was caused by the SLP handling in get_group_load_store_type
returning VMAT_CONTIGUOUS rather than VMAT_CONTIGUOUS_REVERSE for
downward groups.
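
For illustration only (this loop is not from the commit; the names
"scale" and "n" are made up), a "downward" group access is one like the
loads from y below: y[-1 - i].r and y[-1 - i].i form a two-element group
whose address decreases as i increases, so the vectorised code must load
the elements contiguously and then reverse them with a permute
(VMAT_CONTIGUOUS_REVERSE) rather than use them as-is (VMAT_CONTIGUOUS):

/* Illustrative sketch of a downward group access; not GCC code.  */
struct C { int r, i; };

void
scale (struct C *__restrict y, struct C *__restrict z, int n)
{
  for (int i = 0; i < n; i++)
    {
      z[i].r = 2 * y[-1 - i].r;
      z[i].i = 2 * y[-1 - i].i;
    }
}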

A more elaborate fix would be to try to combine the reverse permutation
into SLP_TREE_LOAD_PERMUTATION for loads, but that's really a follow-on
optimisation and not backport material.  It might also not be a win
in all cases: the target might support (say) reversing and odd/even
swaps as independent permutes without recognising the combined form,
as the sketch below illustrates.
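
As a sketch of what that combination would involve (illustrative only;
the element count, index vectors and composition order below are
assumptions, not GCC code): applying an odd/even swap after a
whole-vector reverse is equivalent to a single permute whose mask is the
composition of the two index vectors, and it is that combined mask the
target would need to support directly.

#include <stdio.h>

#define VF 4	/* assumed vector element count */

int
main (void)
{
  /* Whole-vector reverse and odd/even swap, as separate permutes:
     result[i] = input[mask[i]].  */
  unsigned reverse[VF] = { 3, 2, 1, 0 };
  unsigned swap[VF] = { 1, 0, 3, 2 };
  unsigned combined[VF];

  /* Composing them: applying swap to an already-reversed vector
     selects element reverse[swap[i]] of the original vector.  */
  for (unsigned i = 0; i < VF; i++)
    combined[i] = reverse[swap[i]];

  for (unsigned i = 0; i < VF; i++)
    printf ("%u ", combined[i]);	/* prints: 2 3 0 1 */
  printf ("\n");
  return 0;
}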

2019-11-11  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/92420
	* tree-vect-stmts.c (get_negative_load_store_type): Move further
	up file.
	(get_group_load_store_type): Use it for reversed SLP accesses.

gcc/testsuite/
	* gcc.dg/vect/pr92420.c: New test.

From-SVN: r278064

--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2019-11-11  Richard Sandiford  <richard.sandiford@arm.com>
+
+	PR tree-optimization/92420
+	* tree-vect-stmts.c (get_negative_load_store_type): Move further
+	up file.
+	(get_group_load_store_type): Use it for reversed SLP accesses.
+
 2019-11-11  Jan Hubicka  <hubicka@ucw.cz>
 
 	* ipa-prop.c (ipa_propagate_indirect_call_infos): Remove ipcp

--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2019-11-11  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* gcc.dg/vect/pr92420.c: New test.
+
 2019-11-11  Claudiu Zissulescu  <claziss@gmail.com>
 
 	* gcc.target/arc/delay-slot-limm.c: New test.

--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr92420.c
@@ -0,0 +1,48 @@
+/* { dg-additional-options "-mavx2" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#define N 16
+struct C { int r, i; };
+struct C a[N], b[N], c[N], d[N], e[N];
+
+__attribute__((noipa)) static void
+foo (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w)
+{
+  int i;
+  for (int i = 0; i < w; i++)
+    {
+      z[i].r = x[i].r * y[-1 - i].r - x[i].i * y[-1 - i].i;
+      z[i].i = x[i].i * y[-1 - i].r + x[i].r * y[-1 - i].i;
+    }
+}
+
+__attribute__((noipa)) static void
+bar (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w)
+{
+  int i;
+  for (int i = 0; i < w; i++)
+    {
+      z[i].r = x[i].r * y[i].r - x[i].i * y[i].i;
+      z[i].i = x[i].i * y[i].r + x[i].r * y[i].i;
+    }
+}
+
+int
+main ()
+{
+  check_vect ();
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      a[i].r = N - i; a[i].i = i - N;
+      b[i].r = i - N; b[i].i = i + N;
+      c[i].r = -1 - i; c[i].i = 2 * N - 1 - i;
+    }
+  foo (a, b + N, d, N);
+  bar (a, c, e, N);
+  for (i = 0; i < N; ++i)
+    if (d[i].r != e[i].r || d[i].i != e[i].i)
+      __builtin_abort ();
+  return 0;
+}
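
A note on the test's construction (my reading, not part of the commit):
foo reads y with a negative step, and main passes b + N so that
y[-1 - i] is b[N - 1 - i]; bar reads the pre-reversed array c forwards,
and main initialises c[i] to equal b[N - 1 - i], so d and e must match
element for element.  The index identity behind that can be checked in
isolation:

#include <assert.h>

#define N 16

int
main (void)
{
  for (int i = 0; i < N; i++)
    {
      /* With y == b + N, y[-1 - i] is b[j] for j = N - 1 - i.  The test
         sets b[j].r = j - N, b[j].i = j + N, c[i].r = -1 - i and
         c[i].i = 2 * N - 1 - i, so b[j] and c[i] must be equal.  */
      int j = N - 1 - i;
      assert (j - N == -1 - i);         /* b[j].r == c[i].r */
      assert (j + N == 2 * N - 1 - i);  /* b[j].i == c[i].i */
    }
  return 0;
}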

--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2147,6 +2147,56 @@ perm_mask_for_reverse (tree vectype)
   return vect_gen_perm_mask_checked (vectype, indices);
 }
 
+/* A subroutine of get_load_store_type, with a subset of the same
+   arguments.  Handle the case where STMT_INFO is a load or store that
+   accesses consecutive elements with a negative step.  */
+
+static vect_memory_access_type
+get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
+			      vec_load_store_type vls_type,
+			      unsigned int ncopies)
+{
+  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
+  dr_alignment_support alignment_support_scheme;
+
+  if (ncopies > 1)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "multiple types with negative step.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
+  if (alignment_support_scheme != dr_aligned
+      && alignment_support_scheme != dr_unaligned_supported)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "negative step but alignment required.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  if (vls_type == VLS_STORE_INVARIANT)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "negative step with invariant source;"
+			 " no permute needed.\n");
+      return VMAT_CONTIGUOUS_DOWN;
+    }
+
+  if (!perm_mask_for_reverse (vectype))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "negative step and reversing not supported.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  return VMAT_CONTIGUOUS_REVERSE;
+}
+
 /* STMT_INFO is either a masked or unconditional store.  Return the value
    being stored.  */
@@ -2273,7 +2323,15 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
 				 "Peeling for outer loop is not supported\n");
 	      return false;
 	    }
-	  *memory_access_type = VMAT_CONTIGUOUS;
+	  int cmp = compare_step_with_zero (stmt_info);
+	  if (cmp < 0)
+	    *memory_access_type = get_negative_load_store_type
+	      (stmt_info, vectype, vls_type, 1);
+	  else
+	    {
+	      gcc_assert (!loop_vinfo || cmp > 0);
+	      *memory_access_type = VMAT_CONTIGUOUS;
+	    }
 	}
     }
   else
@@ -2375,56 +2433,6 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
   return true;
 }
 
-/* A subroutine of get_load_store_type, with a subset of the same
-   arguments.  Handle the case where STMT_INFO is a load or store that
-   accesses consecutive elements with a negative step.  */
-
-static vect_memory_access_type
-get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
-			      vec_load_store_type vls_type,
-			      unsigned int ncopies)
-{
-  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
-  dr_alignment_support alignment_support_scheme;
-
-  if (ncopies > 1)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "multiple types with negative step.\n");
-      return VMAT_ELEMENTWISE;
-    }
-
-  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
-  if (alignment_support_scheme != dr_aligned
-      && alignment_support_scheme != dr_unaligned_supported)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "negative step but alignment required.\n");
-      return VMAT_ELEMENTWISE;
-    }
-
-  if (vls_type == VLS_STORE_INVARIANT)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_NOTE, vect_location,
-			 "negative step with invariant source;"
-			 " no permute needed.\n");
-      return VMAT_CONTIGUOUS_DOWN;
-    }
-
-  if (!perm_mask_for_reverse (vectype))
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "negative step and reversing not supported.\n");
-      return VMAT_ELEMENTWISE;
-    }
-
-  return VMAT_CONTIGUOUS_REVERSE;
-}
-
 /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
    if there is a memory access type that the vectorized form can use,
    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers