re PR target/88834 ([SVE] Poor addressing mode choices for LD2 and ST2)

gcc/ChangeLog:

2019-06-13  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

	PR target/88834
	* tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
	IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
	(get_alias_ptr_type_for_ptr_address): Likewise.
	(add_iv_candidate_for_use): Add scaled index candidate if useful.
	* tree-ssa-address.c (preferred_mem_scale_factor): New.
	* config/aarch64/aarch64.c (aarch64_classify_address): Relax
	allow_reg_index_p.

gcc/testsuite/ChangeLog:

2019-06-13  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

	PR target/88834
	* gcc.target/aarch64/pr88834.c: New test.
	* gcc.target/aarch64/sve/struct_vect_1.c: Adjust.
	* gcc.target/aarch64/sve/struct_vect_14.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_15.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_16.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_17.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_7.c: Likewise.

From-SVN: r272232
This commit is contained in:
Kugan Vivekanandarajah 2019-06-13 03:18:54 +00:00 committed by Kugan Vivekanandarajah
parent dd550c9965
commit fa9863e7d3
13 changed files with 118 additions and 25 deletions

View File

@ -1,3 +1,14 @@
2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
PR target/88834
* tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
(get_alias_ptr_type_for_ptr_address): Likewise.
(add_iv_candidate_for_use): Add scaled index candidate if useful.
* tree-ssa-address.c (preferred_mem_scale_factor): New.
* config/aarch64/aarch64.c (aarch64_classify_address): Relax
allow_reg_index_p.
2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
* config/aarch64/iterators.md (ADDSUB): Fix typo in comment.

View File

@ -6629,7 +6629,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
bool allow_reg_index_p = (!load_store_pair_p
&& (known_lt (GET_MODE_SIZE (mode), 16)
|| vec_flags == VEC_ADVSIMD
|| vec_flags == VEC_SVE_DATA));
|| vec_flags & VEC_SVE_DATA));
/* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
[Rn, #offset, MUL VL]. */

View File

@ -1,3 +1,14 @@
2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
PR target/88834
* gcc.target/aarch64/pr88834.c: New test.
* gcc.target/aarch64/sve/struct_vect_1.c: Adjust.
* gcc.target/aarch64/sve/struct_vect_14.c: Likewise.
* gcc.target/aarch64/sve/struct_vect_15.c: Likewise.
* gcc.target/aarch64/sve/struct_vect_16.c: Likewise.
* gcc.target/aarch64/sve/struct_vect_17.c: Likewise.
* gcc.target/aarch64/sve/struct_vect_7.c: Likewise.
2019-06-12 Marek Polacek <polacek@redhat.com>
PR c++/87410

View File

@ -0,0 +1,15 @@
/* { dg-do compile } */
/* { dg-options "-S -O3 -march=armv8.2-a+sve" } */
void
f (int *restrict x, int *restrict y, int *restrict z, int n)
{
for (int i = 0; i < n; i += 2)
{
x[i] = y[i] + z[i];
x[i + 1] = y[i + 1] - z[i + 1];
}
}
/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 1 } } */

View File

@ -83,9 +83,9 @@ NAME(g4) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
}
}
/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */

View File

@ -43,12 +43,12 @@
#undef NAME
#undef TYPE
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */

View File

@ -3,12 +3,12 @@
#include "struct_vect_14.c"
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */

View File

@ -3,12 +3,12 @@
#include "struct_vect_14.c"
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */

View File

@ -3,12 +3,12 @@
#include "struct_vect_14.c"
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */

View File

@ -78,9 +78,9 @@ g4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
}
}
/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */

View File

@ -1127,6 +1127,35 @@ maybe_fold_tmr (tree ref)
return new_ref;
}
/* Return the preferred index scale factor for accessing memory of mode
MEM_MODE in the address space of pointer BASE. Assume that we're
optimizing for speed if SPEED is true and for size otherwise. */
unsigned int
preferred_mem_scale_factor (tree base, machine_mode mem_mode,
bool speed)
{
struct mem_address parts = {};
addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base));
unsigned int fact = GET_MODE_UNIT_SIZE (mem_mode);
/* Addressing mode "base + index". */
parts.index = integer_one_node;
parts.base = integer_one_node;
rtx addr = addr_for_mem_ref (&parts, as, false);
unsigned cost = address_cost (addr, mem_mode, as, speed);
/* Addressing mode "base + index << scale". */
parts.step = wide_int_to_tree (sizetype, fact);
addr = addr_for_mem_ref (&parts, as, false);
unsigned new_cost = address_cost (addr, mem_mode, as, speed);
/* Compare the cost of an address with an unscaled index with
a scaled index and return factor if useful. */
if (new_cost < cost)
return GET_MODE_UNIT_SIZE (mem_mode);
return 1;
}
/* Dump PARTS to FILE. */
extern void dump_mem_address (FILE *, struct mem_address *);

View File

@ -39,4 +39,7 @@ tree create_mem_ref (gimple_stmt_iterator *, tree,
extern void copy_ref_info (tree, tree);
tree maybe_fold_tmr (tree);
extern unsigned int preferred_mem_scale_factor (tree base,
machine_mode mem_mode,
bool speed);
#endif /* GCC_TREE_SSA_ADDRESS_H */

View File

@ -2381,11 +2381,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
switch (gimple_call_internal_fn (call))
{
case IFN_MASK_LOAD:
case IFN_MASK_LOAD_LANES:
if (op_p == gimple_call_arg_ptr (call, 0))
return TREE_TYPE (gimple_call_lhs (call));
return NULL_TREE;
case IFN_MASK_STORE:
case IFN_MASK_STORE_LANES:
if (op_p == gimple_call_arg_ptr (call, 0))
return TREE_TYPE (gimple_call_arg (call, 3));
return NULL_TREE;
@ -3430,6 +3432,26 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
basetype = sizetype;
record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
/* Compare the cost of an address with an unscaled index with the cost of
an address with a scaled index and add candidate if useful. */
poly_int64 step;
if (use != NULL
&& poly_int_tree_p (iv->step, &step)
&& address_p (use->type))
{
poly_int64 new_step;
unsigned int fact = preferred_mem_scale_factor
(use->iv->base,
TYPE_MODE (use->mem_type),
optimize_loop_for_speed_p (data->current_loop));
if (fact != 1
&& multiple_p (step, fact, &new_step))
add_candidate (data, size_int (0),
wide_int_to_tree (sizetype, new_step),
true, NULL);
}
/* Record common candidate with constant offset stripped in base.
Like the use itself, we also add candidate directly for it. */
base = strip_offset (iv->base, &offset);
@ -7042,6 +7064,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use)
{
case IFN_MASK_LOAD:
case IFN_MASK_STORE:
case IFN_MASK_LOAD_LANES:
case IFN_MASK_STORE_LANES:
/* The second argument contains the correct alias type. */
gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
return TREE_TYPE (gimple_call_arg (call, 1));