re PR target/88834 ([SVE] Poor addressing mode choices for LD2 and ST2)
gcc/ChangeLog: 2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> PR target/88834 * tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES. (get_alias_ptr_type_for_ptr_address): Likewise. (add_iv_candidate_for_use): Add scaled index candidate if useful. * tree-ssa-address.c (preferred_mem_scale_factor): New. * config/aarch64/aarch64.c (aarch64_classify_address): Relax allow_reg_index_p. gcc/testsuite/ChangeLog: 2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> PR target/88834 * gcc.target/aarch64/pr88834.c: New test. * gcc.target/aarch64/sve/struct_vect_1.c: Adjust. * gcc.target/aarch64/sve/struct_vect_14.c: Likewise. * gcc.target/aarch64/sve/struct_vect_15.c: Likewise. * gcc.target/aarch64/sve/struct_vect_16.c: Likewise. * gcc.target/aarch64/sve/struct_vect_17.c: Likewise. * gcc.target/aarch64/sve/struct_vect_7.c: Likewise. From-SVN: r272232
This commit is contained in:
parent
dd550c9965
commit
fa9863e7d3
@ -1,3 +1,14 @@
|
||||
2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||
|
||||
PR target/88834
|
||||
* tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
|
||||
IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
|
||||
(get_alias_ptr_type_for_ptr_address): Likewise.
|
||||
(add_iv_candidate_for_use): Add scaled index candidate if useful.
|
||||
* tree-ssa-address.c (preferred_mem_scale_factor): New.
|
||||
* config/aarch64/aarch64.c (aarch64_classify_address): Relax
|
||||
allow_reg_index_p.
|
||||
|
||||
2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||
|
||||
* config/aarch64/iterators.md (ADDSUB): Fix typo in comment.
|
||||
|
@ -6629,7 +6629,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
|
||||
bool allow_reg_index_p = (!load_store_pair_p
|
||||
&& (known_lt (GET_MODE_SIZE (mode), 16)
|
||||
|| vec_flags == VEC_ADVSIMD
|
||||
|| vec_flags == VEC_SVE_DATA));
|
||||
|| vec_flags & VEC_SVE_DATA));
|
||||
|
||||
/* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
|
||||
[Rn, #offset, MUL VL]. */
|
||||
|
@ -1,3 +1,14 @@
|
||||
2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||
|
||||
PR target/88834
|
||||
* gcc.target/aarch64/pr88834.c: New test.
|
||||
* gcc.target/aarch64/sve/struct_vect_1.c: Adjust.
|
||||
* gcc.target/aarch64/sve/struct_vect_14.c: Likewise.
|
||||
* gcc.target/aarch64/sve/struct_vect_15.c: Likewise.
|
||||
* gcc.target/aarch64/sve/struct_vect_16.c: Likewise.
|
||||
* gcc.target/aarch64/sve/struct_vect_17.c: Likewise.
|
||||
* gcc.target/aarch64/sve/struct_vect_7.c: Likewise.
|
||||
|
||||
2019-06-12 Marek Polacek <polacek@redhat.com>
|
||||
|
||||
PR c++/87410
|
||||
|
15
gcc/testsuite/gcc.target/aarch64/pr88834.c
Normal file
15
gcc/testsuite/gcc.target/aarch64/pr88834.c
Normal file
@ -0,0 +1,15 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-S -O3 -march=armv8.2-a+sve" } */
|
||||
|
||||
/* Combine Y and Z into X two elements per iteration: each even index I
   receives the sum y[i] + z[i], and the odd index that follows receives
   the difference y[i + 1] - z[i + 1].  Index I + 1 is read and written
   on every iteration, including the last one.  */
void
|
||||
f (int *restrict x, int *restrict y, int *restrict z, int n)
|
||||
{
|
||||
for (int i = 0; i < n; i += 2)
|
||||
{
|
||||
x[i] = y[i] + z[i];
|
||||
x[i + 1] = y[i + 1] - z[i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 1 } } */
|
@ -83,9 +83,9 @@ NAME(g4) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
|
||||
|
@ -43,12 +43,12 @@
|
||||
#undef NAME
|
||||
#undef TYPE
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
|
||||
|
@ -3,12 +3,12 @@
|
||||
|
||||
#include "struct_vect_14.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
|
||||
|
@ -3,12 +3,12 @@
|
||||
|
||||
#include "struct_vect_14.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
|
||||
|
@ -3,12 +3,12 @@
|
||||
|
||||
#include "struct_vect_14.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
|
||||
|
@ -78,9 +78,9 @@ g4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
|
||||
/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
|
||||
|
@ -1127,6 +1127,35 @@ maybe_fold_tmr (tree ref)
|
||||
return new_ref;
|
||||
}
|
||||
|
||||
/* Return the preferred index scale factor for accessing memory of mode
|
||||
MEM_MODE in the address space of pointer BASE. Assume that we're
|
||||
optimizing for speed if SPEED is true and for size otherwise. */
|
||||
unsigned int
|
||||
preferred_mem_scale_factor (tree base, machine_mode mem_mode,
|
||||
bool speed)
|
||||
{
|
||||
struct mem_address parts = {};
|
||||
addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base));
|
||||
unsigned int fact = GET_MODE_UNIT_SIZE (mem_mode);
|
||||
|
||||
/* Addressing mode "base + index". */
|
||||
parts.index = integer_one_node;
|
||||
parts.base = integer_one_node;
|
||||
rtx addr = addr_for_mem_ref (&parts, as, false);
|
||||
unsigned cost = address_cost (addr, mem_mode, as, speed);
|
||||
|
||||
/* Addressing mode "base + index << scale". */
|
||||
parts.step = wide_int_to_tree (sizetype, fact);
|
||||
addr = addr_for_mem_ref (&parts, as, false);
|
||||
unsigned new_cost = address_cost (addr, mem_mode, as, speed);
|
||||
|
||||
/* Compare the cost of an address with an unscaled index with
|
||||
a scaled index and return factor if useful. */
|
||||
if (new_cost < cost)
|
||||
return GET_MODE_UNIT_SIZE (mem_mode);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Dump PARTS to FILE. */
|
||||
|
||||
extern void dump_mem_address (FILE *, struct mem_address *);
|
||||
|
@ -39,4 +39,7 @@ tree create_mem_ref (gimple_stmt_iterator *, tree,
|
||||
extern void copy_ref_info (tree, tree);
|
||||
tree maybe_fold_tmr (tree);
|
||||
|
||||
/* Return the index scale factor preferred for accesses of mode MEM_MODE
   in the address space of pointer BASE; SPEED selects optimizing for
   speed rather than for size.  */
extern unsigned int preferred_mem_scale_factor (tree base,
|
||||
machine_mode mem_mode,
|
||||
bool speed);
|
||||
#endif /* GCC_TREE_SSA_ADDRESS_H */
|
||||
|
@ -2381,11 +2381,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
|
||||
switch (gimple_call_internal_fn (call))
|
||||
{
|
||||
case IFN_MASK_LOAD:
|
||||
case IFN_MASK_LOAD_LANES:
|
||||
if (op_p == gimple_call_arg_ptr (call, 0))
|
||||
return TREE_TYPE (gimple_call_lhs (call));
|
||||
return NULL_TREE;
|
||||
|
||||
case IFN_MASK_STORE:
|
||||
case IFN_MASK_STORE_LANES:
|
||||
if (op_p == gimple_call_arg_ptr (call, 0))
|
||||
return TREE_TYPE (gimple_call_arg (call, 3));
|
||||
return NULL_TREE;
|
||||
@ -3430,6 +3432,26 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
|
||||
basetype = sizetype;
|
||||
record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
|
||||
|
||||
/* Compare the cost of an address with an unscaled index with the cost of
|
||||
an address with a scaled index and add candidate if useful. */
|
||||
poly_int64 step;
|
||||
if (use != NULL
|
||||
&& poly_int_tree_p (iv->step, &step)
|
||||
&& address_p (use->type))
|
||||
{
|
||||
poly_int64 new_step;
|
||||
unsigned int fact = preferred_mem_scale_factor
|
||||
(use->iv->base,
|
||||
TYPE_MODE (use->mem_type),
|
||||
optimize_loop_for_speed_p (data->current_loop));
|
||||
|
||||
if (fact != 1
|
||||
&& multiple_p (step, fact, &new_step))
|
||||
add_candidate (data, size_int (0),
|
||||
wide_int_to_tree (sizetype, new_step),
|
||||
true, NULL);
|
||||
}
|
||||
|
||||
/* Record common candidate with constant offset stripped in base.
|
||||
Like the use itself, we also add candidate directly for it. */
|
||||
base = strip_offset (iv->base, &offset);
|
||||
@ -7042,6 +7064,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use)
|
||||
{
|
||||
case IFN_MASK_LOAD:
|
||||
case IFN_MASK_STORE:
|
||||
case IFN_MASK_LOAD_LANES:
|
||||
case IFN_MASK_STORE_LANES:
|
||||
/* The second argument contains the correct alias type. */
|
||||
gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
|
||||
return TREE_TYPE (gimple_call_arg (call, 1));
|
||||
|
Loading…
Reference in New Issue
Block a user