diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c3aa1130b6e..5fbf319475f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2019-08-15 Richard Sandiford + + * config/aarch64/aarch64.c (aarch64_add_offset): In the fallback + multiplication case, try to compute VG * (lowest set bit) directly + rather than always basing the multiplication on VG. Use + expand_mult for the multiplication if we can. + 2019-08-15 Richard Sandiford * config/aarch64/aarch64-protos.h diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index b8c947e97b7..3b0dceaa27f 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -73,6 +73,7 @@ #include "selftest-rtl.h" #include "rtx-vector-builder.h" #include "intl.h" +#include "expmed.h" /* This file should be included last. */ #include "target-def.h" @@ -3465,20 +3466,36 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, } else { - /* Use CNTD, then multiply it by FACTOR. */ - val = gen_int_mode (poly_int64 (2, 2), mode); + /* Base the factor on LOW_BIT if we can calculate LOW_BIT + directly, since that should increase the chances of being + able to use a shift and add sequence. If LOW_BIT itself + is out of range, just use CNTD. */ + if (low_bit <= 16 * 8) + factor /= low_bit; + else + low_bit = 1; + + val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode); val = aarch64_force_temporary (mode, temp1, val); - /* Go back to using a negative multiplication factor if we have - no register from which to subtract. */ - if (code == MINUS && src == const0_rtx) + if (can_create_pseudo_p ()) { - factor = -factor; - code = PLUS; + rtx coeff1 = gen_int_mode (factor, mode); + val = expand_mult (mode, val, coeff1, NULL_RTX, false, true); + } + else + { + /* Go back to using a negative multiplication factor if we have + no register from which to subtract. */ + if (code == MINUS && src == const0_rtx) + { + factor = -factor; + code = PLUS; + } + rtx coeff1 = gen_int_mode (factor, mode); + coeff1 = aarch64_force_temporary (mode, temp2, coeff1); + val = gen_rtx_MULT (mode, val, coeff1); } - rtx coeff1 = gen_int_mode (factor, mode); - coeff1 = aarch64_force_temporary (mode, temp2, coeff1); - val = gen_rtx_MULT (mode, val, coeff1); } if (shift > 0) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9f667f2f9a8..0008ff94b37 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-08-15 Richard Sandiford + + * gcc.target/aarch64/sve/loop_add_4.c: Expect 10 INCWs and + INCDs rather than 8. + 2019-08-15 Richard Sandiford * gcc.target/aarch64/sve/revb_1.c: Restrict to little-endian targets. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c index 7f02497e839..9ead9c21b35 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c @@ -68,7 +68,8 @@ TEST_ALL (LOOP) /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */ /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ -/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 8 } } */ +/* 2 for the calculations of -17 and 17. */ +/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 10 } } */ /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #16\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #15\n} 1 } } */ @@ -85,7 +86,8 @@ TEST_ALL (LOOP) /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */ /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */ /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */ -/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 8 } } */ +/* 2 for the calculations of -17 and 17. */ +/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 10 } } */ /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #16\n} 1 } } */ /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #15\n} 1 } } */