[AArch64][SVE] Utilize ASRD instruction for division and remainder
2019-09-30 Yuliang Wang <yuliang.wang@arm.com> gcc/ * config/aarch64/aarch64-sve.md (sdiv_pow2<mode>3): New pattern for ASRD. * config/aarch64/iterators.md (UNSPEC_ASRD): New unspec. * internal-fn.def (IFN_DIV_POW2): New internal function. * optabs.def (sdiv_pow2_optab): New optab. * tree-vect-patterns.c (vect_recog_divmod_pattern): Modify pattern to support new operation. * doc/md.texi (sdiv_pow2@var{m3}): Documentation for the above. * doc/sourcebuild.texi (vect_sdiv_pow2_si): Document new target selector. gcc/testsuite/ * gcc.dg/vect/vect-sdiv-pow2-1.c: New test. * gcc.target/aarch64/sve/asrdiv_1.c: As above. * lib/target-supports.exp (check_effective_target_vect_sdiv_pow2_si): Return true for AArch64 with SVE. From-SVN: r276343
This commit is contained in:
parent
dcdd0f0557
commit
c0c2f01390
|
@ -1,3 +1,16 @@
|
|||
2019-09-30 Yuliang Wang <yuliang.wang@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-sve.md (sdiv_pow2<mode>3):
|
||||
New pattern for ASRD.
|
||||
* config/aarch64/iterators.md (UNSPEC_ASRD): New unspec.
|
||||
* internal-fn.def (IFN_DIV_POW2): New internal function.
|
||||
* optabs.def (sdiv_pow2_optab): New optab.
|
||||
* tree-vect-patterns.c (vect_recog_divmod_pattern):
|
||||
Modify pattern to support new operation.
|
||||
* doc/md.texi (sdiv_pow2@var{m3}): Documentation for the above.
|
||||
* doc/sourcebuild.texi (vect_sdiv_pow2_si):
|
||||
Document new target selector.
|
||||
|
||||
2019-09-30 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* config/aarch64/aarch64.c (aarch64_layout_frame): Use crtl->abi
|
||||
|
|
|
@ -71,6 +71,7 @@
|
|||
;; ---- [INT] Binary logical operations
|
||||
;; ---- [INT] Binary logical operations (inverted second input)
|
||||
;; ---- [INT] Shifts
|
||||
;; ---- [INT] Shifts (rounding towards 0)
|
||||
;; ---- [FP] General binary arithmetic corresponding to rtx codes
|
||||
;; ---- [FP] General binary arithmetic corresponding to unspecs
|
||||
;; ---- [FP] Addition
|
||||
|
@ -2563,6 +2564,46 @@
|
|||
[(set_attr "movprfx" "yes")]
|
||||
)
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; ---- [INT] Shifts (rounding towards 0)
|
||||
;; -------------------------------------------------------------------------
|
||||
;; Includes:
|
||||
;; - ASRD
|
||||
;; -------------------------------------------------------------------------
|
||||
|
||||
;; Unpredicated arithmetic right shift for division by power-of-2.
;; Operand 1 is the vector being divided and operand 2 is the immediate
;; shift amount.  The expander has no predicate input of its own: it sets
;; operand 3 to the register returned by aarch64_ptrue_reg and wraps the
;; UNSPEC_ASRD operation in UNSPEC_PRED_X, which is the form the
;; predicated "*sdiv_pow2<mode>3" insn matches.
|
||||
(define_expand "sdiv_pow2<mode>3"
|
||||
[(set (match_operand:SVE_I 0 "register_operand")
|
||||
(unspec:SVE_I
|
||||
[(match_dup 3)
|
||||
(unspec:SVE_I
|
||||
[(match_operand:SVE_I 1 "register_operand")
|
||||
(match_operand 2 "aarch64_simd_rshift_imm")]
|
||||
UNSPEC_ASRD)]
|
||||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE"
|
||||
{
|
||||
operands[3] = aarch64_ptrue_reg (<VPRED>mode);
|
||||
}
|
||||
)
|
||||
|
||||
;; Predicated ASRD with PTRUE.
|
||||
(define_insn "*sdiv_pow2<mode>3"
|
||||
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
|
||||
(unspec:SVE_I
|
||||
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
|
||||
(unspec:SVE_I
|
||||
[(match_operand:SVE_I 2 "register_operand" "0, w")
|
||||
(match_operand 3 "aarch64_simd_rshift_imm")]
|
||||
UNSPEC_ASRD)]
|
||||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE"
|
||||
"@
|
||||
asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
|
||||
movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
|
||||
[(set_attr "movprfx" "*,yes")]
|
||||
)
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; ---- [FP] General binary arithmetic corresponding to rtx codes
|
||||
;; -------------------------------------------------------------------------
|
||||
|
|
|
@ -538,6 +538,7 @@
|
|||
UNSPEC_SMULHRS ; Used in aarch64-sve2.md.
|
||||
UNSPEC_UMULHS ; Used in aarch64-sve2.md.
|
||||
UNSPEC_UMULHRS ; Used in aarch64-sve2.md.
|
||||
UNSPEC_ASRD ; Used in aarch64-sve.md.
|
||||
])
|
||||
|
||||
;; ------------------------------------------------------------------
|
||||
|
|
|
@ -5414,6 +5414,17 @@ op0 = (narrow) (((((wide) op1 * (wide) op2) >> (N / 2 - 2)) + 1) >> 1);
|
|||
where the sign of @samp{narrow} determines whether this is a signed
|
||||
or unsigned operation, and @var{N} is the size of @samp{wide} in bits.
|
||||
|
||||
@cindex @code{sdiv_pow2@var{m3}} instruction pattern
|
||||
@item @samp{sdiv_pow2@var{m3}}
|
||||
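Signed division of operand 1 by a power of 2, with the result stored in operand 0.  Operand 2 is an immediate, @samp{imm} below, giving the base-2 logarithm of the divisor.  Equivalent to: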
|
||||
@smallexample
|
||||
signed op0, op1;
|
||||
@dots{}
|
||||
op0 = op1 / (1 << imm);
|
||||
@end smallexample
|
||||
|
||||
@cindex @code{vec_shl_insert_@var{m}} instruction pattern
|
||||
@item @samp{vec_shl_insert_@var{m}}
|
||||
Shift the elements in vector input operand 1 left one element (i.e.@:
|
||||
|
|
|
@ -1446,6 +1446,10 @@ of bytes.
|
|||
Target supports both signed and unsigned multiply-high-with-round-and-scale
|
||||
operations on vectors of half-words.
|
||||
|
||||
@item vect_sdiv_pow2_si
|
||||
Target supports signed division by constant power-of-2 operations
|
||||
on vectors of 4-byte integers.
|
||||
|
||||
@item vect_condition
|
||||
Target supports vector conditional operations.
|
||||
|
||||
|
|
|
@ -140,6 +140,8 @@ DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
|
|||
DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW,
|
||||
vec_shl_insert, binary)
|
||||
|
||||
DEF_INTERNAL_OPTAB_FN (DIV_POW2, ECF_CONST | ECF_NOTHROW, sdiv_pow2, binary)
|
||||
|
||||
DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
|
||||
DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
|
||||
DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
|
||||
|
|
|
@ -347,6 +347,7 @@ OPTAB_D (smulhs_optab, "smulhs$a3")
|
|||
OPTAB_D (smulhrs_optab, "smulhrs$a3")
|
||||
OPTAB_D (umulhs_optab, "umulhs$a3")
|
||||
OPTAB_D (umulhrs_optab, "umulhrs$a3")
|
||||
OPTAB_D (sdiv_pow2_optab, "sdiv_pow2$a3")
|
||||
OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a")
|
||||
OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a")
|
||||
OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a")
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2019-09-30 Yuliang Wang <yuliang.wang@arm.com>
|
||||
|
||||
* gcc.dg/vect/vect-sdiv-pow2-1.c: New test.
|
||||
* gcc.target/aarch64/sve/asrdiv_1.c: As above.
|
||||
* lib/target-supports.exp (check_effective_target_vect_sdiv_pow2_si):
|
||||
Return true for AArch64 with SVE.
|
||||
|
||||
2019-09-30 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* gcc.target/aarch64/torture/simd-abi-9.c: New test.
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define DIV(x,y) ((x)/(y))
|
||||
#define MOD(x,y) ((x)%(y))
|
||||
|
||||
#define TEMPLATE(PO2,OP) \
|
||||
void __attribute__ ((noipa)) \
|
||||
f_##PO2##_##OP (int *restrict a, int *restrict b, __INTPTR_TYPE__ n) \
|
||||
{ \
|
||||
for (__INTPTR_TYPE__ i = 0; i < n; ++i) \
|
||||
a[i] = OP (b[i], (1 << PO2)); \
|
||||
}
|
||||
#define TEMPLATES(PO2) \
|
||||
TEMPLATE (PO2,DIV); \
|
||||
TEMPLATE (PO2,MOD);
|
||||
|
||||
TEMPLATES (1);
|
||||
TEMPLATES (2);
|
||||
TEMPLATES (3);
|
||||
TEMPLATES (7);
|
||||
TEMPLATES (8);
|
||||
TEMPLATES (10);
|
||||
TEMPLATES (15);
|
||||
TEMPLATES (16);
|
||||
TEMPLATES (20);
|
||||
|
||||
typedef void (*func_t) (int *, int *, __INTPTR_TYPE__);
|
||||
typedef struct {
|
||||
int po2;
|
||||
func_t div;
|
||||
func_t mod;
|
||||
} fn_t;
|
||||
const fn_t fns[] = {
|
||||
#define FN_PAIR(PO2) { PO2, f_##PO2##_DIV, f_##PO2##_MOD }
|
||||
FN_PAIR (1),
|
||||
FN_PAIR (2),
|
||||
FN_PAIR (3),
|
||||
FN_PAIR (7),
|
||||
FN_PAIR (8),
|
||||
FN_PAIR (10),
|
||||
FN_PAIR (15),
|
||||
FN_PAIR (16),
|
||||
FN_PAIR (20),
|
||||
};
|
||||
|
||||
/* Return 1 shifted left by X, i.e. 2 to the power X.  The function is
   marked noipa/noinline so that callers obtain the value through a real
   call instead of a constant folded at compile time.  */
int __attribute__ ((noipa, noinline))
power2 (int x)
{
  int result = 1;

  result <<= x;
  return result;
}
|
||||
|
||||
#define N 50
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
int a[N], b[N], c[N];
|
||||
|
||||
for (int i = 0; i < (sizeof(fns)/sizeof(fns[0])); i++)
|
||||
{
|
||||
int p = power2 (fns[i].po2);
|
||||
for (int j = 0; j < N; j++)
|
||||
a[j] = ((p << 4) * j) / (N - 1) - (p << 5);
|
||||
|
||||
fns[i].div (b, a, N);
|
||||
fns[i].mod (c, a, N);
|
||||
|
||||
for (int j = 0; j < N; j++)
|
||||
if (a[j] != (b[j] * p + c[j]))
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump {\.DIV_POW2} "vect" { target vect_sdiv_pow2_si } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 18 "vect" { target vect_sdiv_pow2_si } } } */
|
|
@ -0,0 +1,51 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define SIGNED(S) int##S##_t
|
||||
|
||||
#define DIV(x,y) ((x)/(y))
|
||||
#define MOD(x,y) ((x)%(y))
|
||||
|
||||
#define TEMPLATE(OP,SIZE) \
|
||||
void __attribute__ ((noinline, noclone)) \
|
||||
f_##OP##_##SIZE (SIGNED(SIZE) *restrict a, SIGNED(SIZE) *restrict b, \
|
||||
__INTPTR_TYPE__ n) \
|
||||
{ \
|
||||
for (__INTPTR_TYPE__ i = 0; i < n; ++i) \
|
||||
a[i] = OP (b[i], ((SIGNED(SIZE))1 << ((SIZE)/2+1))); \
|
||||
}
|
||||
#define DIVMOD(SIZE) \
|
||||
TEMPLATE (DIV,SIZE); \
|
||||
TEMPLATE (MOD,SIZE);
|
||||
|
||||
DIVMOD (8);
|
||||
DIVMOD (16);
|
||||
DIVMOD (32);
|
||||
DIVMOD (64);
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 8 "vect" } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 4 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.b, p[0-9]+/m, z[0-9]+\.b, #5\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.h, p[0-9]+/m, z[0-9]+\.h, #9\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #9\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.s, p[0-9]+/m, z[0-9]+\.s, #17\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #17\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.d, p[0-9]+/m, z[0-9]+\.d, #33\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #33\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-not {\tasr\t%} } } */
|
||||
/* { dg-final { scan-assembler-not {\tlsr\t%} } } */
|
||||
/* { dg-final { scan-assembler-not {\tcmplt\t%} } } */
|
||||
/* { dg-final { scan-assembler-not {\tand\t%} } } */
|
|
@ -6256,6 +6256,14 @@ proc check_effective_target_vect_mulhrs_hi {} {
|
|||
&& [check_effective_target_aarch64_sve2] }]
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports signed division
|
||||
# by power-of-2 operations on vectors of 4-byte integers, 0 otherwise.
# Both conditions below must hold: the target triplet matches
# aarch64*-*-* and check_effective_target_aarch64_sve succeeds.
|
||||
|
||||
proc check_effective_target_vect_sdiv_pow2_si {} {
|
||||
return [expr { [istarget aarch64*-*-*]
|
||||
&& [check_effective_target_aarch64_sve] }]
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# demotion (packing) of shorts (to chars) and ints (to shorts)
|
||||
# using modulo arithmetic, 0 otherwise.
|
||||
|
|
|
@ -2927,6 +2927,37 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|||
/* Pattern detected. */
|
||||
vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
|
||||
|
||||
*type_out = vectype;
|
||||
|
||||
/* Check if the target supports this internal function. */
|
||||
internal_fn ifn = IFN_DIV_POW2;
|
||||
if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
|
||||
{
|
||||
tree shift = build_int_cst (itype, tree_log2 (oprnd1));
|
||||
|
||||
tree var_div = vect_recog_temp_ssa_var (itype, NULL);
|
||||
gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
|
||||
gimple_call_set_lhs (div_stmt, var_div);
|
||||
|
||||
if (rhs_code == TRUNC_MOD_EXPR)
|
||||
{
|
||||
append_pattern_def_seq (stmt_vinfo, div_stmt);
|
||||
def_stmt
|
||||
= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
|
||||
LSHIFT_EXPR, var_div, shift);
|
||||
append_pattern_def_seq (stmt_vinfo, def_stmt);
|
||||
pattern_stmt
|
||||
= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
|
||||
MINUS_EXPR, oprnd0,
|
||||
gimple_assign_lhs (def_stmt));
|
||||
}
|
||||
else
|
||||
pattern_stmt = div_stmt;
|
||||
gimple_set_location (pattern_stmt, gimple_location (last_stmt));
|
||||
|
||||
return pattern_stmt;
|
||||
}
|
||||
|
||||
cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
|
||||
build_int_cst (itype, 0));
|
||||
if (rhs_code == TRUNC_DIV_EXPR
|
||||
|
@ -3003,7 +3034,6 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|||
signmask);
|
||||
}
|
||||
|
||||
*type_out = vectype;
|
||||
return pattern_stmt;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue