[AArch64] Use SVE MLA, MLS, MAD and MSB for conditional arithmetic
This patch uses predicated MLA, MLS, MAD and MSB to implement conditional "FMA"s on integers. This also requires providing the unpredicated optabs (fma and fnma) since otherwise tree-ssa-math-opts.c won't try to use the conditional forms. We still want to use shifts and adds in preference to multiplications, so the patch makes the optab expanders check for that. The tests cover floating-point types too, which are already handled, and which were already tested to some extent by gcc.dg/vect. 2019-08-15 Richard Sandiford <richard.sandiford@arm.com> Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> gcc/ * config/aarch64/aarch64-protos.h (aarch64_prepare_sve_int_fma) (aarch64_prepare_sve_cond_int_fma): Declare. * config/aarch64/aarch64.c (aarch64_convert_mult_to_shift) (aarch64_prepare_sve_int_fma): New functions. (aarch64_prepare_sve_cond_int_fma): Likewise. * config/aarch64/aarch64-sve.md (cond_<SVE_INT_BINARY:optab><SVE_I:mode>): Add a "@" marker. (fma<SVE_I:mode>4, cond_fma<SVE_I:mode>, *cond_fma<SVE_I:mode>_2) (*cond_fma<SVE_I:mode>_4, *cond_fma<SVE_I:mode>_any, fnma<SVE_I:mode>4) (cond_fnma<SVE_I:mode>, *cond_fnma<SVE_I:mode>_2) (*cond_fnma<SVE_I:mode>_4, *cond_fnma<SVE_I:mode>_any): New patterns. (*madd<mode>): Rename to... (*fma<mode>4): ...this. (*msub<mode>): Rename to... (*fnma<mode>4): ...this. gcc/testsuite/ * gcc.target/aarch64/sve/cond_mla_1.c: New test. * gcc.target/aarch64/sve/cond_mla_1_run.c: Likewise. * gcc.target/aarch64/sve/cond_mla_2.c: Likewise. * gcc.target/aarch64/sve/cond_mla_2_run.c: Likewise. * gcc.target/aarch64/sve/cond_mla_3.c: Likewise. * gcc.target/aarch64/sve/cond_mla_3_run.c: Likewise. * gcc.target/aarch64/sve/cond_mla_4.c: Likewise. * gcc.target/aarch64/sve/cond_mla_4_run.c: Likewise. * gcc.target/aarch64/sve/cond_mla_5.c: Likewise. * gcc.target/aarch64/sve/cond_mla_5_run.c: Likewise. * gcc.target/aarch64/sve/cond_mla_6.c: Likewise. * gcc.target/aarch64/sve/cond_mla_6_run.c: Likewise. 
* gcc.target/aarch64/sve/cond_mla_7.c: Likewise. * gcc.target/aarch64/sve/cond_mla_7_run.c: Likewise. * gcc.target/aarch64/sve/cond_mla_8.c: Likewise. * gcc.target/aarch64/sve/cond_mla_8_run.c: Likewise. Co-Authored-By: Kugan Vivekanandarajah <kuganv@linaro.org> From-SVN: r274509
This commit is contained in:
parent
a19ba9e1b1
commit
b6c3aea189
|
@ -1,3 +1,22 @@
|
||||||
|
2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
|
||||||
|
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||||
|
|
||||||
|
* config/aarch64/aarch64-protos.h (aarch64_prepare_sve_int_fma)
|
||||||
|
(aarch64_prepare_sve_cond_int_fma): Declare.
|
||||||
|
* config/aarch64/aarch64.c (aarch64_convert_mult_to_shift)
|
||||||
|
(aarch64_prepare_sve_int_fma): New functions.
|
||||||
|
(aarch64_prepare_sve_cond_int_fma): Likewise.
|
||||||
|
* config/aarch64/aarch64-sve.md
|
||||||
|
(cond_<SVE_INT_BINARY:optab><SVE_I:mode>): Add a "@" marker.
|
||||||
|
(fma<SVE_I:mode>4, cond_fma<SVE_I:mode>, *cond_fma<SVE_I:mode>_2)
|
||||||
|
(*cond_fma<SVE_I:mode>_4, *cond_fma<SVE_I:mode>_any, fnma<SVE_I:mode>4)
|
||||||
|
(cond_fnma<SVE_I:mode>, *cond_fnma<SVE_I:mode>_2)
|
||||||
|
(*cond_fnma<SVE_I:mode>_4, *cond_fnma<SVE_I:mode>_any): New patterns.
|
||||||
|
(*madd<mode>): Rename to...
|
||||||
|
(*fma<mode>4): ...this.
|
||||||
|
(*msub<mode>3): Rename to...
|
||||||
|
(*fnma<mode>3): ...this.
|
||||||
|
|
||||||
2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
|
2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
|
||||||
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||||
|
|
||||||
|
|
|
@ -630,6 +630,9 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
|
||||||
void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
|
void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
|
||||||
bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
|
bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
|
||||||
void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *);
|
void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *);
|
||||||
|
|
||||||
|
bool aarch64_prepare_sve_int_fma (rtx *, rtx_code);
|
||||||
|
bool aarch64_prepare_sve_cond_int_fma (rtx *, rtx_code);
|
||||||
#endif /* RTX_CODE */
|
#endif /* RTX_CODE */
|
||||||
|
|
||||||
void aarch64_init_builtins (void);
|
void aarch64_init_builtins (void);
|
||||||
|
|
|
@ -1844,7 +1844,7 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
;; Predicated integer operations with merging.
|
;; Predicated integer operations with merging.
|
||||||
(define_expand "cond_<optab><mode>"
|
(define_expand "@cond_<optab><mode>"
|
||||||
[(set (match_operand:SVE_I 0 "register_operand")
|
[(set (match_operand:SVE_I 0 "register_operand")
|
||||||
(unspec:SVE_I
|
(unspec:SVE_I
|
||||||
[(match_operand:<VPRED> 1 "register_operand")
|
[(match_operand:<VPRED> 1 "register_operand")
|
||||||
|
@ -3384,8 +3384,26 @@
|
||||||
;; - MLA
|
;; - MLA
|
||||||
;; -------------------------------------------------------------------------
|
;; -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
;; Unpredicated integer addition of product.
;; Operand 0 = operand 1 * operand 2 + operand 3. The expander first calls
;; aarch64_prepare_sve_int_fma, which may emit the whole sequence itself
;; (in which case we are DONE). Otherwise operand 4, the predicate of the
;; UNSPEC_PRED_X multiplication, is set to the register returned by
;; aarch64_ptrue_reg for mode <VPRED>.
|
||||||
|
(define_expand "fma<mode>4"
|
||||||
|
[(set (match_operand:SVE_I 0 "register_operand")
|
||||||
|
(plus:SVE_I
|
||||||
|
(unspec:SVE_I
|
||||||
|
[(match_dup 4)
|
||||||
|
(mult:SVE_I (match_operand:SVE_I 1 "register_operand")
|
||||||
|
(match_operand:SVE_I 2 "nonmemory_operand"))]
|
||||||
|
UNSPEC_PRED_X)
|
||||||
|
(match_operand:SVE_I 3 "register_operand")))]
|
||||||
|
"TARGET_SVE"
|
||||||
|
{
|
||||||
|
if (aarch64_prepare_sve_int_fma (operands, PLUS))
|
||||||
|
DONE;
|
||||||
|
operands[4] = aarch64_ptrue_reg (<VPRED>mode);
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
;; Predicated integer addition of product.
|
;; Predicated integer addition of product.
|
||||||
(define_insn "*madd<mode>"
|
(define_insn "*fma<mode>4"
|
||||||
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
|
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
|
||||||
(plus:SVE_I
|
(plus:SVE_I
|
||||||
(unspec:SVE_I
|
(unspec:SVE_I
|
||||||
|
@ -3402,6 +3420,97 @@
|
||||||
[(set_attr "movprfx" "*,*,yes")]
|
[(set_attr "movprfx" "*,*,yes")]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
;; Predicated integer addition of product with merging.
;; UNSPEC_SEL selects between operand 2 * operand 3 + operand 4 and the
;; fallback value in operand 5, under the control of predicate operand 1.
;; aarch64_prepare_sve_cond_int_fma may emit the whole sequence itself
;; (in which case we are DONE); otherwise it forces operand 3 into a register.
|
||||||
|
(define_expand "cond_fma<mode>"
|
||||||
|
[(set (match_operand:SVE_I 0 "register_operand")
|
||||||
|
(unspec:SVE_I
|
||||||
|
[(match_operand:<VPRED> 1 "register_operand")
|
||||||
|
(plus:SVE_I
|
||||||
|
(mult:SVE_I (match_operand:SVE_I 2 "register_operand")
|
||||||
|
(match_operand:SVE_I 3 "general_operand"))
|
||||||
|
(match_operand:SVE_I 4 "register_operand"))
|
||||||
|
(match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
|
||||||
|
UNSPEC_SEL))]
|
||||||
|
"TARGET_SVE"
|
||||||
|
{
|
||||||
|
if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
|
||||||
|
DONE;
|
||||||
|
/* Swap the multiplication operands if the fallback value is the
|
||||||
|
second of the two. That puts the fallback value in operand 2, which
is the position that *cond_fma<mode>_2 expects (its fallback is a
match_dup of operand 2). */
|
||||||
|
if (rtx_equal_p (operands[3], operands[5]))
|
||||||
|
std::swap (operands[2], operands[3]);
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
;; Predicated integer addition of product, merging with the first input.
;; The "first input" is multiplication operand 2, which is also the fallback
;; value (match_dup 2). The first alternative ties operand 2 to the
;; destination so that MAD can use it in place; the second copies operand 2
;; into the destination with MOVPRFX before the MAD.
|
||||||
|
(define_insn "*cond_fma<mode>_2"
|
||||||
|
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
|
||||||
|
(unspec:SVE_I
|
||||||
|
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
|
||||||
|
(plus:SVE_I
|
||||||
|
(mult:SVE_I (match_operand:SVE_I 2 "register_operand" "0, w")
|
||||||
|
(match_operand:SVE_I 3 "register_operand" "w, w"))
|
||||||
|
(match_operand:SVE_I 4 "register_operand" "w, w"))
|
||||||
|
(match_dup 2)]
|
||||||
|
UNSPEC_SEL))]
|
||||||
|
"TARGET_SVE"
|
||||||
|
"@
|
||||||
|
mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
|
||||||
|
movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
|
||||||
|
[(set_attr "movprfx" "*,yes")]
|
||||||
|
)
|
||||||
|
|
||||||
|
;; Predicated integer addition of product, merging with the third input.
;; The "third input" is the addend, operand 4, which is also the fallback
;; value (match_dup 4). The first alternative ties operand 4 to the
;; destination so that MLA can accumulate into it; the second copies
;; operand 4 into the destination with MOVPRFX before the MLA.
|
||||||
|
(define_insn "*cond_fma<mode>_4"
|
||||||
|
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
|
||||||
|
(unspec:SVE_I
|
||||||
|
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
|
||||||
|
(plus:SVE_I
|
||||||
|
(mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w")
|
||||||
|
(match_operand:SVE_I 3 "register_operand" "w, w"))
|
||||||
|
(match_operand:SVE_I 4 "register_operand" "0, w"))
|
||||||
|
(match_dup 4)]
|
||||||
|
UNSPEC_SEL))]
|
||||||
|
"TARGET_SVE"
|
||||||
|
"@
|
||||||
|
mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
|
||||||
|
movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
|
||||||
|
[(set_attr "movprfx" "*,yes")]
|
||||||
|
)
|
||||||
|
|
||||||
|
;; Predicated integer addition of product, merging with an independent value.
;; The insn condition requires operand 5 (the fallback value) to differ from
;; operands 2, 3 and 4. In the first four alternatives operand 5 uses the
;; Dz constraint and the destination is set up with a zeroing (/z) MOVPRFX;
;; in the fifth, operand 5 is tied to the destination and a merging (/m)
;; MOVPRFX of operand 4 is used instead. The last alternative ("#") is
;; rewritten after reload, provided that operand 5 is a register other than
;; operand 0: a vcond_mask instruction writes operand 0 from operands 4 and 5
;; under predicate operand 1, and operands 4 and 5 are then both replaced
;; by operand 0.
|
||||||
|
(define_insn_and_rewrite "*cond_fma<mode>_any"
|
||||||
|
[(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
|
||||||
|
(unspec:SVE_I
|
||||||
|
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
|
||||||
|
(plus:SVE_I
|
||||||
|
(mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w")
|
||||||
|
(match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))
|
||||||
|
(match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w"))
|
||||||
|
(match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
|
||||||
|
UNSPEC_SEL))]
|
||||||
|
"TARGET_SVE
|
||||||
|
&& !rtx_equal_p (operands[2], operands[5])
|
||||||
|
&& !rtx_equal_p (operands[3], operands[5])
|
||||||
|
&& !rtx_equal_p (operands[4], operands[5])"
|
||||||
|
"@
|
||||||
|
movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
|
||||||
|
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
|
||||||
|
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
|
||||||
|
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
|
||||||
|
movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
|
||||||
|
#"
|
||||||
|
"&& reload_completed
|
||||||
|
&& register_operand (operands[5], <MODE>mode)
|
||||||
|
&& !rtx_equal_p (operands[0], operands[5])"
|
||||||
|
{
|
||||||
|
emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
|
||||||
|
operands[5], operands[1]));
|
||||||
|
/* The rewritten insn uses operand 0 as both addend and fallback value;
NOTE(review): presumably so that it then matches *cond_fma<mode>_4 -
confirm. */
operands[5] = operands[4] = operands[0];
|
||||||
|
}
|
||||||
|
[(set_attr "movprfx" "yes")]
|
||||||
|
)
|
||||||
|
|
||||||
;; -------------------------------------------------------------------------
|
;; -------------------------------------------------------------------------
|
||||||
;; ---- [INT] MLS and MSB
|
;; ---- [INT] MLS and MSB
|
||||||
;; -------------------------------------------------------------------------
|
;; -------------------------------------------------------------------------
|
||||||
|
@ -3410,8 +3519,26 @@
|
||||||
;; - MSB
|
;; - MSB
|
||||||
;; -------------------------------------------------------------------------
|
;; -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
;; Unpredicated integer subtraction of product.
;; Operand 0 = operand 3 - operand 1 * operand 2. The expander first calls
;; aarch64_prepare_sve_int_fma, which may emit the whole sequence itself
;; (in which case we are DONE). Otherwise operand 4, the predicate of the
;; UNSPEC_PRED_X multiplication, is set to the register returned by
;; aarch64_ptrue_reg for mode <VPRED>.
|
||||||
|
(define_expand "fnma<mode>4"
|
||||||
|
[(set (match_operand:SVE_I 0 "register_operand")
|
||||||
|
(minus:SVE_I
|
||||||
|
(match_operand:SVE_I 3 "register_operand")
|
||||||
|
(unspec:SVE_I
|
||||||
|
[(match_dup 4)
|
||||||
|
(mult:SVE_I (match_operand:SVE_I 1 "register_operand")
|
||||||
|
(match_operand:SVE_I 2 "general_operand"))]
|
||||||
|
UNSPEC_PRED_X)))]
|
||||||
|
"TARGET_SVE"
|
||||||
|
{
|
||||||
|
if (aarch64_prepare_sve_int_fma (operands, MINUS))
|
||||||
|
DONE;
|
||||||
|
operands[4] = aarch64_ptrue_reg (<VPRED>mode);
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
;; Predicated integer subtraction of product.
|
;; Predicated integer subtraction of product.
|
||||||
(define_insn "*msub<mode>3"
|
(define_insn "*fnma<mode>3"
|
||||||
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
|
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
|
||||||
(minus:SVE_I
|
(minus:SVE_I
|
||||||
(match_operand:SVE_I 4 "register_operand" "w, 0, w")
|
(match_operand:SVE_I 4 "register_operand" "w, 0, w")
|
||||||
|
@ -3428,6 +3555,98 @@
|
||||||
[(set_attr "movprfx" "*,*,yes")]
|
[(set_attr "movprfx" "*,*,yes")]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
;; Predicated integer subtraction of product with merging.
;; UNSPEC_SEL selects between operand 4 - operand 2 * operand 3 and the
;; fallback value in operand 5, under the control of predicate operand 1.
;; aarch64_prepare_sve_cond_int_fma may emit the whole sequence itself
;; (in which case we are DONE); otherwise it forces operand 3 into a register.
|
||||||
|
(define_expand "cond_fnma<mode>"
|
||||||
|
[(set (match_operand:SVE_I 0 "register_operand")
|
||||||
|
(unspec:SVE_I
|
||||||
|
[(match_operand:<VPRED> 1 "register_operand")
|
||||||
|
(minus:SVE_I
|
||||||
|
(match_operand:SVE_I 4 "register_operand")
|
||||||
|
(mult:SVE_I (match_operand:SVE_I 2 "register_operand")
|
||||||
|
(match_operand:SVE_I 3 "general_operand")))
|
||||||
|
(match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
|
||||||
|
UNSPEC_SEL))]
|
||||||
|
"TARGET_SVE"
|
||||||
|
{
|
||||||
|
if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
|
||||||
|
DONE;
|
||||||
|
/* Swap the multiplication operands if the fallback value is the
|
||||||
|
second of the two. That puts the fallback value in operand 2, which
is the position that *cond_fnma<mode>_2 expects (its fallback is a
match_dup of operand 2). */
|
||||||
|
if (rtx_equal_p (operands[3], operands[5]))
|
||||||
|
std::swap (operands[2], operands[3]);
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
;; Predicated integer subtraction of product, merging with the first input.
;; The "first input" is multiplication operand 2, which is also the fallback
;; value (match_dup 2). The first alternative ties operand 2 to the
;; destination so that MSB can use it in place; the second copies operand 2
;; into the destination with MOVPRFX before the MSB.
|
||||||
|
(define_insn "*cond_fnma<mode>_2"
|
||||||
|
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
|
||||||
|
(unspec:SVE_I
|
||||||
|
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
|
||||||
|
(minus:SVE_I
|
||||||
|
(match_operand:SVE_I 4 "register_operand" "w, w")
|
||||||
|
(mult:SVE_I (match_operand:SVE_I 2 "register_operand" "0, w")
|
||||||
|
(match_operand:SVE_I 3 "register_operand" "w, w")))
|
||||||
|
(match_dup 2)]
|
||||||
|
UNSPEC_SEL))]
|
||||||
|
"TARGET_SVE"
|
||||||
|
"@
|
||||||
|
msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
|
||||||
|
movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
|
||||||
|
[(set_attr "movprfx" "*,yes")]
|
||||||
|
)
|
||||||
|
|
||||||
|
;; Predicated integer subtraction of product, merging with the third input.
;; The "third input" is operand 4, the value that the product is subtracted
;; from, which is also the fallback value (match_dup 4). The first
;; alternative ties operand 4 to the destination so that MLS can update it
;; in place; the second copies operand 4 into the destination with MOVPRFX
;; before the MLS.
|
||||||
|
(define_insn "*cond_fnma<mode>_4"
|
||||||
|
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
|
||||||
|
(unspec:SVE_I
|
||||||
|
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
|
||||||
|
(minus:SVE_I
|
||||||
|
(match_operand:SVE_I 4 "register_operand" "0, w")
|
||||||
|
(mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w")
|
||||||
|
(match_operand:SVE_I 3 "register_operand" "w, w")))
|
||||||
|
(match_dup 4)]
|
||||||
|
UNSPEC_SEL))]
|
||||||
|
"TARGET_SVE"
|
||||||
|
"@
|
||||||
|
mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
|
||||||
|
movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
|
||||||
|
[(set_attr "movprfx" "*,yes")]
|
||||||
|
)
|
||||||
|
|
||||||
|
;; Predicated integer subtraction of product, merging with an
|
||||||
|
;; independent value.
;; The insn condition requires operand 5 (the fallback value) to differ from
;; operands 2, 3 and 4. In the first four alternatives operand 5 uses the
;; Dz constraint and the destination is set up with a zeroing (/z) MOVPRFX;
;; in the fifth, operand 5 is tied to the destination and a merging (/m)
;; MOVPRFX of operand 4 is used instead. The last alternative ("#") is
;; rewritten after reload, provided that operand 5 is a register other than
;; operand 0: a vcond_mask instruction writes operand 0 from operands 4 and 5
;; under predicate operand 1, and operands 4 and 5 are then both replaced
;; by operand 0.
|
||||||
|
(define_insn_and_rewrite "*cond_fnma<mode>_any"
|
||||||
|
[(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
|
||||||
|
(unspec:SVE_I
|
||||||
|
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
|
||||||
|
(minus:SVE_I
|
||||||
|
(match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w")
|
||||||
|
(mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w")
|
||||||
|
(match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w")))
|
||||||
|
(match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
|
||||||
|
UNSPEC_SEL))]
|
||||||
|
"TARGET_SVE
|
||||||
|
&& !rtx_equal_p (operands[2], operands[5])
|
||||||
|
&& !rtx_equal_p (operands[3], operands[5])
|
||||||
|
&& !rtx_equal_p (operands[4], operands[5])"
|
||||||
|
"@
|
||||||
|
movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
|
||||||
|
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
|
||||||
|
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
|
||||||
|
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
|
||||||
|
movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
|
||||||
|
#"
|
||||||
|
"&& reload_completed
|
||||||
|
&& register_operand (operands[5], <MODE>mode)
|
||||||
|
&& !rtx_equal_p (operands[0], operands[5])"
|
||||||
|
{
|
||||||
|
emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
|
||||||
|
operands[5], operands[1]));
|
||||||
|
/* The rewritten insn uses operand 0 as both minuend and fallback value;
NOTE(review): presumably so that it then matches *cond_fnma<mode>_4 -
confirm. */
operands[5] = operands[4] = operands[0];
|
||||||
|
}
|
||||||
|
[(set_attr "movprfx" "yes")]
|
||||||
|
)
|
||||||
|
|
||||||
;; -------------------------------------------------------------------------
|
;; -------------------------------------------------------------------------
|
||||||
;; ---- [INT] Dot product
|
;; ---- [INT] Dot product
|
||||||
;; -------------------------------------------------------------------------
|
;; -------------------------------------------------------------------------
|
||||||
|
|
|
@ -16469,6 +16469,98 @@ aarch64_sve_expand_vector_init (rtx target, rtx vals)
|
||||||
aarch64_sve_expand_vector_init_insert_elems (target, v, nelts);
|
aarch64_sve_expand_vector_init_insert_elems (target, v, nelts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check whether VALUE is a vector constant in which every element
|
||||||
|
is either a power of 2 or a negated power of 2. If so, return
|
||||||
|
a constant vector of log2s, and flip CODE between PLUS and MINUS
|
||||||
|
if VALUE contains negated powers of 2. Return NULL_RTX otherwise.
CODE is passed by reference and is only written once every encoded
element has been accepted; it is left untouched on every NULL_RTX
return in the checks below. It is set to PLUS if no element
constrains the sign. A vector that mixes elements needing negation
with elements that must not be negated is rejected. */
|
||||||
|
|
||||||
|
static rtx
|
||||||
|
aarch64_convert_mult_to_shift (rtx value, rtx_code &code)
|
||||||
|
{
|
||||||
|
if (GET_CODE (value) != CONST_VECTOR)
|
||||||
|
return NULL_RTX;
|
||||||
|
|
||||||
|
rtx_vector_builder builder;
|
||||||
|
if (!builder.new_unary_operation (GET_MODE (value), value, false))
|
||||||
|
return NULL_RTX;
|
||||||
|
|
||||||
|
scalar_mode int_mode = GET_MODE_INNER (GET_MODE (value));
|
||||||
|
/* 1 if the result of the multiplication must be negated,
|
||||||
|
0 if it mustn't, or -1 if we don't yet care. */
|
||||||
|
int negate = -1;
|
||||||
|
/* Only the encoded elements of VALUE are examined; one shift amount
is pushed to BUILDER for each of them. */
unsigned int encoded_nelts = const_vector_encoded_nelts (value);
|
||||||
|
for (unsigned int i = 0; i < encoded_nelts; ++i)
|
||||||
|
{
|
||||||
|
rtx elt = CONST_VECTOR_ENCODED_ELT (value, i);
|
||||||
|
if (!CONST_SCALAR_INT_P (elt))
|
||||||
|
return NULL_RTX;
|
||||||
|
rtx_mode_t val (elt, int_mode);
|
||||||
|
/* Start from -VAL. If negating VAL gives VAL back, the sign does not
matter for this element and NEGATE is left alone. */
wide_int pow2 = wi::neg (val);
|
||||||
|
if (val != pow2)
|
||||||
|
{
|
||||||
|
/* It matters whether we negate or not. Make that choice,
|
||||||
|
and make sure that it's consistent with previous elements.
The test below cannot succeed while NEGATE is still -1, since
the right-hand side is either 0 or 1. */
|
||||||
|
if (negate == !wi::neg_p (val))
|
||||||
|
return NULL_RTX;
|
||||||
|
negate = wi::neg_p (val);
|
||||||
|
if (!negate)
|
||||||
|
/* VAL is not negative, so VAL itself is the candidate power of 2. */
pow2 = val;
|
||||||
|
}
|
||||||
|
/* POW2 is now the value that we want to be a power of 2.
A negative SHIFT is treated as "not a power of 2". */
|
||||||
|
int shift = wi::exact_log2 (pow2);
|
||||||
|
if (shift < 0)
|
||||||
|
return NULL_RTX;
|
||||||
|
builder.quick_push (gen_int_mode (shift, int_mode));
|
||||||
|
}
|
||||||
|
if (negate == -1)
|
||||||
|
/* PLUS and MINUS are equivalent; canonicalize on PLUS. */
|
||||||
|
code = PLUS;
|
||||||
|
else if (negate == 1)
|
||||||
|
code = code == PLUS ? MINUS : PLUS;
|
||||||
|
return builder.build ();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Prepare for an integer SVE multiply-add or multiply-subtract pattern;
|
||||||
|
CODE is PLUS for the former and MINUS for the latter. OPERANDS is the
|
||||||
|
operands array, in the same order as for fma_optab. Return true if
|
||||||
|
the function emitted all the necessary instructions, false if the caller
|
||||||
|
should generate the pattern normally with the new OPERANDS array.
OPERANDS[0] is the destination, OPERANDS[1] and OPERANDS[2] are the
multiplication inputs and OPERANDS[3] is the value that the product is
added to or subtracted from. The only change made to OPERANDS on a
false return is that OPERANDS[2] is forced into a register. */
|
||||||
|
|
||||||
|
bool
|
||||||
|
aarch64_prepare_sve_int_fma (rtx *operands, rtx_code code)
|
||||||
|
{
|
||||||
|
machine_mode mode = GET_MODE (operands[0]);
|
||||||
|
/* CODE is passed by reference here, so it may be canonicalized or
flipped to account for negated powers of 2. */
if (rtx shifts = aarch64_convert_mult_to_shift (operands[2], code))
|
||||||
|
{
|
||||||
|
/* Shift OPERANDS[1] left by SHIFTS instead of multiplying.
NOTE(review): the result of expand_binop is not checked; presumably
a direct vashl pattern always exists for these modes - confirm. */
rtx product = expand_binop (mode, vashl_optab, operands[1], shifts,
|
||||||
|
NULL_RTX, true, OPTAB_DIRECT);
|
||||||
|
/* Add PRODUCT to, or subtract it from, OPERANDS[3], with OPERANDS[0]
as the target. */
force_expand_binop (mode, code == PLUS ? add_optab : sub_optab,
|
||||||
|
operands[3], product, operands[0], true,
|
||||||
|
OPTAB_DIRECT);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
/* No shift form is available; the caller's pattern gets the multiplier
in a register. */
operands[2] = force_reg (mode, operands[2]);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Likewise, but for a conditional pattern. CODE is PLUS for a
conditional multiply-add and MINUS for a conditional multiply-subtract.
OPERANDS follows the order used by the cond_fma and cond_fnma expanders:
OPERANDS[0] is the destination, OPERANDS[1] the predicate, OPERANDS[2]
and OPERANDS[3] the multiplication inputs, OPERANDS[4] the value that
the product is added to or subtracted from, and OPERANDS[5] the fallback
value. Return true if all the necessary instructions were emitted,
false if the caller should generate the pattern normally; in the latter
case OPERANDS[3] has been forced into a register. */
|
||||||
|
|
||||||
|
bool
|
||||||
|
aarch64_prepare_sve_cond_int_fma (rtx *operands, rtx_code code)
|
||||||
|
{
|
||||||
|
machine_mode mode = GET_MODE (operands[0]);
|
||||||
|
/* CODE is passed by reference here, so it may be canonicalized or
flipped to account for negated powers of 2. */
if (rtx shifts = aarch64_convert_mult_to_shift (operands[3], code))
|
||||||
|
{
|
||||||
|
/* Shift OPERANDS[2] left by SHIFTS instead of multiplying.
NOTE(review): the result of expand_binop is not checked; presumably
a direct vashl pattern always exists for these modes - confirm. */
rtx product = expand_binop (mode, vashl_optab, operands[2], shifts,
|
||||||
|
NULL_RTX, true, OPTAB_DIRECT);
|
||||||
|
/* Emit the conditional operation selected by CODE, with PRODUCT
standing in for the multiplication. NOTE(review): gen_cond is
presumably the generator created by the "@cond_<optab><mode>"
expander - confirm. */
emit_insn (gen_cond (code, mode, operands[0], operands[1],
|
||||||
|
operands[4], product, operands[5]));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
/* No shift form is available; the caller's pattern gets the multiplier
in a register. */
operands[3] = force_reg (mode, operands[3]);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static unsigned HOST_WIDE_INT
|
static unsigned HOST_WIDE_INT
|
||||||
aarch64_shift_truncation_mask (machine_mode mode)
|
aarch64_shift_truncation_mask (machine_mode mode)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,3 +1,23 @@
|
||||||
|
2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
|
||||||
|
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||||
|
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_1.c: New test.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_1_run.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_2.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_2_run.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_3.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_3_run.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_4.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_4_run.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_5.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_5_run.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_6.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_6_run.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_7.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_7_run.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_8.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/cond_mla_8_run.c: Likewise.
|
||||||
|
|
||||||
2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
|
2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
|
||||||
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,52 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define DEF_LOOP(TYPE, NAME, OP) \
|
||||||
|
void __attribute__ ((noipa)) \
|
||||||
|
test_##TYPE##_##NAME (TYPE *__restrict r, \
|
||||||
|
TYPE *__restrict a, \
|
||||||
|
TYPE *__restrict b, TYPE c, \
|
||||||
|
TYPE *__restrict pred, int n) \
|
||||||
|
{ \
|
||||||
|
for (int i = 0; i < n; ++i) \
|
||||||
|
r[i] = pred[i] != 1 ? a[i] OP b[i] * c : b[i]; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define TEST_TYPE(T, TYPE) \
|
||||||
|
T (TYPE, add, +) \
|
||||||
|
T (TYPE, sub, -)
|
||||||
|
|
||||||
|
#define TEST_ALL(T) \
|
||||||
|
TEST_TYPE (T, uint8_t) \
|
||||||
|
TEST_TYPE (T, uint16_t) \
|
||||||
|
TEST_TYPE (T, uint32_t) \
|
||||||
|
TEST_TYPE (T, uint64_t) \
|
||||||
|
TEST_TYPE (T, _Float16) \
|
||||||
|
TEST_TYPE (T, float) \
|
||||||
|
TEST_TYPE (T, double)
|
||||||
|
|
||||||
|
TEST_ALL (DEF_LOOP)
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
|
||||||
|
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
|
||||||
|
/* { dg-final { scan-assembler-not {\tsel\t} } } */
|
|
@ -0,0 +1,35 @@
|
||||||
|
/* { dg-do run { target aarch64_sve_hw } } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||||
|
|
||||||
|
#include "cond_mla_1.c"
|
||||||
|
|
||||||
|
#define FACTOR 17
|
||||||
|
#define N 99
|
||||||
|
|
||||||
|
#define TEST_LOOP(TYPE, NAME, OP) \
|
||||||
|
{ \
|
||||||
|
TYPE r[N], a[N], b[N], pred[N]; \
|
||||||
|
for (int i = 0; i < N; ++i) \
|
||||||
|
{ \
|
||||||
|
a[i] = (i & 1 ? i : 3 * i); \
|
||||||
|
b[i] = (i >> 4) << (i & 15); \
|
||||||
|
pred[i] = i % 3 < i % 5; \
|
||||||
|
asm volatile ("" ::: "memory"); \
|
||||||
|
} \
|
||||||
|
test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \
|
||||||
|
for (int i = 0; i < N; ++i) \
|
||||||
|
{ \
|
||||||
|
TYPE expected \
|
||||||
|
= pred[i] != 1 ? a[i] OP b[i] * (TYPE) FACTOR : b[i]; \
|
||||||
|
if (r[i] != expected) \
|
||||||
|
__builtin_abort (); \
|
||||||
|
asm volatile ("" ::: "memory"); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main (void)
|
||||||
|
{
|
||||||
|
TEST_ALL (TEST_LOOP)
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define DEF_LOOP(TYPE, NAME, OP) \
|
||||||
|
void __attribute__ ((noipa)) \
|
||||||
|
test_##TYPE##_##NAME (TYPE *__restrict r, \
|
||||||
|
TYPE *__restrict a, \
|
||||||
|
TYPE *__restrict b, TYPE c, \
|
||||||
|
TYPE *__restrict pred, int n) \
|
||||||
|
{ \
|
||||||
|
for (int i = 0; i < n; ++i) \
|
||||||
|
r[i] = pred[i] != 1 ? a[i] OP b[i] * c : c; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define TEST_TYPE(T, TYPE) \
|
||||||
|
T (TYPE, add, +) \
|
||||||
|
T (TYPE, sub, -)
|
||||||
|
|
||||||
|
#define TEST_ALL(T) \
|
||||||
|
TEST_TYPE (T, uint8_t) \
|
||||||
|
TEST_TYPE (T, uint16_t) \
|
||||||
|
TEST_TYPE (T, uint32_t) \
|
||||||
|
TEST_TYPE (T, uint64_t) \
|
||||||
|
TEST_TYPE (T, _Float16) \
|
||||||
|
TEST_TYPE (T, float) \
|
||||||
|
TEST_TYPE (T, double)
|
||||||
|
|
||||||
|
TEST_ALL (DEF_LOOP)
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
|
||||||
|
/* { dg-final { scan-assembler-not {\tsel\t} } } */
|
|
@ -0,0 +1,36 @@
|
||||||
|
/* { dg-do run { target aarch64_sve_hw } } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||||
|
|
||||||
|
#include "cond_mla_2.c"
|
||||||
|
|
||||||
|
#define FACTOR 17
|
||||||
|
#define N 99
|
||||||
|
|
||||||
|
#define TEST_LOOP(TYPE, NAME, OP) \
|
||||||
|
{ \
|
||||||
|
TYPE r[N], a[N], b[N], pred[N]; \
|
||||||
|
for (int i = 0; i < N; ++i) \
|
||||||
|
{ \
|
||||||
|
a[i] = (i & 1 ? i : 3 * i); \
|
||||||
|
b[i] = (i >> 4) << (i & 15); \
|
||||||
|
pred[i] = i % 3 < i % 5; \
|
||||||
|
asm volatile ("" ::: "memory"); \
|
||||||
|
} \
|
||||||
|
test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \
|
||||||
|
for (int i = 0; i < N; ++i) \
|
||||||
|
{ \
|
||||||
|
TYPE expected = (pred[i] != 1 \
|
||||||
|
? a[i] OP b[i] * (TYPE) FACTOR \
|
||||||
|
: (TYPE) FACTOR); \
|
||||||
|
if (r[i] != expected) \
|
||||||
|
__builtin_abort (); \
|
||||||
|
asm volatile ("" ::: "memory"); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main (void)
|
||||||
|
{
|
||||||
|
TEST_ALL (TEST_LOOP)
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -0,0 +1,52 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define DEF_LOOP(TYPE, NAME, OP) \
|
||||||
|
void __attribute__ ((noipa)) \
|
||||||
|
test_##TYPE##_##NAME (TYPE *__restrict r, \
|
||||||
|
TYPE *__restrict a, \
|
||||||
|
TYPE *__restrict b, TYPE c, \
|
||||||
|
TYPE *__restrict pred, int n) \
|
||||||
|
{ \
|
||||||
|
for (int i = 0; i < n; ++i) \
|
||||||
|
r[i] = pred[i] != 1 ? a[i] OP b[i] * c : a[i]; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define TEST_TYPE(T, TYPE) \
|
||||||
|
T (TYPE, add, +) \
|
||||||
|
T (TYPE, sub, -)
|
||||||
|
|
||||||
|
#define TEST_ALL(T) \
|
||||||
|
TEST_TYPE (T, uint8_t) \
|
||||||
|
TEST_TYPE (T, uint16_t) \
|
||||||
|
TEST_TYPE (T, uint32_t) \
|
||||||
|
TEST_TYPE (T, uint64_t) \
|
||||||
|
TEST_TYPE (T, _Float16) \
|
||||||
|
TEST_TYPE (T, float) \
|
||||||
|
TEST_TYPE (T, double)
|
||||||
|
|
||||||
|
TEST_ALL (DEF_LOOP)
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
|
||||||
|
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
|
||||||
|
/* { dg-final { scan-assembler-not {\tsel\t} } } */
|
|
@ -0,0 +1,35 @@
|
||||||
|
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O2 -ftree-vectorize" } */

/* Execution test for the loops defined in cond_mla_3.c: every generated
   function is run on N elements and its output is compared with a scalar
   recomputation of the same conditional expression.  */

#include "cond_mla_3.c"

/* Multiplier passed as the scalar operand "c" of each tested function.  */
#define FACTOR 17
/* Number of elements processed per function.  */
#define N 99

/* Fill the inputs, call test_<TYPE>_<NAME>, then recompute each element
   and abort on the first mismatch.  The expected value mirrors the tested
   expression: the multiply-accumulate result when pred[i] != 1, otherwise
   a[i].  The empty asm statements are compiler memory barriers; presumably
   they keep the setup and checking loops from being vectorized or folded
   (not stated in this file).  */
#define TEST_LOOP(TYPE, NAME, OP) \
  { \
    TYPE r[N], a[N], b[N], pred[N]; \
    for (int i = 0; i < N; ++i) \
      { \
        a[i] = (i & 1 ? i : 3 * i); \
        b[i] = (i >> 4) << (i & 15); \
        pred[i] = i % 3 < i % 5; \
        asm volatile ("" ::: "memory"); \
      } \
    test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \
    for (int i = 0; i < N; ++i) \
      { \
        TYPE expected \
          = pred[i] != 1 ? a[i] OP b[i] * (TYPE) FACTOR : a[i]; \
        if (r[i] != expected) \
          __builtin_abort (); \
        asm volatile ("" ::: "memory"); \
      } \
  }

/* Run the check for every type/operation pair listed by TEST_ALL.  */
int
main (void)
{
  TEST_ALL (TEST_LOOP)
  return 0;
}
|
|
@ -0,0 +1,56 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define DEF_LOOP(TYPE, NAME, OP) \
|
||||||
|
void __attribute__ ((noipa)) \
|
||||||
|
test_##TYPE##_##NAME (TYPE *__restrict r, \
|
||||||
|
TYPE *__restrict a, \
|
||||||
|
TYPE *__restrict b, TYPE c, \
|
||||||
|
TYPE *__restrict pred, int n) \
|
||||||
|
{ \
|
||||||
|
for (int i = 0; i < n; ++i) \
|
||||||
|
r[i] = pred[i] == 1 ? a[i] OP b[i] * c : pred[i]; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define TEST_TYPE(T, TYPE) \
|
||||||
|
T (TYPE, add, +) \
|
||||||
|
T (TYPE, sub, -)
|
||||||
|
|
||||||
|
#define TEST_ALL(T) \
|
||||||
|
TEST_TYPE (T, uint8_t) \
|
||||||
|
TEST_TYPE (T, uint16_t) \
|
||||||
|
TEST_TYPE (T, uint32_t) \
|
||||||
|
TEST_TYPE (T, uint64_t) \
|
||||||
|
TEST_TYPE (T, _Float16) \
|
||||||
|
TEST_TYPE (T, float) \
|
||||||
|
TEST_TYPE (T, double)
|
||||||
|
|
||||||
|
TEST_ALL (DEF_LOOP)
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,} 4 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m,} 4 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m,} 4 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
|
||||||
|
/* { dg-final { scan-assembler-not {\tsel\t} } } */
|
|
@ -0,0 +1,36 @@
|
||||||
|
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O2 -ftree-vectorize" } */

/* Execution test for the loops defined in cond_mla_4.c: every generated
   function is run on N elements and its output is compared with a scalar
   recomputation of the same conditional expression.  */

#include "cond_mla_4.c"

/* Multiplier passed as the scalar operand "c" of each tested function.  */
#define FACTOR 17
/* Number of elements processed per function.  */
#define N 99

/* Fill the inputs, call test_<TYPE>_<NAME>, then recompute each element
   and abort on the first mismatch.  pred[i] cycles through 0, 1 and 2, so
   both the arithmetic arm (pred[i] == 1) and the fallback arm, which
   yields pred[i] itself, are exercised.  The empty asm statements are
   compiler memory barriers; presumably they keep the setup and checking
   loops from being vectorized or folded (not stated in this file).  */
#define TEST_LOOP(TYPE, NAME, OP) \
  { \
    TYPE r[N], a[N], b[N], pred[N]; \
    for (int i = 0; i < N; ++i) \
      { \
        a[i] = (i & 1 ? i : 3 * i); \
        b[i] = (i >> 4) << (i & 15); \
        pred[i] = i % 3; \
        asm volatile ("" ::: "memory"); \
      } \
    test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \
    for (int i = 0; i < N; ++i) \
      { \
        TYPE expected = (pred[i] == 1 \
                         ? a[i] OP b[i] * (TYPE) FACTOR \
                         : pred[i]); \
        if (r[i] != expected) \
          __builtin_abort (); \
        asm volatile ("" ::: "memory"); \
      } \
  }

/* Run the check for every type/operation pair listed by TEST_ALL.  */
int
main (void)
{
  TEST_ALL (TEST_LOOP)
  return 0;
}
|
|
@ -0,0 +1,56 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define DEF_LOOP(TYPE, NAME, OP) \
|
||||||
|
void __attribute__ ((noipa)) \
|
||||||
|
test_##TYPE##_##NAME (TYPE *__restrict r, \
|
||||||
|
TYPE *__restrict a, \
|
||||||
|
TYPE *__restrict b, TYPE c, \
|
||||||
|
TYPE *__restrict pred, int n) \
|
||||||
|
{ \
|
||||||
|
for (int i = 0; i < n; ++i) \
|
||||||
|
r[i] = pred[i] ? a[i] OP b[i] * c : 0; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define TEST_TYPE(T, TYPE) \
|
||||||
|
T (TYPE, add, +) \
|
||||||
|
T (TYPE, sub, -)
|
||||||
|
|
||||||
|
#define TEST_ALL(T) \
|
||||||
|
TEST_TYPE (T, uint8_t) \
|
||||||
|
TEST_TYPE (T, uint16_t) \
|
||||||
|
TEST_TYPE (T, uint32_t) \
|
||||||
|
TEST_TYPE (T, uint64_t) \
|
||||||
|
TEST_TYPE (T, _Float16) \
|
||||||
|
TEST_TYPE (T, float) \
|
||||||
|
TEST_TYPE (T, double)
|
||||||
|
|
||||||
|
TEST_ALL (DEF_LOOP)
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z,} 2 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z,} 4 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z,} 4 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z,} 4 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
|
||||||
|
/* { dg-final { scan-assembler-not {\tsel\t} } } */
|
|
@ -0,0 +1,35 @@
|
||||||
|
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O2 -ftree-vectorize" } */

/* Execution test for the loops defined in cond_mla_5.c: every generated
   function is run on N elements and its output is compared with a scalar
   recomputation of the same conditional expression.  */

#include "cond_mla_5.c"

/* Multiplier passed as the scalar operand "c" of each tested function.  */
#define FACTOR 17
/* Number of elements processed per function.  */
#define N 99

/* Fill the inputs, call test_<TYPE>_<NAME>, then recompute each element
   and abort on the first mismatch.  The expected value mirrors the tested
   expression: the multiply-accumulate result when pred[i] is nonzero,
   otherwise 0.  The empty asm statements are compiler memory barriers;
   presumably they keep the setup and checking loops from being vectorized
   or folded (not stated in this file).  */
#define TEST_LOOP(TYPE, NAME, OP) \
  { \
    TYPE r[N], a[N], b[N], pred[N]; \
    for (int i = 0; i < N; ++i) \
      { \
        a[i] = (i & 1 ? i : 3 * i); \
        b[i] = (i >> 4) << (i & 15); \
        pred[i] = i % 3 < i % 5; \
        asm volatile ("" ::: "memory"); \
      } \
    test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \
    for (int i = 0; i < N; ++i) \
      { \
        TYPE expected \
          = pred[i] ? a[i] OP b[i] * (TYPE) FACTOR : 0; \
        if (r[i] != expected) \
          __builtin_abort (); \
        asm volatile ("" ::: "memory"); \
      } \
  }

/* Run the check for every type/operation pair listed by TEST_ALL.  */
int
main (void)
{
  TEST_ALL (TEST_LOOP)
  return 0;
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define DEF_LOOP(TYPE, NAME, OP) \
|
||||||
|
void __attribute__ ((noipa)) \
|
||||||
|
test_##TYPE##_##NAME (TYPE *__restrict r, \
|
||||||
|
TYPE *__restrict a, \
|
||||||
|
TYPE *__restrict b, TYPE c, \
|
||||||
|
TYPE *__restrict pred, int n) \
|
||||||
|
{ \
|
||||||
|
for (int i = 0; i < n; ++i) \
|
||||||
|
r[i] = pred[i] ? a[i] OP b[i] * c : 5; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define TEST_TYPE(T, TYPE) \
|
||||||
|
T (TYPE, add, +) \
|
||||||
|
T (TYPE, sub, -)
|
||||||
|
|
||||||
|
#define TEST_ALL(T) \
|
||||||
|
TEST_TYPE (T, uint8_t) \
|
||||||
|
TEST_TYPE (T, uint16_t) \
|
||||||
|
TEST_TYPE (T, uint32_t) \
|
||||||
|
TEST_TYPE (T, uint64_t) \
|
||||||
|
TEST_TYPE (T, _Float16) \
|
||||||
|
TEST_TYPE (T, float) \
|
||||||
|
TEST_TYPE (T, double)
|
||||||
|
|
||||||
|
TEST_ALL (DEF_LOOP)
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tsel\t} 14 } } */
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
|
||||||
|
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
|
|
@ -0,0 +1,35 @@
|
||||||
|
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O2 -ftree-vectorize" } */

/* Execution test for the loops defined in cond_mla_6.c: every generated
   function is run on N elements and its output is compared with a scalar
   recomputation of the same conditional expression.  */

#include "cond_mla_6.c"

/* Multiplier passed as the scalar operand "c" of each tested function.  */
#define FACTOR 17
/* Number of elements processed per function.  */
#define N 99

/* Fill the inputs, call test_<TYPE>_<NAME>, then recompute each element
   and abort on the first mismatch.  The expected value mirrors the tested
   expression: the multiply-accumulate result when pred[i] is nonzero,
   otherwise 5.  The empty asm statements are compiler memory barriers;
   presumably they keep the setup and checking loops from being vectorized
   or folded (not stated in this file).  */
#define TEST_LOOP(TYPE, NAME, OP) \
  { \
    TYPE r[N], a[N], b[N], pred[N]; \
    for (int i = 0; i < N; ++i) \
      { \
        a[i] = (i & 1 ? i : 3 * i); \
        b[i] = (i >> 4) << (i & 15); \
        pred[i] = i % 3 < i % 5; \
        asm volatile ("" ::: "memory"); \
      } \
    test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \
    for (int i = 0; i < N; ++i) \
      { \
        TYPE expected \
          = pred[i] ? a[i] OP b[i] * (TYPE) FACTOR : 5; \
        if (r[i] != expected) \
          __builtin_abort (); \
        asm volatile ("" ::: "memory"); \
      } \
  }

/* Run the check for every type/operation pair listed by TEST_ALL.  */
int
main (void)
{
  TEST_ALL (TEST_LOOP)
  return 0;
}
|
|
@ -0,0 +1,62 @@
|
||||||
|
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */

/* Conditional a[i] +/- b[i] * CONST loops where CONST is a power of two.
   The scans below look for an immediate left shift followed by a
   predicated ADD or SUB.  */

#include <stdint.h>

/* Define test_<TYPE>_<NAME>_<CONST>, which for each of the N elements
   stores a[i] OP b[i] * CONST when pred[i] != 1 and a[i] otherwise.
   CONST is pasted into the function name, so it must be a single
   preprocessing token.  */
#define DEF_LOOP(TYPE, NAME, OP, CONST) \
  void __attribute__ ((noipa)) \
  test_##TYPE##_##NAME##_##CONST (TYPE *__restrict r, \
                                  TYPE *__restrict a, \
                                  TYPE *__restrict b, \
                                  TYPE *__restrict pred, int n) \
  { \
    for (int i = 0; i < n; ++i) \
      r[i] = pred[i] != 1 ? a[i] OP b[i] * CONST : a[i]; \
  }

/* Invoke T for the "add" (+) and "sub" (-) variants of TYPE with one
   multiplier.  */
#define TEST_COUNT(T, TYPE, CONST) \
  T (TYPE, add, +, CONST) \
  T (TYPE, sub, -, CONST)

/* Invoke T for multipliers 2 and 4 plus a type-specific CONST.  */
#define TEST_TYPE(T, TYPE, CONST) \
  TEST_COUNT (T, TYPE, 2) \
  TEST_COUNT (T, TYPE, 4) \
  TEST_COUNT (T, TYPE, CONST)

/* The type-specific multiplier has only the most significant bit of the
   element type set.  */
#define TEST_ALL(T) \
  TEST_TYPE (T, uint8_t, 0x80) \
  TEST_TYPE (T, uint16_t, 0x8000) \
  TEST_TYPE (T, uint32_t, 0x80000000) \
  TEST_TYPE (T, uint64_t, 0x8000000000000000ULL)

TEST_ALL (DEF_LOOP)

/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #2\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 2 } } */

/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #2\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */

/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #2\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 2 } } */

/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #2\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 2 } } */

/* Four ADDs and two SUBs are expected per element size rather than three
   of each; presumably this is because subtracting a multiple of the top
   bit equals adding it modulo the element width.  */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 4 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */

/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */

/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */
|
|
@ -0,0 +1,34 @@
|
||||||
|
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O2 -ftree-vectorize" } */

/* Execution test for the loops defined in cond_mla_7.c: every generated
   function is run on N elements and its output is compared with a scalar
   recomputation of the same conditional expression.  */

#include "cond_mla_7.c"

/* Number of elements processed per function.  */
#define N 99

/* Fill the inputs, call test_<TYPE>_<NAME>_<CONST>, then recompute each
   element and abort on the first mismatch.  The expected value mirrors
   the tested expression: a[i] OP b[i] * CONST when pred[i] != 1,
   otherwise a[i].  The empty asm statements are compiler memory barriers;
   presumably they keep the setup and checking loops from being vectorized
   or folded (not stated in this file).  */
#define TEST_LOOP(TYPE, NAME, OP, CONST) \
  { \
    TYPE r[N], a[N], b[N], pred[N]; \
    for (int i = 0; i < N; ++i) \
      { \
        a[i] = (i & 1 ? i : 3 * i); \
        b[i] = (i >> 4) << (i & 15); \
        pred[i] = i % 3 < i % 5; \
        asm volatile ("" ::: "memory"); \
      } \
    test_##TYPE##_##NAME##_##CONST (r, a, b, pred, N); \
    for (int i = 0; i < N; ++i) \
      { \
        TYPE expected \
          = pred[i] != 1 ? a[i] OP b[i] * CONST : a[i]; \
        if (r[i] != expected) \
          __builtin_abort (); \
        asm volatile ("" ::: "memory"); \
      } \
  }

/* Run the check for every type/operation/multiplier combination listed by
   TEST_ALL.  */
int
main (void)
{
  TEST_ALL (TEST_LOOP)
  return 0;
}
|
|
@ -0,0 +1,62 @@
|
||||||
|
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */

/* Conditional a[i] +/- b[i] * -CONST loops where CONST is a power of two.
   The scans below look for an immediate left shift followed by a
   predicated ADD or SUB.  */

#include <stdint.h>

/* Define test_<TYPE>_<NAME>_<CONST>, which for each of the N elements
   stores a[i] OP b[i] * -CONST when pred[i] != 1 and a[i] otherwise.
   CONST is pasted into the function name, so it must be a single
   preprocessing token; the negation is applied only in the expression.  */
#define DEF_LOOP(TYPE, NAME, OP, CONST) \
  void __attribute__ ((noipa)) \
  test_##TYPE##_##NAME##_##CONST (TYPE *__restrict r, \
                                  TYPE *__restrict a, \
                                  TYPE *__restrict b, \
                                  TYPE *__restrict pred, int n) \
  { \
    for (int i = 0; i < n; ++i) \
      r[i] = pred[i] != 1 ? a[i] OP b[i] * -CONST : a[i]; \
  }

/* Invoke T for the "add" (+) and "sub" (-) variants of TYPE with one
   multiplier.  */
#define TEST_COUNT(T, TYPE, CONST) \
  T (TYPE, add, +, CONST) \
  T (TYPE, sub, -, CONST)

/* Invoke T for multipliers 2 and 4 plus a type-specific CONST.  */
#define TEST_TYPE(T, TYPE, CONST) \
  TEST_COUNT (T, TYPE, 2) \
  TEST_COUNT (T, TYPE, 4) \
  TEST_COUNT (T, TYPE, CONST)

/* The type-specific multiplier has only the most significant bit of the
   element type set.  */
#define TEST_ALL(T) \
  TEST_TYPE (T, uint8_t, 0x80) \
  TEST_TYPE (T, uint16_t, 0x8000) \
  TEST_TYPE (T, uint32_t, 0x80000000) \
  TEST_TYPE (T, uint64_t, 0x8000000000000000ULL)

TEST_ALL (DEF_LOOP)

/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #2\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 2 } } */

/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #2\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */

/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #2\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 2 } } */

/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #2\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 2 } } */

/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 4 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */

/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */

/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */
|
|
@ -0,0 +1,34 @@
|
||||||
|
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O2 -ftree-vectorize" } */

/* Execution test for the loops defined in cond_mla_8.c: every generated
   function is run on N elements and its output is compared with a scalar
   recomputation of the same conditional expression.  */

#include "cond_mla_8.c"

/* Number of elements processed per function.  */
#define N 99

/* Fill the inputs, call test_<TYPE>_<NAME>_<CONST>, then recompute each
   element and abort on the first mismatch.  The expected value mirrors
   the tested expression: a[i] OP b[i] * -CONST when pred[i] != 1,
   otherwise a[i].  The empty asm statements are compiler memory barriers;
   presumably they keep the setup and checking loops from being vectorized
   or folded (not stated in this file).  */
#define TEST_LOOP(TYPE, NAME, OP, CONST) \
  { \
    TYPE r[N], a[N], b[N], pred[N]; \
    for (int i = 0; i < N; ++i) \
      { \
        a[i] = (i & 1 ? i : 3 * i); \
        b[i] = (i >> 4) << (i & 15); \
        pred[i] = i % 3 < i % 5; \
        asm volatile ("" ::: "memory"); \
      } \
    test_##TYPE##_##NAME##_##CONST (r, a, b, pred, N); \
    for (int i = 0; i < N; ++i) \
      { \
        TYPE expected \
          = pred[i] != 1 ? a[i] OP b[i] * -CONST : a[i]; \
        if (r[i] != expected) \
          __builtin_abort (); \
        asm volatile ("" ::: "memory"); \
      } \
  }

/* Run the check for every type/operation/multiplier combination listed by
   TEST_ALL.  */
int
main (void)
{
  TEST_ALL (TEST_LOOP)
  return 0;
}
|
Loading…
Reference in New Issue