[AArch64, Patch] Generate MLA when multiply + add vector by scalar
(On behalf of jackson.woodruff@arm.com) This merges vector multiplies and adds into a single mla instruction when the multiplication is done by a scalar. typedef int __attribute__((vector_size(16))) vec; vec mla1(vec v0, vec v1, int v2) { return v0 + v1 * v2; } Now generates: mla1: fmov s2, w0 mla v0.4s, v1.4s, v2.s[0] This is also done for the identical case for a multiply followed by a subtract of vectors with an integer operand on the multiply. gcc/ 2017-07-24 Jackson Woodruff <jackson.woodruff@arm.com> * config/aarch64/aarch64-simd.md (aarch64_mla_elt_merge<mode>): New. (aarch64_mls_elt_merge<mode>): Likewise. gcc/testsuite/ 2017-07-24 Jackson Woodruff <jackson.woodruff@arm.com> * gcc.target/aarch64/simd/vmla_elem_1.c: New. From-SVN: r250475
This commit is contained in:
parent
e678ce869e
commit
4b40986c04
|
@ -1,3 +1,8 @@
|
|||
2017-07-24 Jackson Woodruff <jackson.woodruff@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-simd.md (aarch64_mla_elt_merge<mode>): New.
|
||||
(aarch64_mls_elt_merge<mode>): Likewise.
|
||||
|
||||
2017-07-23 Krister Walfridsson <krister.walfridsson@gmail.com>
|
||||
|
||||
* config.gcc (*-*-netbsd*): Remove check for NetBSD versions not
|
||||
|
|
|
@ -1033,6 +1033,18 @@
|
|||
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
|
||||
)
|
||||
|
||||
;; Multiply-accumulate with a scalar multiplier that is duplicated across
;; every lane:
;;   operand 0 = operand 3 + (vec_duplicate (operand 1) * operand 2)
;; Operand 3 is tied to the destination (constraint "0") so the
;; accumulator is updated in place.  The scalar is addressed as lane 0 of
;; the vector register that holds it.
;;
;; The scalar uses <h_con> instead of a plain "w": the by-element form of
;; the instruction can only name V0-V15 as the element register when the
;; elements are 16 bits wide, so "w" could select a register that cannot
;; be encoded for the HI modes of VDQHS.
;; NOTE(review): relies on the h_con mode attribute covering every VDQHS
;; mode -- confirm against iterators.md.
(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS (vec_duplicate:VDQHS
                  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
                (match_operand:VDQHS 2 "register_operand" "w"))
          (match_operand:VDQHS 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
|
||||
|
||||
(define_insn "aarch64_mls<mode>"
|
||||
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
||||
(minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
|
||||
|
@ -1080,6 +1092,18 @@
|
|||
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
|
||||
)
|
||||
|
||||
;; Multiply-subtract with a scalar multiplier that is duplicated across
;; every lane:
;;   operand 0 = operand 1 - (vec_duplicate (operand 2) * operand 3)
;; Operand 1 is tied to the destination (constraint "0") so the
;; accumulator is updated in place.  The scalar is addressed as lane 0 of
;; the vector register that holds it.
;;
;; The scalar uses <h_con> instead of a plain "w": the by-element form of
;; the instruction can only name V0-V15 as the element register when the
;; elements are 16 bits wide, so "w" could select a register that cannot
;; be encoded for the HI modes of VDQHS.
;; NOTE(review): relies on the h_con mode attribute covering every VDQHS
;; mode -- confirm against iterators.md.
(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
          (mult:VDQHS (vec_duplicate:VDQHS
                  (match_operand:<VEL> 2 "register_operand" "<h_con>"))
                (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
|
||||
|
||||
;; Max/Min operations.
|
||||
(define_insn "<su><maxmin><mode>3"
|
||||
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2017-07-24 Jackson Woodruff <jackson.woodruff@arm.com>
|
||||
|
||||
* gcc.target/aarch64/simd/vmla_elem_1.c: New.
|
||||
|
||||
2017-07-24 Thomas Koenig <tkoenig@gcc.gnu.org>
|
||||
Mikael Morin <mikael@gcc.gnu.org>
|
||||
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3" } */
|
||||
|
||||
/* 16-byte vector of short int (GCC vector_size extension).  */
typedef short int __attribute__ ((vector_size (16))) v8hi;
|
||||
|
||||
/* Returns v0 + v1 * v2, where the multiplier v2 is a scalar.
   The dg-final directive in the body requires the generated assembly to
   contain an mla on .8h operands whose multiplier is lane 0 (.h[0]) of a
   vector register.  */
v8hi
|
||||
mla8hi (v8hi v0, v8hi v1, short int v2)
|
||||
{
|
||||
/* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
|
||||
return v0 + v1 * v2;
|
||||
}
|
||||
|
||||
|
||||
/* Returns v0 - v1 * v2, where the multiplier v2 is a scalar.
   The dg-final directive in the body requires the generated assembly to
   contain an mls on .8h operands whose multiplier is lane 0 (.h[0]) of a
   vector register.  */
v8hi
|
||||
mls8hi (v8hi v0, v8hi v1, short int v2)
|
||||
{
|
||||
/* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
|
||||
return v0 - v1 * v2;
|
||||
}
|
||||
|
||||
/* 8-byte vector of short int (GCC vector_size extension).  */
typedef short int __attribute__ ((vector_size (8))) v4hi;
|
||||
|
||||
/* Returns v0 + v1 * v2, where the multiplier v2 is a scalar.
   The dg-final directive in the body requires the generated assembly to
   contain an mla on .4h operands whose multiplier is lane 0 (.h[0]) of a
   vector register.  */
v4hi
|
||||
mla4hi (v4hi v0, v4hi v1, short int v2)
|
||||
{
|
||||
/* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
|
||||
return v0 + v1 * v2;
|
||||
}
|
||||
|
||||
/* Returns v0 - v1 * v2, where the multiplier v2 is a scalar.
   The dg-final directive in the body requires the generated assembly to
   contain an mls on .4h operands whose multiplier is lane 0 (.h[0]) of a
   vector register.  */
v4hi
|
||||
mls4hi (v4hi v0, v4hi v1, short int v2)
|
||||
{
|
||||
/* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
|
||||
return v0 - v1 * v2;
|
||||
}
|
||||
|
||||
/* 16-byte vector of int (GCC vector_size extension).  */
typedef int __attribute__ ((vector_size (16))) v4si;
|
||||
|
||||
/* Returns v0 + v1 * v2, where the multiplier v2 is a scalar.
   The dg-final directive in the body requires the generated assembly to
   contain an mla on .4s operands whose multiplier is lane 0 (.s[0]) of a
   vector register.  */
v4si
|
||||
mla4si (v4si v0, v4si v1, int v2)
|
||||
{
|
||||
/* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
|
||||
return v0 + v1 * v2;
|
||||
}
|
||||
|
||||
/* Returns v0 - v1 * v2, where the multiplier v2 is a scalar.
   The dg-final directive in the body requires the generated assembly to
   contain an mls on .4s operands whose multiplier is lane 0 (.s[0]) of a
   vector register.  */
v4si
|
||||
mls4si (v4si v0, v4si v1, int v2)
|
||||
{
|
||||
/* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
|
||||
return v0 - v1 * v2;
|
||||
}
|
||||
|
||||
/* 8-byte vector of int (GCC vector_size extension).  */
typedef int __attribute__((vector_size (8))) v2si;
|
||||
|
||||
/* Returns v0 + v1 * v2, where the multiplier v2 is a scalar.
   The dg-final directive in the body requires the generated assembly to
   contain an mla on .2s operands whose multiplier is lane 0 (.s[0]) of a
   vector register.  */
v2si
|
||||
mla2si (v2si v0, v2si v1, int v2)
|
||||
{
|
||||
/* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
|
||||
return v0 + v1 * v2;
|
||||
}
|
||||
|
||||
/* Returns v0 - v1 * v2, where the multiplier v2 is a scalar.
   The dg-final directive in the body requires the generated assembly to
   contain an mls on .2s operands whose multiplier is lane 0 (.s[0]) of a
   vector register.  */
v2si
|
||||
mls2si (v2si v0, v2si v1, int v2)
|
||||
{
|
||||
/* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
|
||||
return v0 - v1 * v2;
|
||||
}
|
Loading…
Reference in New Issue