aarch64: ACLE I8MM multiply-accumulate intrinsics
This patch adds intrinsics for 8-bit integer matrix multiply-accumulate operations including vmmlaq_s32, vmmlaq_u32, and vusmmlaq_s32. gcc/ChangeLog: 2020-02-07 Dennis Zhang <dennis.zhang@arm.com> * config/aarch64/aarch64-simd-builtins.def (simd_smmla): New entry. (simd_ummla, simd_usmmla): Likewise. * config/aarch64/aarch64-simd.md (aarch64_simd_<sur>mmlav16qi): New. * config/aarch64/arm_neon.h (vmmlaq_s32, vmmlaq_u32): New. (vusmmlaq_s32): New. gcc/testsuite/ChangeLog: 2020-02-07 Dennis Zhang <dennis.zhang@arm.com> * gcc.target/aarch64/simd/vmmla.c: New test.
This commit is contained in:
parent
b7903d9f5b
commit
40f6483780
@ -1,3 +1,11 @@
|
||||
2020-02-07 Dennis Zhang <dennis.zhang@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-simd-builtins.def (simd_smmla): New entry.
|
||||
(simd_ummla, simd_usmmla): Likewise.
|
||||
* config/aarch64/aarch64-simd.md (aarch64_simd_<sur>mmlav16qi): New.
|
||||
* config/aarch64/arm_neon.h (vmmlaq_s32, vmmlaq_u32): New.
|
||||
(vusmmlaq_s32): New.
|
||||
|
||||
2020-02-07 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR middle-end/93519
|
||||
|
@ -703,3 +703,8 @@
|
||||
VAR1 (QUADOP_LANE, bfmlalt_lane, 0, v4sf)
|
||||
VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, v4sf)
|
||||
VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, v4sf)
|
||||
|
||||
/* Implemented by aarch64_simd_<sur>mmlav16qi. */
|
||||
VAR1 (TERNOP, simd_smmla, 0, v16qi)
|
||||
VAR1 (TERNOPU, simd_ummla, 0, v16qi)
|
||||
VAR1 (TERNOP_SSUS, simd_usmmla, 0, v16qi)
|
||||
|
@ -7173,3 +7173,15 @@
|
||||
}
|
||||
[(set_attr "type" "neon_fp_mla_s_scalar_q")]
|
||||
)
|
||||
|
||||
;; 8-bit integer matrix multiply-accumulate
|
||||
(define_insn "aarch64_simd_<sur>mmlav16qi"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=w")
|
||||
(plus:V4SI
|
||||
(unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
|
||||
(match_operand:V16QI 3 "register_operand" "w")] MATMUL)
|
||||
(match_operand:V4SI 1 "register_operand" "0")))]
|
||||
"TARGET_I8MM"
|
||||
"<sur>mmla\\t%0.4s, %2.16b, %3.16b"
|
||||
[(set_attr "type" "neon_mla_s_q")]
|
||||
)
|
||||
|
@ -34797,6 +34797,29 @@ vsudotq_laneq_s32 (int32x4_t __r, int8x16_t __a, uint8x16_t __b,
|
||||
return __builtin_aarch64_sudot_laneqv16qi_sssus (__r, __a, __b, __index);
|
||||
}
|
||||
|
||||
/* Matrix Multiply-Accumulate. */
|
||||
|
||||
__extension__ extern __inline int32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vmmlaq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_simd_smmlav16qi (__r, __a, __b);
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vmmlaq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_simd_ummlav16qi_uuuu (__r, __a, __b);
|
||||
}
|
||||
|
||||
__extension__ extern __inline int32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vusmmlaq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_simd_usmmlav16qi_ssus (__r, __a, __b);
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
||||
#undef __aarch64_vget_lane_any
|
||||
|
@ -1,3 +1,7 @@
|
||||
2020-02-07 Dennis Zhang <dennis.zhang@arm.com>
|
||||
|
||||
* gcc.target/aarch64/simd/vmmla.c: New test.
|
||||
|
||||
2020-02-07 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR middle-end/93519
|
||||
|
27
gcc/testsuite/gcc.target/aarch64/simd/vmmla.c
Normal file
27
gcc/testsuite/gcc.target/aarch64/simd/vmmla.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* { dg-do assemble} */
|
||||
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
|
||||
/* { dg-additional-options "-march=armv8.2-a+i8mm" } */
|
||||
|
||||
#include "arm_neon.h"
|
||||
|
||||
int32x4_t
|
||||
test_vmmlaq_s32 (int32x4_t r, int8x16_t a, int8x16_t b)
|
||||
{
|
||||
return vmmlaq_s32 (r, a, b);
|
||||
}
|
||||
|
||||
uint32x4_t
|
||||
test_vmmlaq_u32 (uint32x4_t r, uint8x16_t a, uint8x16_t b)
|
||||
{
|
||||
return vmmlaq_u32 (r, a, b);
|
||||
}
|
||||
|
||||
int32x4_t
|
||||
test_vusmmlaq_s32 (int32x4_t r, uint8x16_t a, int8x16_t b)
|
||||
{
|
||||
return vusmmlaq_s32 (r, a, b);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tsmmla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tummla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tusmmla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */
|
Loading…
Reference in New Issue
Block a user