ACLE intrinsics: BFloat16 load intrinsics for AArch32

2020-03-06  Delia Burduv  <delia.burduv@arm.com>

	* config/arm/arm_neon.h (vld2_bf16): New.
	(vld2q_bf16): New.
	(vld3_bf16): New.
	(vld3q_bf16): New.
	(vld4_bf16): New.
	(vld4q_bf16): New.
	(vld2_dup_bf16): New.
	(vld2q_dup_bf16): New.
	(vld3_dup_bf16): New.
	(vld3q_dup_bf16): New.
	(vld4_dup_bf16): New.
	(vld4q_dup_bf16): New.
	* config/arm/arm_neon_builtins.def
	(vld2): Changed to VAR13 and added v4bf, v8bf
	(vld2_dup): Changed to VAR8 and added v4bf, v8bf
	(vld3): Changed to VAR13 and added v4bf, v8bf
	(vld3_dup): Changed to VAR8 and added v4bf, v8bf
	(vld4): Changed to VAR13 and added v4bf, v8bf
	(vld4_dup): Changed to VAR8 and added v4bf, v8bf
	* config/arm/iterators.md (VDXBF2): New iterator.
	* config/arm/neon.md (neon_vld2<mode>): Use new iterators.
	(neon_vld2_dup<mode>): Use new iterators.
	(neon_vld3<mode>): Likewise.
	(neon_vld3qa<mode>): Likewise.
	(neon_vld3qb<mode>): Likewise.
	(neon_vld3_dup<mode>): Likewise.
	(neon_vld4<mode>): Likewise.
	(neon_vld4qa<mode>): Likewise.
	(neon_vld4qb<mode>): Likewise.
	(neon_vld4_dup<mode>): Likewise.
	(neon_vld2_dupv8bf): New.
	(neon_vld3_dupv8bf): Likewise.
	(neon_vld4_dupv8bf): Likewise.

	* gcc.target/arm/simd/bf16_vldn_1.c: New test.
This commit is contained in:
Delia Burduv 2020-03-06 10:38:20 +00:00 committed by Kyrylo Tkachov
parent ff22937572
commit eb637e7604
7 changed files with 387 additions and 22 deletions

View File

@ -1,3 +1,39 @@
2020-03-06 Delia Burduv <delia.burduv@arm.com>
* config/arm/arm_neon.h (vld2_bf16): New.
(vld2q_bf16): New.
(vld3_bf16): New.
(vld3q_bf16): New.
(vld4_bf16): New.
(vld4q_bf16): New.
(vld2_dup_bf16): New.
(vld2q_dup_bf16): New.
(vld3_dup_bf16): New.
(vld3q_dup_bf16): New.
(vld4_dup_bf16): New.
(vld4q_dup_bf16): New.
* config/arm/arm_neon_builtins.def
(vld2): Changed to VAR13 and added v4bf, v8bf
(vld2_dup): Changed to VAR8 and added v4bf, v8bf
(vld3): Changed to VAR13 and added v4bf, v8bf
(vld3_dup): Changed to VAR8 and added v4bf, v8bf
(vld4): Changed to VAR13 and added v4bf, v8bf
(vld4_dup): Changed to VAR8 and added v4bf, v8bf
* config/arm/iterators.md (VDXBF2): New iterator.
* config/arm/neon.md (neon_vld2<mode>): Use new iterators.
(neon_vld2_dup<mode>): Use new iterators.
(neon_vld3<mode>): Likewise.
(neon_vld3qa<mode>): Likewise.
(neon_vld3qb<mode>): Likewise.
(neon_vld3_dup<mode>): Likewise.
(neon_vld4<mode>): Likewise.
(neon_vld4qa<mode>): Likewise.
(neon_vld4qb<mode>): Likewise.
(neon_vld4_dup<mode>): Likewise.
(neon_vld2_dupv8bf): New.
(neon_vld3_dupv8bf): Likewise.
(neon_vld4_dupv8bf): Likewise.
2020-03-06 Delia Burduv <delia.burduv@arm.com>
* config/arm/arm_neon.h (bfloat16x4x2_t): New typedef.

View File

@ -19557,6 +19557,114 @@ vst4q_bf16 (bfloat16_t * __ptr, bfloat16x8x4_t __val)
return __builtin_neon_vst4v8bf (__ptr, __bu.__o);
}
/* vld2_bf16: load 2-element interleaved bfloat16 structures from __ptr,
   deinterleaving into two 64-bit vectors (VLD2).  The union reinterprets
   the builtin's opaque register-tuple mode as the public tuple type; the
   pointer is cast to the builtin's 16-bit element pointer type since there
   is no dedicated bf16 builtin pointer.  Fix: spell the parameter
   `const bfloat16_t *` for consistency with the sibling intrinsics (same
   type, no ABI change).  */
__extension__ extern __inline bfloat16x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld2_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __rv;
__rv.__o = __builtin_neon_vld2v4bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}
/* vld2q_bf16: load 2-element interleaved bfloat16 structures into two
   128-bit vectors (VLD2).  All of these intrinsics share one shape: a
   union reinterprets the builtin's opaque register-tuple mode (ti/oi/ei/
   ci/xi) as the public tuple type, and the pointer is cast to the 16-bit
   element pointer type the builtin expects.  */
__extension__ extern __inline bfloat16x8x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld2q_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __rv;
__rv.__o = __builtin_neon_vld2v8bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}

/* vld3_bf16: load 3-element interleaved bfloat16 structures into three
   64-bit vectors (VLD3).  */
__extension__ extern __inline bfloat16x4x3_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld3_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __rv;
__rv.__o = __builtin_neon_vld3v4bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}

/* vld3q_bf16: load 3-element interleaved bfloat16 structures into three
   128-bit vectors (VLD3).  */
__extension__ extern __inline bfloat16x8x3_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld3q_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x8x3_t __i; __builtin_neon_ci __o; } __rv;
__rv.__o = __builtin_neon_vld3v8bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}

/* vld4_bf16: load 4-element interleaved bfloat16 structures into four
   64-bit vectors (VLD4).  */
__extension__ extern __inline bfloat16x4x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld4_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __rv;
__rv.__o = __builtin_neon_vld4v4bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}

/* vld4q_bf16: load 4-element interleaved bfloat16 structures into four
   128-bit vectors (VLD4).  */
__extension__ extern __inline bfloat16x8x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld4q_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __rv;
__rv.__o = __builtin_neon_vld4v8bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}
/* vld2_dup_bf16: load one 2-element bfloat16 structure and replicate it
   into all lanes of two 64-bit vectors.  The _dup family follows the same
   union-reinterpretation pattern as the plain vldN intrinsics above.  */
__extension__ extern __inline bfloat16x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld2_dup_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __rv;
__rv.__o = __builtin_neon_vld2_dupv4bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}

/* vld2q_dup_bf16: as above, replicating into two 128-bit vectors.  */
__extension__ extern __inline bfloat16x8x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld2q_dup_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __rv;
__rv.__o = __builtin_neon_vld2_dupv8bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}

/* vld3_dup_bf16: load one 3-element bfloat16 structure and replicate it
   into all lanes of three 64-bit vectors.  */
__extension__ extern __inline bfloat16x4x3_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld3_dup_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __rv;
__rv.__o = __builtin_neon_vld3_dupv4bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}

/* vld3q_dup_bf16: as above, replicating into three 128-bit vectors.  */
__extension__ extern __inline bfloat16x8x3_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld3q_dup_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x8x3_t __i; __builtin_neon_ci __o; } __rv;
__rv.__o = __builtin_neon_vld3_dupv8bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}

/* vld4_dup_bf16: load one 4-element bfloat16 structure and replicate it
   into all lanes of four 64-bit vectors.  */
__extension__ extern __inline bfloat16x4x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld4_dup_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __rv;
__rv.__o = __builtin_neon_vld4_dupv4bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}

/* vld4q_dup_bf16: as above, replicating into four 128-bit vectors.  */
__extension__ extern __inline bfloat16x8x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld4q_dup_bf16 (const bfloat16_t * __ptr)
{
union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __rv;
__rv.__o = __builtin_neon_vld4_dupv8bf ((const __builtin_neon_hi *) __ptr);
return __rv.__i;
}
#pragma GCC pop_options
#ifdef __cplusplus

View File

@ -320,29 +320,29 @@ VAR12 (STORE1, vst1,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR12 (STORE1LANE, vst1_lane,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR11 (LOAD1, vld2,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
VAR13 (LOAD1, vld2,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
VAR9 (LOAD1LANE, vld2_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR6 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR8 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
VAR13 (STORE1, vst2,
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
VAR9 (STORE1LANE, vst2_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR11 (LOAD1, vld3,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
VAR13 (LOAD1, vld3,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
VAR9 (LOAD1LANE, vld3_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR6 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR8 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
VAR13 (STORE1, vst3,
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
VAR9 (STORE1LANE, vst3_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR11 (LOAD1, vld4,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
VAR13 (LOAD1, vld4,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
VAR9 (LOAD1LANE, vld4_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR6 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR8 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
VAR13 (STORE1, vst4,
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
VAR9 (STORE1LANE, vst4_lane,

View File

@ -87,6 +87,9 @@
;; Double-width vector modes plus 64-bit elements, including V4BF.
(define_mode_iterator VDXBF [V8QI V4HI V4HF (V4BF "TARGET_BF16_SIMD") V2SI V2SF DI])
;; Double-width vector modes plus 64-bit elements, V4BF and V8BF.
(define_mode_iterator VDXBF2 [V8QI V4HI V4HF V2SI V2SF DI (V4BF "TARGET_BF16_SIMD") (V8BF "TARGET_BF16_SIMD")])
;; Double-width vector modes plus 64-bit elements,
;; with V4BFmode added, suitable for moves.
(define_mode_iterator VDXMOV [V8QI V4HI V4HF V4BF V2SI V2SF DI])

View File

@ -5428,7 +5428,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vld2<mode>"
[(set (match_operand:TI 0 "s_register_operand" "=w")
(unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2))]
"TARGET_NEON"
{
@ -5453,7 +5453,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vld2<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
(unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2))]
"TARGET_NEON"
"vld2.<V_sz_elem>\t%h0, %A1"
@ -5516,7 +5516,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vld2_dup<mode>"
[(set (match_operand:TI 0 "s_register_operand" "=w")
(unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2_DUP))]
"TARGET_NEON"
{
@ -5531,6 +5531,27 @@ if (BYTES_BIG_ENDIAN)
(const_string "neon_load1_1reg<q>")))]
)
;; vld2q_dup for bfloat16: load one 2-element structure (V2BF memory
;; operand, two 16-bit elements) and fill the OI (two Q register) result.
;; The four D-register operands are derived from the hard register number
;; of operand 0, two D registers per Q register.
;; NOTE(review): the template uses plain "vld2.16 {%P0, ...}" without the
;; per-lane "[]" syntax used by the other _dup patterns -- matches the
;; expectation in bf16_vldn_1.c, but confirm this is intentional.
(define_insn "neon_vld2_dupv8bf"
[(set (match_operand:OI 0 "s_register_operand" "=w")
(unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
(unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD2_DUP))]
"TARGET_BF16_SIMD"
{
rtx ops[5];
int tabbase = REGNO (operands[0]);
ops[4] = operands[1];
ops[0] = gen_rtx_REG (V4BFmode, tabbase);
ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", ops);
return "";
}
[(set_attr "type" "neon_load2_all_lanes_q")]
)
(define_expand "vec_store_lanesti<mode>"
[(set (match_operand:TI 0 "neon_struct_operand")
(unspec:TI [(match_operand:TI 1 "s_register_operand")
@ -5637,7 +5658,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vld3<mode>"
[(set (match_operand:EI 0 "s_register_operand" "=w")
(unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3))]
"TARGET_NEON"
{
@ -5665,7 +5686,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vld3<mode>"
[(match_operand:CI 0 "s_register_operand")
(match_operand:CI 1 "neon_struct_operand")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
rtx mem;
@ -5680,7 +5701,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vld3qa<mode>"
[(set (match_operand:CI 0 "s_register_operand" "=w")
(unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3A))]
"TARGET_NEON"
{
@ -5700,7 +5721,7 @@ if (BYTES_BIG_ENDIAN)
[(set (match_operand:CI 0 "s_register_operand" "=w")
(unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
(match_operand:CI 2 "s_register_operand" "0")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3B))]
"TARGET_NEON"
{
@ -5777,7 +5798,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vld3_dup<mode>"
[(set (match_operand:EI 0 "s_register_operand" "=w")
(unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD3_DUP))]
"TARGET_NEON"
{
@ -5800,6 +5821,26 @@ if (BYTES_BIG_ENDIAN)
(const_string "neon_load3_all_lanes<q>")
(const_string "neon_load1_1reg<q>")))])
;; vld3q_dup for bfloat16: load one 3-element structure and replicate it
;; into the CI (three Q register) result.  The three D-register operands
;; are derived from the hard register number of operand 0.
;; Fix: use UNSPEC_VLD3_DUP, not UNSPEC_VLD2_DUP -- the latter was a
;; copy-paste from neon_vld2_dupv8bf and misidentifies the operation in
;; RTL (the neon_vld3_dup<mode> pattern above uses UNSPEC_VLD3_DUP).
;; NOTE(review): the memory operand mode is V2BF (4 bytes) although VLD3
;; reads three 16-bit elements -- confirm against the generic vldN_dup
;; patterns, which use <V_three_elem>.
(define_insn "neon_vld3_dupv8bf"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_BF16_SIMD"
{
  rtx ops[4];
  int tabbase = REGNO (operands[0]);
  ops[3] = operands[1];
  ops[0] = gen_rtx_REG (V4BFmode, tabbase);
  ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
  ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
  output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_all_lanes_q")]
)
(define_expand "vec_store_lanesei<mode>"
[(set (match_operand:EI 0 "neon_struct_operand")
(unspec:EI [(match_operand:EI 1 "s_register_operand")
@ -5955,7 +5996,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vld4<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
(unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4))]
"TARGET_NEON"
{
@ -5983,7 +6024,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vld4<mode>"
[(match_operand:XI 0 "s_register_operand")
(match_operand:XI 1 "neon_struct_operand")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON"
{
rtx mem;
@ -5998,7 +6039,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vld4qa<mode>"
[(set (match_operand:XI 0 "s_register_operand" "=w")
(unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4A))]
"TARGET_NEON"
{
@ -6019,7 +6060,7 @@ if (BYTES_BIG_ENDIAN)
[(set (match_operand:XI 0 "s_register_operand" "=w")
(unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
(match_operand:XI 2 "s_register_operand" "0")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4B))]
"TARGET_NEON"
{
@ -6099,7 +6140,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vld4_dup<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
(unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
(unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VLD4_DUP))]
"TARGET_NEON"
{
@ -6125,6 +6166,27 @@ if (BYTES_BIG_ENDIAN)
(const_string "neon_load1_1reg<q>")))]
)
;; vld4q_dup for bfloat16: load one 4-element structure and replicate it
;; into the XI (four Q register) result.  The four D-register operands
;; are derived from the hard register number of operand 0.
;; Fix: use UNSPEC_VLD4_DUP, not UNSPEC_VLD2_DUP -- the latter was a
;; copy-paste from neon_vld2_dupv8bf and misidentifies the operation in
;; RTL (the neon_vld4_dup<mode> pattern above uses UNSPEC_VLD4_DUP).
;; NOTE(review): the memory operand mode is V2BF (4 bytes) although VLD4
;; reads four 16-bit elements -- confirm against the generic vldN_dup
;; patterns, which use <V_four_elem>.
(define_insn "neon_vld4_dupv8bf"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_BF16_SIMD"
{
  rtx ops[5];
  int tabbase = REGNO (operands[0]);
  ops[4] = operands[1];
  ops[0] = gen_rtx_REG (V4BFmode, tabbase);
  ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
  ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
  ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
  output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_all_lanes_q")]
)
(define_expand "vec_store_lanesoi<mode>"
[(set (match_operand:OI 0 "neon_struct_operand")
(unspec:OI [(match_operand:OI 1 "s_register_operand")

View File

@ -1,3 +1,7 @@
2020-03-06 Delia Burduv <delia.burduv@arm.com>
* gcc.target/arm/simd/bf16_vldn_1.c: New test.
2020-03-06 Delia Burduv <delia.burduv@arm.com>
* gcc.target/arm/simd/bf16_vstn_1.c: New test.

View File

@ -0,0 +1,152 @@
/* { dg-do assemble } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
/* { dg-additional-options "-save-temps -O2 -mfloat-abi=hard" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "arm_neon.h"
/* Each "**" comment block below is a scan pattern consumed by
   check-function-bodies: it is matched as a regex against the assembly
   generated for the function of the same name.  With -mfloat-abi=hard the
   return value lands in d0.., so the patterns pin both the vldN opcode
   and the destination registers.  Do not reformat these comments.  */
/*
**test_vld2_bf16:
** ...
** vld2.16 {d0-d1}, \[r0\]
** bx lr
*/
bfloat16x4x2_t
test_vld2_bf16 (bfloat16_t * ptr)
{
return vld2_bf16 (ptr);
}
/*
**test_vld2q_bf16:
** ...
** vld2.16 {d0-d3}, \[r0\]
** bx lr
*/
bfloat16x8x2_t
test_vld2q_bf16 (bfloat16_t * ptr)
{
return vld2q_bf16 (ptr);
}
/*
**test_vld2_dup_bf16:
** ...
** vld2.16 {d0\[\], d1\[\]}, \[r0\]
** bx lr
*/
bfloat16x4x2_t
test_vld2_dup_bf16 (bfloat16_t * ptr)
{
return vld2_dup_bf16 (ptr);
}
/*
**test_vld2q_dup_bf16:
** ...
** vld2.16 {d0, d1, d2, d3}, \[r0\]
** bx lr
*/
bfloat16x8x2_t
test_vld2q_dup_bf16 (bfloat16_t * ptr)
{
return vld2q_dup_bf16 (ptr);
}
/*
**test_vld3_bf16:
** ...
** vld3.16 {d0-d2}, \[r0\]
** bx lr
*/
bfloat16x4x3_t
test_vld3_bf16 (bfloat16_t * ptr)
{
return vld3_bf16 (ptr);
}
/*
**test_vld3q_bf16:
** ...
** vld3.16 {d1, d3, d5}, \[r0\]
** bx lr
*/
bfloat16x8x3_t
test_vld3q_bf16 (bfloat16_t * ptr)
{
return vld3q_bf16 (ptr);
}
/*
**test_vld3_dup_bf16:
** ...
** vld3.16 {d0\[\], d1\[\], d2\[\]}, \[r0\]
** bx lr
*/
bfloat16x4x3_t
test_vld3_dup_bf16 (bfloat16_t * ptr)
{
return vld3_dup_bf16 (ptr);
}
/*
**test_vld3q_dup_bf16:
** ...
** vld3.16 {d0\[\], d1\[\], d2\[\]}, \[r0\]
** bx lr
*/
bfloat16x8x3_t
test_vld3q_dup_bf16 (bfloat16_t * ptr)
{
return vld3q_dup_bf16 (ptr);
}
/*
**test_vld4_bf16:
** ...
** vld4.16 {d0-d3}, \[r0\]
** bx lr
*/
bfloat16x4x4_t
test_vld4_bf16 (bfloat16_t * ptr)
{
return vld4_bf16 (ptr);
}
/*
**test_vld4q_bf16:
** ...
** vld4.16 {d1, d3, d5, d7}, \[r0\]
** bx lr
*/
bfloat16x8x4_t
test_vld4q_bf16 (bfloat16_t * ptr)
{
return vld4q_bf16 (ptr);
}
/*
**test_vld4_dup_bf16:
** ...
** vld4.16 {d0\[\], d1\[\], d2\[\], d3\[\]}, \[r0\]
** bx lr
*/
bfloat16x4x4_t
test_vld4_dup_bf16 (bfloat16_t * ptr)
{
return vld4_dup_bf16 (ptr);
}
/*
**test_vld4q_dup_bf16:
** ...
** vld4.16 {d0\[\], d1\[\], d2\[\], d3\[\]}, \[r0\]
** bx lr
*/
bfloat16x8x4_t
test_vld4q_dup_bf16 (bfloat16_t * ptr)
{
return vld4q_dup_bf16 (ptr);
}