[AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate

gcc/:

	* config/aarch64/aarch64.c (aarch64_split_simd_combine): Add V4HFmode.
	* config/aarch64/aarch64-builtins.c (VAR13, VAR14): New.
	(aarch64_scalar_builtin_types, aarch64_init_simd_builtin_scalar_types):
	Add __builtin_aarch64_simd_hf.
	* config/aarch64/arm_neon.h (float16x4x2_t, float16x8x2_t,
	float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t,
	vcombine_f16, vst2_lane_f16, vst2q_lane_f16, vst3_lane_f16,
	vst3q_lane_f16, vst4_lane_f16, vst4q_lane_f16, vld2_f16, vld2q_f16,
	vld3_f16, vld3q_f16, vld4_f16, vld4q_f16, vld2_dup_f16, vld2q_dup_f16,
	vld3_dup_f16, vld3q_dup_f16, vld4_dup_f16, vld4q_dup_f16,
	vld2_lane_f16, vld2q_lane_f16, vld3_lane_f16, vld3q_lane_f16,
	vld4_lane_f16, vld4q_lane_f16, vst2_f16, vst2q_f16, vst3_f16,
	vst3q_f16, vst4_f16, vst4q_f16, vcreate_f16): New.

	* config/aarch64/iterators.md (VALLDIF, Vtype, Vetype, Vbtype,
	V_cmp_result, v_cmp_result): Add cases for V4HF and V8HF.
	(VDC, Vdbl): Add V4HF.

gcc/testsuite/:

	* gcc.target/aarch64/vldN_1.c: Add float16x4_t and float16x8_t cases.
	* gcc.target/aarch64/vldN_dup_1.c: Likewise.
	* gcc.target/aarch64/vldN_lane_1.c: Likewise.
	(main): update orig_data to avoid float16 NaN on bigendian.

From-SVN: r227543
This commit is contained in:
Alan Lawrence 2015-09-08 19:03:53 +00:00 committed by Alan Lawrence
parent 71a11456ef
commit 7c36948575
9 changed files with 341 additions and 10 deletions

View File

@ -1,3 +1,23 @@
2015-09-08 Alan Lawrence <alan.lawrence@arm.com>
* config/aarch64/aarch64.c (aarch64_split_simd_combine): Add V4HFmode.
* config/aarch64/aarch64-builtins.c (VAR13, VAR14): New.
(aarch64_scalar_builtin_types, aarch64_init_simd_builtin_scalar_types):
Add __builtin_aarch64_simd_hf.
* config/aarch64/arm_neon.h (float16x4x2_t, float16x8x2_t,
float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t,
vcombine_f16, vst2_lane_f16, vst2q_lane_f16, vst3_lane_f16,
vst3q_lane_f16, vst4_lane_f16, vst4q_lane_f16, vld2_f16, vld2q_f16,
vld3_f16, vld3q_f16, vld4_f16, vld4q_f16, vld2_dup_f16, vld2q_dup_f16,
vld3_dup_f16, vld3q_dup_f16, vld4_dup_f16, vld4q_dup_f16,
vld2_lane_f16, vld2q_lane_f16, vld3_lane_f16, vld3q_lane_f16,
vld4_lane_f16, vld4q_lane_f16, vst2_f16, vst2q_f16, vst3_f16,
vst3q_f16, vst4_f16, vst4q_f16, vcreate_f16): New.
* config/aarch64/iterators.md (VALLDIF, Vtype, Vetype, Vbtype,
V_cmp_result, v_cmp_result): Add cases for V4HF and V8HF.
(VDC, Vdbl): Add V4HF.
2015-09-08 Alan Lawrence <alan.lawrence@arm.com>
* config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support

View File

@ -297,6 +297,12 @@ aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
VAR1 (T, N, MAP, L)
#define VAR13(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
VAR12 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
VAR1 (T, N, MAP, M)
#define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
VAR1 (T, X, MAP, N)
#include "aarch64-builtin-iterators.h"
@ -374,6 +380,7 @@ const char *aarch64_scalar_builtin_types[] = {
"__builtin_aarch64_simd_qi",
"__builtin_aarch64_simd_hi",
"__builtin_aarch64_simd_si",
"__builtin_aarch64_simd_hf",
"__builtin_aarch64_simd_sf",
"__builtin_aarch64_simd_di",
"__builtin_aarch64_simd_df",
@ -661,6 +668,8 @@ aarch64_init_simd_builtin_scalar_types (void)
"__builtin_aarch64_simd_qi");
(*lang_hooks.types.register_builtin_type) (intHI_type_node,
"__builtin_aarch64_simd_hi");
(*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
"__builtin_aarch64_simd_hf");
(*lang_hooks.types.register_builtin_type) (intSI_type_node,
"__builtin_aarch64_simd_si");
(*lang_hooks.types.register_builtin_type) (float_type_node,

View File

@ -1335,6 +1335,9 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
case V2SImode:
gen = gen_aarch64_simd_combinev2si;
break;
case V4HFmode:
gen = gen_aarch64_simd_combinev4hf;
break;
case V2SFmode:
gen = gen_aarch64_simd_combinev2sf;
break;

View File

@ -152,6 +152,16 @@ typedef struct uint64x2x2_t
uint64x2_t val[2];
} uint64x2x2_t;
typedef struct float16x4x2_t
{
float16x4_t val[2];
} float16x4x2_t;
typedef struct float16x8x2_t
{
float16x8_t val[2];
} float16x8x2_t;
typedef struct float32x2x2_t
{
float32x2_t val[2];
@ -272,6 +282,16 @@ typedef struct uint64x2x3_t
uint64x2_t val[3];
} uint64x2x3_t;
typedef struct float16x4x3_t
{
float16x4_t val[3];
} float16x4x3_t;
typedef struct float16x8x3_t
{
float16x8_t val[3];
} float16x8x3_t;
typedef struct float32x2x3_t
{
float32x2_t val[3];
@ -392,6 +412,16 @@ typedef struct uint64x2x4_t
uint64x2_t val[4];
} uint64x2x4_t;
typedef struct float16x4x4_t
{
float16x4_t val[4];
} float16x4x4_t;
typedef struct float16x8x4_t
{
float16x8_t val[4];
} float16x8x4_t;
typedef struct float32x2x4_t
{
float32x2_t val[4];
@ -2643,6 +2673,12 @@ vcreate_s64 (uint64_t __a)
return (int64x1_t) {__a};
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcreate_f16 (uint64_t __a)
{
return (float16x4_t) __a;
}
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcreate_f32 (uint64_t __a)
{
@ -4779,6 +4815,12 @@ vcombine_s64 (int64x1_t __a, int64x1_t __b)
return __builtin_aarch64_combinedi (__a[0], __b[0]);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vcombine_f16 (float16x4_t __a, float16x4_t __b)
{
return __builtin_aarch64_combinev4hf (__a, __b);
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcombine_f32 (float32x2_t __a, float32x2_t __b)
{
@ -9907,7 +9949,7 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
+------+----+----+----+----+
|uint | Y | Y | N | N |
+------+----+----+----+----+
|float | - | - | N | N |
|float | - | Y | N | N |
+------+----+----+----+----+
|poly | Y | Y | - | - |
+------+----+----+----+----+
@ -9921,7 +9963,7 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
+------+----+----+----+----+
|uint | Y | Y | Y | Y |
+------+----+----+----+----+
|float | - | - | Y | Y |
|float | - | Y | Y | Y |
+------+----+----+----+----+
|poly | Y | Y | - | - |
+------+----+----+----+----+
@ -9935,7 +9977,7 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
+------+----+----+----+----+
|uint | Y | N | N | Y |
+------+----+----+----+----+
|float | - | - | N | Y |
|float | - | N | N | Y |
+------+----+----+----+----+
|poly | Y | N | - | - |
+------+----+----+----+----+
@ -9951,6 +9993,7 @@ __STRUCTN (int, 8, 2)
__STRUCTN (int, 16, 2)
__STRUCTN (uint, 8, 2)
__STRUCTN (uint, 16, 2)
__STRUCTN (float, 16, 2)
__STRUCTN (poly, 8, 2)
__STRUCTN (poly, 16, 2)
/* 3-element structs. */
@ -9962,6 +10005,7 @@ __STRUCTN (uint, 8, 3)
__STRUCTN (uint, 16, 3)
__STRUCTN (uint, 32, 3)
__STRUCTN (uint, 64, 3)
__STRUCTN (float, 16, 3)
__STRUCTN (float, 32, 3)
__STRUCTN (float, 64, 3)
__STRUCTN (poly, 8, 3)
@ -9999,6 +10043,8 @@ vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __o, __c); \
}
__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16,
float16x8_t)
__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32,
float32x4_t)
__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64,
@ -10037,6 +10083,7 @@ vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __temp.__o, __c); \
}
__ST2_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16)
__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
@ -10078,6 +10125,8 @@ vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __o, __c); \
}
__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16,
float16x8_t)
__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32,
float32x4_t)
__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64,
@ -10116,6 +10165,7 @@ vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __temp.__o, __c); \
}
__ST3_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16)
__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
@ -10162,6 +10212,8 @@ vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __o, __c); \
}
__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16,
float16x8_t)
__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32,
float32x4_t)
__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64,
@ -10200,6 +10252,7 @@ vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
__ptr, __temp.__o, __c); \
}
__ST4_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16)
__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
@ -15255,6 +15308,17 @@ vld2_u32 (const uint32_t * __a)
return ret;
}
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vld2_f16 (const float16_t * __a)
{
float16x4x2_t ret;
__builtin_aarch64_simd_oi __o;
__o = __builtin_aarch64_ld2v4hf (__a);
ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1);
return ret;
}
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
@ -15376,6 +15440,17 @@ vld2q_u64 (const uint64_t * __a)
return ret;
}
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vld2q_f16 (const float16_t * __a)
{
float16x8x2_t ret;
__builtin_aarch64_simd_oi __o;
__o = __builtin_aarch64_ld2v8hf (__a);
ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0);
ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
return ret;
}
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
@ -15530,6 +15605,18 @@ vld3_u32 (const uint32_t * __a)
return ret;
}
__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
vld3_f16 (const float16_t * __a)
{
float16x4x3_t ret;
__builtin_aarch64_simd_ci __o;
__o = __builtin_aarch64_ld3v4hf (__a);
ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0);
ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1);
ret.val[2] = __builtin_aarch64_get_dregciv4hf (__o, 2);
return ret;
}
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
@ -15662,6 +15749,18 @@ vld3q_u64 (const uint64_t * __a)
return ret;
}
__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
vld3q_f16 (const float16_t * __a)
{
float16x8x3_t ret;
__builtin_aarch64_simd_ci __o;
__o = __builtin_aarch64_ld3v8hf (__a);
ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0);
ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1);
ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2);
return ret;
}
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
@ -15829,6 +15928,19 @@ vld4_u32 (const uint32_t * __a)
return ret;
}
__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
vld4_f16 (const float16_t * __a)
{
float16x4x4_t ret;
__builtin_aarch64_simd_xi __o;
__o = __builtin_aarch64_ld4v4hf (__a);
ret.val[0] = __builtin_aarch64_get_dregxiv4hf (__o, 0);
ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1);
ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2);
ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3);
return ret;
}
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
@ -15972,6 +16084,19 @@ vld4q_u64 (const uint64_t * __a)
return ret;
}
__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
vld4q_f16 (const float16_t * __a)
{
float16x8x4_t ret;
__builtin_aarch64_simd_xi __o;
__o = __builtin_aarch64_ld4v8hf (__a);
ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
return ret;
}
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32 (const float32_t * __a)
{
@ -16033,6 +16158,17 @@ vld2_dup_s32 (const int32_t * __a)
return ret;
}
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vld2_dup_f16 (const float16_t * __a)
{
float16x4x2_t ret;
__builtin_aarch64_simd_oi __o;
__o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
return ret;
}
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_dup_f32 (const float32_t * __a)
{
@ -16242,6 +16378,17 @@ vld2q_dup_u64 (const uint64_t * __a)
return ret;
}
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vld2q_dup_f16 (const float16_t * __a)
{
float16x8x2_t ret;
__builtin_aarch64_simd_oi __o;
__o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a);
ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0);
ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
return ret;
}
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_dup_f32 (const float32_t * __a)
{
@ -16396,6 +16543,18 @@ vld3_dup_u32 (const uint32_t * __a)
return ret;
}
__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
vld3_dup_f16 (const float16_t * __a)
{
float16x4x3_t ret;
__builtin_aarch64_simd_ci __o;
__o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a);
ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0);
ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1);
ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2);
return ret;
}
__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_dup_f32 (const float32_t * __a)
{
@ -16528,6 +16687,18 @@ vld3q_dup_u64 (const uint64_t * __a)
return ret;
}
__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
vld3q_dup_f16 (const float16_t * __a)
{
float16x8x3_t ret;
__builtin_aarch64_simd_ci __o;
__o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a);
ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0);
ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1);
ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2);
return ret;
}
__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_dup_f32 (const float32_t * __a)
{
@ -16695,6 +16866,19 @@ vld4_dup_u32 (const uint32_t * __a)
return ret;
}
__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
vld4_dup_f16 (const float16_t * __a)
{
float16x4x4_t ret;
__builtin_aarch64_simd_xi __o;
__o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a);
ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0);
ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1);
ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2);
ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3);
return ret;
}
__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_f32 (const float32_t * __a)
{
@ -16838,6 +17022,19 @@ vld4q_dup_u64 (const uint64_t * __a)
return ret;
}
__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
vld4q_dup_f16 (const float16_t * __a)
{
float16x8x4_t ret;
__builtin_aarch64_simd_xi __o;
__o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a);
ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0);
ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 1);
ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2);
ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3);
return ret;
}
__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_dup_f32 (const float32_t * __a)
{
@ -16890,6 +17087,8 @@ vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return __b; \
}
__LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf,
v8hf, hf, f16, float16x8_t)
__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf,
sf, f32, float32x4_t)
__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df,
@ -16934,6 +17133,7 @@ vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
__LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
@ -16981,6 +17181,8 @@ vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return __b; \
}
__LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf,
v8hf, hf, f16, float16x8_t)
__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf,
sf, f32, float32x4_t)
__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df,
@ -17027,6 +17229,7 @@ vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
__LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
@ -17082,6 +17285,8 @@ vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
/* vld4q_lane */
__LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf,
v8hf, hf, f16, float16x8_t)
__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf,
sf, f32, float32x4_t)
__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df,
@ -17130,6 +17335,7 @@ vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
return ret; \
}
__LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
@ -22490,6 +22696,18 @@ vst2_u32 (uint32_t * __a, uint32x2x2_t val)
__builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f16 (float16_t * __a, float16x4x2_t val)
{
__builtin_aarch64_simd_oi __o;
float16x8x2_t temp;
temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1);
__builtin_aarch64_st2v4hf (__a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f32 (float32_t * __a, float32x2x2_t val)
{
@ -22592,6 +22810,15 @@ vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
__builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f16 (float16_t * __a, float16x8x2_t val)
{
__builtin_aarch64_simd_oi __o;
__o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0);
__o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1);
__builtin_aarch64_st2v8hf (__a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f32 (float32_t * __a, float32x4x2_t val)
{
@ -22764,6 +22991,20 @@ vst3_u32 (uint32_t * __a, uint32x2x3_t val)
__builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f16 (float16_t * __a, float16x4x3_t val)
{
__builtin_aarch64_simd_ci __o;
float16x8x3_t temp;
temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2);
__builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f32 (float32_t * __a, float32x2x3_t val)
{
@ -22878,6 +23119,16 @@ vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
__builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f16 (float16_t * __a, float16x8x3_t val)
{
__builtin_aarch64_simd_ci __o;
__o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0);
__o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1);
__o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2);
__builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f32 (float32_t * __a, float32x4x3_t val)
{
@ -23074,6 +23325,22 @@ vst4_u32 (uint32_t * __a, uint32x2x4_t val)
__builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f16 (float16_t * __a, float16x4x4_t val)
{
__builtin_aarch64_simd_xi __o;
float16x8x4_t temp;
temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
temp.val[3] = vcombine_f16 (val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[2], 2);
__o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[3], 3);
__builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t val)
{
@ -23200,6 +23467,17 @@ vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
__builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f16 (float16_t * __a, float16x8x4_t val)
{
__builtin_aarch64_simd_xi __o;
__o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[0], 0);
__o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[1], 1);
__o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[2], 2);
__o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[3], 3);
__builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t val)
{

View File

@ -111,9 +111,9 @@
(define_mode_iterator VALLDI_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
V4HF V8HF V2SF V4SF V2DF DI])
;; All vector modes barring HF modes, plus DI and DF.
;; All vector modes, plus DI and DF.
(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI
V2DI V2SF V4SF V2DF DI DF])
V2DI V4HF V8HF V2SF V4SF V2DF DI DF])
;; Vector modes for Integer reduction across lanes.
(define_mode_iterator VDQV [V8QI V16QI V4HI V8HI V4SI V2DI])
@ -134,7 +134,7 @@
(define_mode_iterator VQW [V16QI V8HI V4SI])
;; Double vector modes for combines.
(define_mode_iterator VDC [V8QI V4HI V2SI V2SF DI DF])
(define_mode_iterator VDC [V8QI V4HI V4HF V2SI V2SF DI DF])
;; Vector modes except double int.
(define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
@ -361,7 +361,8 @@
(V2SI "2s") (V4SI "4s")
(DI "1d") (DF "1d")
(V2DI "2d") (V2SF "2s")
(V4SF "4s") (V2DF "2d")])
(V4SF "4s") (V2DF "2d")
(V4HF "4h") (V8HF "8h")])
(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32")
(V4SI "32") (V2DI "64")])
@ -387,7 +388,8 @@
(define_mode_attr Vetype [(V8QI "b") (V16QI "b")
(V4HI "h") (V8HI "h")
(V2SI "s") (V4SI "s")
(V2DI "d") (V2SF "s")
(V2DI "d") (V4HF "h")
(V8HF "h") (V2SF "s")
(V4SF "s") (V2DF "d")
(SF "s") (DF "d")
(QI "b") (HI "h")
@ -397,7 +399,8 @@
(define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b")
(V4HI "8b") (V8HI "16b")
(V2SI "8b") (V4SI "16b")
(V2DI "16b") (V2SF "8b")
(V2DI "16b") (V4HF "8b")
(V8HF "16b") (V2SF "8b")
(V4SF "16b") (V2DF "16b")
(DI "8b") (DF "8b")
(SI "8b")])
@ -448,6 +451,7 @@
;; Double modes of vector modes (lower case).
(define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi")
(V4HF "v8hf")
(V2SI "v4si") (V2SF "v4sf")
(SI "v2si") (DI "v2di")
(DF "v2df")])
@ -522,6 +526,7 @@
(V4HI "V4HI") (V8HI "V8HI")
(V2SI "V2SI") (V4SI "V4SI")
(DI "DI") (V2DI "V2DI")
(V4HF "V4HI") (V8HF "V8HI")
(V2SF "V2SI") (V4SF "V4SI")
(V2DF "V2DI") (DF "DI")
(SF "SI")])
@ -531,6 +536,7 @@
(V4HI "v4hi") (V8HI "v8hi")
(V2SI "v2si") (V4SI "v4si")
(DI "di") (V2DI "v2di")
(V4HF "v4hi") (V8HF "v8hi")
(V2SF "v2si") (V4SF "v4si")
(V2DF "v2di") (DF "di")
(SF "si")])

View File

@ -1,3 +1,10 @@
2015-09-08 Alan Lawrence <alan.lawrence@arm.com>
* gcc.target/aarch64/vldN_1.c: Add float16x4_t and float16x8_t cases.
* gcc.target/aarch64/vldN_dup_1.c: Likewise.
* gcc.target/aarch64/vldN_lane_1.c: Likewise.
(main): update orig_data to avoid float16 NaN on bigendian.
2015-09-08 Alan Lawrence <alan.lawrence@arm.com>
* g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and

View File

@ -39,6 +39,7 @@ VARIANT (int32, 2, STRUCT, _s32) \
VARIANT (int64, 1, STRUCT, _s64) \
VARIANT (poly8, 8, STRUCT, _p8) \
VARIANT (poly16, 4, STRUCT, _p16) \
VARIANT (float16, 4, STRUCT, _f16) \
VARIANT (float32, 2, STRUCT, _f32) \
VARIANT (float64, 1, STRUCT, _f64) \
VARIANT (uint8, 16, STRUCT, q_u8) \
@ -51,6 +52,7 @@ VARIANT (int32, 4, STRUCT, q_s32) \
VARIANT (int64, 2, STRUCT, q_s64) \
VARIANT (poly8, 16, STRUCT, q_p8) \
VARIANT (poly16, 8, STRUCT, q_p16) \
VARIANT (float16, 8, STRUCT, q_f16) \
VARIANT (float32, 4, STRUCT, q_f32) \
VARIANT (float64, 2, STRUCT, q_f64)

View File

@ -16,6 +16,7 @@ VARIANT (int32, , 2, _s32, STRUCT) \
VARIANT (int64, , 1, _s64, STRUCT) \
VARIANT (poly8, , 8, _p8, STRUCT) \
VARIANT (poly16, , 4, _p16, STRUCT) \
VARIANT (float16, , 4, _f16, STRUCT) \
VARIANT (float32, , 2, _f32, STRUCT) \
VARIANT (float64, , 1, _f64, STRUCT) \
VARIANT (uint8, q, 16, _u8, STRUCT) \
@ -28,6 +29,7 @@ VARIANT (int32, q, 4, _s32, STRUCT) \
VARIANT (int64, q, 2, _s64, STRUCT) \
VARIANT (poly8, q, 16, _p8, STRUCT) \
VARIANT (poly16, q, 8, _p16, STRUCT) \
VARIANT (float16, q, 8, _f16, STRUCT) \
VARIANT (float32, q, 4, _f32, STRUCT) \
VARIANT (float64, q, 2, _f64, STRUCT)
@ -74,6 +76,7 @@ main (int argc, char **argv)
int64_t *int64_data = (int64_t *)uint64_data;
poly8_t poly8_data[4] = { 0, 7, 13, 18, };
poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
float16_t float16_data[4] = { 1.0625, 3.125, 0.03125, 7.75 };
float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };

View File

@ -16,6 +16,7 @@ VARIANT (int32, , 2, _s32, 0, STRUCT) \
VARIANT (int64, , 1, _s64, 0, STRUCT) \
VARIANT (poly8, , 8, _p8, 7, STRUCT) \
VARIANT (poly16, , 4, _p16, 1, STRUCT) \
VARIANT (float16, , 4, _f16, 3, STRUCT) \
VARIANT (float32, , 2, _f32, 1, STRUCT) \
VARIANT (float64, , 1, _f64, 0, STRUCT) \
VARIANT (uint8, q, 16, _u8, 14, STRUCT) \
@ -28,6 +29,7 @@ VARIANT (int32, q, 4, _s32, 2, STRUCT) \
VARIANT (int64, q, 2, _s64, 1, STRUCT) \
VARIANT (poly8, q, 16, _p8, 12, STRUCT) \
VARIANT (poly16, q, 8, _p16, 5, STRUCT) \
VARIANT (float16, q, 8, _f16, 7, STRUCT)\
VARIANT (float32, q, 4, _f32, 1, STRUCT)\
VARIANT (float64, q, 2, _f64, 0, STRUCT)
@ -71,7 +73,7 @@ main (int argc, char **argv)
{
/* Original data for all vector formats. */
uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL,
0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL,
0x012389ab4567cdefULL, 0xdeeddadacafe0431ULL,
0x1032547698badcfeULL, 0xbadbadbadbad0badULL,
0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL};
@ -87,6 +89,7 @@ main (int argc, char **argv)
int64_t *int64_data = (int64_t *)uint64_data;
poly8_t poly8_data[4] = { 0, 7, 13, 18, };
poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
float16_t float16_data[4] = { 0.8125, 7.5, 19, 0.046875 };
float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };