Detect EXT patterns to vec_perm_const, use for EXT intrinsics
* config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers,
TYPES_BINOPV): New static data.
* config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin.
* config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi):
New patterns.
* config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match
patterns for EXT.
(aarch64_evpc_ext): New function.
* config/aarch64/iterators.md (UNSPEC_EXT): New enum element.
* config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.

From-SVN: r211058
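For orientation, a minimal sketch (not part of the patch; the lane values are purely illustrative) of what the vext intrinsics compute and of the __builtin_shuffle form they now expand to on little-endian:

/* Illustration only: vext_u8 (a, b, 2) takes eight consecutive lanes of
   the concatenation a:b starting at lane 2, which is exactly a constant
   __builtin_shuffle -- the form the backend can now match to EXT.  */
#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  uint8x8_t a = {0, 1, 2, 3, 4, 5, 6, 7};
  uint8x8_t b = {10, 11, 12, 13, 14, 15, 16, 17};
  int i;

  uint8x8_t r1 = vext_u8 (a, b, 2);
  uint8x8_t r2 = __builtin_shuffle (a, b,
                                    (uint8x8_t) {2, 3, 4, 5, 6, 7, 8, 9});

  for (i = 0; i < 8; i++)
    printf ("%d %d\n", r1[i], r2[i]);  /* Both columns: 2 3 4 5 6 7 10 11.  */
  return 0;
}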
@@ -1,3 +1,22 @@
2014-05-29  Alan Lawrence  <alan.lawrence@arm.com>

	* config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers,
	TYPES_BINOPV): New static data.
	* config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin.
	* config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi):
	New patterns.
	* config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match
	patterns for EXT.
	(aarch64_evpc_ext): New function.

	* config/aarch64/iterators.md (UNSPEC_EXT): New enum element.

	* config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
	vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
	vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
	vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
	vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.

2014-05-29  Tom de Vries  <tom@codesourcery.com>

	* rtl.h (BLOCK_SYMBOL_CHECK): Use SYMBOL_REF_FLAGS.
@@ -169,6 +169,10 @@ aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_none, qualifier_none };
#define TYPES_BINOPV (aarch64_types_binopv_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
#define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
@@ -410,3 +410,6 @@
  /* Implemented by aarch64_crypto_pmull<mode>. */
  VAR1 (BINOPP, crypto_pmull, 0, di)
  VAR1 (BINOPP, crypto_pmull, 0, v2di)

  /* Meta-op to check lane bounds of immediate in aarch64_expand_builtin. */
  VAR1 (BINOPV, im_lane_bound, 0, si)
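A note on how this meta-op is used (a sketch, not taken from the patch): the builtin expands to no instructions at all; the rewritten intrinsics call it purely so that an out-of-range immediate is diagnosed at compile time rather than being silently accepted by the shuffle mask.

/* Illustration only: the lane index of vext_u8 must be 0..7.  */
#include <arm_neon.h>

uint8x8_t
last_valid_index (uint8x8_t a, uint8x8_t b)
{
  return vext_u8 (a, b, 7);  /* Accepted.  */
  /* vext_u8 (a, b, 8) would be rejected at compile time by the
     aarch64_im_lane_boundsi check instead of compiling silently.  */
}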
@@ -4167,6 +4167,35 @@
  [(set_attr "type" "neon_permute<q>")]
)

;; Note immediate (third) operand is lane index not byte index.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL 0 "register_operand" "=w")
        (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
                      (match_operand:VALL 2 "register_operand" "w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_EXT))]
  "TARGET_SIMD"
{
  operands[3] = GEN_INT (INTVAL (operands[3])
      * GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; This exists solely to check the arguments to the corresponding __builtin.
;; Used where we want an error for out-of-range indices which would otherwise
;; be silently wrapped (e.g. the mask to a __builtin_shuffle).
(define_expand "aarch64_im_lane_boundsi"
  [(match_operand:SI 0 "immediate_operand" "i")
   (match_operand:SI 1 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  aarch64_simd_lane_bounds (operands[0], 0, INTVAL (operands[1]));
  DONE;
}
)

(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:TI [(match_operand:OI 1 "register_operand" "w")
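As the comment above aarch64_ext<mode> notes, operand 3 is a lane index; the byte immediate that the EXT instruction actually encodes is computed in the output code. A small sketch of that scaling (assumed values, not compiler output):

/* Illustration only: lane index * element size = EXT byte immediate.  */
#include <stdio.h>

int
main (void)
{
  /* V8HImode: 8 lanes of 2 bytes; lane 3 -> "ext ..., #6".
     V4SImode: 4 lanes of 4 bytes; lane 3 -> "ext ..., #12".  */
  int lanes[] = {3, 3};
  int elem_bytes[] = {2, 4};  /* GET_MODE_SIZE (GET_MODE_INNER (mode))  */
  int i;
  for (i = 0; i < 2; i++)
    printf ("lane %d, %d-byte elements -> #%d\n",
            lanes[i], elem_bytes[i], lanes[i] * elem_bytes[i]);
  return 0;
}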
@@ -8990,6 +8990,70 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)
  return true;
}

/* Recognize patterns for the EXT insn. */

static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location = d->perm[0]; /* Always < nelt. */

  /* Check if the extracted indices are increasing by one. */
  for (i = 1; i < nelt; i++)
    {
      unsigned int required = location + i;
      if (d->one_vector_p)
        {
          /* We'll pass the same vector in twice, so allow indices to wrap. */
          required &= (nelt - 1);
        }
      if (d->perm[i] != required)
        return false;
    }

  /* The mid-end handles masks that just return one of the input vectors. */
  gcc_assert (location != 0);

  switch (d->vmode)
    {
    case V16QImode: gen = gen_aarch64_extv16qi; break;
    case V8QImode: gen = gen_aarch64_extv8qi; break;
    case V4HImode: gen = gen_aarch64_extv4hi; break;
    case V8HImode: gen = gen_aarch64_extv8hi; break;
    case V2SImode: gen = gen_aarch64_extv2si; break;
    case V4SImode: gen = gen_aarch64_extv4si; break;
    case V2SFmode: gen = gen_aarch64_extv2sf; break;
    case V4SFmode: gen = gen_aarch64_extv4sf; break;
    case V2DImode: gen = gen_aarch64_extv2di; break;
    case V2DFmode: gen = gen_aarch64_extv2df; break;
    default:
      return false;
    }

  /* Success! */
  if (d->testing_p)
    return true;

  if (BYTES_BIG_ENDIAN)
    {
      /* After setup, we want the high elements of the first vector (stored
         at the LSB end of the register), and the low elements of the second
         vector (stored at the MSB end of the register). So swap. */
      rtx temp = d->op0;
      d->op0 = d->op1;
      d->op1 = temp;
      /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
      location = nelt - location;
    }

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}

static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
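To make the matched shape concrete, here are two constant permutations of the kind this recognizer accepts (a sketch under little-endian assumptions; the exact mask-to-EXT mapping differs on big-endian, as the swap above shows):

/* Illustration only: masks of consecutive indices starting at a non-zero
   lane, which aarch64_evpc_ext can now emit as a single EXT.  */
#include <arm_neon.h>

/* Two-operand form: concat(a,b) elements 3..10.  */
int16x8_t
window (int16x8_t a, int16x8_t b)
{
  return __builtin_shuffle (a, b, (uint16x8_t) {3, 4, 5, 6, 7, 8, 9, 10});
}

/* One-operand form: indices wrap within the single input, i.e. a rotation,
   handled by passing the same register as both EXT sources.  */
int32x4_t
rotate (int32x4_t a)
{
  return __builtin_shuffle (a, (uint32x4_t) {1, 2, 3, 0});
}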
@@ -9094,7 +9158,9 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
 
   if (TARGET_SIMD)
     {
-      if (aarch64_evpc_zip (d))
+      if (aarch64_evpc_ext (d))
+	return true;
+      else if (aarch64_evpc_zip (d))
 	return true;
       else if (aarch64_evpc_uzp (d))
 	return true;
@@ -5661,318 +5661,6 @@ vcvtxd_f32_f64 (float64_t a)
  return result;
}

#define vext_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64x1_t a_ = (a); \
       float64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64x1_t a_ = (a); \
       int64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64x1_t a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
{
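One consequence of dropping the __asm__ forms (a sketch, not taken from the patch): the shuffle-based intrinsics are visible to the mid-end, so degenerate cases can be simplified before the backend is ever consulted -- the gcc_assert in aarch64_evpc_ext relies on exactly this for identity masks.

/* Illustration only: an index of 0 selects the first operand unchanged;
   with __builtin_shuffle the mid-end can fold this, whereas the old
   __asm__ block always forced an EXT instruction to be emitted.  */
#include <arm_neon.h>

uint8x8_t
identity (uint8x8_t a, uint8x8_t b)
{
  return vext_u8 (a, b, 0);  /* Expected to become a plain move/return.  */
}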
@@ -17444,6 +17132,292 @@ vdupd_laneq_u64 (uint64x2_t __a, const int __b)
  return __aarch64_vgetq_lane_u64 (__a, __b);
}

/* vext */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
#endif
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
{
  /* The only possible index to the assembler instruction returns element 0. */
  __builtin_aarch64_im_lane_boundsi (__c, 1);
  return __a;
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
#endif
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
{
  /* The only possible index to the assembler instruction returns element 0. */
  __builtin_aarch64_im_lane_boundsi (__c, 1);
  return __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
#endif
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
{
  /* The only possible index to the assembler instruction returns element 0. */
  __builtin_aarch64_im_lane_boundsi (__c, 1);
  return __a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 16);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x16_t)
      {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
       24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
       __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint16x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 16);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x16_t)
      {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
       24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
       __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint16x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 16);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint8x16_t)
      {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
       24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
       __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint16x8_t)
      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
  return __builtin_shuffle (__a, __b,
      (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a,
      (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
{
  __builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
  return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
#endif
}

/* vfma_lane */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
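A short usage sketch (not part of the patch; the function and variable names are made up) showing the rewritten q-register intrinsics in a typical sliding-window computation, where each vextq_u32 is expected to map to one EXT instruction:

/* Illustration only: out[i] = in[i] + in[i+1] + in[i+2] for i = 0..3.  */
#include <arm_neon.h>

void
sum3_u32 (const uint32_t *in, uint32_t *out)
{
  uint32x4_t lo = vld1q_u32 (in);           /* in[0..3]  */
  uint32x4_t hi = vld1q_u32 (in + 4);       /* in[4..7]  */
  uint32x4_t s1 = vextq_u32 (lo, hi, 1);    /* in[1..4]  */
  uint32x4_t s2 = vextq_u32 (lo, hi, 2);    /* in[2..5]  */
  vst1q_u32 (out, vaddq_u32 (lo, vaddq_u32 (s1, s2)));
}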
@@ -270,6 +270,7 @@
    UNSPEC_UZP2   ; Used in vector permute patterns.
    UNSPEC_TRN1   ; Used in vector permute patterns.
    UNSPEC_TRN2   ; Used in vector permute patterns.
    UNSPEC_EXT    ; Used in aarch64-simd.md.
    UNSPEC_AESE   ; Used in aarch64-simd.md.
    UNSPEC_AESD   ; Used in aarch64-simd.md.
    UNSPEC_AESMC  ; Used in aarch64-simd.md.