Detect EXT patterns in vec_perm_const, use for EXT intrinsics

        * config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers,
        TYPES_BINOPV): New static data.
        * config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin.
        * config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi):
        New patterns.
        * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match
        patterns for EXT.
        (aarch64_evpc_ext): New function.

        * config/aarch64/iterators.md (UNSPEC_EXT): New enum element.

        * config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
        vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
        vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
        vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
        vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.

From-SVN: r211058
Alan Lawrence 2014-05-29 16:57:42 +00:00 committed by Alan Lawrence
parent ed00b1fb97
commit ae0533da54
7 changed files with 409 additions and 313 deletions
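
For context, a hedged sketch (not part of the commit) of the kind of permute this change targets: a constant two-operand __builtin_shuffle whose mask selects consecutive elements across the boundary of its inputs, which aarch64_expand_vec_perm_const_1 can now map to a single EXT instruction. The function name and the expectation of one EXT at -O2 on little-endian are illustrative assumptions.

#include <arm_neon.h>

/* The mask {2, 3, 4, 5} takes the top two elements of A followed by the
   bottom two of B: consecutive indices starting at a non-zero location,
   which is the shape aarch64_evpc_ext recognises.  */
int32x4_t
ext_by_two (int32x4_t a, int32x4_t b)
{
  return __builtin_shuffle (a, b, (uint32x4_t) {2, 3, 4, 5});
}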

gcc/ChangeLog

@@ -1,3 +1,22 @@
2014-05-29 Alan Lawrence <alan.lawrence@arm.com>
* config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers,
TYPES_BINOPV): New static data.
* config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin.
* config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi):
New patterns.
* config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match
patterns for EXT.
(aarch64_evpc_ext): New function.
* config/aarch64/iterators.md (UNSPEC_EXT): New enum element.
* config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.
2014-05-29 Tom de Vries <tom@codesourcery.com>
* rtl.h (BLOCK_SYMBOL_CHECK): Use SYMBOL_REF_FLAGS.

gcc/config/aarch64/aarch64-builtins.c

@@ -169,6 +169,10 @@ aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_void, qualifier_none, qualifier_none };
#define TYPES_BINOPV (aarch64_types_binopv_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
#define TYPES_BINOPU (aarch64_types_binopu_qualifiers)

gcc/config/aarch64/aarch64-simd-builtins.def

@@ -410,3 +410,6 @@
/* Implemented by aarch64_crypto_pmull<mode>. */
VAR1 (BINOPP, crypto_pmull, 0, di)
VAR1 (BINOPP, crypto_pmull, 0, v2di)
/* Meta-op to check lane bounds of immediate in aarch64_expand_builtin. */
VAR1 (BINOPV, im_lane_bound, 0, si)

gcc/config/aarch64/aarch64-simd.md

@@ -4167,6 +4167,35 @@
[(set_attr "type" "neon_permute<q>")]
)
;; Note immediate (third) operand is lane index not byte index.
(define_insn "aarch64_ext<mode>"
[(set (match_operand:VALL 0 "register_operand" "=w")
(unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
(match_operand:VALL 2 "register_operand" "w")
(match_operand:SI 3 "immediate_operand" "i")]
UNSPEC_EXT))]
"TARGET_SIMD"
{
operands[3] = GEN_INT (INTVAL (operands[3])
* GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
[(set_attr "type" "neon_ext<q>")]
)
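
As the note above says, operand 3 is a lane index rather than a byte index; a small sketch of what the rescaling in this pattern means in practice (the register allocation in the expected instruction is an assumption):

#include <arm_neon.h>

/* With the rewritten arm_neon.h further down, a lane index of 3 on
   16-bit elements is multiplied by the element size, so something like
   "ext v0.16b, v0.16b, v1.16b, #6" (3 * 2 bytes) is expected on
   little-endian.  */
int16x8_t
ext_lane_three (int16x8_t a, int16x8_t b)
{
  return vextq_s16 (a, b, 3);
}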
;; This exists solely to check the arguments to the corresponding __builtin.
;; Used where we want an error for out-of-range indices which would otherwise
;; be silently wrapped (e.g. the mask to a __builtin_shuffle).
(define_expand "aarch64_im_lane_boundsi"
[(match_operand:SI 0 "immediate_operand" "i")
(match_operand:SI 1 "immediate_operand" "i")]
"TARGET_SIMD"
{
aarch64_simd_lane_bounds (operands[0], 0, INTVAL (operands[1]));
DONE;
}
)
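
A sketch of the effect this check has on the rewritten intrinsics below: an out-of-range constant index is diagnosed at compile time instead of being silently reduced modulo the vector length by the shuffle mask. The exact diagnostic wording is an assumption.

#include <arm_neon.h>

/* vext_s8 passes its index to __builtin_aarch64_im_lane_boundsi with a
   bound of 8, so only constants 0..7 are accepted.  */
int8x8_t
in_range (int8x8_t a, int8x8_t b)
{
  return vext_s8 (a, b, 7);        /* accepted */
  /* return vext_s8 (a, b, 8); */  /* expected to be rejected with a
                                      "lane out of range" style error
                                      rather than wrapping to lane 0 */
}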
(define_insn "aarch64_st2<mode>_dreg"
[(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:TI [(match_operand:OI 1 "register_operand" "w")

gcc/config/aarch64/aarch64.c

@@ -8990,6 +8990,70 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)
return true;
}
/* Recognize patterns for the EXT insn. */
static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
unsigned int i, nelt = d->nelt;
rtx (*gen) (rtx, rtx, rtx, rtx);
rtx offset;
unsigned int location = d->perm[0]; /* Always < nelt. */
/* Check if the extracted indices are increasing by one. */
for (i = 1; i < nelt; i++)
{
unsigned int required = location + i;
if (d->one_vector_p)
{
/* We'll pass the same vector in twice, so allow indices to wrap. */
required &= (nelt - 1);
}
if (d->perm[i] != required)
return false;
}
/* The mid-end handles masks that just return one of the input vectors. */
gcc_assert (location != 0);
switch (d->vmode)
{
case V16QImode: gen = gen_aarch64_extv16qi; break;
case V8QImode: gen = gen_aarch64_extv8qi; break;
case V4HImode: gen = gen_aarch64_extv4hi; break;
case V8HImode: gen = gen_aarch64_extv8hi; break;
case V2SImode: gen = gen_aarch64_extv2si; break;
case V4SImode: gen = gen_aarch64_extv4si; break;
case V2SFmode: gen = gen_aarch64_extv2sf; break;
case V4SFmode: gen = gen_aarch64_extv4sf; break;
case V2DImode: gen = gen_aarch64_extv2di; break;
case V2DFmode: gen = gen_aarch64_extv2df; break;
default:
return false;
}
/* Success! */
if (d->testing_p)
return true;
if (BYTES_BIG_ENDIAN)
{
/* After setup, we want the high elements of the first vector (stored
at the LSB end of the register), and the low elements of the second
vector (stored at the MSB end of the register). So swap. */
rtx temp = d->op0;
d->op0 = d->op1;
d->op1 = temp;
/* location != 0 (above), so safe to assume (nelt - location) < nelt. */
location = nelt - location;
}
offset = GEN_INT (location);
emit_insn (gen (d->target, d->op0, d->op1, offset));
return true;
}
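
A sketch of the single-vector case that the one_vector_p wrap above accepts: rotating one vector by a constant gives a mask that starts at a non-zero location and then wraps, so EXT can be used with both source operands equal. The expected instruction is an assumption for little-endian.

#include <arm_neon.h>

/* The mask {3, 0, 1, 2} starts at location 3 and wraps modulo the
   element count, so both EXT inputs can be the same register, roughly
   "ext v0.16b, v0.16b, v0.16b, #12".  */
uint32x4_t
rotate_by_three (uint32x4_t a)
{
  return __builtin_shuffle (a, (uint32x4_t) {3, 0, 1, 2});
}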
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
@@ -9094,7 +9158,9 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
if (TARGET_SIMD)
{
if (aarch64_evpc_zip (d))
if (aarch64_evpc_ext (d))
return true;
else if (aarch64_evpc_zip (d))
return true;
else if (aarch64_evpc_uzp (d))
return true;

gcc/config/aarch64/arm_neon.h

@@ -5661,318 +5661,6 @@ vcvtxd_f32_f64 (float64_t a)
return result;
}
#define vext_f32(a, b, c) \
__extension__ \
({ \
float32x2_t b_ = (b); \
float32x2_t a_ = (a); \
float32x2_t result; \
__asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vext_f64(a, b, c) \
__extension__ \
({ \
float64x1_t b_ = (b); \
float64x1_t a_ = (a); \
float64x1_t result; \
__asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vext_p8(a, b, c) \
__extension__ \
({ \
poly8x8_t b_ = (b); \
poly8x8_t a_ = (a); \
poly8x8_t result; \
__asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vext_p16(a, b, c) \
__extension__ \
({ \
poly16x4_t b_ = (b); \
poly16x4_t a_ = (a); \
poly16x4_t result; \
__asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vext_s8(a, b, c) \
__extension__ \
({ \
int8x8_t b_ = (b); \
int8x8_t a_ = (a); \
int8x8_t result; \
__asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vext_s16(a, b, c) \
__extension__ \
({ \
int16x4_t b_ = (b); \
int16x4_t a_ = (a); \
int16x4_t result; \
__asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vext_s32(a, b, c) \
__extension__ \
({ \
int32x2_t b_ = (b); \
int32x2_t a_ = (a); \
int32x2_t result; \
__asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vext_s64(a, b, c) \
__extension__ \
({ \
int64x1_t b_ = (b); \
int64x1_t a_ = (a); \
int64x1_t result; \
__asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vext_u8(a, b, c) \
__extension__ \
({ \
uint8x8_t b_ = (b); \
uint8x8_t a_ = (a); \
uint8x8_t result; \
__asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vext_u16(a, b, c) \
__extension__ \
({ \
uint16x4_t b_ = (b); \
uint16x4_t a_ = (a); \
uint16x4_t result; \
__asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vext_u32(a, b, c) \
__extension__ \
({ \
uint32x2_t b_ = (b); \
uint32x2_t a_ = (a); \
uint32x2_t result; \
__asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vext_u64(a, b, c) \
__extension__ \
({ \
uint64x1_t b_ = (b); \
uint64x1_t a_ = (a); \
uint64x1_t result; \
__asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_f32(a, b, c) \
__extension__ \
({ \
float32x4_t b_ = (b); \
float32x4_t a_ = (a); \
float32x4_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_f64(a, b, c) \
__extension__ \
({ \
float64x2_t b_ = (b); \
float64x2_t a_ = (a); \
float64x2_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_p8(a, b, c) \
__extension__ \
({ \
poly8x16_t b_ = (b); \
poly8x16_t a_ = (a); \
poly8x16_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_p16(a, b, c) \
__extension__ \
({ \
poly16x8_t b_ = (b); \
poly16x8_t a_ = (a); \
poly16x8_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_s8(a, b, c) \
__extension__ \
({ \
int8x16_t b_ = (b); \
int8x16_t a_ = (a); \
int8x16_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_s16(a, b, c) \
__extension__ \
({ \
int16x8_t b_ = (b); \
int16x8_t a_ = (a); \
int16x8_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_s32(a, b, c) \
__extension__ \
({ \
int32x4_t b_ = (b); \
int32x4_t a_ = (a); \
int32x4_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_s64(a, b, c) \
__extension__ \
({ \
int64x2_t b_ = (b); \
int64x2_t a_ = (a); \
int64x2_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_u8(a, b, c) \
__extension__ \
({ \
uint8x16_t b_ = (b); \
uint8x16_t a_ = (a); \
uint8x16_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_u16(a, b, c) \
__extension__ \
({ \
uint16x8_t b_ = (b); \
uint16x8_t a_ = (a); \
uint16x8_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_u32(a, b, c) \
__extension__ \
({ \
uint32x4_t b_ = (b); \
uint32x4_t a_ = (a); \
uint32x4_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vextq_u64(a, b, c) \
__extension__ \
({ \
uint64x2_t b_ = (b); \
uint64x2_t a_ = (a); \
uint64x2_t result; \
__asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
{
@@ -17444,6 +17132,292 @@ vdupd_laneq_u64 (uint64x2_t __a, const int __b)
return __aarch64_vgetq_lane_u64 (__a, __b);
}
/* vext */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
#endif
}
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
{
/* The only possible index to the assembler instruction returns element 0. */
__builtin_aarch64_im_lane_boundsi (__c, 1);
return __a;
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
return __builtin_shuffle (__a, __b,
(uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
return __builtin_shuffle (__a, __b,
(uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
#endif
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
{
/* The only possible index to the assembler instruction returns element 0. */
__builtin_aarch64_im_lane_boundsi (__c, 1);
return __a;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
return __builtin_shuffle (__a, __b,
(uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
#endif
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
{
/* The only possible index to the assembler instruction returns element 0. */
__builtin_aarch64_im_lane_boundsi (__c, 1);
return __a;
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
#endif
}
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 16);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x16_t)
{16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
#else
return __builtin_shuffle (__a, __b, (uint8x16_t)
{__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
#endif
}
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint16x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
return __builtin_shuffle (__a, __b,
(uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 16);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x16_t)
{16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
#else
return __builtin_shuffle (__a, __b, (uint8x16_t)
{__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
#endif
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint16x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
return __builtin_shuffle (__a, __b,
(uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
#endif
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 16);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x16_t)
{16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
#else
return __builtin_shuffle (__a, __b, (uint8x16_t)
{__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
#endif
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 8);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint16x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
#else
return __builtin_shuffle (__a, __b,
(uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
#endif
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 4);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
#else
return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
#endif
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
{
__builtin_aarch64_im_lane_boundsi (__c, 2);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
#endif
}
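
A usage sketch of the rewritten intrinsics (the function name is illustrative): the result starts at element __c of the first argument and continues into the second, which is exactly what the shuffle masks above encode.

#include <arm_neon.h>

/* For __c == 3 the result is
   { a[3], a[4], a[5], a[6], a[7], b[0], b[1], b[2] }.  */
uint8x8_t
join_tail_head (uint8x8_t a, uint8x8_t b)
{
  return vext_u8 (a, b, 3);
}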
/* vfma_lane */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))

gcc/config/aarch64/iterators.md

@@ -270,6 +270,7 @@
UNSPEC_UZP2 ; Used in vector permute patterns.
UNSPEC_TRN1 ; Used in vector permute patterns.
UNSPEC_TRN2 ; Used in vector permute patterns.
UNSPEC_EXT ; Used in aarch64-simd.md.
UNSPEC_AESE ; Used in aarch64-simd.md.
UNSPEC_AESD ; Used in aarch64-simd.md.
UNSPEC_AESMC ; Used in aarch64-simd.md.