[AArch64] Rewrite the vdup_lane intrinsics in C
gcc/
	* config/aarch64/aarch64-simd-builtins.def
	(dup_lane_scalar): Remove.
	* config/aarch64/aarch64-simd.md
	(aarch64_simd_dup): Add 'w->w' alternative.
	(aarch64_dup_lane<mode>): Allow for VALL.
	(aarch64_dup_lane_scalar<mode>): Remove.
	(aarch64_dup_lane_<vswap_width_name><mode>): New.
	(aarch64_get_lane_signed<mode>): Add w->w alternative.
	(aarch64_get_lane_unsigned<mode>): Likewise.
	(aarch64_get_lane<mode>): Likewise.
	* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
	(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
	* config/aarch64/iterators.md (VSWAP_WIDTH): New.
	(VCON): Change container of V2SF.
	(vswap_width_name): Likewise.
	* config/aarch64/arm_neon.h (__aarch64_vdup_lane_any): New.
	(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
	(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
	(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/scalar_intrinsics.c
	(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.

From-SVN: r202180
Commit 91bd4114a7 (parent d617d2d806)
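The heart of the patch is that the vdup<q>_n and vdup<q>_lane<q> intrinsics stop being opaque builtins and become ordinary C that the optimizers can see through: duplicating a lane is just "extract the lane, then splat it". A minimal sketch of that shape (illustrative only; the real header builds every variant via the new __aarch64_vdup_lane_any macro, and the function name below is made up):

#include <arm_neon.h>

/* Illustrative stand-in for the rewritten intrinsics: a lane duplicate
   is an extract followed by a broadcast.  With the patterns added in
   this patch, GCC should be able to emit a single
   "dup v0.4s, v0.s[1]" for this.  */
static inline int32x4_t
sketch_dup_lane1_s32q (int32x4_t a)
{
  return vdupq_n_s32 (vgetq_lane_s32 (a, 1));
}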
@@ -1,3 +1,26 @@
+2013-09-02  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-simd-builtins.def
+	(dup_lane_scalar): Remove.
+	* config/aarch64/aarch64-simd.md
+	(aarch64_simd_dup): Add 'w->w' alternative.
+	(aarch64_dup_lane<mode>): Allow for VALL.
+	(aarch64_dup_lane_scalar<mode>): Remove.
+	(aarch64_dup_lane_<vswap_width_name><mode>): New.
+	(aarch64_get_lane_signed<mode>): Add w->w alternative.
+	(aarch64_get_lane_unsigned<mode>): Likewise.
+	(aarch64_get_lane<mode>): Likewise.
+	* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
+	(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
+	* config/aarch64/iterators.md (VSWAP_WIDTH): New.
+	(VCON): Change container of V2SF.
+	(vswap_width_name): Likewise.
+	* config/aarch64/arm_neon.h
+	(__aarch64_vdup_lane_any): New.
+	(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
+	(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
+	(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
+
 2013-09-02  Eric Botcazou  <ebotcazou@adacore.com>
 
 	PR middle-end/56382
@@ -336,24 +336,13 @@
 })
 
 (define_insn "aarch64_simd_dup<mode>"
-  [(set (match_operand:VDQ 0 "register_operand" "=w")
-        (vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r")))]
+  [(set (match_operand:VDQ 0 "register_operand" "=w, w")
+        (vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r, w")))]
   "TARGET_SIMD"
-  "dup\\t%0.<Vtype>, %<vw>1"
-  [(set_attr "simd_type" "simd_dupgp")
-   (set_attr "simd_mode" "<MODE>")]
-)
-
-(define_insn "aarch64_dup_lane<mode>"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-        (vec_duplicate:VDQ_I
-          (vec_select:<VEL>
-            (match_operand:<VCON> 1 "register_operand" "w")
-            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
-          )))]
-  "TARGET_SIMD"
-  "dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2]"
-  [(set_attr "simd_type" "simd_dup")
+  "@
+   dup\\t%0.<Vtype>, %<vw>1
+   dup\\t%0.<Vtype>, %1.<Vetype>[0]"
+  [(set_attr "simd_type" "simd_dupgp, simd_dup")
    (set_attr "simd_mode" "<MODE>")]
 )
 
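The new second alternative of aarch64_simd_dup<mode> matters when the scalar being broadcast is already in a SIMD register: the insn can then read element 0 of a 'w' register directly instead of routing the value through a general-purpose register. A hedged example of code that can benefit (function name invented for illustration; the exact instruction selection is still up to the register allocator):

#include <arm_neon.h>

/* 'lo' is produced by a SIMD instruction, so the scalar to broadcast
   already lives in a 'w' register; with the w->w alternative the splat
   can be "dup v0.8h, vN.h[0]" with no fmov through a GPR.  */
int16x8_t
splat_narrowed_lane0 (int32x4_t a)
{
  int16x4_t lo = vmovn_s32 (a);
  return vdupq_n_s16 (vget_lane_s16 (lo, 0));
}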
@@ -366,6 +355,32 @@
    (set_attr "simd_mode" "<MODE>")]
 )
 
+(define_insn "aarch64_dup_lane<mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+	(vec_duplicate:VALL
+	  (vec_select:<VEL>
+	    (match_operand:VALL 1 "register_operand" "w")
+	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
+          )))]
+  "TARGET_SIMD"
+  "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
+  [(set_attr "simd_type" "simd_dup")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+	(vec_duplicate:VALL
+	  (vec_select:<VEL>
+	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
+	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
+          )))]
+  "TARGET_SIMD"
+  "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
+  [(set_attr "simd_type" "simd_dup")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
 (define_insn "*aarch64_simd_mov<mode>"
   [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand"
 		"=w, Utv, w, ?r, ?w, ?r, w")
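Of the two new patterns, aarch64_dup_lane<mode> covers the case where source and destination vectors have the same register width, while the <vswap_width_name> variant (named to_64/to_128 via the new iterator further down) handles a source of the opposite width. Roughly, in intrinsic terms (illustrative; the asm in the comments is the expected, not guaranteed, output):

#include <arm_neon.h>

/* Same width: 128-bit source, 128-bit result.  */
float32x4_t
dup_same_width (float32x4_t a)
{
  return vdupq_laneq_f32 (a, 3);      /* dup v0.4s, v0.s[3] */
}

/* Opposite width: 64-bit source feeding a 128-bit result, the
   "to_128" shape of the cross-width pattern.  */
int32x4_t
dup_to_128 (int32x2_t a)
{
  return vdupq_lane_s32 (a, 1);       /* dup v0.4s, v0.s[1] */
}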
@@ -7931,6 +7931,55 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)
   return true;
 }
 
+static bool
+aarch64_evpc_dup (struct expand_vec_perm_d *d)
+{
+  rtx (*gen) (rtx, rtx, rtx);
+  rtx out = d->target;
+  rtx in0;
+  enum machine_mode vmode = d->vmode;
+  unsigned int i, elt, nelt = d->nelt;
+  rtx lane;
+
+  /* TODO: This may not be big-endian safe.  */
+  if (BYTES_BIG_ENDIAN)
+    return false;
+
+  elt = d->perm[0];
+  for (i = 1; i < nelt; i++)
+    {
+      if (elt != d->perm[i])
+	return false;
+    }
+
+  /* The generic preparation in aarch64_expand_vec_perm_const_1
+     swaps the operand order and the permute indices if it finds
+     d->perm[0] to be in the second operand.  Thus, we can always
+     use d->op0 and need not do any extra arithmetic to get the
+     correct lane number.  */
+  in0 = d->op0;
+  lane = GEN_INT (elt);
+
+  switch (vmode)
+    {
+    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
+    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
+    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
+    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
+    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
+    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
+    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
+    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
+    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
+    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
+    default:
+      return false;
+    }
+
+  emit_insn (gen (out, in0, lane));
+  return true;
+}
+
 static bool
 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
 {
@@ -7988,6 +8037,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
 	return true;
       else if (aarch64_evpc_trn (d))
 	return true;
+      else if (aarch64_evpc_dup (d))
+	return true;
       return aarch64_evpc_tbl (d);
     }
 
   return false;
File diff suppressed because it is too large
@@ -383,7 +383,7 @@
 			(V4HI "V8HI") (V8HI "V8HI")
 			(V2SI "V4SI") (V4SI "V4SI")
 			(DI   "V2DI") (V2DI "V2DI")
-			(V2SF "V2SF") (V4SF "V4SF")
+			(V2SF "V4SF") (V4SF "V4SF")
 			(V2DF "V2DF") (SI   "V4SI")
 			(HI   "V8HI") (QI   "V16QI")])
 
@@ -527,6 +527,20 @@
 (define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")])
 (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")])
 
+(define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI")
+				(V4HI "V8HI") (V8HI "V4HI")
+				(V2SI "V4SI") (V4SI "V2SI")
+				(DI   "V2DI") (V2DI "DI")
+				(V2SF "V4SF") (V4SF "V2SF")
+				(DF   "V2DF") (V2DF "DF")])
+
+(define_mode_attr vswap_width_name [(V8QI "to_128") (V16QI "to_64")
+				     (V4HI "to_128") (V8HI "to_64")
+				     (V2SI "to_128") (V4SI "to_64")
+				     (DI   "to_128") (V2DI "to_64")
+				     (V2SF "to_128") (V4SF "to_64")
+				     (DF   "to_128") (V2DF "to_64")])
+
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------
@ -1,3 +1,8 @@
|
||||
2013-09-02 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* gcc.target/aarch64/scalar_intrinsics.c
|
||||
(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.
|
||||
|
||||
2013-09-02 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR middle-end/57511
|
||||
|
@@ -198,13 +198,21 @@ test_vcltzd_s64 (int64x1_t a)
 int8x1_t
 test_vdupb_lane_s8 (int8x16_t a)
 {
-  return vdupb_lane_s8 (a, 2);
+  int8x1_t res;
+  force_simd (a);
+  res = vdupb_laneq_s8 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 uint8x1_t
 test_vdupb_lane_u8 (uint8x16_t a)
 {
-  return vdupb_lane_u8 (a, 2);
+  uint8x1_t res;
+  force_simd (a);
+  res = vdupb_laneq_u8 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "aarch64_get_lanev8hi" 2 } } */
@@ -212,13 +220,21 @@ test_vdupb_lane_u8 (uint8x16_t a)
 int16x1_t
 test_vduph_lane_s16 (int16x8_t a)
 {
-  return vduph_lane_s16 (a, 2);
+  int16x1_t res;
+  force_simd (a);
+  res = vduph_laneq_s16 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 uint16x1_t
 test_vduph_lane_u16 (uint16x8_t a)
 {
-  return vduph_lane_u16 (a, 2);
+  uint16x1_t res;
+  force_simd (a);
+  res = vduph_laneq_u16 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "aarch64_get_lanev4si" 2 } } */
@@ -226,13 +242,21 @@ test_vduph_lane_u16 (uint16x8_t a)
 int32x1_t
 test_vdups_lane_s32 (int32x4_t a)
 {
-  return vdups_lane_s32 (a, 2);
+  int32x1_t res;
+  force_simd (a);
+  res = vdups_laneq_s32 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 uint32x1_t
 test_vdups_lane_u32 (uint32x4_t a)
 {
-  return vdups_lane_u32 (a, 2);
+  uint32x1_t res;
+  force_simd (a);
+  res = vdups_laneq_u32 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "aarch64_get_lanev2di" 2 } } */
@@ -240,13 +264,21 @@ test_vdups_lane_u32 (uint32x4_t a)
 int64x1_t
 test_vdupd_lane_s64 (int64x2_t a)
 {
-  return vdupd_lane_s64 (a, 1);
+  int64x1_t res;
+  force_simd (a);
+  res = vdupd_laneq_s64 (a, 1);
+  force_simd (res);
+  return res;
 }
 
 uint64x1_t
 test_vdupd_lane_u64 (uint64x2_t a)
 {
-  return vdupd_lane_u64 (a, 1);
+  uint64x1_t res;
+  force_simd (a);
+  res = vdupd_laneq_u64 (a, 1);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tcmtst\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
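The force_simd wrapper (defined earlier in scalar_intrinsics.c, not shown in this hunk) pins the operand and the result to SIMD registers, so the scan-assembler patterns really test the dup/umov forms rather than a path through general registers. A hypothetical sketch of such a helper, purely for illustration (the actual macro in the testsuite may differ):

/* Empty asm that forces V to live in a 'w' (SIMD/FP) register at this
   point, without emitting any instruction.  */
#define force_simd(V) __asm__ volatile ("" : "+w" (V))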