[AArch64] Rewrite the vdup_lane intrinsics in C

gcc/
	* config/aarch64/aarch64-simd-builtins.def
	(dup_lane_scalar): Remove.
	* config/aarch64/aarch64-simd.md
	(aarch64_simd_dup): Add 'w->w' alternative.
	(aarch64_dup_lane<mode>): Allow for VALL.
	(aarch64_dup_lane_scalar<mode>): Remove.
	(aarch64_dup_lane_<vswap_width_name><mode>): New.
	(aarch64_get_lane_signed<mode>): Add w->w alternative.
	(aarch64_get_lane_unsigned<mode>): Likewise.
	(aarch64_get_lane<mode>): Likewise.
	* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
	(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
	* config/aarch64/iterators.md (VSWAP_WIDTH): New.
	(VCON): Change container of V2SF.
	(vswap_width_name): New.
	* config/aarch64/arm_neon.h
	(__aarch64_vdup_lane_any): New.
	(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
	(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
	(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/scalar_intrinsics.c
	(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.

From-SVN: r202180
Author: James Greenhalgh <james.greenhalgh@arm.com>, 2013-09-02 16:22:10 +00:00 (committed by James Greenhalgh)
parent d617d2d806
commit 91bd4114a7
7 changed files with 887 additions and 602 deletions

gcc/config/aarch64/aarch64-simd.md
@@ -336,24 +336,13 @@
 })
 
 (define_insn "aarch64_simd_dup<mode>"
-  [(set (match_operand:VDQ 0 "register_operand" "=w")
-	(vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r")))]
+  [(set (match_operand:VDQ 0 "register_operand" "=w, w")
+	(vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r, w")))]
   "TARGET_SIMD"
-  "dup\\t%0.<Vtype>, %<vw>1"
-  [(set_attr "simd_type" "simd_dupgp")
-   (set_attr "simd_mode" "<MODE>")]
-)
-
-(define_insn "aarch64_dup_lane<mode>"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-	(vec_duplicate:VDQ_I
-	  (vec_select:<VEL>
-	    (match_operand:<VCON> 1 "register_operand" "w")
-	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
-	  )))]
-  "TARGET_SIMD"
-  "dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2]"
-  [(set_attr "simd_type" "simd_dup")
+  "@
+   dup\\t%0.<Vtype>, %<vw>1
+   dup\\t%0.<Vtype>, %1.<Vetype>[0]"
+  [(set_attr "simd_type" "simd_dupgp, simd_dup")
   (set_attr "simd_mode" "<MODE>")]
 )
@@ -366,6 +355,32 @@
    (set_attr "simd_mode" "<MODE>")]
 )
 
+(define_insn "aarch64_dup_lane<mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+	(vec_duplicate:VALL
+	  (vec_select:<VEL>
+	    (match_operand:VALL 1 "register_operand" "w")
+	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
+	  )))]
+  "TARGET_SIMD"
+  "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
+  [(set_attr "simd_type" "simd_dup")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+	(vec_duplicate:VALL
+	  (vec_select:<VEL>
+	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
+	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
+	  )))]
+  "TARGET_SIMD"
+  "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
+  [(set_attr "simd_type" "simd_dup")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
 (define_insn "*aarch64_simd_mov<mode>"
   [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand"
 	 "=w, Utv,  w, ?r, ?w, ?r, w")
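For context, these two shapes are exactly what the NEON lane-dup intrinsics produce; a minimal C sketch, assuming arm_neon.h (the function names here are illustrative, and exact instruction selection depends on register allocation):

#include <arm_neon.h>

/* Same-width lane dup, matching the relaxed aarch64_dup_lane<mode>
   pattern (e.g. "dup v0.2s, v1.s[1]").  */
int32x2_t
dup_same_width (int32x2_t a)
{
  return vdup_lane_s32 (a, 1);
}

/* Width-swapping lane dup: a 128-bit result built from a lane of a
   64-bit input, matching aarch64_dup_lane_<vswap_width_name><mode>.  */
int32x4_t
dup_swap_width (int32x2_t a)
{
  return vdupq_lane_s32 (a, 1);
}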
gcc/config/aarch64/aarch64.c
@@ -7931,6 +7931,55 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)
   return true;
 }
 
+static bool
+aarch64_evpc_dup (struct expand_vec_perm_d *d)
+{
+  rtx (*gen) (rtx, rtx, rtx);
+  rtx out = d->target;
+  rtx in0;
+  enum machine_mode vmode = d->vmode;
+  unsigned int i, elt, nelt = d->nelt;
+  rtx lane;
+
+  /* TODO: This may not be big-endian safe.  */
+  if (BYTES_BIG_ENDIAN)
+    return false;
+
+  elt = d->perm[0];
+  for (i = 1; i < nelt; i++)
+    {
+      if (elt != d->perm[i])
+	return false;
+    }
+
+  /* The generic preparation in aarch64_expand_vec_perm_const_1
+     swaps the operand order and the permute indices if it finds
+     d->perm[0] to be in the second operand.  Thus, we can always
+     use d->op0 and need not do any extra arithmetic to get the
+     correct lane number.  */
+  in0 = d->op0;
+  lane = GEN_INT (elt);
+
+  switch (vmode)
+    {
+    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
+    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
+    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
+    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
+    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
+    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
+    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
+    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
+    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
+    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
+    default:
+      return false;
+    }
+
+  emit_insn (gen (out, in0, lane));
+  return true;
+}
+
 static bool
 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
 {
@@ -7988,6 +8037,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
 	return true;
       else if (aarch64_evpc_trn (d))
 	return true;
+      else if (aarch64_evpc_dup (d))
+	return true;
       return aarch64_evpc_tbl (d);
     }
 
   return false;
gcc/config/aarch64/arm_neon.h (diff suppressed because it is too large)

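Although the full diff is suppressed, the ChangeLog names the moving parts; a minimal sketch of the shape of the conversion for one element type (s32), assuming the helper plumbing follows the names above (the real header generates every <fpsu> type and q-variant):

/* Sketch, not the verbatim header.  Every lane-dup intrinsic is a lane
   read followed by a broadcast, so one macro can generate them all; the
   use of vget_lane here is an assumption about the plumbing.  */
#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b)	\
  vdup##__q1##_n_##__size (vget##__q2##_lane_##__size (__a, __b))

#define __aarch64_vdup_lane_s32(__a, __b)	\
  __aarch64_vdup_lane_any (s32, , , __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b)	\
  __aarch64_vdup_lane_any (s32, q, q, __a, __b)

/* vdup_n_* becomes a plain vector constructor ...  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_n_s32 (int32_t __a)
{
  return (int32x2_t) {__a, __a};
}

/* ... and vdup_lane_* simply forwards to the macro.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vdup_lane_s32 (__a, __b);
}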
gcc/config/aarch64/iterators.md
@@ -383,7 +383,7 @@
 			(V4HI "V8HI") (V8HI "V8HI")
 			(V2SI "V4SI") (V4SI "V4SI")
 			(DI   "V2DI") (V2DI "V2DI")
-			(V2SF "V2SF") (V4SF "V4SF")
+			(V2SF "V4SF") (V4SF "V4SF")
 			(V2DF "V2DF") (SI   "V4SI")
 			(HI   "V8HI") (QI   "V16QI")])
@@ -527,6 +527,20 @@
 (define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")])
 (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")])
 
+(define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI")
+			       (V4HI "V8HI") (V8HI "V4HI")
+			       (V2SI "V4SI") (V4SI "V2SI")
+			       (DI "V2DI") (V2DI "DI")
+			       (V2SF "V4SF") (V4SF "V2SF")
+			       (DF "V2DF") (V2DF "DF")])
+
+(define_mode_attr vswap_width_name [(V8QI "to_128") (V16QI "to_64")
+				    (V4HI "to_128") (V8HI "to_64")
+				    (V2SI "to_128") (V4SI "to_64")
+				    (DI "to_128") (V2DI "to_64")
+				    (V2SF "to_128") (V4SF "to_64")
+				    (DF "to_128") (V2DF "to_64")])
+
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------
gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -198,13 +198,21 @@ test_vcltzd_s64 (int64x1_t a)
 int8x1_t
 test_vdupb_lane_s8 (int8x16_t a)
 {
-  return vdupb_lane_s8 (a, 2);
+  int8x1_t res;
+  force_simd (a);
+  res = vdupb_laneq_s8 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 uint8x1_t
 test_vdupb_lane_u8 (uint8x16_t a)
 {
-  return vdupb_lane_u8 (a, 2);
+  uint8x1_t res;
+  force_simd (a);
+  res = vdupb_laneq_u8 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "aarch64_get_lanev8hi" 2 } } */
@@ -212,13 +220,21 @@ test_vdupb_lane_u8 (uint8x16_t a)
 int16x1_t
 test_vduph_lane_s16 (int16x8_t a)
 {
-  return vduph_lane_s16 (a, 2);
+  int16x1_t res;
+  force_simd (a);
+  res = vduph_laneq_s16 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 uint16x1_t
 test_vduph_lane_u16 (uint16x8_t a)
 {
-  return vduph_lane_u16 (a, 2);
+  uint16x1_t res;
+  force_simd (a);
+  res = vduph_laneq_u16 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "aarch64_get_lanev4si" 2 } } */
@@ -226,13 +242,21 @@ test_vduph_lane_u16 (int16x8_t a)
 int32x1_t
 test_vdups_lane_s32 (int32x4_t a)
 {
-  return vdups_lane_s32 (a, 2);
+  int32x1_t res;
+  force_simd (a);
+  res = vdups_laneq_s32 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 uint32x1_t
 test_vdups_lane_u32 (uint32x4_t a)
 {
-  return vdups_lane_u32 (a, 2);
+  uint32x1_t res;
+  force_simd (a);
+  res = vdups_laneq_u32 (a, 2);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "aarch64_get_lanev2di" 2 } } */
@@ -240,13 +264,21 @@ test_vdups_lane_u32 (uint32x4_t a)
 int64x1_t
 test_vdupd_lane_s64 (int64x2_t a)
 {
-  return vdupd_lane_s64 (a, 1);
+  int64x1_t res;
+  force_simd (a);
+  res = vdupd_laneq_s64 (a, 1);
+  force_simd (res);
+  return res;
 }
 
 uint64x1_t
 test_vdupd_lane_u64 (uint64x2_t a)
 {
-  return vdupd_lane_u64 (a, 1);
+  uint64x1_t res;
+  force_simd (a);
+  res = vdupd_laneq_u64 (a, 1);
+  force_simd (res);
+  return res;
 }
 
 /* { dg-final { scan-assembler-times "\\tcmtst\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
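The force_simd helper used above pins a value into a SIMD register so the scan-assembler patterns test the vector forms of the instructions; a sketch of how such a macro is typically defined (an assumption here, since its actual definition sits earlier in scalar_intrinsics.c):

/* Launder the value through a volatile asm that only accepts SIMD
   ("w") registers, so the compiler cannot keep it in a GP register.  */
#define force_simd(v)				\
  __asm__ volatile ("mov %d0, %1.d[0]"		\
		    : "=w" (v)			\
		    : "w" (v)			\
		    : /* No clobbers.  */)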