re PR target/63173 (performance problem with simd intrinsics vld2_dup_* on aarch64-none-elf)
PR target/63173 * config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro. (__LD3R_FUNC): Ditto. (__LD4R_FUNC): Ditto. (vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64, vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16 vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8, vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64, vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64 vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions. (vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8 vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32 vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32 vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16 vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16 vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise. (vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8 vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32 vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32 vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16 vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16 vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise. * config/aarch64/aarch64.md (define_c_enum "unspec"): Add UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP. * config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New builtins. * config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern. (aarch64_simd_ld3r<mode>): Likewise. (aarch64_simd_ld4r<mode>): Likewise. (aarch64_ld2r<mode>): New expand. (aarch64_ld3r<mode>): Likewise. (aarch64_ld4r<mode>): Likewise. Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com> From-SVN: r216630
This commit is contained in:
parent
e7d8c7020c
commit
77efea3120
@ -1,3 +1,39 @@
|
||||
2014-10-24 Felix Yang <felix.yang@huawei.com>
|
||||
Jiji Jiang <jiangjiji@huawei.com>
|
||||
|
||||
PR target/63173
|
||||
* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
|
||||
(__LD3R_FUNC): Ditto.
|
||||
(__LD4R_FUNC): Ditto.
|
||||
(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
|
||||
vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16,
|
||||
vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
|
||||
vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
|
||||
vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64,
|
||||
vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
|
||||
(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8,
|
||||
vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32,
|
||||
vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32,
|
||||
vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16,
|
||||
vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16,
|
||||
vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
|
||||
(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8,
|
||||
vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32,
|
||||
vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32,
|
||||
vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16,
|
||||
vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16,
|
||||
vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
|
||||
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
|
||||
UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
|
||||
* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
|
||||
builtins.
|
||||
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
|
||||
(aarch64_simd_ld3r<mode>): Likewise.
|
||||
(aarch64_simd_ld4r<mode>): Likewise.
|
||||
(aarch64_ld2r<mode>): New expand.
|
||||
(aarch64_ld3r<mode>): Likewise.
|
||||
(aarch64_ld4r<mode>): Likewise.
|
||||
|
||||
2014-10-24 Maxim Kuvyrkov <maxim.kuvyrkov@gmail.com>
|
||||
|
||||
* rtlanal.c (get_base_term): Handle SCRATCH.
|
||||
|
@ -83,6 +83,10 @@
|
||||
BUILTIN_VQ (LOADSTRUCT, ld2, 0)
|
||||
BUILTIN_VQ (LOADSTRUCT, ld3, 0)
|
||||
BUILTIN_VQ (LOADSTRUCT, ld4, 0)
|
||||
/* Implemented by aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>.
   Registers the ld2r, ld3r and ld4r builtins in the LOADSTRUCT class,
   one entry per structure size, via the BUILTIN_VALLDIF macro.  */
|
||||
BUILTIN_VALLDIF (LOADSTRUCT, ld2r, 0)
|
||||
BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0)
|
||||
BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0)
|
||||
/* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */
|
||||
BUILTIN_VDC (STORESTRUCT, st2, 0)
|
||||
BUILTIN_VDC (STORESTRUCT, st3, 0)
|
||||
|
@ -3991,6 +3991,16 @@
|
||||
[(set_attr "type" "neon_load2_2reg<q>")]
|
||||
)
|
||||
|
||||
;; LD2R insn pattern.  Operand 0 is an OI-mode register (constraint "w")
;; that receives the result; operand 1 is a <V_TWO_ELEM>-mode memory
;; operand (predicate aarch64_simd_struct_operand, constraint "Utv").
;; The load is modelled as an opaque UNSPEC_LD2_DUP and is emitted as a
;; single "ld2r" over the register list %S0 .. %T0.  Enabled only when
;; TARGET_SIMD holds.
;; The inner UNSPEC_VSTRUCTDUMMY wraps (const_int 0) and carries no data;
;; presumably it is there only so that the VALLDIF iterator mode appears
;; in the pattern -- TODO confirm.
(define_insn "aarch64_simd_ld2r<mode>"
|
||||
[(set (match_operand:OI 0 "register_operand" "=w")
|
||||
(unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
|
||||
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
|
||||
UNSPEC_LD2_DUP))]
|
||||
"TARGET_SIMD"
|
||||
"ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
|
||||
[(set_attr "type" "neon_load2_all_lanes<q>")]
|
||||
)
|
||||
|
||||
(define_insn "vec_store_lanesoi<mode>"
|
||||
[(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
|
||||
(unspec:OI [(match_operand:OI 1 "register_operand" "w")
|
||||
@ -4022,6 +4032,16 @@
|
||||
[(set_attr "type" "neon_load3_3reg<q>")]
|
||||
)
|
||||
|
||||
;; LD3R insn pattern.  Operand 0 is a CI-mode register (constraint "w")
;; that receives the result; operand 1 is a <V_THREE_ELEM>-mode memory
;; operand (predicate aarch64_simd_struct_operand, constraint "Utv").
;; The load is modelled as an opaque UNSPEC_LD3_DUP and is emitted as a
;; single "ld3r" over the register list %S0 .. %U0.  Enabled only when
;; TARGET_SIMD holds.
;; The inner UNSPEC_VSTRUCTDUMMY wraps (const_int 0) and carries no data;
;; presumably it is there only so that the VALLDIF iterator mode appears
;; in the pattern -- TODO confirm.
(define_insn "aarch64_simd_ld3r<mode>"
|
||||
[(set (match_operand:CI 0 "register_operand" "=w")
|
||||
(unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
|
||||
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
|
||||
UNSPEC_LD3_DUP))]
|
||||
"TARGET_SIMD"
|
||||
"ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
|
||||
[(set_attr "type" "neon_load3_all_lanes<q>")]
|
||||
)
|
||||
|
||||
(define_insn "vec_store_lanesci<mode>"
|
||||
[(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
|
||||
(unspec:CI [(match_operand:CI 1 "register_operand" "w")
|
||||
@ -4053,6 +4073,16 @@
|
||||
[(set_attr "type" "neon_load4_4reg<q>")]
|
||||
)
|
||||
|
||||
;; LD4R insn pattern.  Operand 0 is an XI-mode register (constraint "w")
;; that receives the result; operand 1 is a <V_FOUR_ELEM>-mode memory
;; operand (predicate aarch64_simd_struct_operand, constraint "Utv").
;; The load is modelled as an opaque UNSPEC_LD4_DUP and is emitted as a
;; single "ld4r" over the register list %S0 .. %V0.  Enabled only when
;; TARGET_SIMD holds.
;; The inner UNSPEC_VSTRUCTDUMMY wraps (const_int 0) and carries no data;
;; presumably it is there only so that the VALLDIF iterator mode appears
;; in the pattern -- TODO confirm.
(define_insn "aarch64_simd_ld4r<mode>"
|
||||
[(set (match_operand:XI 0 "register_operand" "=w")
|
||||
(unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
|
||||
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
|
||||
UNSPEC_LD4_DUP))]
|
||||
"TARGET_SIMD"
|
||||
"ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
|
||||
[(set_attr "type" "neon_load4_all_lanes<q>")]
|
||||
)
|
||||
|
||||
(define_insn "vec_store_lanesxi<mode>"
|
||||
[(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
|
||||
(unspec:XI [(match_operand:XI 1 "register_operand" "w")
|
||||
@ -4193,6 +4223,45 @@
|
||||
aarch64_simd_disambiguate_copy (operands, dest, src, 4);
|
||||
})
|
||||
|
||||
;; Expander for the ld2r builtin.  Operand 0 is the OI-mode destination
;; register and operand 1 is a DI-mode register holding the address.
;; The body wraps that address in a MEM of mode <V_TWO_ELEM> and emits
;; the aarch64_simd_ld2r<mode> insn on it; DONE means the RTL template
;; above is used only to describe the operands.
;; NOTE(review): the "=w"/"w" constraint strings look like leftovers;
;; constraints are presumably not consulted for a define_expand -- confirm.
(define_expand "aarch64_ld2r<mode>"
|
||||
[(match_operand:OI 0 "register_operand" "=w")
|
||||
(match_operand:DI 1 "register_operand" "w")
|
||||
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
enum machine_mode mode = <V_TWO_ELEM>mode;
|
||||
rtx mem = gen_rtx_MEM (mode, operands[1]);
|
||||
|
||||
emit_insn (gen_aarch64_simd_ld2r<mode> (operands[0], mem));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Expander for the ld3r builtin.  Operand 0 is the CI-mode destination
;; register and operand 1 is a DI-mode register holding the address.
;; The body wraps that address in a MEM of mode <V_THREE_ELEM> and emits
;; the aarch64_simd_ld3r<mode> insn on it; DONE means the RTL template
;; above is used only to describe the operands.
;; NOTE(review): the "=w"/"w" constraint strings look like leftovers;
;; constraints are presumably not consulted for a define_expand -- confirm.
(define_expand "aarch64_ld3r<mode>"
|
||||
[(match_operand:CI 0 "register_operand" "=w")
|
||||
(match_operand:DI 1 "register_operand" "w")
|
||||
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
enum machine_mode mode = <V_THREE_ELEM>mode;
|
||||
rtx mem = gen_rtx_MEM (mode, operands[1]);
|
||||
|
||||
emit_insn (gen_aarch64_simd_ld3r<mode> (operands[0], mem));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Expander for the ld4r builtin.  Operand 0 is the XI-mode destination
;; register and operand 1 is a DI-mode register holding the address.
;; The body wraps that address in a MEM of mode <V_FOUR_ELEM> and emits
;; the aarch64_simd_ld4r<mode> insn on it; DONE means the RTL template
;; above is used only to describe the operands.
;; NOTE(review): the "=w"/"w" constraint strings look like leftovers;
;; constraints are presumably not consulted for a define_expand -- confirm.
(define_expand "aarch64_ld4r<mode>"
|
||||
[(match_operand:XI 0 "register_operand" "=w")
|
||||
(match_operand:DI 1 "register_operand" "w")
|
||||
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
enum machine_mode mode = <V_FOUR_ELEM>mode;
|
||||
rtx mem = gen_rtx_MEM (mode, operands[1]);
|
||||
|
||||
emit_insn (gen_aarch64_simd_ld4r<mode> (operands[0], mem));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "aarch64_ld2<mode>_dreg"
|
||||
[(set (match_operand:OI 0 "register_operand" "=w")
|
||||
(subreg:OI
|
||||
|
@ -90,8 +90,11 @@
|
||||
UNSPEC_GOTTINYPIC
|
||||
UNSPEC_LD1
|
||||
UNSPEC_LD2
|
||||
UNSPEC_LD2_DUP
|
||||
UNSPEC_LD3
|
||||
UNSPEC_LD3_DUP
|
||||
UNSPEC_LD4
|
||||
UNSPEC_LD4_DUP
|
||||
UNSPEC_MB
|
||||
UNSPEC_NOP
|
||||
UNSPEC_PRLG_STK
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user