re PR target/63173 (performance problem with simd intrinsics vld2_dup_* on aarch64-none-elf)

PR target/63173
        * config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
        (__LD3R_FUNC): Ditto.
        (__LD4R_FUNC): Ditto.
        (vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
         vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
         vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8, 
         vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64, 
         vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64 
         vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
        (vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8 
         vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32 
         vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
         vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16 
         vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16 
         vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
        (vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8 
         vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32 
         vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32 
         vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16 
         vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16 
         vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
        * config/aarch64/aarch64.md (define_c_enum "unspec"): Add
        UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
        * config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
        builtins.
        * config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
        (aarch64_simd_ld3r<mode>): Likewise.
        (aarch64_simd_ld4r<mode>): Likewise.
        (aarch64_ld2r<mode>): New expand.
        (aarch64_ld3r<mode>): Likewise.
        (aarch64_ld4r<mode>): Likewise.

Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>

From-SVN: r216630
This commit is contained in:
Felix Yang 2014-10-24 10:53:08 +00:00 committed by Fei Yang
parent e7d8c7020c
commit 77efea3120
5 changed files with 978 additions and 120 deletions

View File

@ -1,3 +1,39 @@
2014-10-24 Felix Yang <felix.yang@huawei.com>
Jiji Jiang <jiangjiji@huawei.com>
PR target/63173
* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
(__LD3R_FUNC): Ditto.
(__LD4R_FUNC): Ditto.
(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64
vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8
vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32
vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16
vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16
vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8
vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32
vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32
vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16
vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16
vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
builtins.
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
(aarch64_simd_ld3r<mode>): Likewise.
(aarch64_simd_ld4r<mode>): Likewise.
(aarch64_ld2r<mode>): New expand.
(aarch64_ld3r<mode>): Likewise.
(aarch64_ld4r<mode>): Likewise.
2014-10-24 Maxim Kuvyrkov <maxim.kuvyrkov@gmail.com>
* rtlanal.c (get_base_term): Handle SCRATCH.

View File

@ -83,6 +83,10 @@
BUILTIN_VQ (LOADSTRUCT, ld2, 0)
BUILTIN_VQ (LOADSTRUCT, ld3, 0)
BUILTIN_VQ (LOADSTRUCT, ld4, 0)
/* Implemented by aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>. */
BUILTIN_VALLDIF (LOADSTRUCT, ld2r, 0)
BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0)
BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0)
/* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */
BUILTIN_VDC (STORESTRUCT, st2, 0)
BUILTIN_VDC (STORESTRUCT, st3, 0)

View File

@ -3991,6 +3991,16 @@
[(set_attr "type" "neon_load2_2reg<q>")]
)
(define_insn "aarch64_simd_ld2r<mode>"
[(set (match_operand:OI 0 "register_operand" "=w")
(unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
UNSPEC_LD2_DUP))]
"TARGET_SIMD"
"ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
[(set_attr "type" "neon_load2_all_lanes<q>")]
)
(define_insn "vec_store_lanesoi<mode>"
[(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:OI [(match_operand:OI 1 "register_operand" "w")
@ -4022,6 +4032,16 @@
[(set_attr "type" "neon_load3_3reg<q>")]
)
(define_insn "aarch64_simd_ld3r<mode>"
[(set (match_operand:CI 0 "register_operand" "=w")
(unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
UNSPEC_LD3_DUP))]
"TARGET_SIMD"
"ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
[(set_attr "type" "neon_load3_all_lanes<q>")]
)
(define_insn "vec_store_lanesci<mode>"
[(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:CI [(match_operand:CI 1 "register_operand" "w")
@ -4053,6 +4073,16 @@
[(set_attr "type" "neon_load4_4reg<q>")]
)
(define_insn "aarch64_simd_ld4r<mode>"
[(set (match_operand:XI 0 "register_operand" "=w")
(unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
UNSPEC_LD4_DUP))]
"TARGET_SIMD"
"ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
[(set_attr "type" "neon_load4_all_lanes<q>")]
)
(define_insn "vec_store_lanesxi<mode>"
[(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:XI [(match_operand:XI 1 "register_operand" "w")
@ -4193,6 +4223,45 @@
aarch64_simd_disambiguate_copy (operands, dest, src, 4);
})
(define_expand "aarch64_ld2r<mode>"
[(match_operand:OI 0 "register_operand" "=w")
(match_operand:DI 1 "register_operand" "w")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
enum machine_mode mode = <V_TWO_ELEM>mode;
rtx mem = gen_rtx_MEM (mode, operands[1]);
emit_insn (gen_aarch64_simd_ld2r<mode> (operands[0], mem));
DONE;
})
(define_expand "aarch64_ld3r<mode>"
[(match_operand:CI 0 "register_operand" "=w")
(match_operand:DI 1 "register_operand" "w")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
enum machine_mode mode = <V_THREE_ELEM>mode;
rtx mem = gen_rtx_MEM (mode, operands[1]);
emit_insn (gen_aarch64_simd_ld3r<mode> (operands[0], mem));
DONE;
})
(define_expand "aarch64_ld4r<mode>"
[(match_operand:XI 0 "register_operand" "=w")
(match_operand:DI 1 "register_operand" "w")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
enum machine_mode mode = <V_FOUR_ELEM>mode;
rtx mem = gen_rtx_MEM (mode, operands[1]);
emit_insn (gen_aarch64_simd_ld4r<mode> (operands[0],mem));
DONE;
})
(define_insn "aarch64_ld2<mode>_dreg"
[(set (match_operand:OI 0 "register_operand" "=w")
(subreg:OI

View File

@ -90,8 +90,11 @@
UNSPEC_GOTTINYPIC
UNSPEC_LD1
UNSPEC_LD2
UNSPEC_LD2_DUP
UNSPEC_LD3
UNSPEC_LD3_DUP
UNSPEC_LD4
UNSPEC_LD4_DUP
UNSPEC_MB
UNSPEC_NOP
UNSPEC_PRLG_STK

File diff suppressed because it is too large Load Diff