From 058e2674514e24a92282606059c697ff820550e0 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 17 Sep 2012 17:08:14 +0000 Subject: [PATCH] arm.c (arm_rtx_costs_1): Handle vec_extract and vec_set patterns. 2012-09-17 Ulrich Weigand * config/arm/arm.c (arm_rtx_costs_1): Handle vec_extract and vec_set patterns. * config/arm/arm.md ("vec_set_internal"): Support memory source operands, implemented via vld1 instruction. ("vec_extract"): Support memory destination operands, implemented via vst1 instruction. ("neon_vst1_lane"): Use UNSPEC_VST1_LANE instead of vec_select. * config/arm/predicates.md ("neon_lane_number"): Remove. From-SVN: r191400 --- gcc/ChangeLog | 11 ++++ gcc/config/arm/arm.c | 22 ++++++++ gcc/config/arm/neon.md | 101 +++++++++++++++++++++-------------- gcc/config/arm/predicates.md | 4 -- 4 files changed, 94 insertions(+), 44 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 81d4e2ebc31..bc8257c052b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2012-09-17 Ulrich Weigand + + * config/arm/arm.c (arm_rtx_costs_1): Handle vec_extract and vec_set + patterns. + * config/arm/arm.md ("vec_set_internal"): Support memory source + operands, implemented via vld1 instruction. + ("vec_extract"): Support memory destination operands, implemented + via vst1 instruction. + ("neon_vst1_lane"): Use UNSPEC_VST1_LANE instead of vec_select. + * config/arm/predicates.md ("neon_lane_number"): Remove. + 2012-09-17 Ramana Radhakrishnan Ulrich Weigand diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index b84a04098ea..1031b685863 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -7666,6 +7666,28 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) return true; case SET: + /* The vec_extract patterns accept memory operands that require an + address reload. Account for the cost of that reload to give the + auto-inc-dec pass an incentive to try to replace them. */ + if (TARGET_NEON && MEM_P (SET_DEST (x)) + && GET_CODE (SET_SRC (x)) == VEC_SELECT) + { + *total = rtx_cost (SET_DEST (x), code, 0, speed); + if (!neon_vector_mem_operand (SET_DEST (x), 2)) + *total += COSTS_N_INSNS (1); + return true; + } + /* Likewise for the vec_set patterns. */ + if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE + && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE + && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0))) + { + rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0); + *total = rtx_cost (mem, code, 0, speed); + if (!neon_vector_mem_operand (mem, 2)) + *total += COSTS_N_INSNS (1); + return true; + } return false; case UNSPEC: diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index bad95f5ad3c..b89d5383a93 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -416,30 +416,33 @@ [(set_attr "neon_type" "neon_vld1_1_2_regs")]) (define_insn "vec_set_internal" - [(set (match_operand:VD 0 "s_register_operand" "=w") + [(set (match_operand:VD 0 "s_register_operand" "=w,w") (vec_merge:VD (vec_duplicate:VD - (match_operand: 1 "s_register_operand" "r")) - (match_operand:VD 3 "s_register_operand" "0") - (match_operand:SI 2 "immediate_operand" "i")))] + (match_operand: 1 "nonimmediate_operand" "Um,r")) + (match_operand:VD 3 "s_register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] "TARGET_NEON" { int elt = ffs ((int) INTVAL (operands[2])) - 1; if (BYTES_BIG_ENDIAN) elt = GET_MODE_NUNITS (mode) - 1 - elt; operands[2] = GEN_INT (elt); - - return "vmov.\t%P0[%c2], %1"; + + if (which_alternative == 0) + return "vld1.\t{%P0[%c2]}, %A1"; + else + return "vmov.\t%P0[%c2], %1"; } - [(set_attr "neon_type" "neon_mcr")]) + [(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")]) (define_insn "vec_set_internal" - [(set (match_operand:VQ 0 "s_register_operand" "=w") + [(set (match_operand:VQ 0 "s_register_operand" "=w,w") (vec_merge:VQ (vec_duplicate:VQ - (match_operand: 1 "s_register_operand" "r")) - (match_operand:VQ 3 "s_register_operand" "0") - (match_operand:SI 2 "immediate_operand" "i")))] + (match_operand: 1 "nonimmediate_operand" "Um,r")) + (match_operand:VQ 3 "s_register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] "TARGET_NEON" { HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; @@ -454,18 +457,21 @@ operands[0] = gen_rtx_REG (mode, regno + hi); operands[2] = GEN_INT (elt); - return "vmov.\t%P0[%c2], %1"; + if (which_alternative == 0) + return "vld1.\t{%P0[%c2]}, %A1"; + else + return "vmov.\t%P0[%c2], %1"; } - [(set_attr "neon_type" "neon_mcr")] + [(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")] ) (define_insn "vec_setv2di_internal" - [(set (match_operand:V2DI 0 "s_register_operand" "=w") + [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") (vec_merge:V2DI (vec_duplicate:V2DI - (match_operand:DI 1 "s_register_operand" "r")) - (match_operand:V2DI 3 "s_register_operand" "0") - (match_operand:SI 2 "immediate_operand" "i")))] + (match_operand:DI 1 "nonimmediate_operand" "Um,r")) + (match_operand:V2DI 3 "s_register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] "TARGET_NEON" { HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; @@ -473,9 +479,12 @@ operands[0] = gen_rtx_REG (DImode, regno); - return "vmov\t%P0, %Q1, %R1"; + if (which_alternative == 0) + return "vld1.64\t%P0, %A1"; + else + return "vmov\t%P0, %Q1, %R1"; } - [(set_attr "neon_type" "neon_mcr_2_mcrr")] + [(set_attr "neon_type" "neon_vld1_1_2_regs,neon_mcr_2_mcrr")] ) (define_expand "vec_set" @@ -491,10 +500,10 @@ }) (define_insn "vec_extract" - [(set (match_operand: 0 "s_register_operand" "=r") + [(set (match_operand: 0 "nonimmediate_operand" "=Um,r") (vec_select: - (match_operand:VD 1 "s_register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + (match_operand:VD 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] "TARGET_NEON" { if (BYTES_BIG_ENDIAN) @@ -503,16 +512,20 @@ elt = GET_MODE_NUNITS (mode) - 1 - elt; operands[2] = GEN_INT (elt); } - return "vmov.\t%0, %P1[%c2]"; + + if (which_alternative == 0) + return "vst1.\t{%P1[%c2]}, %A0"; + else + return "vmov.\t%0, %P1[%c2]"; } - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "neon_type" "neon_vst1_vst2_lane,neon_bp_simple")] ) (define_insn "vec_extract" - [(set (match_operand: 0 "s_register_operand" "=r") + [(set (match_operand: 0 "nonimmediate_operand" "=Um,r") (vec_select: - (match_operand:VQ 1 "s_register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + (match_operand:VQ 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] "TARGET_NEON" { int half_elts = GET_MODE_NUNITS (mode) / 2; @@ -526,25 +539,31 @@ operands[1] = gen_rtx_REG (mode, regno + hi); operands[2] = GEN_INT (elt); - return "vmov.\t%0, %P1[%c2]"; + if (which_alternative == 0) + return "vst1.\t{%P1[%c2]}, %A0"; + else + return "vmov.\t%0, %P1[%c2]"; } - [(set_attr "neon_type" "neon_bp_simple")] + [(set_attr "neon_type" "neon_vst1_vst2_lane,neon_bp_simple")] ) (define_insn "vec_extractv2di" - [(set (match_operand:DI 0 "s_register_operand" "=r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r") (vec_select:DI - (match_operand:V2DI 1 "s_register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + (match_operand:V2DI 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] "TARGET_NEON" { int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]); operands[1] = gen_rtx_REG (DImode, regno); - return "vmov\t%Q0, %R0, %P1 @ v2di"; + if (which_alternative == 0) + return "vst1.64\t{%P1}, %A0 @ v2di"; + else + return "vmov\t%Q0, %R0, %P1 @ v2di"; } - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "neon_type" "neon_vst1_vst2_lane,neon_int_1")] ) (define_expand "vec_init" @@ -4449,9 +4468,10 @@ (define_insn "neon_vst1_lane" [(set (match_operand: 0 "neon_struct_operand" "=Um") - (vec_select: - (match_operand:VDX 1 "s_register_operand" "w") - (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] + (unspec: + [(match_operand:VDX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VST1_LANE))] "TARGET_NEON" { HOST_WIDE_INT lane = INTVAL (operands[2]); @@ -4470,9 +4490,10 @@ (define_insn "neon_vst1_lane" [(set (match_operand: 0 "neon_struct_operand" "=Um") - (vec_select: - (match_operand:VQX 1 "s_register_operand" "w") - (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] + (unspec: + [(match_operand:VQX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VST1_LANE))] "TARGET_NEON" { HOST_WIDE_INT lane = INTVAL (operands[2]); diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index e7eb7b3cd44..f55acbf2210 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -524,10 +524,6 @@ (ior (match_operand 0 "imm_for_neon_inv_logic_operand") (match_operand 0 "s_register_operand"))) -;; TODO: We could check lane numbers more precisely based on the mode. -(define_predicate "neon_lane_number" - (and (match_code "const_int") - (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15"))) ;; Predicates for named expanders that overlap multiple ISAs. (define_predicate "cmpdi_operand"