arm.c (arm_rtx_costs_1): Handle vec_extract and vec_set patterns.
2012-09-17 Ulrich Weigand <ulrich.weigand@linaro.org> * config/arm/arm.c (arm_rtx_costs_1): Handle vec_extract and vec_set patterns. * config/arm/arm.md ("vec_set<mode>_internal"): Support memory source operands, implemented via vld1 instruction. ("vec_extract<mode>"): Support memory destination operands, implemented via vst1 instruction. ("neon_vst1_lane<mode>"): Use UNSPEC_VST1_LANE instead of vec_select. * config/arm/predicates.md ("neon_lane_number"): Remove. From-SVN: r191400
This commit is contained in:
parent
a7e7bf8fc3
commit
058e267451
@ -1,3 +1,14 @@
|
||||
2012-09-17 Ulrich Weigand <ulrich.weigand@linaro.org>
|
||||
|
||||
* config/arm/arm.c (arm_rtx_costs_1): Handle vec_extract and vec_set
|
||||
patterns.
|
||||
* config/arm/arm.md ("vec_set<mode>_internal"): Support memory source
|
||||
operands, implemented via vld1 instruction.
|
||||
("vec_extract<mode>"): Support memory destination operands, implemented
|
||||
via vst1 instruction.
|
||||
("neon_vst1_lane<mode>"): Use UNSPEC_VST1_LANE instead of vec_select.
|
||||
* config/arm/predicates.md ("neon_lane_number"): Remove.
|
||||
|
||||
2012-09-17 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
Ulrich Weigand <ulrich.weigand@linaro.org>
|
||||
|
||||
|
@ -7666,6 +7666,28 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
||||
return true;
|
||||
|
||||
case SET:
|
||||
/* The vec_extract patterns accept memory operands that require an
|
||||
address reload. Account for the cost of that reload to give the
|
||||
auto-inc-dec pass an incentive to try to replace them. */
|
||||
if (TARGET_NEON && MEM_P (SET_DEST (x))
|
||||
&& GET_CODE (SET_SRC (x)) == VEC_SELECT)
|
||||
{
|
||||
*total = rtx_cost (SET_DEST (x), code, 0, speed);
|
||||
if (!neon_vector_mem_operand (SET_DEST (x), 2))
|
||||
*total += COSTS_N_INSNS (1);
|
||||
return true;
|
||||
}
|
||||
/* Likewise for the vec_set patterns. */
|
||||
if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
|
||||
&& GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
|
||||
&& MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
|
||||
{
|
||||
rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
|
||||
*total = rtx_cost (mem, code, 0, speed);
|
||||
if (!neon_vector_mem_operand (mem, 2))
|
||||
*total += COSTS_N_INSNS (1);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case UNSPEC:
|
||||
|
@ -416,30 +416,33 @@
|
||||
[(set_attr "neon_type" "neon_vld1_1_2_regs")])
|
||||
|
||||
(define_insn "vec_set<mode>_internal"
|
||||
[(set (match_operand:VD 0 "s_register_operand" "=w")
|
||||
[(set (match_operand:VD 0 "s_register_operand" "=w,w")
|
||||
(vec_merge:VD
|
||||
(vec_duplicate:VD
|
||||
(match_operand:<V_elem> 1 "s_register_operand" "r"))
|
||||
(match_operand:VD 3 "s_register_operand" "0")
|
||||
(match_operand:SI 2 "immediate_operand" "i")))]
|
||||
(match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
|
||||
(match_operand:VD 3 "s_register_operand" "0,0")
|
||||
(match_operand:SI 2 "immediate_operand" "i,i")))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
int elt = ffs ((int) INTVAL (operands[2])) - 1;
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
|
||||
operands[2] = GEN_INT (elt);
|
||||
|
||||
return "vmov.<V_sz_elem>\t%P0[%c2], %1";
|
||||
|
||||
if (which_alternative == 0)
|
||||
return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
|
||||
else
|
||||
return "vmov.<V_sz_elem>\t%P0[%c2], %1";
|
||||
}
|
||||
[(set_attr "neon_type" "neon_mcr")])
|
||||
[(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")])
|
||||
|
||||
(define_insn "vec_set<mode>_internal"
|
||||
[(set (match_operand:VQ 0 "s_register_operand" "=w")
|
||||
[(set (match_operand:VQ 0 "s_register_operand" "=w,w")
|
||||
(vec_merge:VQ
|
||||
(vec_duplicate:VQ
|
||||
(match_operand:<V_elem> 1 "s_register_operand" "r"))
|
||||
(match_operand:VQ 3 "s_register_operand" "0")
|
||||
(match_operand:SI 2 "immediate_operand" "i")))]
|
||||
(match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
|
||||
(match_operand:VQ 3 "s_register_operand" "0,0")
|
||||
(match_operand:SI 2 "immediate_operand" "i,i")))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
|
||||
@ -454,18 +457,21 @@
|
||||
operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
|
||||
operands[2] = GEN_INT (elt);
|
||||
|
||||
return "vmov.<V_sz_elem>\t%P0[%c2], %1";
|
||||
if (which_alternative == 0)
|
||||
return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
|
||||
else
|
||||
return "vmov.<V_sz_elem>\t%P0[%c2], %1";
|
||||
}
|
||||
[(set_attr "neon_type" "neon_mcr")]
|
||||
[(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")]
|
||||
)
|
||||
|
||||
(define_insn "vec_setv2di_internal"
|
||||
[(set (match_operand:V2DI 0 "s_register_operand" "=w")
|
||||
[(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
|
||||
(vec_merge:V2DI
|
||||
(vec_duplicate:V2DI
|
||||
(match_operand:DI 1 "s_register_operand" "r"))
|
||||
(match_operand:V2DI 3 "s_register_operand" "0")
|
||||
(match_operand:SI 2 "immediate_operand" "i")))]
|
||||
(match_operand:DI 1 "nonimmediate_operand" "Um,r"))
|
||||
(match_operand:V2DI 3 "s_register_operand" "0,0")
|
||||
(match_operand:SI 2 "immediate_operand" "i,i")))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
|
||||
@ -473,9 +479,12 @@
|
||||
|
||||
operands[0] = gen_rtx_REG (DImode, regno);
|
||||
|
||||
return "vmov\t%P0, %Q1, %R1";
|
||||
if (which_alternative == 0)
|
||||
return "vld1.64\t%P0, %A1";
|
||||
else
|
||||
return "vmov\t%P0, %Q1, %R1";
|
||||
}
|
||||
[(set_attr "neon_type" "neon_mcr_2_mcrr")]
|
||||
[(set_attr "neon_type" "neon_vld1_1_2_regs,neon_mcr_2_mcrr")]
|
||||
)
|
||||
|
||||
(define_expand "vec_set<mode>"
|
||||
@ -491,10 +500,10 @@
|
||||
})
|
||||
|
||||
(define_insn "vec_extract<mode>"
|
||||
[(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
|
||||
[(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
|
||||
(vec_select:<V_elem>
|
||||
(match_operand:VD 1 "s_register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
|
||||
(match_operand:VD 1 "s_register_operand" "w,w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
@ -503,16 +512,20 @@
|
||||
elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
|
||||
operands[2] = GEN_INT (elt);
|
||||
}
|
||||
return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
|
||||
|
||||
if (which_alternative == 0)
|
||||
return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
|
||||
else
|
||||
return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
|
||||
}
|
||||
[(set_attr "neon_type" "neon_bp_simple")]
|
||||
[(set_attr "neon_type" "neon_vst1_vst2_lane,neon_bp_simple")]
|
||||
)
|
||||
|
||||
(define_insn "vec_extract<mode>"
|
||||
[(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
|
||||
[(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
|
||||
(vec_select:<V_elem>
|
||||
(match_operand:VQ 1 "s_register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
|
||||
(match_operand:VQ 1 "s_register_operand" "w,w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
|
||||
@ -526,25 +539,31 @@
|
||||
operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
|
||||
operands[2] = GEN_INT (elt);
|
||||
|
||||
return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
|
||||
if (which_alternative == 0)
|
||||
return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
|
||||
else
|
||||
return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
|
||||
}
|
||||
[(set_attr "neon_type" "neon_bp_simple")]
|
||||
[(set_attr "neon_type" "neon_vst1_vst2_lane,neon_bp_simple")]
|
||||
)
|
||||
|
||||
(define_insn "vec_extractv2di"
|
||||
[(set (match_operand:DI 0 "s_register_operand" "=r")
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
|
||||
(vec_select:DI
|
||||
(match_operand:V2DI 1 "s_register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
|
||||
(match_operand:V2DI 1 "s_register_operand" "w,w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
|
||||
|
||||
operands[1] = gen_rtx_REG (DImode, regno);
|
||||
|
||||
return "vmov\t%Q0, %R0, %P1 @ v2di";
|
||||
if (which_alternative == 0)
|
||||
return "vst1.64\t{%P1}, %A0 @ v2di";
|
||||
else
|
||||
return "vmov\t%Q0, %R0, %P1 @ v2di";
|
||||
}
|
||||
[(set_attr "neon_type" "neon_int_1")]
|
||||
[(set_attr "neon_type" "neon_vst1_vst2_lane,neon_int_1")]
|
||||
)
|
||||
|
||||
(define_expand "vec_init<mode>"
|
||||
@ -4449,9 +4468,10 @@
|
||||
|
||||
(define_insn "neon_vst1_lane<mode>"
|
||||
[(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
|
||||
(vec_select:<V_elem>
|
||||
(match_operand:VDX 1 "s_register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "neon_lane_number" "i")])))]
|
||||
(unspec:<V_elem>
|
||||
[(match_operand:VDX 1 "s_register_operand" "w")
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
UNSPEC_VST1_LANE))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
HOST_WIDE_INT lane = INTVAL (operands[2]);
|
||||
@ -4470,9 +4490,10 @@
|
||||
|
||||
(define_insn "neon_vst1_lane<mode>"
|
||||
[(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
|
||||
(vec_select:<V_elem>
|
||||
(match_operand:VQX 1 "s_register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "neon_lane_number" "i")])))]
|
||||
(unspec:<V_elem>
|
||||
[(match_operand:VQX 1 "s_register_operand" "w")
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
UNSPEC_VST1_LANE))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
HOST_WIDE_INT lane = INTVAL (operands[2]);
|
||||
|
@ -524,10 +524,6 @@
|
||||
(ior (match_operand 0 "imm_for_neon_inv_logic_operand")
|
||||
(match_operand 0 "s_register_operand")))
|
||||
|
||||
;; TODO: We could check lane numbers more precisely based on the mode.
|
||||
(define_predicate "neon_lane_number"
|
||||
(and (match_code "const_int")
|
||||
(match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15")))
|
||||
;; Predicates for named expanders that overlap multiple ISAs.
|
||||
|
||||
(define_predicate "cmpdi_operand"
|
||||
|
Loading…
x
Reference in New Issue
Block a user