arm.c (arm_rtx_costs_1): Handle vec_extract and vec_set patterns.

2012-09-17  Ulrich Weigand  <ulrich.weigand@linaro.org>

	* config/arm/arm.c (arm_rtx_costs_1): Handle vec_extract and vec_set
	patterns.
	* config/arm/arm.md ("vec_set<mode>_internal"): Support memory source
	operands, implemented via vld1 instruction.
	("vec_extract<mode>"): Support memory destination operands, implemented
	via vst1 instruction.
	("neon_vst1_lane<mode>"): Use UNSPEC_VST1_LANE instead of vec_select.
	* config/arm/predicates.md ("neon_lane_number"): Remove.

From-SVN: r191400
This commit is contained in:
Ulrich Weigand 2012-09-17 17:08:14 +00:00 committed by Ulrich Weigand
parent a7e7bf8fc3
commit 058e267451
4 changed files with 94 additions and 44 deletions

View File

@ -1,3 +1,14 @@
2012-09-17 Ulrich Weigand <ulrich.weigand@linaro.org>
* config/arm/arm.c (arm_rtx_costs_1): Handle vec_extract and vec_set
patterns.
* config/arm/arm.md ("vec_set<mode>_internal"): Support memory source
operands, implemented via vld1 instruction.
("vec_extract<mode>"): Support memory destination operands, implemented
via vst1 instruction.
("neon_vst1_lane<mode>"): Use UNSPEC_VST1_LANE instead of vec_select.
* config/arm/predicates.md ("neon_lane_number"): Remove.
2012-09-17 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Ulrich Weigand <ulrich.weigand@linaro.org>

View File

@ -7666,6 +7666,28 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
return true;
case SET:
/* The vec_extract patterns accept memory operands that require an
address reload. Account for the cost of that reload to give the
auto-inc-dec pass an incentive to try to replace them. */
if (TARGET_NEON && MEM_P (SET_DEST (x))
&& GET_CODE (SET_SRC (x)) == VEC_SELECT)
{
*total = rtx_cost (SET_DEST (x), code, 0, speed);
if (!neon_vector_mem_operand (SET_DEST (x), 2))
*total += COSTS_N_INSNS (1);
return true;
}
/* Likewise for the vec_set patterns. */
if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
&& GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
&& MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
{
rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
*total = rtx_cost (mem, code, 0, speed);
if (!neon_vector_mem_operand (mem, 2))
*total += COSTS_N_INSNS (1);
return true;
}
return false;
case UNSPEC:

View File

@ -416,30 +416,33 @@
[(set_attr "neon_type" "neon_vld1_1_2_regs")])
(define_insn "vec_set<mode>_internal"
[(set (match_operand:VD 0 "s_register_operand" "=w")
[(set (match_operand:VD 0 "s_register_operand" "=w,w")
(vec_merge:VD
(vec_duplicate:VD
(match_operand:<V_elem> 1 "s_register_operand" "r"))
(match_operand:VD 3 "s_register_operand" "0")
(match_operand:SI 2 "immediate_operand" "i")))]
(match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
(match_operand:VD 3 "s_register_operand" "0,0")
(match_operand:SI 2 "immediate_operand" "i,i")))]
"TARGET_NEON"
{
int elt = ffs ((int) INTVAL (operands[2])) - 1;
if (BYTES_BIG_ENDIAN)
elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
operands[2] = GEN_INT (elt);
return "vmov.<V_sz_elem>\t%P0[%c2], %1";
if (which_alternative == 0)
return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
else
return "vmov.<V_sz_elem>\t%P0[%c2], %1";
}
[(set_attr "neon_type" "neon_mcr")])
[(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")])
(define_insn "vec_set<mode>_internal"
[(set (match_operand:VQ 0 "s_register_operand" "=w")
[(set (match_operand:VQ 0 "s_register_operand" "=w,w")
(vec_merge:VQ
(vec_duplicate:VQ
(match_operand:<V_elem> 1 "s_register_operand" "r"))
(match_operand:VQ 3 "s_register_operand" "0")
(match_operand:SI 2 "immediate_operand" "i")))]
(match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
(match_operand:VQ 3 "s_register_operand" "0,0")
(match_operand:SI 2 "immediate_operand" "i,i")))]
"TARGET_NEON"
{
HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
@ -454,18 +457,21 @@
operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
operands[2] = GEN_INT (elt);
return "vmov.<V_sz_elem>\t%P0[%c2], %1";
if (which_alternative == 0)
return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
else
return "vmov.<V_sz_elem>\t%P0[%c2], %1";
}
[(set_attr "neon_type" "neon_mcr")]
[(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")]
)
(define_insn "vec_setv2di_internal"
[(set (match_operand:V2DI 0 "s_register_operand" "=w")
[(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
(vec_merge:V2DI
(vec_duplicate:V2DI
(match_operand:DI 1 "s_register_operand" "r"))
(match_operand:V2DI 3 "s_register_operand" "0")
(match_operand:SI 2 "immediate_operand" "i")))]
(match_operand:DI 1 "nonimmediate_operand" "Um,r"))
(match_operand:V2DI 3 "s_register_operand" "0,0")
(match_operand:SI 2 "immediate_operand" "i,i")))]
"TARGET_NEON"
{
HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
@ -473,9 +479,12 @@
operands[0] = gen_rtx_REG (DImode, regno);
return "vmov\t%P0, %Q1, %R1";
if (which_alternative == 0)
return "vld1.64\t%P0, %A1";
else
return "vmov\t%P0, %Q1, %R1";
}
[(set_attr "neon_type" "neon_mcr_2_mcrr")]
[(set_attr "neon_type" "neon_vld1_1_2_regs,neon_mcr_2_mcrr")]
)
(define_expand "vec_set<mode>"
@ -491,10 +500,10 @@
})
(define_insn "vec_extract<mode>"
[(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
[(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
(vec_select:<V_elem>
(match_operand:VD 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
(match_operand:VD 1 "s_register_operand" "w,w")
(parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
"TARGET_NEON"
{
if (BYTES_BIG_ENDIAN)
@ -503,16 +512,20 @@
elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
operands[2] = GEN_INT (elt);
}
return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
if (which_alternative == 0)
return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
else
return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
}
[(set_attr "neon_type" "neon_bp_simple")]
[(set_attr "neon_type" "neon_vst1_vst2_lane,neon_bp_simple")]
)
(define_insn "vec_extract<mode>"
[(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
[(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
(vec_select:<V_elem>
(match_operand:VQ 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
(match_operand:VQ 1 "s_register_operand" "w,w")
(parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
"TARGET_NEON"
{
int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
@ -526,25 +539,31 @@
operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
operands[2] = GEN_INT (elt);
return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
if (which_alternative == 0)
return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
else
return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
}
[(set_attr "neon_type" "neon_bp_simple")]
[(set_attr "neon_type" "neon_vst1_vst2_lane,neon_bp_simple")]
)
(define_insn "vec_extractv2di"
[(set (match_operand:DI 0 "s_register_operand" "=r")
[(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
(vec_select:DI
(match_operand:V2DI 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
(match_operand:V2DI 1 "s_register_operand" "w,w")
(parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
"TARGET_NEON"
{
int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
operands[1] = gen_rtx_REG (DImode, regno);
return "vmov\t%Q0, %R0, %P1 @ v2di";
if (which_alternative == 0)
return "vst1.64\t{%P1}, %A0 @ v2di";
else
return "vmov\t%Q0, %R0, %P1 @ v2di";
}
[(set_attr "neon_type" "neon_int_1")]
[(set_attr "neon_type" "neon_vst1_vst2_lane,neon_int_1")]
)
(define_expand "vec_init<mode>"
@ -4449,9 +4468,10 @@
(define_insn "neon_vst1_lane<mode>"
[(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
(vec_select:<V_elem>
(match_operand:VDX 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "neon_lane_number" "i")])))]
(unspec:<V_elem>
[(match_operand:VDX 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")]
UNSPEC_VST1_LANE))]
"TARGET_NEON"
{
HOST_WIDE_INT lane = INTVAL (operands[2]);
@ -4470,9 +4490,10 @@
(define_insn "neon_vst1_lane<mode>"
[(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
(vec_select:<V_elem>
(match_operand:VQX 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "neon_lane_number" "i")])))]
(unspec:<V_elem>
[(match_operand:VQX 1 "s_register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")]
UNSPEC_VST1_LANE))]
"TARGET_NEON"
{
HOST_WIDE_INT lane = INTVAL (operands[2]);

View File

@ -524,10 +524,6 @@
(ior (match_operand 0 "imm_for_neon_inv_logic_operand")
(match_operand 0 "s_register_operand")))
;; TODO: We could check lane numbers more precisely based on the mode.
(define_predicate "neon_lane_number"
(and (match_code "const_int")
(match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15")))
;; Predicates for named expanders that overlap multiple ISAs.
(define_predicate "cmpdi_operand"