expmed.c (add_cost, [...]): Make arrays indexed by machine mode.

* expmed.c (add_cost, neg_cost, sdiv_pow2_cheap, smod_pow2_cheap):
	Make arrays indexed by machine mode.  Rename negate_cost to neg_cost.
	(init_expmed): Initialize these cost arrays as appropriate.
	(store_bit_field, extract_bit_field): Correct whitespace.
	(synth_mult, choose_mult_variant, expand_mult, expand_mult_highpart,
	expand_mult_highpart_optab, expand_divmod): Update uses of add_cost,
	neg_cost, sdiv_pow2_cheap, smod_pow2_cheap to index with mode,
	word_mode or compute_mode as appropriate.

From-SVN: r82743
This commit is contained in:
Roger Sayle 2004-06-08 04:30:48 +00:00 committed by Roger Sayle
parent 9221325f10
commit 5877771853
2 changed files with 112 additions and 85 deletions

View File

@ -1,3 +1,14 @@
2004-06-07 Roger Sayle <roger@eyesopen.com>
* expmed.c (add_cost, neg_cost, sdiv_pow2_cheap, smod_pow2_cheap):
Make arrays indexed by machine mode. Rename negate_cost to neg_cost.
(init_expmed): Initialize these cost arrays as appropriate.
(store_bit_field, extract_bit_field): Correct whitespace.
(synth_mult, choose_mult_variant, expand_mult, expand_mult_highpart,
expand_mult_highpart_optab, expand_divmod): Update uses of add_cost,
neg_cost, sdiv_pow2_cheap, smod_pow2_cheap to index with mode,
word_mode or compute_mode as appropriate.
2004-06-07 Eric Botcazou <ebotcazou@libertysurf.fr>
PR target/15783

View File

@ -57,7 +57,8 @@ static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
Usually, this will mean that the MD file will emit non-branch
sequences. */
static int sdiv_pow2_cheap, smod_pow2_cheap;
static int sdiv_pow2_cheap[NUM_MACHINE_MODES];
static int smod_pow2_cheap[NUM_MACHINE_MODES];
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
@ -90,7 +91,9 @@ static int sdiv_pow2_cheap, smod_pow2_cheap;
/* Cost of various pieces of RTL. Note that some of these are indexed by
shift count and some by mode. */
static int add_cost, negate_cost, zero_cost;
static int zero_cost;
static int add_cost[NUM_MACHINE_MODES];
static int neg_cost[NUM_MACHINE_MODES];
static int shift_cost[MAX_BITS_PER_WORD];
static int shiftadd_cost[MAX_BITS_PER_WORD];
static int shiftsub_cost[MAX_BITS_PER_WORD];
@ -114,7 +117,6 @@ init_expmed (void)
reg = gen_rtx_REG (word_mode, 10000);
zero_cost = rtx_cost (const0_rtx, 0);
add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
gen_rtx_ASHIFT (word_mode, reg,
@ -136,8 +138,48 @@ init_expmed (void)
init_recog ();
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
mode != VOIDmode;
mode = GET_MODE_WIDER_MODE (mode))
{
reg = gen_rtx_REG (mode, 10000);
add_cost[mode] = rtx_cost (gen_rtx_PLUS (mode, reg, reg), SET);
neg_cost[mode] = rtx_cost (gen_rtx_NEG (mode, reg), SET);
div_cost[mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
mul_cost[mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
sdiv_pow2_cheap[mode]
= (rtx_cost (gen_rtx_DIV (mode, reg, GEN_INT (32)), SET)
<= 2 * add_cost[mode]);
smod_pow2_cheap[mode]
= (rtx_cost (gen_rtx_MOD (mode, reg, GEN_INT (32)), SET)
<= 2 * add_cost[mode]);
wider_mode = GET_MODE_WIDER_MODE (mode);
if (wider_mode != VOIDmode)
{
mul_widen_cost[wider_mode]
= rtx_cost (gen_rtx_MULT (wider_mode,
gen_rtx_ZERO_EXTEND (wider_mode, reg),
gen_rtx_ZERO_EXTEND (wider_mode, reg)),
SET);
mul_highpart_cost[mode]
= rtx_cost (gen_rtx_TRUNCATE
(mode,
gen_rtx_LSHIFTRT (wider_mode,
gen_rtx_MULT (wider_mode,
gen_rtx_ZERO_EXTEND
(wider_mode, reg),
gen_rtx_ZERO_EXTEND
(wider_mode, reg)),
GEN_INT (GET_MODE_BITSIZE (mode)))),
SET);
}
}
shift_cost[0] = 0;
shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
shiftadd_cost[0] = shiftsub_cost[0] = add_cost[word_mode];
for (m = 1; m < MAX_BITS_PER_WORD; m++)
{
@ -157,44 +199,6 @@ init_expmed (void)
shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
}
negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
sdiv_pow2_cheap
= (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
<= 2 * add_cost);
smod_pow2_cheap
= (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
<= 2 * add_cost);
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
mode != VOIDmode;
mode = GET_MODE_WIDER_MODE (mode))
{
reg = gen_rtx_REG (mode, 10000);
div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
wider_mode = GET_MODE_WIDER_MODE (mode);
if (wider_mode != VOIDmode)
{
mul_widen_cost[(int) wider_mode]
= rtx_cost (gen_rtx_MULT (wider_mode,
gen_rtx_ZERO_EXTEND (wider_mode, reg),
gen_rtx_ZERO_EXTEND (wider_mode, reg)),
SET);
mul_highpart_cost[(int) mode]
= rtx_cost (gen_rtx_TRUNCATE
(mode,
gen_rtx_LSHIFTRT (wider_mode,
gen_rtx_MULT (wider_mode,
gen_rtx_ZERO_EXTEND
(wider_mode, reg),
gen_rtx_ZERO_EXTEND
(wider_mode, reg)),
GEN_INT (GET_MODE_BITSIZE (mode)))),
SET);
}
}
end_sequence ();
}
@ -317,7 +321,7 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
available. */
if (VECTOR_MODE_P (GET_MODE (op0))
&& GET_CODE (op0) != MEM
&& (vec_set_optab->handlers[(int)GET_MODE (op0)].insn_code
&& (vec_set_optab->handlers[GET_MODE (op0)].insn_code
!= CODE_FOR_nothing)
&& fieldmode == GET_MODE_INNER (GET_MODE (op0))
&& bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
@ -325,7 +329,7 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
{
enum machine_mode outermode = GET_MODE (op0);
enum machine_mode innermode = GET_MODE_INNER (outermode);
int icode = (int) vec_set_optab->handlers[(int) outermode].insn_code;
int icode = (int) vec_set_optab->handlers[outermode].insn_code;
int pos = bitnum / GET_MODE_BITSIZE (innermode);
rtx rtxpos = GEN_INT (pos);
rtx src = value;
@ -451,10 +455,10 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
if (GET_CODE (op0) != MEM
&& (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
&& bitsize == GET_MODE_BITSIZE (fieldmode)
&& (movstrict_optab->handlers[(int) fieldmode].insn_code
&& (movstrict_optab->handlers[fieldmode].insn_code
!= CODE_FOR_nothing))
{
int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
int icode = movstrict_optab->handlers[fieldmode].insn_code;
/* Get appropriate low part of the value being stored. */
if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
@ -1086,14 +1090,14 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
available. */
if (VECTOR_MODE_P (GET_MODE (op0))
&& GET_CODE (op0) != MEM
&& (vec_extract_optab->handlers[(int)GET_MODE (op0)].insn_code
&& (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
!= CODE_FOR_nothing)
&& ((bitsize + bitnum) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
== bitsize / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
{
enum machine_mode outermode = GET_MODE (op0);
enum machine_mode innermode = GET_MODE_INNER (outermode);
int icode = (int) vec_extract_optab->handlers[(int) outermode].insn_code;
int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
int pos = bitnum / GET_MODE_BITSIZE (innermode);
rtx rtxpos = GEN_INT (pos);
rtx src = op0;
@ -2255,7 +2259,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
{
/* T ends with ...111. Multiply by (T + 1) and subtract 1. */
cost = add_cost;
cost = add_cost[word_mode];
synth_mult (alg_in, t + 1, cost_limit - cost);
cost += alg_in->cost;
@ -2272,7 +2276,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
{
/* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
cost = add_cost;
cost = add_cost[word_mode];
synth_mult (alg_in, t - 1, cost_limit - cost);
cost += alg_in->cost;
@ -2304,7 +2308,9 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
if (t % d == 0 && t > d && m < BITS_PER_WORD)
{
cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
cost = add_cost[word_mode] + shift_cost[m];
if (shiftadd_cost[m] < cost)
cost = shiftadd_cost[m];
synth_mult (alg_in, t / d, cost_limit - cost);
cost += alg_in->cost;
@ -2323,7 +2329,9 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
if (t % d == 0 && t > d && m < BITS_PER_WORD)
{
cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
cost = add_cost[word_mode] + shift_cost[m];
if (shiftsub_cost[m] < cost)
cost = shiftsub_cost[m];
synth_mult (alg_in, t / d, cost_limit - cost);
cost += alg_in->cost;
@ -2427,15 +2435,17 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
`unsigned int' */
if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
{
synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - negate_cost);
alg2.cost += negate_cost;
synth_mult (&alg2, -val, MIN (alg->cost, mult_cost)
- neg_cost[mode]);
alg2.cost += neg_cost[mode];
if (alg2.cost < alg->cost)
*alg = alg2, *variant = negate_variant;
}
/* This proves very useful for division-by-constant. */
synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost);
alg2.cost += add_cost;
synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost)
- add_cost[mode]);
alg2.cost += add_cost[mode];
if (alg2.cost < alg->cost)
*alg = alg2, *variant = add_variant;
@ -2633,7 +2643,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
&& (unsignedp || !flag_trapv))
{
int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
mult_cost = MIN (12 * add_cost, mult_cost);
mult_cost = MIN (12 * add_cost[mode], mult_cost);
if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
mult_cost))
@ -2887,7 +2897,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */
if (mul_highpart_cost[(int) mode] < max_cost)
if (mul_highpart_cost[mode] < max_cost)
{
moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
@ -2899,8 +2909,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */
if (size - 1 < BITS_PER_WORD
&& (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
< max_cost))
&& (mul_highpart_cost[mode] + 2 * shift_cost[size-1]
+ 4 * add_cost[mode] < max_cost))
{
moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
@ -2913,8 +2923,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Try widening multiplication. */
moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
&& mul_widen_cost[(int) wider_mode] < max_cost)
if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
&& mul_widen_cost[wider_mode] < max_cost)
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
unsignedp, OPTAB_WIDEN);
@ -2924,9 +2934,9 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Try widening the mode and perform a non-widening multiplication. */
moptab = smul_optab;
if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
&& mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
&& mul_cost[wider_mode] + shift_cost[size-1] < max_cost)
{
tem = expand_binop (wider_mode, moptab, op0, op1, 0,
unsignedp, OPTAB_WIDEN);
@ -2936,10 +2946,10 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Try widening multiplication of opposite signedness, and adjust. */
moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
&& (mul_widen_cost[(int) wider_mode]
+ 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
&& (mul_widen_cost[wider_mode] + 2 * shift_cost[size-1]
+ 4 * add_cost[mode] < max_cost))
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
@ -2998,7 +3008,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
{
sign_adjust = true;
extra_cost += add_cost;
extra_cost += add_cost[mode];
}
/* See whether shift/add multiplication is cheap enough. */
@ -3179,15 +3189,15 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
for (compute_mode = mode; compute_mode != VOIDmode;
compute_mode = GET_MODE_WIDER_MODE (compute_mode))
if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
|| optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
|| optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
break;
if (compute_mode == VOIDmode)
for (compute_mode = mode; compute_mode != VOIDmode;
compute_mode = GET_MODE_WIDER_MODE (compute_mode))
if (optab1->handlers[(int) compute_mode].libfunc
|| optab2->handlers[(int) compute_mode].libfunc)
if (optab1->handlers[compute_mode].libfunc
|| optab2->handlers[compute_mode].libfunc)
break;
/* If we still couldn't find a mode, use MODE, but we'll probably abort
@ -3211,10 +3221,11 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
/* Only deduct something for a REM if the last divide done was
for a different constant. Then set the constant of the last
divide. */
max_cost = div_cost[(int) compute_mode]
max_cost = div_cost[compute_mode]
- (rem_flag && ! (last_div_const != 0 && op1_is_constant
&& INTVAL (op1) == last_div_const)
? mul_cost[(int) compute_mode] + add_cost : 0);
? mul_cost[compute_mode] + add_cost[compute_mode]
: 0);
last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
@ -3332,7 +3343,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
goto fail1;
extra_cost = (shift_cost[post_shift - 1]
+ shift_cost[1] + 2 * add_cost);
+ shift_cost[1]
+ 2 * add_cost[compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 1,
max_cost - extra_cost);
@ -3415,15 +3427,16 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
goto fail1;
}
else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
&& (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
&& (rem_flag ? smod_pow2_cheap[compute_mode]
: sdiv_pow2_cheap[compute_mode])
/* The cheap metric is computed per mode in init_expmed and
is looked up here for compute_mode.  It is only a cost
estimate, so additionally require that the optab has an
expander for this mode.  */
&& (((rem_flag ? smod_optab : sdiv_optab)
->handlers[(int) compute_mode].insn_code
->handlers[compute_mode].insn_code
!= CODE_FOR_nothing)
|| (sdivmod_optab->handlers[(int) compute_mode]
|| (sdivmod_optab->handlers[compute_mode]
.insn_code != CODE_FOR_nothing)))
;
else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
@ -3497,7 +3510,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
goto fail1;
extra_cost = (shift_cost[post_shift]
+ shift_cost[size - 1] + add_cost);
+ shift_cost[size - 1]
+ add_cost[compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0,
max_cost - extra_cost);
@ -3528,7 +3542,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
extra_cost = (shift_cost[post_shift]
+ shift_cost[size - 1] + 2 * add_cost);
+ shift_cost[size - 1]
+ 2 * add_cost[compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0,
max_cost - extra_cost);
@ -3618,7 +3633,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN);
extra_cost = (shift_cost[post_shift]
+ shift_cost[size - 1] + 2 * add_cost);
+ shift_cost[size - 1]
+ 2 * add_cost[compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2, ml,
NULL_RTX, 1,
max_cost - extra_cost);
@ -4079,7 +4095,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
= sign_expand_binop (compute_mode, umod_optab, smod_optab,
op0, op1, target,
unsignedp,
((optab2->handlers[(int) compute_mode].insn_code
((optab2->handlers[compute_mode].insn_code
!= CODE_FOR_nothing)
? OPTAB_DIRECT : OPTAB_WIDEN));
if (remainder == 0)
@ -4107,7 +4123,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
op0, op1, rem_flag ? NULL_RTX : target,
unsignedp,
((optab2->handlers[(int) compute_mode].insn_code
((optab2->handlers[compute_mode].insn_code
!= CODE_FOR_nothing)
? OPTAB_DIRECT : OPTAB_WIDEN));
@ -4751,9 +4767,9 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
that is compensated by the subsequent overflow when subtracting
one / negating. */
if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
{