expmed.c (choose_mult_variant, [...]): New, split from...

	* expmed.c (choose_mult_variant, expand_mult_const): New, split from...
	(expand_mult): ...here.
	(extract_high_half): New, split out from expand_mult_highpart.
	(expand_highpart_optab): Likewise.  Don't clobber target prematurely.
	(expand_highpart): Evaluate the cost of a shift/add sequence,
	then see if any of the specialized optabs are cheaper.

From-SVN: r79673
commit 8efc8980df
parent d36d56001a
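Editorial note (not part of the commit): the message above is about weighing a real multiply instruction against a synthesized shift/add sequence. As a rough stand-alone illustration of that trade-off, multiplying by a constant such as 10 can be done with two shifts and one add, which is exactly the kind of sequence whose cost synth_mult/choose_mult_variant compare against the cost of a MULT rtx. The helper name mult_by_10_shift_add below is invented for this sketch.

/* Minimal sketch: multiply by 10 without a multiply instruction,
   i.e. x * 10 = ((x << 2) + x) << 1.  */
#include <stdio.h>

static long mult_by_10_shift_add (long x)
{
  long t = (x << 2) + x;   /* t = x * 5: one shift, one add.  */
  return t << 1;           /* * 2: one more shift.  */
}

int main (void)
{
  for (long x = -3; x <= 3; x++)
    printf ("%ld * 10 = %ld\n", x, mult_by_10_shift_add (x));
  return 0;
}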
gcc/ChangeLog

@@ -1,3 +1,12 @@
+2004-03-19  Richard Sandiford  <rsandifo@redhat.com>
+
+	* expmed.c (choose_mult_variant, expand_mult_const): New, split from...
+	(expand_mult): ...here.
+	(extract_high_half): New, split out from expand_mult_highpart.
+	(expand_highpart_optab): Likewise.  Don't clobber target prematurely.
+	(expand_highpart): Evaluate the cost of a shift/add sequence,
+	then see if any of the specialized optabs are cheaper.
+
 2004-03-18  Ian Lance Taylor  <ian@wasabisystems.com>
 
 	* mklibgcc.in: Remove obsolete MAYBE_USE_COLLECT2.
gcc/expmed.c

@@ -2149,11 +2149,24 @@ struct algorithm
   char log[MAX_BITS_PER_WORD];
 };
 
+/* Indicates the type of fixup needed after a constant multiplication.
+   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
+   the result should be negated, and ADD_VARIANT means that the
+   multiplicand should be added to the result.  */
+enum mult_variant {basic_variant, negate_variant, add_variant};
+
 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
+static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
+				 struct algorithm *, enum mult_variant *);
+static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
+			      const struct algorithm *, enum mult_variant);
 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
 						 int, unsigned HOST_WIDE_INT *,
 						 int *, int *);
 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
+static rtx extract_high_half (enum machine_mode, rtx);
+static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
+				       int, int);
 
 /* Compute and return the best algorithm for multiplying by T.
    The algorithm must cost less than cost_limit
    If retval.cost >= COST_LIMIT, no algorithm was found and all
@@ -2396,6 +2409,198 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
 	  alg_out->ops * sizeof *alg_out->log);
 }
 
+/* Find the cheapeast way of multiplying a value of mode MODE by VAL.
+   Try three variations:
+
+       - a shift/add sequence based on VAL itself
+       - a shift/add sequence based on -VAL, followed by a negation
+       - a shift/add sequence based on VAL - 1, followed by an addition.
+
+   Return true if the cheapest of these is better than register
+   multiplication, describing the algorithm in *ALG and final
+   fixup in *VARIANT.  */
+
+static bool
+choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
+		     struct algorithm *alg, enum mult_variant *variant)
+{
+  int mult_cost;
+  struct algorithm alg2;
+  rtx reg;
+
+  reg = gen_rtx_REG (mode, FIRST_PSEUDO_REGISTER);
+  mult_cost = rtx_cost (gen_rtx_MULT (mode, reg, GEN_INT (val)), SET);
+  mult_cost = MIN (12 * add_cost, mult_cost);
+
+  *variant = basic_variant;
+  synth_mult (alg, val, mult_cost);
+
+  /* This works only if the inverted value actually fits in an
+     `unsigned int' */
+  if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
+    {
+      synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - negate_cost);
+      alg2.cost += negate_cost;
+      if (alg2.cost < alg->cost)
+	*alg = alg2, *variant = negate_variant;
+    }
+
+  /* This proves very useful for division-by-constant.  */
+  synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost);
+  alg2.cost += add_cost;
+  if (alg2.cost < alg->cost)
+    *alg = alg2, *variant = add_variant;
+
+  return alg->cost < mult_cost;
+}
+
+/* A subroutine of expand_mult, used for constant multiplications.
+   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
+   convenient.  Use the shift/add sequence described by ALG and apply
+   the final fixup specified by VARIANT.  */
+
+static rtx
+expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
+		   rtx target, const struct algorithm *alg,
+		   enum mult_variant variant)
+{
+  HOST_WIDE_INT val_so_far;
+  rtx insn, accum, tem;
+  int opno;
+  enum machine_mode nmode;
+
+  /* op0 must be register to make mult_cost match the precomputed
+     shiftadd_cost array.  */
+  op0 = protect_from_queue (op0, 0);
+
+  /* Avoid referencing memory over and over.
+     For speed, but also for correctness when mem is volatile.  */
+  if (GET_CODE (op0) == MEM)
+    op0 = force_reg (mode, op0);
+
+  /* ACCUM starts out either as OP0 or as a zero, depending on
+     the first operation.  */
+
+  if (alg->op[0] == alg_zero)
+    {
+      accum = copy_to_mode_reg (mode, const0_rtx);
+      val_so_far = 0;
+    }
+  else if (alg->op[0] == alg_m)
+    {
+      accum = copy_to_mode_reg (mode, op0);
+      val_so_far = 1;
+    }
+  else
+    abort ();
+
+  for (opno = 1; opno < alg->ops; opno++)
+    {
+      int log = alg->log[opno];
+      int preserve = preserve_subexpressions_p ();
+      rtx shift_subtarget = preserve ? 0 : accum;
+      rtx add_target
+	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
+	   && ! preserve)
+	  ? target : 0;
+      rtx accum_target = preserve ? 0 : accum;
+
+      switch (alg->op[opno])
+	{
+	case alg_shift:
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), NULL_RTX, 0);
+	  val_so_far <<= log;
+	  break;
+
+	case alg_add_t_m2:
+	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far += (HOST_WIDE_INT) 1 << log;
+	  break;
+
+	case alg_sub_t_m2:
+	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far -= (HOST_WIDE_INT) 1 << log;
+	  break;
+
+	case alg_add_t2_m:
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), shift_subtarget,
+				0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
+				 add_target ? add_target : accum_target);
+	  val_so_far = (val_so_far << log) + 1;
+	  break;
+
+	case alg_sub_t2_m:
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), shift_subtarget, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
+				 add_target ? add_target : accum_target);
+	  val_so_far = (val_so_far << log) - 1;
+	  break;
+
+	case alg_add_factor:
+	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far += val_so_far << log;
+	  break;
+
+	case alg_sub_factor:
+	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
+				 (add_target ? add_target
+				  : preserve ? 0 : tem));
+	  val_so_far = (val_so_far << log) - val_so_far;
+	  break;
+
+	default:
+	  abort ();
+	}
+
+      /* Write a REG_EQUAL note on the last insn so that we can cse
+	 multiplication sequences.  Note that if ACCUM is a SUBREG,
+	 we've set the inner register and must properly indicate
+	 that.  */
+
+      tem = op0, nmode = mode;
+      if (GET_CODE (accum) == SUBREG)
+	{
+	  nmode = GET_MODE (SUBREG_REG (accum));
+	  tem = gen_lowpart (nmode, op0);
+	}
+
+      insn = get_last_insn ();
+      set_unique_reg_note (insn, REG_EQUAL,
+			   gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
+    }
+
+  if (variant == negate_variant)
+    {
+      val_so_far = -val_so_far;
+      accum = expand_unop (mode, neg_optab, accum, target, 0);
+    }
+  else if (variant == add_variant)
+    {
+      val_so_far = val_so_far + 1;
+      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
+    }
+
+  if (val != val_so_far)
+    abort ();
+
+  return accum;
+}
+
 /* Perform a multiplication and return an rtx for the result.
    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
    TARGET is a suggestion for where to store the result (an rtx).
@@ -2409,6 +2614,8 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
 	     int unsignedp)
 {
   rtx const_op1 = op1;
+  enum mult_variant variant;
+  struct algorithm algorithm;
 
   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
      less than or equal in size to `unsigned int' this doesn't matter.
@@ -2435,190 +2642,10 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
      that it seems better to use synth_mult always.  */
 
   if (const_op1 && GET_CODE (const_op1) == CONST_INT
-      && (unsignedp || ! flag_trapv))
-    {
-      struct algorithm alg;
-      struct algorithm alg2;
-      HOST_WIDE_INT val = INTVAL (op1);
-      HOST_WIDE_INT val_so_far;
-      rtx insn;
-      int mult_cost;
-      enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
-
-      /* op0 must be register to make mult_cost match the precomputed
-	 shiftadd_cost array.  */
-      op0 = force_reg (mode, op0);
-
-      /* Try to do the computation three ways: multiply by the negative of OP1
-	 and then negate, do the multiplication directly, or do multiplication
-	 by OP1 - 1.  */
-
-      mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
-      mult_cost = MIN (12 * add_cost, mult_cost);
-
-      synth_mult (&alg, val, mult_cost);
-
-      /* This works only if the inverted value actually fits in an
-	 `unsigned int' */
-      if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
-	{
-	  synth_mult (&alg2, - val,
-		      (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
-	  if (alg2.cost + negate_cost < alg.cost)
-	    alg = alg2, variant = negate_variant;
-	}
-
-      /* This proves very useful for division-by-constant.  */
-      synth_mult (&alg2, val - 1,
-		  (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
-      if (alg2.cost + add_cost < alg.cost)
-	alg = alg2, variant = add_variant;
-
-      if (alg.cost < mult_cost)
-	{
-	  /* We found something cheaper than a multiply insn.  */
-	  int opno;
-	  rtx accum, tem;
-	  enum machine_mode nmode;
-
-	  op0 = protect_from_queue (op0, 0);
-
-	  /* Avoid referencing memory over and over.
-	     For speed, but also for correctness when mem is volatile.  */
-	  if (GET_CODE (op0) == MEM)
-	    op0 = force_reg (mode, op0);
-
-	  /* ACCUM starts out either as OP0 or as a zero, depending on
-	     the first operation.  */
-
-	  if (alg.op[0] == alg_zero)
-	    {
-	      accum = copy_to_mode_reg (mode, const0_rtx);
-	      val_so_far = 0;
-	    }
-	  else if (alg.op[0] == alg_m)
-	    {
-	      accum = copy_to_mode_reg (mode, op0);
-	      val_so_far = 1;
-	    }
-	  else
-	    abort ();
-
-	  for (opno = 1; opno < alg.ops; opno++)
-	    {
-	      int log = alg.log[opno];
-	      int preserve = preserve_subexpressions_p ();
-	      rtx shift_subtarget = preserve ? 0 : accum;
-	      rtx add_target
-		= (opno == alg.ops - 1 && target != 0 && variant != add_variant
-		   && ! preserve)
-		  ? target : 0;
-	      rtx accum_target = preserve ? 0 : accum;
-
-	      switch (alg.op[opno])
-		{
-		case alg_shift:
-		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
-					build_int_2 (log, 0), NULL_RTX, 0);
-		  val_so_far <<= log;
-		  break;
-
-		case alg_add_t_m2:
-		  tem = expand_shift (LSHIFT_EXPR, mode, op0,
-				      build_int_2 (log, 0), NULL_RTX, 0);
-		  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
-					 add_target
-					 ? add_target : accum_target);
-		  val_so_far += (HOST_WIDE_INT) 1 << log;
-		  break;
-
-		case alg_sub_t_m2:
-		  tem = expand_shift (LSHIFT_EXPR, mode, op0,
-				      build_int_2 (log, 0), NULL_RTX, 0);
-		  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
-					 add_target
-					 ? add_target : accum_target);
-		  val_so_far -= (HOST_WIDE_INT) 1 << log;
-		  break;
-
-		case alg_add_t2_m:
-		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
-					build_int_2 (log, 0), shift_subtarget,
-					0);
-		  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
-					 add_target
-					 ? add_target : accum_target);
-		  val_so_far = (val_so_far << log) + 1;
-		  break;
-
-		case alg_sub_t2_m:
-		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
-					build_int_2 (log, 0), shift_subtarget,
-					0);
-		  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
-					 add_target
-					 ? add_target : accum_target);
-		  val_so_far = (val_so_far << log) - 1;
-		  break;
-
-		case alg_add_factor:
-		  tem = expand_shift (LSHIFT_EXPR, mode, accum,
-				      build_int_2 (log, 0), NULL_RTX, 0);
-		  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
-					 add_target
-					 ? add_target : accum_target);
-		  val_so_far += val_so_far << log;
-		  break;
-
-		case alg_sub_factor:
-		  tem = expand_shift (LSHIFT_EXPR, mode, accum,
-				      build_int_2 (log, 0), NULL_RTX, 0);
-		  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
-					 (add_target ? add_target
-					  : preserve ? 0 : tem));
-		  val_so_far = (val_so_far << log) - val_so_far;
-		  break;
-
-		default:
-		  abort ();
-		}
-
-	      /* Write a REG_EQUAL note on the last insn so that we can cse
-		 multiplication sequences.  Note that if ACCUM is a SUBREG,
-		 we've set the inner register and must properly indicate
-		 that.  */
-
-	      tem = op0, nmode = mode;
-	      if (GET_CODE (accum) == SUBREG)
-		{
-		  nmode = GET_MODE (SUBREG_REG (accum));
-		  tem = gen_lowpart (nmode, op0);
-		}
-
-	      insn = get_last_insn ();
-	      set_unique_reg_note (insn,
-				   REG_EQUAL,
-				   gen_rtx_MULT (nmode, tem,
-						 GEN_INT (val_so_far)));
-	    }
-
-	  if (variant == negate_variant)
-	    {
-	      val_so_far = - val_so_far;
-	      accum = expand_unop (mode, neg_optab, accum, target, 0);
-	    }
-	  else if (variant == add_variant)
-	    {
-	      val_so_far = val_so_far + 1;
-	      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
-	    }
-
-	  if (val != val_so_far)
-	    abort ();
-
-	  return accum;
-	}
-    }
+      && (unsignedp || !flag_trapv)
+      && choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant))
+    return expand_mult_const (mode, op0, INTVAL (const_op1), target,
+			      &algorithm, variant);
 
   if (GET_CODE (op0) == CONST_DOUBLE)
     {
@@ -2832,6 +2859,108 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
   return target;
 }
 
+/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
+
+static rtx
+extract_high_half (enum machine_mode mode, rtx op)
+{
+  enum machine_mode wider_mode;
+
+  if (mode == word_mode)
+    return gen_highpart (mode, op);
+
+  wider_mode = GET_MODE_WIDER_MODE (mode);
+  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
+		     build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
+  return convert_modes (mode, wider_mode, op, 0);
+}
+
+/* Like expand_mult_highpart, but only consider using a multiplication
+   optab.  OP1 is an rtx for the constant operand.  */
+
+static rtx
+expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
+			    rtx target, int unsignedp, int max_cost)
+{
+  enum machine_mode wider_mode;
+  optab moptab;
+  rtx tem;
+  int size;
+
+  wider_mode = GET_MODE_WIDER_MODE (mode);
+  size = GET_MODE_BITSIZE (mode);
+
+  /* Firstly, try using a multiplication insn that only generates the needed
+     high part of the product, and in the sign flavor of unsignedp.  */
+  if (mul_highpart_cost[(int) mode] < max_cost)
+    {
+      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+      tem = expand_binop (mode, moptab, op0, op1, target,
+			  unsignedp, OPTAB_DIRECT);
+      if (tem)
+	return tem;
+    }
+
+  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
+     Need to adjust the result after the multiplication.  */
+  if (size - 1 < BITS_PER_WORD
+      && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
+	  < max_cost))
+    {
+      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
+      tem = expand_binop (mode, moptab, op0, op1, target,
+			  unsignedp, OPTAB_DIRECT);
+      if (tem)
+	/* We used the wrong signedness.  Adjust the result.  */
+	return expand_mult_highpart_adjust (mode, tem, op0, op1,
+					    tem, unsignedp);
+    }
+
+  /* Try widening multiplication.  */
+  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
+  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+      && mul_widen_cost[(int) wider_mode] < max_cost)
+    {
+      tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+			  unsignedp, OPTAB_WIDEN);
+      if (tem)
+	return extract_high_half (mode, tem);
+    }
+
+  /* Try widening the mode and perform a non-widening multiplication.  */
+  moptab = smul_optab;
+  if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+      && size - 1 < BITS_PER_WORD
+      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
+    {
+      tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+			  unsignedp, OPTAB_WIDEN);
+      if (tem)
+	return extract_high_half (mode, tem);
+    }
+
+  /* Try widening multiplication of opposite signedness, and adjust.  */
+  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
+  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+      && size - 1 < BITS_PER_WORD
+      && (mul_widen_cost[(int) wider_mode]
+	  + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
+    {
+      rtx regop1 = force_reg (mode, op1);
+      tem = expand_binop (wider_mode, moptab, op0, regop1,
+			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
+      if (tem != 0)
+	{
+	  tem = extract_high_half (mode, tem);
+	  /* We used the wrong signedness.  Adjust the result.  */
+	  return expand_mult_highpart_adjust (mode, tem, op0, op1,
+					      target, unsignedp);
+	}
+    }
+
+  return 0;
+}
+
 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
    in TARGET if that is convenient, and return where the result is.  If the
    operation can not be performed, 0 is returned.
@@ -2847,133 +2976,35 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
 		      unsigned HOST_WIDE_INT cnst1, rtx target,
 		      int unsignedp, int max_cost)
 {
-  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
-  optab mul_highpart_optab;
-  optab moptab;
-  rtx tem;
-  int size = GET_MODE_BITSIZE (mode);
-  rtx op1, wide_op1;
+  enum machine_mode wider_mode;
+  enum mult_variant variant;
+  struct algorithm alg;
+  rtx op1, tem;
 
   /* We can't support modes wider than HOST_BITS_PER_INT.  */
-  if (size > HOST_BITS_PER_WIDE_INT)
+  if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
     abort ();
 
   op1 = gen_int_mode (cnst1, mode);
 
-  wide_op1
-    = immed_double_const (cnst1,
-			  (unsignedp
-			   ? (HOST_WIDE_INT) 0
-			   : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
-			  wider_mode);
-
-  /* expand_mult handles constant multiplication of word_mode
-     or narrower.  It does a poor job for large modes.  */
-  if (size < BITS_PER_WORD
-      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
+  /* See whether shift/add multiplication is cheap enough.  */
+  if (choose_mult_variant (mode, cnst1, &alg, &variant)
+      && (alg.cost += shift_cost[GET_MODE_BITSIZE (mode) - 1]) < max_cost)
     {
-      /* We have to do this, since expand_binop doesn't do conversion for
-	 multiply.  Maybe change expand_binop to handle widening multiply?  */
+      /* See whether the specialized multiplication optabs are
+	 cheaper than the shift/add version.  */
+      tem = expand_mult_highpart_optab (mode, op0, op1, target,
+					unsignedp, alg.cost);
+      if (tem)
+	return tem;
+
+      wider_mode = GET_MODE_WIDER_MODE (mode);
       op0 = convert_to_mode (wider_mode, op0, unsignedp);
-
-      /* We know that this can't have signed overflow, so pretend this is
-         an unsigned multiply.  */
-      tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
-      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-			  build_int_2 (size, 0), NULL_RTX, 1);
-      return convert_modes (mode, wider_mode, tem, unsignedp);
+      tem = expand_mult_const (wider_mode, op0, cnst1, 0, &alg, variant);
+      return extract_high_half (mode, tem);
     }
-
-  if (target == 0)
-    target = gen_reg_rtx (mode);
-
-  /* Firstly, try using a multiplication insn that only generates the needed
-     high part of the product, and in the sign flavor of unsignedp.  */
-  if (mul_highpart_cost[(int) mode] < max_cost)
-    {
-      mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
-      target = expand_binop (mode, mul_highpart_optab,
-			     op0, op1, target, unsignedp, OPTAB_DIRECT);
-      if (target)
-	return target;
-    }
-
-  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
-     Need to adjust the result after the multiplication.  */
-  if (size - 1 < BITS_PER_WORD
-      && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
-	  < max_cost))
-    {
-      mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
-      target = expand_binop (mode, mul_highpart_optab,
-			     op0, op1, target, unsignedp, OPTAB_DIRECT);
-      if (target)
-	/* We used the wrong signedness.  Adjust the result.  */
-	return expand_mult_highpart_adjust (mode, target, op0,
-					    op1, target, unsignedp);
-    }
-
-  /* Try widening multiplication.  */
-  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
-  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
-      && mul_widen_cost[(int) wider_mode] < max_cost)
-    {
-      op1 = force_reg (mode, op1);
-      goto try;
-    }
-
-  /* Try widening the mode and perform a non-widening multiplication.  */
-  moptab = smul_optab;
-  if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
-      && size - 1 < BITS_PER_WORD
-      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
-    {
-      op1 = wide_op1;
-      goto try;
-    }
-
-  /* Try widening multiplication of opposite signedness, and adjust.  */
-  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
-  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
-      && size - 1 < BITS_PER_WORD
-      && (mul_widen_cost[(int) wider_mode]
-	  + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
-    {
-      rtx regop1 = force_reg (mode, op1);
-      tem = expand_binop (wider_mode, moptab, op0, regop1,
-			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
-      if (tem != 0)
-	{
-	  /* Extract the high half of the just generated product.  */
-	  tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-			      build_int_2 (size, 0), NULL_RTX, 1);
-	  tem = convert_modes (mode, wider_mode, tem, unsignedp);
-	  /* We used the wrong signedness.  Adjust the result.  */
-	  return expand_mult_highpart_adjust (mode, tem, op0, op1,
-					      target, unsignedp);
-	}
-    }
-
-  return 0;
-
- try:
-  /* Pass NULL_RTX as target since TARGET has wrong mode.  */
-  tem = expand_binop (wider_mode, moptab, op0, op1,
-		      NULL_RTX, unsignedp, OPTAB_WIDEN);
-  if (tem == 0)
-    return 0;
-
-  /* Extract the high half of the just generated product.  */
-  if (mode == word_mode)
-    {
-      return gen_highpart (mode, tem);
-    }
-  else
-    {
-      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-			  build_int_2 (size, 0), NULL_RTX, 1);
-      return convert_modes (mode, wider_mode, tem, unsignedp);
-    }
+  return expand_mult_highpart_optab (mode, op0, op1, target,
+				     unsignedp, max_cost);
 }
 
 /* Emit the code to divide OP0 by OP1, putting the result in TARGET