expand: Expand x / y * y as x - x % y if the latter is cheaper [PR96696]

The following patch tests both x / y * y and x - x % y expansion for the
former GIMPLE code and chooses the cheaper of those sequences.

2021-04-27  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/96696
	* expr.c (expand_expr_divmod): New function.
	(expand_expr_real_2) <case TRUNC_DIV_EXPR>: Use it for truncations and
	divisions.  Formatting fixes.
	<case MULT_EXPR>: Optimize x / y * y as x - x % y if the latter is
	cheaper.

	* gcc.target/i386/pr96696.c: New test.
This commit is contained in:
Jakub Jelinek 2021-04-27 14:45:45 +02:00
parent eea8224629
commit 3dcd1334b4
2 changed files with 160 additions and 56 deletions

View File

@ -8664,6 +8664,56 @@ expand_misaligned_mem_ref (rtx temp, machine_mode mode, int unsignedp,
return temp;
}
/* Helper function of expand_expr_2, expand a division or modulo.
op0 and op1 should be already expanded treeop0 and treeop1, using
expand_operands. */
static rtx
expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0,
tree treeop1, rtx op0, rtx op1, rtx target, int unsignedp)
{
bool mod_p = (code == TRUNC_MOD_EXPR || code == FLOOR_MOD_EXPR
|| code == CEIL_MOD_EXPR || code == ROUND_MOD_EXPR);
if (SCALAR_INT_MODE_P (mode)
&& optimize >= 2
&& get_range_pos_neg (treeop0) == 1
&& get_range_pos_neg (treeop1) == 1)
{
/* If both arguments are known to be positive when interpreted
as signed, we can expand it as both signed and unsigned
division or modulo. Choose the cheaper sequence in that case. */
bool speed_p = optimize_insn_for_speed_p ();
do_pending_stack_adjust ();
start_sequence ();
rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
rtx_insn *uns_insns = get_insns ();
end_sequence ();
start_sequence ();
rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
rtx_insn *sgn_insns = get_insns ();
end_sequence ();
unsigned uns_cost = seq_cost (uns_insns, speed_p);
unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
/* If costs are the same then use as tie breaker the other other
factor. */
if (uns_cost == sgn_cost)
{
uns_cost = seq_cost (uns_insns, !speed_p);
sgn_cost = seq_cost (sgn_insns, !speed_p);
}
if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
{
emit_insn (uns_insns);
return uns_ret;
}
emit_insn (sgn_insns);
return sgn_ret;
}
return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
}
rtx
expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
enum expand_modifier modifier)
@ -9201,14 +9251,78 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
if (!REG_P (op0))
op0 = copy_to_mode_reg (mode, op0);
return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
gen_int_mode (tree_to_shwi (exp1),
TYPE_MODE (TREE_TYPE (exp1)))));
op1 = gen_int_mode (tree_to_shwi (exp1),
TYPE_MODE (TREE_TYPE (exp1)));
return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0, op1));
}
if (modifier == EXPAND_STACK_PARM)
target = 0;
if (SCALAR_INT_MODE_P (mode) && optimize >= 2)
{
gimple *def_stmt0 = get_def_for_expr (treeop0, TRUNC_DIV_EXPR);
gimple *def_stmt1 = get_def_for_expr (treeop1, TRUNC_DIV_EXPR);
if (def_stmt0
&& !operand_equal_p (treeop1, gimple_assign_rhs2 (def_stmt0), 0))
def_stmt0 = NULL;
if (def_stmt1
&& !operand_equal_p (treeop0, gimple_assign_rhs2 (def_stmt1), 0))
def_stmt1 = NULL;
if (def_stmt0 || def_stmt1)
{
/* X / Y * Y can be expanded as X - X % Y too.
Choose the cheaper sequence of those two. */
if (def_stmt0)
treeop0 = gimple_assign_rhs1 (def_stmt0);
else
{
treeop1 = treeop0;
treeop0 = gimple_assign_rhs1 (def_stmt1);
}
expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
EXPAND_NORMAL);
bool speed_p = optimize_insn_for_speed_p ();
do_pending_stack_adjust ();
start_sequence ();
rtx divmul_ret
= expand_expr_divmod (TRUNC_DIV_EXPR, mode, treeop0, treeop1,
op0, op1, NULL_RTX, unsignedp);
divmul_ret = expand_mult (mode, divmul_ret, op1, target,
unsignedp);
rtx_insn *divmul_insns = get_insns ();
end_sequence ();
start_sequence ();
rtx modsub_ret
= expand_expr_divmod (TRUNC_MOD_EXPR, mode, treeop0, treeop1,
op0, op1, NULL_RTX, unsignedp);
this_optab = optab_for_tree_code (MINUS_EXPR, type,
optab_default);
modsub_ret = expand_binop (mode, this_optab, op0, modsub_ret,
target, unsignedp, OPTAB_LIB_WIDEN);
rtx_insn *modsub_insns = get_insns ();
end_sequence ();
unsigned divmul_cost = seq_cost (divmul_insns, speed_p);
unsigned modsub_cost = seq_cost (modsub_insns, speed_p);
/* If costs are the same then use as tie breaker the other other
factor. */
if (divmul_cost == modsub_cost)
{
divmul_cost = seq_cost (divmul_insns, !speed_p);
modsub_cost = seq_cost (modsub_insns, !speed_p);
}
if (divmul_cost <= modsub_cost)
{
emit_insn (divmul_insns);
return REDUCE_BIT_FIELD (divmul_ret);
}
emit_insn (modsub_insns);
return REDUCE_BIT_FIELD (modsub_ret);
}
}
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
@ -9222,61 +9336,21 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
case CEIL_DIV_EXPR:
case ROUND_DIV_EXPR:
case EXACT_DIV_EXPR:
{
/* If this is a fixed-point operation, then we cannot use the code
below because "expand_divmod" doesn't support sat/no-sat fixed-point
divisions. */
if (ALL_FIXED_POINT_MODE_P (mode))
goto binop;
/* If this is a fixed-point operation, then we cannot use the code
below because "expand_divmod" doesn't support sat/no-sat fixed-point
divisions. */
if (ALL_FIXED_POINT_MODE_P (mode))
goto binop;
if (modifier == EXPAND_STACK_PARM)
target = 0;
/* Possible optimization: compute the dividend with EXPAND_SUM
then if the divisor is constant can optimize the case
where some terms of the dividend have coeffs divisible by it. */
expand_operands (treeop0, treeop1,
subtarget, &op0, &op1, EXPAND_NORMAL);
bool mod_p = code == TRUNC_MOD_EXPR || code == FLOOR_MOD_EXPR
|| code == CEIL_MOD_EXPR || code == ROUND_MOD_EXPR;
if (SCALAR_INT_MODE_P (mode)
&& optimize >= 2
&& get_range_pos_neg (treeop0) == 1
&& get_range_pos_neg (treeop1) == 1)
{
/* If both arguments are known to be positive when interpreted
as signed, we can expand it as both signed and unsigned
division or modulo. Choose the cheaper sequence in that case. */
bool speed_p = optimize_insn_for_speed_p ();
do_pending_stack_adjust ();
start_sequence ();
rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
rtx_insn *uns_insns = get_insns ();
end_sequence ();
start_sequence ();
rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
rtx_insn *sgn_insns = get_insns ();
end_sequence ();
unsigned uns_cost = seq_cost (uns_insns, speed_p);
unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
if (modifier == EXPAND_STACK_PARM)
target = 0;
/* Possible optimization: compute the dividend with EXPAND_SUM
then if the divisor is constant can optimize the case
where some terms of the dividend have coeffs divisible by it. */
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
return expand_expr_divmod (code, mode, treeop0, treeop1, op0, op1,
target, unsignedp);
/* If costs are the same then use as tie breaker the other
other factor. */
if (uns_cost == sgn_cost)
{
uns_cost = seq_cost (uns_insns, !speed_p);
sgn_cost = seq_cost (sgn_insns, !speed_p);
}
if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
{
emit_insn (uns_insns);
return uns_ret;
}
emit_insn (sgn_insns);
return sgn_ret;
}
return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
}
case RDIV_EXPR:
goto binop;

View File

@ -0,0 +1,30 @@
/* PR tree-optimization/96696 */
/* { dg-do compile } */
/* { dg-options "-O2 -masm=att" } */
/* { dg-final { scan-assembler-times "\tidivl\t" 2 } } */
/* { dg-final { scan-assembler-times "\tdivl\t" 2 } } */
/* { dg-final { scan-assembler-not "\ti?mull\t" } } */
int
foo (int x, int y)
{
return (x / y) * y;
}
int
bar (int x, int y)
{
return x - (x % y);
}
unsigned
baz (unsigned x, unsigned y)
{
return (x / y) * y;
}
unsigned
qux (unsigned x, unsigned y)
{
return x - (x % y);
}