expand: Expand x / y * y as x - x % y if the latter is cheaper [PR96696]

The following patch tests both x / y * y and x - x % y expansion for the former GIMPLE code and chooses the cheaper of those sequences. 2021-04-27 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/96696 * expr.c (expand_expr_divmod): New function. (expand_expr_real_2) <case TRUNC_DIV_EXPR>: Use it for truncations and divisions. Formatting fixes. <case MULT_EXPR>: Optimize x / y * y as x - x % y if the latter is cheaper. * gcc.target/i386/pr96696.c: New test.
2021-04-27 14:45:45 +02:00 · 2021-04-27 14:45:45 +02:00 · 3dcd1334b4
commit 3dcd1334b4
parent eea8224629
2 changed files with 160 additions and 56 deletions
--- a/gcc/expr.c
+++ b/gcc/expr.c
@ -8664,6 +8664,56 @@ expand_misaligned_mem_ref (rtx temp, machine_mode mode, int unsignedp,
  return temp;
 }

+/* Helper function of expand_expr_2, expand a division or modulo.
+   op0 and op1 should be already expanded treeop0 and treeop1, using
+   expand_operands.  */
+
+static rtx
+expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0,
+		    tree treeop1, rtx op0, rtx op1, rtx target, int unsignedp)
+{
+  bool mod_p = (code == TRUNC_MOD_EXPR || code == FLOOR_MOD_EXPR
+		|| code == CEIL_MOD_EXPR || code == ROUND_MOD_EXPR);
+  if (SCALAR_INT_MODE_P (mode)
+      && optimize >= 2
+      && get_range_pos_neg (treeop0) == 1
+      && get_range_pos_neg (treeop1) == 1)
+    {
+      /* If both arguments are known to be positive when interpreted
+	 as signed, we can expand it as both signed and unsigned
+	 division or modulo.  Choose the cheaper sequence in that case.  */
+      bool speed_p = optimize_insn_for_speed_p ();
+      do_pending_stack_adjust ();
+      start_sequence ();
+      rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
+      rtx_insn *uns_insns = get_insns ();
+      end_sequence ();
+      start_sequence ();
+      rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
+      rtx_insn *sgn_insns = get_insns ();
+      end_sequence ();
+      unsigned uns_cost = seq_cost (uns_insns, speed_p);
+      unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
+
+      /* If costs are the same then use as tie breaker the other other
+	 factor.  */
+      if (uns_cost == sgn_cost)
+	{
+	  uns_cost = seq_cost (uns_insns, !speed_p);
+	  sgn_cost = seq_cost (sgn_insns, !speed_p);
+	}
+
+      if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
+	{
+	  emit_insn (uns_insns);
+	  return uns_ret;
+	}
+      emit_insn (sgn_insns);
+      return sgn_ret;
+    }
+  return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
+}
+
 rtx
 expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 		    enum expand_modifier modifier)
@ -9201,14 +9251,78 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	  if (!REG_P (op0))
 	    op0 = copy_to_mode_reg (mode, op0);

-	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
-			       gen_int_mode (tree_to_shwi (exp1),
-					     TYPE_MODE (TREE_TYPE (exp1)))));
+	  op1 = gen_int_mode (tree_to_shwi (exp1),
+			      TYPE_MODE (TREE_TYPE (exp1)));
+	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0, op1));
 	}

      if (modifier == EXPAND_STACK_PARM)
 	target = 0;

+      if (SCALAR_INT_MODE_P (mode) && optimize >= 2)
+	{
+	  gimple *def_stmt0 = get_def_for_expr (treeop0, TRUNC_DIV_EXPR);
+	  gimple *def_stmt1 = get_def_for_expr (treeop1, TRUNC_DIV_EXPR);
+	  if (def_stmt0
+	      && !operand_equal_p (treeop1, gimple_assign_rhs2 (def_stmt0), 0))
+	    def_stmt0 = NULL;
+	  if (def_stmt1
+	      && !operand_equal_p (treeop0, gimple_assign_rhs2 (def_stmt1), 0))
+	    def_stmt1 = NULL;
+
+	  if (def_stmt0 || def_stmt1)
+	    {
+	      /* X / Y * Y can be expanded as X - X % Y too.
+		 Choose the cheaper sequence of those two.  */
+	      if (def_stmt0)
+		treeop0 = gimple_assign_rhs1 (def_stmt0);
+	      else
+		{
+		  treeop1 = treeop0;
+		  treeop0 = gimple_assign_rhs1 (def_stmt1);
+		}
+	      expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
+			       EXPAND_NORMAL);
+	      bool speed_p = optimize_insn_for_speed_p ();
+	      do_pending_stack_adjust ();
+	      start_sequence ();
+	      rtx divmul_ret
+		= expand_expr_divmod (TRUNC_DIV_EXPR, mode, treeop0, treeop1,
+				      op0, op1, NULL_RTX, unsignedp);
+	      divmul_ret = expand_mult (mode, divmul_ret, op1, target,
+					unsignedp);
+	      rtx_insn *divmul_insns = get_insns ();
+	      end_sequence ();
+	      start_sequence ();
+	      rtx modsub_ret
+		= expand_expr_divmod (TRUNC_MOD_EXPR, mode, treeop0, treeop1,
+				      op0, op1, NULL_RTX, unsignedp);
+	      this_optab = optab_for_tree_code (MINUS_EXPR, type,
+						optab_default);
+	      modsub_ret = expand_binop (mode, this_optab, op0, modsub_ret,
+					 target, unsignedp, OPTAB_LIB_WIDEN);
+	      rtx_insn *modsub_insns = get_insns ();
+	      end_sequence ();
+	      unsigned divmul_cost = seq_cost (divmul_insns, speed_p);
+	      unsigned modsub_cost = seq_cost (modsub_insns, speed_p);
+	      /* If costs are the same then use as tie breaker the other other
+		 factor.  */
+	      if (divmul_cost == modsub_cost)
+		{
+		  divmul_cost = seq_cost (divmul_insns, !speed_p);
+		  modsub_cost = seq_cost (modsub_insns, !speed_p);
+		}
+
+	      if (divmul_cost <= modsub_cost)
+		{
+		  emit_insn (divmul_insns);
+		  return REDUCE_BIT_FIELD (divmul_ret);
+		}
+	      emit_insn (modsub_insns);
+	      return REDUCE_BIT_FIELD (modsub_ret);
+	    }
+	}
+
      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
      return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));

@ -9222,61 +9336,21 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
    case CEIL_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
-     {
-       /* If this is a fixed-point operation, then we cannot use the code
-	  below because "expand_divmod" doesn't support sat/no-sat fixed-point
-	  divisions.   */
-       if (ALL_FIXED_POINT_MODE_P (mode))
-	 goto binop;
+      /* If this is a fixed-point operation, then we cannot use the code
+	 below because "expand_divmod" doesn't support sat/no-sat fixed-point
+	 divisions.   */
+      if (ALL_FIXED_POINT_MODE_P (mode))
+	goto binop;

-       if (modifier == EXPAND_STACK_PARM)
-	 target = 0;
-       /* Possible optimization: compute the dividend with EXPAND_SUM
-	  then if the divisor is constant can optimize the case
-	  where some terms of the dividend have coeffs divisible by it.  */
-       expand_operands (treeop0, treeop1,
-			subtarget, &op0, &op1, EXPAND_NORMAL);
-       bool mod_p = code == TRUNC_MOD_EXPR || code == FLOOR_MOD_EXPR
-		    || code == CEIL_MOD_EXPR || code == ROUND_MOD_EXPR;
-       if (SCALAR_INT_MODE_P (mode)
-	   && optimize >= 2
-	   && get_range_pos_neg (treeop0) == 1
-	   && get_range_pos_neg (treeop1) == 1)
-	 {
-	   /* If both arguments are known to be positive when interpreted
-	      as signed, we can expand it as both signed and unsigned
-	      division or modulo.  Choose the cheaper sequence in that case.  */
-	   bool speed_p = optimize_insn_for_speed_p ();
-	   do_pending_stack_adjust ();
-	   start_sequence ();
-	   rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
-	   rtx_insn *uns_insns = get_insns ();
-	   end_sequence ();
-	   start_sequence ();
-	   rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
-	   rtx_insn *sgn_insns = get_insns ();
-	   end_sequence ();
-	   unsigned uns_cost = seq_cost (uns_insns, speed_p);
-	   unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
+      if (modifier == EXPAND_STACK_PARM)
+	target = 0;
+      /* Possible optimization: compute the dividend with EXPAND_SUM
+	 then if the divisor is constant can optimize the case
+	 where some terms of the dividend have coeffs divisible by it.  */
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
+      return expand_expr_divmod (code, mode, treeop0, treeop1, op0, op1,
+				 target, unsignedp);

-	   /* If costs are the same then use as tie breaker the other
-	      other factor.  */
-	   if (uns_cost == sgn_cost)
-	     {
-		uns_cost = seq_cost (uns_insns, !speed_p);
-		sgn_cost = seq_cost (sgn_insns, !speed_p);
-	     }
-
-	   if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
-	     {
-	       emit_insn (uns_insns);
-	       return uns_ret;
-	     }
-	   emit_insn (sgn_insns);
-	   return sgn_ret;
-	 }
-       return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
-     }
    case RDIV_EXPR:
      goto binop;

--- a/gcc/testsuite/gcc.target/i386/pr96696.c
+++ b/gcc/testsuite/gcc.target/i386/pr96696.c
@ -0,0 +1,30 @@
+/* PR tree-optimization/96696 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler-times "\tidivl\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tdivl\t" 2 } } */
+/* { dg-final { scan-assembler-not "\ti?mull\t" } } */
+
+int
+foo (int x, int y)
+{
+  return (x / y) * y;
+}
+
+int
+bar (int x, int y)
+{
+  return x - (x % y);
+}
+
+unsigned
+baz (unsigned x, unsigned y)
+{
+  return (x / y) * y;
+}
+
+unsigned
+qux (unsigned x, unsigned y)
+{
+  return x - (x % y);
+}