match.pd: Simplify 1 / X for integer X [PR95424]
This patch implements an optimization for the following C++ code:

    int f(int x) {
        return 1 / x;
    }

    int f(unsigned int x) {
        return 1 / x;
    }

Before this patch, x86-64 gcc -std=c++20 -O3 produces the following assembly:

    f(int):
        xor edx, edx
        mov eax, 1
        idiv edi
        ret
    f(unsigned int):
        xor edx, edx
        mov eax, 1
        div edi
        ret

In comparison, clang++ -std=c++20 -O3 produces the following assembly:

    f(int):
        lea ecx, [rdi + 1]
        xor eax, eax
        cmp ecx, 3
        cmovb eax, edi
        ret
    f(unsigned int):
        xor eax, eax
        cmp edi, 1
        sete al
        ret

Clang's output is more efficient as it avoids expensive div operations.

With this patch, GCC now produces the following assembly:

    f(int):
        lea eax, [rdi + 1]
        cmp eax, 2
        mov eax, 0
        cmovbe eax, edi
        ret
    f(unsigned int):
        xor eax, eax
        cmp edi, 1
        sete al
        ret

which is virtually identical to Clang's assembly output. Any slight differences in the output for f(int) are possibly related to a different missed optimization.

v2: https://gcc.gnu.org/pipermail/gcc-patches/2022-January/587751.html
Changes from v2:
1. Refactor from using a switch statement to using the built-in if-else statement.

v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-January/587634.html
Changes from v1:
1. Refactor common if conditions.
2. Use build_[minus_]one_cst (type) to get -1/1 of the correct type.
3. Match only for TRUNC_DIV_EXPR and TYPE_PRECISION (type) > 1.

gcc/ChangeLog:

PR tree-optimization/95424
* match.pd: Simplify 1 / X where X is an integer.
This commit is contained in:
parent
a591c71b41
commit
c2b610e7c6
13
gcc/match.pd
13
gcc/match.pd
@ -435,6 +435,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
&& TYPE_UNSIGNED (type))
|
||||
(trunc_divmod @0 @1))))
|
||||
|
||||
/* 1 / X -> X == 1 for unsigned integer X.
   1 / X -> X >= -1 && X <= 1 ? X : 0 for signed integer X.
   The signed range test is emitted as (unsigned) X + 1 <= 2, so a single
   unsigned comparison covers the three values -1, 0 and 1.
   But not for 1 / 0 so that we can get proper warnings and errors,
   not with -fnon-call-exceptions unless X is known to be nonzero, since
   the division may trap and that trap must not be optimized away,
   and not for 1-bit integers as they are edge cases better handled
   elsewhere.  */
(simplify
 (trunc_div integer_onep@0 @1)
 (if (INTEGRAL_TYPE_P (type)
      && TYPE_PRECISION (type) > 1
      && !integer_zerop (@1)
      && (!flag_non_call_exceptions || tree_expr_nonzero_p (@1)))
  (if (TYPE_UNSIGNED (type))
   /* A comparison has to be built with a boolean type; convert its
      result back to the type of the division afterwards.  */
   (convert (eq:boolean_type_node @1 { build_one_cst (type); }))
   (with { tree utype = unsigned_type_for (type); }
    (cond (le (plus (convert:utype @1) { build_one_cst (utype); })
              { build_int_cst (utype, 2); })
     @1 { build_zero_cst (type); })))))
|
||||
|
||||
/* Combine two successive divisions. Note that combining ceil_div
|
||||
and floor_div is trickier and combining round_div even more so. */
|
||||
(for div (trunc_div exact_div)
|
||||
|
9
gcc/testsuite/gcc.dg/tree-ssa/divide-6.c
Normal file
9
gcc/testsuite/gcc.dg/tree-ssa/divide-6.c
Normal file
@ -0,0 +1,9 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -fdump-tree-optimized" } */
|
||||
|
||||
/* Unsigned reciprocal of X; the dg-final scans in this file expect the
   division to be replaced by a comparison of X against 1.  */
unsigned int
f (unsigned int x)
{
  return 1 / x;
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "1 / x_..D.;" "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump "x_..D. == 1;" "optimized" } } */
|
9
gcc/testsuite/gcc.dg/tree-ssa/divide-7.c
Normal file
9
gcc/testsuite/gcc.dg/tree-ssa/divide-7.c
Normal file
@ -0,0 +1,9 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -fdump-tree-optimized" } */
|
||||
|
||||
/* Signed reciprocal of X; the dg-final scans in this file expect the
   division to be replaced by a "<= 2 ? x : 0" conditional.  */
int
f (int x)
{
  return 1 / x;
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "1 / x_..D.;" "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump ".. <= 2 ? x_..D. : 0;" "optimized" } } */
|
Loading…
Reference in New Issue
Block a user