re PR tree-optimization/54733 (Missing opportunity to optimize endian independent load/store)

2014-05-23  Thomas Preud'homme  <thomas.preudhomme@arm.com>

	PR tree-optimization/54733
gcc/
	* tree-ssa-math-opts.c (nop_stats): New "bswap_stats" structure.
	(CMPNOP): Define.
	(find_bswap_or_nop_load): New.
	(find_bswap_1): Renamed to ...
	(find_bswap_or_nop_1): This. Also add support for memory source.
	(find_bswap): Renamed to ...
	(find_bswap_or_nop): This. Also add support for memory source and
	detection of bitwise operations equivalent to load in host endianness.
	(execute_optimize_bswap): Likewise. Also move its leading comment back
	in place and split statement transformation into ...
	(bswap_replace): This.

gcc/testsuite
	* gcc.dg/optimize-bswapdi-3.c: New test to check extension of bswap
	optimization to support memory sources and bitwise operations
	equivalent to load in host endianness.
	* gcc.dg/optimize-bswaphi-1.c: Likewise.
	* gcc.dg/optimize-bswapsi-2.c: Likewise.
	* gcc.c-torture/execute/bswap-2.c: Likewise.

From-SVN: r210843
This commit is contained in:
Thomas Preud'homme 2014-05-23 03:33:28 +00:00 committed by Thomas Preud'homme
parent eaa33a6a65
commit 73984f8494
7 changed files with 679 additions and 109 deletions

View File

@ -1,3 +1,18 @@
2014-05-23 Thomas Preud'homme <thomas.preudhomme@arm.com>
PR tree-optimization/54733
* tree-ssa-math-opts.c (nop_stats): New "bswap_stats" structure.
(CMPNOP): Define.
(find_bswap_or_nop_load): New.
(find_bswap_1): Renamed to ...
(find_bswap_or_nop_1): This. Also add support for memory source.
(find_bswap): Renamed to ...
(find_bswap_or_nop): This. Also add support for memory source and
detection of bitwise operations equivalent to load in host endianness.
(execute_optimize_bswap): Likewise. Also move its leading comment back
in place and split statement transformation into ...
(bswap_replace): This.
2014-05-22 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/61215

View File

@ -1,3 +1,13 @@
2014-05-23 Thomas Preud'homme <thomas.preudhomme@arm.com>
PR tree-optimization/54733
* gcc.dg/optimize-bswapdi-3.c: New test to check extension of bswap
optimization to support memory sources and bitwise operations
equivalent to load in host endianness.
* gcc.dg/optimize-bswaphi-1.c: Likewise.
* gcc.dg/optimize-bswapsi-2.c: Likewise.
* gcc.c-torture/execute/bswap-2.c: Likewise.
2014-05-23 Thomas Preud'homme <thomas.preudhomme@arm.com>
* lib/target-supports.exp: New effective targets for architectures

View File

@ -0,0 +1,90 @@
/* Provide a uint32_t type without depending on <stdint.h>.  Use the
   compiler-provided __UINT32_TYPE__ when it exists; otherwise fall back to
   plain unsigned (main returns early if that is not 32 bits wide).  */
#ifdef __UINT32_TYPE__
typedef __UINT32_TYPE__ uint32_t;
#else
typedef unsigned uint32_t;
#endif
/* Four 7-bit bit-fields.  Each field is narrower than a byte, so reading
   them is not the same as reading four whole bytes (exact layout is
   implementation-defined).  */
struct bitfield {
unsigned char f0:7;
unsigned char f1:7;
unsigned char f2:7;
unsigned char f3:7;
};
/* Four full bytes; main uses this view to initialise the union.  */
struct ok {
unsigned char f0;
unsigned char f1;
unsigned char f2;
unsigned char f3;
};
/* Overlay of the whole-byte view (written by main through INVAL) and the
   7-bit bit-field view (read by the partial_read_* functions through
   BFVAL).  */
union bf_or_uint32 {
struct ok inval;
struct bitfield bfval;
};
/* Combine the four 7-bit fields of IN.bfval with f0 in the lowest byte
   position and f3 in the highest.  This looks like a little-endian 32-bit
   read but only covers 7 bits of each field, so it must not be turned into
   a plain load or byte swap.  */
__attribute__ ((noinline, noclone)) uint32_t
partial_read_le32 (union bf_or_uint32 in)
{
return in.bfval.f0 | (in.bfval.f1 << 8)
| (in.bfval.f2 << 16) | (in.bfval.f3 << 24);
}
/* Same as partial_read_le32 but with the field order reversed: f3 lands in
   the lowest byte position and f0 in the highest.  */
__attribute__ ((noinline, noclone)) uint32_t
partial_read_be32 (union bf_or_uint32 in)
{
return in.bfval.f3 | (in.bfval.f2 << 8)
| (in.bfval.f1 << 16) | (in.bfval.f0 << 24);
}
/* Assemble X[0..3] with X[0] as the least significant byte, but store 1
   through Y between the second and third byte loads.  main passes Y
   pointing at X[2], so the store changes the byte read into c2; the four
   loads therefore cannot be merged into a single 32-bit load.  */
__attribute__ ((noinline, noclone)) uint32_t
fake_read_le32 (char *x, char *y)
{
unsigned char c0, c1, c2, c3;
c0 = x[0];
c1 = x[1];
/* Intervening store: must stay between the loads of x[1] and x[2].  */
*y = 1;
c2 = x[2];
c3 = x[3];
return c0 | c1 << 8 | c2 << 16 | c3 << 24;
}
/* Same as fake_read_le32 but assembling the bytes in the opposite order:
   X[3] is the least significant byte and X[0] the most significant.  The
   store through Y again sits between the loads of X[1] and X[2].  */
__attribute__ ((noinline, noclone)) uint32_t
fake_read_be32 (char *x, char *y)
{
unsigned char c0, c1, c2, c3;
c0 = x[0];
c1 = x[1];
/* Intervening store: must stay between the loads of x[1] and x[2].  */
*y = 1;
c2 = x[2];
c3 = x[3];
return c3 | c2 << 8 | c1 << 16 | c0 << 24;
}
int
main ()
{
union bf_or_uint32 bfin;
uint32_t out;
char cin[] = { 0x83, 0x85, 0x87, 0x89 };
if (sizeof (uint32_t) * __CHAR_BIT__ != 32)
return 0;
bfin.inval = (struct ok) { 0x83, 0x85, 0x87, 0x89 };
out = partial_read_le32 (bfin);
if (out != 0x09070503 && out != 0x88868482)
__builtin_abort ();
bfin.inval = (struct ok) { 0x83, 0x85, 0x87, 0x89 };
out = partial_read_be32 (bfin);
if (out != 0x03050709 && out != 0x82848688)
__builtin_abort ();
out = fake_read_le32 (cin, &cin[2]);
if (out != 0x89018583)
__builtin_abort ();
out = fake_read_be32 (cin, &cin[2]);
if (out != 0x83850189)
__builtin_abort ();
return 0;
}

View File

@ -0,0 +1,64 @@
/* { dg-do compile } */
/* { dg-require-effective-target bswap64 } */
/* { dg-require-effective-target stdint_types } */
/* { dg-options "-O2 -fdump-tree-bswap" } */
#include <stdint.h>
/* Global byte array read by the *_1 functions.  */
unsigned char data[8];
/* Eight consecutive byte members, passed by value to the *_2 functions.  */
struct uint64_st {
unsigned char u0, u1, u2, u3, u4, u5, u6, u7;
};
/* Build a 64-bit value from the global array, data[0] being the least
   significant byte (little-endian read).  */
uint64_t read_le64_1 (void)
{
return (uint64_t) data[0] | ((uint64_t) data[1] << 8)
| ((uint64_t) data[2] << 16) | ((uint64_t) data[3] << 24)
| ((uint64_t) data[4] << 32) | ((uint64_t) data[5] << 40)
| ((uint64_t) data[6] << 48) | ((uint64_t) data[7] << 56);
}
/* Little-endian read from a structure source: member u0 is the least
   significant byte.  The parameter shadows the global array DATA.  */
uint64_t read_le64_2 (struct uint64_st data)
{
return (uint64_t) data.u0 | ((uint64_t) data.u1 << 8)
| ((uint64_t) data.u2 << 16) | ((uint64_t) data.u3 << 24)
| ((uint64_t) data.u4 << 32) | ((uint64_t) data.u5 << 40)
| ((uint64_t) data.u6 << 48) | ((uint64_t) data.u7 << 56);
}
/* Little-endian read through a pointer: *data is the least significant
   byte.  The parameter shadows the global array DATA.  */
uint64_t read_le64_3 (unsigned char *data)
{
return (uint64_t) *data | ((uint64_t) *(data + 1) << 8)
| ((uint64_t) *(data + 2) << 16) | ((uint64_t) *(data + 3) << 24)
| ((uint64_t) *(data + 4) << 32) | ((uint64_t) *(data + 5) << 40)
| ((uint64_t) *(data + 6) << 48) | ((uint64_t) *(data + 7) << 56);
}
/* Build a 64-bit value from the global array, data[0] being the most
   significant byte (big-endian read).  */
uint64_t read_be64_1 (void)
{
return (uint64_t) data[7] | ((uint64_t) data[6] << 8)
| ((uint64_t) data[5] << 16) | ((uint64_t) data[4] << 24)
| ((uint64_t) data[3] << 32) | ((uint64_t) data[2] << 40)
| ((uint64_t) data[1] << 48) | ((uint64_t) data[0] << 56);
}
/* Big-endian read from a structure source: member u0 is the most
   significant byte.  The parameter shadows the global array DATA.  */
uint64_t read_be64_2 (struct uint64_st data)
{
return (uint64_t) data.u7 | ((uint64_t) data.u6 << 8)
| ((uint64_t) data.u5 << 16) | ((uint64_t) data.u4 << 24)
| ((uint64_t) data.u3 << 32) | ((uint64_t) data.u2 << 40)
| ((uint64_t) data.u1 << 48) | ((uint64_t) data.u0 << 56);
}
/* Big-endian read through a pointer: *data is the most significant byte.
   The parameter shadows the global array DATA.  */
uint64_t read_be64_3 (unsigned char *data)
{
return (uint64_t) *(data + 7) | ((uint64_t) *(data + 6) << 8)
| ((uint64_t) *(data + 5) << 16) | ((uint64_t) *(data + 4) << 24)
| ((uint64_t) *(data + 3) << 32) | ((uint64_t) *(data + 2) << 40)
| ((uint64_t) *(data + 1) << 48) | ((uint64_t) *data << 56);
}
/* { dg-final { scan-tree-dump-times "64 bit load in host endianness found at" 3 "bswap" } } */
/* { dg-final { scan-tree-dump-times "64 bit bswap implementation found at" 3 "bswap" { xfail alpha*-*-* arm*-*-* } } } */
/* { dg-final { cleanup-tree-dump "bswap" } } */

View File

@ -0,0 +1,47 @@
/* { dg-do compile } */
/* { dg-require-effective-target bswap16 } */
/* { dg-require-effective-target stdint_types } */
/* { dg-options "-O2 -fdump-tree-bswap" } */
/* { dg-options "-O2 -fdump-tree-bswap -march=z900" { target s390-*-* } } */
#include <stdint.h>
/* Global byte array read by the *_1 functions.  */
unsigned char data[2];
/* Two consecutive byte members, passed by value to the *_2 functions.  */
struct uint16_st {
unsigned char u0, u1;
};
/* Little-endian 16-bit read from the global array: data[0] is the low
   byte.  Only the low 16 bits of the uint32_t result can be set.  */
uint32_t read_le16_1 (void)
{
return data[0] | (data[1] << 8);
}
/* Little-endian 16-bit read from a structure source: u0 is the low byte.
   The parameter shadows the global array DATA.  */
uint32_t read_le16_2 (struct uint16_st data)
{
return data.u0 | (data.u1 << 8);
}
/* Little-endian 16-bit read through a pointer: *data is the low byte.
   The parameter shadows the global array DATA.  */
uint32_t read_le16_3 (unsigned char *data)
{
return *data | (*(data + 1) << 8);
}
/* Big-endian 16-bit read from the global array: data[0] is the high
   byte.  */
uint32_t read_be16_1 (void)
{
return data[1] | (data[0] << 8);
}
/* Big-endian 16-bit read from a structure source: u0 is the high byte.
   The parameter shadows the global array DATA.  */
uint32_t read_be16_2 (struct uint16_st data)
{
return data.u1 | (data.u0 << 8);
}
/* Big-endian 16-bit read through a pointer: *data is the high byte.
   The parameter shadows the global array DATA.  */
uint32_t read_be16_3 (unsigned char *data)
{
return *(data + 1) | (*data << 8);
}
/* { dg-final { scan-tree-dump-times "16 bit load in host endianness found at" 3 "bswap" } } */
/* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 3 "bswap" { xfail alpha*-*-* arm*-*-* } } } */
/* { dg-final { cleanup-tree-dump "bswap" } } */

View File

@ -0,0 +1,49 @@
/* { dg-do compile } */
/* { dg-require-effective-target bswap32 } */
/* { dg-require-effective-target stdint_types } */
/* { dg-options "-O2 -fdump-tree-bswap" } */
/* { dg-options "-O2 -fdump-tree-bswap -march=z900" { target s390-*-* } } */
#include <stdint.h>
/* Byte array read by the *_1 functions; only declared here, not defined.  */
extern unsigned char data[4];
/* Four consecutive byte members, passed by value to the *_2 functions.  */
struct uint32_st {
unsigned char u0, u1, u2, u3;
};
/* Little-endian 32-bit read from the global array: data[0] is the least
   significant byte.  */
uint32_t read_le32_1 (void)
{
return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
}
/* Little-endian 32-bit read from a structure source: u0 is the least
   significant byte.  The parameter shadows the global array DATA.  */
uint32_t read_le32_2 (struct uint32_st data)
{
return data.u0 | (data.u1 << 8) | (data.u2 << 16) | (data.u3 << 24);
}
/* Little-endian 32-bit read through a pointer: *data is the least
   significant byte.  The parameter shadows the global array DATA.  */
uint32_t read_le32_3 (unsigned char *data)
{
return *data | (*(data + 1) << 8) | (*(data + 2) << 16)
| (*(data + 3) << 24);
}
/* Big-endian 32-bit read from the global array: data[0] is the most
   significant byte.  */
uint32_t read_be32_1 (void)
{
return data[3] | (data[2] << 8) | (data[1] << 16) | (data[0] << 24);
}
/* Big-endian 32-bit read from a structure source: u0 is the most
   significant byte.  The parameter shadows the global array DATA.  */
uint32_t read_be32_2 (struct uint32_st data)
{
return data.u3 | (data.u2 << 8) | (data.u1 << 16) | (data.u0 << 24);
}
/* Big-endian 32-bit read through a pointer: *data is the most significant
   byte.  The parameter shadows the global array DATA.  */
uint32_t read_be32_3 (unsigned char *data)
{
return *(data + 3) | (*(data + 2) << 8) | (*(data + 1) << 16)
| (*data << 24);
}
/* { dg-final { scan-tree-dump-times "32 bit load in host endianness found at" 3 "bswap" } } */
/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 3 "bswap" { xfail alpha*-*-* arm*-*-* } } } */
/* { dg-final { cleanup-tree-dump "bswap" } } */

View File

@ -98,6 +98,7 @@ along with GCC; see the file COPYING3. If not see
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "gimple-ssa.h"
@ -170,15 +171,15 @@ static struct
static struct
{
/* Number of hand-written 16-bit bswaps found. */
/* Number of hand-written 16-bit nop / bswaps found. */
int found_16bit;
/* Number of hand-written 32-bit bswaps found. */
/* Number of hand-written 32-bit nop / bswaps found. */
int found_32bit;
/* Number of hand-written 64-bit bswaps found. */
/* Number of hand-written 64-bit nop / bswaps found. */
int found_64bit;
} bswap_stats;
} nop_stats, bswap_stats;
static struct
{
@ -1604,13 +1605,43 @@ make_pass_cse_sincos (gcc::context *ctxt)
0 - byte has the value 0
1..size - byte contains the content of the byte
number indexed with that value minus one */
number indexed with that value minus one.
To detect permutations on memory sources (arrays and structures), a symbolic
number is also associated with a base address (the array or structure the
load is made from), an offset from the base address and a range which gives
the number of bytes spanned from the lowest to the highest accessed memory
location to make such a symbolic number. The range is thus different from
size, which reflects the size of the type of the current expression. Note
that for a non-memory source, range holds the same value as size.
For instance, for an array char a[], (short) a[0] | (short) a[3] would have
a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
still have a size of 2 but this time a range of 1. */
struct symbolic_number {
/* Byte markers: one marker per byte of the value, 0 meaning the byte is
   zero and k (1..size) meaning it holds source byte number k.  */
unsigned HOST_WIDEST_INT n;
/* Size in bytes of the type of the current expression.  */
int size;
/* The following five fields describe a memory source and are filled in by
   find_bswap_or_nop_load; base_addr is NULL_TREE for a non-memory
   source.  */
/* Base object (or pointer, for a MEM_REF base) the load is made from.  */
tree base_addr;
/* Variable part of the offset from the base, or NULL_TREE if none.  */
tree offset;
/* Constant byte offset of the access from the base.  */
HOST_WIDE_INT bytepos;
/* Pointer type returned by reference_alias_ptr_type for the access; used
   as the type of the zero offset operand of the replacement MEM_REF.  */
tree alias_set;
/* Virtual use operand of the load statement.  */
tree vuse;
/* Extent of memory covered by the symbolic number.  Counted in bytes while
   the number is being built; find_bswap_or_nop multiplies it by
   BITS_PER_UNIT before returning, so its callers see bits.  */
unsigned HOST_WIDE_INT range;
};
/* Symbolic number describing an unmodified value: byte k of the result
   holds marker k, lowest byte first.  A find_bswap_or_nop_1 result equal to
   this (once masked down to the size of the symbolic number) is a nop.
   Evaluates to 0 when HOST_WIDEST_INT is narrower than 8 bytes.  */
#define CMPNOP \
  (sizeof (HOST_WIDEST_INT) >= 8 \
   ? (((unsigned HOST_WIDEST_INT) 0x08070605 << 32) | 0x04030201) \
   : 0)
/* Symbolic number describing a fully byte-swapped value: the markers
   appear in reverse order, marker 1 in the highest byte.  A
   find_bswap_or_nop_1 result equal to this (once adjusted to the size of
   the symbolic number) is a byte swap.  Evaluates to 0 when
   HOST_WIDEST_INT is narrower than 8 bytes.  */
#define CMPXCHG \
  (sizeof (HOST_WIDEST_INT) >= 8 \
   ? (((unsigned HOST_WIDEST_INT) 0x01020304 << 32) | 0x05060708) \
   : 0)
/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
number N. Return false if the requested operation is not permitted
on a symbolic number. */
@ -1670,13 +1701,76 @@ verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
return true;
}
/* find_bswap_1 invokes itself recursively with N and tries to perform
the operation given by the rhs of STMT on the result. If the
operation could successfully be executed the function returns the
tree expression of the source operand and NULL otherwise. */
/* Check whether STMT is a non-volatile load that could be a leaf of a byte
   swap or host-endian read built from a memory source.  REF is the memory
   reference being loaded.  On success return true and record in N the base
   of the memory area accessed, the variable and constant (byte) offsets of
   the access from that base, the alias pointer type of REF and the virtual
   use of STMT.  Return false if STMT is not a load, has volatile operands,
   or the access does not start on a byte boundary or is not a whole number
   of bytes wide.  Note that N->base_addr and N->offset may already have
   been written when false is returned.
   NOTE(review): this function is not declared static -- confirm whether
   external linkage is intended.  */
bool
find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
{
/* Leaf node is an array or component ref. Memorize its base and
offset from base to compare to other such leaf node. */
HOST_WIDE_INT bitsize, bitpos;
enum machine_mode mode;
int unsignedp, volatilep;
if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
return false;
/* Split REF into its base object, a constant bit position and size, and
   an optional variable offset.  */
n->base_addr = get_inner_reference (ref, &bitsize, &bitpos, &n->offset,
&mode, &unsignedp, &volatilep, false);
/* For a MEM_REF base, fold its constant offset into BITPOS and use the
   pointer operand as the base, so that accesses through the same pointer
   compare equal on base_addr.  */
if (TREE_CODE (n->base_addr) == MEM_REF)
{
offset_int bit_offset = 0;
tree off = TREE_OPERAND (n->base_addr, 1);
if (!integer_zerop (off))
{
/* Convert the MEM_REF byte offset to bits.  */
offset_int boff, coff = mem_ref_offset (n->base_addr);
boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
bit_offset += boff;
}
n->base_addr = TREE_OPERAND (n->base_addr, 0);
/* Avoid returning a negative bitpos as this may wreak havoc later. */
if (wi::neg_p (bit_offset))
{
offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
offset_int tem = bit_offset.and_not (mask);
/* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
Subtract it from BIT_OFFSET and add it (scaled) to OFFSET. */
bit_offset -= tem;
tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
if (n->offset)
n->offset = size_binop (PLUS_EXPR, n->offset,
wide_int_to_tree (sizetype, tem));
else
n->offset = wide_int_to_tree (sizetype, tem);
}
bitpos += bit_offset.to_shwi ();
}
/* Only whole-byte accesses can take part in a byte permutation; this also
   rejects bit-field reads.  */
if (bitpos % BITS_PER_UNIT)
return false;
if (bitsize % BITS_PER_UNIT)
return false;
n->bytepos = bitpos / BITS_PER_UNIT;
n->alias_set = reference_alias_ptr_type (ref);
n->vuse = gimple_vuse (stmt);
return true;
}
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
the operation given by the rhs of STMT on the result. If the operation
could successfully be executed the function returns the tree expression of
the source operand and NULL otherwise. */
static tree
find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
{
enum tree_code code;
tree rhs1, rhs2 = NULL;
@ -1689,6 +1783,9 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
rhs1 = gimple_assign_rhs1 (stmt);
if (find_bswap_or_nop_load (stmt, rhs1, n))
return rhs1;
if (TREE_CODE (rhs1) != SSA_NAME)
return NULL_TREE;
@ -1715,11 +1812,11 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
&& code != CONVERT_EXPR)
return NULL_TREE;
source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
source_expr1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
/* If find_bswap_1 returned NULL STMT is a leaf node and we have
to initialize the symbolic number. */
if (!source_expr1)
/* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
we have to initialize the symbolic number. */
if (!source_expr1 || gimple_assign_load_p (rhs1_stmt))
{
/* Set up the symbolic number N by setting each byte to a
value between 1 and the byte size of rhs1. The highest
@ -1729,14 +1826,18 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
if (n->size % BITS_PER_UNIT != 0)
return NULL_TREE;
n->size /= BITS_PER_UNIT;
n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
(unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
n->range = n->size;
n->n = CMPNOP;
if (n->size < (int)sizeof (HOST_WIDEST_INT))
n->n &= ((unsigned HOST_WIDEST_INT)1 <<
(n->size * BITS_PER_UNIT)) - 1;
source_expr1 = rhs1;
if (!source_expr1)
{
n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
source_expr1 = rhs1;
}
}
switch (code)
@ -1777,6 +1878,8 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
}
n->size = type_size / BITS_PER_UNIT;
if (!n->base_addr)
n->range = n->size;
}
break;
default:
@ -1805,17 +1908,79 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
switch (code)
{
case BIT_IOR_EXPR:
source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
source_expr1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
if (!source_expr1)
return NULL_TREE;
source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
source_expr2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
if (source_expr1 != source_expr2
|| n1.size != n2.size)
if (n1.size != n2.size || !source_expr2)
return NULL_TREE;
if (!n1.vuse != !n2.vuse ||
(n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
return NULL_TREE;
if (source_expr1 != source_expr2)
{
HOST_WIDEST_INT inc, mask;
unsigned i;
HOST_WIDE_INT off_sub;
struct symbolic_number *n_ptr;
if (!n1.base_addr || !n2.base_addr
|| !operand_equal_p (n1.base_addr, n2.base_addr, 0))
return NULL_TREE;
if (!n1.offset != !n2.offset ||
(n1.offset && !operand_equal_p (n1.offset, n2.offset, 0)))
return NULL_TREE;
/* We swap n1 with n2 to have n1 < n2. */
if (n2.bytepos < n1.bytepos)
{
struct symbolic_number tmpn;
tmpn = n2;
n2 = n1;
n1 = tmpn;
source_expr1 = source_expr2;
}
off_sub = n2.bytepos - n1.bytepos;
/* Check that the range of memory covered < biggest int size. */
if (off_sub + n2.range > (int) sizeof (HOST_WIDEST_INT))
return NULL_TREE;
n->range = n2.range + off_sub;
/* Reinterpret byte marks in symbolic number holding the value of
bigger weight according to host endianness. */
inc = BYTES_BIG_ENDIAN ? off_sub + n2.range - n1.range : off_sub;
mask = 0xFF;
if (BYTES_BIG_ENDIAN)
n_ptr = &n1;
else
n_ptr = &n2;
for (i = 0; i < sizeof (HOST_WIDEST_INT); i++, inc <<= 8,
mask <<= 8)
{
if (n_ptr->n & mask)
n_ptr->n += inc;
}
}
else
n->range = n1.range;
if (!n1.alias_set
|| alias_ptr_types_compatible_p (n1.alias_set, n2.alias_set))
n->alias_set = n1.alias_set;
else
n->alias_set = ptr_type_node;
n->vuse = n1.vuse;
n->base_addr = n1.base_addr;
n->offset = n1.offset;
n->bytepos = n1.bytepos;
n->size = n1.size;
for (i = 0, mask = 0xff; i < n->size; i++, mask <<= BITS_PER_UNIT)
{
@ -1840,57 +2005,75 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
return NULL_TREE;
}
/* Check if STMT completes a bswap implementation consisting of ORs,
SHIFTs and ANDs. Return the source tree expression on which the
byte swap is performed and NULL if no bswap was found. */
/* Check if STMT completes a bswap implementation or a read in a given
endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
accordingly. It also sets N to represent the kind of operations
performed: size of the resulting expression and whether it works on
a memory source, and if so alias-set and vuse. Finally, the function
returns the source tree expression, or NULL_TREE if STMT matches neither
pattern. */
static tree
find_bswap (gimple stmt)
find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
{
/* The number which the find_bswap result should match in order to
have a full byte swap. The number is shifted to the left according
to the size of the symbolic number before using it. */
unsigned HOST_WIDEST_INT cmp =
sizeof (HOST_WIDEST_INT) < 8 ? 0 :
(unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
/* The number which the find_bswap_or_nop_1 result should match in order
to have a full byte swap. The number is shifted to the right
according to the size of the symbolic number before using it. */
unsigned HOST_WIDEST_INT cmpxchg = CMPXCHG;
unsigned HOST_WIDEST_INT cmpnop = CMPNOP;
struct symbolic_number n;
tree source_expr;
int limit;
/* The last parameter determines the depth search limit. It usually
correlates directly to the number of bytes to be touched. We
increase that number by three here in order to also
cover signed -> unsigned converions of the src operand as can be seen
correlates directly to the number n of bytes to be touched. We
increase that number by log2(n) + 1 here in order to also
cover signed -> unsigned conversions of the src operand as can be seen
in libgcc, and for initial shift/and operation of the src operand. */
limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
source_expr = find_bswap_1 (stmt, &n, limit);
source_expr = find_bswap_or_nop_1 (stmt, n, limit);
if (!source_expr)
return NULL_TREE;
/* Zero out the extra bits of N and CMP. */
if (n.size < (int)sizeof (HOST_WIDEST_INT))
/* Find real size of result (highest non zero byte). */
if (n->base_addr)
{
unsigned HOST_WIDEST_INT mask =
((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
int rsize;
unsigned HOST_WIDEST_INT tmpn;
n.n &= mask;
cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_UNIT, rsize++);
n->range = rsize;
}
/* A complete byte swap should make the symbolic number to start
with the largest digit in the highest order byte. */
if (cmp != n.n)
/* Zero out the extra bits of N and CMP*. */
if (n->range < (int)sizeof (HOST_WIDEST_INT))
{
unsigned HOST_WIDEST_INT mask;
mask = ((unsigned HOST_WIDEST_INT)1 << (n->range * BITS_PER_UNIT)) - 1;
cmpxchg >>= (sizeof (HOST_WIDEST_INT) - n->range) * BITS_PER_UNIT;
cmpnop &= mask;
}
/* A complete byte swap should make the symbolic number to start with
the largest digit in the highest order byte. Unchanged symbolic
number indicates a read with same endianness as host architecture. */
if (n->n == cmpnop)
*bswap = false;
else if (n->n == cmpxchg)
*bswap = true;
else
return NULL_TREE;
/* Useless bit manipulation performed by code. */
if (!n->base_addr && n->n == cmpnop)
return NULL_TREE;
n->range *= BITS_PER_UNIT;
return source_expr;
}
/* Find manual byte swap implementations and turn them into a bswap
builtin invokation. */
namespace {
const pass_data pass_data_optimize_bswap =
@ -1924,6 +2107,156 @@ public:
}; // class pass_optimize_bswap
/* Perform the bswap optimization: replace the statement STMT at GSI
with load type, VUSE and set-alias as described by N if a memory
source is involved (N->base_addr is non null), followed by the
builtin bswap invocation in FNDECL if BSWAP is true. SRC gives
the source on which STMT is operating and N->range gives the
size of the expression involved for maintaining some statistics.
N->range is expected in bits here (16, 32 or 64). BSWAP_TYPE is the
argument/return type of FNDECL and LOAD_TYPE the unsigned integer type
used for the memory load. Return true if STMT was replaced and false
if the transformation was abandoned (byte-swapped load for which
SLOW_UNALIGNED_ACCESS holds); nothing is modified in that case. */
static bool
bswap_replace (gimple stmt, gimple_stmt_iterator *gsi, tree src, tree fndecl,
tree bswap_type, tree load_type, struct symbolic_number *n,
bool bswap)
{
tree tmp, tgt;
gimple call;
/* TGT is the SSA name (or other lhs) that must end up holding the
   result.  */
tgt = gimple_assign_lhs (stmt);
/* Need to load the value from memory first. */
if (n->base_addr)
{
tree addr_expr, addr_tmp, val_expr, val_tmp;
tree load_offset_ptr, aligned_load_type;
gimple addr_stmt, load_stmt;
unsigned align;
align = get_object_alignment (src);
/* Only the load + bswap combination is rejected on this condition; a
   plain host-endian load is still emitted.  */
if (bswap && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
return false;
/* Compute address to load from and cast according to the size
of the load. */
addr_expr = build_fold_addr_expr (unshare_expr (src));
if (is_gimple_min_invariant (addr_expr))
addr_tmp = addr_expr;
else
{
/* Materialise the address in a temporary placed before STMT.  */
addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
"load_src");
addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
gsi_insert_before (gsi, addr_stmt, GSI_SAME_STMT);
}
/* Perform the load. */
aligned_load_type = load_type;
/* Use a variant of LOAD_TYPE with reduced alignment when the object is
   less aligned than the type requires.  */
if (align < TYPE_ALIGN (load_type))
aligned_load_type = build_aligned_type (load_type, align);
load_offset_ptr = build_int_cst (n->alias_set, 0);
val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
load_offset_ptr);
if (!bswap)
{
/* Host-endian read: the whole computation becomes a single load.  */
if (n->range == 16)
nop_stats.found_16bit++;
else if (n->range == 32)
nop_stats.found_32bit++;
else
{
gcc_assert (n->range == 64);
nop_stats.found_64bit++;
}
/* Convert the result of load if necessary. */
if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
{
/* Load into a temporary before STMT, then turn STMT into a
   conversion of that temporary.  */
val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
"load_dst");
load_stmt = gimple_build_assign (val_tmp, val_expr);
gimple_set_vuse (load_stmt, n->vuse);
gsi_insert_before (gsi, load_stmt, GSI_SAME_STMT);
gimple_assign_set_rhs_with_ops_1 (gsi, NOP_EXPR, val_tmp,
NULL_TREE, NULL_TREE);
}
else
/* Types agree: STMT itself becomes the load.
   NOTE(review): unlike the two temporary-load paths, no explicit
   gimple_set_vuse is done here -- confirm update_stmt gives the
   statement the intended virtual use.  */
gimple_assign_set_rhs_with_ops_1 (gsi, MEM_REF, val_expr,
NULL_TREE, NULL_TREE);
update_stmt (gsi_stmt (*gsi));
if (dump_file)
{
fprintf (dump_file,
"%d bit load in host endianness found at: ",
(int)n->range);
print_gimple_stmt (dump_file, stmt, 0, 0);
}
return true;
}
else
{
/* Opposite-endian read: load into a temporary before STMT; the
   bswap call below then operates on that temporary.  */
val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
load_stmt = gimple_build_assign (val_tmp, val_expr);
gimple_set_vuse (load_stmt, n->vuse);
gsi_insert_before (gsi, load_stmt, GSI_SAME_STMT);
}
src = val_tmp;
}
/* From here on a bswap call is emitted, on SRC or on the value just
   loaded.  */
if (n->range == 16)
bswap_stats.found_16bit++;
else if (n->range == 32)
bswap_stats.found_32bit++;
else
{
gcc_assert (n->range == 64);
bswap_stats.found_64bit++;
}
tmp = src;
/* Convert the src expression if necessary. */
if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
{
gimple convert_stmt;
tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tmp, src, NULL);
gsi_insert_before (gsi, convert_stmt, GSI_SAME_STMT);
}
call = gimple_build_call (fndecl, 1, tmp);
tmp = tgt;
/* Convert the result if necessary. */
if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
{
gimple convert_stmt;
tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tgt, tmp, NULL);
/* Inserted after STMT before the call is; the call is inserted at the
   same position further down, which presumably places it ahead of
   this conversion -- confirm against gsi_insert_after semantics.  */
gsi_insert_after (gsi, convert_stmt, GSI_SAME_STMT);
}
gimple_call_set_lhs (call, tmp);
if (dump_file)
{
fprintf (dump_file, "%d bit bswap implementation found at: ",
(int)n->range);
print_gimple_stmt (dump_file, stmt, 0, 0);
}
/* Put the call right after STMT, then drop the original statement.  */
gsi_insert_after (gsi, call, GSI_SAME_STMT);
gsi_remove (gsi, true);
return true;
}
/* Find manual byte swap implementations as well as load in a given
endianness. Byte swaps are turned into a bswap builtin invocation
while endian loads are converted to bswap builtin invocation or
simple load according to the host endianness. */
unsigned int
pass_optimize_bswap::execute (function *fun)
{
@ -1946,9 +2279,6 @@ pass_optimize_bswap::execute (function *fun)
&& (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
|| (bswap32_p && word_mode == SImode)));
if (!bswap16_p && !bswap32_p && !bswap64_p)
return 0;
/* Determine the argument type of the builtins. The code later on
assumes that the return and argument type are the same. */
if (bswap16_p)
@ -1969,6 +2299,7 @@ pass_optimize_bswap::execute (function *fun)
bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
}
memset (&nop_stats, 0, sizeof (nop_stats));
memset (&bswap_stats, 0, sizeof (bswap_stats));
FOR_EACH_BB_FN (bb, fun)
@ -1982,21 +2313,24 @@ pass_optimize_bswap::execute (function *fun)
for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
{
gimple stmt = gsi_stmt (gsi);
tree bswap_src, bswap_type;
tree bswap_tmp;
tree fndecl = NULL_TREE;
int type_size;
gimple call;
tree fndecl = NULL_TREE, bswap_type = NULL_TREE;
tree src, load_type;
struct symbolic_number n;
bool bswap;
if (!is_gimple_assign (stmt)
|| gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
continue;
type_size = TYPE_PRECISION (gimple_expr_type (stmt));
src = find_bswap_or_nop (stmt, &n, &bswap);
switch (type_size)
if (!src)
continue;
switch (n.range)
{
case 16:
load_type = uint16_type_node;
if (bswap16_p)
{
fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
@ -2004,6 +2338,7 @@ pass_optimize_bswap::execute (function *fun)
}
break;
case 32:
load_type = uint32_type_node;
if (bswap32_p)
{
fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
@ -2011,6 +2346,7 @@ pass_optimize_bswap::execute (function *fun)
}
break;
case 64:
load_type = uint64_type_node;
if (bswap64_p)
{
fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
@ -2021,62 +2357,21 @@ pass_optimize_bswap::execute (function *fun)
continue;
}
if (!fndecl)
if (bswap && !fndecl)
continue;
bswap_src = find_bswap (stmt);
if (!bswap_src)
continue;
changed = true;
if (type_size == 16)
bswap_stats.found_16bit++;
else if (type_size == 32)
bswap_stats.found_32bit++;
else
bswap_stats.found_64bit++;
bswap_tmp = bswap_src;
/* Convert the src expression if necessary. */
if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
{
gimple convert_stmt;
bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
convert_stmt = gimple_build_assign_with_ops
(NOP_EXPR, bswap_tmp, bswap_src, NULL);
gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
}
call = gimple_build_call (fndecl, 1, bswap_tmp);
bswap_tmp = gimple_assign_lhs (stmt);
/* Convert the result if necessary. */
if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
{
gimple convert_stmt;
bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
convert_stmt = gimple_build_assign_with_ops
(NOP_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
}
gimple_call_set_lhs (call, bswap_tmp);
if (dump_file)
{
fprintf (dump_file, "%d bit bswap implementation found at: ",
(int)type_size);
print_gimple_stmt (dump_file, stmt, 0, 0);
}
gsi_insert_after (&gsi, call, GSI_SAME_STMT);
gsi_remove (&gsi, true);
if (bswap_replace (stmt, &gsi, src, fndecl, bswap_type, load_type,
&n, bswap))
changed = true;
}
}
statistics_counter_event (fun, "16-bit nop implementations found",
nop_stats.found_16bit);
statistics_counter_event (fun, "32-bit nop implementations found",
nop_stats.found_32bit);
statistics_counter_event (fun, "64-bit nop implementations found",
nop_stats.found_64bit);
statistics_counter_event (fun, "16-bit bswap implementations found",
bswap_stats.found_16bit);
statistics_counter_event (fun, "32-bit bswap implementations found",