|
|
|
@ -18,9 +18,10 @@
|
|
|
|
|
along with GCC; see the file COPYING3. If not see
|
|
|
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
|
|
|
|
|
|
/* The purpose of the store merging pass is to combine multiple memory
|
|
|
|
|
stores of constant values, values loaded from memory or bitwise operations
|
|
|
|
|
on those to consecutive memory locations into fewer wider stores.
|
|
|
|
|
/* The purpose of the store merging pass is to combine multiple memory stores
|
|
|
|
|
of constant values, values loaded from memory, bitwise operations on those,
|
|
|
|
|
or bit-field values, to consecutive locations, into fewer wider stores.
|
|
|
|
|
|
|
|
|
|
For example, if we have a sequence peforming four byte stores to
|
|
|
|
|
consecutive memory locations:
|
|
|
|
|
[p ] := imm1;
|
|
|
|
@ -28,7 +29,7 @@
|
|
|
|
|
[p + 2B] := imm3;
|
|
|
|
|
[p + 3B] := imm4;
|
|
|
|
|
we can transform this into a single 4-byte store if the target supports it:
|
|
|
|
|
[p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness.
|
|
|
|
|
[p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
|
|
|
|
|
|
|
|
|
|
Or:
|
|
|
|
|
[p ] := [q ];
|
|
|
|
@ -46,12 +47,18 @@
|
|
|
|
|
if there is no overlap can be transformed into a single 4-byte
|
|
|
|
|
load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
|
|
|
|
|
|
|
|
|
|
Or:
|
|
|
|
|
[p:1 ] := imm;
|
|
|
|
|
[p:31] := val & 0x7FFFFFFF;
|
|
|
|
|
we can transform this into a single 4-byte store if the target supports it:
|
|
|
|
|
[p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
|
|
|
|
|
|
|
|
|
|
The algorithm is applied to each basic block in three phases:
|
|
|
|
|
|
|
|
|
|
1) Scan through the basic block recording assignments to
|
|
|
|
|
destinations that can be expressed as a store to memory of a certain size
|
|
|
|
|
at a certain bit offset from expressions we can handle. For bit-fields
|
|
|
|
|
we also note the surrounding bit region, bits that could be stored in
|
|
|
|
|
1) Scan through the basic block and record assignments to destinations
|
|
|
|
|
that can be expressed as a store to memory of a certain size at a certain
|
|
|
|
|
bit offset from base expressions we can handle. For bit-fields we also
|
|
|
|
|
record the surrounding bit region, i.e. bits that could be stored in
|
|
|
|
|
a read-modify-write operation when storing the bit-field. Record store
|
|
|
|
|
chains to different bases in a hash_map (m_stores) and make sure to
|
|
|
|
|
terminate such chains when appropriate (for example when when the stored
|
|
|
|
@ -60,14 +67,14 @@
|
|
|
|
|
etc. A store_immediate_info object is recorded for every such store.
|
|
|
|
|
Record as many such assignments to a single base as possible until a
|
|
|
|
|
statement that interferes with the store sequence is encountered.
|
|
|
|
|
Each store has up to 2 operands, which can be an immediate constant
|
|
|
|
|
or a memory load, from which the value to be stored can be computed.
|
|
|
|
|
Each store has up to 2 operands, which can be a either constant, a memory
|
|
|
|
|
load or an SSA name, from which the value to be stored can be computed.
|
|
|
|
|
At most one of the operands can be a constant. The operands are recorded
|
|
|
|
|
in store_operand_info struct.
|
|
|
|
|
|
|
|
|
|
2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
|
|
|
|
|
2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
|
|
|
|
|
store_immediate_info objects) and coalesce contiguous stores into
|
|
|
|
|
merged_store_group objects. For bit-fields stores, we don't need to
|
|
|
|
|
merged_store_group objects. For bit-field stores, we don't need to
|
|
|
|
|
require the stores to be contiguous, just their surrounding bit regions
|
|
|
|
|
have to be contiguous. If the expression being stored is different
|
|
|
|
|
between adjacent stores, such as one store storing a constant and
|
|
|
|
@ -91,8 +98,8 @@
|
|
|
|
|
multiple stores per store group to handle contiguous stores that are not
|
|
|
|
|
of a size that is a power of 2. For example it can try to emit a 40-bit
|
|
|
|
|
store as a 32-bit store followed by an 8-bit store.
|
|
|
|
|
We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or
|
|
|
|
|
TARGET_SLOW_UNALIGNED_ACCESS rules.
|
|
|
|
|
We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
|
|
|
|
|
or TARGET_SLOW_UNALIGNED_ACCESS settings.
|
|
|
|
|
|
|
|
|
|
Note on endianness and example:
|
|
|
|
|
Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
|
|
|
|
@ -149,6 +156,7 @@
|
|
|
|
|
#include "tree-hash-traits.h"
|
|
|
|
|
#include "gimple-iterator.h"
|
|
|
|
|
#include "gimplify.h"
|
|
|
|
|
#include "gimple-fold.h"
|
|
|
|
|
#include "stor-layout.h"
|
|
|
|
|
#include "timevar.h"
|
|
|
|
|
#include "tree-cfg.h"
|
|
|
|
@ -1309,9 +1317,10 @@ make_pass_optimize_bswap (gcc::context *ctxt)
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
|
|
/* Struct recording one operand for the store, which is either a constant,
|
|
|
|
|
then VAL represents the constant and all the other fields are zero,
|
|
|
|
|
or a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
|
|
|
|
|
and the other fields also reflect the memory load. */
|
|
|
|
|
then VAL represents the constant and all the other fields are zero, or
|
|
|
|
|
a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
|
|
|
|
|
and the other fields also reflect the memory load, or an SSA name, then
|
|
|
|
|
VAL represents the SSA name and all the other fields are zero, */
|
|
|
|
|
|
|
|
|
|
struct store_operand_info
|
|
|
|
|
{
|
|
|
|
@ -1345,8 +1354,9 @@ struct store_immediate_info
|
|
|
|
|
unsigned HOST_WIDE_INT bitregion_end;
|
|
|
|
|
gimple *stmt;
|
|
|
|
|
unsigned int order;
|
|
|
|
|
/* INTEGER_CST for constant stores, MEM_REF for memory copy or
|
|
|
|
|
BIT_*_EXPR for logical bitwise operation.
|
|
|
|
|
/* INTEGER_CST for constant stores, MEM_REF for memory copy,
|
|
|
|
|
BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
|
|
|
|
|
for bit insertion.
|
|
|
|
|
LROTATE_EXPR if it can be only bswap optimized and
|
|
|
|
|
ops are not really meaningful.
|
|
|
|
|
NOP_EXPR if bswap optimization detected identity, ops
|
|
|
|
@ -1425,6 +1435,7 @@ struct merged_store_group
|
|
|
|
|
gimple *first_stmt;
|
|
|
|
|
unsigned char *val;
|
|
|
|
|
unsigned char *mask;
|
|
|
|
|
bool bit_insertion;
|
|
|
|
|
|
|
|
|
|
merged_store_group (store_immediate_info *);
|
|
|
|
|
~merged_store_group ();
|
|
|
|
@ -1444,7 +1455,7 @@ dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
for (unsigned int i = 0; i < len; i++)
|
|
|
|
|
fprintf (fd, "%x ", ptr[i]);
|
|
|
|
|
fprintf (fd, "%02x ", ptr[i]);
|
|
|
|
|
fprintf (fd, "\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -1806,6 +1817,7 @@ merged_store_group::merged_store_group (store_immediate_info *info)
|
|
|
|
|
width has been finalized. */
|
|
|
|
|
val = NULL;
|
|
|
|
|
mask = NULL;
|
|
|
|
|
bit_insertion = false;
|
|
|
|
|
unsigned HOST_WIDE_INT align_bitpos = 0;
|
|
|
|
|
get_object_alignment_1 (gimple_assign_lhs (info->stmt),
|
|
|
|
|
&align, &align_bitpos);
|
|
|
|
@ -1930,10 +1942,8 @@ merged_store_group::apply_stores ()
|
|
|
|
|
stores.qsort (sort_by_order);
|
|
|
|
|
store_immediate_info *info;
|
|
|
|
|
unsigned int i;
|
|
|
|
|
/* Create a buffer of a size that is 2 times the number of bytes we're
|
|
|
|
|
storing. That way native_encode_expr can write power-of-2-sized
|
|
|
|
|
chunks without overrunning. */
|
|
|
|
|
buf_size = 2 * ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
|
|
|
|
|
/* Create a power-of-2-sized buffer for native_encode_expr. */
|
|
|
|
|
buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
|
|
|
|
|
val = XNEWVEC (unsigned char, 2 * buf_size);
|
|
|
|
|
mask = val + buf_size;
|
|
|
|
|
memset (val, 0, buf_size);
|
|
|
|
@ -1942,31 +1952,22 @@ merged_store_group::apply_stores ()
|
|
|
|
|
FOR_EACH_VEC_ELT (stores, i, info)
|
|
|
|
|
{
|
|
|
|
|
unsigned int pos_in_buffer = info->bitpos - bitregion_start;
|
|
|
|
|
tree cst = NULL_TREE;
|
|
|
|
|
tree cst;
|
|
|
|
|
if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
|
|
|
|
|
cst = info->ops[0].val;
|
|
|
|
|
else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
|
|
|
|
|
cst = info->ops[1].val;
|
|
|
|
|
else
|
|
|
|
|
cst = NULL_TREE;
|
|
|
|
|
bool ret = true;
|
|
|
|
|
if (cst)
|
|
|
|
|
ret = encode_tree_to_bitpos (cst, val, info->bitsize,
|
|
|
|
|
pos_in_buffer, buf_size);
|
|
|
|
|
if (cst && dump_file && (dump_flags & TDF_DETAILS))
|
|
|
|
|
{
|
|
|
|
|
if (ret)
|
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file, "After writing ");
|
|
|
|
|
print_generic_expr (dump_file, cst, 0);
|
|
|
|
|
fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
|
|
|
|
|
" at position %d the merged region contains:\n",
|
|
|
|
|
info->bitsize, pos_in_buffer);
|
|
|
|
|
dump_char_array (dump_file, val, buf_size);
|
|
|
|
|
}
|
|
|
|
|
if (info->rhs_code == BIT_INSERT_EXPR)
|
|
|
|
|
bit_insertion = true;
|
|
|
|
|
else
|
|
|
|
|
fprintf (dump_file, "Failed to merge stores\n");
|
|
|
|
|
ret = encode_tree_to_bitpos (cst, val, info->bitsize,
|
|
|
|
|
pos_in_buffer, buf_size);
|
|
|
|
|
}
|
|
|
|
|
if (!ret)
|
|
|
|
|
return false;
|
|
|
|
|
unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
|
|
|
|
|
if (BYTES_BIG_ENDIAN)
|
|
|
|
|
clear_bit_region_be (m, (BITS_PER_UNIT - 1
|
|
|
|
@ -1974,6 +1975,26 @@ merged_store_group::apply_stores ()
|
|
|
|
|
info->bitsize);
|
|
|
|
|
else
|
|
|
|
|
clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
|
|
|
|
|
if (cst && dump_file && (dump_flags & TDF_DETAILS))
|
|
|
|
|
{
|
|
|
|
|
if (ret)
|
|
|
|
|
{
|
|
|
|
|
fputs ("After writing ", dump_file);
|
|
|
|
|
print_generic_expr (dump_file, cst, 0);
|
|
|
|
|
fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
|
|
|
|
|
" at position %d\n", info->bitsize, pos_in_buffer);
|
|
|
|
|
fputs (" the merged value contains ", dump_file);
|
|
|
|
|
dump_char_array (dump_file, val, buf_size);
|
|
|
|
|
fputs (" the merged mask contains ", dump_file);
|
|
|
|
|
dump_char_array (dump_file, mask, buf_size);
|
|
|
|
|
if (bit_insertion)
|
|
|
|
|
fputs (" bit insertion is required\n", dump_file);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
fprintf (dump_file, "Failed to merge stores\n");
|
|
|
|
|
}
|
|
|
|
|
if (!ret)
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
stores.qsort (sort_by_bitpos);
|
|
|
|
|
return true;
|
|
|
|
@ -2038,14 +2059,14 @@ public:
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Pass not supported for PDP-endianness, nor for insane hosts
|
|
|
|
|
or target character sizes where native_{encode,interpret}_expr
|
|
|
|
|
/* Pass not supported for PDP-endian, nor for insane hosts or
|
|
|
|
|
target character sizes where native_{encode,interpret}_expr
|
|
|
|
|
doesn't work properly. */
|
|
|
|
|
virtual bool
|
|
|
|
|
gate (function *)
|
|
|
|
|
{
|
|
|
|
|
return flag_store_merging
|
|
|
|
|
&& WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN
|
|
|
|
|
&& BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
|
|
|
|
|
&& CHAR_BIT == 8
|
|
|
|
|
&& BITS_PER_UNIT == 8;
|
|
|
|
|
}
|
|
|
|
@ -2586,7 +2607,7 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
|
|
|
fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
|
|
|
|
|
fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
|
|
|
|
|
m_store_info.length ());
|
|
|
|
|
|
|
|
|
|
store_immediate_info *info;
|
|
|
|
@ -2597,20 +2618,13 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|
|
|
|
|
|
|
|
|
info = m_store_info[0];
|
|
|
|
|
merged_store_group *merged_store = new merged_store_group (info);
|
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
|
|
|
fputs ("New store group\n", dump_file);
|
|
|
|
|
|
|
|
|
|
FOR_EACH_VEC_ELT (m_store_info, i, info)
|
|
|
|
|
{
|
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
|
|
|
|
|
" bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
|
|
|
|
|
i, info->bitsize, info->bitpos);
|
|
|
|
|
print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
|
|
|
|
|
fprintf (dump_file, "\n------------\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (i <= ignore)
|
|
|
|
|
continue;
|
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
|
|
/* First try to handle group of stores like:
|
|
|
|
|
p[0] = data >> 24;
|
|
|
|
@ -2636,7 +2650,7 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|
|
|
|
merged_store = new merged_store_group (m_store_info[ignore]);
|
|
|
|
|
else
|
|
|
|
|
merged_store = NULL;
|
|
|
|
|
continue;
|
|
|
|
|
goto done;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -2651,16 +2665,20 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|
|
|
|
&& merged_store->stores[0]->rhs_code == INTEGER_CST)
|
|
|
|
|
{
|
|
|
|
|
merged_store->merge_overlapping (info);
|
|
|
|
|
continue;
|
|
|
|
|
goto done;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
/* |---store 1---||---store 2---|
|
|
|
|
|
This store is consecutive to the previous one.
|
|
|
|
|
Merge it into the current store group. There can be gaps in between
|
|
|
|
|
the stores, but there can't be gaps in between bitregions. */
|
|
|
|
|
else if (info->rhs_code != LROTATE_EXPR
|
|
|
|
|
&& info->bitregion_start <= merged_store->bitregion_end
|
|
|
|
|
&& info->rhs_code == merged_store->stores[0]->rhs_code)
|
|
|
|
|
else if (info->bitregion_start <= merged_store->bitregion_end
|
|
|
|
|
&& info->rhs_code != LROTATE_EXPR
|
|
|
|
|
&& (info->rhs_code == merged_store->stores[0]->rhs_code
|
|
|
|
|
|| (info->rhs_code == INTEGER_CST
|
|
|
|
|
&& merged_store->stores[0]->rhs_code == BIT_INSERT_EXPR)
|
|
|
|
|
|| (info->rhs_code == BIT_INSERT_EXPR
|
|
|
|
|
&& merged_store->stores[0]->rhs_code == INTEGER_CST)))
|
|
|
|
|
{
|
|
|
|
|
store_immediate_info *infof = merged_store->stores[0];
|
|
|
|
|
|
|
|
|
@ -2692,7 +2710,7 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|
|
|
|
info->bitpos + info->bitsize)))
|
|
|
|
|
{
|
|
|
|
|
merged_store->merge_into (info);
|
|
|
|
|
continue;
|
|
|
|
|
goto done;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -2702,31 +2720,43 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|
|
|
|
/* Try to apply all the stores recorded for the group to determine
|
|
|
|
|
the bitpattern they write and discard it if that fails.
|
|
|
|
|
This will also reject single-store groups. */
|
|
|
|
|
if (!merged_store->apply_stores ())
|
|
|
|
|
delete merged_store;
|
|
|
|
|
else
|
|
|
|
|
if (merged_store->apply_stores ())
|
|
|
|
|
m_merged_store_groups.safe_push (merged_store);
|
|
|
|
|
else
|
|
|
|
|
delete merged_store;
|
|
|
|
|
|
|
|
|
|
merged_store = new merged_store_group (info);
|
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
|
|
|
fputs ("New store group\n", dump_file);
|
|
|
|
|
|
|
|
|
|
done:
|
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
|
|
|
|
|
" bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
|
|
|
|
|
i, info->bitsize, info->bitpos);
|
|
|
|
|
print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
|
|
|
|
|
fputc ('\n', dump_file);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Record or discard the last store group. */
|
|
|
|
|
if (merged_store)
|
|
|
|
|
{
|
|
|
|
|
if (!merged_store->apply_stores ())
|
|
|
|
|
delete merged_store;
|
|
|
|
|
else
|
|
|
|
|
if (merged_store->apply_stores ())
|
|
|
|
|
m_merged_store_groups.safe_push (merged_store);
|
|
|
|
|
else
|
|
|
|
|
delete merged_store;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
|
|
|
|
|
|
|
|
|
|
bool success
|
|
|
|
|
= !m_merged_store_groups.is_empty ()
|
|
|
|
|
&& m_merged_store_groups.length () < m_store_info.length ();
|
|
|
|
|
|
|
|
|
|
if (success && dump_file)
|
|
|
|
|
fprintf (dump_file, "Coalescing successful!\n"
|
|
|
|
|
"Merged into %u stores\n",
|
|
|
|
|
fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
|
|
|
|
|
m_merged_store_groups.length ());
|
|
|
|
|
|
|
|
|
|
return success;
|
|
|
|
@ -2943,6 +2973,8 @@ count_multiple_uses (store_immediate_info *info)
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
case BIT_INSERT_EXPR:
|
|
|
|
|
return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
|
|
|
|
|
default:
|
|
|
|
|
gcc_unreachable ();
|
|
|
|
|
}
|
|
|
|
@ -3521,6 +3553,7 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
|
|
|
|
|
{
|
|
|
|
|
unsigned HOST_WIDE_INT try_size = split_store->size;
|
|
|
|
|
unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
|
|
|
|
|
unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
|
|
|
|
|
unsigned HOST_WIDE_INT align = split_store->align;
|
|
|
|
|
tree dest, src;
|
|
|
|
|
location_t loc;
|
|
|
|
@ -3555,8 +3588,10 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
|
|
|
|
|
MR_DEPENDENCE_BASE (dest) = base;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tree mask = integer_zero_node;
|
|
|
|
|
if (!bswap_res)
|
|
|
|
|
tree mask;
|
|
|
|
|
if (bswap_res)
|
|
|
|
|
mask = integer_zero_node;
|
|
|
|
|
else
|
|
|
|
|
mask = native_interpret_expr (int_type,
|
|
|
|
|
group->mask + try_pos
|
|
|
|
|
- start_byte_pos,
|
|
|
|
@ -3582,7 +3617,7 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
|
|
|
|
|
|
|
|
|
|
unsigned HOST_WIDE_INT load_align = group->load_align[j];
|
|
|
|
|
unsigned HOST_WIDE_INT align_bitpos
|
|
|
|
|
= known_alignment (try_pos * BITS_PER_UNIT
|
|
|
|
|
= known_alignment (try_bitpos
|
|
|
|
|
- split_store->orig_stores[0]->bitpos
|
|
|
|
|
+ op.bitpos);
|
|
|
|
|
if (align_bitpos & (load_align - 1))
|
|
|
|
@ -3594,7 +3629,7 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
|
|
|
|
|
= build_aligned_type (load_int_type, load_align);
|
|
|
|
|
|
|
|
|
|
poly_uint64 load_pos
|
|
|
|
|
= exact_div (try_pos * BITS_PER_UNIT
|
|
|
|
|
= exact_div (try_bitpos
|
|
|
|
|
- split_store->orig_stores[0]->bitpos
|
|
|
|
|
+ op.bitpos,
|
|
|
|
|
BITS_PER_UNIT);
|
|
|
|
@ -3728,6 +3763,45 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If bit insertion is required, we use the source as an accumulator
|
|
|
|
|
into which the successive bit-field values are manually inserted.
|
|
|
|
|
FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
|
|
|
|
|
if (group->bit_insertion)
|
|
|
|
|
FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
|
|
|
|
|
if (info->rhs_code == BIT_INSERT_EXPR
|
|
|
|
|
&& info->bitpos < try_bitpos + try_size
|
|
|
|
|
&& info->bitpos + info->bitsize > try_bitpos)
|
|
|
|
|
{
|
|
|
|
|
/* Mask, truncate, convert to final type, shift and ior into
|
|
|
|
|
the accumulator. Note that every step can be a no-op. */
|
|
|
|
|
const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
|
|
|
|
|
const HOST_WIDE_INT end_gap
|
|
|
|
|
= (try_bitpos + try_size) - (info->bitpos + info->bitsize);
|
|
|
|
|
tree tem = info->ops[0].val;
|
|
|
|
|
if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
|
|
|
|
|
{
|
|
|
|
|
const unsigned HOST_WIDE_INT imask
|
|
|
|
|
= (HOST_WIDE_INT_1U << info->bitsize) - 1;
|
|
|
|
|
tem = gimple_build (&seq, loc,
|
|
|
|
|
BIT_AND_EXPR, TREE_TYPE (tem), tem,
|
|
|
|
|
build_int_cst (TREE_TYPE (tem),
|
|
|
|
|
imask));
|
|
|
|
|
}
|
|
|
|
|
const HOST_WIDE_INT shift
|
|
|
|
|
= (BYTES_BIG_ENDIAN ? end_gap : start_gap);
|
|
|
|
|
if (shift < 0)
|
|
|
|
|
tem = gimple_build (&seq, loc,
|
|
|
|
|
RSHIFT_EXPR, TREE_TYPE (tem), tem,
|
|
|
|
|
build_int_cst (NULL_TREE, -shift));
|
|
|
|
|
tem = gimple_convert (&seq, loc, int_type, tem);
|
|
|
|
|
if (shift > 0)
|
|
|
|
|
tem = gimple_build (&seq, loc,
|
|
|
|
|
LSHIFT_EXPR, int_type, tem,
|
|
|
|
|
build_int_cst (NULL_TREE, shift));
|
|
|
|
|
src = gimple_build (&seq, loc,
|
|
|
|
|
BIT_IOR_EXPR, int_type, tem, src);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!integer_zerop (mask))
|
|
|
|
|
{
|
|
|
|
|
tree tem = make_ssa_name (int_type);
|
|
|
|
@ -3798,7 +3872,7 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
|
|
|
|
|
if (dump_file)
|
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file,
|
|
|
|
|
"New sequence of %u stmts to replace old one of %u stmts\n",
|
|
|
|
|
"New sequence of %u stores to replace old one of %u stores\n",
|
|
|
|
|
split_stores.length (), orig_num_stmts);
|
|
|
|
|
if (dump_flags & TDF_DETAILS)
|
|
|
|
|
print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
|
|
|
|
@ -4120,6 +4194,7 @@ pass_store_merging::process_store (gimple *stmt)
|
|
|
|
|
def_stmt = SSA_NAME_DEF_STMT (rhs1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (rhs_code == ERROR_MARK && !invalid)
|
|
|
|
|
switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
|
|
|
|
|
{
|
|
|
|
@ -4156,12 +4231,12 @@ pass_store_merging::process_store (gimple *stmt)
|
|
|
|
|
invalid = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unsigned HOST_WIDE_INT const_bitsize;
|
|
|
|
|
if (bitsize.is_constant (&const_bitsize)
|
|
|
|
|
&& multiple_p (const_bitsize, BITS_PER_UNIT)
|
|
|
|
|
&& multiple_p (bitpos, BITS_PER_UNIT)
|
|
|
|
|
&& (const_bitsize % BITS_PER_UNIT) == 0
|
|
|
|
|
&& const_bitsize <= 64
|
|
|
|
|
&& BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
|
|
|
|
|
&& multiple_p (bitpos, BITS_PER_UNIT))
|
|
|
|
|
{
|
|
|
|
|
ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
|
|
|
|
|
if (ins_stmt)
|
|
|
|
@ -4188,6 +4263,28 @@ pass_store_merging::process_store (gimple *stmt)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (invalid
|
|
|
|
|
&& bitsize.is_constant (&const_bitsize)
|
|
|
|
|
&& ((const_bitsize % BITS_PER_UNIT) != 0
|
|
|
|
|
|| !multiple_p (bitpos, BITS_PER_UNIT))
|
|
|
|
|
&& const_bitsize <= 64)
|
|
|
|
|
{
|
|
|
|
|
/* Bypass a truncating conversion to the bit-field type. */
|
|
|
|
|
if (is_gimple_assign (def_stmt) && CONVERT_EXPR_CODE_P (rhs_code))
|
|
|
|
|
{
|
|
|
|
|
tree rhs1 = gimple_assign_rhs1 (def_stmt);
|
|
|
|
|
if (TREE_CODE (rhs1) == SSA_NAME
|
|
|
|
|
&& INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
|
|
|
|
|
&& const_bitsize <= TYPE_PRECISION (TREE_TYPE (rhs1)))
|
|
|
|
|
rhs = rhs1;
|
|
|
|
|
}
|
|
|
|
|
rhs_code = BIT_INSERT_EXPR;
|
|
|
|
|
ops[0].val = rhs;
|
|
|
|
|
ops[0].base_addr = NULL_TREE;
|
|
|
|
|
ops[1].base_addr = NULL_TREE;
|
|
|
|
|
invalid = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
|
|
|
|
|