re PR rtl-optimization/33927 (replace_read in dse.c could handle cases where GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode) (and the size is the same))

gcc/
	PR rtl-optimization/33927
	* Makefile.in (dse.o): Depend on $(TM_P_H).
	* expr.h (extract_low_bits): Declare.
	* expmed.c (extract_low_bits): New function.
	* rtlhooks.c (gen_lowpart_general): Generalize SUBREG handling.
	* dse.c: Include tm_p.h.
	(find_shift_sequence): Remove the read_reg argument and return the
	read value.  Emit the instructions instead of returning them.
	Iterate on new_mode rather than calculating it each time.
	Check MODES_TIEABLE_P.  Use simplify_gen_subreg to convert the
	source to NEW_MODE and extract_low_bits to convert the shifted
	value to READ_MODE.
	(replace_read): Allow the load and store to have different mode
	classes.  Use extract_low_bits when SHIFT == 0.  Create the shift
	or extraction instructions before trying the replacement.  Update
	dump-file code accordingly, avoiding use of REGNO (store_info->rhs).

gcc/testsuite/
	* gcc.target/mips/dse-1.c: Add checks for zeros.

From-SVN: r133452
This commit is contained in:
Richard Sandiford 2008-03-22 19:37:53 +00:00 committed by Richard Sandiford
parent 48e55066d5
commit 18b526e806
8 changed files with 167 additions and 102 deletions

View File

@ -1,3 +1,22 @@
2008-03-22 Richard Sandiford <rsandifo@nildram.co.uk>
PR rtl-optimization/33927
* Makefile.in (dse.o): Depend on $(TM_P_H).
* expr.h (extract_low_bits): Declare.
* expmed.c (extract_low_bits): New function.
* rtlhooks.c (gen_lowpart_general): Generalize SUBREG handling.
* dse.c: Include tm_p.h.
(find_shift_sequence): Remove the read_reg argument and return the
read value. Emit the instructions instead of returning them.
Iterate on new_mode rather than calculating it each time.
Check MODES_TIEABLE_P. Use simplify_gen_subreg to convert the
source to NEW_MODE and extract_low_bits to convert the shifted
value to READ_MODE.
(replace_read): Allow the load and store to have different mode
classes. Use extract_low_bits when SHIFT == 0. Create the shift
or extraction instructions before trying the replacement. Update
dump-file code accordingly, avoiding use of REGNO (store_info->rhs).
2008-03-22 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (assign_386_stack_local): Align DImode slots

View File

@ -2555,8 +2555,8 @@ dce.o : dce.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
$(TREE_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) $(DF_H) cselib.h \
$(DBGCNT_H) dce.h timevar.h tree-pass.h $(DBGCNT_H)
dse.o : dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
$(TREE_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
$(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) timevar.h tree-pass.h \
$(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h \
$(RECOG_H) $(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) timevar.h tree-pass.h \
alloc-pool.h $(ALIAS_H) dse.h $(OPTABS_H)
fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
toplev.h insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H) $(BASIC_BLOCK_H) \

157
gcc/dse.c
View File

@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
@ -1383,9 +1384,9 @@ record_store (rtx body, bb_info_t bb_info)
if (store_info->is_set
/* No place to keep the value after ra. */
&& !reload_completed
/* The careful reviewer may wish to comment my checking that the
rhs of a store is always a reg. */
&& REG_P (SET_SRC (body))
&& (REG_P (SET_SRC (body))
|| GET_CODE (SET_SRC (body)) == SUBREG
|| CONSTANT_P (SET_SRC (body)))
/* Sometimes the store and reload is used for truncation and
rounding. */
&& !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
@ -1418,15 +1419,15 @@ dump_insn_info (const char * start, insn_info_t insn_info)
shift. */
static rtx
find_shift_sequence (rtx read_reg,
int access_size,
find_shift_sequence (int access_size,
store_info_t store_info,
read_info_t read_info,
int shift)
{
enum machine_mode store_mode = GET_MODE (store_info->mem);
enum machine_mode read_mode = GET_MODE (read_info->mem);
rtx chosen_seq = NULL;
enum machine_mode new_mode;
rtx read_reg = NULL;
/* Some machines like the x86 have shift insns for each size of
operand. Other machines like the ppc or the ia-64 may only have
@ -1435,21 +1436,31 @@ find_shift_sequence (rtx read_reg,
justify the value we want to read but is available in one insn on
the machine. */
for (; access_size <= UNITS_PER_WORD; access_size *= 2)
for (new_mode = smallest_mode_for_size (access_size * BITS_PER_UNIT,
MODE_INT);
GET_MODE_BITSIZE (new_mode) <= BITS_PER_WORD;
new_mode = GET_MODE_WIDER_MODE (new_mode))
{
rtx target, new_reg, shift_seq, insn;
enum machine_mode new_mode;
rtx target, new_reg, shift_seq, insn, new_lhs;
int cost;
/* Try a wider mode if truncating the store mode to ACCESS_SIZE
bytes requires a real instruction. */
if (access_size < GET_MODE_SIZE (store_mode)
&& !TRULY_NOOP_TRUNCATION (access_size * BITS_PER_UNIT,
/* Try a wider mode if truncating the store mode to NEW_MODE
requires a real instruction. */
if (GET_MODE_BITSIZE (new_mode) < GET_MODE_BITSIZE (store_mode)
&& !TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (new_mode),
GET_MODE_BITSIZE (store_mode)))
continue;
new_mode = smallest_mode_for_size (access_size * BITS_PER_UNIT,
MODE_INT);
/* Also try a wider mode if the necessary punning is either not
desirable or not possible. */
if (!CONSTANT_P (store_info->rhs)
&& !MODES_TIEABLE_P (new_mode, store_mode))
continue;
new_lhs = simplify_gen_subreg (new_mode, copy_rtx (store_info->rhs),
store_mode, 0);
if (new_lhs == NULL_RTX)
continue;
new_reg = gen_reg_rtx (new_mode);
start_sequence ();
@ -1485,31 +1496,13 @@ find_shift_sequence (rtx read_reg,
take the value from the store and put it into the
shift pseudo, then shift it, then generate another
move to put in into the target of the read. */
start_sequence ();
emit_move_insn (new_reg, gen_lowpart (new_mode, store_info->rhs));
emit_move_insn (new_reg, new_lhs);
emit_insn (shift_seq);
convert_move (read_reg, new_reg, 1);
if (dump_file)
{
fprintf (dump_file, " -- adding extract insn r%d:%s = r%d:%s\n",
REGNO (new_reg), GET_MODE_NAME (new_mode),
REGNO (store_info->rhs), GET_MODE_NAME (store_mode));
fprintf (dump_file, " -- with shift of r%d by %d\n",
REGNO(new_reg), shift);
fprintf (dump_file, " -- and second extract insn r%d:%s = r%d:%s\n",
REGNO (read_reg), GET_MODE_NAME (read_mode),
REGNO (new_reg), GET_MODE_NAME (new_mode));
}
/* Get the three insn sequence and return it. */
chosen_seq = get_insns ();
end_sequence ();
read_reg = extract_low_bits (read_mode, new_mode, new_reg);
break;
}
return chosen_seq;
return read_reg;
}
@ -1552,16 +1545,11 @@ replace_read (store_info_t store_info, insn_info_t store_insn,
enum machine_mode read_mode = GET_MODE (read_info->mem);
int shift;
int access_size; /* In bytes. */
rtx read_reg = gen_reg_rtx (read_mode);
rtx shift_seq = NULL;
rtx insns, read_reg;
if (!dbg_cnt (dse))
return false;
if (GET_MODE_CLASS (read_mode) != MODE_INT
|| GET_MODE_CLASS (store_mode) != MODE_INT)
return false;
/* To get here the read is within the boundaries of the write so
shift will never be negative. Start out with the shift being in
bytes. */
@ -1575,62 +1563,43 @@ replace_read (store_info_t store_info, insn_info_t store_insn,
/* From now on it is bits. */
shift *= BITS_PER_UNIT;
/* We need to keep this in perspective. We are replacing a read
/* Create a sequence of instructions to set up the read register.
This sequence goes immediately before the store and its result
is read by the load.
We need to keep this in perspective. We are replacing a read
with a sequence of insns, but the read will almost certainly be
in cache, so it is not going to be an expensive one. Thus, we
are not willing to do a multi insn shift or worse a subroutine
call to get rid of the read. */
if (shift)
{
if (access_size > UNITS_PER_WORD)
return false;
shift_seq = find_shift_sequence (read_reg, access_size, store_info,
read_info, shift);
if (!shift_seq)
return false;
}
if (dump_file)
fprintf (dump_file, "replacing load at %d from store at %d\n",
INSN_UID (read_insn->insn), INSN_UID (store_insn->insn));
fprintf (dump_file, "trying to replace %smode load in insn %d"
" from %smode store in insn %d\n",
GET_MODE_NAME (read_mode), INSN_UID (read_insn->insn),
GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
start_sequence ();
if (shift)
read_reg = find_shift_sequence (access_size, store_info, read_info, shift);
else
read_reg = extract_low_bits (read_mode, store_mode,
copy_rtx (store_info->rhs));
if (read_reg == NULL_RTX)
{
end_sequence ();
if (dump_file)
fprintf (dump_file, " -- could not extract bits of stored value\n");
return false;
}
/* Force the value into a new register so that it won't be clobbered
between the store and the load. */
read_reg = copy_to_mode_reg (read_mode, read_reg);
insns = get_insns ();
end_sequence ();
if (validate_change (read_insn->insn, loc, read_reg, 0))
{
rtx insns;
deferred_change_t deferred_change = pool_alloc (deferred_change_pool);
if (read_mode == store_mode)
{
start_sequence ();
/* The modes are the same and everything lines up. Just
generate a simple move. */
emit_move_insn (read_reg, store_info->rhs);
if (dump_file)
fprintf (dump_file, " -- adding move insn r%d = r%d\n",
REGNO (read_reg), REGNO (store_info->rhs));
insns = get_insns ();
end_sequence ();
}
else if (shift)
insns = shift_seq;
else
{
/* The modes are different but the lsb are in the same
place, we need to extract the value in the right from the
rhs of the store. */
start_sequence ();
convert_move (read_reg, store_info->rhs, 1);
if (dump_file)
fprintf (dump_file, " -- adding extract insn r%d:%s = r%d:%s\n",
REGNO (read_reg), GET_MODE_NAME (read_mode),
REGNO (store_info->rhs), GET_MODE_NAME (store_mode));
insns = get_insns ();
end_sequence ();
}
/* Insert this right before the store insn where it will be safe
from later insns that might change it before the read. */
emit_insn_before (insns, store_insn->insn);
@ -1668,12 +1637,22 @@ replace_read (store_info_t store_info, insn_info_t store_insn,
rest of dse, play like this read never happened. */
read_insn->read_rec = read_info->next;
pool_free (read_info_pool, read_info);
if (dump_file)
{
fprintf (dump_file, " -- replaced the loaded MEM with ");
print_simple_rtl (dump_file, read_reg);
fprintf (dump_file, "\n");
}
return true;
}
else
{
if (dump_file)
fprintf (dump_file, " -- validation failure\n");
{
fprintf (dump_file, " -- replacing the loaded MEM with ");
print_simple_rtl (dump_file, read_reg);
fprintf (dump_file, " led to an invalid instruction\n");
}
return false;
}
}

View File

@ -2098,6 +2098,66 @@ extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
NULL_RTX, 0);
}
/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
MODE, fill the upper bits with zeros. Fail if the layout of either
mode is unknown (as for CC modes) or if the extraction would involve
unprofitable mode punning. Return the value on success, otherwise
return null.
This is different from gen_lowpart* in these respects:
- the returned value must always be considered an rvalue
- when MODE is wider than SRC_MODE, the extraction involves
a zero extension
- when MODE is smaller than SRC_MODE, the extraction involves
a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
In other words, this routine performs a computation, whereas the
gen_lowpart* routines are conceptually lvalue or rvalue subreg
operations. */
rtx
extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
{
enum machine_mode int_mode, src_int_mode;
if (mode == src_mode)
return src;
if (CONSTANT_P (src))
return simplify_gen_subreg (mode, src, src_mode,
subreg_lowpart_offset (mode, src_mode));
if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
return NULL_RTX;
if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
&& MODES_TIEABLE_P (mode, src_mode))
{
rtx x = gen_lowpart_common (mode, src);
if (x)
return x;
}
src_int_mode = int_mode_for_mode (src_mode);
int_mode = int_mode_for_mode (mode);
if (src_int_mode == BLKmode || int_mode == BLKmode)
return NULL_RTX;
if (!MODES_TIEABLE_P (src_int_mode, src_mode))
return NULL_RTX;
if (!MODES_TIEABLE_P (int_mode, mode))
return NULL_RTX;
src = gen_lowpart (src_int_mode, src);
src = convert_modes (int_mode, src_int_mode, src, true);
src = gen_lowpart (mode, src);
return src;
}
/* Add INC into TARGET. */
void

View File

@ -744,6 +744,7 @@ extern void store_bit_field (rtx, unsigned HOST_WIDE_INT,
extern rtx extract_bit_field (rtx, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT, int, rtx,
enum machine_mode, enum machine_mode);
extern rtx extract_low_bits (enum machine_mode, enum machine_mode, rtx);
extern rtx expand_mult (enum machine_mode, rtx, rtx, rtx, int);
extern rtx expand_mult_highpart_adjust (enum machine_mode, rtx, rtx, rtx, rtx, int);

View File

@ -43,11 +43,9 @@ gen_lowpart_general (enum machine_mode mode, rtx x)
if (result)
return result;
/* If it's a REG, it must be a hard reg that's not valid in MODE. */
else if (REG_P (x)
/* Or we could have a subreg of a floating point value. */
|| (GET_CODE (x) == SUBREG
&& FLOAT_MODE_P (GET_MODE (SUBREG_REG (x)))))
/* Handle SUBREGs and hard REGs that were rejected by
simplify_gen_subreg. */
else if (REG_P (x) || GET_CODE (x) == SUBREG)
{
result = gen_lowpart_common (mode, copy_to_reg (x));
gcc_assert (result != 0);

View File

@ -1,3 +1,7 @@
2008-03-22 Richard Sandiford <rsandifo@nildram.co.uk>
* gcc.target/mips/dse-1.c: Add checks for zeros.
2008-03-21 Andrew Pinski <andrew_pinski@playstation.sony.com>
PR target/27946

View File

@ -1,7 +1,4 @@
/* ??? Further to the subreg comment below, we can't rely on any of the
tests passing unless we handle subregs, and the patch to do so has
been rejected for the time being. */
/* { dg-do compile { target { ! *-*-* } } } */
/* { dg-do compile } */
/* { dg-mips-options "-mgp64 -O" } */
#define TEST(ID, TYPE1, TYPE2) \
@ -18,6 +15,13 @@
u->m2 = x; \
return (u->m1[0] \
+ u->m1[sizeof (TYPE2) / sizeof (TYPE1) - 1]); \
} \
\
TYPE1 __attribute__((nomips16)) \
g##ID (union u##ID *u) \
{ \
u->m2 = 0; \
return (u->m1[0] | u->m1[1]); \
}
TEST (1, unsigned int, unsigned long long);
@ -32,8 +36,8 @@ TEST (8, short, int);
TEST (9, unsigned char, unsigned int);
TEST (10, signed char, int);
/* DSE isn't yet read to consider stores of subregs, so the corresponding
(char, short) tests won't pass. */
TEST (11, unsigned char, unsigned short);
TEST (12, signed char, short);
/* { dg-final { scan-assembler-not "\tlh\t" } } */
/* { dg-final { scan-assembler-not "\tlhu\t" } } */