Makefile.in (lower-subreg.o, [...]): Depend on lower-subreg.h.
gcc/ 2012-03-31 Kenneth Zadeck <zadeck@naturalbridge.com> Richard Sandiford <r.sandiford@uk.ibm.com> * Makefile.in (lower-subreg.o, target-globals.o): Depend on lower-subreg.h. * lower-subreg.h: New file. * target-globals.h (this_target_lower_subreg): Declare. (target_globals): Add lower_subreg; (restore_target_globals): Restore this_target_lower_subreg. * target-globals.c: Include it. (default_target_globals): Add default_target_lower_subreg. (save_target_globals): Initialize target_lower_subreg. * rtl.h (init_lower_subreg): Added declaration. * toplev.c (backend_init_target): Call initializer for lower-subreg pass. * lower-subreg.c (LOG_COSTS, FORCE_LOWERING): New macros. (default_target_lower_subreg): New variable. (this_target_lower_subreg): Likewise. (twice_word_mode, choices): New macros. (shift_cost, compute_splitting_shift, compute_costs) (init_lower_subreg): New functions. (resolve_simple_move): Add speed_p argument. Check choices. (find_pseudo_copy): Don't check the mode size here. (resolve_simple_move): Assert the mode size. (find_decomposable_shift_zext): Add speed_p argument and return a bool. Check choices. (resolve_shift_zext): Add comment. (dump_shift_choices, dump_choices): New functions. (decompose_multiword_subregs): Dump list of profitable transformations. Add code to skip non profitable transformations. Update calls to simple_move and find_decomposable_shift_zext. Co-Authored-By: Richard Sandiford <r.sandiford@uk.ibm.com> From-SVN: r187015
This commit is contained in:
parent
90911ab64f
commit
af4ba42308
|
@ -1,3 +1,35 @@
|
|||
2012-05-01 Kenneth Zadeck <zadeck@naturalbridge.com>
|
||||
Richard Sandiford <r.sandiford@uk.ibm.com>
|
||||
|
||||
* Makefile.in (lower-subreg.o, target-globals.o): Depend on
|
||||
lower-subreg.h.
|
||||
* lower-subreg.h: New file.
|
||||
* target-globals.h (this_target_lower_subreg): Declare.
|
||||
(target_globals): Add lower_subreg;
|
||||
(restore_target_globals): Restore this_target_lower_subreg.
|
||||
* target-globals.c: Include it.
|
||||
(default_target_globals): Add default_target_lower_subreg.
|
||||
(save_target_globals): Initialize target_lower_subreg.
|
||||
* rtl.h (init_lower_subreg): Added declaration.
|
||||
* toplev.c (backend_init_target): Call initializer for lower-subreg
|
||||
pass.
|
||||
* lower-subreg.c (LOG_COSTS, FORCE_LOWERING): New macros.
|
||||
(default_target_lower_subreg): New variable.
|
||||
(this_target_lower_subreg): Likewise.
|
||||
(twice_word_mode, choices): New macros.
|
||||
(shift_cost, compute_splitting_shift, compute_costs)
|
||||
(init_lower_subreg): New functions.
|
||||
(resolve_simple_move): Add speed_p argument. Check choices.
|
||||
(find_pseudo_copy): Don't check the mode size here.
|
||||
(resolve_simple_move): Assert the mode size.
|
||||
(find_decomposable_shift_zext): Add speed_p argument and return
|
||||
a bool. Check choices.
|
||||
(resolve_shift_zext): Add comment.
|
||||
(dump_shift_choices, dump_choices): New functions.
|
||||
(decompose_multiword_subregs): Dump list of profitable
|
||||
transformations. Add code to skip non profitable transformations.
|
||||
Update calls to simple_move and find_decomposable_shift_zext.
|
||||
|
||||
2012-05-01 Ian Bolton <ian.bolton@arm.com>
|
||||
Sameera Deshpande <sameera.deshpande@arm.com>
|
||||
Greta Yorsh <greta.yorsh@arm.com>
|
||||
|
|
|
@ -3428,11 +3428,13 @@ dbgcnt.o: dbgcnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(DIAGNOSTIC_CORE_H) $(DB
|
|||
lower-subreg.o : lower-subreg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
||||
$(MACHMODE_H) $(TM_H) $(RTL_H) $(TM_P_H) $(TIMEVAR_H) $(FLAGS_H) \
|
||||
insn-config.h $(BASIC_BLOCK_H) $(RECOG_H) $(OBSTACK_H) $(BITMAP_H) \
|
||||
$(EXPR_H) $(EXCEPT_H) $(REGS_H) $(TREE_PASS_H) $(DF_H) dce.h
|
||||
$(EXPR_H) $(EXCEPT_H) $(REGS_H) $(TREE_PASS_H) $(DF_H) dce.h \
|
||||
lower-subreg.h
|
||||
target-globals.o : target-globals.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
||||
$(TM_H) insn-config.h $(MACHMODE_H) $(GGC_H) toplev.h target-globals.h \
|
||||
$(FLAGS_H) $(REGS_H) $(RTL_H) reload.h expmed.h $(EXPR_H) $(OPTABS_H) \
|
||||
$(LIBFUNCS_H) $(CFGLOOP_H) $(IRA_INT_H) builtins.h gcse.h bb-reorder.h
|
||||
$(LIBFUNCS_H) $(CFGLOOP_H) $(IRA_INT_H) builtins.h gcse.h bb-reorder.h \
|
||||
lower-subreg.h
|
||||
hw-doloop.o : hw-doloop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
|
||||
$(RTL_H) $(FLAGS_H) $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) $(TM_P_H) \
|
||||
$(DF_H) $(CFGLAYOUT_H) $(CFGLOOP_H) output.h $(RECOG_H) $(TARGET_H) \
|
||||
|
|
|
@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "regs.h"
|
||||
#include "tree-pass.h"
|
||||
#include "df.h"
|
||||
#include "lower-subreg.h"
|
||||
|
||||
#ifdef STACK_GROWS_DOWNWARD
|
||||
# undef STACK_GROWS_DOWNWARD
|
||||
|
@ -52,10 +53,35 @@ DEF_VEC_P (bitmap);
|
|||
DEF_VEC_ALLOC_P (bitmap,heap);
|
||||
|
||||
/* Decompose multi-word pseudo-registers into individual
|
||||
pseudo-registers when possible. This is possible when all the uses
|
||||
of a multi-word register are via SUBREG, or are copies of the
|
||||
register to another location. Breaking apart the register permits
|
||||
more CSE and permits better register allocation. */
|
||||
pseudo-registers when possible and profitable. This is possible
|
||||
when all the uses of a multi-word register are via SUBREG, or are
|
||||
copies of the register to another location. Breaking apart the
|
||||
register permits more CSE and permits better register allocation.
|
||||
This is profitable if the machine does not have move instructions
|
||||
to do this.
|
||||
|
||||
This pass only splits moves with modes that are wider than
|
||||
word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer
|
||||
modes that are twice the width of word_mode. The latter could be
|
||||
generalized if there was a need to do this, but the trend in
|
||||
architectures is to not need this.
|
||||
|
||||
There are two useful preprocessor defines for use by maintainers:
|
||||
|
||||
#define LOG_COSTS 1
|
||||
|
||||
if you wish to see the actual cost estimates that are being used
|
||||
for each mode wider than word mode and the cost estimates for zero
|
||||
extension and the shifts. This can be useful when port maintainers
|
||||
are tuning insn rtx costs.
|
||||
|
||||
#define FORCE_LOWERING 1
|
||||
|
||||
if you wish to test the pass with all the transformation forced on.
|
||||
This can be useful for finding bugs in the transformations. */
|
||||
|
||||
#define LOG_COSTS 0
|
||||
#define FORCE_LOWERING 0
|
||||
|
||||
/* Bit N in this bitmap is set if regno N is used in a context in
|
||||
which we can decompose it. */
|
||||
|
@ -75,8 +101,190 @@ static bitmap subreg_context;
|
|||
copy from reg M to reg N. */
|
||||
static VEC(bitmap,heap) *reg_copy_graph;
|
||||
|
||||
/* Return whether X is a simple object which we can take a word_mode
|
||||
subreg of. */
|
||||
struct target_lower_subreg default_target_lower_subreg;
|
||||
#if SWITCHABLE_TARGET
|
||||
struct target_lower_subreg *this_target_lower_subreg
|
||||
= &default_target_lower_subreg;
|
||||
#endif
|
||||
|
||||
#define twice_word_mode \
|
||||
this_target_lower_subreg->x_twice_word_mode
|
||||
#define choices \
|
||||
this_target_lower_subreg->x_choices
|
||||
|
||||
/* RTXes used while computing costs. */
|
||||
struct cost_rtxes {
|
||||
/* Source and target registers. */
|
||||
rtx source;
|
||||
rtx target;
|
||||
|
||||
/* A twice_word_mode ZERO_EXTEND of SOURCE. */
|
||||
rtx zext;
|
||||
|
||||
/* A shift of SOURCE. */
|
||||
rtx shift;
|
||||
|
||||
/* A SET of TARGET. */
|
||||
rtx set;
|
||||
};
|
||||
|
||||
/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
|
||||
rtxes in RTXES. SPEED_P selects between the speed and size cost. */
|
||||
|
||||
static int
|
||||
shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
|
||||
enum machine_mode mode, int op1)
|
||||
{
|
||||
PUT_MODE (rtxes->target, mode);
|
||||
PUT_CODE (rtxes->shift, code);
|
||||
PUT_MODE (rtxes->shift, mode);
|
||||
PUT_MODE (rtxes->source, mode);
|
||||
XEXP (rtxes->shift, 1) = GEN_INT (op1);
|
||||
SET_SRC (rtxes->set) = rtxes->shift;
|
||||
return insn_rtx_cost (rtxes->set, speed_p);
|
||||
}
|
||||
|
||||
/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
|
||||
to true if it is profitable to split a double-word CODE shift
|
||||
of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
|
||||
for speed or size profitability.
|
||||
|
||||
Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
|
||||
the cost of moving zero into a word-mode register. WORD_MOVE_COST
|
||||
is the cost of moving between word registers. */
|
||||
|
||||
static void
|
||||
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
|
||||
bool *splitting, enum rtx_code code,
|
||||
int word_move_zero_cost, int word_move_cost)
|
||||
{
|
||||
int wide_cost, narrow_cost, i;
|
||||
|
||||
for (i = 0; i < BITS_PER_WORD; i++)
|
||||
{
|
||||
wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
|
||||
i + BITS_PER_WORD);
|
||||
if (i == 0)
|
||||
narrow_cost = word_move_cost;
|
||||
else
|
||||
narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
|
||||
|
||||
if (LOG_COSTS)
|
||||
fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
|
||||
GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
|
||||
i + BITS_PER_WORD, wide_cost, narrow_cost,
|
||||
word_move_zero_cost);
|
||||
|
||||
if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost)
|
||||
splitting[i] = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute what we should do when optimizing for speed or size; SPEED_P
|
||||
selects which. Use RTXES for computing costs. */
|
||||
|
||||
static void
|
||||
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
|
||||
{
|
||||
unsigned int i;
|
||||
int word_move_zero_cost, word_move_cost;
|
||||
|
||||
SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
|
||||
word_move_zero_cost = insn_rtx_cost (rtxes->set, speed_p);
|
||||
|
||||
SET_SRC (rtxes->set) = rtxes->source;
|
||||
word_move_cost = insn_rtx_cost (rtxes->set, speed_p);
|
||||
|
||||
if (LOG_COSTS)
|
||||
fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
|
||||
GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
|
||||
|
||||
for (i = 0; i < MAX_MACHINE_MODE; i++)
|
||||
{
|
||||
enum machine_mode mode = (enum machine_mode) i;
|
||||
int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
|
||||
if (factor > 1)
|
||||
{
|
||||
int mode_move_cost;
|
||||
|
||||
PUT_MODE (rtxes->target, mode);
|
||||
PUT_MODE (rtxes->source, mode);
|
||||
mode_move_cost = insn_rtx_cost (rtxes->set, speed_p);
|
||||
|
||||
if (LOG_COSTS)
|
||||
fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
|
||||
GET_MODE_NAME (mode), mode_move_cost,
|
||||
word_move_cost, factor);
|
||||
|
||||
if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
|
||||
{
|
||||
choices[speed_p].move_modes_to_split[i] = true;
|
||||
choices[speed_p].something_to_do = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* For the moves and shifts, the only case that is checked is one
|
||||
where the mode of the target is an integer mode twice the width
|
||||
of the word_mode.
|
||||
|
||||
If it is not profitable to split a double word move then do not
|
||||
even consider the shifts or the zero extension. */
|
||||
if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
|
||||
{
|
||||
int zext_cost;
|
||||
|
||||
/* The only case here to check to see if moving the upper part with a
|
||||
zero is cheaper than doing the zext itself. */
|
||||
PUT_MODE (rtxes->target, twice_word_mode);
|
||||
PUT_MODE (rtxes->source, word_mode);
|
||||
SET_SRC (rtxes->set) = rtxes->zext;
|
||||
zext_cost = insn_rtx_cost (rtxes->set, speed_p);
|
||||
|
||||
if (LOG_COSTS)
|
||||
fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
|
||||
GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
|
||||
zext_cost, word_move_cost, word_move_zero_cost);
|
||||
|
||||
if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
|
||||
choices[speed_p].splitting_zext = true;
|
||||
|
||||
compute_splitting_shift (speed_p, rtxes,
|
||||
choices[speed_p].splitting_ashift, ASHIFT,
|
||||
word_move_zero_cost, word_move_cost);
|
||||
compute_splitting_shift (speed_p, rtxes,
|
||||
choices[speed_p].splitting_lshiftrt, LSHIFTRT,
|
||||
word_move_zero_cost, word_move_cost);
|
||||
}
|
||||
}
|
||||
|
||||
/* Do one-per-target initialisation. This involves determining
|
||||
which operations on the machine are profitable. If none are found,
|
||||
then the pass just returns when called. */
|
||||
|
||||
void
|
||||
init_lower_subreg (void)
|
||||
{
|
||||
struct cost_rtxes rtxes;
|
||||
|
||||
memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
|
||||
|
||||
twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
|
||||
|
||||
rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
|
||||
rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
|
||||
rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
|
||||
rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
|
||||
rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
|
||||
|
||||
if (LOG_COSTS)
|
||||
fprintf (stderr, "\nSize costs\n==========\n\n");
|
||||
compute_costs (false, &rtxes);
|
||||
|
||||
if (LOG_COSTS)
|
||||
fprintf (stderr, "\nSpeed costs\n===========\n\n");
|
||||
compute_costs (true, &rtxes);
|
||||
}
|
||||
|
||||
static bool
|
||||
simple_move_operand (rtx x)
|
||||
|
@ -101,12 +309,15 @@ simple_move_operand (rtx x)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* If INSN is a single set between two objects, return the single set.
|
||||
Such an insn can always be decomposed. INSN should have been
|
||||
passed to recog and extract_insn before this is called. */
|
||||
/* If INSN is a single set between two objects that we want to split,
|
||||
return the single set. SPEED_P says whether we are optimizing
|
||||
INSN for speed or size.
|
||||
|
||||
INSN should have been passed to recog and extract_insn before this
|
||||
is called. */
|
||||
|
||||
static rtx
|
||||
simple_move (rtx insn)
|
||||
simple_move (rtx insn, bool speed_p)
|
||||
{
|
||||
rtx x;
|
||||
rtx set;
|
||||
|
@ -150,6 +361,9 @@ simple_move (rtx insn)
|
|||
if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
|
||||
return NULL_RTX;
|
||||
|
||||
if (!choices[speed_p].move_modes_to_split[(int) mode])
|
||||
return NULL_RTX;
|
||||
|
||||
return set;
|
||||
}
|
||||
|
||||
|
@ -173,9 +387,6 @@ find_pseudo_copy (rtx set)
|
|||
if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
|
||||
return false;
|
||||
|
||||
if (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
|
||||
return false;
|
||||
|
||||
b = VEC_index (bitmap, reg_copy_graph, rs);
|
||||
if (b == NULL)
|
||||
{
|
||||
|
@ -668,8 +879,7 @@ resolve_simple_move (rtx set, rtx insn)
|
|||
orig_mode = GET_MODE (dest);
|
||||
|
||||
words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
|
||||
if (words <= 1)
|
||||
return insn;
|
||||
gcc_assert (words > 1);
|
||||
|
||||
start_sequence ();
|
||||
|
||||
|
@ -931,12 +1141,13 @@ resolve_debug (rtx insn)
|
|||
resolve_reg_notes (insn);
|
||||
}
|
||||
|
||||
/* Checks if INSN is a decomposable multiword-shift or zero-extend and
|
||||
sets the decomposable_context bitmap accordingly. A non-zero value
|
||||
is returned if a decomposable insn has been found. */
|
||||
/* Check if INSN is a decomposable multiword-shift or zero-extend and
|
||||
set the decomposable_context bitmap accordingly. SPEED_P is true
|
||||
if we are optimizing INSN for speed rather than size. Return true
|
||||
if INSN is decomposable. */
|
||||
|
||||
static int
|
||||
find_decomposable_shift_zext (rtx insn)
|
||||
static bool
|
||||
find_decomposable_shift_zext (rtx insn, bool speed_p)
|
||||
{
|
||||
rtx set;
|
||||
rtx op;
|
||||
|
@ -944,41 +1155,44 @@ find_decomposable_shift_zext (rtx insn)
|
|||
|
||||
set = single_set (insn);
|
||||
if (!set)
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
op = SET_SRC (set);
|
||||
if (GET_CODE (op) != ASHIFT
|
||||
&& GET_CODE (op) != LSHIFTRT
|
||||
&& GET_CODE (op) != ZERO_EXTEND)
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
op_operand = XEXP (op, 0);
|
||||
if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
|
||||
|| HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
|
||||
|| HARD_REGISTER_NUM_P (REGNO (op_operand))
|
||||
|| !SCALAR_INT_MODE_P (GET_MODE (op)))
|
||||
return 0;
|
||||
|| GET_MODE (op) != twice_word_mode)
|
||||
return false;
|
||||
|
||||
if (GET_CODE (op) == ZERO_EXTEND)
|
||||
{
|
||||
if (GET_MODE (op_operand) != word_mode
|
||||
|| GET_MODE_BITSIZE (GET_MODE (op)) != 2 * BITS_PER_WORD)
|
||||
return 0;
|
||||
|| !choices[speed_p].splitting_zext)
|
||||
return false;
|
||||
}
|
||||
else /* left or right shift */
|
||||
{
|
||||
bool *splitting = (GET_CODE (op) == ASHIFT
|
||||
? choices[speed_p].splitting_ashift
|
||||
: choices[speed_p].splitting_lshiftrt);
|
||||
if (!CONST_INT_P (XEXP (op, 1))
|
||||
|| INTVAL (XEXP (op, 1)) < BITS_PER_WORD
|
||||
|| GET_MODE_BITSIZE (GET_MODE (op_operand)) != 2 * BITS_PER_WORD)
|
||||
return 0;
|
||||
|| !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
|
||||
2 * BITS_PER_WORD - 1)
|
||||
|| !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
|
||||
return false;
|
||||
|
||||
bitmap_set_bit (decomposable_context, REGNO (op_operand));
|
||||
}
|
||||
|
||||
bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
|
||||
|
||||
if (GET_CODE (op) != ZERO_EXTEND)
|
||||
bitmap_set_bit (decomposable_context, REGNO (op_operand));
|
||||
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Decompose a more than word wide shift (in INSN) of a multiword
|
||||
|
@ -1008,6 +1222,8 @@ resolve_shift_zext (rtx insn)
|
|||
|
||||
op_operand = XEXP (op, 0);
|
||||
|
||||
/* We can tear this operation apart only if the regs were already
|
||||
torn apart. */
|
||||
if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
|
||||
return NULL_RTX;
|
||||
|
||||
|
@ -1073,6 +1289,56 @@ resolve_shift_zext (rtx insn)
|
|||
return insns;
|
||||
}
|
||||
|
||||
/* Print to dump_file a description of what we're doing with shift code CODE.
|
||||
SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
|
||||
|
||||
static void
|
||||
dump_shift_choices (enum rtx_code code, bool *splitting)
|
||||
{
|
||||
int i;
|
||||
const char *sep;
|
||||
|
||||
fprintf (dump_file,
|
||||
" Splitting mode %s for %s lowering with shift amounts = ",
|
||||
GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
|
||||
sep = "";
|
||||
for (i = 0; i < BITS_PER_WORD; i++)
|
||||
if (splitting[i])
|
||||
{
|
||||
fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
|
||||
sep = ",";
|
||||
}
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
|
||||
/* Print to dump_file a description of what we're doing when optimizing
|
||||
for speed or size; SPEED_P says which. DESCRIPTION is a description
|
||||
of the SPEED_P choice. */
|
||||
|
||||
static void
|
||||
dump_choices (bool speed_p, const char *description)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
fprintf (dump_file, "Choices when optimizing for %s:\n", description);
|
||||
|
||||
for (i = 0; i < MAX_MACHINE_MODE; i++)
|
||||
if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
|
||||
fprintf (dump_file, " %s mode %s for copy lowering.\n",
|
||||
choices[speed_p].move_modes_to_split[i]
|
||||
? "Splitting"
|
||||
: "Skipping",
|
||||
GET_MODE_NAME ((enum machine_mode) i));
|
||||
|
||||
fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
|
||||
choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
|
||||
GET_MODE_NAME (twice_word_mode));
|
||||
|
||||
dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
|
||||
dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_ashift);
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
|
||||
/* Look for registers which are always accessed via word-sized SUBREGs
|
||||
or via copies. Decompose these registers into several word-sized
|
||||
pseudo-registers. */
|
||||
|
@ -1082,9 +1348,21 @@ decompose_multiword_subregs (void)
|
|||
{
|
||||
unsigned int max;
|
||||
basic_block bb;
|
||||
bool speed_p;
|
||||
|
||||
if (df)
|
||||
df_set_flags (DF_DEFER_INSN_RESCAN);
|
||||
if (dump_file)
|
||||
{
|
||||
dump_choices (false, "size");
|
||||
dump_choices (true, "speed");
|
||||
}
|
||||
|
||||
/* Check if this target even has any modes to consider lowering. */
|
||||
if (!choices[false].something_to_do && !choices[true].something_to_do)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Nothing to do!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
max = max_reg_num ();
|
||||
|
||||
|
@ -1094,24 +1372,38 @@ decompose_multiword_subregs (void)
|
|||
all the insns. */
|
||||
{
|
||||
unsigned int i;
|
||||
bool useful_modes_seen = false;
|
||||
|
||||
for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
|
||||
if (regno_reg_rtx[i] != NULL)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
|
||||
if (choices[false].move_modes_to_split[(int) mode]
|
||||
|| choices[true].move_modes_to_split[(int) mode])
|
||||
{
|
||||
useful_modes_seen = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!useful_modes_seen)
|
||||
{
|
||||
if (regno_reg_rtx[i] != NULL
|
||||
&& GET_MODE_SIZE (GET_MODE (regno_reg_rtx[i])) > UNITS_PER_WORD)
|
||||
break;
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Nothing to lower in this function.\n");
|
||||
return;
|
||||
}
|
||||
if (i == max)
|
||||
return;
|
||||
}
|
||||
|
||||
if (df)
|
||||
run_word_dce ();
|
||||
{
|
||||
df_set_flags (DF_DEFER_INSN_RESCAN);
|
||||
run_word_dce ();
|
||||
}
|
||||
|
||||
/* FIXME: When the dataflow branch is merged, we can change this
|
||||
code to look for each multi-word pseudo-register and to find each
|
||||
insn which sets or uses that register. That should be faster
|
||||
than scanning all the insns. */
|
||||
/* FIXME: It may be possible to change this code to look for each
|
||||
multi-word pseudo-register and to find each insn which sets or
|
||||
uses that register. That should be faster than scanning all the
|
||||
insns. */
|
||||
|
||||
decomposable_context = BITMAP_ALLOC (NULL);
|
||||
non_decomposable_context = BITMAP_ALLOC (NULL);
|
||||
|
@ -1121,6 +1413,7 @@ decompose_multiword_subregs (void)
|
|||
VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
|
||||
memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);
|
||||
|
||||
speed_p = optimize_function_for_speed_p (cfun);
|
||||
FOR_EACH_BB (bb)
|
||||
{
|
||||
rtx insn;
|
||||
|
@ -1138,12 +1431,12 @@ decompose_multiword_subregs (void)
|
|||
|
||||
recog_memoized (insn);
|
||||
|
||||
if (find_decomposable_shift_zext (insn))
|
||||
if (find_decomposable_shift_zext (insn, speed_p))
|
||||
continue;
|
||||
|
||||
extract_insn (insn);
|
||||
|
||||
set = simple_move (insn);
|
||||
set = simple_move (insn, speed_p);
|
||||
|
||||
if (!set)
|
||||
cmi = NOT_SIMPLE_MOVE;
|
||||
|
@ -1197,7 +1490,9 @@ decompose_multiword_subregs (void)
|
|||
FOR_EACH_BB (bb)
|
||||
{
|
||||
rtx insn;
|
||||
bool speed_p;
|
||||
|
||||
speed_p = optimize_bb_for_speed_p (bb);
|
||||
FOR_BB_INSNS (bb, insn)
|
||||
{
|
||||
rtx pat;
|
||||
|
@ -1220,7 +1515,7 @@ decompose_multiword_subregs (void)
|
|||
recog_memoized (insn);
|
||||
extract_insn (insn);
|
||||
|
||||
set = simple_move (insn);
|
||||
set = simple_move (insn, speed_p);
|
||||
if (set)
|
||||
{
|
||||
rtx orig_insn = insn;
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
/* Target-dependent costs for lower-subreg.c.
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef LOWER_SUBREG_H
|
||||
#define LOWER_SUBREG_H 1
|
||||
|
||||
/* Information about whether, and where, lower-subreg should be applied. */
|
||||
struct lower_subreg_choices {
|
||||
/* A boolean vector for move splitting that is indexed by mode and is
|
||||
true for each mode that is to have its copies split. */
|
||||
bool move_modes_to_split[MAX_MACHINE_MODE];
|
||||
|
||||
/* True if zero-extensions from word_mode to twice_word_mode should
|
||||
be split. */
|
||||
bool splitting_zext;
|
||||
|
||||
/* Index X is true if twice_word_mode shifts by X + BITS_PER_WORD
|
||||
should be split. */
|
||||
bool splitting_ashift[MAX_BITS_PER_WORD];
|
||||
bool splitting_lshiftrt[MAX_BITS_PER_WORD];
|
||||
|
||||
/* True if there is at least one mode that is worth splitting. */
|
||||
bool something_to_do;
|
||||
};
|
||||
|
||||
/* Target-specific information for the subreg lowering pass. */
|
||||
struct target_lower_subreg {
|
||||
/* An integer mode that is twice as wide as word_mode. */
|
||||
enum machine_mode x_twice_word_mode;
|
||||
|
||||
/* What we have decided to do when optimizing for size (index 0)
|
||||
and speed (index 1). */
|
||||
struct lower_subreg_choices x_choices[2];
|
||||
};
|
||||
|
||||
extern struct target_lower_subreg default_target_lower_subreg;
|
||||
#if SWITCHABLE_TARGET
|
||||
extern struct target_lower_subreg *this_target_lower_subreg;
|
||||
#else
|
||||
#define this_target_lower_subreg (&default_target_lower_subreg)
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -2526,6 +2526,9 @@ extern void init_expmed (void);
|
|||
extern void expand_inc (rtx, rtx);
|
||||
extern void expand_dec (rtx, rtx);
|
||||
|
||||
/* In lower-subreg.c */
|
||||
extern void init_lower_subreg (void);
|
||||
|
||||
/* In gcse.c */
|
||||
extern bool can_copy_p (enum machine_mode);
|
||||
extern bool can_assign_to_reg_without_clobbers_p (rtx);
|
||||
|
|
|
@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "builtins.h"
|
||||
#include "gcse.h"
|
||||
#include "bb-reorder.h"
|
||||
#include "lower-subreg.h"
|
||||
|
||||
#if SWITCHABLE_TARGET
|
||||
struct target_globals default_target_globals = {
|
||||
|
@ -56,7 +57,8 @@ struct target_globals default_target_globals = {
|
|||
&default_target_ira_int,
|
||||
&default_target_builtins,
|
||||
&default_target_gcse,
|
||||
&default_target_bb_reorder
|
||||
&default_target_bb_reorder,
|
||||
&default_target_lower_subreg
|
||||
};
|
||||
|
||||
struct target_globals *
|
||||
|
@ -79,6 +81,7 @@ save_target_globals (void)
|
|||
g->builtins = XCNEW (struct target_builtins);
|
||||
g->gcse = XCNEW (struct target_gcse);
|
||||
g->bb_reorder = XCNEW (struct target_bb_reorder);
|
||||
g->lower_subreg = XCNEW (struct target_lower_subreg);
|
||||
restore_target_globals (g);
|
||||
init_reg_sets ();
|
||||
target_reinit ();
|
||||
|
|
|
@ -35,6 +35,7 @@ extern struct target_ira_int *this_target_ira_int;
|
|||
extern struct target_builtins *this_target_builtins;
|
||||
extern struct target_gcse *this_target_gcse;
|
||||
extern struct target_bb_reorder *this_target_bb_reorder;
|
||||
extern struct target_lower_subreg *this_target_lower_subreg;
|
||||
|
||||
struct GTY(()) target_globals {
|
||||
struct target_flag_state *GTY((skip)) flag_state;
|
||||
|
@ -51,6 +52,7 @@ struct GTY(()) target_globals {
|
|||
struct target_builtins *GTY((skip)) builtins;
|
||||
struct target_gcse *GTY((skip)) gcse;
|
||||
struct target_bb_reorder *GTY((skip)) bb_reorder;
|
||||
struct target_lower_subreg *GTY((skip)) lower_subreg;
|
||||
};
|
||||
|
||||
extern struct target_globals default_target_globals;
|
||||
|
@ -74,6 +76,7 @@ restore_target_globals (struct target_globals *g)
|
|||
this_target_builtins = g->builtins;
|
||||
this_target_gcse = g->gcse;
|
||||
this_target_bb_reorder = g->bb_reorder;
|
||||
this_target_lower_subreg = g->lower_subreg;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1601,6 +1601,7 @@ backend_init_target (void)
|
|||
/* rtx_cost is mode-dependent, so cached values need to be recomputed
|
||||
on a mode change. */
|
||||
init_expmed ();
|
||||
init_lower_subreg ();
|
||||
|
||||
/* We may need to recompute regno_save_code[] and regno_restore_code[]
|
||||
after a mode change as well. */
|
||||
|
|
Loading…
Reference in New Issue