mips-protos.h (mips_expand_block_move): Declare.
* config/mips/mips-protos.h (mips_expand_block_move): Declare. (expand_block_move, output_block_move): Remove. * config/mips/mips.h (enum block_move_type): Remove. * config/mips/mips.c (block_move_call, output_block_move): Remove. (mips_block_move_straight, mips_adjust_block_mem): New functions. (mips_block_move_loop): Renamed and reworked from block_move_loop. (mips_expand_block_move): Likewise expand_block_move. Return false to fall back on the target-independent code. * config/mips/mips.md (movstrsi): Use mips_expand_block_move. (movstrsi_internal*): Remove. From-SVN: r68071
This commit is contained in:
parent
6cf87ca4e5
commit
730cf82238
@ -1,3 +1,16 @@
|
||||
2003-06-17 Richard Sandiford <rsandifo@redhat.com>
|
||||
|
||||
* config/mips/mips-protos.h (mips_expand_block_move): Declare.
|
||||
(expand_block_move, output_block_move): Remove.
|
||||
* config/mips/mips.h (enum block_move_type): Remove.
|
||||
* config/mips/mips.c (block_move_call, output_block_move): Remove.
|
||||
(mips_block_move_straight, mips_adjust_block_mem): New functions.
|
||||
(mips_block_move_loop): Renamed and reworked from block_move_loop.
|
||||
(mips_expand_block_move): Likewise expand_block_move. Return false
|
||||
to fall back on the target-independent code.
|
||||
* config/mips/mips.md (movstrsi): Use mips_expand_block_move.
|
||||
(movstrsi_internal*): Remove.
|
||||
|
||||
2003-06-16 Zack Weinberg <zack@codesourcery.com>
|
||||
|
||||
* cpplib.h, cpphash.h, cppcharset.c, cpperror.c, cppexp.c
|
||||
|
@ -78,7 +78,7 @@ extern tree mips_build_va_list PARAMS ((void));
|
||||
extern void mips_va_start PARAMS ((tree, rtx));
|
||||
extern struct rtx_def *mips_va_arg PARAMS ((tree, tree));
|
||||
|
||||
extern void expand_block_move PARAMS ((rtx *));
|
||||
extern bool mips_expand_block_move PARAMS ((rtx, rtx, rtx));
|
||||
extern bool mips_expand_unaligned_load PARAMS ((rtx, rtx,
|
||||
unsigned int,
|
||||
int));
|
||||
@ -98,8 +98,6 @@ extern void mips_split_64bit_move PARAMS ((rtx, rtx));
|
||||
extern const char *mips_output_move PARAMS ((rtx, rtx));
|
||||
extern const char *mips_emit_prefetch PARAMS ((rtx *));
|
||||
extern const char *mips_restore_gp PARAMS ((rtx *));
|
||||
extern const char *output_block_move PARAMS ((rtx, rtx *, int,
|
||||
enum block_move_type));
|
||||
extern void override_options PARAMS ((void));
|
||||
extern void mips_conditional_register_usage PARAMS ((void));
|
||||
extern void print_operand_address PARAMS ((FILE *, rtx));
|
||||
|
@ -220,11 +220,12 @@ static void mips_legitimize_const_move PARAMS ((enum machine_mode,
|
||||
rtx, rtx));
|
||||
static int m16_check_op PARAMS ((rtx, int, int, int));
|
||||
static bool mips_function_ok_for_sibcall PARAMS ((tree, tree));
|
||||
static void block_move_loop PARAMS ((rtx, rtx,
|
||||
unsigned int,
|
||||
int,
|
||||
rtx, rtx));
|
||||
static void block_move_call PARAMS ((rtx, rtx, rtx));
|
||||
static void mips_block_move_straight PARAMS ((rtx, rtx,
|
||||
HOST_WIDE_INT));
|
||||
static void mips_adjust_block_mem PARAMS ((rtx, HOST_WIDE_INT,
|
||||
rtx *, rtx *));
|
||||
static void mips_block_move_loop PARAMS ((rtx, rtx,
|
||||
HOST_WIDE_INT));
|
||||
static void mips_arg_info PARAMS ((const CUMULATIVE_ARGS *,
|
||||
enum machine_mode,
|
||||
tree, int,
|
||||
@ -3676,569 +3677,166 @@ mips_set_return_address (address, scratch)
|
||||
emit_move_insn (gen_rtx_MEM (GET_MODE (address), scratch), address);
|
||||
}
|
||||
|
||||
/* Write a loop to move a constant number of bytes.
|
||||
Generate load/stores as follows:
|
||||
/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
|
||||
Assume that the areas do not overlap. */
|
||||
|
||||
do {
|
||||
temp1 = src[0];
|
||||
temp2 = src[1];
|
||||
...
|
||||
temp<last> = src[MAX_MOVE_REGS-1];
|
||||
dest[0] = temp1;
|
||||
dest[1] = temp2;
|
||||
...
|
||||
dest[MAX_MOVE_REGS-1] = temp<last>;
|
||||
src += MAX_MOVE_REGS;
|
||||
dest += MAX_MOVE_REGS;
|
||||
} while (src != final);
|
||||
static void
|
||||
mips_block_move_straight (dest, src, length)
|
||||
rtx dest, src;
|
||||
HOST_WIDE_INT length;
|
||||
{
|
||||
HOST_WIDE_INT offset, delta;
|
||||
unsigned HOST_WIDE_INT bits;
|
||||
int i;
|
||||
enum machine_mode mode;
|
||||
rtx *regs;
|
||||
|
||||
This way, no NOP's are needed, and only MAX_MOVE_REGS+3 temp
|
||||
registers are needed.
|
||||
/* Work out how many bits to move at a time. If both operands have
|
||||
half-word alignment, it is usually better to move in half words.
|
||||
For instance, lh/lh/sh/sh is usually better than lwl/lwr/swl/swr
|
||||
and lw/lw/sw/sw is usually better than ldl/ldr/sdl/sdr.
|
||||
Otherwise move word-sized chunks. */
|
||||
if (MEM_ALIGN (src) == BITS_PER_WORD / 2
|
||||
&& MEM_ALIGN (dest) == BITS_PER_WORD / 2)
|
||||
bits = BITS_PER_WORD / 2;
|
||||
else
|
||||
bits = BITS_PER_WORD;
|
||||
|
||||
Aligned moves move MAX_MOVE_REGS*4 bytes every (2*MAX_MOVE_REGS)+3
|
||||
cycles, unaligned moves move MAX_MOVE_REGS*4 bytes every
|
||||
(4*MAX_MOVE_REGS)+3 cycles, assuming no cache misses. */
|
||||
mode = mode_for_size (bits, MODE_INT, 0);
|
||||
delta = bits / BITS_PER_UNIT;
|
||||
|
||||
/* Allocate a buffer for the temporary registers. */
|
||||
regs = alloca (sizeof (rtx) * length / delta);
|
||||
|
||||
/* Load as many BITS-sized chunks as possible. Use a normal load if
|
||||
the source has enough alignment, otherwise use left/right pairs. */
|
||||
for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
|
||||
{
|
||||
rtx part;
|
||||
|
||||
regs[i] = gen_reg_rtx (mode);
|
||||
part = adjust_address (src, mode, offset);
|
||||
if (MEM_ALIGN (part) >= bits)
|
||||
emit_move_insn (regs[i], part);
|
||||
else if (!mips_expand_unaligned_load (regs[i], part, bits, 0))
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Copy the chunks to the destination. */
|
||||
for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
|
||||
{
|
||||
rtx part;
|
||||
|
||||
part = adjust_address (dest, mode, offset);
|
||||
if (MEM_ALIGN (part) >= bits)
|
||||
emit_move_insn (part, regs[i]);
|
||||
else if (!mips_expand_unaligned_store (part, regs[i], bits, 0))
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Mop up any left-over bytes. */
|
||||
if (offset < length)
|
||||
{
|
||||
src = adjust_address (src, mode, offset);
|
||||
dest = adjust_address (dest, mode, offset);
|
||||
move_by_pieces (dest, src, length - offset,
|
||||
MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), 0);
|
||||
}
|
||||
}
|
||||
|
||||
#define MAX_MOVE_REGS 4
|
||||
#define MAX_MOVE_BYTES (MAX_MOVE_REGS * UNITS_PER_WORD)
|
||||
|
||||
|
||||
/* Helper function for doing a loop-based block operation on memory
|
||||
reference MEM. Each iteration of the loop will operate on LENGTH
|
||||
bytes of MEM.
|
||||
|
||||
Create a new base register for use within the loop and point it to
|
||||
the start of MEM. Create a new memory reference that uses this
|
||||
register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
|
||||
|
||||
static void
|
||||
block_move_loop (dest_reg, src_reg, bytes, align, orig_dest, orig_src)
|
||||
rtx dest_reg; /* register holding destination address */
|
||||
rtx src_reg; /* register holding source address */
|
||||
unsigned int bytes; /* # bytes to move */
|
||||
int align; /* alignment */
|
||||
rtx orig_dest; /* original dest */
|
||||
rtx orig_src; /* original source for making a reg note */
|
||||
mips_adjust_block_mem (mem, length, loop_reg, loop_mem)
|
||||
rtx mem, *loop_reg, *loop_mem;
|
||||
HOST_WIDE_INT length;
|
||||
{
|
||||
rtx dest_mem = replace_equiv_address (orig_dest, dest_reg);
|
||||
rtx src_mem = replace_equiv_address (orig_src, src_reg);
|
||||
rtx align_rtx = GEN_INT (align);
|
||||
rtx label;
|
||||
rtx final_src;
|
||||
rtx bytes_rtx;
|
||||
int leftover;
|
||||
*loop_reg = copy_addr_to_reg (XEXP (mem, 0));
|
||||
|
||||
if (bytes < (unsigned)2 * MAX_MOVE_BYTES)
|
||||
abort ();
|
||||
/* Although the new mem does not refer to a known location,
|
||||
it does keep up to LENGTH bytes of alignment. */
|
||||
*loop_mem = change_address (mem, BLKmode, *loop_reg);
|
||||
set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
|
||||
}
|
||||
|
||||
leftover = bytes % MAX_MOVE_BYTES;
|
||||
bytes -= leftover;
|
||||
|
||||
/* Move LENGTH bytes from SRC to DEST using a loop that moves MAX_MOVE_BYTES
|
||||
per iteration. LENGTH must be at least MAX_MOVE_BYTES. Assume that the
|
||||
memory regions do not overlap. */
|
||||
|
||||
static void
|
||||
mips_block_move_loop (dest, src, length)
|
||||
rtx dest, src;
|
||||
HOST_WIDE_INT length;
|
||||
{
|
||||
rtx label, src_reg, dest_reg, final_src;
|
||||
HOST_WIDE_INT leftover;
|
||||
|
||||
leftover = length % MAX_MOVE_BYTES;
|
||||
length -= leftover;
|
||||
|
||||
/* Create registers and memory references for use within the loop. */
|
||||
mips_adjust_block_mem (src, MAX_MOVE_BYTES, &src_reg, &src);
|
||||
mips_adjust_block_mem (dest, MAX_MOVE_BYTES, &dest_reg, &dest);
|
||||
|
||||
/* Calculate the value that SRC_REG should have after the last iteration
|
||||
of the loop. */
|
||||
final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
|
||||
0, 0, OPTAB_WIDEN);
|
||||
|
||||
/* Emit the start of the loop. */
|
||||
label = gen_label_rtx ();
|
||||
final_src = gen_reg_rtx (Pmode);
|
||||
bytes_rtx = GEN_INT (bytes);
|
||||
|
||||
if (bytes > 0x7fff)
|
||||
{
|
||||
if (Pmode == DImode)
|
||||
{
|
||||
emit_insn (gen_movdi (final_src, bytes_rtx));
|
||||
emit_insn (gen_adddi3 (final_src, final_src, src_reg));
|
||||
}
|
||||
else
|
||||
{
|
||||
emit_insn (gen_movsi (final_src, bytes_rtx));
|
||||
emit_insn (gen_addsi3 (final_src, final_src, src_reg));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (Pmode == DImode)
|
||||
emit_insn (gen_adddi3 (final_src, src_reg, bytes_rtx));
|
||||
else
|
||||
emit_insn (gen_addsi3 (final_src, src_reg, bytes_rtx));
|
||||
}
|
||||
|
||||
emit_label (label);
|
||||
|
||||
bytes_rtx = GEN_INT (MAX_MOVE_BYTES);
|
||||
emit_insn (gen_movstrsi_internal (dest_mem, src_mem, bytes_rtx, align_rtx));
|
||||
/* Emit the loop body. */
|
||||
mips_block_move_straight (dest, src, MAX_MOVE_BYTES);
|
||||
|
||||
/* Move on to the next block. */
|
||||
emit_move_insn (src_reg, plus_constant (src_reg, MAX_MOVE_BYTES));
|
||||
emit_move_insn (dest_reg, plus_constant (dest_reg, MAX_MOVE_BYTES));
|
||||
|
||||
/* Emit the loop condition. */
|
||||
if (Pmode == DImode)
|
||||
{
|
||||
emit_insn (gen_adddi3 (src_reg, src_reg, bytes_rtx));
|
||||
emit_insn (gen_adddi3 (dest_reg, dest_reg, bytes_rtx));
|
||||
emit_insn (gen_cmpdi (src_reg, final_src));
|
||||
}
|
||||
emit_insn (gen_cmpdi (src_reg, final_src));
|
||||
else
|
||||
{
|
||||
emit_insn (gen_addsi3 (src_reg, src_reg, bytes_rtx));
|
||||
emit_insn (gen_addsi3 (dest_reg, dest_reg, bytes_rtx));
|
||||
emit_insn (gen_cmpsi (src_reg, final_src));
|
||||
}
|
||||
|
||||
emit_insn (gen_cmpsi (src_reg, final_src));
|
||||
emit_jump_insn (gen_bne (label));
|
||||
|
||||
/* Mop up any left-over bytes. */
|
||||
if (leftover)
|
||||
emit_insn (gen_movstrsi_internal (dest_mem, src_mem, GEN_INT (leftover),
|
||||
align_rtx));
|
||||
mips_block_move_straight (dest, src, leftover);
|
||||
}
|
||||
|
||||
/* Use a library function to move some bytes. */
|
||||
/* Expand a movstrsi instruction. */
|
||||
|
||||
static void
|
||||
block_move_call (dest_reg, src_reg, bytes_rtx)
|
||||
rtx dest_reg;
|
||||
rtx src_reg;
|
||||
rtx bytes_rtx;
|
||||
bool
|
||||
mips_expand_block_move (dest, src, length)
|
||||
rtx dest, src, length;
|
||||
{
|
||||
/* We want to pass the size as Pmode, which will normally be SImode
|
||||
but will be DImode if we are using 64 bit longs and pointers. */
|
||||
if (GET_MODE (bytes_rtx) != VOIDmode
|
||||
&& GET_MODE (bytes_rtx) != (unsigned) Pmode)
|
||||
bytes_rtx = convert_to_mode (Pmode, bytes_rtx, 1);
|
||||
|
||||
#ifdef TARGET_MEM_FUNCTIONS
|
||||
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "memcpy"), 0,
|
||||
VOIDmode, 3, dest_reg, Pmode, src_reg, Pmode,
|
||||
convert_to_mode (TYPE_MODE (sizetype), bytes_rtx,
|
||||
TREE_UNSIGNED (sizetype)),
|
||||
TYPE_MODE (sizetype));
|
||||
#else
|
||||
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "bcopy"), 0,
|
||||
VOIDmode, 3, src_reg, Pmode, dest_reg, Pmode,
|
||||
convert_to_mode (TYPE_MODE (integer_type_node), bytes_rtx,
|
||||
TREE_UNSIGNED (integer_type_node)),
|
||||
TYPE_MODE (integer_type_node));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Expand string/block move operations.
|
||||
|
||||
operands[0] is the pointer to the destination.
|
||||
operands[1] is the pointer to the source.
|
||||
operands[2] is the number of bytes to move.
|
||||
operands[3] is the alignment. */
|
||||
|
||||
void
|
||||
expand_block_move (operands)
|
||||
rtx operands[];
|
||||
{
|
||||
rtx bytes_rtx = operands[2];
|
||||
rtx align_rtx = operands[3];
|
||||
int constp = GET_CODE (bytes_rtx) == CONST_INT;
|
||||
unsigned HOST_WIDE_INT bytes = constp ? INTVAL (bytes_rtx) : 0;
|
||||
unsigned int align = INTVAL (align_rtx);
|
||||
rtx orig_src = operands[1];
|
||||
rtx orig_dest = operands[0];
|
||||
rtx src_reg;
|
||||
rtx dest_reg;
|
||||
|
||||
if (constp && bytes == 0)
|
||||
return;
|
||||
|
||||
if (align > (unsigned) UNITS_PER_WORD)
|
||||
align = UNITS_PER_WORD;
|
||||
|
||||
/* Move the address into scratch registers. */
|
||||
dest_reg = copy_addr_to_reg (XEXP (orig_dest, 0));
|
||||
src_reg = copy_addr_to_reg (XEXP (orig_src, 0));
|
||||
|
||||
if (TARGET_MEMCPY)
|
||||
block_move_call (dest_reg, src_reg, bytes_rtx);
|
||||
|
||||
else if (constp && bytes <= (unsigned)2 * MAX_MOVE_BYTES
|
||||
&& align == (unsigned) UNITS_PER_WORD)
|
||||
move_by_pieces (orig_dest, orig_src, bytes, align * BITS_PER_WORD, 0);
|
||||
|
||||
else if (constp && bytes <= (unsigned)2 * MAX_MOVE_BYTES)
|
||||
emit_insn (gen_movstrsi_internal (replace_equiv_address (orig_dest,
|
||||
dest_reg),
|
||||
replace_equiv_address (orig_src,
|
||||
src_reg),
|
||||
bytes_rtx, align_rtx));
|
||||
|
||||
else if (constp && align >= (unsigned) UNITS_PER_WORD && optimize)
|
||||
block_move_loop (dest_reg, src_reg, bytes, align, orig_dest, orig_src);
|
||||
|
||||
else if (constp && optimize)
|
||||
if (GET_CODE (length) == CONST_INT)
|
||||
{
|
||||
/* If the alignment is not word aligned, generate a test at
|
||||
runtime, to see whether things wound up aligned, and we
|
||||
can use the faster lw/sw instead of ulw/usw. */
|
||||
|
||||
rtx temp = gen_reg_rtx (Pmode);
|
||||
rtx aligned_label = gen_label_rtx ();
|
||||
rtx join_label = gen_label_rtx ();
|
||||
int leftover = bytes % MAX_MOVE_BYTES;
|
||||
|
||||
bytes -= leftover;
|
||||
|
||||
if (Pmode == DImode)
|
||||
if (INTVAL (length) <= 2 * MAX_MOVE_BYTES)
|
||||
{
|
||||
emit_insn (gen_iordi3 (temp, src_reg, dest_reg));
|
||||
emit_insn (gen_anddi3 (temp, temp, GEN_INT (UNITS_PER_WORD - 1)));
|
||||
emit_insn (gen_cmpdi (temp, const0_rtx));
|
||||
mips_block_move_straight (dest, src, INTVAL (length));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
else if (optimize)
|
||||
{
|
||||
emit_insn (gen_iorsi3 (temp, src_reg, dest_reg));
|
||||
emit_insn (gen_andsi3 (temp, temp, GEN_INT (UNITS_PER_WORD - 1)));
|
||||
emit_insn (gen_cmpsi (temp, const0_rtx));
|
||||
}
|
||||
|
||||
emit_jump_insn (gen_beq (aligned_label));
|
||||
|
||||
/* Unaligned loop. */
|
||||
block_move_loop (dest_reg, src_reg, bytes, 1, orig_dest, orig_src);
|
||||
emit_jump_insn (gen_jump (join_label));
|
||||
emit_barrier ();
|
||||
|
||||
/* Aligned loop. */
|
||||
emit_label (aligned_label);
|
||||
block_move_loop (dest_reg, src_reg, bytes, UNITS_PER_WORD, orig_dest,
|
||||
orig_src);
|
||||
emit_label (join_label);
|
||||
|
||||
/* Bytes at the end of the loop. */
|
||||
if (leftover)
|
||||
emit_insn (gen_movstrsi_internal (replace_equiv_address (orig_dest,
|
||||
dest_reg),
|
||||
replace_equiv_address (orig_src,
|
||||
src_reg),
|
||||
GEN_INT (leftover),
|
||||
GEN_INT (align)));
|
||||
}
|
||||
|
||||
else
|
||||
block_move_call (dest_reg, src_reg, bytes_rtx);
|
||||
}
|
||||
|
||||
/* Emit load/stores for a small constant block_move.
|
||||
|
||||
operands[0] is the memory address of the destination.
|
||||
operands[1] is the memory address of the source.
|
||||
operands[2] is the number of bytes to move.
|
||||
operands[3] is the alignment.
|
||||
operands[4] is a temp register.
|
||||
operands[5] is a temp register.
|
||||
...
|
||||
operands[3+num_regs] is the last temp register.
|
||||
|
||||
The block move type can be one of the following:
|
||||
BLOCK_MOVE_NORMAL Do all of the block move.
|
||||
BLOCK_MOVE_NOT_LAST Do all but the last store.
|
||||
BLOCK_MOVE_LAST Do just the last store. */
|
||||
|
||||
const char *
|
||||
output_block_move (insn, operands, num_regs, move_type)
|
||||
rtx insn;
|
||||
rtx operands[];
|
||||
int num_regs;
|
||||
enum block_move_type move_type;
|
||||
{
|
||||
rtx dest_reg = XEXP (operands[0], 0);
|
||||
rtx src_reg = XEXP (operands[1], 0);
|
||||
HOST_WIDE_INT bytes = INTVAL (operands[2]);
|
||||
int align = INTVAL (operands[3]);
|
||||
int num = 0;
|
||||
int offset = 0;
|
||||
int use_lwl_lwr = 0;
|
||||
int last_operand = num_regs + 4;
|
||||
int safe_regs = 4;
|
||||
int i;
|
||||
rtx xoperands[10];
|
||||
|
||||
struct {
|
||||
const char *load; /* load insn without nop */
|
||||
const char *load_nop; /* load insn with trailing nop */
|
||||
const char *store; /* store insn */
|
||||
const char *final; /* if last_store used: NULL or swr */
|
||||
const char *last_store; /* last store instruction */
|
||||
int offset; /* current offset */
|
||||
enum machine_mode mode; /* mode to use on (MEM) */
|
||||
} load_store[4];
|
||||
|
||||
/* ??? Detect a bug in GCC, where it can give us a register
|
||||
the same as one of the addressing registers and reduce
|
||||
the number of registers available. */
|
||||
for (i = 4; i < last_operand && safe_regs < (int) ARRAY_SIZE (xoperands); i++)
|
||||
if (! reg_mentioned_p (operands[i], operands[0])
|
||||
&& ! reg_mentioned_p (operands[i], operands[1]))
|
||||
xoperands[safe_regs++] = operands[i];
|
||||
|
||||
if (safe_regs < last_operand)
|
||||
{
|
||||
xoperands[0] = operands[0];
|
||||
xoperands[1] = operands[1];
|
||||
xoperands[2] = operands[2];
|
||||
xoperands[3] = operands[3];
|
||||
return output_block_move (insn, xoperands, safe_regs - 4, move_type);
|
||||
}
|
||||
|
||||
/* If we are given global or static addresses, and we would be
|
||||
emitting a few instructions, try to save time by using a
|
||||
temporary register for the pointer. */
|
||||
/* ??? The SGI Irix6 assembler fails when a SYMBOL_REF is used in
|
||||
an ldl/ldr instruction pair. We play it safe, and always move
|
||||
constant addresses into registers when generating N32/N64 code, just
|
||||
in case we might emit an unaligned load instruction. */
|
||||
if (num_regs > 2 && (bytes > 2 * align || move_type != BLOCK_MOVE_NORMAL
|
||||
|| mips_abi == ABI_N32
|
||||
|| mips_abi == ABI_64))
|
||||
{
|
||||
if (CONSTANT_P (src_reg))
|
||||
{
|
||||
src_reg = operands[3 + num_regs--];
|
||||
if (move_type != BLOCK_MOVE_LAST)
|
||||
{
|
||||
xoperands[1] = operands[1];
|
||||
xoperands[0] = src_reg;
|
||||
if (Pmode == DImode)
|
||||
output_asm_insn ("dla\t%0,%1", xoperands);
|
||||
else
|
||||
output_asm_insn ("la\t%0,%1", xoperands);
|
||||
}
|
||||
}
|
||||
|
||||
if (CONSTANT_P (dest_reg))
|
||||
{
|
||||
dest_reg = operands[3 + num_regs--];
|
||||
if (move_type != BLOCK_MOVE_LAST)
|
||||
{
|
||||
xoperands[1] = operands[0];
|
||||
xoperands[0] = dest_reg;
|
||||
if (Pmode == DImode)
|
||||
output_asm_insn ("dla\t%0,%1", xoperands);
|
||||
else
|
||||
output_asm_insn ("la\t%0,%1", xoperands);
|
||||
}
|
||||
mips_block_move_loop (dest, src, INTVAL (length));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* ??? We really shouldn't get any LO_SUM addresses here, because they
|
||||
are not offsettable, however, offsettable_address_p says they are
|
||||
offsettable. I think this is a bug in offsettable_address_p.
|
||||
For expediency, we fix this by just loading the address into a register
|
||||
if we happen to get one. */
|
||||
|
||||
if (GET_CODE (src_reg) == LO_SUM)
|
||||
{
|
||||
src_reg = operands[3 + num_regs--];
|
||||
if (move_type != BLOCK_MOVE_LAST)
|
||||
{
|
||||
xoperands[2] = XEXP (XEXP (operands[1], 0), 1);
|
||||
xoperands[1] = XEXP (XEXP (operands[1], 0), 0);
|
||||
xoperands[0] = src_reg;
|
||||
if (Pmode == DImode)
|
||||
output_asm_insn ("daddiu\t%0,%1,%%lo(%2)", xoperands);
|
||||
else
|
||||
output_asm_insn ("addiu\t%0,%1,%%lo(%2)", xoperands);
|
||||
}
|
||||
}
|
||||
|
||||
if (GET_CODE (dest_reg) == LO_SUM)
|
||||
{
|
||||
dest_reg = operands[3 + num_regs--];
|
||||
if (move_type != BLOCK_MOVE_LAST)
|
||||
{
|
||||
xoperands[2] = XEXP (XEXP (operands[0], 0), 1);
|
||||
xoperands[1] = XEXP (XEXP (operands[0], 0), 0);
|
||||
xoperands[0] = dest_reg;
|
||||
if (Pmode == DImode)
|
||||
output_asm_insn ("daddiu\t%0,%1,%%lo(%2)", xoperands);
|
||||
else
|
||||
output_asm_insn ("addiu\t%0,%1,%%lo(%2)", xoperands);
|
||||
}
|
||||
}
|
||||
|
||||
if (num_regs > (int) ARRAY_SIZE (load_store))
|
||||
num_regs = ARRAY_SIZE (load_store);
|
||||
|
||||
else if (num_regs < 1)
|
||||
abort_with_insn (insn,
|
||||
"cannot do block move, not enough scratch registers");
|
||||
|
||||
while (bytes > 0)
|
||||
{
|
||||
load_store[num].offset = offset;
|
||||
|
||||
if (TARGET_64BIT && bytes >= 8 && align >= 8)
|
||||
{
|
||||
load_store[num].load = "ld\t%0,%1";
|
||||
load_store[num].load_nop = "ld\t%0,%1%#";
|
||||
load_store[num].store = "sd\t%0,%1";
|
||||
load_store[num].last_store = "sd\t%0,%1";
|
||||
load_store[num].final = 0;
|
||||
load_store[num].mode = DImode;
|
||||
offset += 8;
|
||||
bytes -= 8;
|
||||
}
|
||||
|
||||
/* ??? Fails because of a MIPS assembler bug? */
|
||||
else if (TARGET_64BIT && bytes >= 8
|
||||
&& ! TARGET_SR71K
|
||||
&& ! TARGET_MIPS16)
|
||||
{
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
load_store[num].load = "ldl\t%0,%1\n\tldr\t%0,%2";
|
||||
load_store[num].load_nop = "ldl\t%0,%1\n\tldr\t%0,%2%#";
|
||||
load_store[num].store = "sdl\t%0,%1\n\tsdr\t%0,%2";
|
||||
load_store[num].last_store = "sdr\t%0,%2";
|
||||
load_store[num].final = "sdl\t%0,%1";
|
||||
}
|
||||
else
|
||||
{
|
||||
load_store[num].load = "ldl\t%0,%2\n\tldr\t%0,%1";
|
||||
load_store[num].load_nop = "ldl\t%0,%2\n\tldr\t%0,%1%#";
|
||||
load_store[num].store = "sdl\t%0,%2\n\tsdr\t%0,%1";
|
||||
load_store[num].last_store = "sdr\t%0,%1";
|
||||
load_store[num].final = "sdl\t%0,%2";
|
||||
}
|
||||
|
||||
load_store[num].mode = DImode;
|
||||
offset += 8;
|
||||
bytes -= 8;
|
||||
use_lwl_lwr = 1;
|
||||
}
|
||||
|
||||
else if (bytes >= 4 && align >= 4)
|
||||
{
|
||||
load_store[num].load = "lw\t%0,%1";
|
||||
load_store[num].load_nop = "lw\t%0,%1%#";
|
||||
load_store[num].store = "sw\t%0,%1";
|
||||
load_store[num].last_store = "sw\t%0,%1";
|
||||
load_store[num].final = 0;
|
||||
load_store[num].mode = SImode;
|
||||
offset += 4;
|
||||
bytes -= 4;
|
||||
}
|
||||
|
||||
else if (bytes >= 4
|
||||
&& ! TARGET_SR71K
|
||||
&& ! TARGET_MIPS16)
|
||||
{
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
load_store[num].load = "lwl\t%0,%1\n\tlwr\t%0,%2";
|
||||
load_store[num].load_nop = "lwl\t%0,%1\n\tlwr\t%0,%2%#";
|
||||
load_store[num].store = "swl\t%0,%1\n\tswr\t%0,%2";
|
||||
load_store[num].last_store = "swr\t%0,%2";
|
||||
load_store[num].final = "swl\t%0,%1";
|
||||
}
|
||||
else
|
||||
{
|
||||
load_store[num].load = "lwl\t%0,%2\n\tlwr\t%0,%1";
|
||||
load_store[num].load_nop = "lwl\t%0,%2\n\tlwr\t%0,%1%#";
|
||||
load_store[num].store = "swl\t%0,%2\n\tswr\t%0,%1";
|
||||
load_store[num].last_store = "swr\t%0,%1";
|
||||
load_store[num].final = "swl\t%0,%2";
|
||||
}
|
||||
|
||||
load_store[num].mode = SImode;
|
||||
offset += 4;
|
||||
bytes -= 4;
|
||||
use_lwl_lwr = 1;
|
||||
}
|
||||
|
||||
else if (bytes >= 2 && align >= 2)
|
||||
{
|
||||
load_store[num].load = "lh\t%0,%1";
|
||||
load_store[num].load_nop = "lh\t%0,%1%#";
|
||||
load_store[num].store = "sh\t%0,%1";
|
||||
load_store[num].last_store = "sh\t%0,%1";
|
||||
load_store[num].final = 0;
|
||||
load_store[num].mode = HImode;
|
||||
offset += 2;
|
||||
bytes -= 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
load_store[num].load = "lb\t%0,%1";
|
||||
load_store[num].load_nop = "lb\t%0,%1%#";
|
||||
load_store[num].store = "sb\t%0,%1";
|
||||
load_store[num].last_store = "sb\t%0,%1";
|
||||
load_store[num].final = 0;
|
||||
load_store[num].mode = QImode;
|
||||
offset++;
|
||||
bytes--;
|
||||
}
|
||||
|
||||
/* Emit load/stores now if we have run out of registers or are
|
||||
at the end of the move. */
|
||||
|
||||
if (++num == num_regs || bytes == 0)
|
||||
{
|
||||
/* If only load/store, we need a NOP after the load. */
|
||||
if (num == 1)
|
||||
load_store[0].load = load_store[0].load_nop;
|
||||
|
||||
if (move_type != BLOCK_MOVE_LAST)
|
||||
{
|
||||
for (i = 0; i < num; i++)
|
||||
{
|
||||
int offset;
|
||||
|
||||
if (!operands[i + 4])
|
||||
abort ();
|
||||
|
||||
if (GET_MODE (operands[i + 4]) != load_store[i].mode)
|
||||
operands[i + 4] = gen_rtx_REG (load_store[i].mode,
|
||||
REGNO (operands[i + 4]));
|
||||
|
||||
offset = load_store[i].offset;
|
||||
xoperands[0] = operands[i + 4];
|
||||
xoperands[1] = gen_rtx_MEM (load_store[i].mode,
|
||||
plus_constant (src_reg, offset));
|
||||
|
||||
if (use_lwl_lwr)
|
||||
{
|
||||
int extra_offset
|
||||
= GET_MODE_SIZE (load_store[i].mode) - 1;
|
||||
|
||||
xoperands[2] = gen_rtx_MEM (load_store[i].mode,
|
||||
plus_constant (src_reg,
|
||||
extra_offset
|
||||
+ offset));
|
||||
}
|
||||
|
||||
output_asm_insn (load_store[i].load, xoperands);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < num; i++)
|
||||
{
|
||||
int last_p = (i == num-1 && bytes == 0);
|
||||
int offset = load_store[i].offset;
|
||||
|
||||
xoperands[0] = operands[i + 4];
|
||||
xoperands[1] = gen_rtx_MEM (load_store[i].mode,
|
||||
plus_constant (dest_reg, offset));
|
||||
|
||||
|
||||
if (use_lwl_lwr)
|
||||
{
|
||||
int extra_offset = GET_MODE_SIZE (load_store[i].mode) - 1;
|
||||
xoperands[2] = gen_rtx_MEM (load_store[i].mode,
|
||||
plus_constant (dest_reg,
|
||||
extra_offset
|
||||
+ offset));
|
||||
}
|
||||
|
||||
if (move_type == BLOCK_MOVE_NORMAL)
|
||||
output_asm_insn (load_store[i].store, xoperands);
|
||||
|
||||
else if (move_type == BLOCK_MOVE_NOT_LAST)
|
||||
{
|
||||
if (!last_p)
|
||||
output_asm_insn (load_store[i].store, xoperands);
|
||||
|
||||
else if (load_store[i].final != 0)
|
||||
output_asm_insn (load_store[i].final, xoperands);
|
||||
}
|
||||
|
||||
else if (last_p)
|
||||
output_asm_insn (load_store[i].last_store, xoperands);
|
||||
}
|
||||
|
||||
num = 0; /* reset load_store */
|
||||
use_lwl_lwr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Argument support functions. */
|
||||
|
@ -94,15 +94,6 @@ enum mips_abicalls_type {
|
||||
/* Recast the abicalls class to be the abicalls attribute. */
|
||||
#define mips_abicalls_attr ((enum attr_abicalls)mips_abicalls)
|
||||
|
||||
/* Which type of block move to do (whether or not the last store is
|
||||
split out so it can fill a branch delay slot). */
|
||||
|
||||
enum block_move_type {
|
||||
BLOCK_MOVE_NORMAL, /* generate complete block move */
|
||||
BLOCK_MOVE_NOT_LAST, /* generate all but last store */
|
||||
BLOCK_MOVE_LAST /* generate just the last store */
|
||||
};
|
||||
|
||||
/* Information about one recognized processor. Defined here for the
|
||||
benefit of TARGET_CPU_CPP_BUILTINS. */
|
||||
struct mips_cpu_info {
|
||||
|
@ -5460,151 +5460,15 @@ move\\t%0,%z4\\n\\
|
||||
(define_expand "movstrsi"
|
||||
[(parallel [(set (match_operand:BLK 0 "general_operand" "")
|
||||
(match_operand:BLK 1 "general_operand" ""))
|
||||
(use (match_operand:SI 2 "arith32_operand" ""))
|
||||
(use (match_operand:SI 3 "immediate_operand" ""))])]
|
||||
"!TARGET_MIPS16"
|
||||
"
|
||||
{
|
||||
if (operands[0]) /* avoid unused code messages */
|
||||
{
|
||||
expand_block_move (operands);
|
||||
(use (match_operand:SI 2 "" ""))
|
||||
(use (match_operand:SI 3 "const_int_operand" ""))])]
|
||||
"!TARGET_MIPS16 && !TARGET_MEMCPY"
|
||||
{
|
||||
if (mips_expand_block_move (operands[0], operands[1], operands[2]))
|
||||
DONE;
|
||||
}
|
||||
}")
|
||||
|
||||
;; Insn generated by block moves
|
||||
|
||||
(define_insn "movstrsi_internal"
|
||||
[(set (match_operand:BLK 0 "memory_operand" "=o") ;; destination
|
||||
(match_operand:BLK 1 "memory_operand" "o")) ;; source
|
||||
(clobber (match_scratch:SI 4 "=&d")) ;; temp 1
|
||||
(clobber (match_scratch:SI 5 "=&d")) ;; temp 2
|
||||
(clobber (match_scratch:SI 6 "=&d")) ;; temp 3
|
||||
(clobber (match_scratch:SI 7 "=&d")) ;; temp 4
|
||||
(use (match_operand:SI 2 "small_int" "I")) ;; # bytes to move
|
||||
(use (match_operand:SI 3 "small_int" "I")) ;; alignment
|
||||
(use (const_int 0))] ;; normal block move
|
||||
""
|
||||
"* return output_block_move (insn, operands, 4, BLOCK_MOVE_NORMAL);"
|
||||
[(set_attr "type" "store")
|
||||
(set_attr "mode" "none")
|
||||
(set_attr "length" "80")])
|
||||
|
||||
;; We need mips16 versions, because an offset from the stack pointer
|
||||
;; is not offsettable, since the stack pointer can only handle 4 and 8
|
||||
;; byte loads.
|
||||
|
||||
(define_insn ""
|
||||
[(set (match_operand:BLK 0 "memory_operand" "=o") ;; destination
|
||||
(match_operand:BLK 1 "memory_operand" "o")) ;; source
|
||||
(clobber (match_scratch:SI 4 "=&d")) ;; temp 1
|
||||
(clobber (match_scratch:SI 5 "=&d")) ;; temp 2
|
||||
(clobber (match_scratch:SI 6 "=&d")) ;; temp 3
|
||||
(clobber (match_scratch:SI 7 "=&d")) ;; temp 4
|
||||
(use (match_operand:SI 2 "small_int" "I")) ;; # bytes to move
|
||||
(use (match_operand:SI 3 "small_int" "I")) ;; alignment
|
||||
(use (const_int 0))] ;; normal block move
|
||||
"TARGET_MIPS16"
|
||||
"* return output_block_move (insn, operands, 4, BLOCK_MOVE_NORMAL);"
|
||||
[(set_attr "type" "multi")
|
||||
(set_attr "mode" "none")
|
||||
(set_attr "length" "80")])
|
||||
|
||||
;; Split a block move into 2 parts, the first part is everything
|
||||
;; except for the last move, and the second part is just the last
|
||||
;; store, which is exactly 1 instruction (i.e., not a usw), so it can
|
||||
;; fill a delay slot. This also prevents a bug in delayed branches
|
||||
;; from showing up, which reuses one of the registers in our clobbers.
|
||||
|
||||
;; ??? Disabled because it doesn't preserve alias information for
|
||||
;; operands 0 and 1. Also, the rtl for the second insn doesn't mention
|
||||
;; that it uses the registers clobbered by the first.
|
||||
;;
|
||||
;; It would probably be better to split the block into individual
|
||||
;; instructions instead.
|
||||
(define_split
|
||||
[(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
|
||||
(mem:BLK (match_operand:SI 1 "register_operand" "")))
|
||||
(clobber (match_operand:SI 4 "register_operand" ""))
|
||||
(clobber (match_operand:SI 5 "register_operand" ""))
|
||||
(clobber (match_operand:SI 6 "register_operand" ""))
|
||||
(clobber (match_operand:SI 7 "register_operand" ""))
|
||||
(use (match_operand:SI 2 "small_int" ""))
|
||||
(use (match_operand:SI 3 "small_int" ""))
|
||||
(use (const_int 0))]
|
||||
|
||||
"reload_completed && 0 && INTVAL (operands[2]) > 0"
|
||||
|
||||
;; All but the last move
|
||||
[(parallel [(set (mem:BLK (match_dup 0))
|
||||
(mem:BLK (match_dup 1)))
|
||||
(clobber (match_dup 4))
|
||||
(clobber (match_dup 5))
|
||||
(clobber (match_dup 6))
|
||||
(clobber (match_dup 7))
|
||||
(use (match_dup 2))
|
||||
(use (match_dup 3))
|
||||
(use (const_int 1))])
|
||||
|
||||
;; The last store, so it can fill a delay slot
|
||||
(parallel [(set (mem:BLK (match_dup 0))
|
||||
(mem:BLK (match_dup 1)))
|
||||
(clobber (match_dup 4))
|
||||
(clobber (match_dup 5))
|
||||
(clobber (match_dup 6))
|
||||
(clobber (match_dup 7))
|
||||
(use (match_dup 2))
|
||||
(use (match_dup 3))
|
||||
(use (const_int 2))])]
|
||||
|
||||
"")
|
||||
|
||||
(define_insn "movstrsi_internal2"
|
||||
[(set (match_operand:BLK 0 "memory_operand" "=o") ;; destination
|
||||
(match_operand:BLK 1 "memory_operand" "o")) ;; source
|
||||
(clobber (match_scratch:SI 4 "=&d")) ;; temp 1
|
||||
(clobber (match_scratch:SI 5 "=&d")) ;; temp 2
|
||||
(clobber (match_scratch:SI 6 "=&d")) ;; temp 3
|
||||
(clobber (match_scratch:SI 7 "=&d")) ;; temp 4
|
||||
(use (match_operand:SI 2 "small_int" "I")) ;; # bytes to move
|
||||
(use (match_operand:SI 3 "small_int" "I")) ;; alignment
|
||||
(use (const_int 1))] ;; all but last store
|
||||
""
|
||||
"* return output_block_move (insn, operands, 4, BLOCK_MOVE_NOT_LAST);"
|
||||
[(set_attr "type" "store")
|
||||
(set_attr "mode" "none")
|
||||
(set_attr "length" "80")])
|
||||
|
||||
(define_insn ""
|
||||
[(set (match_operand:BLK 0 "memory_operand" "=o") ;; destination
|
||||
(match_operand:BLK 1 "memory_operand" "o")) ;; source
|
||||
(clobber (match_scratch:SI 4 "=&d")) ;; temp 1
|
||||
(clobber (match_scratch:SI 5 "=&d")) ;; temp 2
|
||||
(clobber (match_scratch:SI 6 "=&d")) ;; temp 3
|
||||
(clobber (match_scratch:SI 7 "=&d")) ;; temp 4
|
||||
(use (match_operand:SI 2 "small_int" "I")) ;; # bytes to move
|
||||
(use (match_operand:SI 3 "small_int" "I")) ;; alignment
|
||||
(use (const_int 1))] ;; all but last store
|
||||
"TARGET_MIPS16"
|
||||
"* return output_block_move (insn, operands, 4, BLOCK_MOVE_NOT_LAST);"
|
||||
[(set_attr "type" "multi")
|
||||
(set_attr "mode" "none")
|
||||
(set_attr "length" "80")])
|
||||
|
||||
(define_insn "movstrsi_internal3"
|
||||
[(set (match_operand:BLK 0 "memory_operand" "=m") ;; destination
|
||||
(match_operand:BLK 1 "memory_operand" "m")) ;; source
|
||||
(clobber (match_scratch:SI 4 "=&d")) ;; temp 1
|
||||
(clobber (match_scratch:SI 5 "=&d")) ;; temp 2
|
||||
(clobber (match_scratch:SI 6 "=&d")) ;; temp 3
|
||||
(clobber (match_scratch:SI 7 "=&d")) ;; temp 4
|
||||
(use (match_operand:SI 2 "small_int" "I")) ;; # bytes to move
|
||||
(use (match_operand:SI 3 "small_int" "I")) ;; alignment
|
||||
(use (const_int 2))] ;; just last store of block move
|
||||
""
|
||||
"* return output_block_move (insn, operands, 4, BLOCK_MOVE_LAST);"
|
||||
[(set_attr "type" "store")
|
||||
(set_attr "mode" "none")])
|
||||
else
|
||||
FAIL;
|
||||
})
|
||||
|
||||
;;
|
||||
;; ....................
|
||||
|
Loading…
Reference in New Issue
Block a user