2009-05-22  Trevor Smigiel  <Trevor_Smigiel@playstation.sony.com>

* config/spu/spu-protos.h (aligned_mem_p, spu_valid_mov): Remove.
	(spu_split_load, spu_split_store): Change return type to int.
	(spu_split_convert): Declare.
	* config/spu/predicates.md (spu_mem_operand): Remove.
	(spu_mov_operand): Update.
	(spu_dest_operand, shiftrt_operator, extend_operator): Define.
	* config/spu/spu.c (regno_aligned_for_load): Remove.
	(reg_aligned_for_addr, spu_expand_load): Define.
	(spu_expand_extv): Reimplement and handle MEM.
	(spu_expand_insv): Handle MEM.
	(spu_sched_reorder): Handle insns with length 0.
	(spu_legitimate_address_p): Reimplement.
	(store_with_one_insn_p): Return TRUE for any mode whose size is
	16 bytes or larger.
	(address_needs_split): Define.
	(spu_expand_mov): Call spu_split_load and spu_split_store for MEM
	operands.
	(spu_convert_move): Define.
	(spu_split_load): Use spu_expand_load and change all MEMs to
	TImode.
	(spu_split_store): Change all MEMs to TImode.
	(spu_init_expanders): Preallocate registers that correspond to
	LAST_VIRTUAL_REGISTER+1 and LAST_VIRTUAL_REGISTER+2 and set them
	with mark_reg_pointer.
	(spu_split_convert): Define.
	* config/spu/spu.md (QHSI, QHSDI): New mode iterators.
	(_move<mode>, _movdi, _movti): Update predicate and condition.
	(load, store): Change to define_split.
	(extendqiti2, extendhiti2, extendsiti2, extendditi2): Simplify to
	extend<mode>ti2.
	(zero_extendqiti2, zero_extendhiti2, <v>lshr<mode>3_imm): Define.
	(lshr<mode>3, lshr<mode>3_imm, lshr<mode>3_re): Simplify to one
	define_insn_and_split of lshr<mode>3.
	(shrqbybi_<mode>, shrqby_<mode>): Simplify to define_expand.
	(<v>ashr<mode>3_imm): Define.
	(extv, extzv, insv): Allow MEM operands.
	(trunc_shr_ti<mode>, trunc_shr_tidi, shl_ext_<mode>ti,
	shl_ext_diti, sext_trunc_lshr_tiqisi, zext_trunc_lshr_tiqisi,
	sext_trunc_lshr_tihisi, zext_trunc_lshr_tihisi): Define for combine.
	(_spu_convert2): Change to define_insn_and_split and remove the
	corresponding define_peephole2.
	(stack_protect_set, stack_protect_test, stack_protect_test_si):
	Change predicates to memory_operand.

From-SVN: r147814

gcc/config/spu/predicates.md

@@ -39,14 +39,14 @@
(ior (not (match_code "subreg"))
(match_test "valid_subreg (op)"))))
(define_predicate "spu_mem_operand"
(and (match_operand 0 "memory_operand")
(match_test "reload_in_progress || reload_completed || aligned_mem_p (op)")))
(define_predicate "spu_mov_operand"
(ior (match_operand 0 "spu_mem_operand")
(ior (match_operand 0 "memory_operand")
(match_operand 0 "spu_nonmem_operand")))
(define_predicate "spu_dest_operand"
(ior (match_operand 0 "memory_operand")
(match_operand 0 "spu_reg_operand")))
(define_predicate "call_operand"
(and (match_code "mem")
(match_test "(!TARGET_LARGE_MEM && satisfies_constraint_S (op))
@@ -114,3 +114,9 @@
(and (match_operand 0 "immediate_operand")
(match_test "exp2_immediate_p (op, mode, 0, 127)"))))
(define_predicate "shiftrt_operator"
(match_code "lshiftrt,ashiftrt"))
(define_predicate "extend_operator"
(match_code "sign_extend,zero_extend"))

gcc/config/spu/spu-protos.h

@@ -62,11 +62,9 @@ extern void spu_setup_incoming_varargs (int *cum, enum machine_mode mode,
tree type, int *pretend_size,
int no_rtl);
extern void spu_conditional_register_usage (void);
extern int aligned_mem_p (rtx mem);
extern int spu_expand_mov (rtx * ops, enum machine_mode mode);
extern void spu_split_load (rtx * ops);
extern void spu_split_store (rtx * ops);
extern int spu_valid_move (rtx * ops);
extern int spu_split_load (rtx * ops);
extern int spu_split_store (rtx * ops);
extern int fsmbi_const_p (rtx x);
extern int cpat_const_p (rtx x, enum machine_mode mode);
extern rtx gen_cpat_const (rtx * ops);
@@ -87,6 +85,7 @@ extern void spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt);
extern void spu_expand_sign_extend (rtx ops[]);
extern void spu_expand_vector_init (rtx target, rtx vals);
extern void spu_init_expanders (void);
extern void spu_split_convert (rtx *);
/* spu-c.c */
extern tree spu_resolve_overloaded_builtin (tree fndecl, void *fnargs);

gcc/config/spu/spu.c

@@ -189,9 +189,9 @@ static tree spu_build_builtin_va_list (void);
static void spu_va_start (tree, rtx);
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
gimple_seq * pre_p, gimple_seq * post_p);
static int regno_aligned_for_load (int regno);
static int store_with_one_insn_p (rtx mem);
static int mem_is_padded_component_ref (rtx x);
static int reg_aligned_for_addr (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
@@ -211,6 +211,7 @@ static tree spu_builtin_vec_perm (tree, tree *);
static int spu_sms_res_mii (struct ddg *g);
static void asm_file_start (void);
static unsigned int spu_section_type_flags (tree, const char *, int);
static rtx spu_expand_load (rtx, rtx, rtx, int);
extern const char *reg_names[];
@@ -582,66 +583,85 @@ adjust_operand (rtx op, HOST_WIDE_INT * start)
void
spu_expand_extv (rtx ops[], int unsignedp)
{
rtx dst = ops[0], src = ops[1];
HOST_WIDE_INT width = INTVAL (ops[2]);
HOST_WIDE_INT start = INTVAL (ops[3]);
HOST_WIDE_INT src_size, dst_size;
enum machine_mode src_mode, dst_mode;
rtx dst = ops[0], src = ops[1];
rtx s;
HOST_WIDE_INT align_mask;
rtx s0, s1, mask, r0;
dst = adjust_operand (ops[0], 0);
dst_mode = GET_MODE (dst);
dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
src = adjust_operand (src, &start);
src_mode = GET_MODE (src);
src_size = GET_MODE_BITSIZE (GET_MODE (src));
if (start > 0)
if (MEM_P (src))
{
s = gen_reg_rtx (src_mode);
switch (src_mode)
/* First, determine if we need 1 TImode load or 2. We need only 1
if the bits being extracted do not cross the alignment boundary
as determined by the MEM and its address. */
align_mask = -MEM_ALIGN (src);
if ((start & align_mask) == ((start + width - 1) & align_mask))
{
case SImode:
emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
break;
case DImode:
emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
break;
case TImode:
emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
break;
default:
abort ();
/* Alignment is sufficient for 1 load. */
s0 = gen_reg_rtx (TImode);
r0 = spu_expand_load (s0, 0, src, start / 8);
start &= 7;
if (r0)
emit_insn (gen_rotqby_ti (s0, s0, r0));
}
src = s;
else
{
/* Need 2 loads. */
s0 = gen_reg_rtx (TImode);
s1 = gen_reg_rtx (TImode);
r0 = spu_expand_load (s0, s1, src, start / 8);
start &= 7;
gcc_assert (start + width <= 128);
if (r0)
{
rtx r1 = gen_reg_rtx (SImode);
mask = gen_reg_rtx (TImode);
emit_move_insn (mask, GEN_INT (-1));
emit_insn (gen_rotqby_ti (s0, s0, r0));
emit_insn (gen_rotqby_ti (s1, s1, r0));
if (GET_CODE (r0) == CONST_INT)
r1 = GEN_INT (INTVAL (r0) & 15);
else
emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
emit_insn (gen_shlqby_ti (mask, mask, r1));
emit_insn (gen_selb (s0, s1, s0, mask));
}
}
}
else if (GET_CODE (src) == SUBREG)
{
rtx r = SUBREG_REG (src);
gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
s0 = gen_reg_rtx (TImode);
if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
else
emit_move_insn (s0, src);
}
else
{
gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
s0 = gen_reg_rtx (TImode);
emit_move_insn (s0, src);
}
if (width < src_size)
/* Now s0 is TImode and contains the bits to extract at start. */
if (start)
emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
if (128 - width)
{
rtx pat;
int icode;
switch (src_mode)
{
case SImode:
icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
break;
case DImode:
icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
break;
case TImode:
icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
break;
default:
abort ();
}
s = gen_reg_rtx (src_mode);
pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
emit_insn (pat);
src = s;
tree c = build_int_cst (NULL_TREE, 128 - width);
s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
}
convert_move (dst, src, unsignedp);
emit_move_insn (dst, s0);
}
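/* Editor's note: a minimal standalone C sketch (not part of the patch) of
   the alignment test used above to decide between one and two TImode
   loads.  ALIGN_BITS stands in for MEM_ALIGN (src); start and width are
   bit positions, exactly as in spu_expand_extv.  */
#include <assert.h>

static int
extraction_needs_two_loads (long start, long width, long align_bits)
{
  long align_mask = -align_bits;   /* e.g. -128 for a 16-byte-aligned MEM */
  /* One load is enough when the first and last extracted bits fall in the
     same align_bits-sized block.  */
  return (start & align_mask) != ((start + width - 1) & align_mask);
}

int
main (void)
{
  assert (!extraction_needs_two_loads (120, 8, 128));  /* stays in one quadword */
  assert (extraction_needs_two_loads (120, 16, 128));  /* straddles the boundary */
  return 0;
}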
void
@@ -734,38 +754,41 @@ spu_expand_insv (rtx ops[])
}
if (GET_CODE (ops[0]) == MEM)
{
rtx aligned = gen_reg_rtx (SImode);
rtx low = gen_reg_rtx (SImode);
rtx addr = gen_reg_rtx (SImode);
rtx rotl = gen_reg_rtx (SImode);
rtx mask0 = gen_reg_rtx (TImode);
rtx addr;
rtx addr0;
rtx addr1;
rtx mem;
emit_move_insn (addr, XEXP (ops[0], 0));
emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
addr = force_reg (Pmode, XEXP (ops[0], 0));
addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
emit_insn (gen_negsi2 (rotl, low));
emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
mem = change_address (ops[0], TImode, aligned);
mem = change_address (ops[0], TImode, addr0);
set_mem_alias_set (mem, 0);
emit_move_insn (dst, mem);
emit_insn (gen_selb (dst, dst, shift_reg, mask0));
emit_move_insn (mem, dst);
if (start + width > MEM_ALIGN (ops[0]))
{
rtx shl = gen_reg_rtx (SImode);
rtx mask1 = gen_reg_rtx (TImode);
rtx dst1 = gen_reg_rtx (TImode);
rtx mem1;
addr1 = plus_constant (addr, 16);
addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
emit_insn (gen_shlqby_ti (mask1, mask, shl));
mem1 = adjust_address (mem, TImode, 16);
mem1 = change_address (ops[0], TImode, addr1);
set_mem_alias_set (mem1, 0);
emit_move_insn (dst1, mem1);
emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
emit_move_insn (mem1, dst1);
}
emit_move_insn (mem, dst);
}
else
emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
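/* Editor's note: a small C sketch (not part of the patch) of the bitwise
   select that the gen_selb calls above perform when merging the shifted
   source into the loaded quadword: mask bits that are 1 take the new
   value, mask bits that are 0 keep the original memory contents.  This
   reflects my reading of the selb pattern and is only an illustration.  */
static unsigned long
bit_select (unsigned long old_bits, unsigned long new_bits, unsigned long mask)
{
  return (old_bits & ~mask) | (new_bits & mask);
}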
@@ -2998,7 +3021,7 @@ spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
insn = ready[i];
if (INSN_CODE (insn) == -1
|| INSN_CODE (insn) == CODE_FOR_blockage
|| INSN_CODE (insn) == CODE_FOR__spu_convert)
|| (INSN_P (insn) && get_attr_length (insn) == 0))
{
ready[i] = ready[nready - 1];
ready[nready - 1] = insn;
@@ -3129,8 +3152,8 @@ spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|| INSN_CODE (dep_insn) == CODE_FOR_blockage)
return 0;
if (INSN_CODE (insn) == CODE_FOR__spu_convert
|| INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
if ((INSN_P (insn) && get_attr_length (insn) == 0)
|| (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
return 0;
/* Make sure hbrps are spread out. */
@@ -3611,44 +3634,36 @@ spu_legitimate_constant_p (rtx x)
/* Valid address are:
- symbol_ref, label_ref, const
- reg
- reg + const, where either reg or const is 16 byte aligned
- reg + const_int, where const_int is 16 byte aligned
- reg + reg, alignment doesn't matter
The alignment matters in the reg+const case because lqd and stqd
ignore the 4 least significant bits of the const. (TODO: It might be
preferable to allow any alignment and fix it up when splitting.) */
bool
spu_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
ignore the 4 least significant bits of the const. We only care about
16 byte modes because the expand phase will change all smaller MEM
references to TImode. */
static bool
spu_legitimate_address_p (enum machine_mode mode,
rtx x, bool reg_ok_strict)
{
if (mode == TImode && GET_CODE (x) == AND
int aligned = GET_MODE_SIZE (mode) >= 16;
if (aligned
&& GET_CODE (x) == AND
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
&& INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
x = XEXP (x, 0);
switch (GET_CODE (x))
{
case SYMBOL_REF:
case LABEL_REF:
return !TARGET_LARGE_MEM;
case SYMBOL_REF:
case CONST:
if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
{
rtx sym = XEXP (XEXP (x, 0), 0);
rtx cst = XEXP (XEXP (x, 0), 1);
/* Accept any symbol_ref + constant, assuming it does not
wrap around the local store addressability limit. */
if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
return 1;
}
return 0;
return !TARGET_LARGE_MEM;
case CONST_INT:
return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
case SUBREG:
x = XEXP (x, 0);
gcc_assert (GET_CODE (x) == REG);
if (REG_P (x))
return 0;
case REG:
return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
@@ -3662,29 +3677,25 @@ spu_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
op0 = XEXP (op0, 0);
if (GET_CODE (op1) == SUBREG)
op1 = XEXP (op1, 0);
/* We can't just accept any aligned register because CSE can
change it to a register that is not marked aligned and then
recog will fail. So we only accept frame registers because
they will only be changed to other frame registers. */
if (GET_CODE (op0) == REG
&& INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
&& GET_CODE (op1) == CONST_INT
&& INTVAL (op1) >= -0x2000
&& INTVAL (op1) <= 0x1fff
&& (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
return 1;
&& (!aligned || (INTVAL (op1) & 15) == 0))
return TRUE;
if (GET_CODE (op0) == REG
&& INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
&& GET_CODE (op1) == REG
&& INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
return 1;
return TRUE;
}
break;
default:
break;
}
return 0;
return FALSE;
}
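/* Editor's note: standalone C sketch (not part of the patch) of the
   reg + const_int test above: the offset must fit the signed 14-bit
   d-form range, and for 16-byte-or-larger modes it must also be 16-byte
   aligned because lqd/stqd ignore the low 4 bits of the displacement.  */
static int
dform_offset_ok (long offset, int mode_size_bytes)
{
  if (offset < -0x2000 || offset > 0x1fff)
    return 0;                        /* outside the 14-bit signed range */
  if (mode_size_bytes >= 16 && (offset & 15) != 0)
    return 0;                        /* low bits would be silently dropped */
  return 1;
}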
/* When the address is reg + const_int, force the const_int into a
@@ -4137,60 +4148,14 @@ spu_conditional_register_usage (void)
}
}
/* This is called to decide when we can simplify a load instruction. We
must only return true for registers which we know will always be
aligned. Taking into account that CSE might replace this reg with
another one that has not been marked aligned.
So this is really only true for frame, stack and virtual registers,
which we know are always aligned and should not be adversely effected
by CSE. */
/* This is called any time we inspect the alignment of a register for
addresses. */
static int
regno_aligned_for_load (int regno)
reg_aligned_for_addr (rtx x)
{
return regno == FRAME_POINTER_REGNUM
|| (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
|| regno == ARG_POINTER_REGNUM
|| regno == STACK_POINTER_REGNUM
|| (regno >= FIRST_VIRTUAL_REGISTER
&& regno <= LAST_VIRTUAL_REGISTER);
}
/* Return TRUE when mem is known to be 16-byte aligned. */
int
aligned_mem_p (rtx mem)
{
if (MEM_ALIGN (mem) >= 128)
return 1;
if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
return 1;
if (GET_CODE (XEXP (mem, 0)) == PLUS)
{
rtx p0 = XEXP (XEXP (mem, 0), 0);
rtx p1 = XEXP (XEXP (mem, 0), 1);
if (regno_aligned_for_load (REGNO (p0)))
{
if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
return 1;
if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
return 1;
}
}
else if (GET_CODE (XEXP (mem, 0)) == REG)
{
if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
return 1;
}
else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
return 1;
else if (GET_CODE (XEXP (mem, 0)) == CONST)
{
rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
if (GET_CODE (p0) == SYMBOL_REF
&& GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
return 1;
}
return 0;
int regno =
REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
return REGNO_POINTER_ALIGN (regno) >= 128;
}
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
@@ -4219,9 +4184,12 @@ spu_encode_section_info (tree decl, rtx rtl, int first)
static int
store_with_one_insn_p (rtx mem)
{
enum machine_mode mode = GET_MODE (mem);
rtx addr = XEXP (mem, 0);
if (GET_MODE (mem) == BLKmode)
if (mode == BLKmode)
return 0;
if (GET_MODE_SIZE (mode) >= 16)
return 1;
/* Only static objects. */
if (GET_CODE (addr) == SYMBOL_REF)
{
@@ -4245,6 +4213,22 @@ store_with_one_insn_p (rtx mem)
return 0;
}
/* Return 1 when the address is not valid for a simple load and store as
required by the '_mov*' patterns. We could make this less strict
for loads, but we prefer mem's to look the same so they are more
likely to be merged. */
static int
address_needs_split (rtx mem)
{
if (GET_MODE_SIZE (GET_MODE (mem)) < 16
&& (GET_MODE_SIZE (GET_MODE (mem)) < 4
|| !(store_with_one_insn_p (mem)
|| mem_is_padded_component_ref (mem))))
return 1;
return 0;
}
int
spu_expand_mov (rtx * ops, enum machine_mode mode)
{
@@ -4289,54 +4273,63 @@ spu_expand_mov (rtx * ops, enum machine_mode mode)
return spu_split_immediate (ops);
return 0;
}
else
/* Catch the SImode immediates greater than 0x7fffffff, and sign
extend them. */
if (GET_CODE (ops[1]) == CONST_INT)
{
if (GET_CODE (ops[0]) == MEM)
HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
if (val != INTVAL (ops[1]))
{
if (!spu_valid_move (ops))
{
emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
gen_reg_rtx (TImode)));
return 1;
}
}
else if (GET_CODE (ops[1]) == MEM)
{
if (!spu_valid_move (ops))
{
emit_insn (gen_load
(ops[0], ops[1], gen_reg_rtx (TImode),
gen_reg_rtx (SImode)));
return 1;
}
}
/* Catch the SImode immediates greater than 0x7fffffff, and sign
extend them. */
if (GET_CODE (ops[1]) == CONST_INT)
{
HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
if (val != INTVAL (ops[1]))
{
emit_move_insn (ops[0], GEN_INT (val));
return 1;
}
emit_move_insn (ops[0], GEN_INT (val));
return 1;
}
}
if (MEM_P (ops[0]))
return spu_split_store (ops);
if (MEM_P (ops[1]))
return spu_split_load (ops);
return 0;
}
void
spu_split_load (rtx * ops)
static void
spu_convert_move (rtx dst, rtx src)
{
enum machine_mode mode = GET_MODE (ops[0]);
rtx addr, load, rot, mem, p0, p1;
int rot_amt;
enum machine_mode mode = GET_MODE (dst);
enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
rtx reg;
gcc_assert (GET_MODE (src) == TImode);
reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
emit_insn (gen_rtx_SET (VOIDmode, reg,
gen_rtx_TRUNCATE (int_mode,
gen_rtx_LSHIFTRT (TImode, src,
GEN_INT (int_mode == DImode ? 64 : 96)))));
if (int_mode != mode)
{
reg = simplify_gen_subreg (mode, reg, int_mode, 0);
emit_move_insn (dst, reg);
}
}
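/* Editor's note: C sketch (not part of the patch) of what spu_convert_move
   emits: the scalar sits in the preferred slot of the 128-bit register, so
   a logical right shift of the TImode value by 96 bits (64 for DImode)
   followed by truncation yields it.  unsigned __int128 is a host-compiler
   assumption used purely for illustration.  */
static unsigned int
preferred_slot_si (unsigned __int128 quad)
{
  return (unsigned int) (quad >> 96);        /* TImode LSHIFTRT 96, TRUNCATE:SI */
}

static unsigned long long
preferred_slot_di (unsigned __int128 quad)
{
  return (unsigned long long) (quad >> 64);  /* TImode LSHIFTRT 64, TRUNCATE:DI */
}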
addr = XEXP (ops[1], 0);
/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
the address from SRC and SRC+16. Return a REG or CONST_INT that
specifies how many bytes to rotate the loaded registers, plus any
extra from EXTRA_ROTQBY. The address and rotate amounts are
normalized to improve merging of loads and rotate computations. */
static rtx
spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
{
rtx addr = XEXP (src, 0);
rtx p0, p1, rot, addr0, addr1;
int rot_amt;
rot = 0;
rot_amt = 0;
if (GET_CODE (addr) == PLUS)
if (MEM_ALIGN (src) >= 128)
/* Address is already aligned; simply perform a TImode load. */ ;
else if (GET_CODE (addr) == PLUS)
{
/* 8 cases:
aligned reg + aligned reg => lqx
@@ -4350,12 +4343,34 @@
*/
p0 = XEXP (addr, 0);
p1 = XEXP (addr, 1);
if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
if (!reg_aligned_for_addr (p0))
{
if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
if (REG_P (p1) && !reg_aligned_for_addr (p1))
{
emit_insn (gen_addsi3 (ops[3], p0, p1));
rot = ops[3];
rot = gen_reg_rtx (SImode);
emit_insn (gen_addsi3 (rot, p0, p1));
}
else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
{
if (INTVAL (p1) > 0
&& REG_POINTER (p0)
&& INTVAL (p1) * BITS_PER_UNIT
< REGNO_POINTER_ALIGN (REGNO (p0)))
{
rot = gen_reg_rtx (SImode);
emit_insn (gen_addsi3 (rot, p0, p1));
addr = p0;
}
else
{
rtx x = gen_reg_rtx (SImode);
emit_move_insn (x, p1);
if (!spu_arith_operand (p1, SImode))
p1 = x;
rot = gen_reg_rtx (SImode);
emit_insn (gen_addsi3 (rot, p0, p1));
addr = gen_rtx_PLUS (Pmode, p0, x);
}
}
else
rot = p0;
@@ -4365,16 +4380,21 @@
if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
{
rot_amt = INTVAL (p1) & 15;
p1 = GEN_INT (INTVAL (p1) & -16);
addr = gen_rtx_PLUS (SImode, p0, p1);
if (INTVAL (p1) & -16)
{
p1 = GEN_INT (INTVAL (p1) & -16);
addr = gen_rtx_PLUS (SImode, p0, p1);
}
else
addr = p0;
}
else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
else if (REG_P (p1) && !reg_aligned_for_addr (p1))
rot = p1;
}
}
else if (GET_CODE (addr) == REG)
else if (REG_P (addr))
{
if (!regno_aligned_for_load (REGNO (addr)))
if (!reg_aligned_for_addr (addr))
rot = addr;
}
else if (GET_CODE (addr) == CONST)
@@ -4393,7 +4413,10 @@
addr = XEXP (XEXP (addr, 0), 0);
}
else
rot = addr;
{
rot = gen_reg_rtx (Pmode);
emit_move_insn (rot, addr);
}
}
else if (GET_CODE (addr) == CONST_INT)
{
@@ -4401,49 +4424,96 @@
addr = GEN_INT (rot_amt & -16);
}
else if (!ALIGNED_SYMBOL_REF_P (addr))
rot = addr;
{
rot = gen_reg_rtx (Pmode);
emit_move_insn (rot, addr);
}
if (GET_MODE_SIZE (mode) < 4)
rot_amt += GET_MODE_SIZE (mode) - 4;
rot_amt += extra_rotby;
rot_amt &= 15;
if (rot && rot_amt)
{
emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
rot = ops[3];
rtx x = gen_reg_rtx (SImode);
emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
rot = x;
rot_amt = 0;
}
if (!rot && rot_amt)
rot = GEN_INT (rot_amt);
load = ops[2];
addr0 = copy_rtx (addr);
addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
mem = change_address (ops[1], TImode, addr);
if (dst1)
{
addr1 = plus_constant (copy_rtx (addr), 16);
addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
}
emit_insn (gen_movti (load, mem));
return rot;
}
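/* Editor's note: standalone C sketch (not part of the patch) of the offset
   normalization done in spu_expand_load above for reg + const_int
   addresses: the low four bits of the offset become a byte-rotate amount
   for the loaded quadword, and only the 16-byte-aligned part stays in the
   address so the lqd can encode it.  */
struct split_offset { long aligned_offset; int rotate_bytes; };

static struct split_offset
normalize_offset (long offset)
{
  struct split_offset s;
  s.rotate_bytes   = (int) (offset & 15);  /* extra rotqby amount            */
  s.aligned_offset = offset & -16;         /* what the load itself addresses */
  return s;
}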
int
spu_split_load (rtx * ops)
{
enum machine_mode mode = GET_MODE (ops[0]);
rtx addr, load, rot;
int rot_amt;
if (GET_MODE_SIZE (mode) >= 16)
return 0;
addr = XEXP (ops[1], 0);
gcc_assert (GET_CODE (addr) != AND);
if (!address_needs_split (ops[1]))
{
ops[1] = change_address (ops[1], TImode, addr);
load = gen_reg_rtx (TImode);
emit_insn (gen__movti (load, ops[1]));
spu_convert_move (ops[0], load);
return 1;
}
rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
load = gen_reg_rtx (TImode);
rot = spu_expand_load (load, 0, ops[1], rot_amt);
if (rot)
emit_insn (gen_rotqby_ti (load, load, rot));
else if (rot_amt)
emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
if (reload_completed)
emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
else
emit_insn (gen_spu_convert (ops[0], load));
spu_convert_move (ops[0], load);
return 1;
}
void
int
spu_split_store (rtx * ops)
{
enum machine_mode mode = GET_MODE (ops[0]);
rtx pat = ops[2];
rtx reg = ops[3];
rtx reg;
rtx addr, p0, p1, p1_lo, smem;
int aform;
int scalar;
if (GET_MODE_SIZE (mode) >= 16)
return 0;
addr = XEXP (ops[0], 0);
gcc_assert (GET_CODE (addr) != AND);
if (!address_needs_split (ops[0]))
{
reg = gen_reg_rtx (TImode);
emit_insn (gen_spu_convert (reg, ops[1]));
ops[0] = change_address (ops[0], TImode, addr);
emit_move_insn (ops[0], reg);
return 1;
}
if (GET_CODE (addr) == PLUS)
{
@@ -4455,19 +4525,31 @@ spu_split_store (rtx * ops)
unaligned reg + aligned reg => lqx, c?x, shuf, stqx
unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
unaligned reg + aligned const => lqd, c?d, shuf, stqx
unaligned reg + unaligned const -> not allowed by legitimate address
unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
*/
aform = 0;
p0 = XEXP (addr, 0);
p1 = p1_lo = XEXP (addr, 1);
if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
{
p1_lo = GEN_INT (INTVAL (p1) & 15);
p1 = GEN_INT (INTVAL (p1) & -16);
addr = gen_rtx_PLUS (SImode, p0, p1);
if (reg_aligned_for_addr (p0))
{
p1 = GEN_INT (INTVAL (p1) & -16);
if (p1 == const0_rtx)
addr = p0;
else
addr = gen_rtx_PLUS (SImode, p0, p1);
}
else
{
rtx x = gen_reg_rtx (SImode);
emit_move_insn (x, p1);
addr = gen_rtx_PLUS (SImode, p0, x);
}
}
}
else if (GET_CODE (addr) == REG)
else if (REG_P (addr))
{
aform = 0;
p0 = addr;
@@ -4481,31 +4563,34 @@
p1_lo = addr;
if (ALIGNED_SYMBOL_REF_P (addr))
p1_lo = const0_rtx;
else if (GET_CODE (addr) == CONST)
else if (GET_CODE (addr) == CONST
&& GET_CODE (XEXP (addr, 0)) == PLUS
&& ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
&& GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
{
if (GET_CODE (XEXP (addr, 0)) == PLUS
&& ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
&& GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
{
HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
if ((v & -16) != 0)
addr = gen_rtx_CONST (Pmode,
gen_rtx_PLUS (Pmode,
XEXP (XEXP (addr, 0), 0),
GEN_INT (v & -16)));
else
addr = XEXP (XEXP (addr, 0), 0);
p1_lo = GEN_INT (v & 15);
}
HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
if ((v & -16) != 0)
addr = gen_rtx_CONST (Pmode,
gen_rtx_PLUS (Pmode,
XEXP (XEXP (addr, 0), 0),
GEN_INT (v & -16)));
else
addr = XEXP (XEXP (addr, 0), 0);
p1_lo = GEN_INT (v & 15);
}
else if (GET_CODE (addr) == CONST_INT)
{
p1_lo = GEN_INT (INTVAL (addr) & 15);
addr = GEN_INT (INTVAL (addr) & -16);
}
else
{
p1_lo = gen_reg_rtx (SImode);
emit_move_insn (p1_lo, addr);
}
}
addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
reg = gen_reg_rtx (TImode);
scalar = store_with_one_insn_p (ops[0]);
if (!scalar)
@@ -4515,11 +4600,12 @@
possible, and copying the flags will prevent that in certain
cases, e.g. consider the volatile flag. */
rtx pat = gen_reg_rtx (TImode);
rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
set_mem_alias_set (lmem, 0);
emit_insn (gen_movti (reg, lmem));
if (!p0 || regno_aligned_for_load (REGNO (p0)))
if (!p0 || reg_aligned_for_addr (p0))
p0 = stack_pointer_rtx;
if (!p1_lo)
p1_lo = const0_rtx;
@@ -4527,17 +4613,6 @@
emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
emit_insn (gen_shufb (reg, ops[1], reg, pat));
}
else if (reload_completed)
{
if (GET_CODE (ops[1]) == REG)
emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
else if (GET_CODE (ops[1]) == SUBREG)
emit_move_insn (reg,
gen_rtx_REG (GET_MODE (reg),
REGNO (SUBREG_REG (ops[1]))));
else
abort ();
}
else
{
if (GET_CODE (ops[1]) == REG)
@@ -4549,15 +4624,16 @@
}
if (GET_MODE_SIZE (mode) < 4 && scalar)
emit_insn (gen_shlqby_ti
(reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
emit_insn (gen_ashlti3
(reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
smem = change_address (ops[0], TImode, addr);
smem = change_address (ops[0], TImode, copy_rtx (addr));
/* We can't use the previous alias set because the memory has changed
size and can potentially overlap objects of other types. */
set_mem_alias_set (smem, 0);
emit_insn (gen_movti (smem, reg));
return 1;
}
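/* Editor's note: byte-level C sketch (not part of the patch) of the
   unaligned-store recipe spu_split_store implements when a plain stqd
   cannot be used: load the enclosing aligned quadword, splice the new
   bytes in at the right offset (the job of cpat + shufb above), and store
   the whole quadword back.  */
static void
merge_into_quadword (unsigned char quad[16], const unsigned char *value,
                     int byte_offset, int size)
{
  int i;
  for (i = 0; i < size; i++)
    quad[byte_offset + i] = value[i];
}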
/* Return TRUE if X is MEM which is a struct member reference
@@ -4656,37 +4732,6 @@ fix_range (const char *const_str)
}
}
int
spu_valid_move (rtx * ops)
{
enum machine_mode mode = GET_MODE (ops[0]);
if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
return 0;
/* init_expr_once tries to recog against load and store insns to set
the direct_load[] and direct_store[] arrays. We always want to
consider those loads and stores valid. init_expr_once is called in
the context of a dummy function which does not have a decl. */
if (cfun->decl == 0)
return 1;
/* Don't allows loads/stores which would require more than 1 insn.
During and after reload we assume loads and stores only take 1
insn. */
if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
{
if (GET_CODE (ops[0]) == MEM
&& (GET_MODE_SIZE (mode) < 4
|| !(store_with_one_insn_p (ops[0])
|| mem_is_padded_component_ref (ops[0]))))
return 0;
if (GET_CODE (ops[1]) == MEM
&& (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
return 0;
}
return 1;
}
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
can be generated using the fsmbi instruction. */
int
@@ -6400,12 +6445,25 @@ spu_sms_res_mii (struct ddg *g)
void
spu_init_expanders (void)
{
/* HARD_FRAME_REGISTER is only 128 bit aligned when
* frame_pointer_needed is true. We don't know that until we're
* expanding the prologue. */
{
if (cfun)
REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
{
rtx r0, r1;
/* HARD_FRAME_REGISTER is only 128 bit aligned when
frame_pointer_needed is true. We don't know that until we're
expanding the prologue. */
REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
/* A number of passes use LAST_VIRTUAL_REGISTER+1 and
LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
to be treated as aligned, so generate them here. */
r0 = gen_reg_rtx (SImode);
r1 = gen_reg_rtx (SImode);
mark_reg_pointer (r0, 128);
mark_reg_pointer (r1, 128);
gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
&& REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
}
}
static enum machine_mode
@@ -6480,4 +6538,20 @@ spu_gen_exp2 (enum machine_mode mode, rtx scale)
}
}
/* After reload, just change the convert into a move instruction
or a dead instruction. */
void
spu_split_convert (rtx ops[])
{
if (REGNO (ops[0]) == REGNO (ops[1]))
emit_note (NOTE_INSN_DELETED);
else
{
/* Use TImode always as this might help hard reg copyprop. */
rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
emit_insn (gen_move_insn (op0, op1));
}
}
#include "gt-spu.h"

gcc/config/spu/spu.md

@@ -178,6 +178,8 @@
SF V4SF
DF V2DF])
(define_mode_iterator QHSI [QI HI SI])
(define_mode_iterator QHSDI [QI HI SI DI])
(define_mode_iterator DTI [DI TI])
(define_mode_iterator VINT [QI V16QI
@@ -316,9 +318,10 @@
;; move internal
(define_insn "_mov<mode>"
[(set (match_operand:MOV 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
[(set (match_operand:MOV 0 "spu_dest_operand" "=r,r,r,r,r,m")
(match_operand:MOV 1 "spu_mov_operand" "r,A,f,j,m,r"))]
"spu_valid_move (operands)"
"register_operand(operands[0], <MODE>mode)
|| register_operand(operands[1], <MODE>mode)"
"@
ori\t%0,%1,0
il%s1\t%0,%S1
@@ -336,9 +339,10 @@
"iohl\t%0,%2@l")
(define_insn "_movdi"
[(set (match_operand:DI 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
[(set (match_operand:DI 0 "spu_dest_operand" "=r,r,r,r,r,m")
(match_operand:DI 1 "spu_mov_operand" "r,a,f,k,m,r"))]
"spu_valid_move (operands)"
"register_operand(operands[0], DImode)
|| register_operand(operands[1], DImode)"
"@
ori\t%0,%1,0
il%d1\t%0,%D1
@@ -349,9 +353,10 @@
[(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
(define_insn "_movti"
[(set (match_operand:TI 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
[(set (match_operand:TI 0 "spu_dest_operand" "=r,r,r,r,r,m")
(match_operand:TI 1 "spu_mov_operand" "r,U,f,l,m,r"))]
"spu_valid_move (operands)"
"register_operand(operands[0], TImode)
|| register_operand(operands[1], TImode)"
"@
ori\t%0,%1,0
il%t1\t%0,%T1
@@ -361,30 +366,29 @@
stq%p0\t%1,%0"
[(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
(define_insn_and_split "load"
[(set (match_operand 0 "spu_reg_operand" "=r")
(match_operand 1 "memory_operand" "m"))
(clobber (match_operand:TI 2 "spu_reg_operand" "=&r"))
(clobber (match_operand:SI 3 "spu_reg_operand" "=&r"))]
"GET_MODE(operands[0]) == GET_MODE(operands[1])"
"#"
""
(define_split
[(set (match_operand 0 "spu_reg_operand")
(match_operand 1 "memory_operand"))]
"GET_MODE_SIZE (GET_MODE (operands[0])) < 16
&& GET_MODE(operands[0]) == GET_MODE(operands[1])
&& !reload_in_progress && !reload_completed"
[(set (match_dup 0)
(match_dup 1))]
{ spu_split_load(operands); DONE; })
{ if (spu_split_load(operands))
DONE;
})
(define_insn_and_split "store"
[(set (match_operand 0 "memory_operand" "=m")
(match_operand 1 "spu_reg_operand" "r"))
(clobber (match_operand:TI 2 "spu_reg_operand" "=&r"))
(clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))]
"GET_MODE(operands[0]) == GET_MODE(operands[1])"
"#"
""
(define_split
[(set (match_operand 0 "memory_operand")
(match_operand 1 "spu_reg_operand"))]
"GET_MODE_SIZE (GET_MODE (operands[0])) < 16
&& GET_MODE(operands[0]) == GET_MODE(operands[1])
&& !reload_in_progress && !reload_completed"
[(set (match_dup 0)
(match_dup 1))]
{ spu_split_store(operands); DONE; })
{ if (spu_split_store(operands))
DONE;
})
;; Operand 3 is the number of bytes. 1:b 2:h 4:w 8:d
(define_expand "cpat"
@@ -462,33 +466,20 @@
""
"xswd\t%0,%1");
(define_expand "extendqiti2"
;; By splitting this late we don't allow much opportunity for sharing of
;; constants. That's ok because this should really be optimized away.
(define_insn_and_split "extend<mode>ti2"
[(set (match_operand:TI 0 "register_operand" "")
(sign_extend:TI (match_operand:QI 1 "register_operand" "")))]
(sign_extend:TI (match_operand:QHSDI 1 "register_operand" "")))]
""
"spu_expand_sign_extend(operands);
DONE;")
(define_expand "extendhiti2"
[(set (match_operand:TI 0 "register_operand" "")
(sign_extend:TI (match_operand:HI 1 "register_operand" "")))]
"#"
""
"spu_expand_sign_extend(operands);
DONE;")
(define_expand "extendsiti2"
[(set (match_operand:TI 0 "register_operand" "")
(sign_extend:TI (match_operand:SI 1 "register_operand" "")))]
""
"spu_expand_sign_extend(operands);
DONE;")
(define_expand "extendditi2"
[(set (match_operand:TI 0 "register_operand" "")
(sign_extend:TI (match_operand:DI 1 "register_operand" "")))]
""
"spu_expand_sign_extend(operands);
DONE;")
[(set (match_dup:TI 0)
(sign_extend:TI (match_dup:QHSDI 1)))]
{
spu_expand_sign_extend(operands);
DONE;
})
;; zero_extend
@@ -525,6 +516,22 @@
"rotqmbyi\t%0,%1,-4"
[(set_attr "type" "shuf")])
(define_insn "zero_extendqiti2"
[(set (match_operand:TI 0 "spu_reg_operand" "=r")
(zero_extend:TI (match_operand:QI 1 "spu_reg_operand" "r")))]
""
"andi\t%0,%1,0x00ff\;rotqmbyi\t%0,%0,-12"
[(set_attr "type" "multi0")
(set_attr "length" "8")])
(define_insn "zero_extendhiti2"
[(set (match_operand:TI 0 "spu_reg_operand" "=r")
(zero_extend:TI (match_operand:HI 1 "spu_reg_operand" "r")))]
""
"shli\t%0,%1,16\;rotqmbyi\t%0,%0,-14"
[(set_attr "type" "multi1")
(set_attr "length" "8")])
(define_insn "zero_extendsiti2"
[(set (match_operand:TI 0 "spu_reg_operand" "=r")
(zero_extend:TI (match_operand:SI 1 "spu_reg_operand" "r")))]
@@ -2348,6 +2355,13 @@
""
[(set_attr "type" "*,fx3")])
(define_insn "<v>lshr<mode>3_imm"
[(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
(lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")
(match_operand:VHSI 2 "immediate_operand" "W")))]
""
"rot<bh>mi\t%0,%1,-%<umask>2"
[(set_attr "type" "fx3")])
(define_insn "rotm_<mode>"
[(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
@@ -2359,89 +2373,59 @@
rot<bh>mi\t%0,%1,-%<nmask>2"
[(set_attr "type" "fx3")])
(define_expand "lshr<mode>3"
[(parallel [(set (match_operand:DTI 0 "spu_reg_operand" "")
(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "")
(match_operand:SI 2 "spu_nonmem_operand" "")))
(clobber (match_dup:DTI 3))
(clobber (match_dup:SI 4))
(clobber (match_dup:SI 5))])]
""
"if (GET_CODE (operands[2]) == CONST_INT)
{
emit_insn (gen_lshr<mode>3_imm(operands[0], operands[1], operands[2]));
DONE;
}
operands[3] = gen_reg_rtx (<MODE>mode);
operands[4] = gen_reg_rtx (SImode);
operands[5] = gen_reg_rtx (SImode);")
(define_insn_and_split "lshr<mode>3_imm"
[(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
(match_operand:SI 2 "immediate_operand" "O,P")))]
(define_insn_and_split "lshr<mode>3"
[(set (match_operand:DTI 0 "spu_reg_operand" "=r,r,r")
(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r,r")
(match_operand:SI 2 "spu_nonmem_operand" "r,O,P")))]
""
"@
#
rotqmbyi\t%0,%1,-%h2
rotqmbii\t%0,%1,-%e2"
"!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2])"
[(set (match_dup:DTI 0)
"REG_P (operands[2]) || (!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2]))"
[(set (match_dup:DTI 3)
(lshiftrt:DTI (match_dup:DTI 1)
(match_dup:SI 4)))
(set (match_dup:DTI 0)
(lshiftrt:DTI (match_dup:DTI 0)
(lshiftrt:DTI (match_dup:DTI 3)
(match_dup:SI 5)))]
{
HOST_WIDE_INT val = INTVAL(operands[2]);
operands[4] = GEN_INT (val&7);
operands[5] = GEN_INT (val&-8);
operands[3] = gen_reg_rtx (<MODE>mode);
if (GET_CODE (operands[2]) == CONST_INT)
{
HOST_WIDE_INT val = INTVAL(operands[2]);
operands[4] = GEN_INT (val & 7);
operands[5] = GEN_INT (val & -8);
}
else
{
rtx t0 = gen_reg_rtx (SImode);
rtx t1 = gen_reg_rtx (SImode);
emit_insn (gen_subsi3(t0, GEN_INT(0), operands[2]));
emit_insn (gen_subsi3(t1, GEN_INT(7), operands[2]));
operands[4] = gen_rtx_AND (SImode, gen_rtx_NEG (SImode, t0), GEN_INT (7));
operands[5] = gen_rtx_AND (SImode, gen_rtx_NEG (SImode, gen_rtx_AND (SImode, t1, GEN_INT (-8))), GEN_INT (-8));
}
}
[(set_attr "type" "shuf,shuf")])
[(set_attr "type" "*,shuf,shuf")])
(define_insn_and_split "lshr<mode>3_reg"
[(set (match_operand:DTI 0 "spu_reg_operand" "=r")
(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r")
(match_operand:SI 2 "spu_reg_operand" "r")))
(clobber (match_operand:DTI 3 "spu_reg_operand" "=&r"))
(clobber (match_operand:SI 4 "spu_reg_operand" "=&r"))
(clobber (match_operand:SI 5 "spu_reg_operand" "=&r"))]
""
"#"
""
[(set (match_dup:DTI 3)
(lshiftrt:DTI (match_dup:DTI 1)
(and:SI (neg:SI (match_dup:SI 4))
(const_int 7))))
(set (match_dup:DTI 0)
(lshiftrt:DTI (match_dup:DTI 3)
(and:SI (neg:SI (and:SI (match_dup:SI 5)
(const_int -8)))
(const_int -8))))]
{
emit_insn (gen_subsi3(operands[4], GEN_INT(0), operands[2]));
emit_insn (gen_subsi3(operands[5], GEN_INT(7), operands[2]));
})
(define_insn_and_split "shrqbybi_<mode>"
(define_expand "shrqbybi_<mode>"
[(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
(and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
(const_int -8))))
(clobber (match_scratch:SI 3 "=&r,X"))]
""
"#"
"reload_completed"
[(set (match_dup:DTI 0)
(lshiftrt:DTI (match_dup:DTI 1)
(and:SI (neg:SI (and:SI (match_dup:SI 3) (const_int -8)))
(and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
(const_int -8)))
(const_int -8))))]
""
{
if (GET_CODE (operands[2]) == CONST_INT)
operands[3] = GEN_INT (7 - INTVAL (operands[2]));
operands[2] = GEN_INT (7 - INTVAL (operands[2]));
else
emit_insn (gen_subsi3 (operands[3], GEN_INT (7), operands[2]));
}
[(set_attr "type" "shuf")])
{
rtx t0 = gen_reg_rtx (SImode);
emit_insn (gen_subsi3 (t0, GEN_INT (7), operands[2]));
operands[2] = t0;
}
})
(define_insn "rotqmbybi_<mode>"
[(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
@@ -2486,25 +2470,22 @@
rotqmbii\t%0,%1,-%E2"
[(set_attr "type" "shuf")])
(define_insn_and_split "shrqby_<mode>"
(define_expand "shrqby_<mode>"
[(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
(mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
(const_int 8))))
(clobber (match_scratch:SI 3 "=&r,X"))]
(mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
(const_int 8))))]
""
"#"
"reload_completed"
[(set (match_dup:DTI 0)
(lshiftrt:DTI (match_dup:DTI 1)
(mult:SI (neg:SI (match_dup:SI 3)) (const_int 8))))]
{
if (GET_CODE (operands[2]) == CONST_INT)
operands[3] = GEN_INT (-INTVAL (operands[2]));
operands[2] = GEN_INT (-INTVAL (operands[2]));
else
emit_insn (gen_subsi3 (operands[3], GEN_INT (0), operands[2]));
}
[(set_attr "type" "shuf")])
{
rtx t0 = gen_reg_rtx (SImode);
emit_insn (gen_subsi3 (t0, GEN_INT (0), operands[2]));
operands[2] = t0;
}
})
(define_insn "rotqmby_<mode>"
[(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
@@ -2538,6 +2519,14 @@
""
[(set_attr "type" "*,fx3")])
(define_insn "<v>ashr<mode>3_imm"
[(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
(ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")
(match_operand:VHSI 2 "immediate_operand" "W")))]
""
"rotma<bh>i\t%0,%1,-%<umask>2"
[(set_attr "type" "fx3")])
(define_insn "rotma_<mode>"
[(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
@@ -2622,11 +2611,16 @@
})
(define_expand "ashrti3"
[(set (match_operand:TI 0 "spu_reg_operand" "")
(ashiftrt:TI (match_operand:TI 1 "spu_reg_operand" "")
(match_operand:SI 2 "spu_nonmem_operand" "")))]
(define_insn_and_split "ashrti3"
[(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
(ashiftrt:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
(match_operand:SI 2 "spu_nonmem_operand" "r,i")))]
""
"#"
""
[(set (match_dup:TI 0)
(ashiftrt:TI (match_dup:TI 1)
(match_dup:SI 2)))]
{
rtx sign_shift = gen_reg_rtx (SImode);
rtx sign_mask = gen_reg_rtx (TImode);
@@ -2711,33 +2705,133 @@
;; struct extract/insert
;; We have to handle mem's because GCC will generate invalid SUBREG's
;; if it handles them. We generate better code anyway.
;; We handle mem's because GCC will generate invalid SUBREG's
;; and inefficient code.
(define_expand "extv"
[(set (match_operand 0 "register_operand" "")
(sign_extract (match_operand 1 "register_operand" "")
(match_operand:SI 2 "const_int_operand" "")
(match_operand:SI 3 "const_int_operand" "")))]
""
{ spu_expand_extv(operands, 0); DONE; })
(define_expand "extzv"
[(set (match_operand 0 "register_operand" "")
(zero_extract (match_operand 1 "register_operand" "")
[(set (match_operand:TI 0 "register_operand" "")
(sign_extract:TI (match_operand 1 "nonimmediate_operand" "")
(match_operand:SI 2 "const_int_operand" "")
(match_operand:SI 3 "const_int_operand" "")))]
""
{ spu_expand_extv(operands, 1); DONE; })
{
spu_expand_extv (operands, 0);
DONE;
})
(define_expand "extzv"
[(set (match_operand:TI 0 "register_operand" "")
(zero_extract:TI (match_operand 1 "nonimmediate_operand" "")
(match_operand:SI 2 "const_int_operand" "")
(match_operand:SI 3 "const_int_operand" "")))]
""
{
spu_expand_extv (operands, 1);
DONE;
})
(define_expand "insv"
[(set (zero_extract (match_operand 0 "register_operand" "")
[(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
(match_operand:SI 1 "const_int_operand" "")
(match_operand:SI 2 "const_int_operand" ""))
(match_operand 3 "nonmemory_operand" ""))]
""
{ spu_expand_insv(operands); DONE; })
;; Simplify a number of patterns that get generated by extv, extzv,
;; insv, and loads.
(define_insn_and_split "trunc_shr_ti<mode>"
[(set (match_operand:QHSI 0 "spu_reg_operand" "=r")
(truncate:QHSI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "0")
(const_int 96)])))]
""
"#"
"reload_completed"
[(const_int 0)]
{
spu_split_convert (operands);
DONE;
}
[(set_attr "type" "convert")
(set_attr "length" "0")])
(define_insn_and_split "trunc_shr_tidi"
[(set (match_operand:DI 0 "spu_reg_operand" "=r")
(truncate:DI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "0")
(const_int 64)])))]
""
"#"
"reload_completed"
[(const_int 0)]
{
spu_split_convert (operands);
DONE;
}
[(set_attr "type" "convert")
(set_attr "length" "0")])
(define_insn_and_split "shl_ext_<mode>ti"
[(set (match_operand:TI 0 "spu_reg_operand" "=r")
(ashift:TI (match_operator:TI 2 "extend_operator" [(match_operand:QHSI 1 "spu_reg_operand" "0")])
(const_int 96)))]
""
"#"
"reload_completed"
[(const_int 0)]
{
spu_split_convert (operands);
DONE;
}
[(set_attr "type" "convert")
(set_attr "length" "0")])
(define_insn_and_split "shl_ext_diti"
[(set (match_operand:TI 0 "spu_reg_operand" "=r")
(ashift:TI (match_operator:TI 2 "extend_operator" [(match_operand:DI 1 "spu_reg_operand" "0")])
(const_int 64)))]
""
"#"
"reload_completed"
[(const_int 0)]
{
spu_split_convert (operands);
DONE;
}
[(set_attr "type" "convert")
(set_attr "length" "0")])
(define_insn "sext_trunc_lshr_tiqisi"
[(set (match_operand:SI 0 "spu_reg_operand" "=r")
(sign_extend:SI (truncate:QI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
(const_int 120)]))))]
""
"rotmai\t%0,%1,-24"
[(set_attr "type" "fx3")])
(define_insn "zext_trunc_lshr_tiqisi"
[(set (match_operand:SI 0 "spu_reg_operand" "=r")
(zero_extend:SI (truncate:QI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
(const_int 120)]))))]
""
"rotmi\t%0,%1,-24"
[(set_attr "type" "fx3")])
(define_insn "sext_trunc_lshr_tihisi"
[(set (match_operand:SI 0 "spu_reg_operand" "=r")
(sign_extend:SI (truncate:HI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
(const_int 112)]))))]
""
"rotmai\t%0,%1,-16"
[(set_attr "type" "fx3")])
(define_insn "zext_trunc_lshr_tihisi"
[(set (match_operand:SI 0 "spu_reg_operand" "=r")
(zero_extend:SI (truncate:HI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
(const_int 112)]))))]
""
"rotmi\t%0,%1,-16"
[(set_attr "type" "fx3")])
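/* Editor's note: C illustration (not part of the patch) of the combine
   patterns above.  Sign- or zero-extending the top byte or halfword that
   these TImode shift/truncate forms describe is just a 32-bit shift of the
   SImode preferred slot, which is what the single rotmai/rotmi by -24 or
   -16 computes.  */
static int
sext_top_byte (unsigned int slot)
{
  return (signed char) (slot >> 24);     /* matches sext_trunc_lshr_tiqisi */
}

static unsigned int
zext_top_half (unsigned int slot)
{
  return slot >> 16;                     /* matches zext_trunc_lshr_tihisi */
}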
;; String/block move insn.
;; Argument 0 is the destination
@@ -4303,21 +4397,20 @@ selb\t%0,%4,%0,%3
DONE;
})
(define_insn "_spu_convert"
(define_insn_and_split "_spu_convert"
[(set (match_operand 0 "spu_reg_operand" "=r")
(unspec [(match_operand 1 "spu_reg_operand" "0")] UNSPEC_CONVERT))]
"operands"
""
"#"
"reload_completed"
[(const_int 0)]
{
spu_split_convert (operands);
DONE;
}
[(set_attr "type" "convert")
(set_attr "length" "0")])
(define_peephole2
[(set (match_operand 0 "spu_reg_operand")
(unspec [(match_operand 1 "spu_reg_operand")] UNSPEC_CONVERT))]
""
[(use (const_int 0))]
"")
;;
(include "spu-builtins.md")
@@ -5186,8 +5279,8 @@ DONE;
}")
(define_insn "stack_protect_set"
[(set (match_operand:SI 0 "spu_mem_operand" "=m")
(unspec:SI [(match_operand:SI 1 "spu_mem_operand" "m")] UNSPEC_SP_SET))
[(set (match_operand:SI 0 "memory_operand" "=m")
(unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
(set (match_scratch:SI 2 "=&r") (const_int 0))]
""
"lq%p1\t%2,%1\;stq%p0\t%2,%0\;xor\t%2,%2,%2"
@@ -5196,8 +5289,8 @@ DONE;
)
(define_expand "stack_protect_test"
[(match_operand 0 "spu_mem_operand" "")
(match_operand 1 "spu_mem_operand" "")
[(match_operand 0 "memory_operand" "")
(match_operand 1 "memory_operand" "")
(match_operand 2 "" "")]
""
{
@@ -5223,8 +5316,8 @@ DONE;
(define_insn "stack_protect_test_si"
[(set (match_operand:SI 0 "spu_reg_operand" "=&r")
(unspec:SI [(match_operand:SI 1 "spu_mem_operand" "m")
(match_operand:SI 2 "spu_mem_operand" "m")]
(unspec:SI [(match_operand:SI 1 "memory_operand" "m")
(match_operand:SI 2 "memory_operand" "m")]
UNSPEC_SP_TEST))
(set (match_scratch:SI 3 "=&r") (const_int 0))]
""