lib1funcs.asm (init_trampoline): New entry point.

* config/sh/lib1funcs.asm (init_trampoline): New entry point.
	* sh-protos.h (sh_initialize_trampoline): Declare.
	* sh.c (sh_initialize_trampoline): New function.
	* sh.h (TRAMPOLINE_SIZE): Only 24 for TARGET_SHMEDIA32.
	(TRAMPOLINE_ALIGNMENT): Need cache-line alignment for TARGET_SHMEDIA.
	(INITIALIZE_TRAMPOLINE): Call sh_initialize_trampoline.
	(TRAMPOLINE_ADJUST_ADDRESS): Not needed for SHcompact.
	* sh.md (initialize_trampoline, double_shori): New patterns.
	(initialize_trampoline_compact): Likewise.
	(shmedia32_initialize_trampoline_big): Remove.
	(shmedia32_initialize_trampoline_little): Likewise.

From-SVN: r55529
This commit is contained in:
J"orn Rennecke 2002-07-17 15:43:18 +00:00 committed by Joern Rennecke
parent 0ac785173d
commit ca903bba77
6 changed files with 263 additions and 102 deletions

View File

@ -1,4 +1,16 @@
Wed Jul 17 14:04:10 2002 J"orn Rennecke <joern.rennecke@superh.com>
Wed Jul 17 16:28:53 2002 J"orn Rennecke <joern.rennecke@superh.com>
* config/sh/lib1funcs.asm (init_trampoline): New entry point.
* sh-protos.h (sh_initialize_trampoline): Declare.
* sh.c (sh_initialize_trampoline): New function.
* sh.h (TRAMPOLINE_SIZE): Only 24 for TARGET_SHMEDIA32.
(TRAMPOLINE_ALIGNMENT): Need cache-line alignment for TARGET_SHMEDIA.
(INITIALIZE_TRAMPOLINE): Call sh_initialize_trampoline.
(TRAMPOLINE_ADJUST_ADDRESS): Not needed for SHcompact.
* sh.md (initialize_trampoline, double_shori): New patterns.
(initialize_trampoline_compact): Likewise.
(shmedia32_initialize_trampoline_big): Remove.
(shmedia32_initialize_trampoline_little): Likewise.
* sh-protos.h (binary_float_operator): Remove declaration.
(sh_expand_unop_v2sf, sh_expand_binop_v2sf): Declare.

View File

@ -1821,6 +1821,22 @@ LOCAL(set_fpscr_L1):
.mode SHmedia
.section .text..SHmedia32,"ax"
.align 2
/* init_trampoline: write the contents of a 16-byte trampoline.
   In:   r0 = address of the trampoline
         r2 = word stored at offset 8
         r3 = word stored at offset 12
   Clobbers r20.
   There is no return here: execution falls through into ic_invalidate,
   which follows immediately, still with the trampoline address in r0.  */
.global GLOBAL(init_trampoline)
GLOBAL(init_trampoline):
st.l r0,8,r2
/* Build in r20 the 64-bit value that, once stored at offset 0, places the
   16-bit words 0xd002, 0xd101, 0x402b, 0x0009 at ascending addresses for
   either byte order.
   NOTE(review): these look like the SHcompact encodings of
   "mov.l l2,r0; mov.l l1,r1; jmp @r0; nop" -- confirm against the ISA.  */
#ifdef __LITTLE_ENDIAN__
movi 9,r20
shori 0x402b,r20
shori 0xd101,r20
shori 0xd002,r20
#else
movi 0xffffffffffffd002,r20
shori 0xd101,r20
shori 0x402b,r20
shori 9,r20
#endif
/* Store the instruction quadword, then the remaining data word.  */
st.q r0,0,r20
st.l r0,12,r3
.global GLOBAL(ic_invalidate)
GLOBAL(ic_invalidate):
ocbwb r0,0

View File

@ -125,6 +125,7 @@ extern int fldi_ok PARAMS ((void));
extern int sh_pr_n_sets PARAMS ((void));
extern int sh_hard_regno_rename_ok PARAMS ((unsigned int, unsigned int));
/* Use PARAMS like every other declaration in this header, so that the
   prototypes vanish for K&R compilers.  */
extern int sh_cfun_interrupt_handler_p PARAMS ((void));
/* Emit RTL that fills in a trampoline.  Arguments, in order: the
   trampoline address, the nested function's address, and the static
   chain value.  Defined in sh.c.  */
extern void sh_initialize_trampoline PARAMS ((rtx, rtx, rtx));
#ifdef HARD_CONST
extern void fpscr_set_from_mem PARAMS ((int, HARD_REG_SET));

View File

@ -7113,6 +7113,177 @@ sh_strip_name_encoding (str)
return str;
}
/*
On the SH1..SH4, the trampoline looks like
2 0002 D202 mov.l l2,r2
1 0000 D301 mov.l l1,r3
3 0004 422B jmp @r2
4 0006 0009 nop
5 0008 00000000 l1: .long area
6 000c 00000000 l2: .long function
SH5 (compact) uses r1 instead of r3 for the static chain. */
/* Emit RTL insns to initialize the variable parts of a trampoline.
   TRAMP is an RTX for the address of the trampoline to fill in.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Four cases are handled, selected by target:
   - TARGET_SHMEDIA64: either a position-relative trampoline written as
     eight SImode words, or a copy of the __GCC_nested_trampoline template
     followed by FNADDR and CXT.
   - other TARGET_SHMEDIA: three DImode words assembled with SHmedia
     shuffle / extract insns.
   - TARGET_SHCOMPACT: the initialize_trampoline expander.
   - otherwise (SH1..SH4): two fixed instruction words followed by CXT
     and FNADDR.  */
void
sh_initialize_trampoline (tramp, fnaddr, cxt)
rtx tramp, fnaddr, cxt;
{
if (TARGET_SHMEDIA64)
{
rtx tramp_templ;
int fixed_len;
/* Opcode templates; the 16-bit immediate is or-ed in at bits 10..25.  */
rtx movi1 = GEN_INT (0xcc000010);
rtx shori1 = GEN_INT (0xc8000010);
rtx src, dst;
/* The following trampoline works within a +- 128 KB range for cxt:
ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
gettr tr1,r1; blink tr0,r63 */
/* Address rounding makes it hard to compute the exact bounds of the
offset for this trampoline, but we have a rather generous offset
range, so frame_offset should do fine as an upper bound. */
if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
{
/* ??? could optimize this trampoline initialization
by writing DImode words with two insns each. */
/* MASK selects bits 10..25, the 16-bit field filled in below.  */
rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
/* Word 0: the distance from TRAMP to CXT, shifted left by 10-2 into
   the masked field.  */
rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
insn = gen_rtx_AND (DImode, insn, mask);
/* Or in ptb/u .,tr1 pattern */
insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
insn = force_operand (insn, NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
/* Word 1: movi with bits 48..63 of FNADDR (shift right by 48 - 10).  */
insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
/* Word 2: shori with bits 32..47 of FNADDR (shift right by 32 - 10).  */
insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
/* Word 3: shori with bits 16..31 of FNADDR (shift right by 16 - 10).  */
insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
insn);
/* Word 4: shori with bits 0..15 of FNADDR (shift left by 10).  */
insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
insn);
/* Words 5..7 are constants; going by the trampoline listing above they
   are ptabs/l r0,tr0; gettr tr1,r1; blink tr0,r63.  */
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
GEN_INT (0x6bf10600));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
GEN_INT (0x4415fc10));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
GEN_INT (0x4401fff0));
emit_insn (gen_ic_invalidate_line (tramp));
return;
}
/* General SHmedia64 case: copy the fixed part of the trampoline from the
   __GCC_nested_trampoline template, then store FNADDR and CXT in the two
   Pmode-sized slots that follow it.  */
tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
tramp_templ = gen_datalabel_ref (tramp_templ);
dst = gen_rtx_MEM (BLKmode, tramp);
src = gen_rtx_MEM (BLKmode, tramp_templ);
/* Alignments are in bits.  */
set_mem_align (dst, 256);
set_mem_align (src, 64);
emit_block_move (dst, src, GEN_INT (fixed_len));
emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
fnaddr);
emit_move_insn (gen_rtx_MEM (Pmode,
plus_constant (tramp,
fixed_len
+ GET_MODE_SIZE (Pmode))),
cxt);
emit_insn (gen_ic_invalidate_line (tramp));
return;
}
else if (TARGET_SHMEDIA)
{
/* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
/* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
rotated 10 right, and higher 16 bit of every 32 selected. */
rtx movishori
= force_reg (V2HImode, (simplify_gen_subreg
(V2HImode, GEN_INT (0x4330432), SImode, 0)));
rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
tramp = force_reg (Pmode, tramp);
fnaddr = force_reg (SImode, fnaddr);
cxt = force_reg (SImode, cxt);
/* quad0: the movi / shori pair that loads FNADDR; stored at offset 0.  */
emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
gen_rtx_SUBREG (V2HImode, fnaddr, 0),
movishori));
emit_insn (gen_rotldi3_mextr (quad0, quad0,
GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
/* cxtload: the same construction for CXT.  */
emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
gen_rtx_SUBREG (V2HImode, cxt, 0),
movishori));
emit_insn (gen_rotldi3_mextr (cxtload, cxtload,
GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
/* Combine the ptabs word, the CXT load pair and the blink word into the
   quadwords stored at offsets 8 and 16; which insn builds which quadword
   depends on byte order.  */
if (TARGET_LITTLE_ENDIAN)
{
emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
emit_insn (gen_mextr4 (quad2, cxtload, blink));
}
else
{
emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
}
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
emit_insn (gen_ic_invalidate_line (tramp));
return;
}
else if (TARGET_SHCOMPACT)
{
/* Note the operand order: the expander takes CXT before FNADDR.  */
emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
return;
}
/* SH1..SH4: two fixed instruction words (the mov.l / mov.l / jmp / nop
   sequence, laid out according to byte order), then CXT at offset 8 and
   FNADDR at offset 12.  */
emit_move_insn (gen_rtx_MEM (SImode, tramp),
gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
SImode));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
SImode));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
cxt);
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
fnaddr);
/* With TARGET_HARVARD the instruction cache line holding the trampoline
   is invalidated: through the __ic_invalidate library routine under
   TARGET_USERMODE, otherwise with the ic_invalidate_line insn.  */
if (TARGET_HARVARD)
{
if (TARGET_USERMODE)
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"),
0, VOIDmode, 1, tramp, SImode);
else
emit_insn (gen_ic_invalidate_line (tramp));
}
}
/* Machine specific built-in functions. */

View File

@ -2082,64 +2082,25 @@ while (0)
6 000c 00000000 l2: .long function */
/* Length in units of the trampoline for entering a nested function. */
#define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 32 : 16)
#define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : 16)
/* Alignment required for a trampoline in bits . */
#define TRAMPOLINE_ALIGNMENT \
((CACHE_LOG < 3 || (TARGET_SMALLCODE && ! TARGET_HARVARD)) ? 32 : 64)
((CACHE_LOG < 3 || (TARGET_SMALLCODE && ! TARGET_HARVARD)) ? 32 \
: TARGET_SHMEDIA ? 256 : 64)
/* Emit RTL insns to initialize the variable parts of a trampoline.
   TRAMP is an RTX for the address of the trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.
   For TARGET_SH5 the fixed part is copied from __GCC_nested_trampoline
   and FNADDR, then CXT, are stored after it.  Otherwise two instruction
   words are stored, followed by CXT at offset 8 and FNADDR at offset 12,
   and the instruction cache is invalidated when TARGET_HARVARD.  */
#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) do \
{ \
if (TARGET_SH5) \
{ \
rtx tramp_templ = gen_rtx_SYMBOL_REF (Pmode, \
"__GCC_nested_trampoline"); \
int fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode); \
\
tramp_templ = gen_datalabel_ref (tramp_templ); \
emit_block_move (gen_rtx_MEM (BLKmode, (TRAMP)), \
gen_rtx_MEM (BLKmode, tramp_templ), \
GEN_INT (fixed_len)); \
emit_move_insn (gen_rtx_MEM (Pmode, plus_constant ((TRAMP), \
fixed_len)), \
(FNADDR)); \
emit_move_insn (gen_rtx_MEM (Pmode, \
plus_constant ((TRAMP), \
fixed_len \
+ GET_MODE_SIZE (Pmode))), \
(CXT)); \
emit_insn (gen_ic_invalidate_line (TRAMP)); \
break; \
} \
emit_move_insn (gen_rtx_MEM (SImode, (TRAMP)), \
GEN_INT (trunc_int_for_mode \
(TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,\
SImode))); \
emit_move_insn (gen_rtx_MEM (SImode, plus_constant ((TRAMP), 4)), \
GEN_INT (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009));\
emit_move_insn (gen_rtx_MEM (SImode, plus_constant ((TRAMP), 8)), \
(CXT)); \
emit_move_insn (gen_rtx_MEM (SImode, plus_constant ((TRAMP), 12)), \
(FNADDR)); \
if (TARGET_HARVARD) \
{ \
if (TARGET_USERMODE) \
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"),\
0, VOIDmode, 1, (TRAMP), SImode); \
else \
emit_insn (gen_ic_invalidate_line (TRAMP)); \
} \
} while (0)
/* Trampoline initialization is done out of line: pass TRAMP, FNADDR and
   CXT straight through to sh_initialize_trampoline.  */
#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
sh_initialize_trampoline ((TRAMP), (FNADDR), (CXT))
/* On SH5, trampolines are SHmedia code, so add 1 to the address. */
#define TRAMPOLINE_ADJUST_ADDRESS(TRAMP) do \
{ \
if (TARGET_SH5) \
if (TARGET_SHMEDIA) \
(TRAMP) = expand_simple_binop (Pmode, PLUS, (TRAMP), GEN_INT (1), \
gen_reg_rtx (Pmode), 0, \
OPTAB_LIB_WIDEN); \

View File

@ -3667,6 +3667,8 @@
[(set_attr "length" "8")
(set_attr "insn_class" "cwb")])
;; ??? Could make arg 0 an offsettable memory operand; that would save
;; an add in the code that calculates the address.
(define_insn "ic_invalidate_line_media"
[(unspec_volatile [(match_operand 0 "register_operand" "r")]
UNSPEC_ICACHE)]
@ -3685,6 +3687,37 @@
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
;; Expander for SHcompact trampoline initialization.  Operand 0 is moved
;; into r0, operand 1 into r2 and operand 2 into r3; then the
;; __init_trampoline helper is called via initialize_trampoline_compact.
(define_expand "initialize_trampoline"
  [(match_operand:SI 0 "" "")
   (match_operand:SI 1 "" "")
   (match_operand:SI 2 "" "")]
  "TARGET_SHCOMPACT"
  "
{
  /* Load the helper's address first, as before, so that the order of the
     emitted insns is unchanged.  */
  rtx helper_addr
    = force_reg (Pmode, gen_rtx_SYMBOL_REF (Pmode, \"__init_trampoline\"));
  rtx arg_r0 = gen_rtx_REG (SImode, R0_REG);
  rtx arg_r2 = gen_rtx_REG (SImode, R2_REG);
  rtx arg_r3 = gen_rtx_REG (SImode, R3_REG);

  emit_move_insn (arg_r0, operands[0]);
  emit_move_insn (arg_r2, operands[1]);
  emit_move_insn (arg_r3, operands[2]);
  emit_insn (gen_initialize_trampoline_compact (arg_r0, helper_addr));
  DONE;
}")
;; Call to the trampoline initialization helper for SHcompact.
;; Operand 0 uses the "z" constraint (the expander passes hard register
;; r0 here); operand 1 is a register holding the helper's address.  r2 and
;; r3 are listed as inputs of the unspec_volatile so their setup stays
;; live, and PR is clobbered by the jsr.  The insn needs a delay slot.
(define_insn "initialize_trampoline_compact"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "z")
(match_operand:SI 1 "register_operand" "r")
(reg:SI R2_REG) (reg:SI R3_REG)]
UNSPEC_INIT_TRAMP)
(clobber (reg:SI PR_REG))]
"TARGET_SHCOMPACT"
"jsr @%1%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "movqi_i"
[(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,r,l")
(match_operand:QI 1 "general_movsrc_operand" "ri,m,r,t,l,r"))]
@ -7198,62 +7231,6 @@
"jsr @r0%#"
[(set_attr "needs_delay_slot" "yes")])
;; ??? could make arg 0 an offsettable memory operand - and do likewise
;; for cache invalidation - to allow to save an add in the code that
;; calculates the address.
;; Big-endian SHmedia32 trampoline initialization as a single insn.
;; Operand 0 is the trampoline address; operands 1 and 2 are the two
;; input values; operands 3 and 4 are early-clobber scratch registers.
;; The sequence stores three quadwords, at offsets 0, 8 and 16 from
;; operand 0.  Length 64 = 16 instructions of 4 bytes each.
(define_insn "shmedia32_initialize_trampoline_big"
[(set (mem:BLK (match_operand:SI 0 "arith_reg_operand" "r"))
(unspec [(match_operand:SI 1 "arith_reg_operand" "r")
(match_operand:SI 2 "arith_reg_operand" "r")]
UNSPEC_INIT_TRAMP))
(clobber (match_scratch:SI 3 "=&r"))
(clobber (match_scratch:SI 4 "=&r"))]
"TARGET_SHMEDIA32 && ! TARGET_LITTLE_ENDIAN"
"movi 0x433,%3
shori 0x432,%3
mshflo.w %1,%3,%4
mextr7 %4,%4,%4
shlli %4,2,%4
st.q %0,0,%4
mshflo.w %2,%3,%4
shlli %4,10,%4
addi %4,0x10,%4
movi 0x6bf1,%3
shori 0x0600,%3
mextr4 %4,%3,%3
st.q %0,8,%3
shori 0x4401,%4
shori 0xfff0,%4
st.q %0,16,%4"
[(set_attr "length" "64")])
;; Little-endian SHmedia32 trampoline initialization as a single insn.
;; Operand 0 is the trampoline address; operands 1 and 2 are the two
;; input values; operands 3 and 4 are early-clobber scratch registers.
;; The sequence stores a quadword at offset 0, longwords at offsets 16
;; and 20, and finally a quadword at offset 8.  Length 68 = 17
;; instructions of 4 bytes each.
(define_insn "shmedia32_initialize_trampoline_little"
[(set (mem:BLK (match_operand:SI 0 "arith_reg_operand" "r"))
(unspec [(match_operand:SI 1 "arith_reg_operand" "r")
(match_operand:SI 2 "arith_reg_operand" "r")]
UNSPEC_INIT_TRAMP))
(clobber (match_scratch:SI 3 "=&r"))
(clobber (match_scratch:SI 4 "=&r"))]
"TARGET_SHMEDIA32 && TARGET_LITTLE_ENDIAN"
"movi 0x433,%3
shori 0x432,%3
mshflo.w %1,%3,%4
mextr3 %4,%4,%4
shlli %4,2,%4
st.q %0,0,%4
mshflo.w %2,%3,%4
shlli %4,10,%4
addi %4,0x10,%4
movi 0x6bf1,%3
shori 0x0600,%3
shori 0x4401,%3
shori 0xfff0,%3
st.l %0,16,%r4
st.l %0,20,%r3
mshfhi.l %3,%4,%4
st.q %0,8,%4"
[(set_attr "length" "68")])
(define_expand "prologue"
[(const_int 0)]
""
@ -10263,6 +10240,29 @@
"mshflo.l %N2, %N1, %0"
[(set_attr "type" "arith_media")])
;; Combiner pattern for trampoline initialization.
;; Matches (op1 << 32) | C and splits it into two shori insns that shift
;; in the upper and then the lower 16 bits of C.
;; C must be non-negative: each shori only inserts 16 bits, so the pair
;; yields (op1 << 32) | (C & 0xffffffff).  A negative (sign-extended)
;; constant has bits 32..63 set as well, which the split would drop, so
;; such constants must not match.
(define_insn_and_split "*double_shori"
  [(set (match_operand:DI 0 "arith_reg_dest" "=r")
	(ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0")
			   (const_int 32))
		(match_operand:DI 2 "const_int_operand" "n")))]
  "TARGET_SHMEDIA
   && INTVAL (operands[2]) >= 0
   && INTVAL (operands[2]) == trunc_int_for_mode (INTVAL (operands[2]), SImode)"
  "#"
  "rtx_equal_p (operands[0], operands[1])"
  [(const_int 0)]
  "
{
  HOST_WIDE_INT v = INTVAL (operands[2]);

  /* Upper 16 bits of the 32-bit constant first, then the lower 16.  */
  emit_insn (gen_shori_media (operands[0], operands[0],
			      gen_int_mode (v >> 16, HImode)));
  emit_insn (gen_shori_media (operands[0], operands[0],
			      gen_int_mode (v, HImode)));
  DONE;
}")
(define_insn "*mshflo_l_di_x"
[(set (match_operand:DI 0 "arith_reg_dest" "=r")
(ior:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_or_0_operand"