lib1funcs.asm (init_trampoline): New entry point.

* config/sh/lib1funcs.asm (init_trampoline): New entry point.
	* sh-protos.h (sh_initialize_trampoline): Declare.
	* sh.c (sh_initialize_trampoline): New function.
	* sh.h (TRAMPOLINE_SIZE): Only 24 for TARGET_SHMEDIA32.
	(TRAMPOLINE_ALIGNMENT): Need cache-line alignment for TARGET_SHMEDIA.
	(INITIALIZE_TRAMPOLINE): Call sh_initialize_trampoline.
	(TRAMPOLINE_ADJUST_ADDRESS): Not needed for SHcompact.
	* sh.md (initialize_trampoline, double_shori): New patterns.
	(initialize_trampoline_compact): Likewise.
	(shmedia32_initialize_trampoline_big): Remove.
	(shmedia32_initialize_trampoline_little): Likewise.

From-SVN: r55529
This commit is contained in:
J"orn Rennecke 2002-07-17 15:43:18 +00:00 committed by Joern Rennecke
parent 0ac785173d
commit ca903bba77
6 changed files with 263 additions and 102 deletions

View File

@ -1,4 +1,16 @@
Wed Jul 17 14:04:10 2002 J"orn Rennecke <joern.rennecke@superh.com> Wed Jul 17 16:28:53 2002 J"orn Rennecke <joern.rennecke@superh.com>
* config/sh/lib1funcs.asm (init_trampoline): New entry point.
* sh-protos.h (sh_initialize_trampoline): Declare.
* sh.c (sh_initialize_trampoline): New function.
* sh.h (TRAMPOLINE_SIZE): Only 24 for TARGET_SHMEDIA32.
(TRAMPOLINE_ALIGNMENT): Need cache-line alignment for TARGET_SHMEDIA.
(INITIALIZE_TRAMPOLINE): Call sh_initialize_trampoline.
(TRAMPOLINE_ADJUST_ADDRESS): Not needed for SHcompact.
* sh.md (initialize_trampoline, double_shori): New patterns.
(initialize_trampoline_compact): Likewise.
(shmedia32_initialize_trampoline_big): Remove.
(shmedia32_initialize_trampoline_little): Likewise.
* sh-protos.h (binary_float_operator): Remove declaration. * sh-protos.h (binary_float_operator): Remove declaration.
(sh_expand_unop_v2sf, sh_expand_binop_v2sf): Declare. (sh_expand_unop_v2sf, sh_expand_binop_v2sf): Declare.

View File

@ -1821,6 +1821,22 @@ LOCAL(set_fpscr_L1):
.mode SHmedia .mode SHmedia
.section .text..SHmedia32,"ax" .section .text..SHmedia32,"ax"
.align 2 .align 2
/* init_trampoline: fill in a 16-byte trampoline and then fall through
   into ic_invalidate below (there is deliberately no return here).

   Register usage, as visible from the stores in this routine:
     r0  = base address of the trampoline
     r2  = 32-bit value stored at offset 8
     r3  = 32-bit value stored at offset 12
     r20 = scratch, holds the 8 bytes of fixed code stored at offset 0
   NOTE(review): the caller is presumably the initialize_trampoline
   expander in sh.md, which would make r2 the static chain and r3 the
   function address -- confirm against that pattern.  */
.global GLOBAL(init_trampoline)
GLOBAL(init_trampoline):
/* Store the first variable word at trampoline + 8.  */
st.l r0,8,r2
/* Build the 64-bit image of the four fixed 16-bit opcodes
   0xd002, 0xd101, 0x402b, 0x0009.  The two variants assemble the
   halfwords in opposite order so that, after the endian-dependent
   st.q, they land in memory in that same sequence.
   NOTE(review): by analogy with the SH1..SH4 layout documented in
   sh.c these look like "mov.l l2,r0; mov.l l1,r1; jmp @r0; nop" --
   confirm against the SHcompact encoding.  */
#ifdef __LITTLE_ENDIAN__
movi 9,r20
shori 0x402b,r20
shori 0xd101,r20
shori 0xd002,r20
#else
/* The sign-extended upper bits of this movi are shifted out by the
   three shori instructions that follow.  */
movi 0xffffffffffffd002,r20
shori 0xd101,r20
shori 0x402b,r20
shori 9,r20
#endif
/* Store the fixed code at trampoline + 0 ...  */
st.q r0,0,r20
/* ... and the second variable word at trampoline + 12.  */
st.l r0,12,r3
.global GLOBAL(ic_invalidate) .global GLOBAL(ic_invalidate)
GLOBAL(ic_invalidate): GLOBAL(ic_invalidate):
ocbwb r0,0 ocbwb r0,0

View File

@ -125,6 +125,7 @@ extern int fldi_ok PARAMS ((void));
extern int sh_pr_n_sets PARAMS ((void)); extern int sh_pr_n_sets PARAMS ((void));
extern int sh_hard_regno_rename_ok PARAMS ((unsigned int, unsigned int)); extern int sh_hard_regno_rename_ok PARAMS ((unsigned int, unsigned int));
extern int sh_cfun_interrupt_handler_p (void); extern int sh_cfun_interrupt_handler_p (void);
extern void sh_initialize_trampoline (rtx, rtx, rtx);
#ifdef HARD_CONST #ifdef HARD_CONST
extern void fpscr_set_from_mem PARAMS ((int, HARD_REG_SET)); extern void fpscr_set_from_mem PARAMS ((int, HARD_REG_SET));

View File

@ -7113,6 +7113,177 @@ sh_strip_name_encoding (str)
return str; return str;
} }
/*
On the SH1..SH4, the trampoline looks like
2 0002 D202 mov.l l2,r2
1 0000 D301 mov.l l1,r3
3 0004 422B jmp @r2
4 0006 0009 nop
5 0008 00000000 l1: .long area
6 000c 00000000 l2: .long function
SH5 (compact) uses r1 instead of r3 for the static chain. */
/* Emit RTL insns to initialize the variable parts of a trampoline.
TRAMP is an RTX for the address of the trampoline being initialized.
FNADDR is an RTX for the address of the function's pure code.
CXT is an RTX for the static chain value for the function.

Four cases are handled, in this order: TARGET_SHMEDIA64, any other
TARGET_SHMEDIA target, TARGET_SHCOMPACT, and finally the plain
SH1..SH4 layout described in the comment above. */
void
sh_initialize_trampoline (tramp, fnaddr, cxt)
rtx tramp, fnaddr, cxt;
{
if (TARGET_SHMEDIA64)
{
rtx tramp_templ;
int fixed_len;
/* Instruction templates with a zero immediate field; the immediate
is or-ed in below.  NOTE(review): the SHmedia32 branch further down
describes these same two constants as "movi 0,r1" and "shori 0,r1",
so the register called r0 in the next comment may really be r1 --
confirm against the SHmedia encoding. */
rtx movi1 = GEN_INT (0xcc000010);
rtx shori1 = GEN_INT (0xc8000010);
rtx src, dst;
/* The following trampoline works within a +- 128 KB range for cxt:
ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
gettr tr1,r1; blink tr0,r63 */
/* Address rounding makes it hard to compute the exact bounds of the
offset for this trampoline, but we have a rather generous offset
range, so frame_offset should do fine as an upper bound. */
if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
{
/* ??? could optimize this trampoline initialization
by writing DImode words with two insns each. */
/* MASK selects bits 10..25 of an instruction word, i.e. the
immediate field that each of the values below is shifted into. */
rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
/* Word 0: the byte distance from the trampoline to CXT, shifted
left by 10-2 and masked into the immediate field. */
rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
insn = gen_rtx_AND (DImode, insn, mask);
/* Or in ptb/u .,tr1 pattern */
insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
insn = force_operand (insn, NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
/* Word 1: movi with bits 48..63 of FNADDR (shifting right by 38
leaves them at bit 10, where MASK keeps them). */
insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
/* Word 2: shori with bits 32..47 of FNADDR. */
insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
/* Word 3: shori with bits 16..31 of FNADDR. */
insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
insn);
/* Word 4: shori with bits 0..15 of FNADDR. */
insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
insn);
/* Words 5..7 are constant instruction words; per the sequence
listed above they should be the ptabs/l, gettr and blink. */
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
GEN_INT (0x6bf10600));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
GEN_INT (0x4415fc10));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
GEN_INT (0x4401fff0));
emit_insn (gen_ic_invalidate_line (tramp));
return;
}
/* General SHmedia64 case: copy the fixed part of the trampoline from
the __GCC_nested_trampoline template, then store FNADDR and CXT in
the two Pmode-sized slots that follow it. */
tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
tramp_templ = gen_datalabel_ref (tramp_templ);
dst = gen_rtx_MEM (BLKmode, tramp);
src = gen_rtx_MEM (BLKmode, tramp_templ);
/* Alignments are given in bits. */
set_mem_align (dst, 256);
set_mem_align (src, 64);
emit_block_move (dst, src, GEN_INT (fixed_len));
emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
fnaddr);
emit_move_insn (gen_rtx_MEM (Pmode,
plus_constant (tramp,
fixed_len
+ GET_MODE_SIZE (Pmode))),
cxt);
emit_insn (gen_ic_invalidate_line (tramp));
return;
}
else if (TARGET_SHMEDIA)
{
/* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
/* The six instruction words are assembled as three DImode values
(QUAD0, QUAD1, QUAD2) and stored at offsets 0, 8 and 16. */
rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
/* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
rotated 10 right, and higher 16 bit of every 32 selected. */
rtx movishori
= force_reg (V2HImode, (simplify_gen_subreg
(V2HImode, GEN_INT (0x4330432), SImode, 0)));
rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
tramp = force_reg (Pmode, tramp);
fnaddr = force_reg (SImode, fnaddr);
cxt = force_reg (SImode, cxt);
/* QUAD0: the movi/shori pair that loads FNADDR.  The halves of FNADDR
are interleaved with the opcode halves, then rotated and shifted
into instruction position. */
emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
gen_rtx_SUBREG (V2HImode, fnaddr, 0),
movishori));
emit_insn (gen_rotldi3_mextr (quad0, quad0,
GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
/* CXTLOAD: the same construction for the movi/shori pair that
loads CXT. */
emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
gen_rtx_SUBREG (V2HImode, cxt, 0),
movishori));
emit_insn (gen_rotldi3_mextr (cxtload, cxtload,
GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
/* Combine the ptabs and blink words with the two halves of CXTLOAD
to form QUAD1 and QUAD2; which insn builds which quad depends on
the endianness. */
if (TARGET_LITTLE_ENDIAN)
{
emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
emit_insn (gen_mextr4 (quad2, cxtload, blink));
}
else
{
emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
}
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
emit_insn (gen_ic_invalidate_line (tramp));
return;
}
else if (TARGET_SHCOMPACT)
{
/* Handled entirely by the initialize_trampoline pattern.  Note the
argument order: CXT is passed before FNADDR. */
emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
return;
}
/* SH1..SH4: store the two fixed instruction words (halfword order
depends on endianness), then CXT at offset 8 and FNADDR at
offset 12. */
emit_move_insn (gen_rtx_MEM (SImode, tramp),
gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
SImode));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
SImode));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
cxt);
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
fnaddr);
/* With TARGET_HARVARD the instruction cache must be invalidated for
the freshly written code; with TARGET_USERMODE this is done through
the __ic_invalidate library routine rather than an inline insn. */
if (TARGET_HARVARD)
{
if (TARGET_USERMODE)
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"),
0, VOIDmode, 1, tramp, SImode);
else
emit_insn (gen_ic_invalidate_line (tramp));
}
}
/* Machine specific built-in functions. */ /* Machine specific built-in functions. */

View File

@ -2082,64 +2082,25 @@ while (0)
6 000c 00000000 l2: .long function */ 6 000c 00000000 l2: .long function */
/* Length in units of the trampoline for entering a nested function. */ /* Length in units of the trampoline for entering a nested function. */
#define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 32 : 16) #define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : 16)
/* Alignment required for a trampoline in bits . */ /* Alignment required for a trampoline in bits . */
#define TRAMPOLINE_ALIGNMENT \ #define TRAMPOLINE_ALIGNMENT \
((CACHE_LOG < 3 || (TARGET_SMALLCODE && ! TARGET_HARVARD)) ? 32 : 64) ((CACHE_LOG < 3 || (TARGET_SMALLCODE && ! TARGET_HARVARD)) ? 32 \
: TARGET_SHMEDIA ? 256 : 64)
/* Emit RTL insns to initialize the variable parts of a trampoline. /* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code. FNADDR is an RTX for the address of the function's pure code.
CXT is an RTX for the static chain value for the function. */ CXT is an RTX for the static chain value for the function. */
#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) do \ #define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
{ \ sh_initialize_trampoline ((TRAMP), (FNADDR), (CXT))
if (TARGET_SH5) \
{ \
rtx tramp_templ = gen_rtx_SYMBOL_REF (Pmode, \
"__GCC_nested_trampoline"); \
int fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode); \
\
tramp_templ = gen_datalabel_ref (tramp_templ); \
emit_block_move (gen_rtx_MEM (BLKmode, (TRAMP)), \
gen_rtx_MEM (BLKmode, tramp_templ), \
GEN_INT (fixed_len)); \
emit_move_insn (gen_rtx_MEM (Pmode, plus_constant ((TRAMP), \
fixed_len)), \
(FNADDR)); \
emit_move_insn (gen_rtx_MEM (Pmode, \
plus_constant ((TRAMP), \
fixed_len \
+ GET_MODE_SIZE (Pmode))), \
(CXT)); \
emit_insn (gen_ic_invalidate_line (TRAMP)); \
break; \
} \
emit_move_insn (gen_rtx_MEM (SImode, (TRAMP)), \
GEN_INT (trunc_int_for_mode \
(TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,\
SImode))); \
emit_move_insn (gen_rtx_MEM (SImode, plus_constant ((TRAMP), 4)), \
GEN_INT (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009));\
emit_move_insn (gen_rtx_MEM (SImode, plus_constant ((TRAMP), 8)), \
(CXT)); \
emit_move_insn (gen_rtx_MEM (SImode, plus_constant ((TRAMP), 12)), \
(FNADDR)); \
if (TARGET_HARVARD) \
{ \
if (TARGET_USERMODE) \
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ic_invalidate"),\
0, VOIDmode, 1, (TRAMP), SImode); \
else \
emit_insn (gen_ic_invalidate_line (TRAMP)); \
} \
} while (0)
/* On SH5, trampolines are SHmedia code, so add 1 to the address. */ /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
#define TRAMPOLINE_ADJUST_ADDRESS(TRAMP) do \ #define TRAMPOLINE_ADJUST_ADDRESS(TRAMP) do \
{ \ { \
if (TARGET_SH5) \ if (TARGET_SHMEDIA) \
(TRAMP) = expand_simple_binop (Pmode, PLUS, (TRAMP), GEN_INT (1), \ (TRAMP) = expand_simple_binop (Pmode, PLUS, (TRAMP), GEN_INT (1), \
gen_reg_rtx (Pmode), 0, \ gen_reg_rtx (Pmode), 0, \
OPTAB_LIB_WIDEN); \ OPTAB_LIB_WIDEN); \

View File

@ -3667,6 +3667,8 @@
[(set_attr "length" "8") [(set_attr "length" "8")
(set_attr "insn_class" "cwb")]) (set_attr "insn_class" "cwb")])
;; ??? could make arg 0 an offsettable memory operand to allow to save
;; an add in the code that calculates the address.
(define_insn "ic_invalidate_line_media" (define_insn "ic_invalidate_line_media"
[(unspec_volatile [(match_operand 0 "register_operand" "r")] [(unspec_volatile [(match_operand 0 "register_operand" "r")]
UNSPEC_ICACHE)] UNSPEC_ICACHE)]
@ -3685,6 +3687,37 @@
[(set_attr "type" "sfunc") [(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")]) (set_attr "needs_delay_slot" "yes")])
;; Expander for SHcompact trampoline initialization.  It places
;; operand 0 in r0, operand 1 in r2 and operand 2 in r3, loads the
;; address of __init_trampoline into a register, and emits
;; initialize_trampoline_compact to perform the call.
;; NOTE(review): sh_initialize_trampoline in sh.c appears to pass
;; (tramp, cxt, fnaddr) as operands 0..2 -- confirm at the call site.
(define_expand "initialize_trampoline"
[(match_operand:SI 0 "" "")
(match_operand:SI 1 "" "")
(match_operand:SI 2 "" "")]
"TARGET_SHCOMPACT"
"
{
rtx sfun, tramp;
sfun = force_reg (Pmode, gen_rtx_SYMBOL_REF (Pmode, \"__init_trampoline\"));
tramp = gen_rtx_REG (SImode, R0_REG);
emit_move_insn (tramp, operands[0]);
emit_move_insn (gen_rtx_REG (SImode, R2_REG), operands[1]);
emit_move_insn (gen_rtx_REG (SImode, R3_REG), operands[2]);
emit_insn (gen_initialize_trampoline_compact (tramp, sfun));
DONE;
}")
;; The call to the trampoline-initialization helper itself.  Operand 0
;; is the trampoline address, operand 1 the register holding the
;; helper's address, which is the jsr target.  r2 and r3 are listed as
;; inputs so that the moves into them are kept live, and PR is
;; clobbered by the jsr.  The unspec_volatile keeps the insn from being
;; deleted or moved.
(define_insn "initialize_trampoline_compact"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "z")
(match_operand:SI 1 "register_operand" "r")
(reg:SI R2_REG) (reg:SI R3_REG)]
UNSPEC_INIT_TRAMP)
(clobber (reg:SI PR_REG))]
"TARGET_SHCOMPACT"
"jsr @%1%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "movqi_i" (define_insn "movqi_i"
[(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,r,l") [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,r,l")
(match_operand:QI 1 "general_movsrc_operand" "ri,m,r,t,l,r"))] (match_operand:QI 1 "general_movsrc_operand" "ri,m,r,t,l,r"))]
@ -7198,62 +7231,6 @@
"jsr @r0%#" "jsr @r0%#"
[(set_attr "needs_delay_slot" "yes")]) [(set_attr "needs_delay_slot" "yes")])
;; ??? could make arg 0 an offsettable memory operand - and do likewise
;; for cache invalidation - to allow to save an add in the code that
;; calculates the address.
(define_insn "shmedia32_initialize_trampoline_big"
[(set (mem:BLK (match_operand:SI 0 "arith_reg_operand" "r"))
(unspec [(match_operand:SI 1 "arith_reg_operand" "r")
(match_operand:SI 2 "arith_reg_operand" "r")]
UNSPEC_INIT_TRAMP))
(clobber (match_scratch:SI 3 "=&r"))
(clobber (match_scratch:SI 4 "=&r"))]
"TARGET_SHMEDIA32 && ! TARGET_LITTLE_ENDIAN"
"movi 0x433,%3
shori 0x432,%3
mshflo.w %1,%3,%4
mextr7 %4,%4,%4
shlli %4,2,%4
st.q %0,0,%4
mshflo.w %2,%3,%4
shlli %4,10,%4
addi %4,0x10,%4
movi 0x6bf1,%3
shori 0x0600,%3
mextr4 %4,%3,%3
st.q %0,8,%3
shori 0x4401,%4
shori 0xfff0,%4
st.q %0,16,%4"
[(set_attr "length" "64")])
(define_insn "shmedia32_initialize_trampoline_little"
[(set (mem:BLK (match_operand:SI 0 "arith_reg_operand" "r"))
(unspec [(match_operand:SI 1 "arith_reg_operand" "r")
(match_operand:SI 2 "arith_reg_operand" "r")]
UNSPEC_INIT_TRAMP))
(clobber (match_scratch:SI 3 "=&r"))
(clobber (match_scratch:SI 4 "=&r"))]
"TARGET_SHMEDIA32 && TARGET_LITTLE_ENDIAN"
"movi 0x433,%3
shori 0x432,%3
mshflo.w %1,%3,%4
mextr3 %4,%4,%4
shlli %4,2,%4
st.q %0,0,%4
mshflo.w %2,%3,%4
shlli %4,10,%4
addi %4,0x10,%4
movi 0x6bf1,%3
shori 0x0600,%3
shori 0x4401,%3
shori 0xfff0,%3
st.l %0,16,%r4
st.l %0,20,%r3
mshfhi.l %3,%4,%4
st.q %0,8,%4"
[(set_attr "length" "68")])
(define_expand "prologue" (define_expand "prologue"
[(const_int 0)] [(const_int 0)]
"" ""
@ -10263,6 +10240,29 @@
"mshflo.l %N2, %N1, %0" "mshflo.l %N2, %N1, %0"
[(set_attr "type" "arith_media")]) [(set_attr "type" "arith_media")])
;; Combiner pattern for trampoline initialization.
;; Matches (operand 1 << 32) | constant and, once operand 0 and
;; operand 1 are the same register, splits it into two shori insns:
;; the first shifts in bits 16..31 of the constant, the second
;; bits 0..15.
;; The constant must be non-negative.  A negative (sign-extended)
;; constant has all of its upper 32 bits set, so the DImode IOR would
;; force the upper half of the result to all ones, whereas the two
;; shori insns leave the low 32 bits of operand 1 there.  Accepting
;; such a constant would therefore produce wrong code.
(define_insn_and_split "*double_shori"
[(set (match_operand:DI 0 "arith_reg_dest" "=r")
(ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0")
(const_int 32))
(match_operand:DI 2 "const_int_operand" "n")))]
"TARGET_SHMEDIA
&& INTVAL (operands[2]) >= 0
&& INTVAL (operands[2]) == trunc_int_for_mode (INTVAL (operands[2]), SImode)"
"#"
"rtx_equal_p (operands[0], operands[1])"
[(const_int 0)]
"
{
HOST_WIDE_INT v = INTVAL (operands[2]);
emit_insn (gen_shori_media (operands[0], operands[0],
gen_int_mode (v >> 16, HImode)));
emit_insn (gen_shori_media (operands[0], operands[0],
gen_int_mode (v, HImode)));
DONE;
}")
(define_insn "*mshflo_l_di_x" (define_insn "*mshflo_l_di_x"
[(set (match_operand:DI 0 "arith_reg_dest" "=r") [(set (match_operand:DI 0 "arith_reg_dest" "=r")
(ior:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_or_0_operand" (ior:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_or_0_operand"