xtensa.md (fix_return_addr): Remove.

* config/xtensa/xtensa.md (fix_return_addr): Remove.
	* config/xtensa/xtensa-protos.h (xtensa_initialize_trampoline): New.
	(xtensa_trampoline_template): New.
	* config/xtensa/xtensa.c (MIN_FRAME_SIZE): Moved here from xtensa.h.
	(xtensa_return_addr): Expand to standard Xtensa insns instead of
	fix_return_addr.  Get high bits from a local label.
	(xtensa_trampoline_template): New function with code moved from
	TRAMPOLINE_TEMPLATE in xtensa.h.  Use L32R instead of CALL0 except
	when using CONST16 or absolute-mode literals.
	(xtensa_initialize_trampoline): New function with code moved from
	INITIALIZE_TRAMPOLINE in xtensa.h.  Use different offsets depending
	on which trampoline version is used.
	* config/xtensa/lib2funcs.S (TRAMPOLINE_SIZE): Add comment.
	* config/xtensa/xtensa.h (TARGET_ABSOLUTE_LITERALS): Define.
	(MIN_FRAME_SIZE): Moved to xtensa.c.
	(TRAMPOLINE_TEMPLATE): Use xtensa_trampoline_template.
	(TRAMPOLINE_SIZE): Two versions of the trampoline have different sizes.
	(INITIALIZE_TRAMPOLINE): Use xtensa_initialize_trampoline.
	* config/xtensa/ieee754-df.S (XCHAL_NO_MUL): Define.
	(__muldf3): Use CALL12 instead of CALL0 to invoke .Lmul_mulsi3
	helper when not using the CALL0 ABI.  Change .Lmul_mulsi3 to match.
	* config/xtensa/lib1funcs.asm (__umulsidi3): Likewise.
	* config/xtensa/ieee754-sf.S (__mulsf3): Likewise.

From-SVN: r131108
This commit is contained in:
Bob Wilson 2007-12-20 22:35:59 +00:00 committed by Bob Wilson
parent 4c12c8ea94
commit 7f0ee69424
9 changed files with 320 additions and 174 deletions

View File

@@ -1,3 +1,29 @@
2007-12-20 Bob Wilson <bob.wilson@acm.org>
* config/xtensa/xtensa.md (fix_return_addr): Remove.
* config/xtensa/xtensa-protos.h (xtensa_initialize_trampoline): New.
(xtensa_trampoline_template): New.
* config/xtensa/xtensa.c (MIN_FRAME_SIZE): Moved here from xtensa.h.
(xtensa_return_addr): Expand to standard Xtensa insns instead of
fix_return_addr. Get high bits from a local label.
(xtensa_trampoline_template): New function with code moved from
TRAMPOLINE_TEMPLATE in xtensa.h. Use L32R instead of CALL0 except
when using CONST16 or absolute-mode literals.
(xtensa_initialize_trampoline): New function with code moved from
INITIALIZE_TRAMPOLINE in xtensa.h. Use different offsets depending
on which trampoline version is used.
* config/xtensa/lib2funcs.S (TRAMPOLINE_SIZE): Add comment.
* config/xtensa/xtensa.h (TARGET_ABSOLUTE_LITERALS): Define.
(MIN_FRAME_SIZE): Moved to xtensa.c.
(TRAMPOLINE_TEMPLATE): Use xtensa_trampoline_template.
(TRAMPOLINE_SIZE): Two versions of the trampoline have different sizes.
(INITIALIZE_TRAMPOLINE): Use xtensa_initialize_trampoline.
* config/xtensa/ieee754-df.S (XCHAL_NO_MUL): Define.
(__muldf3): Use CALL12 instead of CALL0 to invoke .Lmul_mulsi3
helper when not using the CALL0 ABI. Change .Lmul_mulsi3 to match.
* config/xtensa/lib1funcs.asm (__umulsidi3): Likewise.
* config/xtensa/ieee754-sf.S (__mulsf3): Likewise.
2007-12-20 Jakub Jelinek <jakub@redhat.com>
PR c++/34459

View File

@@ -1,5 +1,5 @@
/* IEEE-754 double-precision functions for Xtensa
Copyright (C) 2006 Free Software Foundation, Inc.
Copyright (C) 2006, 2007 Free Software Foundation, Inc.
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
This file is part of GCC.
@@ -607,6 +607,10 @@ __subdf3:
#ifdef L_muldf3
/* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif
__muldf3_aux:
/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
@@ -728,13 +732,19 @@ __muldf3_aux:
.global __muldf3
.type __muldf3, @function
__muldf3:
leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
leaf_entry sp, 32
addi sp, sp, -32
s32i a12, sp, 16
s32i a13, sp, 20
s32i a14, sp, 24
s32i a15, sp, 28
#elif XCHAL_NO_MUL
/* This is not really a leaf function; allocate enough stack space
to allow CALL12s to a helper function. */
leaf_entry sp, 64
#else
leaf_entry sp, 32
#endif
movi a6, 0x7ff00000
@@ -809,7 +819,7 @@ __muldf3:
muluh xh, xh, yh
add xh, xh, a9
#else
#else /* ! XCHAL_HAVE_MUL32_HIGH */
/* Break the inputs into 16-bit chunks and compute 16 32-bit partial
products. These partial products are:
@@ -847,7 +857,7 @@ __muldf3:
/* Save a7 since it is needed to hold a temporary value. */
s32i a7, sp, 4
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
/* Calling a separate multiply function will clobber a0 and requires
use of a8 as a temporary, so save those values now. (The function
uses a custom ABI so nothing else needs to be saved.) */
@@ -915,12 +925,21 @@ __muldf3:
#define set_arg_h(dst, src) \
srli dst, src, 16
#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a13, xreg); \
set_arg_ ## yhalf (a14, yreg); \
call0 .Lmul_mulsi3; \
mov dst, a12
#endif
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a14, xreg); \
set_arg_ ## yhalf (a15, yreg); \
call12 .Lmul_mulsi3; \
mov dst, a14
#endif /* __XTENSA_CALL0_ABI__ */
#endif /* no multiply hardware */
/* Add pp1 and pp2 into a10 with carry-out in a9. */
do_mul(a10, xl, l, yl, h) /* pp 1 */
@@ -1032,11 +1051,11 @@ __muldf3:
/* Restore values saved on the stack during the multiplication. */
l32i a7, sp, 4
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
l32i a0, sp, 0
l32i a8, sp, 8
#endif
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */
/* Shift left by 12 bits, unless there was a carry-out from the
multiply, in which case, shift by 11 bits and increment the
@@ -1157,38 +1176,47 @@ __muldf3:
movi xl, 0
j .Lmul_done
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#if XCHAL_NO_MUL
/* For Xtensa processors with no multiply hardware, this simplified
version of _mulsi3 is used for multiplying 16-bit chunks of
the floating-point mantissas. It uses a custom ABI: the inputs
are passed in a13 and a14, the result is returned in a12, and
a8 and a15 are clobbered. */
the floating-point mantissas. When using CALL0, this function
uses a custom ABI: the inputs are passed in a13 and a14, the
result is returned in a12, and a8 and a15 are clobbered. */
.align 4
.Lmul_mulsi3:
movi a12, 0
.Lmul_mult_loop:
add a15, a14, a12
extui a8, a13, 0, 1
movnez a12, a15, a8
leaf_entry sp, 16
.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
movi \dst, 0
1: add \tmp1, \src2, \dst
extui \tmp2, \src1, 0, 1
movnez \dst, \tmp1, \tmp2
do_addx2 a15, a14, a12, a15
extui a8, a13, 1, 1
movnez a12, a15, a8
do_addx2 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 1, 1
movnez \dst, \tmp1, \tmp2
do_addx4 a15, a14, a12, a15
extui a8, a13, 2, 1
movnez a12, a15, a8
do_addx4 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 2, 1
movnez \dst, \tmp1, \tmp2
do_addx8 a15, a14, a12, a15
extui a8, a13, 3, 1
movnez a12, a15, a8
do_addx8 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 3, 1
movnez \dst, \tmp1, \tmp2
srli a13, a13, 4
slli a14, a14, 4
bnez a13, .Lmul_mult_loop
ret
#endif /* !MUL16 && !MUL32 && !MAC16 */
srli \src1, \src1, 4
slli \src2, \src2, 4
bnez \src1, 1b
.endm
#if __XTENSA_CALL0_ABI__
mul_mulsi3_body a12, a13, a14, a15, a8
#else
/* The result will be written into a2, so save that argument in a4. */
mov a4, a2
mul_mulsi3_body a2, a4, a3, a5, a6
#endif
leaf_return
#endif /* XCHAL_NO_MUL */
#endif /* L_muldf3 */
#ifdef L_divdf3

View File

@@ -1,5 +1,5 @@
/* IEEE-754 single-precision functions for Xtensa
Copyright (C) 2006 Free Software Foundation, Inc.
Copyright (C) 2006, 2007 Free Software Foundation, Inc.
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
This file is part of GCC.
@@ -488,6 +488,10 @@ __subsf3:
#ifdef L_mulsf3
/* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif
__mulsf3_aux:
/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
@@ -570,13 +574,19 @@ __mulsf3_aux:
.global __mulsf3
.type __mulsf3, @function
__mulsf3:
leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
leaf_entry sp, 32
addi sp, sp, -32
s32i a12, sp, 16
s32i a13, sp, 20
s32i a14, sp, 24
s32i a15, sp, 28
#elif XCHAL_NO_MUL
/* This is not really a leaf function; allocate enough stack space
to allow CALL12s to a helper function. */
leaf_entry sp, 64
#else
leaf_entry sp, 32
#endif
movi a6, 0x7f800000
@@ -633,7 +643,7 @@ __mulsf3:
chunks can be extracted when setting up the arguments to the
separate multiply function. */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
/* Calling a separate multiply function will clobber a0 and requires
use of a8 as a temporary, so save those values now. (The function
uses a custom ABI so nothing else needs to be saved.) */
@@ -693,12 +703,21 @@ __mulsf3:
#define set_arg_h(dst, src) \
srli dst, src, 16
#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a13, xreg); \
set_arg_ ## yhalf (a14, yreg); \
call0 .Lmul_mulsi3; \
mov dst, a12
#endif
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a14, xreg); \
set_arg_ ## yhalf (a15, yreg); \
call12 .Lmul_mulsi3; \
mov dst, a14
#endif /* __XTENSA_CALL0_ABI__ */
#endif /* no multiply hardware */
/* Add pp1 and pp2 into a6 with carry-out in a9. */
do_mul(a6, a2, l, a3, h) /* pp 1 */
@@ -724,12 +743,12 @@ __mulsf3:
do_mul(a2, a2, h, a3, h) /* pp 3 */
add a2, a2, a9
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
/* Restore values saved on the stack during the multiplication. */
l32i a0, sp, 0
l32i a8, sp, 4
#endif
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */
/* Shift left by 9 bits, unless there was a carry-out from the
multiply, in which case, shift by 8 bits and increment the
@@ -825,38 +844,47 @@ __mulsf3:
slli a2, a2, 31
j .Lmul_done
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#if XCHAL_NO_MUL
/* For Xtensa processors with no multiply hardware, this simplified
version of _mulsi3 is used for multiplying 16-bit chunks of
the floating-point mantissas. It uses a custom ABI: the inputs
are passed in a13 and a14, the result is returned in a12, and
a8 and a15 are clobbered. */
the floating-point mantissas. When using CALL0, this function
uses a custom ABI: the inputs are passed in a13 and a14, the
result is returned in a12, and a8 and a15 are clobbered. */
.align 4
.Lmul_mulsi3:
movi a12, 0
.Lmul_mult_loop:
add a15, a14, a12
extui a8, a13, 0, 1
movnez a12, a15, a8
leaf_entry sp, 16
.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
movi \dst, 0
1: add \tmp1, \src2, \dst
extui \tmp2, \src1, 0, 1
movnez \dst, \tmp1, \tmp2
do_addx2 a15, a14, a12, a15
extui a8, a13, 1, 1
movnez a12, a15, a8
do_addx2 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 1, 1
movnez \dst, \tmp1, \tmp2
do_addx4 a15, a14, a12, a15
extui a8, a13, 2, 1
movnez a12, a15, a8
do_addx4 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 2, 1
movnez \dst, \tmp1, \tmp2
do_addx8 a15, a14, a12, a15
extui a8, a13, 3, 1
movnez a12, a15, a8
do_addx8 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 3, 1
movnez \dst, \tmp1, \tmp2
srli a13, a13, 4
slli a14, a14, 4
bnez a13, .Lmul_mult_loop
ret
#endif /* !MUL16 && !MUL32 && !MAC16 */
srli \src1, \src1, 4
slli \src2, \src2, 4
bnez \src1, 1b
.endm
#if __XTENSA_CALL0_ABI__
mul_mulsi3_body a12, a13, a14, a15, a8
#else
/* The result will be written into a2, so save that argument in a4. */
mov a4, a2
mul_mulsi3_body a2, a4, a3, a5, a6
#endif
leaf_return
#endif /* XCHAL_NO_MUL */
#endif /* L_mulsf3 */
#ifdef L_divsf3

View File

@@ -201,17 +201,28 @@ __mulsi3:
#ifdef L_umulsidi3
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif
.align 4
.global __umulsidi3
.type __umulsidi3, @function
__umulsidi3:
leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
leaf_entry sp, 32
addi sp, sp, -32
s32i a12, sp, 16
s32i a13, sp, 20
s32i a14, sp, 24
s32i a15, sp, 28
#elif XCHAL_NO_MUL
/* This is not really a leaf function; allocate enough stack space
to allow CALL12s to a helper function. */
leaf_entry sp, 48
#else
leaf_entry sp, 16
#endif
#ifdef __XTENSA_EB__
@@ -232,7 +243,7 @@ __umulsidi3:
#else /* ! MUL32_HIGH */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
/* a0 and a8 will be clobbered by calling the multiply function
but a8 is not used here and need not be saved. */
s32i a0, sp, 0
@@ -290,12 +301,21 @@ __umulsidi3:
#define set_arg_h(dst, src) \
srli dst, src, 16
#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a13, xreg); \
set_arg_ ## yhalf (a14, yreg); \
call0 .Lmul_mulsi3; \
mov dst, a12
#endif
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a14, xreg); \
set_arg_ ## yhalf (a15, yreg); \
call12 .Lmul_mulsi3; \
mov dst, a14
#endif /* __XTENSA_CALL0_ABI__ */
#endif /* no multiply hardware */
/* Add pp1 and pp2 into a6 with carry-out in a9. */
do_mul(a6, a2, l, a3, h) /* pp 1 */
@@ -324,7 +344,7 @@ __umulsidi3:
#endif /* !MUL32_HIGH */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
/* Restore the original return address. */
l32i a0, sp, 0
#endif
@@ -337,38 +357,47 @@ __umulsidi3:
#endif
leaf_return
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#if XCHAL_NO_MUL
/* For Xtensa processors with no multiply hardware, this simplified
version of _mulsi3 is used for multiplying 16-bit chunks of
the floating-point mantissas. It uses a custom ABI: the inputs
are passed in a13 and a14, the result is returned in a12, and
a8 and a15 are clobbered. */
the floating-point mantissas. When using CALL0, this function
uses a custom ABI: the inputs are passed in a13 and a14, the
result is returned in a12, and a8 and a15 are clobbered. */
.align 4
.Lmul_mulsi3:
movi a12, 0
.Lmul_mult_loop:
add a15, a14, a12
extui a8, a13, 0, 1
movnez a12, a15, a8
leaf_entry sp, 16
.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
movi \dst, 0
1: add \tmp1, \src2, \dst
extui \tmp2, \src1, 0, 1
movnez \dst, \tmp1, \tmp2
do_addx2 a15, a14, a12, a15
extui a8, a13, 1, 1
movnez a12, a15, a8
do_addx2 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 1, 1
movnez \dst, \tmp1, \tmp2
do_addx4 a15, a14, a12, a15
extui a8, a13, 2, 1
movnez a12, a15, a8
do_addx4 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 2, 1
movnez \dst, \tmp1, \tmp2
do_addx8 a15, a14, a12, a15
extui a8, a13, 3, 1
movnez a12, a15, a8
do_addx8 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 3, 1
movnez \dst, \tmp1, \tmp2
srli a13, a13, 4
slli a14, a14, 4
bnez a13, .Lmul_mult_loop
ret
#endif /* !MUL16 && !MUL32 && !MAC16 */
srli \src1, \src1, 4
slli \src2, \src2, 4
bnez \src1, 1b
.endm
#if __XTENSA_CALL0_ABI__
mul_mulsi3_body a12, a13, a14, a15, a8
#else
/* The result will be written into a2, so save that argument in a4. */
mov a4, a2
mul_mulsi3_body a2, a4, a3, a5, a6
#endif
leaf_return
#endif /* XCHAL_NO_MUL */
.size __umulsidi3, . - __umulsidi3

View File

@@ -1,5 +1,5 @@
/* Assembly functions for libgcc2.
Copyright (C) 2001, 2006 Free Software Foundation, Inc.
Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc.
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
This file is part of GCC.
@@ -151,6 +151,7 @@ __xtensa_nonlocal_goto:
make sure that the modified instructions are loaded into the instruction
fetch buffer. */
/* Use the maximum trampoline size. Flushing a bit extra is OK. */
#define TRAMPOLINE_SIZE 60
.text

View File

@@ -69,6 +69,7 @@ extern enum reg_class xtensa_preferred_reload_class (rtx, enum reg_class, int);
extern enum reg_class xtensa_secondary_reload_class (enum reg_class,
enum machine_mode, rtx,
int);
extern void xtensa_initialize_trampoline (rtx, rtx, rtx);
#endif /* RTX_CODE */
#ifdef TREE_CODE
@@ -85,5 +86,6 @@ extern long compute_frame_size (int);
extern int xtensa_frame_pointer_required (void);
extern void xtensa_expand_prologue (void);
extern void order_regs_for_local_alloc (void);
extern void xtensa_trampoline_template (FILE *);
#endif /* !__XTENSA_PROTOS_H__ */

View File

@@ -2301,6 +2301,10 @@ xtensa_frame_pointer_required (void)
}
/* minimum frame = reg save area (4 words) plus static chain (1 word)
and the total number of words must be a multiple of 128 bits. */
#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
void
xtensa_expand_prologue (void)
{
@@ -2379,7 +2383,7 @@ xtensa_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
rtx
xtensa_return_addr (int count, rtx frame)
{
rtx result, retaddr;
rtx result, retaddr, curaddr, label;
if (count == -1)
retaddr = gen_rtx_REG (Pmode, A0_REG);
@@ -2393,10 +2397,25 @@ xtensa_return_addr (int count, rtx frame)
/* The 2 most-significant bits of the return address on Xtensa hold
the register window size. To get the real return address, these
bits must be replaced with the high bits from the current PC. */
bits must be replaced with the high bits from some address in the
code. */
/* Get the 2 high bits of a local label in the code. */
curaddr = gen_reg_rtx (Pmode);
label = gen_label_rtx ();
emit_label (label);
LABEL_PRESERVE_P (label) = 1;
emit_move_insn (curaddr, gen_rtx_LABEL_REF (Pmode, label));
emit_insn (gen_lshrsi3 (curaddr, curaddr, GEN_INT (30)));
emit_insn (gen_ashlsi3 (curaddr, curaddr, GEN_INT (30)));
/* Clear the 2 high bits of the return address. */
result = gen_reg_rtx (Pmode);
emit_insn (gen_fix_return_addr (result, retaddr));
emit_insn (gen_ashlsi3 (result, retaddr, GEN_INT (2)));
emit_insn (gen_lshrsi3 (result, result, GEN_INT (2)));
/* Combine them to get the result. */
emit_insn (gen_iorsi3 (result, result, curaddr));
return result;
}
@@ -3126,4 +3145,95 @@ xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
> 4 * UNITS_PER_WORD);
}
/* TRAMPOLINE_TEMPLATE: For Xtensa, the trampoline must perform an ENTRY
instruction with a minimal stack frame in order to get some free
registers. Once the actual call target is known, the proper stack frame
size is extracted from the ENTRY instruction at the target and the
current frame is adjusted to match. The trampoline then transfers
control to the instruction following the ENTRY at the target. Note:
this assumes that the target begins with an ENTRY instruction. */
void
xtensa_trampoline_template (FILE *stream)
{
bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS);
fprintf (stream, "\t.begin no-transform\n");
fprintf (stream, "\tentry\tsp, %d\n", MIN_FRAME_SIZE);
if (use_call0)
{
/* Save the return address. */
fprintf (stream, "\tmov\ta10, a0\n");
/* Use a CALL0 instruction to skip past the constants and in the
process get the PC into A0. This allows PC-relative access to
the constants without relying on L32R. */
fprintf (stream, "\tcall0\t.Lskipconsts\n");
}
else
fprintf (stream, "\tj\t.Lskipconsts\n");
fprintf (stream, "\t.align\t4\n");
fprintf (stream, ".Lchainval:%s0\n", integer_asm_op (4, TRUE));
fprintf (stream, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE));
fprintf (stream, ".Lskipconsts:\n");
/* Load the static chain and function address from the trampoline. */
if (use_call0)
{
fprintf (stream, "\taddi\ta0, a0, 3\n");
fprintf (stream, "\tl32i\ta9, a0, 0\n");
fprintf (stream, "\tl32i\ta8, a0, 4\n");
}
else
{
fprintf (stream, "\tl32r\ta9, .Lchainval\n");
fprintf (stream, "\tl32r\ta8, .Lfnaddr\n");
}
/* Store the static chain. */
fprintf (stream, "\ts32i\ta9, sp, %d\n", MIN_FRAME_SIZE - 20);
/* Set the proper stack pointer value. */
fprintf (stream, "\tl32i\ta9, a8, 0\n");
fprintf (stream, "\textui\ta9, a9, %d, 12\n",
TARGET_BIG_ENDIAN ? 8 : 12);
fprintf (stream, "\tslli\ta9, a9, 3\n");
fprintf (stream, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE);
fprintf (stream, "\tsub\ta9, sp, a9\n");
fprintf (stream, "\tmovsp\tsp, a9\n");
if (use_call0)
/* Restore the return address. */
fprintf (stream, "\tmov\ta0, a10\n");
/* Jump to the instruction following the ENTRY. */
fprintf (stream, "\taddi\ta8, a8, 3\n");
fprintf (stream, "\tjx\ta8\n");
/* Pad size to a multiple of TRAMPOLINE_ALIGNMENT. */
if (use_call0)
fprintf (stream, "\t.byte\t0\n");
else
fprintf (stream, "\tnop\n");
fprintf (stream, "\t.end no-transform\n");
}
void
xtensa_initialize_trampoline (rtx addr, rtx func, rtx chain)
{
bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS);
int chain_off = use_call0 ? 12 : 8;
int func_off = use_call0 ? 16 : 12;
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, chain_off)), chain);
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, func_off)), func);
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"),
0, VOIDmode, 1, addr, Pmode);
}
#include "gt-xtensa.h"

View File

@@ -72,6 +72,7 @@ extern unsigned xtensa_current_frame_size;
#define TARGET_ADDX XCHAL_HAVE_ADDX
#define TARGET_RELEASE_SYNC XCHAL_HAVE_RELEASE_SYNC
#define TARGET_S32C1I XCHAL_HAVE_S32C1I
#define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS
#define TARGET_DEFAULT ( \
(XCHAL_HAVE_L32R ? 0 : MASK_CONST16))
@@ -704,83 +705,19 @@ typedef struct xtensa_args
/* Stack pointer value doesn't matter at exit. */
#define EXIT_IGNORE_STACK 1
/* A C statement to output, on the stream FILE, assembler code for a
block of data that contains the constant parts of a trampoline.
This code should not include a label--the label is taken care of
automatically.
For Xtensa, the trampoline must perform an entry instruction with a
minimal stack frame in order to get some free registers. Once the
actual call target is known, the proper stack frame size is extracted
from the entry instruction at the target and the current frame is
adjusted to match. The trampoline then transfers control to the
instruction following the entry at the target. Note: this assumes
that the target begins with an entry instruction. */
/* minimum frame = reg save area (4 words) plus static chain (1 word)
and the total number of words must be a multiple of 128 bits */
#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
#define TRAMPOLINE_TEMPLATE(STREAM) \
do { \
fprintf (STREAM, "\t.begin no-transform\n"); \
fprintf (STREAM, "\tentry\tsp, %d\n", MIN_FRAME_SIZE); \
\
/* save the return address */ \
fprintf (STREAM, "\tmov\ta10, a0\n"); \
\
/* Use a CALL0 instruction to skip past the constants and in the \
process get the PC into A0. This allows PC-relative access to \
the constants without relying on L32R, which may not always be \
available. */ \
\
fprintf (STREAM, "\tcall0\t.Lskipconsts\n"); \
fprintf (STREAM, "\t.align\t4\n"); \
fprintf (STREAM, ".Lchainval:%s0\n", integer_asm_op (4, TRUE)); \
fprintf (STREAM, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE)); \
fprintf (STREAM, ".Lskipconsts:\n"); \
\
/* store the static chain */ \
fprintf (STREAM, "\taddi\ta0, a0, 3\n"); \
fprintf (STREAM, "\tl32i\ta8, a0, 0\n"); \
fprintf (STREAM, "\ts32i\ta8, sp, %d\n", MIN_FRAME_SIZE - 20); \
\
/* set the proper stack pointer value */ \
fprintf (STREAM, "\tl32i\ta8, a0, 4\n"); \
fprintf (STREAM, "\tl32i\ta9, a8, 0\n"); \
fprintf (STREAM, "\textui\ta9, a9, %d, 12\n", \
TARGET_BIG_ENDIAN ? 8 : 12); \
fprintf (STREAM, "\tslli\ta9, a9, 3\n"); \
fprintf (STREAM, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE); \
fprintf (STREAM, "\tsub\ta9, sp, a9\n"); \
fprintf (STREAM, "\tmovsp\tsp, a9\n"); \
\
/* restore the return address */ \
fprintf (STREAM, "\tmov\ta0, a10\n"); \
\
/* jump to the instruction following the entry */ \
fprintf (STREAM, "\taddi\ta8, a8, 3\n"); \
fprintf (STREAM, "\tjx\ta8\n"); \
fprintf (STREAM, "\t.byte\t0\n"); \
fprintf (STREAM, "\t.end no-transform\n"); \
} while (0)
#define TRAMPOLINE_TEMPLATE(STREAM) xtensa_trampoline_template (STREAM)
/* Size in bytes of the trampoline, as an integer. Make sure this is
a multiple of TRAMPOLINE_ALIGNMENT to avoid -Wpadded warnings. */
#define TRAMPOLINE_SIZE 60
#define TRAMPOLINE_SIZE (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS ? 60 : 52)
/* Alignment required for trampolines, in bits. */
#define TRAMPOLINE_ALIGNMENT (32)
#define TRAMPOLINE_ALIGNMENT 32
/* A C statement to initialize the variable parts of a trampoline. */
#define INITIALIZE_TRAMPOLINE(ADDR, FUNC, CHAIN) \
do { \
rtx addr = ADDR; \
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, 12)), CHAIN); \
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, 16)), FUNC); \
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"), \
0, VOIDmode, 1, addr, Pmode); \
} while (0)
xtensa_initialize_trampoline (ADDR, FUNC, CHAIN)
/* If defined, a C expression that produces the machine-specific code
to setup the stack so that arbitrary frames can be accessed.

View File

@@ -1666,21 +1666,6 @@
(set_attr "mode" "none")
(set_attr "length" "0")])
;; The fix_return_addr pattern sets the high 2 bits of an address in a
;; register to match the high bits of the current PC.
(define_insn "fix_return_addr"
[(set (match_operand:SI 0 "register_operand" "=a")
(unspec:SI [(match_operand:SI 1 "register_operand" "r")]
UNSPEC_RET_ADDR))
(clobber (match_scratch:SI 2 "=r"))
(clobber (match_scratch:SI 3 "=r"))]
""
"mov\t%2, a0\;call0\t0f\;.align\t4\;0:\;mov\t%3, a0\;mov\ta0, %2\;\
srli\t%3, %3, 30\;slli\t%0, %1, 2\;ssai\t2\;src\t%0, %3, %0"
[(set_attr "type" "multi")
(set_attr "mode" "SI")
(set_attr "length" "24")])
;; Instructions for the Xtensa "boolean" option.