xtensa.md (fix_return_addr): Remove.
* config/xtensa/xtensa.md (fix_return_addr): Remove. * config/xtensa/xtensa-protos.h (xtensa_initialize_trampoline): New. (xtensa_trampoline_template): New. * config/xtensa/xtensa.c (MIN_FRAME_SIZE): Moved here from xtensa.h. (xtensa_return_addr): Expand to standard Xtensa insns instead of fix_return_addr. Get high bits from a local label. (xtensa_trampoline_template): New function with code moved from TRAMPOLINE_TEMPLATE in xtensa.h. Use L32R instead of CALL0 except when using CONST16 or absolute-mode literals. (xtensa_initialize_trampoline): New function with code moved from INITIALIZE_TRAMPOLINE in xtensa.h. Use different offsets depending on which trampoline version is used. * config/xtensa/lib2funcs.S (TRAMPOLINE_SIZE): Add comment. * config/xtensa/xtensa.h (TARGET_ABSOLUTE_LITERALS): Define. (MIN_FRAME_SIZE): Moved to xtensa.c. (TRAMPOLINE_TEMPLATE): Use xtensa_trampoline_template. (TRAMPOLINE_SIZE): Two versions of the trampoline have different sizes. (INITIALIZE_TRAMPOLINE): Use xtensa_initialize_trampoline. * config/xtensa/ieee754-df.S (XCHAL_NO_MUL): Define. (__muldf3): Use CALL12 instead of CALL0 to invoke .Lmul_mulsi3 helper when not using the CALL0 ABI. Change .Lmul_mulsi3 to match. * config/xtensa/lib1funcs.asm (__umulsidi3): Likewise. * config/xtensa/ieee754-sf.S (__mulsf3): Likewise. From-SVN: r131108
This commit is contained in:
parent
4c12c8ea94
commit
7f0ee69424
|
@ -1,3 +1,29 @@
|
|||
2007-12-20 Bob Wilson <bob.wilson@acm.org>
|
||||
|
||||
* config/xtensa/xtensa.md (fix_return_addr): Remove.
|
||||
* config/xtensa/xtensa-protos.h (xtensa_initialize_trampoline): New.
|
||||
(xtensa_trampoline_template): New.
|
||||
* config/xtensa/xtensa.c (MIN_FRAME_SIZE): Moved here from xtensa.h.
|
||||
(xtensa_return_addr): Expand to standard Xtensa insns instead of
|
||||
fix_return_addr. Get high bits from a local label.
|
||||
(xtensa_trampoline_template): New function with code moved from
|
||||
TRAMPOLINE_TEMPLATE in xtensa.h. Use L32R instead of CALL0 except
|
||||
when using CONST16 or absolute-mode literals.
|
||||
(xtensa_initialize_trampoline): New function with code moved from
|
||||
INITIALIZE_TRAMPOLINE in xtensa.h. Use different offsets depending
|
||||
on which trampoline version is used.
|
||||
* config/xtensa/lib2funcs.S (TRAMPOLINE_SIZE): Add comment.
|
||||
* config/xtensa/xtensa.h (TARGET_ABSOLUTE_LITERALS): Define.
|
||||
(MIN_FRAME_SIZE): Moved to xtensa.c.
|
||||
(TRAMPOLINE_TEMPLATE): Use xtensa_trampoline_template.
|
||||
(TRAMPOLINE_SIZE): Two versions of the trampoline have different sizes.
|
||||
(INITIALIZE_TRAMPOLINE): Use xtensa_initialize_trampoline.
|
||||
* config/xtensa/ieee754-df.S (XCHAL_NO_MUL): Define.
|
||||
(__muldf3): Use CALL12 instead of CALL0 to invoke .Lmul_mulsi3
|
||||
helper when not using the CALL0 ABI. Change .Lmul_mulsi3 to match.
|
||||
* config/xtensa/lib1funcs.asm (__umulsidi3): Likewise.
|
||||
* config/xtensa/ieee754-sf.S (__mulsf3): Likewise.
|
||||
|
||||
2007-12-20 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR c++/34459
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* IEEE-754 double-precision functions for Xtensa
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006, 2007 Free Software Foundation, Inc.
|
||||
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
|
||||
|
||||
This file is part of GCC.
|
||||
|
@ -607,6 +607,10 @@ __subdf3:
|
|||
#ifdef L_muldf3
|
||||
|
||||
/* Multiplication */
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#define XCHAL_NO_MUL 1
|
||||
#endif
|
||||
|
||||
__muldf3_aux:
|
||||
|
||||
/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
|
||||
|
@ -728,13 +732,19 @@ __muldf3_aux:
|
|||
.global __muldf3
|
||||
.type __muldf3, @function
|
||||
__muldf3:
|
||||
leaf_entry sp, 32
|
||||
#if __XTENSA_CALL0_ABI__
|
||||
leaf_entry sp, 32
|
||||
addi sp, sp, -32
|
||||
s32i a12, sp, 16
|
||||
s32i a13, sp, 20
|
||||
s32i a14, sp, 24
|
||||
s32i a15, sp, 28
|
||||
#elif XCHAL_NO_MUL
|
||||
/* This is not really a leaf function; allocate enough stack space
|
||||
to allow CALL12s to a helper function. */
|
||||
leaf_entry sp, 64
|
||||
#else
|
||||
leaf_entry sp, 32
|
||||
#endif
|
||||
movi a6, 0x7ff00000
|
||||
|
||||
|
@ -809,7 +819,7 @@ __muldf3:
|
|||
muluh xh, xh, yh
|
||||
add xh, xh, a9
|
||||
|
||||
#else
|
||||
#else /* ! XCHAL_HAVE_MUL32_HIGH */
|
||||
|
||||
/* Break the inputs into 16-bit chunks and compute 16 32-bit partial
|
||||
products. These partial products are:
|
||||
|
@ -847,7 +857,7 @@ __muldf3:
|
|||
|
||||
/* Save a7 since it is needed to hold a temporary value. */
|
||||
s32i a7, sp, 4
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
|
||||
/* Calling a separate multiply function will clobber a0 and requires
|
||||
use of a8 as a temporary, so save those values now. (The function
|
||||
uses a custom ABI so nothing else needs to be saved.) */
|
||||
|
@ -915,12 +925,21 @@ __muldf3:
|
|||
#define set_arg_h(dst, src) \
|
||||
srli dst, src, 16
|
||||
|
||||
#if __XTENSA_CALL0_ABI__
|
||||
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
|
||||
set_arg_ ## xhalf (a13, xreg); \
|
||||
set_arg_ ## yhalf (a14, yreg); \
|
||||
call0 .Lmul_mulsi3; \
|
||||
mov dst, a12
|
||||
#endif
|
||||
#else
|
||||
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
|
||||
set_arg_ ## xhalf (a14, xreg); \
|
||||
set_arg_ ## yhalf (a15, yreg); \
|
||||
call12 .Lmul_mulsi3; \
|
||||
mov dst, a14
|
||||
#endif /* __XTENSA_CALL0_ABI__ */
|
||||
|
||||
#endif /* no multiply hardware */
|
||||
|
||||
/* Add pp1 and pp2 into a10 with carry-out in a9. */
|
||||
do_mul(a10, xl, l, yl, h) /* pp 1 */
|
||||
|
@ -1032,11 +1051,11 @@ __muldf3:
|
|||
|
||||
/* Restore values saved on the stack during the multiplication. */
|
||||
l32i a7, sp, 4
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
|
||||
l32i a0, sp, 0
|
||||
l32i a8, sp, 8
|
||||
#endif
|
||||
#endif
|
||||
#endif /* ! XCHAL_HAVE_MUL32_HIGH */
|
||||
|
||||
/* Shift left by 12 bits, unless there was a carry-out from the
|
||||
multiply, in which case, shift by 11 bits and increment the
|
||||
|
@ -1157,38 +1176,47 @@ __muldf3:
|
|||
movi xl, 0
|
||||
j .Lmul_done
|
||||
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#if XCHAL_NO_MUL
|
||||
|
||||
/* For Xtensa processors with no multiply hardware, this simplified
|
||||
version of _mulsi3 is used for multiplying 16-bit chunks of
|
||||
the floating-point mantissas. It uses a custom ABI: the inputs
|
||||
are passed in a13 and a14, the result is returned in a12, and
|
||||
a8 and a15 are clobbered. */
|
||||
the floating-point mantissas. When using CALL0, this function
|
||||
uses a custom ABI: the inputs are passed in a13 and a14, the
|
||||
result is returned in a12, and a8 and a15 are clobbered. */
|
||||
.align 4
|
||||
.Lmul_mulsi3:
|
||||
movi a12, 0
|
||||
.Lmul_mult_loop:
|
||||
add a15, a14, a12
|
||||
extui a8, a13, 0, 1
|
||||
movnez a12, a15, a8
|
||||
leaf_entry sp, 16
|
||||
.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
|
||||
movi \dst, 0
|
||||
1: add \tmp1, \src2, \dst
|
||||
extui \tmp2, \src1, 0, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
do_addx2 a15, a14, a12, a15
|
||||
extui a8, a13, 1, 1
|
||||
movnez a12, a15, a8
|
||||
do_addx2 \tmp1, \src2, \dst, \tmp1
|
||||
extui \tmp2, \src1, 1, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
do_addx4 a15, a14, a12, a15
|
||||
extui a8, a13, 2, 1
|
||||
movnez a12, a15, a8
|
||||
do_addx4 \tmp1, \src2, \dst, \tmp1
|
||||
extui \tmp2, \src1, 2, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
do_addx8 a15, a14, a12, a15
|
||||
extui a8, a13, 3, 1
|
||||
movnez a12, a15, a8
|
||||
do_addx8 \tmp1, \src2, \dst, \tmp1
|
||||
extui \tmp2, \src1, 3, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
srli a13, a13, 4
|
||||
slli a14, a14, 4
|
||||
bnez a13, .Lmul_mult_loop
|
||||
ret
|
||||
#endif /* !MUL16 && !MUL32 && !MAC16 */
|
||||
srli \src1, \src1, 4
|
||||
slli \src2, \src2, 4
|
||||
bnez \src1, 1b
|
||||
.endm
|
||||
#if __XTENSA_CALL0_ABI__
|
||||
mul_mulsi3_body a12, a13, a14, a15, a8
|
||||
#else
|
||||
/* The result will be written into a2, so save that argument in a4. */
|
||||
mov a4, a2
|
||||
mul_mulsi3_body a2, a4, a3, a5, a6
|
||||
#endif
|
||||
leaf_return
|
||||
#endif /* XCHAL_NO_MUL */
|
||||
#endif /* L_muldf3 */
|
||||
|
||||
#ifdef L_divdf3
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* IEEE-754 single-precision functions for Xtensa
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006, 2007 Free Software Foundation, Inc.
|
||||
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
|
||||
|
||||
This file is part of GCC.
|
||||
|
@ -488,6 +488,10 @@ __subsf3:
|
|||
#ifdef L_mulsf3
|
||||
|
||||
/* Multiplication */
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#define XCHAL_NO_MUL 1
|
||||
#endif
|
||||
|
||||
__mulsf3_aux:
|
||||
|
||||
/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
|
||||
|
@ -570,13 +574,19 @@ __mulsf3_aux:
|
|||
.global __mulsf3
|
||||
.type __mulsf3, @function
|
||||
__mulsf3:
|
||||
leaf_entry sp, 32
|
||||
#if __XTENSA_CALL0_ABI__
|
||||
leaf_entry sp, 32
|
||||
addi sp, sp, -32
|
||||
s32i a12, sp, 16
|
||||
s32i a13, sp, 20
|
||||
s32i a14, sp, 24
|
||||
s32i a15, sp, 28
|
||||
#elif XCHAL_NO_MUL
|
||||
/* This is not really a leaf function; allocate enough stack space
|
||||
to allow CALL12s to a helper function. */
|
||||
leaf_entry sp, 64
|
||||
#else
|
||||
leaf_entry sp, 32
|
||||
#endif
|
||||
movi a6, 0x7f800000
|
||||
|
||||
|
@ -633,7 +643,7 @@ __mulsf3:
|
|||
chunks can be extracted when setting up the arguments to the
|
||||
separate multiply function. */
|
||||
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
|
||||
/* Calling a separate multiply function will clobber a0 and requires
|
||||
use of a8 as a temporary, so save those values now. (The function
|
||||
uses a custom ABI so nothing else needs to be saved.) */
|
||||
|
@ -693,12 +703,21 @@ __mulsf3:
|
|||
#define set_arg_h(dst, src) \
|
||||
srli dst, src, 16
|
||||
|
||||
#if __XTENSA_CALL0_ABI__
|
||||
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
|
||||
set_arg_ ## xhalf (a13, xreg); \
|
||||
set_arg_ ## yhalf (a14, yreg); \
|
||||
call0 .Lmul_mulsi3; \
|
||||
mov dst, a12
|
||||
#endif
|
||||
#else
|
||||
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
|
||||
set_arg_ ## xhalf (a14, xreg); \
|
||||
set_arg_ ## yhalf (a15, yreg); \
|
||||
call12 .Lmul_mulsi3; \
|
||||
mov dst, a14
|
||||
#endif /* __XTENSA_CALL0_ABI__ */
|
||||
|
||||
#endif /* no multiply hardware */
|
||||
|
||||
/* Add pp1 and pp2 into a6 with carry-out in a9. */
|
||||
do_mul(a6, a2, l, a3, h) /* pp 1 */
|
||||
|
@ -724,12 +743,12 @@ __mulsf3:
|
|||
do_mul(a2, a2, h, a3, h) /* pp 3 */
|
||||
add a2, a2, a9
|
||||
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
|
||||
/* Restore values saved on the stack during the multiplication. */
|
||||
l32i a0, sp, 0
|
||||
l32i a8, sp, 4
|
||||
#endif
|
||||
#endif
|
||||
#endif /* ! XCHAL_HAVE_MUL32_HIGH */
|
||||
|
||||
/* Shift left by 9 bits, unless there was a carry-out from the
|
||||
multiply, in which case, shift by 8 bits and increment the
|
||||
|
@ -825,38 +844,47 @@ __mulsf3:
|
|||
slli a2, a2, 31
|
||||
j .Lmul_done
|
||||
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#if XCHAL_NO_MUL
|
||||
|
||||
/* For Xtensa processors with no multiply hardware, this simplified
|
||||
version of _mulsi3 is used for multiplying 16-bit chunks of
|
||||
the floating-point mantissas. It uses a custom ABI: the inputs
|
||||
are passed in a13 and a14, the result is returned in a12, and
|
||||
a8 and a15 are clobbered. */
|
||||
the floating-point mantissas. When using CALL0, this function
|
||||
uses a custom ABI: the inputs are passed in a13 and a14, the
|
||||
result is returned in a12, and a8 and a15 are clobbered. */
|
||||
.align 4
|
||||
.Lmul_mulsi3:
|
||||
movi a12, 0
|
||||
.Lmul_mult_loop:
|
||||
add a15, a14, a12
|
||||
extui a8, a13, 0, 1
|
||||
movnez a12, a15, a8
|
||||
leaf_entry sp, 16
|
||||
.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
|
||||
movi \dst, 0
|
||||
1: add \tmp1, \src2, \dst
|
||||
extui \tmp2, \src1, 0, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
do_addx2 a15, a14, a12, a15
|
||||
extui a8, a13, 1, 1
|
||||
movnez a12, a15, a8
|
||||
do_addx2 \tmp1, \src2, \dst, \tmp1
|
||||
extui \tmp2, \src1, 1, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
do_addx4 a15, a14, a12, a15
|
||||
extui a8, a13, 2, 1
|
||||
movnez a12, a15, a8
|
||||
do_addx4 \tmp1, \src2, \dst, \tmp1
|
||||
extui \tmp2, \src1, 2, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
do_addx8 a15, a14, a12, a15
|
||||
extui a8, a13, 3, 1
|
||||
movnez a12, a15, a8
|
||||
do_addx8 \tmp1, \src2, \dst, \tmp1
|
||||
extui \tmp2, \src1, 3, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
srli a13, a13, 4
|
||||
slli a14, a14, 4
|
||||
bnez a13, .Lmul_mult_loop
|
||||
ret
|
||||
#endif /* !MUL16 && !MUL32 && !MAC16 */
|
||||
srli \src1, \src1, 4
|
||||
slli \src2, \src2, 4
|
||||
bnez \src1, 1b
|
||||
.endm
|
||||
#if __XTENSA_CALL0_ABI__
|
||||
mul_mulsi3_body a12, a13, a14, a15, a8
|
||||
#else
|
||||
/* The result will be written into a2, so save that argument in a4. */
|
||||
mov a4, a2
|
||||
mul_mulsi3_body a2, a4, a3, a5, a6
|
||||
#endif
|
||||
leaf_return
|
||||
#endif /* XCHAL_NO_MUL */
|
||||
#endif /* L_mulsf3 */
|
||||
|
||||
#ifdef L_divsf3
|
||||
|
|
|
@ -201,17 +201,28 @@ __mulsi3:
|
|||
|
||||
|
||||
#ifdef L_umulsidi3
|
||||
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#define XCHAL_NO_MUL 1
|
||||
#endif
|
||||
|
||||
.align 4
|
||||
.global __umulsidi3
|
||||
.type __umulsidi3, @function
|
||||
__umulsidi3:
|
||||
leaf_entry sp, 32
|
||||
#if __XTENSA_CALL0_ABI__
|
||||
leaf_entry sp, 32
|
||||
addi sp, sp, -32
|
||||
s32i a12, sp, 16
|
||||
s32i a13, sp, 20
|
||||
s32i a14, sp, 24
|
||||
s32i a15, sp, 28
|
||||
#elif XCHAL_NO_MUL
|
||||
/* This is not really a leaf function; allocate enough stack space
|
||||
to allow CALL12s to a helper function. */
|
||||
leaf_entry sp, 48
|
||||
#else
|
||||
leaf_entry sp, 16
|
||||
#endif
|
||||
|
||||
#ifdef __XTENSA_EB__
|
||||
|
@ -232,7 +243,7 @@ __umulsidi3:
|
|||
|
||||
#else /* ! MUL32_HIGH */
|
||||
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
|
||||
/* a0 and a8 will be clobbered by calling the multiply function
|
||||
but a8 is not used here and need not be saved. */
|
||||
s32i a0, sp, 0
|
||||
|
@ -290,12 +301,21 @@ __umulsidi3:
|
|||
#define set_arg_h(dst, src) \
|
||||
srli dst, src, 16
|
||||
|
||||
#if __XTENSA_CALL0_ABI__
|
||||
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
|
||||
set_arg_ ## xhalf (a13, xreg); \
|
||||
set_arg_ ## yhalf (a14, yreg); \
|
||||
call0 .Lmul_mulsi3; \
|
||||
mov dst, a12
|
||||
#endif
|
||||
#else
|
||||
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
|
||||
set_arg_ ## xhalf (a14, xreg); \
|
||||
set_arg_ ## yhalf (a15, yreg); \
|
||||
call12 .Lmul_mulsi3; \
|
||||
mov dst, a14
|
||||
#endif /* __XTENSA_CALL0_ABI__ */
|
||||
|
||||
#endif /* no multiply hardware */
|
||||
|
||||
/* Add pp1 and pp2 into a6 with carry-out in a9. */
|
||||
do_mul(a6, a2, l, a3, h) /* pp 1 */
|
||||
|
@ -324,7 +344,7 @@ __umulsidi3:
|
|||
|
||||
#endif /* !MUL32_HIGH */
|
||||
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
|
||||
/* Restore the original return address. */
|
||||
l32i a0, sp, 0
|
||||
#endif
|
||||
|
@ -337,38 +357,47 @@ __umulsidi3:
|
|||
#endif
|
||||
leaf_return
|
||||
|
||||
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
||||
#if XCHAL_NO_MUL
|
||||
|
||||
/* For Xtensa processors with no multiply hardware, this simplified
|
||||
version of _mulsi3 is used for multiplying 16-bit chunks of
|
||||
the floating-point mantissas. It uses a custom ABI: the inputs
|
||||
are passed in a13 and a14, the result is returned in a12, and
|
||||
a8 and a15 are clobbered. */
|
||||
the 32-bit integer operands. When using CALL0, this function
|
||||
uses a custom ABI: the inputs are passed in a13 and a14, the
|
||||
result is returned in a12, and a8 and a15 are clobbered. */
|
||||
.align 4
|
||||
.Lmul_mulsi3:
|
||||
movi a12, 0
|
||||
.Lmul_mult_loop:
|
||||
add a15, a14, a12
|
||||
extui a8, a13, 0, 1
|
||||
movnez a12, a15, a8
|
||||
leaf_entry sp, 16
|
||||
.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
|
||||
movi \dst, 0
|
||||
1: add \tmp1, \src2, \dst
|
||||
extui \tmp2, \src1, 0, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
do_addx2 a15, a14, a12, a15
|
||||
extui a8, a13, 1, 1
|
||||
movnez a12, a15, a8
|
||||
do_addx2 \tmp1, \src2, \dst, \tmp1
|
||||
extui \tmp2, \src1, 1, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
do_addx4 a15, a14, a12, a15
|
||||
extui a8, a13, 2, 1
|
||||
movnez a12, a15, a8
|
||||
do_addx4 \tmp1, \src2, \dst, \tmp1
|
||||
extui \tmp2, \src1, 2, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
do_addx8 a15, a14, a12, a15
|
||||
extui a8, a13, 3, 1
|
||||
movnez a12, a15, a8
|
||||
do_addx8 \tmp1, \src2, \dst, \tmp1
|
||||
extui \tmp2, \src1, 3, 1
|
||||
movnez \dst, \tmp1, \tmp2
|
||||
|
||||
srli a13, a13, 4
|
||||
slli a14, a14, 4
|
||||
bnez a13, .Lmul_mult_loop
|
||||
ret
|
||||
#endif /* !MUL16 && !MUL32 && !MAC16 */
|
||||
srli \src1, \src1, 4
|
||||
slli \src2, \src2, 4
|
||||
bnez \src1, 1b
|
||||
.endm
|
||||
#if __XTENSA_CALL0_ABI__
|
||||
mul_mulsi3_body a12, a13, a14, a15, a8
|
||||
#else
|
||||
/* The result will be written into a2, so save that argument in a4. */
|
||||
mov a4, a2
|
||||
mul_mulsi3_body a2, a4, a3, a5, a6
|
||||
#endif
|
||||
leaf_return
|
||||
#endif /* XCHAL_NO_MUL */
|
||||
|
||||
.size __umulsidi3, . - __umulsidi3
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Assembly functions for libgcc2.
|
||||
Copyright (C) 2001, 2006 Free Software Foundation, Inc.
|
||||
Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc.
|
||||
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
|
||||
|
||||
This file is part of GCC.
|
||||
|
@ -151,6 +151,7 @@ __xtensa_nonlocal_goto:
|
|||
make sure that the modified instructions are loaded into the instruction
|
||||
fetch buffer. */
|
||||
|
||||
/* Use the maximum trampoline size. Flushing a bit extra is OK. */
|
||||
#define TRAMPOLINE_SIZE 60
|
||||
|
||||
.text
|
||||
|
|
|
@ -69,6 +69,7 @@ extern enum reg_class xtensa_preferred_reload_class (rtx, enum reg_class, int);
|
|||
extern enum reg_class xtensa_secondary_reload_class (enum reg_class,
|
||||
enum machine_mode, rtx,
|
||||
int);
|
||||
extern void xtensa_initialize_trampoline (rtx, rtx, rtx);
|
||||
#endif /* RTX_CODE */
|
||||
|
||||
#ifdef TREE_CODE
|
||||
|
@ -85,5 +86,6 @@ extern long compute_frame_size (int);
|
|||
extern int xtensa_frame_pointer_required (void);
|
||||
extern void xtensa_expand_prologue (void);
|
||||
extern void order_regs_for_local_alloc (void);
|
||||
extern void xtensa_trampoline_template (FILE *);
|
||||
|
||||
#endif /* !__XTENSA_PROTOS_H__ */
|
||||
|
|
|
@ -2301,6 +2301,10 @@ xtensa_frame_pointer_required (void)
|
|||
}
|
||||
|
||||
|
||||
/* minimum frame = reg save area (4 words) plus static chain (1 word)
|
||||
and the total frame size must be a multiple of 128 bits. */
|
||||
#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
|
||||
|
||||
void
|
||||
xtensa_expand_prologue (void)
|
||||
{
|
||||
|
@ -2379,7 +2383,7 @@ xtensa_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
|
|||
rtx
|
||||
xtensa_return_addr (int count, rtx frame)
|
||||
{
|
||||
rtx result, retaddr;
|
||||
rtx result, retaddr, curaddr, label;
|
||||
|
||||
if (count == -1)
|
||||
retaddr = gen_rtx_REG (Pmode, A0_REG);
|
||||
|
@ -2393,10 +2397,25 @@ xtensa_return_addr (int count, rtx frame)
|
|||
|
||||
/* The 2 most-significant bits of the return address on Xtensa hold
|
||||
the register window size. To get the real return address, these
|
||||
bits must be replaced with the high bits from the current PC. */
|
||||
bits must be replaced with the high bits from some address in the
|
||||
code. */
|
||||
|
||||
/* Get the 2 high bits of a local label in the code. */
|
||||
curaddr = gen_reg_rtx (Pmode);
|
||||
label = gen_label_rtx ();
|
||||
emit_label (label);
|
||||
LABEL_PRESERVE_P (label) = 1;
|
||||
emit_move_insn (curaddr, gen_rtx_LABEL_REF (Pmode, label));
|
||||
emit_insn (gen_lshrsi3 (curaddr, curaddr, GEN_INT (30)));
|
||||
emit_insn (gen_ashlsi3 (curaddr, curaddr, GEN_INT (30)));
|
||||
|
||||
/* Clear the 2 high bits of the return address. */
|
||||
result = gen_reg_rtx (Pmode);
|
||||
emit_insn (gen_fix_return_addr (result, retaddr));
|
||||
emit_insn (gen_ashlsi3 (result, retaddr, GEN_INT (2)));
|
||||
emit_insn (gen_lshrsi3 (result, result, GEN_INT (2)));
|
||||
|
||||
/* Combine them to get the result. */
|
||||
emit_insn (gen_iorsi3 (result, result, curaddr));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -3126,4 +3145,95 @@ xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
|||
> 4 * UNITS_PER_WORD);
|
||||
}
|
||||
|
||||
|
||||
/* TRAMPOLINE_TEMPLATE: For Xtensa, the trampoline must perform an ENTRY
|
||||
instruction with a minimal stack frame in order to get some free
|
||||
registers. Once the actual call target is known, the proper stack frame
|
||||
size is extracted from the ENTRY instruction at the target and the
|
||||
current frame is adjusted to match. The trampoline then transfers
|
||||
control to the instruction following the ENTRY at the target. Note:
|
||||
this assumes that the target begins with an ENTRY instruction. */
|
||||
|
||||
void
|
||||
xtensa_trampoline_template (FILE *stream)
|
||||
{
|
||||
/* STREAM receives the assembler text of the template.  USE_CALL0
   selects the CALL0-based sequence; otherwise the two constants are
   skipped with a jump and loaded with L32R.  */
bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS);
|
||||
|
||||
fprintf (stream, "\t.begin no-transform\n");
|
||||
fprintf (stream, "\tentry\tsp, %d\n", MIN_FRAME_SIZE);
|
||||
|
||||
if (use_call0)
|
||||
{
|
||||
/* Save the return address. */
|
||||
fprintf (stream, "\tmov\ta10, a0\n");
|
||||
|
||||
/* Use a CALL0 instruction to skip past the constants and in the
|
||||
process get the PC into A0. This allows PC-relative access to
|
||||
the constants without relying on L32R. */
|
||||
fprintf (stream, "\tcall0\t.Lskipconsts\n");
|
||||
}
|
||||
else
|
||||
/* L32R variant: A0 is not needed, so a plain jump over the constants
   is emitted instead.  */
fprintf (stream, "\tj\t.Lskipconsts\n");
|
||||
|
||||
/* Placeholder words for the static chain and the function address,
   emitted as 0 here.  NOTE(review): presumably these are the slots
   that xtensa_initialize_trampoline fills in -- confirm that its
   offsets match this layout.  */
fprintf (stream, "\t.align\t4\n");
|
||||
fprintf (stream, ".Lchainval:%s0\n", integer_asm_op (4, TRUE));
|
||||
fprintf (stream, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE));
|
||||
fprintf (stream, ".Lskipconsts:\n");
|
||||
|
||||
/* Load the static chain and function address from the trampoline. */
|
||||
if (use_call0)
|
||||
{
|
||||
/* NOTE(review): A0 comes from the CALL0 above; the +3 presumably
   rounds it up to the aligned .Lchainval word -- confirm against the
   instruction sizes.  */
fprintf (stream, "\taddi\ta0, a0, 3\n");
|
||||
fprintf (stream, "\tl32i\ta9, a0, 0\n");
|
||||
fprintf (stream, "\tl32i\ta8, a0, 4\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf (stream, "\tl32r\ta9, .Lchainval\n");
|
||||
fprintf (stream, "\tl32r\ta8, .Lfnaddr\n");
|
||||
}
|
||||
|
||||
/* Store the static chain. */
|
||||
fprintf (stream, "\ts32i\ta9, sp, %d\n", MIN_FRAME_SIZE - 20);
|
||||
|
||||
/* Set the proper stack pointer value. */
|
||||
/* A9 gets the first word at the call target (address in A8); a 12-bit
   field is extracted from it, at a bit position that depends on
   endianness, scaled by 8 and reduced by MIN_FRAME_SIZE before being
   subtracted from SP.  */
fprintf (stream, "\tl32i\ta9, a8, 0\n");
|
||||
fprintf (stream, "\textui\ta9, a9, %d, 12\n",
|
||||
TARGET_BIG_ENDIAN ? 8 : 12);
|
||||
fprintf (stream, "\tslli\ta9, a9, 3\n");
|
||||
fprintf (stream, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE);
|
||||
fprintf (stream, "\tsub\ta9, sp, a9\n");
|
||||
fprintf (stream, "\tmovsp\tsp, a9\n");
|
||||
|
||||
if (use_call0)
|
||||
/* Restore the return address. */
|
||||
fprintf (stream, "\tmov\ta0, a10\n");
|
||||
|
||||
/* Jump to the instruction following the ENTRY. */
|
||||
fprintf (stream, "\taddi\ta8, a8, 3\n");
|
||||
fprintf (stream, "\tjx\ta8\n");
|
||||
|
||||
/* Pad size to a multiple of TRAMPOLINE_ALIGNMENT. */
|
||||
if (use_call0)
|
||||
fprintf (stream, "\t.byte\t0\n");
|
||||
else
|
||||
fprintf (stream, "\tnop\n");
|
||||
|
||||
fprintf (stream, "\t.end no-transform\n");
|
||||
}
|
||||
|
||||
|
||||
/* Initialize the variable parts of a trampoline: store CHAIN and FUNC
   as SImode words at fixed byte offsets from ADDR, then emit a library
   call to __xtensa_sync_caches with ADDR as its argument.  */
void
|
||||
xtensa_initialize_trampoline (rtx addr, rtx func, rtx chain)
|
||||
{
|
||||
/* Same condition that xtensa_trampoline_template uses to choose between
   its CALL0 and L32R variants.  NOTE(review): the offsets below are
   presumably the positions of .Lchainval and .Lfnaddr in each variant
   of the template -- re-check them whenever the template changes.  */
bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS);
|
||||
int chain_off = use_call0 ? 12 : 8;
|
||||
int func_off = use_call0 ? 16 : 12;
|
||||
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, chain_off)), chain);
|
||||
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, func_off)), func);
|
||||
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"),
|
||||
0, VOIDmode, 1, addr, Pmode);
|
||||
}
|
||||
|
||||
|
||||
#include "gt-xtensa.h"
|
||||
|
|
|
@ -72,6 +72,7 @@ extern unsigned xtensa_current_frame_size;
|
|||
#define TARGET_ADDX XCHAL_HAVE_ADDX
|
||||
#define TARGET_RELEASE_SYNC XCHAL_HAVE_RELEASE_SYNC
|
||||
#define TARGET_S32C1I XCHAL_HAVE_S32C1I
|
||||
#define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS
|
||||
|
||||
#define TARGET_DEFAULT ( \
|
||||
(XCHAL_HAVE_L32R ? 0 : MASK_CONST16))
|
||||
|
@ -704,83 +705,19 @@ typedef struct xtensa_args
|
|||
/* Stack pointer value doesn't matter at exit. */
|
||||
#define EXIT_IGNORE_STACK 1
|
||||
|
||||
/* A C statement to output, on the stream FILE, assembler code for a
|
||||
block of data that contains the constant parts of a trampoline.
|
||||
This code should not include a label--the label is taken care of
|
||||
automatically.
|
||||
|
||||
For Xtensa, the trampoline must perform an entry instruction with a
|
||||
minimal stack frame in order to get some free registers. Once the
|
||||
actual call target is known, the proper stack frame size is extracted
|
||||
from the entry instruction at the target and the current frame is
|
||||
adjusted to match. The trampoline then transfers control to the
|
||||
instruction following the entry at the target. Note: this assumes
|
||||
that the target begins with an entry instruction. */
|
||||
|
||||
/* minimum frame = reg save area (4 words) plus static chain (1 word)
|
||||
and the total number of words must be a multiple of 128 bits */
|
||||
#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
|
||||
|
||||
#define TRAMPOLINE_TEMPLATE(STREAM) \
|
||||
do { \
|
||||
fprintf (STREAM, "\t.begin no-transform\n"); \
|
||||
fprintf (STREAM, "\tentry\tsp, %d\n", MIN_FRAME_SIZE); \
|
||||
\
|
||||
/* save the return address */ \
|
||||
fprintf (STREAM, "\tmov\ta10, a0\n"); \
|
||||
\
|
||||
/* Use a CALL0 instruction to skip past the constants and in the \
|
||||
process get the PC into A0. This allows PC-relative access to \
|
||||
the constants without relying on L32R, which may not always be \
|
||||
available. */ \
|
||||
\
|
||||
fprintf (STREAM, "\tcall0\t.Lskipconsts\n"); \
|
||||
fprintf (STREAM, "\t.align\t4\n"); \
|
||||
fprintf (STREAM, ".Lchainval:%s0\n", integer_asm_op (4, TRUE)); \
|
||||
fprintf (STREAM, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE)); \
|
||||
fprintf (STREAM, ".Lskipconsts:\n"); \
|
||||
\
|
||||
/* store the static chain */ \
|
||||
fprintf (STREAM, "\taddi\ta0, a0, 3\n"); \
|
||||
fprintf (STREAM, "\tl32i\ta8, a0, 0\n"); \
|
||||
fprintf (STREAM, "\ts32i\ta8, sp, %d\n", MIN_FRAME_SIZE - 20); \
|
||||
\
|
||||
/* set the proper stack pointer value */ \
|
||||
fprintf (STREAM, "\tl32i\ta8, a0, 4\n"); \
|
||||
fprintf (STREAM, "\tl32i\ta9, a8, 0\n"); \
|
||||
fprintf (STREAM, "\textui\ta9, a9, %d, 12\n", \
|
||||
TARGET_BIG_ENDIAN ? 8 : 12); \
|
||||
fprintf (STREAM, "\tslli\ta9, a9, 3\n"); \
|
||||
fprintf (STREAM, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE); \
|
||||
fprintf (STREAM, "\tsub\ta9, sp, a9\n"); \
|
||||
fprintf (STREAM, "\tmovsp\tsp, a9\n"); \
|
||||
\
|
||||
/* restore the return address */ \
|
||||
fprintf (STREAM, "\tmov\ta0, a10\n"); \
|
||||
\
|
||||
/* jump to the instruction following the entry */ \
|
||||
fprintf (STREAM, "\taddi\ta8, a8, 3\n"); \
|
||||
fprintf (STREAM, "\tjx\ta8\n"); \
|
||||
fprintf (STREAM, "\t.byte\t0\n"); \
|
||||
fprintf (STREAM, "\t.end no-transform\n"); \
|
||||
} while (0)
|
||||
#define TRAMPOLINE_TEMPLATE(STREAM) xtensa_trampoline_template (STREAM)
|
||||
|
||||
/* Size in bytes of the trampoline, as an integer. Make sure this is
|
||||
a multiple of TRAMPOLINE_ALIGNMENT to avoid -Wpadded warnings. */
|
||||
#define TRAMPOLINE_SIZE 60
|
||||
#define TRAMPOLINE_SIZE (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS ? 60 : 52)
|
||||
|
||||
/* Alignment required for trampolines, in bits. */
|
||||
#define TRAMPOLINE_ALIGNMENT (32)
|
||||
#define TRAMPOLINE_ALIGNMENT 32
|
||||
|
||||
/* A C statement to initialize the variable parts of a trampoline. */
|
||||
#define INITIALIZE_TRAMPOLINE(ADDR, FUNC, CHAIN) \
|
||||
do { \
|
||||
rtx addr = ADDR; \
|
||||
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, 12)), CHAIN); \
|
||||
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, 16)), FUNC); \
|
||||
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"), \
|
||||
0, VOIDmode, 1, addr, Pmode); \
|
||||
} while (0)
|
||||
xtensa_initialize_trampoline (ADDR, FUNC, CHAIN)
|
||||
|
||||
|
||||
/* If defined, a C expression that produces the machine-specific code
|
||||
to setup the stack so that arbitrary frames can be accessed.
|
||||
|
|
|
@ -1666,21 +1666,6 @@
|
|||
(set_attr "mode" "none")
|
||||
(set_attr "length" "0")])
|
||||
|
||||
;; The fix_return_addr pattern sets the high 2 bits of an address in a
|
||||
;; register to match the high bits of the current PC.
|
||||
(define_insn "fix_return_addr"
|
||||
[(set (match_operand:SI 0 "register_operand" "=a")
|
||||
(unspec:SI [(match_operand:SI 1 "register_operand" "r")]
|
||||
UNSPEC_RET_ADDR))
|
||||
(clobber (match_scratch:SI 2 "=r"))
|
||||
(clobber (match_scratch:SI 3 "=r"))]
|
||||
""
|
||||
"mov\t%2, a0\;call0\t0f\;.align\t4\;0:\;mov\t%3, a0\;mov\ta0, %2\;\
|
||||
srli\t%3, %3, 30\;slli\t%0, %1, 2\;ssai\t2\;src\t%0, %3, %0"
|
||||
[(set_attr "type" "multi")
|
||||
(set_attr "mode" "SI")
|
||||
(set_attr "length" "24")])
|
||||
|
||||
|
||||
;; Instructions for the Xtensa "boolean" option.
|
||||
|
||||
|
|
Loading…
Reference in New Issue