libgcc.S: Gather related functions in the same input section.

* config/avr/libgcc.S: Gather related functions in the
	same input section.
	(__mulqihi3, __umulqihi3, __divmodqi4, __divmodhi4, __udivmodsi4,
	__divmodsi4): Use XCALL/XJMP instead of rcall/rjmp for external
	references.
	(__udivmodqi4, __divmodqi4, __udivmodhi4, __divmodhi4,
	__udivmodsi4, __divmodsi4, __prologue_saves__,
	__epilogue_restores__, _exit, __tablejump2__, __tablejump__,
	__do_copy_data, __do_clear_bss, __do_global_ctors,
	__do_global_dtors, __tablejump_elpm__): Enclose in DEFUN/ENDF.

From-SVN: r177133
This commit is contained in:
Georg-Johann Lay 2011-08-02 12:17:43 +00:00 committed by Georg-Johann Lay
parent 62be5d0aad
commit 7ea56b2382
2 changed files with 77 additions and 62 deletions

View File

@ -1,3 +1,16 @@
2011-08-02 Georg-Johann Lay <avr@gjlay.de>
* config/avr/libgcc.S: Gather related functions in the
same input section.
(__mulqihi3, __umulqihi3, __divmodqi4, __divmodhi4, __udivmodsi4,
__divmodsi4): Use XCALL/XJMP instead of rcall/rjmp for external
references.
(__udivmodqi4, __divmodqi4, __udivmodhi4, __divmodhi4,
__udivmodsi4, __divmodsi4, __prologue_saves__,
__epilogue_restores__, _exit, __tablejump2__, __tablejump__,
__do_copy_data, __do_clear_bss, __do_global_ctors,
__do_global_dtors, __tablejump_elpm__): Enclose in DEFUN/ENDF.
2011-08-02 Uros Bizjak <ubizjak@gmail.com>
PR target/47766

View File

@ -34,7 +34,15 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
This can make better code because GCC knows exactly which
of the call-used registers (not all of them) are clobbered. */
.section .text.libgcc, "ax", @progbits
/* FIXME: At present, there is no SORT directive in the linker
script, so we must not assume that different modules
in the same input section like .text.libgcc.mul will be
located close together. Therefore, we cannot use
RCALL/RJMP to call a function like __udivmodhi4 from
__divmodhi4 and have to use the lengthier XCALL/XJMP, even
though both are in the same input section and all input
sections of the same name together are small enough to reach
every location with an RCALL/RJMP instruction. */
.macro mov_l r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
@ -72,6 +80,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
.endm
.section .text.libgcc.mul, "ax", @progbits
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
#if !defined (__AVR_HAVE_MUL__)
@ -112,7 +122,7 @@ DEFUN __mulqihi3
clr r23
sbrc r22, 7
dec r22
rjmp __mulhi3
XJMP __mulhi3
ENDF __mulqihi3:
#endif /* defined (L_mulqihi3) */
@ -120,7 +130,7 @@ ENDF __mulqihi3:
DEFUN __umulqihi3
clr r25
clr r23
rjmp __mulhi3
XJMP __mulhi3
ENDF __umulqihi3
#endif /* defined (L_umulqihi3) */
@ -447,6 +457,8 @@ ENDF __mulsi3
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.section .text.libgcc.div, "ax", @progbits
/*******************************************************
Division 8 / 8 => (result + remainder)
*******************************************************/
@ -456,9 +468,7 @@ ENDF __mulsi3
#define r_cnt r23 /* loop count */
#if defined (L_udivmodqi4)
.global __udivmodqi4
.func __udivmodqi4
__udivmodqi4:
DEFUN __udivmodqi4
sub r_rem,r_rem ; clear remainder and carry
ldi r_cnt,9 ; init loop counter
rjmp __udivmodqi4_ep ; jump to entry point
@ -474,13 +484,11 @@ __udivmodqi4_ep:
com r_arg1 ; complement result
; because C flag was complemented in loop
ret
.endfunc
ENDF __udivmodqi4
#endif /* defined (L_udivmodqi4) */
#if defined (L_divmodqi4)
.global __divmodqi4
.func __divmodqi4
__divmodqi4:
DEFUN __divmodqi4
bst r_arg1,7 ; store sign of dividend
mov __tmp_reg__,r_arg1
eor __tmp_reg__,r_arg2; r0.7 is sign of result
@ -488,7 +496,7 @@ __divmodqi4:
neg r_arg1 ; dividend negative : negate
sbrc r_arg2,7
neg r_arg2 ; divisor negative : negate
rcall __udivmodqi4 ; do the unsigned div/mod
XCALL __udivmodqi4 ; do the unsigned div/mod
brtc __divmodqi4_1
neg r_rem ; correct remainder sign
__divmodqi4_1:
@ -496,7 +504,7 @@ __divmodqi4_1:
neg r_arg1 ; correct result sign
__divmodqi4_exit:
ret
.endfunc
ENDF __divmodqi4
#endif /* defined (L_divmodqi4) */
#undef r_rem
@ -522,9 +530,7 @@ __divmodqi4_exit:
#define r_cnt r21 /* loop count */
#if defined (L_udivmodhi4)
.global __udivmodhi4
.func __udivmodhi4
__udivmodhi4:
DEFUN __udivmodhi4
sub r_remL,r_remL
sub r_remH,r_remH ; clear remainder and carry
ldi r_cnt,17 ; init loop counter
@ -550,13 +556,11 @@ __udivmodhi4_ep:
mov_l r_arg1L, r_remL ; remainder
mov_h r_arg1H, r_remH
ret
.endfunc
ENDF __udivmodhi4
#endif /* defined (L_udivmodhi4) */
#if defined (L_divmodhi4)
.global __divmodhi4
.func __divmodhi4
__divmodhi4:
DEFUN __divmodhi4
.global _div
_div:
bst r_arg1H,7 ; store sign of dividend
@ -565,7 +569,7 @@ _div:
rcall __divmodhi4_neg1 ; dividend negative : negate
sbrc r_arg2H,7
rcall __divmodhi4_neg2 ; divisor negative : negate
rcall __udivmodhi4 ; do the unsigned div/mod
XCALL __udivmodhi4 ; do the unsigned div/mod
rcall __divmodhi4_neg1 ; correct remainder sign
tst __tmp_reg__
brpl __divmodhi4_exit
@ -581,7 +585,7 @@ __divmodhi4_neg1:
neg r_arg1L ; correct dividend/remainder sign
sbci r_arg1H,0xff
ret
.endfunc
ENDF __divmodhi4
#endif /* defined (L_divmodhi4) */
#undef r_remH
@ -618,9 +622,7 @@ __divmodhi4_neg1:
#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
#if defined (L_udivmodsi4)
.global __udivmodsi4
.func __udivmodsi4
__udivmodsi4:
DEFUN __udivmodsi4
ldi r_remL, 33 ; init loop counter
mov r_cnt, r_remL
sub r_remL,r_remL
@ -664,20 +666,18 @@ __udivmodsi4_ep:
mov_l r_arg1HL, r_remHL
mov_h r_arg1HH, r_remHH
ret
.endfunc
ENDF __udivmodsi4
#endif /* defined (L_udivmodsi4) */
#if defined (L_divmodsi4)
.global __divmodsi4
.func __divmodsi4
__divmodsi4:
DEFUN __divmodsi4
bst r_arg1HH,7 ; store sign of dividend
mov __tmp_reg__,r_arg1HH
eor __tmp_reg__,r_arg2HH ; r0.7 is sign of result
rcall __divmodsi4_neg1 ; dividend negative : negate
sbrc r_arg2HH,7
rcall __divmodsi4_neg2 ; divisor negative : negate
rcall __udivmodsi4 ; do the unsigned div/mod
XCALL __udivmodsi4 ; do the unsigned div/mod
rcall __divmodsi4_neg1 ; correct remainder sign
rol __tmp_reg__
brcc __divmodsi4_exit
@ -701,17 +701,18 @@ __divmodsi4_neg1:
sbci r_arg1HL,0xff
sbci r_arg1HH,0xff
ret
.endfunc
ENDF __divmodsi4
#endif /* defined (L_divmodsi4) */
.section .text.libgcc.prologue, "ax", @progbits
/**********************************
* This is a prologue subroutine
**********************************/
#if defined (L_prologue)
.global __prologue_saves__
.func __prologue_saves__
__prologue_saves__:
DEFUN __prologue_saves__
push r2
push r3
push r4
@ -745,7 +746,7 @@ __prologue_saves__:
ijmp
#endif
.endfunc
ENDF __prologue_saves__
#endif /* defined (L_prologue) */
/*
@ -753,9 +754,7 @@ __prologue_saves__:
*/
#if defined (L_epilogue)
.global __epilogue_restores__
.func __epilogue_restores__
__epilogue_restores__:
DEFUN __epilogue_restores__
ldd r2,Y+18
ldd r3,Y+17
ldd r4,Y+16
@ -784,17 +783,15 @@ __epilogue_restores__:
mov_l r28, r26
mov_h r29, r27
ret
.endfunc
ENDF __epilogue_restores__
#endif /* defined (L_epilogue) */
#ifdef L_exit
.section .fini9,"ax",@progbits
.global _exit
.func _exit
_exit:
DEFUN _exit
.weak exit
exit:
.endfunc
ENDF _exit
/* Code from .fini8 ... .fini1 sections inserted by ld script. */
@ -812,14 +809,17 @@ _cleanup:
.endfunc
#endif /* defined (L_cleanup) */
.section .text.libgcc, "ax", @progbits
#ifdef L_tablejump
.global __tablejump2__
.func __tablejump2__
__tablejump2__:
DEFUN __tablejump2__
lsl r30
rol r31
.global __tablejump__
__tablejump__:
;; FALLTHRU
ENDF __tablejump2__
DEFUN __tablejump__
#if defined (__AVR_HAVE_LPMX__)
lpm __tmp_reg__, Z+
lpm r31, Z
@ -842,13 +842,12 @@ __tablejump__:
#endif
ret
#endif
.endfunc
ENDF __tablejump__
#endif /* defined (L_tablejump) */
#ifdef L_copy_data
.section .init4,"ax",@progbits
.global __do_copy_data
__do_copy_data:
DEFUN __do_copy_data
#if defined(__AVR_HAVE_ELPMX__)
ldi r17, hi8(__data_end)
ldi r26, lo8(__data_start)
@ -905,14 +904,14 @@ __do_copy_data:
cpc r27, r17
brne .L__do_copy_data_loop
#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
ENDF __do_copy_data
#endif /* L_copy_data */
/* __do_clear_bss is only necessary if there is anything in the .bss section. */
#ifdef L_clear_bss
.section .init4,"ax",@progbits
.global __do_clear_bss
__do_clear_bss:
DEFUN __do_clear_bss
ldi r17, hi8(__bss_end)
ldi r26, lo8(__bss_start)
ldi r27, hi8(__bss_start)
@ -923,6 +922,7 @@ __do_clear_bss:
cpi r26, lo8(__bss_end)
cpc r27, r17
brne .do_clear_bss_loop
ENDF __do_clear_bss
#endif /* L_clear_bss */
/* __do_global_ctors and __do_global_dtors are only necessary
@ -930,9 +930,8 @@ __do_clear_bss:
#ifdef L_ctors
.section .init6,"ax",@progbits
.global __do_global_ctors
DEFUN __do_global_ctors
#if defined(__AVR_HAVE_RAMPZ__)
__do_global_ctors:
ldi r17, hi8(__ctors_start)
ldi r28, lo8(__ctors_end)
ldi r29, hi8(__ctors_end)
@ -952,7 +951,6 @@ __do_global_ctors:
cpc r16, r24
brne .L__do_global_ctors_loop
#else
__do_global_ctors:
ldi r17, hi8(__ctors_start)
ldi r28, lo8(__ctors_end)
ldi r29, hi8(__ctors_end)
@ -967,13 +965,13 @@ __do_global_ctors:
cpc r29, r17
brne .L__do_global_ctors_loop
#endif /* defined(__AVR_HAVE_RAMPZ__) */
ENDF __do_global_ctors
#endif /* L_ctors */
#ifdef L_dtors
.section .fini6,"ax",@progbits
.global __do_global_dtors
DEFUN __do_global_dtors
#if defined(__AVR_HAVE_RAMPZ__)
__do_global_dtors:
ldi r17, hi8(__dtors_end)
ldi r28, lo8(__dtors_start)
ldi r29, hi8(__dtors_start)
@ -993,7 +991,6 @@ __do_global_dtors:
cpc r16, r24
brne .L__do_global_dtors_loop
#else
__do_global_dtors:
ldi r17, hi8(__dtors_end)
ldi r28, lo8(__dtors_start)
ldi r29, hi8(__dtors_start)
@ -1008,12 +1005,13 @@ __do_global_dtors:
cpc r29, r17
brne .L__do_global_dtors_loop
#endif /* defined(__AVR_HAVE_RAMPZ__) */
ENDF __do_global_dtors
#endif /* L_dtors */
.section .text.libgcc, "ax", @progbits
#ifdef L_tablejump_elpm
.global __tablejump_elpm__
.func __tablejump_elpm__
__tablejump_elpm__:
DEFUN __tablejump_elpm__
#if defined (__AVR_HAVE_ELPM__)
#if defined (__AVR_HAVE_LPMX__)
elpm __tmp_reg__, Z+
@ -1037,10 +1035,12 @@ __tablejump_elpm__:
ret
#endif
#endif /* defined (__AVR_HAVE_ELPM__) */
.endfunc
ENDF __tablejump_elpm__
#endif /* defined (L_tablejump_elpm) */
.section .text.libgcc.builtins, "ax", @progbits
/**********************************
* Find first set Bit (ffs)
**********************************/
@ -1440,6 +1440,8 @@ DEFUN __ashldi3
ENDF __ashldi3
#endif /* defined (L_ashldi3) */
.section .text.libgcc.fmul, "ax", @progbits
/***********************************************************/
;;; Softmul versions of FMUL, FMULS and FMULSU to implement