/* Listing metadata: revision ac1dca3cab, From-SVN: r206295,
   3227 lines, 66 KiB, syntax tag of the listing: ArmAsm.  */
/* -*- Mode: Asm -*- */
|
||
/* Copyright (C) 1998-2014 Free Software Foundation, Inc.
|
||
Contributed by Denis Chertykov <chertykov@gmail.com>
|
||
|
||
This file is free software; you can redistribute it and/or modify it
|
||
under the terms of the GNU General Public License as published by the
|
||
Free Software Foundation; either version 3, or (at your option) any
|
||
later version.
|
||
|
||
This file is distributed in the hope that it will be useful, but
|
||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
General Public License for more details.
|
||
|
||
Under Section 7 of GPL version 3, you are granted additional
|
||
permissions described in the GCC Runtime Library Exception, version
|
||
3.1, as published by the Free Software Foundation.
|
||
|
||
You should have received a copy of the GNU General Public License and
|
||
a copy of the GCC Runtime Library Exception along with this program;
|
||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||
<http://www.gnu.org/licenses/>. */
|
||
|
||
#define __zero_reg__ r1
|
||
#define __tmp_reg__ r0
|
||
#define __SREG__ 0x3f
|
||
#if defined (__AVR_HAVE_SPH__)
|
||
#define __SP_H__ 0x3e
|
||
#endif
|
||
#define __SP_L__ 0x3d
|
||
#define __RAMPZ__ 0x3B
|
||
#define __EIND__ 0x3C
|
||
|
||
/* Most of the functions here are called directly from avr.md
|
||
patterns, instead of using the standard libcall mechanisms.
|
||
This can make better code because GCC knows exactly which
|
||
of the call-used registers (not all of them) are clobbered. */
|
||
|
||
/* FIXME: At present, there is no SORT directive in the linker
|
||
script so that we must not assume that different modules
|
||
in the same input section like .libgcc.text.mul will be
|
||
located close together. Therefore, we cannot use
|
||
RCALL/RJMP to call a function like __udivmodhi4 from
|
||
__divmodhi4 and have to use lengthy XCALL/XJMP even
|
||
though they are in the same input section and all same
|
||
input sections together are small enough to reach every
|
||
location with a RCALL/RJMP instruction. */
|
||
|
||
;; mov_l DEST, SRC
;; Low-byte half of a two-byte register copy.  With MOVW the whole pair
;; is copied by this one instruction; without it only the single byte
;; DEST is copied.
.macro  mov_l  r_dest, r_src
#ifdef __AVR_HAVE_MOVW__
    movw    \r_dest, \r_src
#else
    mov     \r_dest, \r_src
#endif
.endm
|
||
|
||
;; mov_h DEST, SRC
;; High-byte half of a two-byte register copy.  Only emits code on cores
;; without MOVW; with MOVW the macro expands to nothing.
.macro  mov_h  r_dest, r_src
#ifndef __AVR_HAVE_MOVW__
    mov     \r_dest, \r_src
#endif
.endm
|
||
|
||
;; wmov DEST, SRC
;; Copy the register pair SRC+1:SRC to DEST+1:DEST, using MOVW where the
;; core has it and two byte moves otherwise.  The fallback computes the
;; high registers as DEST+1 and SRC+1.
.macro  wmov  r_dest, r_src
#ifdef __AVR_HAVE_MOVW__
    movw    \r_dest, \r_src
#else
    mov     \r_dest,   \r_src
    mov     \r_dest+1, \r_src+1
#endif
.endm
|
||
|
||
#if defined (__AVR_HAVE_JMP_CALL__)
|
||
#define XCALL call
|
||
#define XJMP jmp
|
||
#else
|
||
#define XCALL rcall
|
||
#define XJMP rjmp
|
||
#endif
|
||
|
||
;; Prologue stuff
|
||
|
||
;; do_prologue_saves N_PUSHED [, N_FRAME = 0]
;; Hand over to the shared __prologue_saves__ code with
;;   Z (R31:R30) = gs() address of the code right behind this macro
;;   X (R27:R26) = N_FRAME
;; entering it at byte offset 2 * (18 - N_PUSHED).
;; NOTE(review): __prologue_saves__ is not part of this section of the
;; file; presumably it saves N_PUSHED registers, sets up a frame of
;; N_FRAME bytes and continues at Z -- confirm against its definition.
.macro do_prologue_saves n_pushed n_frame=0
    ldi     r30, lo8(gs(.L_prologue_done.\@))
    ldi     r31, hi8(gs(.L_prologue_done.\@))
    ldi     r26, lo8(\n_frame)
    ldi     r27, hi8(\n_frame)
    XJMP    __prologue_saves__ + ((18 - (\n_pushed)) * 2)
.L_prologue_done.\@:
.endm
|
||
|
||
;; Epilogue stuff
|
||
|
||
;; do_epilogue_restores N_PUSHED [, N_FRAME = 0]
;; Load Y (R29:R28) with SP + N_FRAME, put N_PUSHED into R30 and jump
;; into __epilogue_restores__ at byte offset 2 * (18 - N_PUSHED).
;; This macro ends in a jump, so nothing placed behind it is reached
;; by falling through.
;; NOTE(review): __epilogue_restores__ is not part of this section of
;; the file; presumably it pops the registers and returns -- confirm.
.macro do_epilogue_restores n_pushed n_frame=0
    in      r28, __SP_L__
#if defined (__AVR_HAVE_SPH__)
    in      r29, __SP_H__
    ;; ADIW only takes constants up to 63; larger frames are added by
    ;; subtracting the negated size.
.if \n_frame > 63
    subi    r28, lo8(-\n_frame)
    sbci    r29, hi8(-\n_frame)
.elseif \n_frame > 0
    adiw    r28, \n_frame
.endif
#else /* no SPH: 8-bit stack pointer */
    clr     r29
.if \n_frame > 0
    subi    r28, lo8(-\n_frame)
.endif
#endif /* __AVR_HAVE_SPH__ */
    ldi     r30, \n_pushed
    XJMP    __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
.endm
|
||
|
||
;; Support function entry and exit for convenience
|
||
|
||
;; DEFUN NAME
;; Start a function: export NAME, open a .func scope for it and place
;; the label NAME at the current location.  Close with ENDF NAME.
.macro  DEFUN  name
    .global \name
    .func   \name
\name:
.endm
|
||
|
||
;; ENDF NAME
;; Finish a function opened with DEFUN NAME: record its size as the
;; distance from the label NAME to here and close the .func scope.
.macro  ENDF  name
    .size   \name, .-\name
    .endfunc
.endm
|
||
|
||
;; FALIAS NAME
;; Define NAME as an additional global label at the current location.
;; The symbol gets size 0 because .size is evaluated right at the label;
;; the label therefore names whatever code follows the macro.
.macro  FALIAS  name
    .global \name
    .func   \name
\name:
    .size   \name, .-\name
    .endfunc
.endm
|
||
|
||
;; Skip next instruction, typically a jump target
|
||
#define skip cpse 0,0
|
||
|
||
;; NEG2 REG
;; Two-s complement negation of the 16-bit value in REG+1:REG.
;; The high byte is complemented first; NEG of the low byte produces a
;; borrow unless the low byte is 0, and subtracting -1 with that borrow
;; finishes the +1 of the high byte.
;; SBCI is an immediate instruction, so REG+1 has to be one of R16..R31.
.macro  NEG2  reg
    com     \reg+1
    neg     \reg
    sbci    \reg+1, -1
.endm
|
||
|
||
;; NEG4 REG
;; Two-s complement negation of the 32-bit value in REG+3..REG.
;; For REG >= 16 the NEG / SBCI form is used, which also sets the V flag
;; for signed overflow tests.  For lower registers, where SBCI is not
;; available, all four bytes are complemented and 1 is added through the
;; Carry that COM always sets.
.macro  NEG4  reg
    com     \reg+3
    com     \reg+2
    com     \reg+1
.if \reg >= 16
    neg     \reg
    sbci    \reg+1, -1
    sbci    \reg+2, -1
    sbci    \reg+3, -1
.else
    com     \reg
    ;; Carry = 1 from COM:  value = ~value + 1
    adc     \reg,   __zero_reg__
    adc     \reg+1, __zero_reg__
    adc     \reg+2, __zero_reg__
    adc     \reg+3, __zero_reg__
.endif
.endm
|
||
|
||
#define exp_lo(N) hlo8 ((N) << 23)
|
||
#define exp_hi(N) hhi8 ((N) << 23)
|
||
|
||
|
||
.section .text.libgcc.mul, "ax", @progbits
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
|
||
#if !defined (__AVR_HAVE_MUL__)
|
||
/*******************************************************
|
||
Multiplication 8 x 8 without MUL
|
||
*******************************************************/
|
||
#if defined (L_mulqi3)
|
||
|
||
#define r_arg2 r22 /* multiplicand */
|
||
#define r_arg1 r24 /* multiplier */
|
||
#define r_res __tmp_reg__ /* result */
|
||
|
||
;;; r_arg1 = r_arg1 * r_arg2   (low 8 bits of the product)
;;; Shift-and-add multiplication for cores without MUL.
;;; Changes: r_arg2 and r_res (__tmp_reg__)
DEFUN __mulqi3
    clr     r_res
__mulqi3_loop:
    ;; Add the multiplicand if bit 0 of the multiplier is set
    sbrc    r_arg1, 0
    add     r_res, r_arg2
    ;; multiplicand <<= 1; done once it has been shifted out completely
    lsl     r_arg2
    breq    __mulqi3_exit
    ;; multiplier >>= 1; done once no set bit is left
    lsr     r_arg1
    brne    __mulqi3_loop
__mulqi3_exit:
    ;; Result to the return register
    mov     r_arg1, r_res
    ret
ENDF __mulqi3
|
||
|
||
#undef r_arg2
|
||
#undef r_arg1
|
||
#undef r_res
|
||
|
||
#endif /* defined (L_mulqi3) */
|
||
|
||
|
||
/*******************************************************
|
||
Widening Multiplication 16 = 8 x 8 without MUL
|
||
Multiplication 16 x 16 without MUL
|
||
*******************************************************/
|
||
|
||
#define A0 r22
|
||
#define A1 r23
|
||
#define B0 r24
|
||
#define BB0 r20
|
||
#define B1 r25
|
||
;; Output overlaps input, thus expand result in CC0/1
|
||
#define C0 r24
|
||
#define C1 r25
|
||
#define CC0 __tmp_reg__
|
||
#define CC1 R21
|
||
|
||
#if defined (L_umulqihi3)
|
||
;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
;;; (C1:C0) = (unsigned int) A0  * (unsigned int) B0
;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __umulqihi3
    ;; Zero-extend both operands to 16 bits and finish in __mulhi3
    clr     B1
    clr     A1
    XJMP    __mulhi3
ENDF __umulqihi3
|
||
#endif /* L_umulqihi3 */
|
||
|
||
#if defined (L_mulqihi3)
|
||
;;; R25:R24 = (signed int) R22 * (signed int) R24
;;; (C1:C0) = (signed int) A0  * (signed int) B0
;;; Clobbers: __tmp_reg__, R20..R23
;;;
;;; B is sign-extended, A is only zero-extended because __mulhi3 runs
;;; twice as fast if A1 is zero.  For A < 0 the zero-extended value is
;;; A + 256, so the product is too big by 256 * B; this is repaired by
;;; subtracting B0 from the high byte of the result.
DEFUN __mulqihi3
    ;; B1:B0 = sign-extended B0
    clr     B1
    sbrc    B0, 7
    com     B1
    ;; A1:A0 = zero-extended A0
    clr     A1
#if defined (__AVR_HAVE_JMP_CALL__)
    ;; BB0 = (A < 0) ? B0 : 0
    clr     BB0
    sbrc    A0, 7
    mov     BB0, B0
    call    __mulhi3
#else /* no CALL */
    ;; A >= 0 needs no correction: finish in __mulhi3 directly.
    ;; Same size as the variant above, only a one-word RJMP is skipped,
    ;; and it is faster for A >= 0.
    sbrs    A0, 7
    rjmp    __mulhi3
    ;; A < 0: remember B0 for the correction
    mov     BB0, B0
    rcall   __mulhi3
#endif /* __AVR_HAVE_JMP_CALL__ */
    ;; 1-extend A after the multiplication
    sub     C1, BB0
    ret
ENDF __mulqihi3
|
||
#endif /* L_mulqihi3 */
|
||
|
||
#if defined (L_mulhi3)
|
||
;;; R25:R24 = R23:R22 * R25:R24
;;; (C1:C0) = (A1:A0) * (B1:B0)
;;; Clobbers: __tmp_reg__, R21..R23
;;;
;;; Shift-and-add multiplication.  The output registers overlap B, so
;;; the product is collected in CC1:CC0 and copied at the end.
DEFUN __mulhi3
    ;; CC = 0
    clr     CC0
    clr     CC1
    rjmp    3f

1:  ;; Bit n of A was 1:  CC += B << n
    add     CC0, B0
    adc     CC1, B1

2:  ;; B <<= 1
    lsl     B0
    rol     B1

3:  ;; Nothing more to add once B is 0
    sbiw    B0, 0
    breq    9f

    ;; A >>= 1, Carry = bit n of A
    lsr     A1
    ror     A0
    brcs    1b

    ;; Carry is 0 here, so the ROR above acted like  CP A0, 0  and one
    ;; CPC on the high byte completes the test of A against 0.
    cpc     A1, __zero_reg__
    brne    2b

9:  ;; Move the result into place
    mov     C0, CC0
    mov     C1, CC1
    ret
ENDF __mulhi3
|
||
#endif /* L_mulhi3 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef B0
|
||
#undef BB0
|
||
#undef B1
|
||
#undef C0
|
||
#undef C1
|
||
#undef CC0
|
||
#undef CC1
|
||
|
||
|
||
#define A0 22
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
#define A3 A0+3
|
||
|
||
#define B0 18
|
||
#define B1 B0+1
|
||
#define B2 B0+2
|
||
#define B3 B0+3
|
||
|
||
#define CC0 26
|
||
#define CC1 CC0+1
|
||
#define CC2 30
|
||
#define CC3 CC2+1
|
||
|
||
#define C0 22
|
||
#define C1 C0+1
|
||
#define C2 C0+2
|
||
#define C3 C0+3
|
||
|
||
/*******************************************************
|
||
Widening Multiplication 32 = 16 x 16 without MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_umulhisi3)
|
||
;;; Unsigned widening 32 = 16 x 16 multiplication without MUL.
;;; One factor arrives in R25:R24, the other in A1:A0; both are
;;; zero-extended to 32 bits and the work is left to __mulsi3.
DEFUN __umulhisi3
    ;; B1:B0 = R25:R24
    wmov    B0, 24
    ;; B3:B2 = 0
    clr     B2
    clr     B3
    ;; A3:A2 = 0, copied from the zero high word of B
    wmov    A2, B2
    XJMP    __mulsi3
ENDF __umulhisi3
|
||
#endif /* L_umulhisi3 */
|
||
|
||
#if defined (L_mulhisi3)
|
||
;;; Signed widening 32 = 16 x 16 multiplication without MUL.
;;; One factor arrives in R25:R24 and is moved to B, the other one
;;; is A1:A0.  Finishes in __mulsi3 or __mulsi3_helper.
DEFUN __mulhisi3
    wmov    B0, 24
    ;; Sign-extend B: the sign bit goes to Carry and SBC turns it
    ;; into 0x00 or 0xff
    lsl     r25
    sbc     B2, B2
    mov     B3, B2
#if defined (__AVR_ERRATA_SKIP_JMP_CALL__)
    ;; Sign-extend A
    clr     A2
    sbrc    A1, 7
    com     A2
    mov     A3, A2
    XJMP    __mulsi3
#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
    ;; Zero-extend A; __mulsi3 then runs at least twice as fast
    ;; compared to a sign-extended A.
    clr     A2
    clr     A3
    sbrs    A1, 7
    XJMP    __mulsi3
    ;; A < 0: account for the B * 0xffff... part up front by starting
    ;; with -B in the high word CC3:CC2 of the result.
    wmov    CC2, A2
    sub     CC2, B0
    sbc     CC3, B1
    XJMP    __mulsi3_helper
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
ENDF __mulhisi3
|
||
#endif /* L_mulhisi3 */
|
||
|
||
|
||
/*******************************************************
|
||
Multiplication 32 x 32 without MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_mulsi3)
|
||
;;; (C3:C0) = (A3:A0) * (B3:B0)      32 x 32 without MUL
;;; Shift-and-add; the product is collected in CC3..CC0 and copied to C
;;; at the end.  A and B are consumed.
DEFUN __mulsi3
    ;; High word of the result starts at 0
    clr     CC2
    clr     CC3
    ;; FALLTHRU
ENDF __mulsi3

;;; Same as __mulsi3, but the caller supplies the start value of the
;;; high result word CC3:CC2.
DEFUN __mulsi3_helper
    clr     CC0
    clr     CC1
    rjmp    3f

1:  ;; Bit n of A was set:  CC += B
    add     CC0, B0
    adc     CC1, B1
    adc     CC2, B2
    adc     CC3, B3

2:  ;; B <<= 1
    lsl     B0
    rol     B1
    rol     B2
    rol     B3

3:  ;; A >>= 1, Carry = bit n of A
    lsr     A3
    ror     A2
    ror     A1
    ror     A0
    brcs    1b

    ;; Only continue if A != 0.  Carry is 0 here, so SBCI leaves A1
    ;; unchanged and keeps Z set only if A0 and A1 are both 0.
    sbci    A1, 0
    brne    2b
    sbiw    A2, 0
    brne    2b

    ;; All bits of A are consumed: copy the result to C
    wmov    C0, CC0
    wmov    C2, CC2
    ret
ENDF __mulsi3_helper
|
||
#endif /* L_mulsi3 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
#undef B0
|
||
#undef B1
|
||
#undef B2
|
||
#undef B3
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
#undef C3
|
||
#undef CC0
|
||
#undef CC1
|
||
#undef CC2
|
||
#undef CC3
|
||
|
||
#endif /* !defined (__AVR_HAVE_MUL__) */
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
#if defined (__AVR_HAVE_MUL__)
|
||
#define A0 26
|
||
#define B0 18
|
||
#define C0 22
|
||
|
||
#define A1 A0+1
|
||
|
||
#define B1 B0+1
|
||
#define B2 B0+2
|
||
#define B3 B0+3
|
||
|
||
#define C1 C0+1
|
||
#define C2 C0+2
|
||
#define C3 C0+3
|
||
|
||
/*******************************************************
|
||
Widening Multiplication 32 = 16 x 16 with MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_mulhisi3)
|
||
;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulhisi3
    ;; Start from the unsigned product
    XCALL   __umulhisi3
    ;; B < 0: take A off the high word of the result
    tst     B1
    brpl    1f
    sub     C2, A0
    sbc     C3, A1
1:  ;; The correction for the sign of A is done by the shared tail
    XJMP    __usmulhisi3_tail
ENDF __mulhisi3
|
||
#endif /* L_mulhisi3 */
|
||
|
||
#if defined (L_usmulhisi3)
|
||
;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __usmulhisi3
    ;; Unsigned product first, then fall into the sign correction for A
    XCALL   __umulhisi3
    ;; FALLTHRU
ENDF __usmulhisi3

;;; Correct the unsigned product in C3:C0 for a signed A:
;;; if A < 0, subtract B from the high word.
DEFUN __usmulhisi3_tail
    ;; A >= 0: nothing to correct
    sbrs    A1, 7
    ret
    sub     C2, B0
    sbc     C3, B1
    ret
ENDF __usmulhisi3_tail
|
||
#endif /* L_usmulhisi3 */
|
||
|
||
#if defined (L_umulhisi3)
|
||
;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
;;; R1 is used by MUL and is cleared again before returning.
DEFUN __umulhisi3
    ;; C1:C0 = A0 * B0
    mul     A0, B0
    movw    C0, r0
    ;; C3:C2 = A1 * B1
    mul     A1, B1
    movw    C2, r0
    ;; First cross product, added in at C1 below
    mul     A0, B1
#if defined (__AVR_HAVE_JMP_CALL__)
    ;; This function is used by many other routines, often several
    ;; times.  If flash is not too limited, invest 6 bytes in an inline
    ;; copy of the addition instead of reaching it through RCALL.
    add     C1, r0
    adc     C2, r1
    clr     __zero_reg__
    adc     C3, __zero_reg__
#else
    ;; Run the addition at 1: as a subroutine; its RET comes back here
    rcall   1f
#endif
    ;; Second cross product
    mul     A1, B0
1:  ;; C3:C1 += R1:R0
    add     C1, r0
    adc     C2, r1
    clr     __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __umulhisi3
|
||
#endif /* L_umulhisi3 */
|
||
|
||
/*******************************************************
|
||
Widening Multiplication 32 = 16 x 32 with MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_mulshisi3)
|
||
;;; R25:R22 = (signed long) R27:R26 * R21:R18
;;; (C3:C0) = (signed long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
;;;
;;; A >= 0 is handled by __muluhisi3; A < 0 continues in __mulohisi3,
;;; which directly follows this function.
DEFUN __mulshisi3
#if defined (__AVR_ERRATA_SKIP_JMP_CALL__)
    ;; Some cores have a problem skipping a 2-word instruction
    tst     A1
    brmi    __mulohisi3
#else
    sbrs    A1, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP    __muluhisi3
    ;; FALLTHRU
ENDF __mulshisi3

;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulohisi3
    XCALL   __muluhisi3
    ;; One-extend R27:R26 (A1:A0): subtract the low word of B from the
    ;; high word of the result
    sub     C2, B0
    sbc     C3, B1
    ret
ENDF __mulohisi3
|
||
#endif /* L_mulshisi3 */
|
||
|
||
#if defined (L_muluhisi3)
|
||
;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __muluhisi3
    ;; A1:A0 * B1:B0
    XCALL   __umulhisi3
    ;; Products that only reach the top byte: low byte of each is enough
    mul     A0, B3
    add     C3, r0
    mul     A1, B2
    add     C3, r0
    ;; A0 * B2 goes to C3:C2
    mul     A0, B2
    add     C2, r0
    adc     C3, r1
    ;; MUL wrote R1
    clr     __zero_reg__
    ret
ENDF __muluhisi3
|
||
#endif /* L_muluhisi3 */
|
||
|
||
/*******************************************************
|
||
Multiplication 32 x 32 with MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_mulsi3)
|
||
;;; R25:R22 = R25:R22 * R21:R18
;;; (C3:C0) = C3:C0   * B3:B0
;;; Clobbers: R26, R27, __tmp_reg__
DEFUN __mulsi3
    ;; Low word of the first factor to A1:A0, high word to the stack
    movw    A0, C0
    push    C2
    push    C3
    ;; C3:C0 = low word * B3:B0
    XCALL   __muluhisi3
    pop     A1
    pop     A0
    ;; A1:A0 now contains the high word of the first factor.
    ;; Add (high word * B1:B0) << 16; only 16 bits of it fit.
    mul     A0, B0
    add     C2, r0
    adc     C3, r1
    mul     A0, B1
    add     C3, r0
    mul     A1, B0
    add     C3, r0
    ;; MUL wrote R1
    clr     __zero_reg__
    ret
ENDF __mulsi3
|
||
#endif /* L_mulsi3 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
|
||
#undef B0
|
||
#undef B1
|
||
#undef B2
|
||
#undef B3
|
||
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
#undef C3
|
||
|
||
#endif /* __AVR_HAVE_MUL__ */
|
||
|
||
/*******************************************************
|
||
Multiplication 24 x 24 with and without MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_mulpsi3)
|
||
|
||
;; A[0..2]: In: Multiplicand; Out: Product
|
||
#define A0 22
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
|
||
;; B[0..2]: In: Multiplier
|
||
#define B0 18
|
||
#define B1 B0+1
|
||
#define B2 B0+2
|
||
|
||
#if defined (__AVR_HAVE_MUL__)
|
||
|
||
;; C[0..2]: Expand Result
|
||
#define C0 22
|
||
#define C1 C0+1
|
||
#define C2 C0+2
|
||
|
||
;; R24:R22 *= R20:R18
|
||
;; Clobbers: r21, r25, r26, r27, __tmp_reg__
|
||
|
||
#define AA0 26
|
||
#define AA2 21
|
||
|
||
DEFUN __mulpsi3
    ;; Keep a copy of A: bytes 0 and 1 in AA0+1:AA0, byte 2 in AA2
    wmov    AA0, A0
    mov     AA2, A2
    ;; Product of the copied low word of A with B1:B0; bytes 0..2 of
    ;; its result are used as C[]
    XCALL   __umulhisi3
    ;; Products that only reach byte 2: low byte of each is enough
    mul     AA2, B0
    add     C2, r0
    mul     AA0, B2
    add     C2, r0
    ;; MUL wrote R1
    clr     __zero_reg__
    ret
ENDF __mulpsi3
|
||
|
||
#undef AA2
|
||
#undef AA0
|
||
|
||
#undef C2
|
||
#undef C1
|
||
#undef C0
|
||
|
||
#else /* !HAVE_MUL */
|
||
|
||
;; C[0..2]: Expand Result
|
||
#define C0 0
|
||
#define C1 C0+1
|
||
#define C2 21
|
||
|
||
;; R24:R22 *= R20:R18
;; Clobbers: __tmp_reg__, R18, R19, R20, R21
;;
;; Shift-and-add multiplication.  The result is collected in C2, R1, R0;
;; R1 is __zero_reg__, which is used as it comes (expected to be 0) and
;; is cleared again before returning.
DEFUN __mulpsi3
    ;; C[] = 0
    clr     __tmp_reg__
    clr     C2

0:  ;; B[] >>= 1, Carry = bit shifted out
    lsr     B2
    ror     B1
    ror     B0

    ;; Bit was clear: skip the addition
    brcc    1f

    ;; C[] += A[], where A[] has been doubled once per earlier round
    add     C0, A0
    adc     C1, A1
    adc     C2, A2

1:  ;; A[] *= 2
    lsl     A0
    rol     A1
    rol     A2

    ;; Loop until B[] is 0
    subi    B0, 0
    sbci    B1, 0
    sbci    B2, 0
    brne    0b

    ;; Copy C[] to the return registers A[]
    wmov    A0, C0
    mov     A2, C2

    clr     __zero_reg__
    ret
ENDF __mulpsi3
|
||
|
||
#undef C2
|
||
#undef C1
|
||
#undef C0
|
||
|
||
#endif /* HAVE_MUL */
|
||
|
||
#undef B2
|
||
#undef B1
|
||
#undef B0
|
||
|
||
#undef A2
|
||
#undef A1
|
||
#undef A0
|
||
|
||
#endif /* L_mulpsi3 */
|
||
|
||
#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
|
||
|
||
;; A[0..2]: In: Multiplicand
|
||
#define A0 22
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
|
||
;; BB: In: Multiplier
|
||
#define BB 25
|
||
|
||
;; C[0..2]: Result
|
||
#define C0 18
|
||
#define C1 C0+1
|
||
#define C2 C0+2
|
||
|
||
;; C[] = A[] * sign_extend (BB)
;; The three bytes of A are multiplied with BB as an unsigned byte;
;; for BB < 0 the result is corrected afterwards.
DEFUN __mulsqipsi3
    ;; C1:C0 = A0 * BB
    mul     A0, BB
    movw    C0, r0
    ;; C2 = low byte of A2 * BB
    mul     A2, BB
    mov     C2, r0
    ;; C2:C1 += A1 * BB
    mul     A1, BB
    add     C1, r0
    adc     C2, r1
    ;; MUL wrote R1
    clr     __zero_reg__
    ;; BB >= 0: done
    sbrs    BB, 7
    ret
    ;; One-extend BB: subtract A[] << 8
    sub     C1, A0
    sbc     C2, A1
    ret
ENDF __mulsqipsi3
|
||
|
||
#undef C2
|
||
#undef C1
|
||
#undef C0
|
||
|
||
#undef BB
|
||
|
||
#undef A2
|
||
#undef A1
|
||
#undef A0
|
||
|
||
#endif /* L_mulsqipsi3 && HAVE_MUL */
|
||
|
||
/*******************************************************
|
||
Multiplication 64 x 64
|
||
*******************************************************/
|
||
|
||
;; A[] = A[] * B[]
|
||
|
||
;; A[0..7]: In: Multiplicand
|
||
;; Out: Product
|
||
#define A0 18
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
#define A3 A0+3
|
||
#define A4 A0+4
|
||
#define A5 A0+5
|
||
#define A6 A0+6
|
||
#define A7 A0+7
|
||
|
||
;; B[0..7]: In: Multiplier
|
||
#define B0 10
|
||
#define B1 B0+1
|
||
#define B2 B0+2
|
||
#define B3 B0+3
|
||
#define B4 B0+4
|
||
#define B5 B0+5
|
||
#define B6 B0+6
|
||
#define B7 B0+7
|
||
|
||
#if defined (__AVR_HAVE_MUL__)
|
||
|
||
;; Define C[] for convenience
|
||
;; Notice that parts of C[] overlap A[] or B[]
|
||
#define C0 16
|
||
#define C1 C0+1
|
||
#define C2 20
|
||
#define C3 C2+1
|
||
#define C4 28
|
||
#define C5 C4+1
|
||
#define C6 C4+2
|
||
#define C7 C4+3
|
||
|
||
#if defined (L_muldi3)
|
||
|
||
;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function
;;
;; The product is built in C[] from 16 x 16 partial products.  The
;; pairs of numbers in the comments are the word indices of the A word
;; and the B word that are multiplied.  R16, R17, R28 and R29 are saved
;; and restored here; 16-bit products come from __umulhisi3, which
;; takes its factors in R27:R26 and R19:R18 and returns R25:R22.
DEFUN __muldi3
    push    r29
    push    r28
    push    r17
    push    r16

    ;; Counting in Words, we have to perform a 4 * 4 Multiplication

    ;; 3 * 0  +  0 * 3
    ;; Only the top word C7:C6 of the result is affected
    mul     A7, B0
    mov     C7, r0
    mul     A0, B7
    add     C7, r0
    mul     A6, B1
    add     C7, r0
    mul     A6, B0
    mov     C6, r0
    add     C7, r1
    mul     B6, A1
    add     C7, r0
    mul     B6, A0
    add     C6, r0
    adc     C7, r1

    ;; 1 * 2
    mul     A2, B4
    add     C6, r0
    adc     C7, r1
    mul     A3, B4
    add     C7, r0
    mul     A2, B5
    add     C7, r0

    ;; Save the words that are still needed after their registers
    ;; have been reused
    push    A5
    push    A4
    push    B1
    push    B0
    push    A3
    push    A2

    ;; 0 * 0
    wmov    26, B0
    XCALL   __umulhisi3
    wmov    C0, 22
    wmov    C2, 24

    ;; 0 * 2
    wmov    26, B4
    XCALL   __umulhisi3
    wmov    C4, 22
    add     C6, 24
    adc     C7, 25

    wmov    26, B2
    ;; 0 * 1
    XCALL   __muldi3_6

    pop     A0
    pop     A1
    ;; 1 * 1
    wmov    26, B2
    XCALL   __umulhisi3
    add     C4, 22
    adc     C5, 23
    adc     C6, 24
    adc     C7, 25

    pop     r26
    pop     r27
    ;; 1 * 0
    XCALL   __muldi3_6

    pop     A0
    pop     A1
    ;; 2 * 0
    XCALL   __umulhisi3
    add     C4, 22
    adc     C5, 23
    adc     C6, 24
    adc     C7, 25

    ;; 2 * 1
    wmov    26, B2
    XCALL   __umulhisi3
    add     C6, 22
    adc     C7, 23

    ;; A[] = C[]
    wmov    A0, C0
    ;; A2 = C2 already
    wmov    A4, C4
    wmov    A6, C6

    clr     __zero_reg__
    pop     r16
    pop     r17
    pop     r28
    pop     r29
    ret
ENDF __muldi3
|
||
#endif /* L_muldi3 */
|
||
|
||
#if defined (L_muldi3_6)
|
||
;; A helper for some 64-bit multiplications with MUL available
;;
;; Calls __umulhisi3 and adds its 32-bit result R25:R22 to C5:C2.
;; A carry out of C5 is propagated by incrementing the word C7:C6.
;; The label itself comes from DEFUN; no second definition is needed.
DEFUN __muldi3_6
    XCALL   __umulhisi3
    add     C2, 22
    adc     C3, 23
    adc     C4, 24
    adc     C5, 25
    brcc    0f
    adiw    C6, 1
0:  ret
ENDF __muldi3_6
|
||
#endif /* L_muldi3_6 */
|
||
|
||
#undef C7
|
||
#undef C6
|
||
#undef C5
|
||
#undef C4
|
||
#undef C3
|
||
#undef C2
|
||
#undef C1
|
||
#undef C0
|
||
|
||
#else /* !HAVE_MUL */
|
||
|
||
#if defined (L_muldi3)
|
||
|
||
#define C0 26
|
||
#define C1 C0+1
|
||
#define C2 C0+2
|
||
#define C3 C0+3
|
||
#define C4 C0+4
|
||
#define C5 C0+5
|
||
#define C6 0
|
||
#define C7 C6+1
|
||
|
||
#define Loop 9
|
||
|
||
;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function
;;
;; Shift-and-add multiplication over 64 rounds without MUL.
;; The result is collected in C[] = R1, R0, R31..R26.  R28, R29 and the
;; loop counter register are saved and restored; R1 (__zero_reg__) is
;; cleared again at the end.
DEFUN __muldi3
    push    r29
    push    r28
    push    Loop

    ;; Loop = 64, loaded through C0 because LDI needs an upper register
    ldi     C0, 64
    mov     Loop, C0

    ;; C[] = 0: R1:R0 is 0 after the CLR and is copied to the other words
    clr     __tmp_reg__
    wmov    C0, 0
    wmov    C2, 0
    wmov    C4, 0

0:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
    ;; where N = 64 - Loop.
    ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
    ;; B[] will have its initial Value again.
    lsr     B7
    ror     B6
    ror     B5
    ror     B4
    ror     B3
    ror     B2
    ror     B1
    ror     B0

    ;; If the N-th Bit of B[] was set then...
    brcc    1f
    ;; ...finish Rotation...
    ori     B7, 0x80

    ;; ...and add A[] * 2^N to the Result C[]
    add     C0, A0
    adc     C1, A1
    adc     C2, A2
    adc     C3, A3
    adc     C4, A4
    adc     C5, A5
    adc     C6, A6
    adc     C7, A7

1:  ;; Multiply A[] by 2
    lsl     A0
    rol     A1
    rol     A2
    rol     A3
    rol     A4
    rol     A5
    rol     A6
    rol     A7

    dec     Loop
    brne    0b

    ;; We expanded the Result in C[]
    ;; Copy Result to the Return Register A[]
    wmov    A0, C0
    wmov    A2, C2
    wmov    A4, C4
    wmov    A6, C6

    clr     __zero_reg__
    pop     Loop
    pop     r28
    pop     r29
    ret
ENDF __muldi3
|
||
|
||
#undef Loop
|
||
|
||
#undef C7
|
||
#undef C6
|
||
#undef C5
|
||
#undef C4
|
||
#undef C3
|
||
#undef C2
|
||
#undef C1
|
||
#undef C0
|
||
|
||
#endif /* L_muldi3 */
|
||
#endif /* HAVE_MUL */
|
||
|
||
#undef B7
|
||
#undef B6
|
||
#undef B5
|
||
#undef B4
|
||
#undef B3
|
||
#undef B2
|
||
#undef B1
|
||
#undef B0
|
||
|
||
#undef A7
|
||
#undef A6
|
||
#undef A5
|
||
#undef A4
|
||
#undef A3
|
||
#undef A2
|
||
#undef A1
|
||
#undef A0
|
||
|
||
/*******************************************************
|
||
Widening Multiplication 64 = 32 x 32 with MUL
|
||
*******************************************************/
|
||
|
||
#if defined (__AVR_HAVE_MUL__)
|
||
#define A0 r22
|
||
#define A1 r23
|
||
#define A2 r24
|
||
#define A3 r25
|
||
|
||
#define B0 r18
|
||
#define B1 r19
|
||
#define B2 r20
|
||
#define B3 r21
|
||
|
||
#define C0 18
|
||
#define C1 C0+1
|
||
#define C2 20
|
||
#define C3 C2+1
|
||
#define C4 28
|
||
#define C5 C4+1
|
||
#define C6 C4+2
|
||
#define C7 C4+3
|
||
|
||
#if defined (L_umulsidi3)
|
||
|
||
;; Unsigned widening 64 = 32 * 32 Multiplication with MUL

;; R18[8] = R22[4] * R18[4]
;;
;; Ordinary ABI Function, but additionally sets
;; X = R20[2] = B2[2]
;; Z = R22[2] = A0[2]
DEFUN __umulsidi3
    ;; T = 0: treat A as unsigned in the helper
    clt
    ;; FALLTHRU
ENDF  __umulsidi3

;; Same multiplication; the caller supplies T = sign (A).
;; With T set, B is subtracted from the high part of the 1 * 1 product,
;; which accounts for a negative A.
DEFUN __umulsidi3_helper
    push    29                      ; Y
    push    28
    wmov    30, A2
    ;; Counting in Words, we have to perform 4 Multiplications
    ;; 0 * 0
    wmov    26, A0
    XCALL   __umulhisi3
    push    23                      ; C0
    push    22
    wmov    28, B0
    wmov    18, B2
    wmov    C2, 24
    push    27                      ; A0
    push    26
    push    19                      ; B2
    push    18
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B2  C2  --  --  --  B0  A2
    ;; 1 * 1
    wmov    26, 30                  ; A2
    XCALL   __umulhisi3
    ;; Sign-extend A.  T holds the sign of A
    brtc    0f
    ;; Subtract B from the high part of the result
    sub     22, 28
    sbc     23, 29
    sbc     24, 18
    sbc     25, 19
0:  wmov    18, 28                  ; B0
    wmov    C4, 22
    wmov    C6, 24
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B0  C2  --  --  A2  C4  C6
    ;;
    ;; 1 * 0
    XCALL   __muldi3_6
    ;; 0 * 1
    pop     26                      ; B2
    pop     27
    pop     18                      ; A0
    pop     19
    XCALL   __muldi3_6

    ;; Move result C into place and save A0 in Z
    wmov    22, C4
    wmov    24, C6
    wmov    30, 18                  ; A0
    pop     C0
    pop     C1

    ;; Epilogue
    pop     28                      ; Y
    pop     29
    ret
ENDF __umulsidi3_helper
|
||
#endif /* L_umulsidi3 */
|
||
|
||
|
||
#if defined (L_mulsidi3)
|
||
|
||
;; Signed widening 64 = 32 * 32 Multiplication
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
DEFUN __mulsidi3
    ;; T = sign (A); the helper does the correction for A < 0
    bst     A3, 7
    ;; B >= 0: nothing more to do here, finish in the helper
    sbrs    B3, 7                   ; Enhanced core has no skip bug
    XJMP    __umulsidi3_helper

    ;; B needs sign-extension: keep the high word of A on the stack,
    ;; then subtract A from the high 32 bits of the product
    push    A3
    push    A2
    XCALL   __umulsidi3_helper
    ;; A0 survived in Z
    sub     r22, r30
    sbc     r23, r31
    ;; High word of A comes back from the stack into X
    pop     r26
    pop     r27
    sbc     r24, r26
    sbc     r25, r27
    ret
ENDF __mulsidi3
|
||
#endif /* L_mulsidi3 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
#undef B0
|
||
#undef B1
|
||
#undef B2
|
||
#undef B3
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
#undef C3
|
||
#undef C4
|
||
#undef C5
|
||
#undef C6
|
||
#undef C7
|
||
#endif /* HAVE_MUL */
|
||
|
||
/**********************************************************
|
||
Widening Multiplication 64 = 32 x 32 without MUL
|
||
**********************************************************/
|
||
|
||
#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
|
||
#define A0 18
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
#define A3 A0+3
|
||
#define A4 A0+4
|
||
#define A5 A0+5
|
||
#define A6 A0+6
|
||
#define A7 A0+7
|
||
|
||
#define B0 10
|
||
#define B1 B0+1
|
||
#define B2 B0+2
|
||
#define B3 B0+3
|
||
#define B4 B0+4
|
||
#define B5 B0+5
|
||
#define B6 B0+6
|
||
#define B7 B0+7
|
||
|
||
#define AA0 22
|
||
#define AA1 AA0+1
|
||
#define AA2 AA0+2
|
||
#define AA3 AA0+3
|
||
|
||
#define BB0 18
|
||
#define BB1 BB0+1
|
||
#define BB2 BB0+2
|
||
#define BB3 BB0+3
|
||
|
||
#define Mask r30
|
||
|
||
;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
;;
;; Both entry points share one body; T selects the extension:
;; T = 1 sign-extends the factors, T = 0 zero-extends them.
;; The extended 64-bit values are multiplied by __muldi3.
DEFUN __mulsidi3
    set
    ;; Jump over the CLT at the start of __umulsidi3
    skip
    ;; FALLTHRU
ENDF  __mulsidi3

DEFUN __umulsidi3
    clt                             ; skipped when coming from __mulsidi3
    ;; Save 10 Registers: R10..R17, R28, R29
    do_prologue_saves 10
    ;; Mask = 0xff if T = 1, 0x7f if T = 0
    ldi     Mask, 0xff
    bld     Mask, 7
    ;; Move B into place...
    wmov    B0, BB0
    wmov    B2, BB2
    ;; ...and extend it: the masked sign bit goes to Carry, and SBC
    ;; turns Carry into 0x00 or 0xff
    and     BB3, Mask
    lsl     BB3
    sbc     B4, B4
    mov     B5, B4
    wmov    B6, B4
    ;; Move A into place...
    wmov    A0, AA0
    wmov    A2, AA2
    ;; ...and extend it the same way
    and     AA3, Mask
    lsl     AA3
    sbc     A4, A4
    mov     A5, A4
    wmov    A6, A4
    XCALL   __muldi3
    ;; Ends in a jump to the shared epilogue code
    do_epilogue_restores 10
ENDF __umulsidi3
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
#undef A4
|
||
#undef A5
|
||
#undef A6
|
||
#undef A7
|
||
#undef B0
|
||
#undef B1
|
||
#undef B2
|
||
#undef B3
|
||
#undef B4
|
||
#undef B5
|
||
#undef B6
|
||
#undef B7
|
||
#undef AA0
|
||
#undef AA1
|
||
#undef AA2
|
||
#undef AA3
|
||
#undef BB0
|
||
#undef BB1
|
||
#undef BB2
|
||
#undef BB3
|
||
#undef Mask
|
||
#endif /* L_mulsidi3 && !HAVE_MUL */
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
|
||
.section .text.libgcc.div, "ax", @progbits
|
||
|
||
/*******************************************************
|
||
Division 8 / 8 => (result + remainder)
|
||
*******************************************************/
|
||
#define r_rem r25 /* remainder */
|
||
#define r_arg1 r24 /* dividend, quotient */
|
||
#define r_arg2 r22 /* divisor */
|
||
#define r_cnt r23 /* loop count */
|
||
|
||
#if defined (L_udivmodqi4)
|
||
;;; r_arg1 = r_arg1 / r_arg2     (unsigned 8-bit quotient)
;;; r_rem  = r_arg1 % r_arg2     (remainder)
;;; Changes: r_cnt
;;;
;;; Restoring division.  Quotient bits are shifted into r_arg1 inverted
;;; (Carry = 1 means the divisor did not fit) and are complemented once
;;; at the end.
DEFUN __udivmodqi4
    sub     r_rem, r_rem            ; remainder = 0 and Carry = 0
    ldi     r_cnt, 9                ; loop counter
    rjmp    __udivmodqi4_ep         ; first round only shifts
__udivmodqi4_loop:
    rol     r_rem                   ; next dividend bit into remainder
    cp      r_rem, r_arg2
    brcs    __udivmodqi4_ep         ; remainder < divisor: keep it
    sub     r_rem, r_arg2           ; divisor fits: subtract, Carry = 0
__udivmodqi4_ep:
    rol     r_arg1                  ; shift dividend, Carry comes in
    dec     r_cnt
    brne    __udivmodqi4_loop
    com     r_arg1                  ; undo the inversion of the bits
    ret
ENDF __udivmodqi4
|
||
#endif /* defined (L_udivmodqi4) */
|
||
|
||
#if defined (L_divmodqi4)
|
||
;;; r_arg1 = r_arg1 / r_arg2     (signed 8-bit quotient)
;;; r_rem  = r_arg1 % r_arg2     (remainder, gets the sign of the dividend)
;;;
;;; Both operands are made non-negative, divided by __udivmodqi4, and
;;; the signs of the results are fixed afterwards.
;;;   T          = sign of the dividend
;;;   bit 7 of __tmp_reg__ = sign of the quotient
DEFUN __divmodqi4
    bst     r_arg1, 7               ; T = sign of dividend
    mov     __tmp_reg__, r_arg1
    eor     __tmp_reg__, r_arg2     ; r0.7 = sign of quotient
    sbrc    r_arg1, 7
    neg     r_arg1                  ; dividend negative: negate
    sbrc    r_arg2, 7
    neg     r_arg2                  ; divisor negative: negate
    XCALL   __udivmodqi4            ; unsigned div/mod
    brtc    __divmodqi4_1
    neg     r_rem                   ; remainder follows the dividend
__divmodqi4_1:
    sbrc    __tmp_reg__, 7
    neg     r_arg1                  ; correct sign of quotient
    ret
ENDF __divmodqi4
|
||
#endif /* defined (L_divmodqi4) */
|
||
|
||
#undef r_rem
|
||
#undef r_arg1
|
||
#undef r_arg2
|
||
#undef r_cnt
|
||
|
||
|
||
/*******************************************************
|
||
Division 16 / 16 => (result + remainder)
|
||
*******************************************************/
|
||
#define r_remL r26 /* remainder Low */
|
||
#define r_remH r27 /* remainder High */
|
||
|
||
/* return: remainder */
|
||
#define r_arg1L r24 /* dividend Low */
|
||
#define r_arg1H r25 /* dividend High */
|
||
|
||
/* return: quotient */
|
||
#define r_arg2L r22 /* divisor Low */
|
||
#define r_arg2H r23 /* divisor High */
|
||
|
||
#define r_cnt r21 /* loop count */
|
||
|
||
#if defined (L_udivmodhi4)
|
||
;;; Unsigned 16 / 16 division.
;;; In:   r_arg1H:r_arg1L = dividend, r_arg2H:r_arg2L = divisor
;;; Out:  r_arg2H:r_arg2L = quotient, r_arg1H:r_arg1L = remainder
;;;       (as for the div() function)
;;; Changes: r_remH:r_remL, r_cnt
;;;
;;; Restoring division.  Quotient bits are shifted into the dividend
;;; registers inverted and are complemented once at the end.
DEFUN __udivmodhi4
    sub     r_remL, r_remL
    sub     r_remH, r_remH          ; remainder = 0 and Carry = 0
    ldi     r_cnt, 17               ; loop counter
    rjmp    __udivmodhi4_ep         ; first round only shifts
__udivmodhi4_loop:
    rol     r_remL                  ; next dividend bit into remainder
    rol     r_remH
    cp      r_remL, r_arg2L
    cpc     r_remH, r_arg2H
    brcs    __udivmodhi4_ep         ; remainder < divisor: keep it
    sub     r_remL, r_arg2L         ; divisor fits: subtract, Carry = 0
    sbc     r_remH, r_arg2H
__udivmodhi4_ep:
    rol     r_arg1L                 ; shift dividend, Carry comes in
    rol     r_arg1H
    dec     r_cnt
    brne    __udivmodhi4_loop
    com     r_arg1L                 ; undo the inversion of the bits
    com     r_arg1H
    ;; Results to the return registers
    mov_l   r_arg2L, r_arg1L        ; quotient
    mov_h   r_arg2H, r_arg1H
    mov_l   r_arg1L, r_remL         ; remainder
    mov_h   r_arg1H, r_remH
    ret
ENDF __udivmodhi4
|
||
#endif /* defined (L_udivmodhi4) */
|
||
|
||
#if defined (L_divmodhi4)
;; Signed 16-bit division with remainder; _div is a second global entry.
;; In:   r_arg1H:r_arg1L = dividend, r_arg2H:r_arg2L = divisor
;; Out:  r_arg2H:r_arg2L = quotient, r_arg1H:r_arg1L = remainder
;; T keeps the sign of the dividend (used for the remainder);
;; bit 7 of __tmp_reg__ keeps the sign of the quotient.
DEFUN __divmodhi4
	.global	_div
_div:
	bst	r_arg1H, 7		; T := sign of dividend
	mov	__tmp_reg__, r_arg2H	; r0.7 := sign of divisor
	brtc	1f
	com	__tmp_reg__		; r0.7 := sign of the quotient
	rcall	.L__divmodhi4_neg1	; dividend negative: negate
1:
	sbrc	r_arg2H, 7
	rcall	.L__divmodhi4_neg2	; divisor negative: negate
	XCALL	__udivmodhi4		; do the unsigned div/mod
	sbrc	__tmp_reg__, 7
	rcall	.L__divmodhi4_neg2	; correct quotient sign
	brtc	.L__divmodhi4_done	; dividend was >= 0: remainder is fine
	;; Fall through: negate the remainder and return
.L__divmodhi4_neg1:
	;; Negate r_arg1: dividend before, remainder after the division
	com	r_arg1H
	neg	r_arg1L
	sbci	r_arg1H, -1
	ret
.L__divmodhi4_neg2:
	;; Negate r_arg2: divisor before, quotient after the division
	com	r_arg2H
	neg	r_arg2L
	sbci	r_arg2H, -1
.L__divmodhi4_done:
	ret
ENDF __divmodhi4
#endif /* defined (L_divmodhi4) */
|
||
|
||
#undef r_remH
|
||
#undef r_remL
|
||
|
||
#undef r_arg1H
|
||
#undef r_arg1L
|
||
|
||
#undef r_arg2H
|
||
#undef r_arg2L
|
||
|
||
#undef r_cnt
|
||
|
||
/*******************************************************
|
||
Division 24 / 24 => (result + remainder)
|
||
*******************************************************/
|
||
|
||
;; A[0..2]: In: Dividend; Out: Quotient
|
||
#define A0 22
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
|
||
;; B[0..2]: In: Divisor; Out: Remainder
|
||
#define B0 18
|
||
#define B1 B0+1
|
||
#define B2 B0+2
|
||
|
||
;; C[0..2]: Expand remainder
|
||
#define C0 __zero_reg__
|
||
#define C1 26
|
||
#define C2 25
|
||
|
||
;; Loop counter
|
||
#define r_cnt 21
|
||
|
||
#if defined (L_udivmodpsi4)
;; Unsigned 24-bit division with remainder.
;; R24:R22 = R24:R22  udiv  R20:R18
;; R20:R18 = R24:R22  umod  R20:R18
;; Clobbers: R21, R25, R26
;; C0 is __zero_reg__; it serves as low remainder byte and is cleared
;; again before returning.
DEFUN __udivmodpsi4
	ldi	r_cnt, 24+1		; 24 bits plus one pass that only shifts
	;; Clear remainder and Carry.  C0 is already 0
	clr	C1
	sub	C2, C2
	rjmp	.L__udivmodpsi4_shift	; first pass skips the compare
.L__udivmodpsi4_next:
	;; Move the next dividend bit into the remainder
	rol	C0
	rol	C1
	rol	C2
	;; Compare remainder against divisor
	cp	C0, B0
	cpc	C1, B1
	cpc	C2, B2
	brlo	.L__udivmodpsi4_shift	; remainder < divisor
	;; Divisor fits: subtract it (leaves Carry clear)
	sub	C0, B0
	sbc	C1, B1
	sbc	C2, B2
.L__udivmodpsi4_shift:
	;; Shift dividend left; Carry enters as inverted quotient bit
	rol	A0
	rol	A1
	rol	A2
	dec	r_cnt
	brne	.L__udivmodpsi4_next
	;; Quotient bits were collected inverted
	com	A0
	com	A1
	com	A2
	;; Remainder goes to B[]
	mov	B0, C0
	mov	B1, C1
	mov	B2, C2
	clr	__zero_reg__		; C0 was used as scratch
	ret
ENDF __udivmodpsi4
#endif /* defined (L_udivmodpsi4) */
|
||
|
||
#if defined (L_divmodpsi4)
;; Signed 24-bit division with remainder.
;; R24:R22 = R24:R22  div  R20:R18
;; R20:R18 = R24:R22  mod  R20:R18
;; Clobbers: T, __tmp_reg__, R21, R25, R26
DEFUN __divmodpsi4
	;; R0.7 will hold the sign of the quotient: A.sign ^ B.sign
	mov	__tmp_reg__, B2
	bst	A2, 7			; T := sign of dividend
	brtc	1f
	com	__tmp_reg__
	rcall	.L__divmodpsi4_negA	; dividend negative: negate
1:
	sbrc	B2, 7
	rcall	.L__divmodpsi4_negB	; divisor negative: negate

	XCALL	__udivmodpsi4		; do the unsigned div/mod

	sbrc	__tmp_reg__, 7
	rcall	.L__divmodpsi4_negA	; correct quotient sign

	brtc	.L__divmodpsi4_done	; dividend was >= 0: remainder is fine
	;; Fall through: negate the remainder and return

.L__divmodpsi4_negB:
	;; Negate B[]: divisor before, remainder after the division
	com	B2
	com	B1
	neg	B0
	sbci	B1, -1
	sbci	B2, -1
	ret

.L__divmodpsi4_negA:
	;; Negate A[]: dividend before, quotient after the division
	com	A2
	com	A1
	neg	A0
	sbci	A1, -1
	sbci	A2, -1
.L__divmodpsi4_done:
	ret

ENDF __divmodpsi4
#endif /* defined (L_divmodpsi4) */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
|
||
#undef B0
|
||
#undef B1
|
||
#undef B2
|
||
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
|
||
#undef r_cnt
|
||
|
||
/*******************************************************
|
||
Division 32 / 32 => (result + remainder)
|
||
*******************************************************/
|
||
#define r_remHH r31 /* remainder High */
|
||
#define r_remHL r30
|
||
#define r_remH r27
|
||
#define r_remL r26 /* remainder Low */
|
||
|
||
/* return: remainder */
|
||
#define r_arg1HH r25 /* dividend High */
|
||
#define r_arg1HL r24
|
||
#define r_arg1H r23
|
||
#define r_arg1L r22 /* dividend Low */
|
||
|
||
/* return: quotient */
|
||
#define r_arg2HH r21 /* divisor High */
|
||
#define r_arg2HL r20
|
||
#define r_arg2H r19
|
||
#define r_arg2L r18 /* divisor Low */
|
||
|
||
#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
|
||
|
||
#if defined (L_udivmodsi4)
;; Unsigned 32-bit division with remainder.
;; In:   r_arg1HH..r_arg1L = dividend, r_arg2HH..r_arg2L = divisor
;; Out:  r_arg2HH..r_arg2L = quotient, r_arg1HH..r_arg1L = remainder
;; Uses: r_remHH..r_remL as running remainder.  The bit counter lives in
;;       __zero_reg__, which is 0 again once the loop has finished.
DEFUN __udivmodsi4
	ldi	r_remL, 33		; 32 bits plus one pass that only shifts
	mov	r_cnt, r_remL
	sub	r_remL, r_remL		; remainder := 0 ...
	sub	r_remH, r_remH		; ... and Carry := 0
	mov_l	r_remHL, r_remL
	mov_h	r_remHH, r_remH
	rjmp	.L__udivmodsi4_shift	; first pass skips the compare
.L__udivmodsi4_next:
	rol	r_remL			; move next dividend bit into remainder
	rol	r_remH
	rol	r_remHL
	rol	r_remHH
	cp	r_remL, r_arg2L		; remainder - divisor, flags only
	cpc	r_remH, r_arg2H
	cpc	r_remHL, r_arg2HL
	cpc	r_remHH, r_arg2HH
	brlo	.L__udivmodsi4_shift	; remainder < divisor
	sub	r_remL, r_arg2L		; divisor fits: subtract it
	sbc	r_remH, r_arg2H		; (leaves Carry clear)
	sbc	r_remHL, r_arg2HL
	sbc	r_remHH, r_arg2HH
.L__udivmodsi4_shift:
	rol	r_arg1L			; shift dividend left; Carry enters as
	rol	r_arg1H			; inverted quotient bit
	rol	r_arg1HL
	rol	r_arg1HH
	dec	r_cnt
	brne	.L__udivmodsi4_next
	;; __zero_reg__ now restored (r_cnt == 0)
	com	r_arg1L			; quotient bits were collected inverted
	com	r_arg1H
	com	r_arg1HL
	com	r_arg1HH
	;; Move div/mod results to the return registers, as for ldiv()
	mov_l	r_arg2L,  r_arg1L	; quotient
	mov_h	r_arg2H,  r_arg1H
	mov_l	r_arg2HL, r_arg1HL
	mov_h	r_arg2HH, r_arg1HH
	mov_l	r_arg1L,  r_remL	; remainder
	mov_h	r_arg1H,  r_remH
	mov_l	r_arg1HL, r_remHL
	mov_h	r_arg1HH, r_remHH
	ret
ENDF __udivmodsi4
#endif /* defined (L_udivmodsi4) */
|
||
|
||
#if defined (L_divmodsi4)
;; Signed 32-bit division with remainder.
;; In:   r_arg1HH..r_arg1L = dividend, r_arg2HH..r_arg2L = divisor
;; Out:  r_arg2HH..r_arg2L = quotient, r_arg1HH..r_arg1L = remainder
;; T keeps the sign of the dividend; bit 7 of __tmp_reg__ keeps the
;; sign of the quotient.  r_arg1 is negated by means of __negsi2.
DEFUN __divmodsi4
	mov	__tmp_reg__, r_arg2HH	; r0.7 := sign of divisor
	bst	r_arg1HH, 7		; T := sign of dividend
	brtc	1f
	com	__tmp_reg__		; r0.7 := sign of the quotient
	XCALL	__negsi2		; dividend negative: negate
1:
	sbrc	r_arg2HH, 7
	rcall	.L__divmodsi4_neg2	; divisor negative: negate
	XCALL	__udivmodsi4		; do the unsigned div/mod
	sbrc	__tmp_reg__, 7
	rcall	.L__divmodsi4_neg2	; correct quotient sign
	brtc	.L__divmodsi4_done	; dividend was >= 0: remainder is fine
	XJMP	__negsi2		; correct remainder sign and return
.L__divmodsi4_neg2:
	;; Negate r_arg2: divisor before, quotient after the division
	com	r_arg2HH
	com	r_arg2HL
	com	r_arg2H
	neg	r_arg2L
	sbci	r_arg2H, -1
	sbci	r_arg2HL, -1
	sbci	r_arg2HH, -1
.L__divmodsi4_done:
	ret
ENDF __divmodsi4
#endif /* defined (L_divmodsi4) */
|
||
|
||
#if defined (L_negsi2)
;; 32-bit negation in place:
;;   (set (reg:SI 22)
;;        (neg:SI (reg:SI 22)))
;; Sets the V flag for signed overflow tests
;; The work is done by the NEG4 macro applied to register 22.
DEFUN __negsi2
	NEG4	22
	ret
ENDF __negsi2
#endif /* L_negsi2 */
|
||
|
||
#undef r_remHH
|
||
#undef r_remHL
|
||
#undef r_remH
|
||
#undef r_remL
|
||
#undef r_arg1HH
|
||
#undef r_arg1HL
|
||
#undef r_arg1H
|
||
#undef r_arg1L
|
||
#undef r_arg2HH
|
||
#undef r_arg2HL
|
||
#undef r_arg2H
|
||
#undef r_arg2L
|
||
#undef r_cnt
|
||
|
||
/*******************************************************
|
||
Division 64 / 64
|
||
Modulo 64 % 64
|
||
*******************************************************/
|
||
|
||
;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
|
||
;; at least 16k of Program Memory. For smaller Devices, depend
|
||
;; on MOVW and SP Size. There is a Connexion between SP Size and
|
||
;; Flash Size so that SP Size can be used to test for Flash Size.
|
||
|
||
#if defined (__AVR_HAVE_JMP_CALL__)
|
||
# define SPEED_DIV 8
|
||
#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
|
||
# define SPEED_DIV 16
|
||
#else
|
||
# define SPEED_DIV 0
|
||
#endif
|
||
|
||
;; A[0..7]: In: Dividend;
|
||
;; Out: Quotient (T = 0)
|
||
;; Out: Remainder (T = 1)
|
||
#define A0 18
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
#define A3 A0+3
|
||
#define A4 A0+4
|
||
#define A5 A0+5
|
||
#define A6 A0+6
|
||
#define A7 A0+7
|
||
|
||
;; B[0..7]: In: Divisor; Out: Clobber
|
||
#define B0 10
|
||
#define B1 B0+1
|
||
#define B2 B0+2
|
||
#define B3 B0+3
|
||
#define B4 B0+4
|
||
#define B5 B0+5
|
||
#define B6 B0+6
|
||
#define B7 B0+7
|
||
|
||
;; C[0..7]: Expand remainder; Out: Remainder (unused)
|
||
#define C0 8
|
||
#define C1 C0+1
|
||
#define C2 30
|
||
#define C3 C2+1
|
||
#define C4 28
|
||
#define C5 C4+1
|
||
#define C6 26
|
||
#define C7 C6+1
|
||
|
||
;; Holds Signs during Division Routine
|
||
#define SS __tmp_reg__
|
||
|
||
;; Bit-Counter in Division Routine
|
||
#define R_cnt __zero_reg__
|
||
|
||
;; Scratch Register for Negation
|
||
#define NN r31
|
||
|
||
#if defined (L_udivdi3)

;; 64-bit unsigned modulo.
;; R25:R18 = R25:R18  umod  R17:R10
;; Ordinary ABI-Function
;; Selects "remainder" by setting T, then shares the common tail.

DEFUN __umoddi3
	set
	rjmp	__udivdi3_umoddi3
ENDF __umoddi3

;; 64-bit unsigned division.
;; R25:R18 = R25:R18  udiv  R17:R10
;; Ordinary ABI-Function
;; Selects "quotient" by clearing T and falls through to the common tail.

DEFUN __udivdi3
	clt
	;; FALLTHRU
ENDF __udivdi3

;; Common tail: T = 0 yields the quotient, T = 1 the remainder.
;; C0, C1, C4 and C5 are saved around the call to the worker
;; __udivmod64 and restored in reverse order afterwards.

DEFUN __udivdi3_umoddi3
	push	C0
	push	C1
	push	C4
	push	C5
	XCALL	__udivmod64
	pop	C5
	pop	C4
	pop	C1
	pop	C0
	ret
ENDF __udivdi3_umoddi3
#endif /* L_udivdi3 */
|
||
|
||
#if defined (L_udivmod64)

;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
;; No Registers saved/restored; the Callers will take Care.
;; Preserves B[] and T-flag
;; T = 0: Compute Quotient  in A[]
;; T = 1: Compute Remainder in A[] and shift SS one Bit left
;;
;; A[] is the dividend and collects the quotient bits, C[] is the
;; running remainder, R_cnt (__zero_reg__) counts the remaining bits.

DEFUN __udivmod64

	;; Clear Remainder (C6, C7 will follow)
	clr	C0
	clr	C1
	wmov	C2, C0
	wmov	C4, C0
	ldi	C7, 64

#if SPEED_DIV == 0 || SPEED_DIV == 16
	;; Initialize Loop-Counter
	mov	R_cnt, C7
	wmov	C6, C0
#endif /* SPEED_DIV */

#if SPEED_DIV == 8

	;; Byte-wise pre-shift.  C7 serves as counter of bits still to do,
	;; hence the top dividend byte is parked on the stack.
	push	A7
	clr	C6

1:	;; Compare the remainder as it would be after shifting in A7
	;; (i.e. C6..C0:A7) against the divisor.
	CP	A7, B0
	cpc	C0, B1
	cpc	C1, B2
	cpc	C2, B3
	cpc	C3, B4
	cpc	C4, B5
	cpc	C5, B6
	cpc	C6, B7
	brsh	2f

	;; Still smaller than the divisor: these 8 quotient bits are 0,
	;; so shift C[]:A[] left by one whole byte.
	mov	C6, C5
	mov	C5, C4
	mov	C4, C3
	mov	C3, C2
	mov	C2, C1
	mov	C1, C0
	mov	C0, A7
	mov	A7, A6
	mov	A6, A5
	mov	A5, A4
	mov	A4, A3
	mov	A3, A2
	mov	A2, A1
	mov	A1, A0
	clr	A0

	;; 8 Bits are done
	subi	C7, 8
	brne	1b

	;; Shifted 64 Bits:  A7 has traveled to C7
	pop	C7
	;; Divisor is greater than Dividend. We have:
	;; A[] % B[] = A[]
	;; A[] / B[] = 0
	;; Thus, we can return immediately
	rjmp	5f

2:	;; Initialize Bit-Counter with Number of Bits still to be performed
	mov	R_cnt, C7

	;; Drop the parked copy of A7 to rebalance the stack; the
	;; remainder byte C7 has to start out as 0.
	pop	C7
	clr	C7

#elif  SPEED_DIV == 16

	;; Compare shifted Dividend against Divisor
	cp	A7, B3
	cpc	C0, B4
	cpc	C1, B5
	cpc	C2, B6
	cpc	C3, B7
	brsh	2f

	;; Divisor is greater than shifted Dividend: We can shift the Dividend
	;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
	wmov	C2, A6
	wmov	C0, A4
	wmov	A6, A2
	wmov	A4, A0
	wmov	A2, C6
	wmov	A0, C4

	;; Set Bit Counter to 32
	lsr	R_cnt
2:
#elif SPEED_DIV
#error SPEED_DIV = ?
#endif /* SPEED_DIV */

;; The very Division + Remainder Routine

3:	;; Left-shift Dividend...
	lsl	A0
	rol	A1
	rol	A2
	rol	A3
	rol	A4
	rol	A5
	rol	A6
	rol	A7

	;; ...into Remainder
	rol	C0
	rol	C1
	rol	C2
	rol	C3
	rol	C4
	rol	C5
	rol	C6
	rol	C7

	;; Compare Remainder and Divisor
	CP	C0, B0
	cpc	C1, B1
	cpc	C2, B2
	cpc	C3, B3
	cpc	C4, B4
	cpc	C5, B5
	cpc	C6, B6
	cpc	C7, B7

	brlo	4f

	;; Divisor fits into Remainder:  Subtract it from Remainder...
	SUB	C0, B0
	sbc	C1, B1
	sbc	C2, B2
	sbc	C3, B3
	sbc	C4, B4
	sbc	C5, B5
	sbc	C6, B6
	sbc	C7, B7

	;; ...and set according Bit in the upcoming Quotient
	;; The Bit will travel to its final Position
	ori	A0, 1

4:	;; This Bit is done
	dec	R_cnt
	brne	3b
	;; __zero_reg__ is 0 again

	;; T = 0: We are fine with the Quotient in A[]
	;; T = 1: Copy Remainder to A[]
5:	brtc	6f
	wmov	A0, C0
	wmov	A2, C2
	wmov	A4, C4
	wmov	A6, C6
	;; Move the Sign of the Result to SS.7
	lsl	SS

6:	ret

ENDF __udivmod64
#endif /* L_udivmod64 */
|
||
|
||
|
||
#if defined (L_divdi3)

;; 64-bit signed modulo.
;; R25:R18 = R25:R18  mod  R17:R10
;; Ordinary ABI-Function
;; Selects "remainder" by setting T, then shares the common tail.

DEFUN __moddi3
	set
	rjmp	__divdi3_moddi3
ENDF __moddi3

;; 64-bit signed division.
;; R25:R18 = R25:R18  div  R17:R10
;; Ordinary ABI-Function
;; Selects "quotient" by clearing T and falls through to the common tail.

DEFUN __divdi3
	clt
	;; FALLTHRU
ENDF __divdi3

;; Common tail: T = 0 yields the quotient, T = 1 the remainder.

DEFUN __divdi3_moddi3
#if SPEED_DIV
	mov	r31, A7
	or	r31, B7
	brmi	0f
	;; Both Signs are 0:  the following Complexity is not needed
	XJMP	__udivdi3_umoddi3
#endif /* SPEED_DIV */

0:	;; The Prologue
	;; Save 12 Registers:  Y, 17...8
	;; No Frame needed
	do_prologue_saves 12

	;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
	;; SS.6 will contain the Sign of the Remainder (A.sign)
	mov	SS, A7
	asr	SS
	;; Adjust Dividend's Sign as needed
#if SPEED_DIV
	;; Compiling for Speed we know that at least one Sign must be < 0
	;; Thus, if A[] >= 0 then we know B[] < 0
	brpl	22f
#else
	brpl	21f
#endif /* SPEED_DIV */

	XCALL	__negdi2

	;; Adjust Divisor's Sign and SS.7 as needed
21:	tst	B7
	brpl	3f
22:	ldi	NN, 1 << 7
	eor	SS, NN			; toggle the quotient sign

	;; B[] := -B[], with NN = -1 as the constant for the SBC chain
	ldi	NN, -1
	com	B4
	com	B5
	com	B6
	com	B7
	com	B1
	com	B2
	com	B3
	NEG	B0
	sbc	B1, NN
	sbc	B2, NN
	sbc	B3, NN
	sbc	B4, NN
	sbc	B5, NN
	sbc	B6, NN
	sbc	B7, NN

3:	;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
	XCALL	__udivmod64

	;; Adjust Result's Sign
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
	tst	SS
	brpl	4f
#else
	sbrc	SS, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
	XCALL	__negdi2

4:	;; Epilogue: Restore 12 Registers and return
	do_epilogue_restores 12

ENDF __divdi3_moddi3

#endif /* L_divdi3 */
|
||
|
||
#undef R_cnt
|
||
#undef SS
|
||
#undef NN
|
||
|
||
.section .text.libgcc, "ax", @progbits
|
||
|
||
#define TT __tmp_reg__
|
||
|
||
#if defined (L_adddi3)
;; 64-bit addition A[] += B[]:
;;   (set (reg:DI 18)
;;        (plus:DI (reg:DI 18)
;;                 (reg:DI 10)))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests
DEFUN __adddi3
	ADD	A0, B0
	adc	A1, B1
	adc	A2, B2
	adc	A3, B3
	adc	A4, B4
	adc	A5, B5
	adc	A6, B6
	adc	A7, B7
	ret
ENDF __adddi3
#endif /* L_adddi3 */
|
||
|
||
#if defined (L_adddi3_s8)
;; 64-bit addition of a sign-extended byte, A[] += (signed char) R26:
;;   (set (reg:DI 18)
;;        (plus:DI (reg:DI 18)
;;                 (sign_extend:DI (reg:QI 26))))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
DEFUN __adddi3_s8
	;; TT := 0x00 or 0xff, the sign extension byte of R26
	clr	TT
	sbrc	r26, 7
	com	TT
	ADD	A0, r26
	adc	A1, TT
	adc	A2, TT
	adc	A3, TT
	adc	A4, TT
	adc	A5, TT
	adc	A6, TT
	adc	A7, TT
	ret
ENDF __adddi3_s8
#endif /* L_adddi3_s8 */
|
||
|
||
#if defined (L_subdi3)
;; 64-bit subtraction A[] -= B[]:
;;   (set (reg:DI 18)
;;        (minus:DI (reg:DI 18)
;;                  (reg:DI 10)))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests
DEFUN __subdi3
	SUB	A0, B0
	sbc	A1, B1
	sbc	A2, B2
	sbc	A3, B3
	sbc	A4, B4
	sbc	A5, B5
	sbc	A6, B6
	sbc	A7, B7
	ret
ENDF __subdi3
#endif /* L_subdi3 */
|
||
|
||
#if defined (L_cmpdi2)
;; 64-bit comparison of A[] against B[]; only the flags are changed:
;;   (set (cc0)
;;        (compare (reg:DI 18)
;;                 (reg:DI 10)))
DEFUN __cmpdi2
	CP	A0, B0
	cpc	A1, B1
	cpc	A2, B2
	cpc	A3, B3
	cpc	A4, B4
	cpc	A5, B5
	cpc	A6, B6
	cpc	A7, B7
	ret
ENDF __cmpdi2
#endif /* L_cmpdi2 */
|
||
|
||
#if defined (L_cmpdi2_s8)
;; 64-bit comparison of A[] against the sign-extended byte in R26:
;;   (set (cc0)
;;        (compare (reg:DI 18)
;;                 (sign_extend:DI (reg:QI 26))))
DEFUN __cmpdi2_s8
	;; TT := 0x00 or 0xff, the sign extension byte of R26
	clr	TT
	sbrc	r26, 7
	com	TT
	CP	A0, r26
	cpc	A1, TT
	cpc	A2, TT
	cpc	A3, TT
	cpc	A4, TT
	cpc	A5, TT
	cpc	A6, TT
	cpc	A7, TT
	ret
ENDF __cmpdi2_s8
#endif /* L_cmpdi2_s8 */
|
||
|
||
#if defined (L_negdi2)
;; 64-bit negation in place:
;;   (set (reg:DI 18)
;;        (neg:DI (reg:DI 18)))
;; Sets the V flag for signed overflow tests
DEFUN __negdi2

	;; Complement bytes 1..7, negate byte 0, then propagate the
	;; borrow upwards by subtracting -1 with carry.
	com	A4
	com	A5
	com	A6
	com	A7
	com	A1
	com	A2
	com	A3
	NEG	A0
	sbci	A1, -1
	sbci	A2, -1
	sbci	A3, -1
	sbci	A4, -1
	sbci	A5, -1
	sbci	A6, -1
	sbci	A7, -1
	ret

ENDF __negdi2
#endif /* L_negdi2 */
|
||
|
||
#undef TT
|
||
|
||
#undef C7
|
||
#undef C6
|
||
#undef C5
|
||
#undef C4
|
||
#undef C3
|
||
#undef C2
|
||
#undef C1
|
||
#undef C0
|
||
|
||
#undef B7
|
||
#undef B6
|
||
#undef B5
|
||
#undef B4
|
||
#undef B3
|
||
#undef B2
|
||
#undef B1
|
||
#undef B0
|
||
|
||
#undef A7
|
||
#undef A6
|
||
#undef A5
|
||
#undef A4
|
||
#undef A3
|
||
#undef A2
|
||
#undef A1
|
||
#undef A0
|
||
|
||
|
||
.section .text.libgcc.prologue, "ax", @progbits
|
||
|
||
/**********************************
|
||
* This is a prologue subroutine
|
||
**********************************/
|
||
#if defined (L_prologue)

;; Shared function prologue: pushes r2...r17 and Y (r28, r29), lowers the
;; stack pointer by X (r27:r26), leaves the new stack pointer in Y and
;; continues at the address held in Z by means of IJMP / EIJMP.
;; NOTE(review): callers presumably enter part-way into the push sequence
;; to save fewer registers (see do_prologue_saves, defined elsewhere).
;; This function does not clobber T-flag; 64-bit division relies on it
DEFUN __prologue_saves__
	push	r2
	push	r3
	push	r4
	push	r5
	push	r6
	push	r7
	push	r8
	push	r9
	push	r10
	push	r11
	push	r12
	push	r13
	push	r14
	push	r15
	push	r16
	push	r17
	push	r28
	push	r29
#if !defined (__AVR_HAVE_SPH__)
	;; 8-bit stack pointer
	in	r28, __SP_L__
	sub	r28, r26
	out	__SP_L__, r28
	clr	r29
#else
	;; 16-bit stack pointer: Y := SP - X
	in	r28, __SP_L__
	in	r29, __SP_H__
	sub	r28, r26
	sbc	r29, r27
# if defined (__AVR_XMEGA__)
	;; XMEGA: SP is written without disabling interrupts
	out	__SP_L__, r28
	out	__SP_H__, r29
# else
	;; Write SP_H with interrupts off; SREG is restored just
	;; before SP_L is written.
	in	__tmp_reg__, __SREG__
	cli
	out	__SP_H__, r29
	out	__SREG__, __tmp_reg__
	out	__SP_L__, r28
# endif /* XMEGA */
#endif /* #SP = 8/16 */

	;; Continue in the calling function
#if defined (__AVR_HAVE_EIJMP_EICALL__)
	eijmp
#else
	ijmp
#endif

ENDF __prologue_saves__
#endif /* defined (L_prologue) */
|
||
|
||
/*
|
||
* This is an epilogue subroutine
|
||
*/
|
||
#if defined (L_epilogue)

;; Shared function epilogue: reloads r2...r17 and the saved Y from the
;; stack area addressed by Y, moves the stack pointer to Y + r30 and
;; returns.
;; NOTE(review): r30 presumably holds the number of bytes to release and
;; callers presumably enter part-way into the load sequence (see
;; do_epilogue_restores, defined elsewhere) -- confirm.
DEFUN __epilogue_restores__
	ldd	r2, Y+18
	ldd	r3, Y+17
	ldd	r4, Y+16
	ldd	r5, Y+15
	ldd	r6, Y+14
	ldd	r7, Y+13
	ldd	r8, Y+12
	ldd	r9, Y+11
	ldd	r10, Y+10
	ldd	r11, Y+9
	ldd	r12, Y+8
	ldd	r13, Y+7
	ldd	r14, Y+6
	ldd	r15, Y+5
	ldd	r16, Y+4
	ldd	r17, Y+3
	ldd	r26, Y+2		; saved r28, parked in r26 for now
#if !defined (__AVR_HAVE_SPH__)
	;; 8-bit stack pointer
	ldd	r29, Y+1
	add	r28, r30
	out	__SP_L__, r28
	mov	r28, r26
#else
	;; 16-bit stack pointer: SP := Y + r30
	ldd	r27, Y+1		; saved r29, parked in r27 for now
	add	r28, r30
	adc	r29, __zero_reg__
# if defined (__AVR_XMEGA__)
	;; XMEGA: SP is written without disabling interrupts
	out	__SP_L__, r28
	out	__SP_H__, r29
	wmov	28, 26
# else
	;; Write SP_H with interrupts off; SREG is restored just
	;; before SP_L is written.
	in	__tmp_reg__, __SREG__
	cli
	out	__SP_H__, r29
	out	__SREG__, __tmp_reg__
	out	__SP_L__, r28
	mov_l	r28, r26
	mov_h	r29, r27
# endif /* XMEGA */
#endif /* #SP = 8/16 */
	ret
ENDF __epilogue_restores__
#endif /* defined (L_epilogue) */
|
||
|
||
#ifdef L_exit
	;; _exit (and its weak alias exit) is an empty function placed in
	;; .fini9; execution runs on through the following .fini sections.
	.section .fini9,"ax",@progbits
DEFUN _exit
	.weak	exit
exit:
ENDF _exit

	/* Code from .fini8 ... .fini1 sections inserted by ld script.  */

	;; .fini0: disable interrupts and spin forever
	.section .fini0,"ax",@progbits
	cli
__stop_program:
	rjmp	__stop_program
#endif /* defined (L_exit) */
|
||
|
||
#ifdef L_cleanup
	;; Weak default _cleanup: does nothing and returns
	.weak	_cleanup
	.func	_cleanup
_cleanup:
	ret
.endfunc
#endif /* defined (L_cleanup) */
|
||
|
||
|
||
.section .text.libgcc, "ax", @progbits
|
||
|
||
#ifdef L_tablejump
;; __tablejump2__: doubles Z (r31:r30) and falls through to
;; __tablejump__.
DEFUN __tablejump2__
	lsl	r30
	rol	r31
	;; FALLTHRU
ENDF __tablejump2__

;; __tablejump__: reads a 2-byte entry from flash at Z and jumps to it.
;; Clobbers __tmp_reg__ and Z.
DEFUN __tablejump__
#if defined (__AVR_HAVE_LPMX__)
	lpm	__tmp_reg__, Z+		; low byte of the target
	lpm	r31, Z			; high byte of the target
	mov	r30, __tmp_reg__
#if defined (__AVR_HAVE_EIJMP_EICALL__)
	eijmp
#else
	ijmp
#endif

#else /* !HAVE_LPMX */
	;; Without LPM Rd,Z+ the target is pushed like a return address
	;; (low byte, high byte, then EIND where present) and reached
	;; by means of RET.
	lpm
	adiw	r30, 1
	push	r0
	lpm
	push	r0
#if defined (__AVR_HAVE_EIJMP_EICALL__)
	in	__tmp_reg__, __EIND__
	push	__tmp_reg__
#endif
	ret
#endif /* !HAVE_LPMX */
ENDF __tablejump__
#endif /* defined (L_tablejump) */
|
||
|
||
#ifdef L_copy_data
	.section .init4,"ax",@progbits
;; Startup code: copies the bytes at __data_load_start in flash to
;; __data_start ... __data_end in RAM.
;; X = RAM destination, Z (plus RAMPZ where available) = flash source,
;; r17 = hi8 (__data_end) for the end test.
DEFUN __do_copy_data
	ldi	r17, hi8(__data_end)
	ldi	r26, lo8(__data_start)
	ldi	r27, hi8(__data_start)
	ldi	r30, lo8(__data_load_start)
	ldi	r31, hi8(__data_load_start)
#if defined(__AVR_HAVE_ELPMX__)
	;; ELPM Rd,Z+ advances RAMPZ:Z by itself
	ldi	r16, hh8(__data_load_start)
	out	__RAMPZ__, r16
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
	elpm	r0, Z+
	st	X+, r0
#elif defined(__AVR_HAVE_ELPM__)
	;; Plain ELPM: RAMPZ is maintained by hand.  r16 starts one below
	;; the wanted value because the carry label increments it first.
	ldi	r16, hh8(__data_load_start - 0x10000)
.L__do_copy_data_carry:
	inc	r16
	out	__RAMPZ__, r16
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
	elpm
	st	X+, r0
	adiw	r30, 1
	brcs	.L__do_copy_data_carry	; Z wrapped: next 64 KiB segment
#else /* no ELPM at all */
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
#if defined (__AVR_HAVE_LPMX__)
	lpm	r0, Z+
#else
	lpm
	adiw	r30, 1
#endif
	st	X+, r0
#endif /* ELPMX / ELPM / LPM */
.L__do_copy_data_start:
	;; Done as soon as X == __data_end
	cpi	r26, lo8(__data_end)
	cpc	r27, r17
	brne	.L__do_copy_data_loop
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
	;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
	out	__RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
ENDF __do_copy_data
#endif /* L_copy_data */
|
||
|
||
/* __do_clear_bss is only necessary if there is anything in .bss section. */
|
||
|
||
#ifdef L_clear_bss
	.section .init4,"ax",@progbits
;; Startup code: stores __zero_reg__ to every byte in
;; __bss_start ... __bss_end.  X walks the range; r17 keeps
;; hi8 (__bss_end) for the end test.
DEFUN __do_clear_bss
	ldi	r17, hi8(__bss_end)
	ldi	r26, lo8(__bss_start)
	ldi	r27, hi8(__bss_start)
	rjmp	.L__do_clear_bss_start
.L__do_clear_bss_loop:
	st	X+, __zero_reg__
.L__do_clear_bss_start:
	;; Done as soon as X == __bss_end
	cpi	r26, lo8(__bss_end)
	cpc	r27, r17
	brne	.L__do_clear_bss_loop
ENDF __do_clear_bss
#endif /* L_clear_bss */
|
||
|
||
/* __do_global_ctors and __do_global_dtors are only necessary
|
||
if there are any constructors/destructors. */
|
||
|
||
#ifdef L_ctors
	.section .init6,"ax",@progbits
;; Startup code: calls the entries of the table
;; __ctors_start ... __ctors_end, walking backwards from the end.
;; Y (r29:r28) is the table cursor, r17 = hi8 (__ctors_start).  With
;; ELPM, r16 extends the cursor to 24 bits and the entry is fetched by
;; __tablejump_elpm__, otherwise by __tablejump__.
DEFUN __do_global_ctors
	ldi	r17, hi8(__ctors_start)
	ldi	r28, lo8(__ctors_end)
	ldi	r29, hi8(__ctors_end)
#if defined(__AVR_HAVE_ELPM__)
	ldi	r16, hh8(__ctors_end)
#endif
	rjmp	.L__do_global_ctors_start
.L__do_global_ctors_loop:
	;; Step back to the previous entry
	sbiw	r28, 2
#if defined(__AVR_HAVE_ELPM__)
	sbc	r16, __zero_reg__
#endif
	;; Z := cursor, then call the entry
	mov_h	r31, r29
	mov_l	r30, r28
#if defined(__AVR_HAVE_ELPM__)
	out	__RAMPZ__, r16
	XCALL	__tablejump_elpm__
#else
	XCALL	__tablejump__
#endif
.L__do_global_ctors_start:
	;; Done as soon as the cursor equals __ctors_start
	cpi	r28, lo8(__ctors_start)
	cpc	r29, r17
#if defined(__AVR_HAVE_ELPM__)
	ldi	r24, hh8(__ctors_start)
	cpc	r16, r24
#endif
	brne	.L__do_global_ctors_loop
ENDF __do_global_ctors
#endif /* L_ctors */
|
||
|
||
#ifdef L_dtors
	.section .fini6,"ax",@progbits
;; Exit code: calls the entries of the table
;; __dtors_start ... __dtors_end, walking forwards from the start.
;; Y (r29:r28) is the table cursor, r17 = hi8 (__dtors_end).  With
;; ELPM, r16 extends the cursor to 24 bits and the entry is fetched by
;; __tablejump_elpm__, otherwise by __tablejump__.
;;
;; The ELPM variant used to step the cursor backwards (SBIW / SBC)
;; from __dtors_start while testing for __dtors_end, so the end test
;; could not be reached in the intended way and entries below the table
;; were called.  Both variants now advance the cursor after the call.
DEFUN __do_global_dtors
	ldi	r17, hi8(__dtors_end)
	ldi	r28, lo8(__dtors_start)
	ldi	r29, hi8(__dtors_start)
#if defined(__AVR_HAVE_ELPM__)
	ldi	r16, hh8(__dtors_start)
#endif
	rjmp	.L__do_global_dtors_start
.L__do_global_dtors_loop:
	;; Z := cursor, then call the entry
	mov_h	r31, r29
	mov_l	r30, r28
#if defined(__AVR_HAVE_ELPM__)
	out	__RAMPZ__, r16
	XCALL	__tablejump_elpm__
#else
	XCALL	__tablejump__
#endif
	;; Advance to the next entry
	adiw	r28, 2
#if defined(__AVR_HAVE_ELPM__)
	adc	r16, __zero_reg__	; carry into bits 16+ of the cursor
#endif
.L__do_global_dtors_start:
	;; Done as soon as the cursor equals __dtors_end
	cpi	r28, lo8(__dtors_end)
	cpc	r29, r17
#if defined(__AVR_HAVE_ELPM__)
	ldi	r24, hh8(__dtors_end)
	cpc	r16, r24
#endif
	brne	.L__do_global_dtors_loop
ENDF __do_global_dtors
#endif /* L_dtors */
|
||
|
||
.section .text.libgcc, "ax", @progbits
|
||
|
||
#ifdef L_tablejump_elpm
;; Reads a 2-byte entry from flash at RAMPZ:Z with ELPM and jumps to it.
;; Clobbers __tmp_reg__ and Z.  Without any ELPM support the function
;; body is empty.
DEFUN __tablejump_elpm__
#if defined (__AVR_HAVE_ELPMX__)
	elpm	__tmp_reg__, Z+		; low byte of the target
	elpm	r31, Z			; high byte of the target
	mov	r30, __tmp_reg__
#if defined (__AVR_HAVE_RAMPD__)
	;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
	out	__RAMPZ__, __zero_reg__
#endif /* RAMPD */
#if defined (__AVR_HAVE_EIJMP_EICALL__)
	eijmp
#else
	ijmp
#endif

#elif defined (__AVR_HAVE_ELPM__)
	;; Plain ELPM: the target is pushed like a return address
	;; (low byte, high byte, then EIND where present) and reached
	;; by means of RET.
	elpm
	adiw	r30, 1
	push	r0
	elpm
	push	r0
#if defined (__AVR_HAVE_EIJMP_EICALL__)
	in	__tmp_reg__, __EIND__
	push	__tmp_reg__
#endif
	ret
#endif
ENDF __tablejump_elpm__
#endif /* defined (L_tablejump_elpm) */
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Loading n bytes from Flash; n = 3,4
|
||
;; R22... = Flash[Z]
|
||
;; Clobbers: __tmp_reg__
|
||
|
||
#if (defined (L_load_3)        \
     || defined (L_load_4))    \
    && !defined (__AVR_HAVE_LPMX__)

;; Destination
#define D0  22
#define D1  D0+1
#define D2  D0+2
#define D3  D0+3

;; Load one byte from Flash[Z] to register DEST by way of r0.
;; For all but the last of the N destination bytes Z is advanced by 1;
;; after the last byte Z is moved back by N-1 to its entry value.
.macro	.load dest, n
	lpm
	mov	\dest, r0
.if \dest != D0+\n-1
	adiw	r30, 1
.else
	sbiw	r30, \n-1
.endif
.endm

#if defined (L_load_3)
;; R24:R22 = Flash[Z], 3 bytes.  Done by loading 4 bytes with D3 saved
;; and restored around the call.
DEFUN __load_3
	push	D3
	XCALL	__load_4
	pop	D3
	ret
ENDF __load_3
#endif /* L_load_3 */

#if defined (L_load_4)
;; R25:R22 = Flash[Z], 4 bytes.  Z is unchanged on return.
DEFUN __load_4
	.load	D0, 4
	.load	D1, 4
	.load	D2, 4
	.load	D3, 4
	ret
ENDF __load_4
#endif /* L_load_4 */

#endif /* L_load_3 || L_load_4 */
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Loading n bytes from Flash or RAM; n = 1,2,3,4
|
||
;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
|
||
;; Clobbers: __tmp_reg__, R21, R30, R31
|
||
|
||
#if (defined (L_xload_1) \
|
||
|| defined (L_xload_2) \
|
||
|| defined (L_xload_3) \
|
||
|| defined (L_xload_4))
|
||
|
||
;; Destination
|
||
#define D0 22
|
||
#define D1 D0+1
|
||
#define D2 D0+2
|
||
#define D3 D0+3
|
||
|
||
;; Register containing bits 16+ of the address
|
||
|
||
#define HHI8 21
|
||
|
||
;; Load one byte from flash to register DEST; N is the total number of
;; bytes the calling function loads, so that DEST == D0+N-1 marks the
;; last byte.  The instruction used depends on the device:
;;   ELPMX: elpm DEST, Z+
;;   ELPM:  elpm to r0, then advance HHI8:Z and RAMPZ by hand
;;          (not after the last byte)
;;   LPMX:  lpm DEST, Z+
;;   else:  lpm to r0, then advance Z (not after the last byte)
.macro	.xload dest, n
#if defined (__AVR_HAVE_ELPMX__)
	elpm	\dest, Z+
#elif defined (__AVR_HAVE_ELPM__)
	elpm
	mov	\dest, r0
.if \dest != D0+\n-1
	adiw	r30, 1
	adc	HHI8, __zero_reg__
	out	__RAMPZ__, HHI8
.endif
#elif defined (__AVR_HAVE_LPMX__)
	lpm	\dest, Z+
#else
	lpm
	mov	\dest, r0
.if \dest != D0+\n-1
	adiw	r30, 1
.endif
#endif
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
.if \dest == D0+\n-1
	;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
	out	__RAMPZ__, __zero_reg__
.endif
#endif
.endm ; .xload
|
||
|
||
#if defined (L_xload_1)
;; R22 = RAM[Z] if HHI8.7 is set, Flash[HHI8:Z] otherwise.
DEFUN __xload_1
#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
	;; Branch-free: exactly one of the two loads is executed
	sbrc	HHI8, 7
	ld	D0, Z			; bit 7 set: read from RAM
	sbrs	HHI8, 7
	lpm	D0, Z			; bit 7 clear: read from flash
	ret
#else
	sbrc	HHI8, 7
	rjmp	1f			; bit 7 set: read from RAM
#if defined (__AVR_HAVE_ELPM__)
	out	__RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
	.xload	D0, 1
	ret
1:	ld	D0, Z
	ret
#endif /* LPMx && ! ELPM */
ENDF __xload_1
#endif /* L_xload_1 */
|
||
|
||
#if defined (L_xload_2)
;; R23:R22 = RAM[Z] if HHI8.7 is set, Flash[HHI8:Z] otherwise.
DEFUN __xload_2
	sbrc	HHI8, 7
	rjmp	1f			; bit 7 set: read from RAM
#if defined (__AVR_HAVE_ELPM__)
	out	__RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
	;; Read from flash
	.xload	D0, 2
	.xload	D1, 2
	ret
1:	;; Read from RAM
	ld	D0, Z+
	ld	D1, Z+
	ret
ENDF __xload_2
#endif /* L_xload_2 */
|
||
|
||
#if defined (L_xload_3)
;; R24:R22 = RAM[Z] if HHI8.7 is set, Flash[HHI8:Z] otherwise.
DEFUN __xload_3
	sbrc	HHI8, 7
	rjmp	1f			; bit 7 set: read from RAM
#if defined (__AVR_HAVE_ELPM__)
	out	__RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
	;; Read from flash
	.xload	D0, 3
	.xload	D1, 3
	.xload	D2, 3
	ret
1:	;; Read from RAM
	ld	D0, Z+
	ld	D1, Z+
	ld	D2, Z+
	ret
ENDF __xload_3
#endif /* L_xload_3 */
|
||
|
||
#if defined (L_xload_4)
;; R25:R22 = RAM[Z] if HHI8.7 is set, Flash[HHI8:Z] otherwise.
DEFUN __xload_4
	sbrc	HHI8, 7
	rjmp	1f			; bit 7 set: read from RAM
#if defined (__AVR_HAVE_ELPM__)
	out	__RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
	;; Read from flash
	.xload	D0, 4
	.xload	D1, 4
	.xload	D2, 4
	.xload	D3, 4
	ret
1:	;; Read from RAM
	ld	D0, Z+
	ld	D1, Z+
	ld	D2, Z+
	ld	D3, Z+
	ret
ENDF __xload_4
#endif /* L_xload_4 */
|
||
|
||
#endif /* L_xload_{1|2|3|4} */
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; memcopy from Address Space __pgmx to RAM
|
||
;; R23:Z = Source Address
|
||
;; X = Destination Address
|
||
;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
|
||
|
||
#if defined (L_movmemx)

#define HHI8  23
#define LOOP  24

;; Entry for byte counts that fit in 8 bits (1..255): the high byte of
;; the loop counter is cleared, then execution continues in
;; __movmemx_hi.
DEFUN __movmemx_qi
	;; Zero-extend Loop Counter
	clr	LOOP+1
	;; FALLTHRU
ENDF __movmemx_qi

;; Copies LOOP+1:LOOP bytes to RAM[X].  The source is RAM[Z] if HHI8.7
;; is set and Flash[HHI8:Z] otherwise.
DEFUN __movmemx_hi

	;; Read from where?
	sbrc	HHI8, 7
	rjmp	1f

	;; Read from Flash

#if defined (__AVR_HAVE_ELPM__)
	out	__RAMPZ__, HHI8
#endif

0:	;; Load 1 Byte from Flash...

#if defined (__AVR_HAVE_ELPMX__)
	elpm	r0, Z+
#elif defined (__AVR_HAVE_ELPM__)
	;; Plain ELPM: advance HHI8:Z and RAMPZ by hand
	elpm
	adiw	r30, 1
	adc	HHI8, __zero_reg__
	out	__RAMPZ__, HHI8
#elif defined (__AVR_HAVE_LPMX__)
	lpm	r0, Z+
#else
	lpm
	adiw	r30, 1
#endif

	;; ...and store that Byte to RAM Destination
	st	X+, r0
	sbiw	LOOP, 1
	brne	0b
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
	;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
	out	__RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
	ret

	;; Read from RAM

1:	;; Read 1 Byte from RAM...
	ld	r0, Z+
	;; ...and store that Byte to RAM Destination
	st	X+, r0
	sbiw	LOOP, 1
	brne	1b
	ret
ENDF __movmemx_hi

#undef HHI8
#undef LOOP

#endif /* L_movmemx */
|
||
|
||
|
||
.section .text.libgcc.builtins, "ax", @progbits
|
||
|
||
/**********************************
|
||
* Find first set Bit (ffs)
|
||
**********************************/
|
||
|
||
#if defined (L_ffssi2)
;; find first set bit
;; r25:r24 = ffs32 (r25:r22)
;; The result is the 1-based position of the least significant set bit
;; or 0 if the argument is 0.
;; clobbers: r22, r26
DEFUN __ffssi2
    ;; r26 = bit offset of the byte that is currently inspected
    clr     r26
    tst     r22
    brne    1f
    ;; r22 is 0 from here on, hence each OR below just fetches the
    ;; next byte into r22 and sets Z if that byte is 0, too
    subi    r26, -8
    or      r22, r23
    brne    1f
    subi    r26, -8
    or      r22, r24
    brne    1f
    subi    r26, -8
    or      r22, r25
    brne    1f
    ;; All 4 bytes are 0:  r25:r24 already holds the result 0
    ret
1:  ;; r22 = least significant non-zero byte, r26 = its bit offset
    mov     r24, r22
    XJMP    __loop_ffsqi2
ENDF __ffssi2
#endif /* defined (L_ffssi2) */
|
||
|
||
#if defined (L_ffshi2)
;; find first set bit
;; r25:r24 = ffs16 (r25:r24)
;; The result is the 1-based position of the least significant set bit
;; or 0 if the argument is 0.
;; clobbers: r26
DEFUN __ffshi2
    ;; r26 = bit offset of the byte that is currently inspected
    clr     r26
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problem skipping 2-word instruction
    tst     r24
    breq    2f
#else
    ;; Skip the jump below if the low byte is 0
    cpse    r24, __zero_reg__
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1:  XJMP    __loop_ffsqi2
2:  ;; Low byte is 0:  Go on with the high byte at bit offset 8
    ldi     r26, 8
    or      r24, r25
    brne    1b
    ;; Both bytes are 0:  r25:r24 already holds the result 0
    ret
ENDF __ffshi2
#endif /* defined (L_ffshi2) */
|
||
|
||
#if defined (L_loop_ffsqi2)
;; Helper for ffshi2, ffssi2
;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
;; r24 must be != 0
;; clobbers: r26
DEFUN __loop_ffsqi2
    ;; Count one position per bit that is shifted out until
    ;; the first 1 arrives in Carry
    inc     r26
    lsr     r24
    brcc    __loop_ffsqi2
    ;; Return the position, zero-extended to 16 bits
    mov     r24, r26
    clr     r25
    ret
ENDF __loop_ffsqi2
#endif /* defined (L_loop_ffsqi2) */
|
||
|
||
|
||
/**********************************
|
||
* Count trailing Zeros (ctz)
|
||
**********************************/
|
||
|
||
#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
;; clobbers: r26, r22
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
    ;; ctz = ffs - 1.  Only the low byte is decremented so that
    ;; ffs = 0 yields 255
    XCALL   __ffssi2
    dec     r24
    ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */
|
||
|
||
#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
;; clobbers: r26
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
    ;; ctz = ffs - 1.  Only the low byte is decremented so that
    ;; ffs = 0 yields 255
    XCALL   __ffshi2
    dec     r24
    ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */
|
||
|
||
|
||
/**********************************
|
||
* Count leading Zeros (clz)
|
||
**********************************/
|
||
|
||
#if defined (L_clzdi2)
;; count leading zeros
;; r25:r24 = clz64 (r25:r18)
;; clobbers: r22, r23, r26
DEFUN __clzdi2
    ;; Count in the high 32 bits r25:r22 first
    XCALL   __clzsi2
    ;; Bit 5 clear:  Count is less than 32, i.e. the high part
    ;; is not 0 and we are done
    sbrs    r24, 5
    ret
    ;; High part is 0:  Count in the low 32 bits r21:r18...
    mov_l   r22, r18
    mov_h   r23, r19
    mov_l   r24, r20
    mov_h   r25, r21
    XCALL   __clzsi2
    ;; ...and add the 32 zeros of the high part
    subi    r24, -32
    ret
ENDF __clzdi2
#endif /* defined (L_clzdi2) */
|
||
|
||
#if defined (L_clzsi2)
;; count leading zeros
;; r25:r24 = clz32 (r25:r22)
;; clobbers: r26
DEFUN __clzsi2
    ;; Count in the high 16 bits r25:r24 first
    XCALL   __clzhi2
    ;; Bit 4 clear:  Count is less than 16, i.e. the high part
    ;; is not 0 and we are done
    sbrs    r24, 4
    ret
    ;; High part is 0:  Count in the low 16 bits r23:r22...
    mov_l   r24, r22
    mov_h   r25, r23
    XCALL   __clzhi2
    ;; ...and add the 16 zeros of the high part
    subi    r24, -16
    ret
ENDF __clzsi2
#endif /* defined (L_clzsi2) */
|
||
|
||
#if defined (L_clzhi2)
;; count leading zeros
;; r25:r24 = clz16 (r25:r24)
;; clz16 (0) = 16
;; clobbers: r26
DEFUN __clzhi2
    ;; r26 accumulates the number of leading zeros
    clr     r26
    tst     r25
    brne    1f
    ;; High byte is 0:  Account for 8 zeros and go on with the low byte
    subi    r26, -8
    or      r25, r24
    brne    1f
    ;; Argument is 0.  r25 is 0 already
    ldi     r24, 16
    ret
1:  ;; r25 != 0 is the byte that holds the most significant set bit
    cpi     r25, 16
    brsh    3f
    ;; High nibble is 0:  Move the low nibble up and account for
    ;; 4 zeros, 3 here and 1 by means of the INC below
    subi    r26, -3
    swap    r25
2:  inc     r26
3:  ;; Shift left until the most significant 1 arrives in Carry
    lsl     r25
    brcc    2b
    ;; Return the count, zero-extended to 16 bits
    mov     r24, r26
    clr     r25
    ret
ENDF __clzhi2
#endif /* defined (L_clzhi2) */
|
||
|
||
|
||
/**********************************
|
||
* Parity
|
||
**********************************/
|
||
|
||
#if defined (L_paritydi2)
;; r25:r24 = parity64 (r25:r18)
;; clobbers: __tmp_reg__
DEFUN __paritydi2
    ;; Fold the 4 low bytes into r24.  The parity of r25:r22 is the
    ;; parity of the complete value then
    eor     r24, r18
    eor     r24, r19
    eor     r24, r20
    eor     r24, r21
    XJMP    __paritysi2
ENDF __paritydi2
#endif /* defined (L_paritydi2) */
|
||
|
||
#if defined (L_paritysi2)
;; r25:r24 = parity32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __paritysi2
    ;; Fold the 2 low bytes into r24.  The parity of r25:r24 is the
    ;; parity of the complete value then
    eor     r24, r22
    eor     r24, r23
    XJMP    __parityhi2
ENDF __paritysi2
#endif /* defined (L_paritysi2) */
|
||
|
||
#if defined (L_parityhi2)
;; r25:r24 = parity16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __parityhi2
    ;; Fold the high byte into the low byte
    eor     r24, r25
    ;; FALLTHRU
ENDF __parityhi2

;; r25:r24 = parity8 (r24)
;; clobbers: __tmp_reg__
DEFUN __parityqi2
    ;; parity is in r24[0..7]
    ;; Fold the high nibble into the low nibble
    mov     __tmp_reg__, r24
    swap    __tmp_reg__
    eor     r24, __tmp_reg__
    ;; parity is in r24[0..3]
    ;; Note that -5 = 0xfb, i.e. the ANDI clears bit 2
    subi    r24, -4
    andi    r24, -5
    subi    r24, -6
    ;; parity is in r24[0,3]
    ;; Toggle bit 0 if bit 3 is set
    sbrc    r24, 3
    inc     r24
    ;; parity is in r24[0]
    ;; Return that bit, zero-extended to 16 bits
    andi    r24, 1
    clr     r25
    ret
ENDF __parityqi2
#endif /* defined (L_parityhi2) */
|
||
|
||
|
||
/**********************************
|
||
* Population Count
|
||
**********************************/
|
||
|
||
#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __popcounthi2
    ;; Count the low byte and park the result on the stack
    XCALL   __popcountqi2
    push    r24
    ;; Count the high byte
    mov     r24, r25
    XCALL   __popcountqi2
    clr     r25
    ;; FALLTHRU
ENDF __popcounthi2

;; Common tail of the popcount routines:
;; Pop the partial count pushed by the caller and add it to r24
DEFUN __popcounthi2_tail
    pop     __tmp_reg__
    add     r24, __tmp_reg__
    ret
ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */
|
||
|
||
#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __popcountsi2
    ;; Count the high 16 bits r25:r24 and park the result on the stack
    XCALL   __popcounthi2
    push    r24
    ;; Count the low 16 bits r23:r22
    mov_l   r24, r22
    mov_h   r25, r23
    XCALL   __popcounthi2
    ;; Add the parked count and return
    XJMP    __popcounthi2_tail
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */
|
||
|
||
#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
    ;; Count the high 32 bits r25:r22 and park the result on the stack
    XCALL   __popcountsi2
    push    r24
    ;; Count the low 32 bits r21:r18
    mov_l   r22, r18
    mov_h   r23, r19
    mov_l   r24, r20
    mov_h   r25, r21
    XCALL   __popcountsi2
    ;; Add the parked count and return
    XJMP    __popcounthi2_tail
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */
|
||
|
||
#if defined (L_popcountqi2)
;; population count
;; r24 = popcount8 (r24)
;; clobbers: __tmp_reg__
DEFUN __popcountqi2
    mov     __tmp_reg__, r24
    ;; Start the sum with bit 0...
    andi    r24, 1
    ;; ...and drop that bit from the copy
    lsr     __tmp_reg__
    ;; Shift bits 1..5 into Carry, one at a time, and add them up
    lsr     __tmp_reg__
    adc     r24, __zero_reg__
    lsr     __tmp_reg__
    adc     r24, __zero_reg__
    lsr     __tmp_reg__
    adc     r24, __zero_reg__
    lsr     __tmp_reg__
    adc     r24, __zero_reg__
    lsr     __tmp_reg__
    adc     r24, __zero_reg__
    ;; Bit 6 goes to Carry and bit 7 is what remains in __tmp_reg__:
    ;; Add both of them in one go
    lsr     __tmp_reg__
    adc     r24, __tmp_reg__
    ret
ENDF __popcountqi2
#endif /* defined (L_popcountqi2) */
|
||
|
||
|
||
/**********************************
|
||
* Swap bytes
|
||
**********************************/
|
||
|
||
;; swap two registers with different register number
;; This is an XOR swap that needs no scratch register.  If both operands
;; were the same register, that register would be cleared instead.
.macro  bswap a, b
    eor     \a, \b
    eor     \b, \a
    eor     \a, \b
.endm
|
||
|
||
#if defined (L_bswapsi2)
;; swap bytes
;; r25:r22 = bswap32 (r25:r22)
DEFUN __bswapsi2
    ;; Exchange the outer bytes, then the inner bytes
    bswap   r22, r25
    bswap   r23, r24
    ret
ENDF __bswapsi2
#endif /* defined (L_bswapsi2) */
|
||
|
||
#if defined (L_bswapdi2)
;; swap bytes
;; r25:r18 = bswap64 (r25:r18)
DEFUN __bswapdi2
    ;; Exchange the bytes pairwise from the outside to the inside
    bswap   r18, r25
    bswap   r19, r24
    bswap   r20, r23
    bswap   r21, r22
    ret
ENDF __bswapdi2
#endif /* defined (L_bswapdi2) */
|
||
|
||
|
||
/**********************************
|
||
* 64-bit shifts
|
||
**********************************/
|
||
|
||
#if defined (L_ashrdi3)
;; Arithmetic shift right
;; r25:r18 = ashr64 (r25:r18, r17:r16)
;; Only r16, the low byte of the shift count, is evaluated.
;; r16 is saved and restored.
;; clobbers: __tmp_reg__, T flag
DEFUN __ashrdi3
    ;; Park the sign bit in bit 0 of __zero_reg__.  The LSR at the start
    ;; of __lshrdi3 moves it to Carry and makes __zero_reg__ 0 again
    bst     r25, 7
    bld     __zero_reg__, 0
    ;; FALLTHRU
ENDF __ashrdi3

;; Logical shift right
;; r25:r18 = lshr64 (r25:r18, r17:r16)
;; Only r16, the low byte of the shift count, is evaluated.
;; r16 is saved and restored.
;; clobbers: __tmp_reg__
DEFUN __lshrdi3
    ;; Carry = 0 for the logical shift resp. the sign for the
    ;; arithmetic shift
    lsr     __zero_reg__
    ;; __tmp_reg__ = fill byte, 0x00 or 0xff depending on Carry
    sbc     __tmp_reg__, __tmp_reg__
    push    r16
0:  ;; Shift by whole bytes as long as the count is 8 or more
    cpi     r16, 8
    brlo    2f
    subi    r16, 8
    mov     r18, r19
    mov     r19, r20
    mov     r20, r21
    mov     r21, r22
    mov     r22, r23
    mov     r23, r24
    mov     r24, r25
    mov     r25, __tmp_reg__
    rjmp    0b
1:  ;; Shift by 1 bit.  ASR does not change the fill byte and
    ;; copies its bit 0 to Carry for the ROR chain
    asr     __tmp_reg__
    ror     r25
    ror     r24
    ror     r23
    ror     r22
    ror     r21
    ror     r20
    ror     r19
    ror     r18
2:  ;; Loop until the remaining count is used up
    dec     r16
    brpl    1b
    pop     r16
    ret
ENDF __lshrdi3
#endif /* defined (L_ashrdi3) */
|
||
|
||
#if defined (L_ashldi3)
;; Shift left
;; r25:r18 = ashl64 (r25:r18, r17:r16)
;; Only r16, the low byte of the shift count, is evaluated.
;; r16 is saved and restored.
DEFUN __ashldi3
    push    r16
0:  ;; Shift by whole bytes as long as the count is 8 or more
    cpi     r16, 8
    brlo    2f
    mov     r25, r24
    mov     r24, r23
    mov     r23, r22
    mov     r22, r21
    mov     r21, r20
    mov     r20, r19
    mov     r19, r18
    clr     r18
    subi    r16, 8
    rjmp    0b
1:  ;; Shift by 1 bit
    lsl     r18
    rol     r19
    rol     r20
    rol     r21
    rol     r22
    rol     r23
    rol     r24
    rol     r25
2:  ;; Loop until the remaining count is used up
    dec     r16
    brpl    1b
    pop     r16
    ret
ENDF __ashldi3
#endif /* defined (L_ashldi3) */
|
||
|
||
#if defined (L_rotldi3)
;; Rotate left
;; r25:r18 = rotl64 (r25:r18, r17:r16)
;; Only r16, the low byte of the rotate count, is evaluated.
;; r16 is saved and restored.
;; clobbers: __tmp_reg__
DEFUN __rotldi3
    push    r16
0:  ;; Rotate by whole bytes as long as the count is 8 or more
    cpi     r16, 8
    brlo    2f
    subi    r16, 8
    mov     __tmp_reg__, r25
    mov     r25, r24
    mov     r24, r23
    mov     r23, r22
    mov     r22, r21
    mov     r21, r20
    mov     r20, r19
    mov     r19, r18
    mov     r18, __tmp_reg__
    rjmp    0b
1:  ;; Rotate by 1 bit
    lsl     r18
    rol     r19
    rol     r20
    rol     r21
    rol     r22
    rol     r23
    rol     r24
    rol     r25
    ;; Feed the bit that left r25 back into bit 0 of r18
    adc     r18, __zero_reg__
2:  ;; Loop until the remaining count is used up
    dec     r16
    brpl    1b
    pop     r16
    ret
ENDF __rotldi3
#endif /* defined (L_rotldi3) */
|
||
|
||
|
||
;; The software FMUL routines below are emitted into this input section.
.section .text.libgcc.fmul, "ax", @progbits
|
||
|
||
/***********************************************************/
|
||
;;; Softmul versions of FMUL, FMULS and FMULSU to implement
|
||
;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
|
||
/***********************************************************/
|
||
|
||
;; Register aliases used by __fmul, __fmuls and __fmulsu:
;; A1 and B1 are the two 8-bit factors, C1:C0 is the 16-bit result.
;; A0 is a scratch byte:  It holds the sign flag in __fmuls and __fmulsu
;; and serves as the low byte below A1 in __fmul.
#define A1 24
#define B1 25
#define C0 22
#define C1 23
#define A0 __tmp_reg__
|
||
|
||
#ifdef L_fmuls
;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmuls
    ;; A0.7 = negate result?
    ;; The bit is set iff the signs of the two factors differ
    mov     A0, A1
    eor     A0, B1
    ;; B1 = |B1|
    sbrc    B1, 7
    neg     B1
    ;; The helper takes the absolute value of the other factor,
    ;; multiplies and fixes up the sign
    XJMP    __fmulsu_exit
ENDF __fmuls
#endif /* L_fmuls */
|
||
|
||
#ifdef L_fmulsu
;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmulsu
    ;; A0.7 = negate result?
    ;; Only the first factor carries a sign here
    mov     A0, A1
    ;; FALLTHRU
ENDF __fmulsu

;; Helper for __fmuls and __fmulsu
;; Expects bit 7 of the scratch byte to be set iff the product
;; has to be negated.
DEFUN __fmulsu_exit
    ;; A1 = |A1|
    sbrc    A1, 7
    neg     A1
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problem skipping 2-word instruction
    tst     A0
    brmi    1f
#else
    ;; Skip the jump below if the product has to be negated
    sbrs    A0, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    ;; No negation needed:  The unsigned product is the result
    XJMP    __fmul
1:  XCALL   __fmul
    ;; C = -C iff A0.7 = 1
    NEG2    C0
    ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */
|
||
|
||
|
||
#ifdef L_fmul
;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmul
    ; clear result
    clr     C0
    clr     C1
    ;; The scratch byte extends the first factor by 8 bits to the right
    clr     A0
1:  tst     B1
    ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
    ;; On re-entry from the end of the loop, the N flag comes from the LSL.
2:  brpl    3f
    ;; C += A
    add     C0, A0
    adc     C1, A1
3:  ;; A >>= 1
    lsr     A1
    ror     A0
    ;; B <<= 1
    lsl     B1
    ;; Stop as soon as no bits are left in B
    brne    2b
    ret
ENDF __fmul
#endif /* L_fmul */
|
||
|
||
;; Drop the register aliases of the fmul routines again
#undef A0
#undef A1
#undef B1
#undef C0
#undef C1

#include "lib1funcs-fixed.S"
|