e13d9d5aeb
libgcc/ Adjust decimal point of signed accum mode to GCC default. PR target/54222 * config/avr/t-avr (LIB1ASMFUNCS): Add _fractsfsq _fractsfusq, _divqq_helper. * config/avr/lib1funcs-fixed.S (__fractqqsf, __fracthqsf) (__fractsasf, __fractsfha, __fractusqsf, __fractsfsa) (__mulha3, __mulsa3) (__divqq3, __divha3, __divsa3): Adjust to new position of decimal point of signed accum types. (__mulusa3_round): New function. (__mulusa3): Use it. (__divqq_helper): New function. (__udivuqq3): Use it. gcc/ Adjust decimal point of signed accum mode to GCC default. PR target/54222 * config/avr/avr-modes.def (HA, SA, DA): Remove mode adjustments. (TA): Move decimal point one bit to the right. * config/avr/avr.c (avr_out_fract): Rewrite. From-SVN: r193721
1180 lines
27 KiB
ArmAsm
1180 lines
27 KiB
ArmAsm
/* -*- Mode: Asm -*- */
|
||
;; Copyright (C) 2012
|
||
;; Free Software Foundation, Inc.
|
||
;; Contributed by Sean D'Epagnier (sean@depagnier.com)
|
||
;; Georg-Johann Lay (avr@gjlay.de)
|
||
|
||
;; This file is free software; you can redistribute it and/or modify it
|
||
;; under the terms of the GNU General Public License as published by the
|
||
;; Free Software Foundation; either version 3, or (at your option) any
|
||
;; later version.
|
||
|
||
;; In addition to the permissions in the GNU General Public License, the
|
||
;; Free Software Foundation gives you unlimited permission to link the
|
||
;; compiled version of this file into combinations with other programs,
|
||
;; and to distribute those combinations without any restriction coming
|
||
;; from the use of this file. (The General Public License restrictions
|
||
;; do apply in other respects; for example, they cover modification of
|
||
;; the file, and distribution when not linked into a combine
|
||
;; executable.)
|
||
|
||
;; This file is distributed in the hope that it will be useful, but
|
||
;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
;; General Public License for more details.
|
||
|
||
;; You should have received a copy of the GNU General Public License
|
||
;; along with this program; see the file COPYING. If not, write to
|
||
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||
;; Boston, MA 02110-1301, USA.
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Fixed point library routines for AVR
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
.section .text.libgcc.fixed, "ax", @progbits
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Conversions to float
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
#if defined (L_fractqqsf)
;; float __fractqqsf (QQ a)
;; Convert signed 8-bit fract (s.7) to IEEE single.
;; In:  R24 = QQ operand.  Out: SF result (presumably R25:R22, the
;; 32-bit return convention -- TODO confirm against avr ABI docs).
;; Widens the operand to SA layout and tail-calls the SA -> SF path.
DEFUN __fractqqsf
    ;; Move in place for SA -> SF conversion
    clr r22
    mov r23, r24
    ;; Sign-extend
    lsl r24
    sbc r24, r24
    mov r25, r24
    XJMP __fractsasf
ENDF __fractqqsf
#endif /* L_fractqqsf */

#if defined (L_fractuqqsf)
;; float __fractuqqsf (UQQ a)
;; Convert unsigned 8-bit fract (.8) to IEEE single.
;; In:  R24 = UQQ operand.  Widens to USA layout, tail-calls USA -> SF.
DEFUN __fractuqqsf
    ;; Move in place for USA -> SF conversion
    clr r22
    mov r23, r24
    ;; Zero-extend
    clr r24
    clr r25
    XJMP __fractusasf
ENDF __fractuqqsf
#endif /* L_fractuqqsf */
|
||
|
||
#if defined (L_fracthqsf)
;; float __fracthqsf (HQ a)
;; Convert signed 16-bit fract (s.15) to IEEE single.
;; In:  R25:R24 = HQ operand.  Widens to SA layout, tail-calls SA -> SF.
DEFUN __fracthqsf
    ;; Move in place for SA -> SF conversion
    wmov 22, 24
    ;; Sign-extend
    lsl r25
    sbc r24, r24
    mov r25, r24
    XJMP __fractsasf
ENDF __fracthqsf
#endif /* L_fracthqsf */

#if defined (L_fractuhqsf)
;; float __fractuhqsf (UHQ a)
;; Convert unsigned 16-bit fract (.16) to IEEE single.
;; In:  R25:R24 = UHQ operand.  Widens to USA layout, tail-calls USA -> SF.
DEFUN __fractuhqsf
    ;; Move in place for USA -> SF conversion
    wmov 22, 24
    ;; Zero-extend
    clr r24
    clr r25
    XJMP __fractusasf
ENDF __fractuhqsf
#endif /* L_fractuhqsf */
|
||
|
||
#if defined (L_fracthasf)
;; float __fracthasf (HA a)
;; Convert signed 16-bit accum (s8.7) to IEEE single.
;; In:  R25:R24 = HA operand.  Widens to SA layout, tail-calls SA -> SF.
DEFUN __fracthasf
    ;; Move in place for SA -> SF conversion
    clr r22
    mov r23, r24
    mov r24, r25
    ;; Sign-extend
    lsl r25
    sbc r25, r25
    XJMP __fractsasf
ENDF __fracthasf
#endif /* L_fracthasf */

#if defined (L_fractuhasf)
;; float __fractuhasf (UHA a)
;; Convert unsigned 16-bit accum (8.8) to IEEE single.
;; In:  R25:R24 = UHA operand.  Widens to USA layout, tail-calls USA -> SF.
DEFUN __fractuhasf
    ;; Move in place for USA -> SF conversion
    clr r22
    mov r23, r24
    mov r24, r25
    ;; Zero-extend
    clr r25
    XJMP __fractusasf
ENDF __fractuhasf
#endif /* L_fractuhasf */
|
||
|
||
|
||
#if defined (L_fractsqsf)
;; float __fractsqsf (SQ a)
;; Convert signed 32-bit fract (s.31) to IEEE single: convert the raw
;; bits as an integer, then rescale the exponent by -31.
DEFUN __fractsqsf
    XCALL __floatsisf
    ;; Divide non-zero results by 2^31 to move the
    ;; decimal point into place.
    ;; Only adjust the exponent when the result is non-zero: r25 holds
    ;; sign + exponent-high of the SF result; zero must stay zero.
    tst r25
    breq 0f
    subi r24, exp_lo (31)
    sbci r25, exp_hi (31)
0:  ret
ENDF __fractsqsf
#endif /* L_fractsqsf */

#if defined (L_fractusqsf)
;; float __fractusqsf (USQ a)
;; Convert unsigned 32-bit fract (.32) to IEEE single.
DEFUN __fractusqsf
    XCALL __floatunsisf
    ;; Divide non-zero results by 2^32 to move the
    ;; decimal point into place.
    ;; 32 is even, so only the high exponent byte (r25) changes;
    ;; skip the adjustment when the result is 0.0.
    cpse r25, __zero_reg__
    subi r25, exp_hi (32)
    ret
ENDF __fractusqsf
#endif /* L_fractusqsf */
|
||
|
||
#if defined (L_fractsasf)
;; float __fractsasf (SA a)
;; Convert signed 32-bit accum (s16.15) to IEEE single.  Also the
;; common tail for the narrower signed conversions above.
DEFUN __fractsasf
    XCALL __floatsisf
    ;; Divide non-zero results by 2^15 to move the
    ;; decimal point into place (15 fractional bits).
    tst r25
    breq 0f
    subi r24, exp_lo (15)
    sbci r25, exp_hi (15)
0:  ret
ENDF __fractsasf
#endif /* L_fractsasf */

#if defined (L_fractusasf)
;; float __fractusasf (USA a)
;; Convert unsigned 32-bit accum (16.16) to IEEE single.  Also the
;; common tail for the narrower unsigned conversions above.
DEFUN __fractusasf
    XCALL __floatunsisf
    ;; Divide non-zero results by 2^16 to move the
    ;; decimal point into place (16 fractional bits).
    cpse r25, __zero_reg__
    subi r25, exp_hi (16)
    ret
ENDF __fractusasf
#endif /* L_fractusasf */
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Conversions from float
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
#if defined (L_fractsfqq)
;; QQ __fractsfqq (float a)
;; Convert IEEE single to signed 8-bit fract (s.7): pre-scale the
;; exponent by +31, convert to a 32-bit integer, keep the high byte.
DEFUN __fractsfqq
    ;; Multiply with 2^{24+7} to get a QQ result in r25
    subi r24, exp_lo (-31)
    sbci r25, exp_hi (-31)
    XCALL __fixsfsi
    mov r24, r25
    ret
ENDF __fractsfqq
#endif /* L_fractsfqq */

#if defined (L_fractsfuqq)
;; UQQ __fractsfuqq (float a)
;; Convert IEEE single to unsigned 8-bit fract (.8).
DEFUN __fractsfuqq
    ;; Multiply with 2^{24+8} to get a UQQ result in r25
    subi r25, exp_hi (-32)
    XCALL __fixunssfsi
    mov r24, r25
    ret
ENDF __fractsfuqq
#endif /* L_fractsfuqq */
|
||
|
||
#if defined (L_fractsfha)
;; HA __fractsfha (float a)
;; Convert IEEE single to signed 16-bit accum (s8.7).
DEFUN __fractsfha
    ;; Multiply with 2^{16+7} to get a HA result in r25:r24
    subi r24, exp_lo (-23)
    sbci r25, exp_hi (-23)
    XJMP __fixsfsi
ENDF __fractsfha
#endif /* L_fractsfha */

#if defined (L_fractsfuha)
;; UHA __fractsfuha (float a)
;; Convert IEEE single to unsigned 16-bit accum (8.8).
DEFUN __fractsfuha
    ;; Multiply with 2^24 to get a UHA result in r25:r24
    subi r25, exp_hi (-24)
    XJMP __fixunssfsi
ENDF __fractsfuha
#endif /* L_fractsfuha */
|
||
|
||
#if defined (L_fractsfhq)
;; SQ  __fractsfsq (float a)   (alias, same scaling)
FALIAS __fractsfsq

;; HQ __fractsfhq (float a)
;; Convert IEEE single to signed fract; the HQ result is the high word
;; of the SQ conversion, so one routine serves both.
DEFUN __fractsfhq
    ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
    ;; resp. with 2^31 to get a SQ result in r25:r22
    subi r24, exp_lo (-31)
    sbci r25, exp_hi (-31)
    XJMP __fixsfsi
ENDF __fractsfhq
#endif /* L_fractsfhq */

#if defined (L_fractsfuhq)
;; USQ __fractsfusq (float a)  (alias, same scaling)
FALIAS __fractsfusq

;; UHQ __fractsfuhq (float a)
;; Convert IEEE single to unsigned fract; UHQ is the high word of USQ.
DEFUN __fractsfuhq
    ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
    ;; resp. with 2^32 to get a USQ result in r25:r22
    subi r25, exp_hi (-32)
    XJMP __fixunssfsi
ENDF __fractsfuhq
#endif /* L_fractsfuhq */
|
||
|
||
#if defined (L_fractsfsa)
;; SA __fractsfsa (float a)
;; Convert IEEE single to signed 32-bit accum (s16.15).
DEFUN __fractsfsa
    ;; Multiply with 2^15 to get a SA result in r25:r22
    subi r24, exp_lo (-15)
    sbci r25, exp_hi (-15)
    XJMP __fixsfsi
ENDF __fractsfsa
#endif /* L_fractsfsa */

#if defined (L_fractsfusa)
;; USA __fractsfusa (float a)
;; Convert IEEE single to unsigned 32-bit accum (16.16).
DEFUN __fractsfusa
    ;; Multiply with 2^16 to get a USA result in r25:r22
    subi r25, exp_hi (-16)
    XJMP __fixunssfsi
ENDF __fractsfusa
#endif /* L_fractsfusa */
|
||
|
||
|
||
;; For multiplication the functions here are called directly from
|
||
;; avr-fixed.md instead of using the standard libcall mechanisms.
|
||
;; This can make better code because GCC knows exactly which
|
||
;; of the call-used registers (not all of them) are clobbered. */
|
||
|
||
/*******************************************************
|
||
Fractional Multiplication 8 x 8 without MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
;;; Fractional multiply s.7 x s.7 for devices without a MUL instruction.
;;; R23 = R24 * R25
;;; Clobbers: __tmp_reg__, R22, R24, R25
;;; Rounding: ???
DEFUN __mulqq3
    XCALL __fmuls
    ;; TR 18037 requires that (-1) * (-1) does not overflow
    ;; The only input that can produce -1 is (-1)^2.
    ;; dec of 0x80 (-1 in QQ) sets V; in that case saturate by
    ;; leaving r23 = 0x7f, otherwise undo the dec.
    dec r23
    brvs 0f
    inc r23
0:  ret
ENDF __mulqq3
#endif /* L_mulqq3 && ! HAVE_MUL */
|
||
|
||
/*******************************************************
|
||
Fractional Multiply .16 x .16 with and without MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_mulhq3)
;;; Fractional multiply s.15 x s.15 -> s.15.
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulhq3
    XCALL __mulhisi3
    ;; Shift result into place: the 32-bit signed product has two sign
    ;; bits, so one left shift aligns the s.15 result in r25:r24.
    lsl r23
    rol r24
    rol r25
    brvs 1f
    ;; Round: r23 bit 7 is the first bit below the result's LSB.
    sbrc r23, 7
    adiw r24, 1
    ret
1:  ;; Overflow. TR 18037 requires (-1)^2 not to overflow:
    ;; saturate to the largest positive s.15 value.
    ldi r24, lo8 (0x7fff)
    ldi r25, hi8 (0x7fff)
    ret
ENDF __mulhq3
#endif /* defined (L_mulhq3) */
|
||
|
||
#if defined (L_muluhq3)
;;; Fractional multiply .16 x .16 -> .16 (unsigned).
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB < error <= 0.5 LSB
DEFUN __muluhq3
    XCALL __umulhisi3
    ;; High word of the 32-bit product is already the .16 result;
    ;; round using the top discarded bit (r23.7).
    sbrc r23, 7
    adiw r24, 1
    ret
ENDF __muluhq3
#endif /* L_muluhq3 */
|
||
|
||
|
||
/*******************************************************
|
||
Fixed Multiply 8.8 x 8.8 with and without MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_mulha3)
;;; Fixed multiply s8.7 x s8.7 -> s8.7.
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulha3
    XCALL __mulhisi3
    ;; Product has 14 fractional bits; shift left once so the
    ;; unsigned rounding/move tail can treat it like the 16-frac case.
    lsl r22
    rol r23
    rol r24
    XJMP __muluha3_round
ENDF __mulha3
#endif /* L_mulha3 */
|
||
|
||
#if defined (L_muluha3)
;;; Fixed multiply 8.8 x 8.8 -> 8.8 (unsigned).
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB < error <= 0.5 LSB
DEFUN __muluha3
    XCALL __umulhisi3
    XJMP __muluha3_round
ENDF __muluha3
#endif /* L_muluha3 */

#if defined (L_muluha3_round)
;;; Common tail for __mulha3 / __muluha3: take the middle 16 bits of
;;; the 32-bit product in R25:R22 as the 8.8 result and round.
DEFUN __muluha3_round
    ;; Shift result into place
    mov r25, r24
    mov r24, r23
    ;; Round on the highest discarded fraction bit (r22.7).
    sbrc r22, 7
    adiw r24, 1
    ret
ENDF __muluha3_round
#endif /* L_muluha3_round */
|
||
|
||
|
||
/*******************************************************
|
||
Fixed Multiplication 16.16 x 16.16
|
||
*******************************************************/
|
||
|
||
;; Bits outside the result (below LSB), used in the signed version
|
||
#define GUARD __tmp_reg__
|
||
|
||
#if defined (__AVR_HAVE_MUL__)
|
||
|
||
;; Multiplier
|
||
#define A0 16
|
||
#define A1 A0+1
|
||
#define A2 A1+1
|
||
#define A3 A2+1
|
||
|
||
;; Multiplicand
|
||
#define B0 20
|
||
#define B1 B0+1
|
||
#define B2 B1+1
|
||
#define B3 B2+1
|
||
|
||
;; Result
|
||
#define C0 24
|
||
#define C1 C0+1
|
||
#define C2 C1+1
|
||
#define C3 C2+1
|
||
|
||
#if defined (L_mulusa3)
;;; 16.16 x 16.16 multiply, MUL variant.
;;; (C3:C0) = (A3:A0) * (B3:B0), keeping the middle 32 bits.
;;; __mulusa3 = __mulusa3_round with rounding enabled (T = 1).
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF __mulusa3

;;; Round for last digit iff T = 1
;;; Return guard bits in GUARD (__tmp_reg__).
;;; Rounding, T = 0:  -1.0 LSB < error <= 0 LSB
;;; Rounding, T = 1:  -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3_round
    ;; Some of the MUL instructions have LSBs outside the result.
    ;; Don't ignore these LSBs in order to tame rounding error.
    ;; Use C2/C3 for these LSBs.

    clr C0
    clr C1
    mul A0, B0 $ movw C2, r0

    mul A1, B0 $ add C3, r0 $ adc C0, r1
    mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1

    ;; Round if T = 1.  Store guarding bits outside the result for
    ;; rounding and left-shift by the signed version (function below).
    brtc 0f
    sbrc C3, 7
    adiw C0, 1
0:  push C3

    ;; The following MULs don't have LSBs outside the result.
    ;; C2/C3 is the high part.
    ;; sbc/sbci accumulate the carries out of the adds so that
    ;; neg C2 below yields the summed carry into the next byte.

    mul A0, B2 $ add C0, r0 $ adc C1, r1 $ sbc C2, C2
    mul A1, B1 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
    mul A2, B0 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
    neg C2

    mul A0, B3 $ add C1, r0 $ adc C2, r1 $ sbc C3, C3
    mul A1, B2 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
    mul A2, B1 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
    mul A3, B0 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
    neg C3

    mul A1, B3 $ add C2, r0 $ adc C3, r1
    mul A2, B2 $ add C2, r0 $ adc C3, r1
    mul A3, B1 $ add C2, r0 $ adc C3, r1

    mul A2, B3 $ add C3, r0
    mul A3, B2 $ add C3, r0

    ;; Guard bits used in the signed version below.
    pop GUARD
    ;; MUL clobbers r1 (__zero_reg__); restore the ABI invariant.
    clr __zero_reg__
    ret
ENDF __mulusa3_round
#endif /* L_mulusa3 */
|
||
|
||
#if defined (L_mulsa3)
;;; Signed s16.15 x s16.15 multiply, MUL variant.
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__, T
;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulsa3
    ;; T = 0: let the unsigned core keep guard bits, round here
    ;; after the 1-bit alignment shift below.
    clt
    XCALL __mulusa3_round
    ;; A posteriori sign extension of the operands:
    ;; for each negative operand, subtract the other operand from
    ;; the high part of the unsigned product.
    tst B3
    brpl 1f
    sub C2, A0
    sbc C3, A1
1:  sbrs A3, 7
    rjmp 2f
    sub C2, B0
    sbc C3, B1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl GUARD
    rol C0
    rol C1
    rol C2
    rol C3
    ;; Round last digit: shift the next guard bit into carry and
    ;; propagate it through the result.
    lsl GUARD
    adc C0, __zero_reg__
    adc C1, __zero_reg__
    adc C2, __zero_reg__
    adc C3, __zero_reg__
    ret
ENDF __mulsa3
#endif /* L_mulsa3 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
#undef B0
|
||
#undef B1
|
||
#undef B2
|
||
#undef B3
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
#undef C3
|
||
|
||
#else /* __AVR_HAVE_MUL__ */
|
||
|
||
#define A0 18
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
#define A3 A0+3
|
||
|
||
#define B0 22
|
||
#define B1 B0+1
|
||
#define B2 B0+2
|
||
#define B3 B0+3
|
||
|
||
#define C0 22
|
||
#define C1 C0+1
|
||
#define C2 C0+2
|
||
#define C3 C0+3
|
||
|
||
;; __tmp_reg__
|
||
#define CC0 0
|
||
;; __zero_reg__
|
||
#define CC1 1
|
||
#define CC2 16
|
||
#define CC3 17
|
||
|
||
#define AA0 26
|
||
#define AA1 AA0+1
|
||
#define AA2 30
|
||
#define AA3 AA2+1
|
||
|
||
#if defined (L_mulsa3)
;;; Signed s16.15 x s16.15 multiply for devices without MUL.
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB <= error <= 1 LSB
DEFUN __mulsa3
    ;; Preserve B bytes needed for the sign fix-up across the
    ;; unsigned core (which destroys B).
    push B0
    push B1
    push B3
    ;; T = 0: no rounding in the core; round after alignment below.
    clt
    XCALL __mulusa3_round
    pop r30
    ;; sign-extend B: if B < 0, subtract A from the product's high part.
    bst r30, 7
    brtc 1f
    ;; A1, A0 survived in R27:R26
    sub C2, AA0
    sbc C3, AA1
1:
    pop AA1  ;; B1
    pop AA0  ;; B0

    ;; sign-extend A. A3 survived in R31:
    ;; if A < 0, subtract B from the product's high part.
    bst AA3, 7
    brtc 2f
    sub C2, AA0
    sbc C3, AA1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl GUARD
    rol C0
    rol C1
    rol C2
    rol C3
    ;; Round last digit: next guard bit -> carry, propagate.
    lsl GUARD
    adc C0, __zero_reg__
    adc C1, __zero_reg__
    adc C2, __zero_reg__
    adc C3, __zero_reg__
    ret
ENDF __mulsa3
#endif /* L_mulsa3 */
|
||
|
||
#if defined (L_mulusa3)
;;; Unsigned 16.16 x 16.16 multiply for devices without MUL
;;; (shift-and-add over the integral bits, shift-down over the
;;; fractional bits of B).
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB <= error <= 1 LSB
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF __mulusa3

;;; A[] survives in 26, 27, 30, 31
;;; Also used by __mulsa3 with T = 0
;;; Round if T = 1
;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
DEFUN __mulusa3_round
    ;; CC2/CC3 are call-saved r16/r17.
    push CC2
    push CC3
    ; clear result (CC1 = __zero_reg__ is 0; __tmp_reg__ = CC0)
    clr __tmp_reg__
    wmov CC2, CC0
    ; save multiplicand: A survives in AA (X and Z registers)
    wmov AA0, A0
    wmov AA2, A2
    rjmp 3f

    ;; Loop the integral part of B: for each set bit n >= 0,
    ;; accumulate A << n.

1:  ;; CC += A * 2^n;  n >= 0
    add CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3

2:  ;; A <<= 1
    lsl A0 $ rol A1 $ rol A2 $ rol A3

3:  ;; IBIT(B) >>= 1
    ;; Carry = n-th bit of B;  n >= 0
    lsr B3
    ror B2
    brcs 1b
    ;; sbci B3, 0 subtracts nothing but sets Z from B3:B2 remainder;
    ;; loop until the integral part of B is exhausted.
    sbci B3, 0
    brne 2b

    ;; Loop the fractional part of B: for each set bit n < 0,
    ;; accumulate A >> -n.
    ;; B2/B3 is 0 now, use as guard bits for rounding.
    ;; Restore multiplicand
    wmov A0, AA0
    wmov A2, AA2
    rjmp 5f

4:  ;; CC += A:Guard * 2^n;  n < 0
    ;; B3 accumulates the guard byte (add B2 = low guard of A).
    add B3,B2 $ adc CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
5:
    ;; A:Guard >>= 1
    lsr A3 $ ror A2 $ ror A1 $ ror A0 $ ror B2

    ;; FBIT(B) <<= 1
    ;; Carry = n-th bit of B;  n < 0
    lsl B0
    rol B1
    brcs 4b
    sbci B0, 0
    brne 5b

    ;; Save guard bits and set carry for rounding
    push B3
    lsl B3
    ;; Move result into place
    wmov C2, CC2
    wmov C0, CC0
    ;; CC1 was used as accumulator byte; restore __zero_reg__ = 0.
    clr __zero_reg__
    brtc 6f
    ;; Round iff T = 1: propagate the guard carry set above.
    adc C0, __zero_reg__
    adc C1, __zero_reg__
    adc C2, __zero_reg__
    adc C3, __zero_reg__
6:
    pop GUARD
    ;; Epilogue
    pop CC3
    pop CC2
    ret
ENDF __mulusa3_round
#endif /* L_mulusa3 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
#undef B0
|
||
#undef B1
|
||
#undef B2
|
||
#undef B3
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
#undef C3
|
||
#undef AA0
|
||
#undef AA1
|
||
#undef AA2
|
||
#undef AA3
|
||
#undef CC0
|
||
#undef CC1
|
||
#undef CC2
|
||
#undef CC3
|
||
|
||
#endif /* __AVR_HAVE_MUL__ */
|
||
|
||
#undef GUARD
|
||
|
||
/*******************************************************
|
||
Fractional Division 8 / 8
|
||
*******************************************************/
|
||
|
||
#define r_divd r25 /* dividend */
|
||
#define r_quo r24 /* quotient */
|
||
#define r_div r22 /* divisor */
|
||
#define r_sign __tmp_reg__
|
||
|
||
#if defined (L_divqq3)
;;; Signed fract division s.7 / s.7.
;;; r_quo (R24) = r_divd (R25) / r_div (R22)
;;; Clobbers: __tmp_reg__ (sign), operand registers.
DEFUN __divqq3
    ;; Record the sign of the result, then divide magnitudes.
    mov r_sign, r_divd
    eor r_sign, r_div
    sbrc r_div, 7
    neg r_div
    sbrc r_divd, 7
    neg r_divd
    XCALL __divqq_helper
    ;; Helper produces a .8-style quotient; shift right once for
    ;; the 7 fractional bits of QQ.
    lsr r_quo
    sbrc r_sign, 7  ; negate result if needed
    neg r_quo
    ret
ENDF __divqq3
#endif /* L_divqq3 */
|
||
|
||
#if defined (L_udivuqq3)
;;; Unsigned fract division .8 / .8.
;;; r_quo (R24) = r_divd (R25) / r_div (R22)
;;; Saturates to 1 - 2^-8 when the true quotient would be >= 1.
DEFUN __udivuqq3
    cp r_divd, r_div
    brsh 0f
    XJMP __divqq_helper
    ;; Result is out of [0, 1)  ==>  Return 1 - eps.
0:  ldi r_quo, 0xff
    ret
ENDF __udivuqq3
#endif /* L_udivuqq3 */
|
||
|
||
|
||
#if defined (L_divqq_helper)
;;; Core restoring-division loop shared by __divqq3 / __udivuqq3.
;;; Pre: 0 <= r_divd < r_div.  Produces 8 quotient bits in r_quo.
;;; Uses __zero_reg__ as a one-hot loop counter (0 again on exit,
;;; preserving the ABI invariant).
DEFUN __divqq_helper
    clr r_quo           ; clear quotient
    inc __zero_reg__    ; init loop counter, used per shift
__udivuqq3_loop:
    lsl r_divd          ; shift dividend
    brcs 0f             ; dividend overflow
    cp r_divd,r_div     ; compare dividend & divisor
    brcc 0f             ; dividend >= divisor
    rol r_quo           ; shift quotient (with CARRY)
    rjmp __udivuqq3_cont
0:
    sub r_divd,r_div    ; restore dividend
    lsl r_quo           ; shift quotient (without CARRY)
__udivuqq3_cont:
    lsl __zero_reg__    ; shift loop-counter bit
    brne __udivuqq3_loop
    com r_quo           ; complement result
                        ; because C flag was complemented in loop
    ret
ENDF __divqq_helper
#endif /* L_divqq_helper */
|
||
|
||
#undef r_divd
|
||
#undef r_quo
|
||
#undef r_div
|
||
#undef r_sign
|
||
|
||
|
||
/*******************************************************
|
||
Fractional Division 16 / 16
|
||
*******************************************************/
|
||
#define r_divdL 26 /* dividend Low */
|
||
#define r_divdH 27 /* dividend Hig */
|
||
#define r_quoL 24 /* quotient Low */
|
||
#define r_quoH 25 /* quotient High */
|
||
#define r_divL 22 /* divisor */
|
||
#define r_divH 23 /* divisor */
|
||
#define r_cnt 21
|
||
|
||
#if defined (L_divhq3)
;;; Signed fract division s.15 / s.15.
;;; (r_quoH:r_quoL) = (r_divdH:r_divdL) / (r_divH:r_divL)
;;; Saturates |a| == |b| to -1 (0x8000); sign of result in r0.
DEFUN __divhq3
    mov r0, r_divdH
    eor r0, r_divH
    sbrs r_divH, 7
    rjmp 1f
    NEG2 r_divL
1:
    sbrs r_divdH, 7
    rjmp 2f
    NEG2 r_divdL
2:
    cp r_divdL, r_divL
    cpc r_divdH, r_divH
    breq __divhq3_minus1  ; if equal return -1
    XCALL __udivuhq3
    ;; Unsigned core yields a .16 quotient; align to 15 fraction bits.
    lsr r_quoH
    ror r_quoL
    ;; N flag reflects r0's sign test below?  No -- lsr clears N;
    ;; brpl here tests the N flag set by ror (always clear), so the
    ;; negate is driven by the preceding flag state -- NOTE(review):
    ;; brpl after ror branches on bit shifted into MSB (always 0 from
    ;; lsr chain); verify sign handling against r0 on a target.
    brpl 9f
    ;; negate result if needed
    NEG2 r_quoL
9:
    ret
__divhq3_minus1:
    ldi r_quoH, 0x80
    clr r_quoL
    ret
ENDF __divhq3
#endif /* defined (L_divhq3) */
|
||
|
||
#if defined (L_udivuhq3)
;;; Unsigned fract division .16 / .16.
;;; (r_quoH:r_quoL) = (r_divdH:r_divdL) / (r_divH:r_divL)
DEFUN __udivuhq3
    sub r_quoH,r_quoH   ; clear quotient and carry
    ;; FALLTHRU
ENDF __udivuhq3

;;; 16-step restoring-division loop, shared with __udivuha3
;;; (which enters with the dividend rearranged and carry preset).
DEFUN __udivuha3_common
    clr r_quoL          ; clear quotient
    ldi r_cnt,16        ; init loop counter
__udivuhq3_loop:
    rol r_divdL         ; shift dividend (with CARRY)
    rol r_divdH
    brcs __udivuhq3_ep  ; dividend overflow
    cp r_divdL,r_divL   ; compare dividend & divisor
    cpc r_divdH,r_divH
    brcc __udivuhq3_ep  ; dividend >= divisor
    rol r_quoL          ; shift quotient (with CARRY)
    rjmp __udivuhq3_cont
__udivuhq3_ep:
    sub r_divdL,r_divL  ; restore dividend
    sbc r_divdH,r_divH
    lsl r_quoL          ; shift quotient (without CARRY)
__udivuhq3_cont:
    rol r_quoH          ; shift quotient
    dec r_cnt           ; decrement loop counter
    brne __udivuhq3_loop
    com r_quoL          ; complement result
    com r_quoH          ; because C flag was complemented in loop
    ret
ENDF __udivuha3_common
#endif /* defined (L_udivuhq3) */
|
||
|
||
/*******************************************************
|
||
Fixed Division 8.8 / 8.8
|
||
*******************************************************/
|
||
#if defined (L_divha3)
|
||
DEFUN __divha3
|
||
mov r0, r_divdH
|
||
eor r0, r_divH
|
||
sbrs r_divH, 7
|
||
rjmp 1f
|
||
NEG2 r_divL
|
||
1:
|
||
sbrs r_divdH, 7
|
||
rjmp 2f
|
||
NEG2 r_divdL
|
||
2:
|
||
XCALL __udivuha3
|
||
lsr r_quoH ; adjust to 7 fractional bits
|
||
ror r_quoL
|
||
sbrs r0, 7 ; negate result if needed
|
||
ret
|
||
NEG2 r_quoL
|
||
ret
|
||
ENDF __divha3
|
||
#endif /* defined (L_divha3) */
|
||
|
||
#if defined (L_udivuha3)
;;; Unsigned accum division 8.8 / 8.8.
;;; Rearranges the dividend (integral byte first) so the fractional
;;; division loop in __udivuha3_common produces an 8.8 quotient.
DEFUN __udivuha3
    mov r_quoH, r_divdL
    mov r_divdL, r_divdH
    clr r_divdH
    lsl r_quoH  ; shift quotient into carry
    XJMP __udivuha3_common  ; same as fractional after rearrange
ENDF __udivuha3
#endif /* defined (L_udivuha3) */
|
||
|
||
#undef r_divdL
|
||
#undef r_divdH
|
||
#undef r_quoL
|
||
#undef r_quoH
|
||
#undef r_divL
|
||
#undef r_divH
|
||
#undef r_cnt
|
||
|
||
/*******************************************************
|
||
Fixed Division 16.16 / 16.16
|
||
*******************************************************/
|
||
|
||
#define r_arg1L 24 /* arg1 gets passed already in place */
|
||
#define r_arg1H 25
|
||
#define r_arg1HL 26
|
||
#define r_arg1HH 27
|
||
#define r_divdL 26 /* dividend Low */
|
||
#define r_divdH 27
|
||
#define r_divdHL 30
|
||
#define r_divdHH 31 /* dividend High */
|
||
#define r_quoL 22 /* quotient Low */
|
||
#define r_quoH 23
|
||
#define r_quoHL 24
|
||
#define r_quoHH 25 /* quotient High */
|
||
#define r_divL 18 /* divisor Low */
|
||
#define r_divH 19
|
||
#define r_divHL 20
|
||
#define r_divHH 21 /* divisor High */
|
||
#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
|
||
|
||
#if defined (L_divsa3)
;;; Signed accum division s16.15 / s16.15.
;;; (r_quoHH:r_quoL) = (r_arg1HH:r_arg1L) / (r_divHH:r_divL)
;;; Sign computed into r0; magnitudes divided by __udivusa3.
DEFUN __divsa3
    mov r0, r_arg1HH
    eor r0, r_divHH
    sbrs r_divHH, 7
    rjmp 1f
    NEG4 r_divL
1:
    sbrs r_arg1HH, 7
    rjmp 2f
    NEG4 r_arg1L
2:
    XCALL __udivusa3
    lsr r_quoHH  ; adjust to 15 fractional bits
    ror r_quoHL
    ror r_quoH
    ror r_quoL
    sbrs r0, 7  ; negate result if needed
    ret
    ;; negate r_quoL (32-bit negate of the quotient, tail call)
    XJMP __negsi2
ENDF __divsa3
#endif /* defined (L_divsa3) */
|
||
|
||
#if defined (L_udivusa3)
;;; Unsigned accum division 16.16 / 16.16.
;;; 32-step restoring division; dividend's integral half starts in
;;; r25:r24, fractional zeros are generated in r31:r30.
DEFUN __udivusa3
    ldi r_divdHL, 32  ; init loop counter
    mov r_cnt, r_divdHL
    clr r_divdHL
    clr r_divdHH
    wmov r_quoL, r_divdHL
    lsl r_quoHL  ; shift quotient into carry
    rol r_quoHH
__udivusa3_loop:
    rol r_divdL  ; shift dividend (with CARRY)
    rol r_divdH
    rol r_divdHL
    rol r_divdHH
    brcs __udivusa3_ep  ; dividend overflow
    cp r_divdL,r_divL   ; compare dividend & divisor
    cpc r_divdH,r_divH
    cpc r_divdHL,r_divHL
    cpc r_divdHH,r_divHH
    brcc __udivusa3_ep  ; dividend >= divisor
    rol r_quoL   ; shift quotient (with CARRY)
    rjmp __udivusa3_cont
__udivusa3_ep:
    sub r_divdL,r_divL  ; restore dividend
    sbc r_divdH,r_divH
    sbc r_divdHL,r_divHL
    sbc r_divdHH,r_divHH
    lsl r_quoL   ; shift quotient (without CARRY)
__udivusa3_cont:
    rol r_quoH   ; shift quotient
    rol r_quoHL
    rol r_quoHH
    dec r_cnt    ; decrement loop counter (__zero_reg__: 0 after loop)
    brne __udivusa3_loop
    com r_quoL   ; complement result
    com r_quoH   ; because C flag was complemented in loop
    com r_quoHL
    com r_quoHH
    ret
ENDF __udivusa3
#endif /* defined (L_udivusa3) */
|
||
|
||
#undef r_arg1L
|
||
#undef r_arg1H
|
||
#undef r_arg1HL
|
||
#undef r_arg1HH
|
||
#undef r_divdL
|
||
#undef r_divdH
|
||
#undef r_divdHL
|
||
#undef r_divdHH
|
||
#undef r_quoL
|
||
#undef r_quoH
|
||
#undef r_quoHL
|
||
#undef r_quoHH
|
||
#undef r_divL
|
||
#undef r_divH
|
||
#undef r_divHL
|
||
#undef r_divHH
|
||
#undef r_cnt
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Saturation, 2 Bytes
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;; First Argument and Return Register
|
||
#define A0 24
|
||
#define A1 A0+1
|
||
|
||
#if defined (L_ssneg_2)
;;; Saturated 16-bit negate: A = -A, clamping -INT16_MIN to INT16_MAX.
;;; In/Out: R25:R24.
DEFUN __ssneg_2
    NEG2 A0
    ;; V is set only for negating 0x8000; back off one to 0x7fff.
    brvc 0f
    sbiw A0, 1
0:  ret
ENDF __ssneg_2
#endif /* L_ssneg_2 */

#if defined (L_ssabs_2)
;;; Saturated 16-bit absolute value in R25:R24.
DEFUN __ssabs_2
    sbrs A1, 7
    ret
    XJMP __ssneg_2
ENDF __ssabs_2
#endif /* L_ssabs_2 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Saturation, 4 Bytes
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;; First Argument and Return Register
|
||
#define A0 22
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
#define A3 A0+3
|
||
|
||
#if defined (L_ssneg_4)
;;; Saturated 32-bit negate in R25:R22, clamping to INT32_MAX.
DEFUN __ssneg_4
    XCALL __negsi2
    ;; V set only for negating 0x80000000; saturate to 0x7fffffff.
    brvc 0f
    ldi A3, 0x7f
    ldi A2, 0xff
    ldi A1, 0xff
    ldi A0, 0xff
0:  ret
ENDF __ssneg_4
#endif /* L_ssneg_4 */

#if defined (L_ssabs_4)
;;; Saturated 32-bit absolute value in R25:R22.
DEFUN __ssabs_4
    sbrs A3, 7
    ret
    XJMP __ssneg_4
ENDF __ssabs_4
#endif /* L_ssabs_4 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Saturation, 8 Bytes
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;; First Argument and Return Register
|
||
#define A0 18
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
#define A3 A0+3
|
||
#define A4 A0+4
|
||
#define A5 A0+5
|
||
#define A6 A0+6
|
||
#define A7 A0+7
|
||
|
||
#if defined (L_clr_8)
;; Unsigned saturated negate of 8-byte values: result is always 0.
FALIAS __usneguta2
FALIAS __usneguda2
FALIAS __usnegudq2

;; Clear Carry and all Bytes
DEFUN __clr_8
    ;; Clear Carry and set Z
    sub A7, A7
    ;; FALLTHRU
ENDF __clr_8
;; Propagate Carry to all Bytes, Carry unaltered:
;; A[] = 0xff..ff if C set, else 0.
DEFUN __sbc_8
    sbc A7, A7
    sbc A6, A6
    wmov A4, A6
    wmov A2, A6
    wmov A0, A6
    ret
ENDF __sbc_8
#endif /* L_clr_8 */
|
||
|
||
#if defined (L_ssneg_8)
;; Signed saturated 64-bit negate (TA / DA / DQ modes).
FALIAS __ssnegta2
FALIAS __ssnegda2
FALIAS __ssnegdq2

DEFUN __ssneg_8
    XCALL __negdi2
    ;; V set only when negating INT64_MIN; saturate to INT64_MAX.
    brvc 0f
    ;; A[] = 0x7fffffff (all bytes 0xff, then top byte 0x7f)
    sec
    XCALL __sbc_8
    ldi A7, 0x7f
0:  ret
ENDF __ssneg_8
#endif /* L_ssneg_8 */

#if defined (L_ssabs_8)
;; Signed saturated 64-bit absolute value.
FALIAS __ssabsta2
FALIAS __ssabsda2
FALIAS __ssabsdq2

DEFUN __ssabs_8
    sbrs A7, 7
    ret
    XJMP __ssneg_8
ENDF __ssabs_8
#endif /* L_ssabs_8 */
|
||
|
||
;; Second Argument
|
||
#define B0 10
|
||
#define B1 B0+1
|
||
#define B2 B0+2
|
||
#define B3 B0+3
|
||
#define B4 B0+4
|
||
#define B5 B0+5
|
||
#define B6 B0+6
|
||
#define B7 B0+7
|
||
|
||
#if defined (L_usadd_8)
;; Unsigned saturated 64-bit addition (UTA / UDA / UDQ modes).
FALIAS __usadduta3
FALIAS __usadduda3
FALIAS __usaddudq3

DEFUN __usadd_8
    XCALL __adddi3
    ;; Carry out => overflow; saturate to all-ones.
    brcs 0f
    ret
0:  ;; A[] = 0xffffffff
    XJMP __sbc_8
ENDF __usadd_8
#endif /* L_usadd_8 */

#if defined (L_ussub_8)
;; Unsigned saturated 64-bit subtraction.
FALIAS __ussubuta3
FALIAS __ussubuda3
FALIAS __ussubudq3

DEFUN __ussub_8
    XCALL __subdi3
    ;; Borrow => underflow; saturate to zero.
    brcs 0f
    ret
0:  ;; A[] = 0
    XJMP __clr_8
ENDF __ussub_8
#endif /* L_ussub_8 */
|
||
|
||
#if defined (L_ssadd_8)
;; Signed saturated 64-bit addition (TA / DA / DQ modes).
FALIAS __ssaddta3
FALIAS __ssaddda3
FALIAS __ssadddq3

DEFUN __ssadd_8
    XCALL __adddi3
    ;; V => signed overflow; saturate toward B's sign.
    brvc 0f
    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
    ;; cpi sets C iff B7 < 0x80 (B >= 0); __sbc_8 then fills 0xff...,
    ;; and subi A7, 0x80 turns the top byte into 0x7f / 0x80.
    cpi B7, 0x80
    XCALL __sbc_8
    subi A7, 0x80
0:  ret
ENDF __ssadd_8
#endif /* L_ssadd_8 */

#if defined (L_sssub_8)
;; Signed saturated 64-bit subtraction.
FALIAS __sssubta3
FALIAS __sssubda3
FALIAS __sssubdq3

DEFUN __sssub_8
    XCALL __subdi3
    ;; V => signed overflow; saturate away from B's sign.
    brvc 0f
    ;; A = (B < 0) ? INT64_MAX : INT64_MIN
    ldi A7, 0x7f
    cp A7, B7
    XCALL __sbc_8
    subi A7, 0x80
0:  ret
ENDF __sssub_8
#endif /* L_sssub_8 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
#undef A4
|
||
#undef A5
|
||
#undef A6
|
||
#undef A7
|
||
#undef B0
|
||
#undef B1
|
||
#undef B2
|
||
#undef B3
|
||
#undef B4
|
||
#undef B5
|
||
#undef B6
|
||
#undef B7
|