1928 lines
41 KiB
ArmAsm
1928 lines
41 KiB
ArmAsm
/* -*- Mode: Asm -*- */
|
||
;; Copyright (C) 2012-2019 Free Software Foundation, Inc.
|
||
;; Contributed by Sean D'Epagnier (sean@depagnier.com)
|
||
;; Georg-Johann Lay (avr@gjlay.de)
|
||
|
||
;; This file is free software; you can redistribute it and/or modify it
|
||
;; under the terms of the GNU General Public License as published by the
|
||
;; Free Software Foundation; either version 3, or (at your option) any
|
||
;; later version.
|
||
|
||
;; In addition to the permissions in the GNU General Public License, the
|
||
;; Free Software Foundation gives you unlimited permission to link the
|
||
;; compiled version of this file into combinations with other programs,
|
||
;; and to distribute those combinations without any restriction coming
|
||
;; from the use of this file. (The General Public License restrictions
|
||
;; do apply in other respects; for example, they cover modification of
|
||
;; the file, and distribution when not linked into a combine
|
||
;; executable.)
|
||
|
||
;; This file is distributed in the hope that it will be useful, but
|
||
;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
;; General Public License for more details.
|
||
|
||
;; You should have received a copy of the GNU General Public License
|
||
;; along with this program; see the file COPYING. If not, write to
|
||
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||
;; Boston, MA 02110-1301, USA.
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Fixed point library routines for AVR
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
#if defined __AVR_TINY__
|
||
#define __zero_reg__ r17
|
||
#define __tmp_reg__ r16
|
||
#else
|
||
#define __zero_reg__ r1
|
||
#define __tmp_reg__ r0
|
||
#endif
|
||
|
||
.section .text.libgcc.fixed, "ax", @progbits
|
||
|
||
#ifndef __AVR_TINY__
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Conversions to float
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
#if defined (L_fractqqsf)
|
||
;; QQ -> SF.  The input byte in r24 is moved into the SA layout held in
;; r25:r22 and the work is handed over to __fractsasf.
DEFUN __fractqqsf
    ;; r23 = input byte, r22 = 0
    mov     r23, r24
    clr     r22
    ;; Carry = sign bit of the input, then r25:r24 = 0x0000 or 0xffff
    lsl     r24
    sbc     r24, r24
    mov     r25, r24
    ;; Continue with the SA -> SF conversion
    XJMP    __fractsasf
ENDF __fractqqsf
|
||
#endif /* L_fractqqsf */
|
||
|
||
#if defined (L_fractuqqsf)
|
||
;; UQQ -> SF.  The input byte in r24 is moved into the USA layout held in
;; r25:r22 and the work is handed over to __fractusasf.
DEFUN __fractuqqsf
    ;; r23 = input byte, r22 = 0
    mov     r23, r24
    clr     r22
    ;; Unsigned input: the two upper bytes are zero
    clr     r25
    clr     r24
    ;; Continue with the USA -> SF conversion
    XJMP    __fractusasf
ENDF __fractuqqsf
|
||
#endif /* L_fractuqqsf */
|
||
|
||
#if defined (L_fracthqsf)
|
||
;; HQ -> SF.  The 16-bit input in r25:r24 becomes the low half of an SA
;; value in r25:r22, which __fractsasf then converts.
DEFUN __fracthqsf
    ;; r23:r22 = r25:r24
    wmov    22, 24
    ;; Carry = sign bit of the input, then r25:r24 = 0x0000 or 0xffff
    lsl     r25
    sbc     r24, r24
    mov     r25, r24
    ;; Continue with the SA -> SF conversion
    XJMP    __fractsasf
ENDF __fracthqsf
|
||
#endif /* L_fracthqsf */
|
||
|
||
#if defined (L_fractuhqsf)
|
||
;; UHQ -> SF.  The 16-bit input in r25:r24 becomes the low half of a USA
;; value in r25:r22, which __fractusasf then converts.
DEFUN __fractuhqsf
    ;; r23:r22 = r25:r24
    wmov    22, 24
    ;; Unsigned input: the two upper bytes are zero
    clr     r25
    clr     r24
    ;; Continue with the USA -> SF conversion
    XJMP    __fractusasf
ENDF __fractuhqsf
|
||
#endif /* L_fractuhqsf */
|
||
|
||
#if defined (L_fracthasf)
|
||
;; HA -> SF.  The 16-bit input in r25:r24 is shifted up by one byte inside
;; r25:r22 to form an SA value, which __fractsasf then converts.
DEFUN __fracthasf
    ;; r24:r23 = input, r22 = 0
    mov     r23, r24
    mov     r24, r25
    clr     r22
    ;; Carry = sign bit of the input, then r25 = 0x00 or 0xff
    lsl     r25
    sbc     r25, r25
    ;; Continue with the SA -> SF conversion
    XJMP    __fractsasf
ENDF __fracthasf
|
||
#endif /* L_fracthasf */
|
||
|
||
#if defined (L_fractuhasf)
|
||
;; UHA -> SF.  The 16-bit input in r25:r24 is shifted up by one byte inside
;; r25:r22 to form a USA value, which __fractusasf then converts.
DEFUN __fractuhasf
    ;; r24:r23 = input, r22 = 0
    mov     r23, r24
    mov     r24, r25
    clr     r22
    ;; Unsigned input: the top byte is zero
    clr     r25
    ;; Continue with the USA -> SF conversion
    XJMP    __fractusasf
ENDF __fractuhasf
|
||
#endif /* L_fractuhasf */
|
||
|
||
|
||
#if defined (L_fractsqsf)
|
||
;; SQ -> SF.  Convert the 32-bit pattern as a signed integer, then scale
;; the float by 2^-31 through an adjustment of its exponent field.
DEFUN __fractsqsf
    XCALL   __floatsisf
    ;; A result whose high byte r25 is zero is returned untouched
    ;; (only non-zero results are divided by 2^31)
    tst     r25
    breq    .Lfractsqsf_done
    ;; Divide by 2^31: subtract 31 from the exponent held in r25:r24
    subi    r24, exp_lo (31)
    sbci    r25, exp_hi (31)
.Lfractsqsf_done:
    ret
ENDF __fractsqsf
|
||
#endif /* L_fractsqsf */
|
||
|
||
#if defined (L_fractusqsf)
|
||
;; USQ -> SF.  Convert the 32-bit pattern as an unsigned integer, then
;; scale the float by 2^-32 through an adjustment of its exponent field.
DEFUN __fractusqsf
    XCALL   __floatunsisf
    ;; The exponent adjustment is skipped when r25 equals __zero_reg__
    ;; (only non-zero results are divided by 2^32)
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (32)
    ret
ENDF __fractusqsf
|
||
#endif /* L_fractusqsf */
|
||
|
||
#if defined (L_fractsasf)
|
||
;; SA -> SF.  Convert the 32-bit pattern as a signed integer, then scale
;; the float by 2^-15 through an adjustment of its exponent field.
DEFUN __fractsasf
    XCALL   __floatsisf
    ;; A result whose high byte r25 is zero is returned untouched
    ;; (only non-zero results are divided by 2^15)
    tst     r25
    breq    .Lfractsasf_done
    ;; Divide by 2^15: subtract 15 from the exponent held in r25:r24
    subi    r24, exp_lo (15)
    sbci    r25, exp_hi (15)
.Lfractsasf_done:
    ret
ENDF __fractsasf
|
||
#endif /* L_fractsasf */
|
||
|
||
#if defined (L_fractusasf)
|
||
;; USA -> SF.  Convert the 32-bit pattern as an unsigned integer, then
;; scale the float by 2^-16 through an adjustment of its exponent field.
DEFUN __fractusasf
    XCALL   __floatunsisf
    ;; The exponent adjustment is skipped when r25 equals __zero_reg__
    ;; (only non-zero results are divided by 2^16)
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (16)
    ret
ENDF __fractusasf
|
||
#endif /* L_fractusasf */
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Conversions from float
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
#if defined (L_fractsfqq)
|
||
;; SF -> QQ.  Scale the float by 2^31 (that is 2^{24+7}) so that the
;; signed integer conversion leaves the QQ value in the top byte r25.
DEFUN __fractsfqq
    ;; Multiply by 2^31 by means of the exponent in r25:r24
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XCALL   __fixsfsi
    ;; The result is returned in r24
    mov     r24, r25
    ret
ENDF __fractsfqq
|
||
#endif /* L_fractsfqq */
|
||
|
||
#if defined (L_fractsfuqq)
|
||
;; SF -> UQQ.  Scale the float by 2^32 (that is 2^{24+8}) so that the
;; unsigned integer conversion leaves the UQQ value in the top byte r25.
DEFUN __fractsfuqq
    ;; Multiply by 2^32 by means of the exponent byte r25
    subi    r25, exp_hi (-32)
    XCALL   __fixunssfsi
    ;; The result is returned in r24
    mov     r24, r25
    ret
ENDF __fractsfuqq
|
||
#endif /* L_fractsfuqq */
|
||
|
||
#if defined (L_fractsfha)
|
||
;; SF -> HA.  Scale the float by 2^23 (that is 2^{16+7}) so that the
;; signed integer conversion leaves the HA value in r25:r24.
DEFUN __fractsfha
    ;; Multiply by 2^23 by means of the exponent in r25:r24
    subi    r24, exp_lo (-23)
    sbci    r25, exp_hi (-23)
    ;; The integer conversion returns directly to the caller
    XJMP    __fixsfsi
ENDF __fractsfha
|
||
#endif /* L_fractsfha */
|
||
|
||
#if defined (L_fractsfuha)
|
||
;; SF -> UHA.  Scale the float by 2^24 so that the unsigned integer
;; conversion leaves the UHA value in r25:r24.
DEFUN __fractsfuha
    ;; Multiply by 2^24 by means of the exponent byte r25
    subi    r25, exp_hi (-24)
    ;; The integer conversion returns directly to the caller
    XJMP    __fixunssfsi
ENDF __fractsfuha
|
||
#endif /* L_fractsfuha */
|
||
|
||
#if defined (L_fractsfhq)
|
||
;; SF -> HQ and SF -> SQ share one body.  The float is scaled by 2^31:
;;   HQ: 2^{16+15}, result taken from r25:r24
;;   SQ: 2^31,      result taken from r25:r22
FALIAS __fractsfsq

DEFUN __fractsfhq
    ;; Multiply by 2^31 by means of the exponent in r25:r24
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    ;; The integer conversion returns directly to the caller
    XJMP    __fixsfsi
ENDF __fractsfhq
|
||
#endif /* L_fractsfhq */
|
||
|
||
#if defined (L_fractsfuhq)
|
||
;; SF -> UHQ and SF -> USQ share one body.  The float is scaled by 2^32:
;;   UHQ: 2^{16+16}, result taken from r25:r24
;;   USQ: 2^32,      result taken from r25:r22
FALIAS __fractsfusq

DEFUN __fractsfuhq
    ;; Multiply by 2^32 by means of the exponent byte r25
    subi    r25, exp_hi (-32)
    ;; The integer conversion returns directly to the caller
    XJMP    __fixunssfsi
ENDF __fractsfuhq
|
||
#endif /* L_fractsfuhq */
|
||
|
||
#if defined (L_fractsfsa)
|
||
;; SF -> SA.  Scale the float by 2^15 so that the signed integer
;; conversion leaves the SA value in r25:r22.
DEFUN __fractsfsa
    ;; Multiply by 2^15 by means of the exponent in r25:r24
    subi    r24, exp_lo (-15)
    sbci    r25, exp_hi (-15)
    ;; The integer conversion returns directly to the caller
    XJMP    __fixsfsi
ENDF __fractsfsa
|
||
#endif /* L_fractsfsa */
|
||
|
||
#if defined (L_fractsfusa)
|
||
;; SF -> USA.  Scale the float by 2^16 so that the unsigned integer
;; conversion leaves the USA value in r25:r22.
DEFUN __fractsfusa
    ;; Multiply by 2^16 by means of the exponent byte r25
    subi    r25, exp_hi (-16)
    ;; The integer conversion returns directly to the caller
    XJMP    __fixunssfsi
ENDF __fractsfusa
|
||
#endif /* L_fractsfusa */
|
||
|
||
|
||
;; For multiplication the functions here are called directly from
|
||
;; avr-fixed.md instead of using the standard libcall mechanisms.
|
||
;; This can make better code because GCC knows exactly which
|
||
;; of the call-used registers (not all of them) are clobbered.
|
||
|
||
/*******************************************************
|
||
Fractional Multiplication 8 x 8 without MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
|
||
;;; Signed fractional multiply 8 x 8 for devices without MUL.
;;; R23 = R24 * R25
;;; Clobbers: __tmp_reg__, R22, R24, R25
;;; Rounding: ???
DEFUN __mulqq3
    XCALL   __fmuls
    ;; TR 18037 requires that (-1) * (-1) does not overflow, and
    ;; (-1)^2 is the only input that can produce -1 (0x80).
    ;; DEC sets V exactly when r23 was 0x80, leaving 0x7f behind.
    dec     r23
    brvs    .Lmulqq3_done
    ;; Any other value: undo the decrement
    inc     r23
.Lmulqq3_done:
    ret
ENDF __mulqq3
|
||
#endif /* L_mulqq3 && ! HAVE_MUL */
|
||
|
||
/*******************************************************
|
||
Fractional Multiply .16 x .16 with and without MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_mulhq3)
|
||
;;; Signed fractional multiply .16 x .16
;;; The code is the same with and without MUL, the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulhq3
    ;; Widening multiply, the code below consumes r25:r23 of the product
    XCALL   __mulhisi3
    ;; Shift the product one bit to the left to move it into place
    lsl     r23
    rol     r24
    rol     r25
    ;; V set by the last rotate means the shift overflowed
    brvs    .Lmulhq3_overflow
    ;; Round to nearest using the topmost bit below the result
    sbrc    r23, 7
    adiw    r24, 1
    ret
.Lmulhq3_overflow:
    ;; TR 18037 requires (-1)^2 not to overflow: return 0x7fff
    ldi     r24, lo8 (0x7fff)
    ldi     r25, hi8 (0x7fff)
    ret
ENDF __mulhq3
|
||
#endif /* defined (L_mulhq3) */
|
||
|
||
#if defined (L_muluhq3)
|
||
;;; Unsigned fractional multiply .16 x .16
;;; The code is the same with and without MUL, the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB < error <= 0.5 LSB
DEFUN __muluhq3
    ;; Widening multiply, the result is taken from r25:r24
    XCALL   __umulhisi3
    ;; Round to nearest using the topmost bit below the result
    sbrc    r23, 7
    adiw    r24, 1
    ret
ENDF __muluhq3
|
||
#endif /* L_muluhq3 */
|
||
|
||
|
||
/*******************************************************
|
||
Fixed Multiply 8.8 x 8.8 with and without MUL
|
||
*******************************************************/
|
||
|
||
#if defined (L_mulha3)
|
||
;;; Signed fixed multiply 8.8 x 8.8
;;; The code is the same with and without MUL, the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulha3
    ;; Widening multiply
    XCALL   __mulhisi3
    ;; Shift r24:r22 one bit to the left
    lsl     r22
    rol     r23
    rol     r24
    ;; The unsigned variant supplies the final byte move and the rounding
    XJMP    __muluha3_round
ENDF __mulha3
|
||
#endif /* L_mulha3 */
|
||
|
||
#if defined (L_muluha3)
|
||
;;; Unsigned fixed multiply 8.8 x 8.8
;;; The code is the same with and without MUL, the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB < error <= 0.5 LSB
DEFUN __muluha3
    ;; Widening multiply
    XCALL   __umulhisi3
    ;; Move the result into place and round
    XJMP    __muluha3_round
ENDF __muluha3
|
||
#endif /* L_muluha3 */
|
||
|
||
#if defined (L_muluha3_round)
|
||
;; Common tail of __mulha3 and __muluha3.
;; Takes r24:r23 as the result, r22 supplies the rounding bit.
;; Returns the rounded value in r25:r24.
DEFUN __muluha3_round
    ;; r25:r24 = r24:r23
    mov     r25, r24
    mov     r24, r23
    ;; Round to nearest using the topmost bit below the result
    sbrc    r22, 7
    adiw    r24, 1
    ret
ENDF __muluha3_round
|
||
#endif /* L_muluha3_round */
|
||
|
||
|
||
/*******************************************************
|
||
Fixed Multiplication 16.16 x 16.16
|
||
*******************************************************/
|
||
|
||
;; Bits outside the result (below LSB), used in the signed version
|
||
#define GUARD __tmp_reg__
|
||
|
||
#if defined (__AVR_HAVE_MUL__)
|
||
|
||
;; Multiplier
|
||
#define A0 16
|
||
#define A1 A0+1
|
||
#define A2 A1+1
|
||
#define A3 A2+1
|
||
|
||
;; Multiplicand
|
||
#define B0 20
|
||
#define B1 B0+1
|
||
#define B2 B1+1
|
||
#define B3 B2+1
|
||
|
||
;; Result
|
||
#define C0 24
|
||
#define C1 C0+1
|
||
#define C2 C1+1
|
||
#define C3 C2+1
|
||
|
||
#if defined (L_mulusa3)
|
||
;;; Unsigned fixed multiply 16.16 x 16.16 with MUL.
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Sets T so that the shared code below rounds the last digit.
DEFUN __mulusa3
    set
    ;; No return: execution runs on into __mulusa3_round
ENDF __mulusa3

;;; Shared multiply core, also called by the signed version with T = 0.
;;; The last digit is rounded iff T = 1.
;;; The guard bits are returned in GUARD (__tmp_reg__).
;;; Rounding, T = 0: -1.0 LSB < error <= 0 LSB
;;; Rounding, T = 1: -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3_round
    ;; Some partial products have LSBs below the result.  They are
    ;; summed up as well in order to tame the rounding error;
    ;; C2/C3 hold these low bits for the time being.

    clr     C0
    clr     C1
    mul     A0, B0
    movw    C2, r0

    mul     A1, B0
    add     C3, r0
    adc     C0, r1

    mul     A0, B1
    add     C3, r0
    adc     C0, r1
    rol     C1

    ;; Round if T = 1.  The guard byte is put on the stack: the signed
    ;; version needs it for rounding and for its left shift.
    brtc    .Lmulusa3_keep_guard
    sbrc    C3, 7
    adiw    C0, 1
.Lmulusa3_keep_guard:
    push    C3

    ;; None of the remaining partial products has LSBs below the
    ;; result.  From here on C2/C3 is the high part.

    ;; Products of weight C0.  The carries are collected negated
    ;; in C2 (sbc / sbci) and turned positive by the NEG.
    mul     A0, B2
    add     C0, r0
    adc     C1, r1
    sbc     C2, C2

    mul     A1, B1
    add     C0, r0
    adc     C1, r1
    sbci    C2, 0

    mul     A2, B0
    add     C0, r0
    adc     C1, r1
    sbci    C2, 0
    neg     C2

    ;; Products of weight C1.  Same scheme with C3 as carry collector.
    mul     A0, B3
    add     C1, r0
    adc     C2, r1
    sbc     C3, C3

    mul     A1, B2
    add     C1, r0
    adc     C2, r1
    sbci    C3, 0

    mul     A2, B1
    add     C1, r0
    adc     C2, r1
    sbci    C3, 0

    mul     A3, B0
    add     C1, r0
    adc     C2, r1
    sbci    C3, 0
    neg     C3

    ;; Products of weight C2
    mul     A1, B3
    add     C2, r0
    adc     C3, r1

    mul     A2, B2
    add     C2, r0
    adc     C3, r1

    mul     A3, B1
    add     C2, r0
    adc     C3, r1

    ;; Products of weight C3: only the low product byte is added
    mul     A2, B3
    add     C3, r0

    mul     A3, B2
    add     C3, r0

    ;; Hand the guard bits to the caller.  MUL wrote to r1, hence
    ;; __zero_reg__ is cleared again before returning.
    pop     GUARD
    clr     __zero_reg__
    ret
ENDF __mulusa3_round
|
||
#endif /* L_mulusa3 */
|
||
|
||
#if defined (L_mulsa3)
|
||
;;; Signed fixed multiply s16.15 x s16.15 with MUL.
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__, T
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulsa3
    ;; Unsigned product without rounding (T = 0), guard bits in GUARD
    clt
    XCALL   __mulusa3_round

    ;; A posteriori sign extension of the operands:
    ;; if B is negative, subtract A1:A0 from the high word
    tst     B3
    brpl    .Lmulsa3_b_done
    sub     C2, A0
    sbc     C3, A1
.Lmulsa3_b_done:
    ;; if A is negative, subtract B1:B0 from the high word
    sbrs    A3, 7
    rjmp    .Lmulsa3_a_done
    sub     C2, B0
    sbc     C3, B1
.Lmulsa3_a_done:

    ;; Shift 1 bit left to adjust for 15 fractional bits,
    ;; the top guard bit moves into the result
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3

    ;; Round the last digit: add the next guard bit as a carry
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
|
||
#endif /* L_mulsa3 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
#undef B0
|
||
#undef B1
|
||
#undef B2
|
||
#undef B3
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
#undef C3
|
||
|
||
#else /* __AVR_HAVE_MUL__ */
|
||
|
||
#define A0 18
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
#define A3 A0+3
|
||
|
||
#define B0 22
|
||
#define B1 B0+1
|
||
#define B2 B0+2
|
||
#define B3 B0+3
|
||
|
||
#define C0 22
|
||
#define C1 C0+1
|
||
#define C2 C0+2
|
||
#define C3 C0+3
|
||
|
||
;; __tmp_reg__
|
||
#define CC0 0
|
||
;; __zero_reg__
|
||
#define CC1 1
|
||
#define CC2 16
|
||
#define CC3 17
|
||
|
||
#define AA0 26
|
||
#define AA1 AA0+1
|
||
#define AA2 30
|
||
#define AA3 AA2+1
|
||
|
||
#if defined (L_mulsa3)
|
||
;;; Signed fixed multiply s16.15 x s16.15 without MUL.
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding: -1 LSB <= error <= 1 LSB
DEFUN __mulsa3
    ;; Keep the bytes of B that are needed after the multiplication
    push    B0
    push    B1
    push    B3
    ;; Unsigned product without rounding (T = 0), guard bits in GUARD
    clt
    XCALL   __mulusa3_round

    ;; Sign-extend B: r30 = saved B3
    pop     r30
    bst     r30, 7
    brtc    .Lmulsa3_b_done
    ;; B is negative.  A1, A0 survived in R27:R26
    sub     C2, AA0
    sbc     C3, AA1
.Lmulsa3_b_done:
    ;; R27:R26 = saved B1:B0
    pop     AA1
    pop     AA0

    ;; Sign-extend A.  A3 survived in R31
    bst     AA3, 7
    brtc    .Lmulsa3_a_done
    sub     C2, AA0
    sbc     C3, AA1
.Lmulsa3_a_done:

    ;; Shift 1 bit left to adjust for 15 fractional bits,
    ;; the top guard bit moves into the result
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3

    ;; Round the last digit: add the next guard bit as a carry
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
|
||
#endif /* L_mulsa3 */
|
||
|
||
#if defined (L_mulusa3)
|
||
;;; Unsigned fixed multiply 16.16 x 16.16 without MUL.
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding: -1 LSB <= error <= 1 LSB
;;; Sets T so that the shared code below rounds the last digit.
DEFUN __mulusa3
    set
    ;; No return: execution runs on into __mulusa3_round
ENDF __mulusa3

;;; Shift-and-add multiply core.
;;; A[] survives in 26, 27, 30, 31
;;; Also used by __mulsa3 with T = 0
;;; Rounds if T = 1
;;; Returns the guard bits in GUARD (__tmp_reg__) for the signed version.
DEFUN __mulusa3_round
    push    CC2
    push    CC3
    ;; Accumulator CC3:CC0 = 0.  CC1 is __zero_reg__.
    clr     __tmp_reg__
    wmov    CC2, CC0
    ;; Keep a copy of the multiplicand in AA3:AA0
    wmov    AA0, A0
    wmov    AA2, A2
    rjmp    .Lmulusa3_int_next

    ;; First loop: the integral part of B

.Lmulusa3_int_add:
    ;; CC += A * 2^n;  n >= 0
    add     CC0, A0
    adc     CC1, A1
    adc     CC2, A2
    adc     CC3, A3

.Lmulusa3_int_shift:
    ;; A <<= 1
    lsl     A0
    rol     A1
    rol     A2
    rol     A3

.Lmulusa3_int_next:
    ;; IBIT(B) >>= 1
    ;; Carry = n-th bit of B;  n >= 0
    lsr     B3
    ror     B2
    brcs    .Lmulusa3_int_add
    ;; Carry is clear here.  The SBCI leaves Z set only if B3:B2 == 0.
    sbci    B3, 0
    brne    .Lmulusa3_int_shift

    ;; Second loop: the fractional part of B.
    ;; B2/B3 is 0 now and serves as guard bits for rounding.
    ;; Start over with the original multiplicand.
    wmov    A0, AA0
    wmov    A2, AA2
    rjmp    .Lmulusa3_frac_shift

.Lmulusa3_frac_add:
    ;; CC += A:Guard * 2^n;  n < 0
    add     B3, B2
    adc     CC0, A0
    adc     CC1, A1
    adc     CC2, A2
    adc     CC3, A3

.Lmulusa3_frac_shift:
    ;; A:Guard >>= 1
    lsr     A3
    ror     A2
    ror     A1
    ror     A0
    ror     B2

    ;; FBIT(B) <<= 1
    ;; Carry = n-th bit of B;  n < 0
    lsl     B0
    rol     B1
    brcs    .Lmulusa3_frac_add
    ;; Carry is clear here.  The SBCI leaves Z set only if B1:B0 == 0.
    sbci    B0, 0
    brne    .Lmulusa3_frac_shift

    ;; Save the guard bits, then move the top guard bit into Carry.
    ;; Nothing up to the ADC chain changes Carry.
    push    B3
    lsl     B3
    ;; Move the result into place
    wmov    C2, CC2
    wmov    C0, CC0
    ;; CC1 was used as accumulator byte: restore __zero_reg__
    clr     __zero_reg__
    brtc    .Lmulusa3_no_round
    ;; Round iff T = 1
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
.Lmulusa3_no_round:
    pop     GUARD
    ;; Epilogue
    pop     CC3
    pop     CC2
    ret
ENDF __mulusa3_round
|
||
#endif /* L_mulusa3 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
#undef B0
|
||
#undef B1
|
||
#undef B2
|
||
#undef B3
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
#undef C3
|
||
#undef AA0
|
||
#undef AA1
|
||
#undef AA2
|
||
#undef AA3
|
||
#undef CC0
|
||
#undef CC1
|
||
#undef CC2
|
||
#undef CC3
|
||
|
||
#endif /* __AVR_HAVE_MUL__ */
|
||
|
||
#undef GUARD
|
||
|
||
/***********************************************************
|
||
Fixed unsigned saturated Multiplication 8.8 x 8.8
|
||
***********************************************************/
|
||
|
||
#define C0 22
|
||
#define C1 C0+1
|
||
#define C2 C0+2
|
||
#define C3 C0+3
|
||
#define SS __tmp_reg__
|
||
|
||
#if defined (L_usmuluha3)
|
||
;; Unsigned saturating multiply 8.8 x 8.8.
;; Operands arrive in R25:R24 and R23:R22, the result is left in C3:C2.
DEFUN __usmuluha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; With MUL, __umulhisi3 takes its operands in R27:R26 and R19:R18
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __umulhisi3
    ;; A non-zero top byte does not fit the result
    tst     C3
    brne    .Lusmuluha3_sat
    ;; Round, the target is in C1..C2
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    ;; A carry out of the rounding does not fit either
    brcs    .Lusmuluha3_sat
    ;; Move the result into place
    mov     C3, C2
    mov     C2, C1
    ret
.Lusmuluha3_sat:
    ;; Saturate to 0xffff
    ldi     C3, 0xff
    ldi     C2, 0xff
    ret
ENDF __usmuluha3
|
||
#endif /* L_usmuluha3 */
|
||
|
||
/***********************************************************
|
||
Fixed signed saturated Multiplication s8.7 x s8.7
|
||
***********************************************************/
|
||
|
||
#if defined (L_ssmulha3)
|
||
;; Signed saturating multiply s8.7 x s8.7.
;; Operands arrive in R25:R24 and R23:R22, the result is left in C3:C2.
DEFUN __ssmulha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; With MUL, __mulhisi3 takes its operands in R27:R26 and R19:R18
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __mulhisi3
    ;; Adjust the decimal point: shift one bit to the left
    lsl     C0
    rol     C1
    rol     C2
    ;; V set: saturate according to the sign of C3
    brvs    .Lssmulha3_sat_c3
    ;; The 9 MSBs must be the same:
    ;; SS = 0x00 or 0xff from the bit shifted out of C3
    rol     C3
    sbc     SS, SS
    cp      C3, SS
    brne    .Lssmulha3_sat_ss
    ;; Round
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    brvs    .Lssmulha3_max
    ;; Move the result into place
    mov     C3, C2
    mov     C2, C1
    ret

.Lssmulha3_max:
    ;; Load 0x7fff: a cleared C3 selects the maximum below
    clr     C3
.Lssmulha3_sat_c3:
    ;; C3 <  0 --> 0x8000
    ;; C3 >= 0 --> 0x7fff
    mov     SS, C3
.Lssmulha3_sat_ss:
    ;; Load the min / max value:
    ;; SS bit 7 = 1 --> 0x8000
    ;; SS bit 7 = 0 --> 0x7fff
    ldi     C3, 0x7f
    ldi     C2, 0xff
    sbrc    SS, 7
    adiw    C2, 1
    ret
ENDF __ssmulha3
|
||
#endif /* L_ssmulha3 */
|
||
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
#undef C3
|
||
#undef SS
|
||
|
||
/***********************************************************
|
||
Fixed unsigned saturated Multiplication 16.16 x 16.16
|
||
***********************************************************/
|
||
|
||
#define C0 18
|
||
#define C1 C0+1
|
||
#define C2 C0+2
|
||
#define C3 C0+3
|
||
#define C4 C0+4
|
||
#define C5 C0+5
|
||
#define C6 C0+6
|
||
#define C7 C0+7
|
||
#define SS __tmp_reg__
|
||
|
||
#if defined (L_usmulusa3)
|
||
;; Unsigned saturating multiply 16.16 x 16.16.
;; R22[4] = R22[4] * R18[4] with unsigned saturation
;; Ordinary ABI function
DEFUN __usmulusa3
    ;; Widening multiply, the product is in C7..C0
    XCALL   __umulsidi3
    ;; Any bit set in the two top bytes does not fit the result
    or      C7, C6
    brne    .Lusmulusa3_sat
    ;; Round, the target is in C2..C5
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    ;; A carry out of the rounding does not fit either
    brcs    .Lusmulusa3_sat
    ;; Move the result into place: C7..C4 = C5..C2
    wmov    C6, C4
    wmov    C4, C2
    ret
.Lusmulusa3_sat:
    ;; Saturate: C7..C4 = 0xffffffff
    ldi     C7, 0xff
    ldi     C6, 0xff
    wmov    C4, C6
    ret
ENDF __usmulusa3
|
||
#endif /* L_usmulusa3 */
|
||
|
||
/***********************************************************
|
||
Fixed signed saturated Multiplication s16.15 x s16.15
|
||
***********************************************************/
|
||
|
||
#if defined (L_ssmulsa3)
|
||
;; Signed saturating multiply s16.15 x s16.15.
;; R22[4] = R22[4] *{ssat} R18[4]
;; Ordinary ABI function
DEFUN __ssmulsa3
    ;; Widening multiply, the product is in C7..C0
    XCALL   __mulsidi3
    ;; Adjust the decimal point: shift one bit to the left
    lsl     C1
    rol     C2
    rol     C3
    rol     C4
    rol     C5
    ;; V set: saturate according to the sign of C7
    brvs    .Lssmulsa3_sat_c7
    ;; The 17 MSBs must be the same:
    ;; SS = 0x00 or 0xff from the bit shifted out of C7
    rol     C6
    rol     C7
    sbc     SS, SS
    cp      C6, SS
    cpc     C7, SS
    brne    .Lssmulsa3_sat_ss
    ;; Round
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    brvs    .Lssmulsa3_max
    ;; Move the result into place: C7..C4 = C5..C2
    wmov    C6, C4
    wmov    C4, C2
    ret

.Lssmulsa3_max:
    ;; Load 0x7fffffff: a cleared C7 selects the maximum below
    clr     C7
.Lssmulsa3_sat_c7:
    ;; C7 <  0 --> 0x80000000
    ;; C7 >= 0 --> 0x7fffffff
    lsl     C7
    sbc     SS, SS
.Lssmulsa3_sat_ss:
    ;; Load the min / max value:
    ;; SS = -1 --> 0x80000000
    ;; SS =  0 --> 0x7fffffff
    ;; All four bytes become ~SS, then 0x80 is subtracted from the top byte
    com     SS
    mov     C4, SS
    mov     C5, C4
    wmov    C6, C4
    subi    C7, 0x80
    ret
ENDF __ssmulsa3
|
||
#endif /* L_ssmulsa3 */
|
||
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
#undef C3
|
||
#undef C4
|
||
#undef C5
|
||
#undef C6
|
||
#undef C7
|
||
#undef SS
|
||
|
||
/*******************************************************
|
||
Fractional Division 8 / 8
|
||
*******************************************************/
|
||
|
||
#define r_divd r25 /* dividend */
|
||
#define r_quo r24 /* quotient */
|
||
#define r_div r22 /* divisor */
|
||
#define r_sign __tmp_reg__
|
||
|
||
#if defined (L_divqq3)
|
||
;; Signed fractional division 8 / 8.
;; r_quo = r_divd / r_div, the sign is handled around the unsigned helper.
DEFUN __divqq3
    ;; Bit 7 of r_sign = sign of the result
    mov     r_sign, r_divd
    eor     r_sign, r_div
    ;; Work on the absolute values of both operands
    sbrc    r_div, 7
    neg     r_div
    sbrc    r_divd, 7
    neg     r_divd
    XCALL   __divqq_helper
    ;; One bit to the right: 7 fractional bits for the signed format
    lsr     r_quo
    ;; Negate the result if needed
    sbrc    r_sign, 7
    neg     r_quo
    ret
ENDF __divqq3
|
||
#endif /* L_divqq3 */
|
||
|
||
#if defined (L_udivuqq3)
|
||
;; Unsigned fractional division 8 / 8.
;; r_quo = r_divd / r_div
DEFUN __udivuqq3
    ;; Dividend >= divisor: the quotient does not fit
    cp      r_divd, r_div
    brsh    .Ludivuqq3_sat
    XJMP    __divqq_helper
.Ludivuqq3_sat:
    ;; Result is out of [0, 1)  ==>  Return 1 - eps.
    ldi     r_quo, 0xff
    ret
ENDF __udivuqq3
|
||
#endif /* L_udivuqq3 */
|
||
|
||
|
||
#if defined (L_divqq_helper)
|
||
;; Unsigned 8-bit shift-and-subtract division loop shared by
;; __divqq3 and __udivuqq3.
;; In:  r_divd = dividend, r_div = divisor
;; Out: r_quo = quotient
;; __zero_reg__ is used as loop counter: one set bit is shifted left per
;; round, and the loop ends as soon as the register is zero again.
DEFUN __divqq_helper
    clr     r_quo                   ; quotient = 0
    inc     __zero_reg__            ; loop counter bit
__udivuqq3_loop:
    lsl     r_divd                  ; dividend <<= 1
    brcs    .Ldivqq_helper_sub      ; a bit was shifted out: subtract
    cp      r_divd,r_div            ; dividend < divisor ?
    brcc    .Ldivqq_helper_sub      ; no: dividend >= divisor, subtract
    rol     r_quo                   ; shift Carry = 1 into the quotient
    rjmp    __udivuqq3_cont
.Ldivqq_helper_sub:
    sub     r_divd,r_div            ; dividend -= divisor
    lsl     r_quo                   ; shift 0 into the quotient
__udivuqq3_cont:
    lsl     __zero_reg__            ; advance the loop counter bit
    brne    __udivuqq3_loop
    ;; The quotient bits were collected inverted (see above): fix that
    com     r_quo
    ret
ENDF __divqq_helper
|
||
#endif /* L_divqq_helper */
|
||
|
||
#undef r_divd
|
||
#undef r_quo
|
||
#undef r_div
|
||
#undef r_sign
|
||
|
||
|
||
/*******************************************************
|
||
Fractional Division 16 / 16
|
||
*******************************************************/
|
||
#define r_divdL 26 /* dividend Low */
|
||
#define r_divdH 27 /* dividend High */
|
||
#define r_quoL 24 /* quotient Low */
|
||
#define r_quoH 25 /* quotient High */
|
||
#define r_divL 22 /* divisor */
|
||
#define r_divH 23 /* divisor */
|
||
#define r_cnt 21
|
||
|
||
#if defined (L_divhq3)
|
||
;; Signed fractional division 16 / 16.
;; In:  dividend in r_divdH:r_divdL, divisor in r_divH:r_divL
;; Out: quotient in r_quoH:r_quoL
;; r0 bit 7 carries the sign of the result (XOR of the operand signs)
;; across the call to __udivuhq3, in the same way as __divha3 does.
DEFUN __divhq3
    ;; Sign of the result
    mov     r0, r_divdH
    eor     r0, r_divH
    ;; Work on the absolute values of both operands
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    ;; Equal magnitudes: the quotient has magnitude 1
    cp      r_divdL, r_divL
    cpc     r_divdH, r_divH
    breq    __divhq3_minus1         ; if equal return -1
    XCALL   __udivuhq3
    ;; One bit to the right: 15 fractional bits for the signed format
    lsr     r_quoH
    ror     r_quoL
    ;; Negate the result if needed.  The decision must come from the
    ;; saved sign in r0: the N flag left by the ROR above only mirrors
    ;; bit 7 of the low quotient byte and says nothing about the sign.
    sbrs    r0, 7
    ret
    NEG2    r_quoL
    ret
__divhq3_minus1:
    ;; 0x8000 = -1
    ldi     r_quoH, 0x80
    clr     r_quoL
    ret
ENDF __divhq3
|
||
#endif /* defined (L_divhq3) */
|
||
|
||
#if defined (L_udivuhq3)
|
||
;; Unsigned fractional division 16 / 16.
;; In:  dividend in r_divdH:r_divdL, divisor in r_divH:r_divL
;; Out: quotient in r_quoH:r_quoL
DEFUN __udivuhq3
    ;; Clear the high quotient byte and Carry
    sub     r_quoH,r_quoH
    ;; No return: execution runs on into __udivuha3_common
ENDF __udivuhq3

;; Shift-and-subtract loop, 16 rounds, with r_cnt as loop counter.
;; On entry Carry is the first bit that is shifted into the dividend;
;; __udivuha3 enters here with r_quoH and Carry already set up.
DEFUN __udivuha3_common
    clr     r_quoL                  ; low quotient byte = 0, Carry unchanged
    ldi     r_cnt,16                ; number of rounds
__udivuhq3_loop:
    rol     r_divdL                 ; dividend <<= 1, Carry comes in
    rol     r_divdH
    brcs    __udivuhq3_ep           ; a bit was shifted out: subtract
    cp      r_divdL,r_divL          ; dividend < divisor ?
    cpc     r_divdH,r_divH
    brcc    __udivuhq3_ep           ; no: dividend >= divisor, subtract
    rol     r_quoL                  ; shift Carry = 1 into the quotient
    rjmp    __udivuhq3_cont
__udivuhq3_ep:
    sub     r_divdL,r_divL          ; dividend -= divisor
    sbc     r_divdH,r_divH
    lsl     r_quoL                  ; shift 0 into the quotient
__udivuhq3_cont:
    rol     r_quoH                  ; upper part of the quotient shift
    dec     r_cnt                   ; DEC leaves Carry alone
    brne    __udivuhq3_loop
    ;; The quotient bits were collected inverted (see above): fix that
    com     r_quoL
    com     r_quoH
    ret
ENDF __udivuha3_common
|
||
#endif /* defined (L_udivuhq3) */
|
||
|
||
/*******************************************************
|
||
Fixed Division 8.8 / 8.8
|
||
*******************************************************/
|
||
#if defined (L_divha3)
|
||
;; Signed fixed division 8.8 / 8.8.
;; In:  dividend in r_divdH:r_divdL, divisor in r_divH:r_divL
;; Out: quotient in r_quoH:r_quoL
DEFUN __divha3
    ;; Bit 7 of r0 = sign of the result
    mov     r0, r_divdH
    eor     r0, r_divH
    ;; Work on the absolute values of both operands
    sbrs    r_divH, 7
    rjmp    .Ldivha3_div_abs
    NEG2    r_divL
.Ldivha3_div_abs:
    sbrs    r_divdH, 7
    rjmp    .Ldivha3_divd_abs
    NEG2    r_divdL
.Ldivha3_divd_abs:
    XCALL   __udivuha3
    ;; Adjust to 7 fractional bits
    lsr     r_quoH
    ror     r_quoL
    ;; Done if the result is not negative, else negate it
    sbrs    r0, 7
    ret
    NEG2    r_quoL
    ret
ENDF __divha3
|
||
#endif /* defined (L_divha3) */
|
||
|
||
#if defined (L_udivuha3)
|
||
;; Unsigned fixed division 8.8 / 8.8.
;; Rearranges the dividend so that the loop of the fractional division
;; can be reused: the low dividend byte goes to r_quoH, the high
;; dividend byte becomes the low byte of a zero-extended dividend.
DEFUN __udivuha3
    mov     r_quoH, r_divdL
    mov     r_divdL, r_divdH
    clr     r_divdH
    ;; The top bit of r_quoH goes to Carry, the first bit that the
    ;; loop shifts into the dividend
    lsl     r_quoH
    ;; Same as the fractional division after the rearrangement
    XJMP    __udivuha3_common
ENDF __udivuha3
|
||
#endif /* defined (L_udivuha3) */
|
||
|
||
#undef r_divdL
|
||
#undef r_divdH
|
||
#undef r_quoL
|
||
#undef r_quoH
|
||
#undef r_divL
|
||
#undef r_divH
|
||
#undef r_cnt
|
||
|
||
/*******************************************************
|
||
Fixed Division 16.16 / 16.16
|
||
*******************************************************/
|
||
|
||
#define r_arg1L 24 /* arg1 gets passed already in place */
|
||
#define r_arg1H 25
|
||
#define r_arg1HL 26
|
||
#define r_arg1HH 27
|
||
#define r_divdL 26 /* dividend Low */
|
||
#define r_divdH 27
|
||
#define r_divdHL 30
|
||
#define r_divdHH 31 /* dividend High */
|
||
#define r_quoL 22 /* quotient Low */
|
||
#define r_quoH 23
|
||
#define r_quoHL 24
|
||
#define r_quoHH 25 /* quotient High */
|
||
#define r_divL 18 /* divisor Low */
|
||
#define r_divH 19
|
||
#define r_divHL 20
|
||
#define r_divHH 21 /* divisor High */
|
||
#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
|
||
|
||
#if defined (L_divsa3)
|
||
;; Signed fixed division s16.15 / s16.15.
;; In:  arg1 in r_arg1HH..r_arg1L, divisor in r_divHH..r_divL
;; Out: quotient in r_quoHH..r_quoL
DEFUN __divsa3
    ;; Bit 7 of r0 = sign of the result
    mov     r0, r_arg1HH
    eor     r0, r_divHH
    ;; Work on the absolute values of both operands
    sbrs    r_divHH, 7
    rjmp    .Ldivsa3_div_abs
    NEG4    r_divL
.Ldivsa3_div_abs:
    sbrs    r_arg1HH, 7
    rjmp    .Ldivsa3_arg_abs
    NEG4    r_arg1L
.Ldivsa3_arg_abs:
    XCALL   __udivusa3
    ;; Adjust to 15 fractional bits
    lsr     r_quoHH
    ror     r_quoHL
    ror     r_quoH
    ror     r_quoL
    ;; Done if the result is not negative
    sbrs    r0, 7
    ret
    ;; Otherwise negate the quotient, which starts at r_quoL
    XJMP    __negsi2
ENDF __divsa3
|
||
#endif /* defined (L_divsa3) */
|
||
|
||
#if defined (L_udivusa3)
|
||
;; Unsigned fixed division 16.16 / 16.16.
;; In:  arg1 in r_arg1HH..r_arg1L, divisor in r_divHH..r_divL
;; Out: quotient in r_quoHH..r_quoL
;; r_cnt is __zero_reg__; it is 0 again when the loop has finished.
;; The upper half of arg1 shares its registers with the lower half of the
;; dividend, the lower half of arg1 shares its registers with the upper
;; half of the quotient.
DEFUN __udivusa3
    ;; r_cnt = 32 rounds, loaded by way of an upper register
    ldi     r_divdHL, 32
    mov     r_cnt, r_divdHL
    ;; Upper half of the dividend = 0, lower half of the quotient = 0
    clr     r_divdHL
    clr     r_divdHH
    wmov    r_quoL, r_divdHL
    ;; The top bit of the upper quotient half goes to Carry, the first
    ;; bit that the loop shifts into the dividend
    lsl     r_quoHL
    rol     r_quoHH
__udivusa3_loop:
    rol     r_divdL                 ; dividend <<= 1, Carry comes in
    rol     r_divdH
    rol     r_divdHL
    rol     r_divdHH
    brcs    __udivusa3_ep           ; a bit was shifted out: subtract
    cp      r_divdL,r_divL          ; dividend < divisor ?
    cpc     r_divdH,r_divH
    cpc     r_divdHL,r_divHL
    cpc     r_divdHH,r_divHH
    brcc    __udivusa3_ep           ; no: dividend >= divisor, subtract
    rol     r_quoL                  ; shift Carry = 1 into the quotient
    rjmp    __udivusa3_cont
__udivusa3_ep:
    sub     r_divdL,r_divL          ; dividend -= divisor
    sbc     r_divdH,r_divH
    sbc     r_divdHL,r_divHL
    sbc     r_divdHH,r_divHH
    lsl     r_quoL                  ; shift 0 into the quotient
__udivusa3_cont:
    rol     r_quoH                  ; upper part of the quotient shift
    rol     r_quoHL
    rol     r_quoHH
    dec     r_cnt                   ; DEC leaves Carry alone
    brne    __udivusa3_loop
    ;; The quotient bits were collected inverted (see above): fix that
    com     r_quoL
    com     r_quoH
    com     r_quoHL
    com     r_quoHH
    ret
ENDF __udivusa3
|
||
#endif /* defined (L_udivusa3) */
|
||
|
||
#undef r_arg1L
|
||
#undef r_arg1H
|
||
#undef r_arg1HL
|
||
#undef r_arg1HH
|
||
#undef r_divdL
|
||
#undef r_divdH
|
||
#undef r_divdHL
|
||
#undef r_divdHH
|
||
#undef r_quoL
|
||
#undef r_quoH
|
||
#undef r_quoHL
|
||
#undef r_quoHH
|
||
#undef r_divL
|
||
#undef r_divH
|
||
#undef r_divHL
|
||
#undef r_divHH
|
||
#undef r_cnt
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Saturation, 1 Byte
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;; First Argument and Return Register
|
||
#define A0 24
|
||
|
||
#if defined (L_ssabs_1)
|
||
;; Saturating absolute value, 1 byte, in place in A0.
DEFUN __ssabs_1
    ;; Not negative: nothing to do
    sbrs    A0, 7
    ret
    neg     A0
    ;; Only 0x80 is still negative after the NEG: make it 0x7f
    sbrc    A0,7
    dec     A0
    ret
ENDF __ssabs_1
|
||
#endif /* L_ssabs_1 */
|
||
|
||
#undef A0
|
||
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Saturation, 2 Bytes
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;; First Argument and Return Register
|
||
#define A0 24
|
||
#define A1 A0+1
|
||
|
||
#if defined (L_ssneg_2)
|
||
;; Saturating negation, 2 bytes, in place in A1:A0.
DEFUN __ssneg_2
    NEG2    A0
    ;; No overflow: done
    brvc    .Lssneg_2_done
    ;; Overflow: one less than the value at hand
    sbiw    A0, 1
.Lssneg_2_done:
    ret
ENDF __ssneg_2
|
||
#endif /* L_ssneg_2 */
|
||
|
||
#if defined (L_ssabs_2)
|
||
;; Saturating absolute value, 2 bytes, in place in A1:A0.
DEFUN __ssabs_2
    ;; Not negative: nothing to do
    sbrs    A1, 7
    ret
    ;; Negative: the saturating negation does the rest
    XJMP    __ssneg_2
ENDF __ssabs_2
|
||
#endif /* L_ssabs_2 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Saturation, 4 Bytes
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;; First Argument and Return Register
|
||
#define A0 22
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
#define A3 A0+3
|
||
|
||
#if defined (L_ssneg_4)
|
||
;; Saturating negation, 4 bytes, in place in A3..A0.
DEFUN __ssneg_4
    XCALL   __negsi2
    ;; No overflow: done
    brvc    .Lssneg_4_done
    ;; Overflow: load the maximum 0x7fffffff
    ldi     A0, 0xff
    ldi     A1, 0xff
    ldi     A2, 0xff
    ldi     A3, 0x7f
.Lssneg_4_done:
    ret
ENDF __ssneg_4
|
||
#endif /* L_ssneg_4 */
|
||
|
||
#if defined (L_ssabs_4)
|
||
;; Saturating absolute value, 4 bytes, in place in A3..A0.
DEFUN __ssabs_4
    ;; Not negative: nothing to do
    sbrs    A3, 7
    ret
    ;; Negative: the saturating negation does the rest
    XJMP    __ssneg_4
ENDF __ssabs_4
|
||
#endif /* L_ssabs_4 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Saturation, 8 Bytes
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;; First Argument and Return Register
|
||
#define A0 18
|
||
#define A1 A0+1
|
||
#define A2 A0+2
|
||
#define A3 A0+3
|
||
#define A4 A0+4
|
||
#define A5 A0+5
|
||
#define A6 A0+6
|
||
#define A7 A0+7
|
||
|
||
#if defined (L_clr_8)
|
||
;; The unsigned saturating negations of the 8-byte modes are aliases
;; of __clr_8.
FALIAS __usneguta2
FALIAS __usneguda2
FALIAS __usnegudq2

;; A[] = 0, Carry = 0
DEFUN __clr_8
    ;; Carry = 0, Z = 1
    sub     A7, A7
    ;; No return: execution runs on into __sbc_8
ENDF __clr_8

;; Set all 8 bytes of A[] to 0xff if Carry is set and to 0x00 otherwise.
;; Carry is the same after the routine as before.
DEFUN __sbc_8
    ;; Rd - Rd - C is 0x00 or 0xff and reproduces C
    sbc     A7, A7
    sbc     A6, A6
    ;; Copy A7:A6 to the remaining three byte pairs
    wmov    A0, A6
    wmov    A2, A6
    wmov    A4, A6
    ret
ENDF __sbc_8
|
||
#endif /* L_clr_8 */
|
||
|
||
#if defined (L_ssneg_8)
FALIAS __ssnegta2
FALIAS __ssnegda2
FALIAS __ssnegdq2

;; Saturating negation of the signed 8-byte value in A[] (R25:R18).
;; The negation is done by __negdi2 (defined elsewhere); this routine
;; relies on the V flag it returns with.
DEFUN __ssneg_8
    XCALL   __negdi2
    brvc 1f
    ;; Signed overflow: A[] = 0x7fffffffffffffff.
    ;; With Carry set, __sbc_8 fills all eight bytes with 0xff ...
    sec
    XCALL   __sbc_8
    ;; ... and the most significant byte is then patched to 0x7f
    ldi     A7, 0x7f
1:  ret
ENDF __ssneg_8
#endif /* L_ssneg_8 */
|
||
|
||
#if defined (L_ssabs_8)
FALIAS __ssabsta2
FALIAS __ssabsda2
FALIAS __ssabsdq2

;; Saturating absolute value of the signed 8-byte value in A[].
DEFUN __ssabs_8
    ;; Sign bit clear: value is non-negative, return it unchanged
    sbrs    A7, 7
    ret
    ;; Sign bit set: tail-call the saturating negation
    XJMP    __ssneg_8
ENDF __ssabs_8
#endif /* L_ssabs_8 */
|
||
|
||
;; Second Argument
|
||
#define B0 10
|
||
#define B1 B0+1
|
||
#define B2 B0+2
|
||
#define B3 B0+3
|
||
#define B4 B0+4
|
||
#define B5 B0+5
|
||
#define B6 B0+6
|
||
#define B7 B0+7
|
||
|
||
#if defined (L_usadd_8)
FALIAS __usadduta3
FALIAS __usadduda3
FALIAS __usaddudq3

;; Unsigned saturating 8-byte addition.  The addition is done by
;; __adddi3 (defined elsewhere); this routine relies on the Carry it
;; returns with.
DEFUN __usadd_8
    XCALL   __adddi3
    brcs 1f
    ;; No carry out: the sum is the result
    ret
1:  ;; Carry out: A[] = 0xffffffffffffffff.
    ;; Carry is set here, so __sbc_8 fills every byte with 0xff.
    XJMP    __sbc_8
ENDF __usadd_8
#endif /* L_usadd_8 */
|
||
|
||
#if defined (L_ussub_8)
FALIAS __ussubuta3
FALIAS __ussubuda3
FALIAS __ussubudq3

;; Unsigned saturating 8-byte subtraction.  The subtraction is done by
;; __subdi3 (defined elsewhere); this routine relies on the Carry it
;; returns with.
DEFUN __ussub_8
    XCALL   __subdi3
    brcs 1f
    ;; No borrow: the difference is the result
    ret
1:  ;; Borrow: saturate at the lower bound, A[] = 0
    XJMP    __clr_8
ENDF __ussub_8
#endif /* L_ussub_8 */
|
||
|
||
#if defined (L_ssadd_8)
FALIAS __ssaddta3
FALIAS __ssaddda3
FALIAS __ssadddq3

;; Signed saturating 8-byte addition.  The addition is done by
;; __adddi3 (defined elsewhere); this routine relies on the V flag it
;; returns with and on B7 still holding the top byte of the second
;; operand.
DEFUN __ssadd_8
    XCALL   __adddi3
    brvc 1f
    ;; Signed overflow:  A = (B >= 0) ? INT64_MAX : INT64_MIN
    ;; Carry = 1 iff B7 < 0x80, i.e. iff B is non-negative
    cpi     B7, 0x80
    ;; A[] = 0xff...ff if Carry is set, else 0x00...00
    XCALL   __sbc_8
    ;; Top byte: 0xff - 0x80 = 0x7f  resp.  0x00 - 0x80 = 0x80
    subi    A7, 0x80
1:  ret
ENDF __ssadd_8
#endif /* L_ssadd_8 */
|
||
|
||
#if defined (L_sssub_8)
FALIAS __sssubta3
FALIAS __sssubda3
FALIAS __sssubdq3

;; Signed saturating 8-byte subtraction.  The subtraction is done by
;; __subdi3 (defined elsewhere); this routine relies on the V flag it
;; returns with and on B7 still holding the top byte of the second
;; operand.
DEFUN __sssub_8
    XCALL   __subdi3
    brvc 1f
    ;; Signed overflow:  A = (B < 0) ? INT64_MAX : INT64_MIN
    ;; Carry = 1 iff 0x7f < B7 (unsigned), i.e. iff B is negative
    ldi     A7, 0x7f
    cp      A7, B7
    ;; A[] = 0xff...ff if Carry is set, else 0x00...00
    XCALL   __sbc_8
    ;; Top byte: 0xff - 0x80 = 0x7f  resp.  0x00 - 0x80 = 0x80
    subi    A7, 0x80
1:  ret
ENDF __sssub_8
#endif /* L_sssub_8 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
#undef A4
|
||
#undef A5
|
||
#undef A6
|
||
#undef A7
|
||
#undef B0
|
||
#undef B1
|
||
#undef B2
|
||
#undef B3
|
||
#undef B4
|
||
#undef B5
|
||
#undef B6
|
||
#undef B7
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Rounding Helpers
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
#ifdef L_mask1

#define AA 24
#define CC 25

;; R25 = 1 << (R24 & 7)
;; i.e. CC = 1 << (AA & 7)
;; AA is only read.  Only CC is written (LSL may also alter SREG).
DEFUN __mask1
    ;; Bit 1 of AA selects the start value:  CC = AA.1 ? 4 : 1
    ldi     CC, 1 << 2
    sbrs    AA, 1
    ldi     CC, 1 << 0
    ;; Bit 0 of AA: multiply by 2
    sbrc    AA, 0
    lsl     CC
    ;; Bit 2 of AA: multiply by 16.  CC is at most 8 here, so
    ;; swapping the nibbles is the same as shifting left by 4.
    sbrc    AA, 2
    swap    CC
    ret
ENDF __mask1

#undef AA
#undef CC
#endif /* L_mask1 */
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;; The rounding point. Any bits smaller than
|
||
;; 2^{-RP} will be cleared.
|
||
#define RP R24
|
||
|
||
#define A0 22
|
||
#define A1 A0 + 1
|
||
|
||
#define C0 24
|
||
#define C1 C0 + 1
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Rounding, 1 Byte
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
#ifdef L_roundqq3

;; R24 = round (R22, R24)
;; Signed 1-byte rounding with saturation.
;; Clobbers: R22, __tmp_reg__.  R25 is saved and restored.
DEFUN __roundqq3
    ;; __mask1 returns its result in R25 (C1): keep a copy of R25
    mov     __tmp_reg__, C1
    ;; RP = FBIT-1 - RP
    subi    RP, __QQ_FBIT__ - 1
    neg     RP
    ;; R25 = 1 << (RP & 7), the addend 2^{-RP-1}
    XCALL   __mask1
    mov     C0, C1
    ;; A0 += addend; V tells whether the signed sum overflowed
    add     A0, C0
    brvc 1f
    ;; Signed overflow: saturate to 0x7f
    ldi     C0, 0x7f
    rjmp 2f
1:  ;; C0 = -(2 * addend) is a mask with all bits from the rounding
    ;; point upwards set; apply it to the sum
    lsl     C0
    neg     C0
    and     C0, A0
2:  ;; Restore R25
    mov     C1, __tmp_reg__
    ret
ENDF __roundqq3
#endif /* L_roundqq3 */
|
||
|
||
#ifdef L_rounduqq3

;; R24 = round (R22, R24)
;; Unsigned 1-byte rounding with saturation.
;; Clobbers: R22, __tmp_reg__.  R25 is saved and restored.
DEFUN __rounduqq3
    ;; __mask1 returns its result in R25 (C1): keep a copy of R25
    mov     __tmp_reg__, C1
    ;; RP = FBIT-1 - RP
    subi    RP, __UQQ_FBIT__ - 1
    neg     RP
    ;; R25 = 1 << (RP & 7), the addend 2^{-RP-1}
    XCALL   __mask1
    mov     C0, C1
    ;; A0 += addend; Carry tells whether the unsigned sum overflowed
    add     A0, C0
    brcc 1f
    ;; Unsigned overflow: saturate to 0xff
    ldi     C0, 0xff
    rjmp 2f
1:  ;; C0 = -(2 * addend) is a mask with all bits from the rounding
    ;; point upwards set; apply it to the sum
    lsl     C0
    neg     C0
    and     C0, A0
2:  ;; Restore R25
    mov     C1, __tmp_reg__
    ret
ENDF __rounduqq3
#endif /* L_rounduqq3 */
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Rounding, 2 Bytes
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
#ifdef L_addmask_2

;; [ R25:R24  = 1 << R24
;;   R23:R22 += 1 << R24 ]
;; SREG is set according to the addition.
;; The byte selection below compares R24 against 8, so the result is
;; 1 << R24 for R24 in 0...15 (presumably the only range callers use).
DEFUN __addmask_2
    ;; R25 (C1) = 1 << (R24 & 7)
    XCALL   __mask1
    ;; C0 = 0xff if RP < 8, else 0x00.  Note that C0 is the same
    ;; register as RP, so RP is consumed here.
    cpi     RP, 1 << 3
    sbc     C0, C0
    ;; RP <  8:  C0 = mask, C1 = 0     (bit lives in the low byte)
    ;; RP >= 8:  C0 = 0,    C1 = mask  (bit lives in the high byte)
    and     C0, C1
    eor     C1, C0
    ;; A[] += C[]
    add     A0, C0
    adc     A1, C1
    ret
ENDF __addmask_2
#endif /* L_addmask_2 */
|
||
|
||
#ifdef L_round_s2

;; R25:R24 = round (R23:R22, R24)
;; Signed 2-byte rounding, entry points for HQ and HA.
;; Clobbers: R23, R22
DEFUN __roundhq3
    ;; Account for the difference in fractional bits between HQ and
    ;; HA, then share the HA code.
    subi    RP, __HQ_FBIT__ - __HA_FBIT__
    ;; FALLTHRU
ENDF __roundhq3
DEFUN __roundha3
    ;; RP = FBIT-1 - RP
    subi    RP, __HA_FBIT__ - 1
    neg     RP
    ;; [ R25:R24  = 1 << (FBIT-1 - RP)
    ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_2
    ;; Saturate resp. mask, based on the flags of that addition
    XJMP    __round_s2_const
ENDF __roundha3

#endif /* L_round_s2 */
|
||
|
||
#ifdef L_round_u2

;; R25:R24 = round (R23:R22, R24)
;; Unsigned 2-byte rounding, entry points for UHQ and UHA.
;; Clobbers: R23, R22
DEFUN __rounduhq3
    ;; Account for the difference in fractional bits between UHQ and
    ;; UHA, then share the UHA code.
    subi    RP, __UHQ_FBIT__ - __UHA_FBIT__
    ;; FALLTHRU
ENDF __rounduhq3
DEFUN __rounduha3
    ;; RP = FBIT-1 - RP
    subi    RP, __UHA_FBIT__ - 1
    neg     RP
    ;; [ R25:R24  = 1 << (FBIT-1 - RP)
    ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_2
    ;; Saturate resp. mask, based on the flags of that addition
    XJMP    __round_u2_const
ENDF __rounduha3

#endif /* L_round_u2 */
|
||
|
||
|
||
#ifdef L_round_2_const

;; Second half of the 2 byte wide rounding.
;; On entry:  A[] = value + 2^{-RP-1},  C[] = 2^{-RP-1}, and SREG
;; still reflects that addition.
;; On exit:   C[] = saturated maximum if the addition overflowed,
;;            otherwise A[] with the bits below the rounding point
;;            cleared.

DEFUN __round_s2_const
    ;; Signed: V clear means no overflow
    brvc 5f
    ;; Signed overflow: C[] = 0x7fff, low byte is filled in at 4:
    ldi     C1, 0x7f
    rjmp 4f
    ;; The jump above is unconditional, so execution never falls
    ;; through into __round_u2_const.
ENDF __round_s2_const

DEFUN __round_u2_const
    ;; Unsigned: Carry clear means no overflow
    brcc 5f
    ;; Unsigned overflow: C[] = 0xffff
    ldi     C1, 0xff
4:
    ldi     C0, 0xff
    rjmp 6f
5:
    ;; No saturation needed.
    ;; C[] = 2^{-RP-1}  -->  C[] = 2^{-RP}
    lsl     C0
    rol     C1
    ;; C[] = -C[]: all bits from the rounding point upwards are set
    NEG2    C0
    ;; Clear the bits beyond the rounding point.
    and     C0, A0
    and     C1, A1
6:  ret
ENDF __round_u2_const

#endif /* L_round_2_const */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef C0
|
||
#undef C1
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Rounding, 4 Bytes
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
#define A0 18
|
||
#define A1 A0 + 1
|
||
#define A2 A0 + 2
|
||
#define A3 A0 + 3
|
||
|
||
#define C0 22
|
||
#define C1 C0 + 1
|
||
#define C2 C0 + 2
|
||
#define C3 C0 + 3
|
||
|
||
#ifdef L_addmask_4

;; [ R25:R22  = 1 << (R24 & 31)
;;   R21:R18 += 1 << (R24 & 31) ]
;; SREG is set according to the addition.
;;
;; The byte that receives the bit is selected by RP.4 and RP.3.
;; RP.3 has to be tested as a bit: comparing RP against 8 only gives
;; the right answer for RP < 16 and would place the bit one byte too
;; high for RP in 16...23.
DEFUN __addmask_4
    ;; R25 (C3) = 1 << (R24 & 7)
    XCALL   __mask1
    ;; C1:C0 = 0xffff if RP < 16, else 0x0000
    cpi     RP, 1 << 4
    sbc     C0, C0
    sbc     C1, C1
    ;; Reduce RP to its bit 3 so that the compare below sets Carry
    ;; iff RP.3 is clear.  RP is the same register as C2 and is
    ;; overwritten right afterwards anyway.
    andi    RP, 1 << 3
    cpi     RP, 1 << 3
    ;; C2 = 0xff if RP.3 is clear, else 0x00
    sbc     C2, C2
    ;; Move C3 to C2 if RP.3 is not set
    and     C2, C3
    eor     C3, C2
    ;; Move C3:C2 to C1:C0 if RP.4 is not set
    and     C0, C2
    eor     C2, C0
    and     C1, C3
    eor     C3, C1
    ;; Finally, add the power-of-two: A[] += C[]
    add     A0, C0
    adc     A1, C1
    adc     A2, C2
    adc     A3, C3
    ret
ENDF __addmask_4
#endif /* L_addmask_4 */
|
||
|
||
#ifdef L_round_s4

;; R25:R22 = round (R21:R18, R24)
;; Signed 4-byte rounding, entry points for SQ and SA.
;; Clobbers: R18...R21
DEFUN __roundsq3
    ;; Account for the difference in fractional bits between SQ and
    ;; SA, then share the SA code.
    subi    RP, __SQ_FBIT__ - __SA_FBIT__
    ;; FALLTHRU
ENDF __roundsq3
DEFUN __roundsa3
    ;; RP = FBIT-1 - RP
    subi    RP, __SA_FBIT__ - 1
    neg     RP
    ;; [ R25:R22  = 1 << (FBIT-1 - RP)
    ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_4
    ;; Saturate resp. mask, based on the flags of that addition
    XJMP    __round_s4_const
ENDF __roundsa3

#endif /* L_round_s4 */
|
||
|
||
#ifdef L_round_u4

;; R25:R22 = round (R21:R18, R24)
;; Unsigned 4-byte rounding, entry points for USQ and USA.
;; Clobbers: R18...R21
DEFUN __roundusq3
    ;; Account for the difference in fractional bits between USQ and
    ;; USA, then share the USA code.
    subi    RP, __USQ_FBIT__ - __USA_FBIT__
    ;; FALLTHRU
ENDF __roundusq3
DEFUN __roundusa3
    ;; RP = FBIT-1 - RP
    subi    RP, __USA_FBIT__ - 1
    neg     RP
    ;; [ R25:R22  = 1 << (FBIT-1 - RP)
    ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_4
    ;; Saturate resp. mask, based on the flags of that addition
    XJMP    __round_u4_const
ENDF __roundusa3

#endif /* L_round_u4 */
|
||
|
||
|
||
#ifdef L_round_4_const

;; Second half of the 4 byte wide rounding.
;; On entry:  A[] = value + 2^{-RP-1},  C[] = 2^{-RP-1}, and SREG
;; still reflects that addition.
;; On exit:   C[] = saturated maximum if the addition overflowed,
;;            otherwise A[] with the bits below the rounding point
;;            cleared.

DEFUN __round_s4_const
    ;; Signed: V clear means no overflow
    brvc 5f
    ;; Signed overflow: C[] = 0x7fffffff, low bytes are filled in at 4:
    ldi     C3, 0x7f
    rjmp 4f
    ;; The jump above is unconditional, so execution never falls
    ;; through into __round_u4_const.
ENDF __round_s4_const

DEFUN __round_u4_const
    ;; Unsigned: Carry clear means no overflow
    brcc 5f
    ;; Unsigned overflow: C[] = 0xffffffff
    ldi     C3, 0xff
4:
    ldi     C0, 0xff
    ldi     C1, 0xff
    ldi     C2, 0xff
    rjmp 6f
5:
    ;; No saturation needed.
    ;; C[] = 2^{-RP-1}  -->  C[] = 2^{-RP}
    lsl     C0
    rol     C1
    rol     C2
    rol     C3
    ;; C[] = -C[] (C[] occupies R25:R22; __negsi2 is defined elsewhere
    ;; and presumably negates that register quad in place)
    XCALL   __negsi2
    ;; Clear the bits beyond the rounding point.
    and     C0, A0
    and     C1, A1
    and     C2, A2
    and     C3, A3
6:  ret
ENDF __round_u4_const

#endif /* L_round_4_const */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
#undef C3
|
||
|
||
#undef RP
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
;; Rounding, 8 Bytes
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
#define RP 16
|
||
#define FBITm1 31
|
||
|
||
#define C0 18
|
||
#define C1 C0 + 1
|
||
#define C2 C0 + 2
|
||
#define C3 C0 + 3
|
||
#define C4 C0 + 4
|
||
#define C5 C0 + 5
|
||
#define C6 C0 + 6
|
||
#define C7 C0 + 7
|
||
|
||
#define A0 16
|
||
#define A1 17
|
||
#define A2 26
|
||
#define A3 27
|
||
#define A4 28
|
||
#define A5 29
|
||
#define A6 30
|
||
#define A7 31
|
||
|
||
|
||
#ifdef L_rounddq3
;; R25:R18 = round (R25:R18, R16)
;; Signed DQ entry point of the 8-byte rounding.
;; Clobbers: ABI
DEFUN __rounddq3
    ;; T = 0 selects signed saturation in __round_x8
    clt
    ;; Hand FBIT-1 over in R31
    ldi     FBITm1, __DQ_FBIT__ - 1
    XJMP    __round_x8
ENDF __rounddq3
#endif /* L_rounddq3 */
|
||
|
||
#ifdef L_roundudq3
;; R25:R18 = round (R25:R18, R16)
;; Unsigned UDQ entry point of the 8-byte rounding.
;; Clobbers: ABI
DEFUN __roundudq3
    ;; T = 1 selects unsigned saturation in __round_x8
    set
    ;; Hand FBIT-1 over in R31
    ldi     FBITm1, __UDQ_FBIT__ - 1
    XJMP    __round_x8
ENDF __roundudq3
#endif /* L_roundudq3 */
|
||
|
||
#ifdef L_roundda3
;; R25:R18 = round (R25:R18, R16)
;; Signed DA entry point of the 8-byte rounding.
;; Clobbers: ABI
DEFUN __roundda3
    ;; T = 0 selects signed saturation in __round_x8
    clt
    ;; Hand FBIT-1 over in R31
    ldi     FBITm1, __DA_FBIT__ - 1
    XJMP    __round_x8
ENDF __roundda3
#endif /* L_roundda3 */
|
||
|
||
#ifdef L_rounduda3
;; R25:R18 = round (R25:R18, R16)
;; Unsigned UDA entry point of the 8-byte rounding.
;; Clobbers: ABI
DEFUN __rounduda3
    ;; T = 1 selects unsigned saturation in __round_x8
    set
    ;; Hand FBIT-1 over in R31
    ldi     FBITm1, __UDA_FBIT__ - 1
    XJMP    __round_x8
ENDF __rounduda3
#endif /* L_rounduda3 */
|
||
|
||
#ifdef L_roundta3
;; R25:R18 = round (R25:R18, R16)
;; Signed TA entry point of the 8-byte rounding.
;; Clobbers: ABI
DEFUN __roundta3
    ;; T = 0 selects signed saturation in __round_x8
    clt
    ;; Hand FBIT-1 over in R31
    ldi     FBITm1, __TA_FBIT__ - 1
    XJMP    __round_x8
ENDF __roundta3
#endif /* L_roundta3 */
|
||
|
||
#ifdef L_rounduta3
;; R25:R18 = round (R25:R18, R16)
;; Unsigned UTA entry point of the 8-byte rounding.
;; Clobbers: ABI
DEFUN __rounduta3
    ;; T = 1 selects unsigned saturation in __round_x8
    set
    ;; Hand FBIT-1 over in R31
    ldi     FBITm1, __UTA_FBIT__ - 1
    XJMP    __round_x8
ENDF __rounduta3
#endif /* L_rounduta3 */
|
||
|
||
|
||
#ifdef L_round_x8
;; Common worker for all 8-byte rounding entry points.
;; In:   C[] (R25:R18) = value to round
;;       RP (R16)      = rounding point
;;       FBITm1 (R31)  = FBIT-1 of the type
;;       T flag        = 0 for signed, 1 for unsigned saturation
;; Out:  C[] (R25:R18) = rounded and saturated value
;; R16, R17, R28 and R29 are saved and restored.  The work copy A[]
;; lives in R17:R16 and R31...R26, where A0 shares R16 with RP.
;; The T flag is expected to survive the helper calls below.
DEFUN __round_x8
    push    r16
    push    r17
    push    r28
    push    r29
    ;; Compute log2 of the addend:  RP = FBIT-1 - RP
    sub     RP, FBITm1
    neg     RP
    ;; Copy the input to the work registers A[].  R16 is busy holding
    ;; the shift count, so the low byte is parked on the stack.
    push    C0
    mov     A1, C1
    wmov    A2, C2
    wmov    A4, C4
    wmov    A6, C6
    ;; C[] = 1, then C[] = 1 << (FBIT-1 - RP) using __ashldi3
    ;; (defined elsewhere; presumably shifts R25:R18 left by R16)
    XCALL   __clr_8
    inc     C0
    XCALL   __ashldi3
    ;; The shift count is no longer needed: fetch the low byte into A0
    pop     A0
    ;; A[] += C[]
    add     A0, C0
    adc     A1, C1
    adc     A2, C2
    adc     A3, C3
    adc     A4, C4
    adc     A5, C5
    adc     A6, C6
    adc     A7, C7
    ;; Pick the overflow test according to T
    brts 4f
    ;; Signed: V clear means no overflow
    brvc 6f
    ;; Signed overflow; V is known to be set, so this always jumps
    brvs 5f
4:  ;; Unsigned: Carry clear means no overflow
    brcc 6f
5:  ;; Overflow: C[] = 0xff...ff, then copy T into bit 7 of the top
    ;; byte.  Signed (T = 0) gives 0x7f...ff, unsigned (T = 1)
    ;; leaves 0xff...ff.
    ldi     C7, 0xff
    ldi     C6, 0xff
    wmov    C4, C6
    wmov    C2, C6
    wmov    C0, C6
    bld     C7, 7
    rjmp 7f
6:
    ;; No overflow.  C[] = -(2 * C[]), a mask with all bits from the
    ;; rounding point upwards set.  R16 is needed as shift count once
    ;; more, so A0 is parked on the stack meanwhile.
    push    A0
    ldi     r16, 1
    XCALL   __ashldi3
    pop     A0
    XCALL   __negdi2
    ;; Clear the bits beyond the rounding point.
    and     C0, A0
    and     C1, A1
    and     C2, A2
    and     C3, A3
    and     C4, A4
    and     C5, A5
    and     C6, A6
    and     C7, A7
7:  ;; Epilogue: restore the saved registers in reverse order
    pop     r29
    pop     r28
    pop     r17
    pop     r16
    ret
ENDF __round_x8

#endif /* L_round_x8 */
|
||
|
||
#undef A0
|
||
#undef A1
|
||
#undef A2
|
||
#undef A3
|
||
#undef A4
|
||
#undef A5
|
||
#undef A6
|
||
#undef A7
|
||
|
||
#undef C0
|
||
#undef C1
|
||
#undef C2
|
||
#undef C3
|
||
#undef C4
|
||
#undef C5
|
||
#undef C6
|
||
#undef C7
|
||
|
||
#undef RP
|
||
#undef FBITm1
|
||
|
||
|
||
;; Supply implementations / symbols for the bit-banging functions
|
||
;; __builtin_avr_bitsfx and __builtin_avr_fxbits
|
||
#ifdef L_ret
;; A function that does nothing but return.  It provides the symbol
;; used for the bit-banging built-ins __builtin_avr_bitsfx and
;; __builtin_avr_fxbits.
DEFUN __ret
    ret
ENDF __ret
#endif /* L_ret */
|
||
|
||
#endif /* if not __AVR_TINY__ */
|