Updated from /src/gmp-1.937

commit 3de9f02e92 (parent f860256b2e)
Roland McGrath, 1996-03-01 18:43:45 +00:00
34 changed files with 1393 additions and 419 deletions


@@ -26,16 +26,7 @@
# size r18
# s2_limb r19
# This code runs at 42 cycles/limb on the 21064.
# To improve performance for long multiplications, we would use
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
# these instructions without slowing down the general code: 1. We can
# only have two prefetches in operation at any time in the Alpha
# architecture. 2. There will seldom be any special alignment
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
# loop into an inner and outer loop, having the inner loop handle
# exactly one prefetch block?
# This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
.set noreorder
.set noat
@@ -52,7 +43,7 @@ __mpn_addmul_1:
mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
umulh $2,$19,$0 # $0 = prod_high
beq $18,Lend1 # jump if size was == 1
beq $18,.Lend1 # jump if size was == 1
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
@@ -60,10 +51,10 @@ __mpn_addmul_1:
cmpult $3,$5,$4
stq $3,0($16)
addq $16,8,$16 # res_ptr++
beq $18,Lend2 # jump if size was == 2
beq $18,.Lend2 # jump if size was == 2
.align 3
Loop: mulq $2,$19,$3 # $3 = prod_low
.Loop: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
@@ -77,9 +68,9 @@ Loop: mulq $2,$19,$3 # $3 = prod_low
stq $3,0($16)
addq $16,8,$16 # res_ptr++
addq $5,$0,$0 # combine carries
bne $18,Loop
bne $18,.Loop
Lend2: mulq $2,$19,$3 # $3 = prod_low
.Lend2: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
@@ -91,7 +82,7 @@ Lend2: mulq $2,$19,$3 # $3 = prod_low
addq $5,$0,$0 # combine carries
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
Lend1: addq $5,$3,$3
.Lend1: addq $5,$3,$3
cmpult $3,$5,$5
stq $3,0($16)
addq $0,$5,$0
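
At the top of this file the commit replaces the prefetching discussion with a plain timing note (42 cycles/limb on EV4, 18 on EV5). For reference, what __mpn_addmul_1 computes, as a minimal C sketch (the name mpn_addmul_1_ref and the use of unsigned __int128 are illustrative assumptions; GMP's portable fallback is written with umul_ppmm instead): mulq and umulh deliver the low and high halves of the product, and each cmpult recovers a carry that C expresses as an unsigned compare.

#include <stdint.h>
typedef uint64_t mp_limb_t; /* one Alpha limb */

mp_limb_t
mpn_addmul_1_ref (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
                  long size, mp_limb_t s2_limb)
{
  mp_limb_t cy = 0;
  for (long i = 0; i < size; i++)
    {
      unsigned __int128 prod = (unsigned __int128) s1_ptr[i] * s2_limb;
      mp_limb_t lo = (mp_limb_t) prod + cy;    /* mulq result plus carry-in */
      cy = (mp_limb_t) (prod >> 64)            /* umulh */
           + (lo < cy);                        /* cmpult: carry from the add */
      mp_limb_t r = res_ptr[i] + lo;           /* add into *res_ptr */
      cy += r < lo;                            /* cmpult: combine carries */
      res_ptr[i] = r;
    }
  return cy;
}

The asm is this loop with the loads hoisted to hide the multiply latency; the carry limb lives in $0 throughout.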


@@ -35,84 +35,113 @@
__mpn_add_n:
.frame $30,0,$26,0
ldq $3,0($17)
ldq $4,0($18)
subq $19,1,$19
and $19,4-1,$2 # number of limbs in first loop
bis $31,$31,$0
beq $2,.L0 # if multiple of 4 limbs, skip first loop
subq $19,$2,$19
.Loop0: subq $2,1,$2
or $31,$31,$25 # clear cy
subq $19,4,$19 # decr loop cnt
blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
# Start software pipeline for 1st loop
ldq $0,0($18)
ldq $1,8($18)
ldq $4,0($17)
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
addq $17,8,$17
addq $18,8,$18
bis $5,$5,$3
bis $6,$6,$4
addq $16,8,$16
bne $2,.Loop0
.L0: beq $19,.Lend
addq $17,32,$17 # update s1_ptr
ldq $2,16($18)
addq $0,$4,$20 # 1st main add
ldq $3,24($18)
subq $19,4,$19 # decr loop cnt
ldq $6,-16($17)
cmpult $20,$0,$25 # compute cy from last add
ldq $7,-8($17)
addq $1,$25,$28 # cy add
addq $18,32,$18 # update s2_ptr
addq $5,$28,$21 # 2nd main add
cmpult $28,$25,$8 # compute cy from last add
blt $19,.Lend1 # if less than 4 limbs remain, jump
# 1st loop handles groups of 4 limbs in a software pipeline
.align 4
.Loop: subq $19,4,$19
unop
ldq $6,8($18)
addq $4,$0,$0
.Loop: cmpult $21,$28,$25 # compute cy from last add
ldq $0,0($18)
or $8,$25,$25 # combine cy from the two adds
ldq $1,8($18)
addq $2,$25,$28 # cy add
ldq $4,0($17)
addq $28,$6,$22 # 3rd main add
ldq $5,8($17)
cmpult $0,$4,$1
ldq $4,16($18)
addq $3,$0,$20
cmpult $20,$3,$0
ldq $3,16($17)
or $0,$1,$0
addq $6,$0,$0
cmpult $0,$6,$1
ldq $6,24($18)
addq $5,$0,$21
cmpult $21,$5,$0
ldq $5,24($17)
or $0,$1,$0
addq $4,$0,$0
cmpult $0,$4,$1
ldq $4,32($18)
addq $3,$0,$22
cmpult $22,$3,$0
ldq $3,32($17)
or $0,$1,$0
addq $6,$0,$0
cmpult $0,$6,$1
addq $5,$0,$23
cmpult $23,$5,$0
or $0,$1,$0
cmpult $28,$25,$8 # compute cy from last add
cmpult $22,$28,$25 # compute cy from last add
stq $20,0($16)
or $8,$25,$25 # combine cy from the two adds
stq $21,8($16)
stq $22,16($16)
stq $23,24($16)
addq $3,$25,$28 # cy add
addq $28,$7,$23 # 4th main add
cmpult $28,$25,$8 # compute cy from last add
cmpult $23,$28,$25 # compute cy from last add
addq $17,32,$17 # update s1_ptr
or $8,$25,$25 # combine cy from the two adds
addq $16,32,$16 # update res_ptr
addq $0,$25,$28 # cy add
ldq $2,16($18)
addq $4,$28,$20 # 1st main add
ldq $3,24($18)
cmpult $28,$25,$8 # compute cy from last add
ldq $6,-16($17)
cmpult $20,$28,$25 # compute cy from last add
ldq $7,-8($17)
or $8,$25,$25 # combine cy from the two adds
subq $19,4,$19 # decr loop cnt
stq $22,-16($16)
addq $1,$25,$28 # cy add
stq $23,-8($16)
addq $5,$28,$21 # 2nd main add
addq $18,32,$18 # update s2_ptr
cmpult $28,$25,$8 # compute cy from last add
bge $19,.Loop
# Finish software pipeline for 1st loop
.Lend1: cmpult $21,$28,$25 # compute cy from last add
or $8,$25,$25 # combine cy from the two adds
addq $2,$25,$28 # cy add
addq $28,$6,$22 # 3rd main add
cmpult $28,$25,$8 # compute cy from last add
cmpult $22,$28,$25 # compute cy from last add
stq $20,0($16)
or $8,$25,$25 # combine cy from the two adds
stq $21,8($16)
addq $3,$25,$28 # cy add
addq $28,$7,$23 # 4th main add
cmpult $28,$25,$8 # compute cy from last add
cmpult $23,$28,$25 # compute cy from last add
or $8,$25,$25 # combine cy from the two adds
addq $16,32,$16 # update res_ptr
stq $22,-16($16)
stq $23,-8($16)
.Lend2: addq $19,4,$19 # restore loop cnt
beq $19,.Lret
# Start software pipeline for 2nd loop
ldq $0,0($18)
ldq $4,0($17)
subq $19,1,$19
beq $19,.Lend0
# 2nd loop handles remaining 1-3 limbs
.align 4
.Loop0: addq $0,$25,$28 # cy add
ldq $0,8($18)
addq $4,$28,$20 # main add
ldq $4,8($17)
addq $18,8,$18
cmpult $28,$25,$8 # compute cy from last add
addq $17,8,$17
stq $20,0($16)
cmpult $20,$28,$25 # compute cy from last add
subq $19,1,$19 # decr loop cnt
or $8,$25,$25 # combine cy from the two adds
addq $16,8,$16
bne $19,.Loop0
.Lend0: addq $0,$25,$28 # cy add
addq $4,$28,$20 # main add
cmpult $28,$25,$8 # compute cy from last add
cmpult $20,$28,$25 # compute cy from last add
stq $20,0($16)
or $8,$25,$25 # combine cy from the two adds
addq $17,32,$17
addq $18,32,$18
addq $16,32,$16
bne $19,.Loop
.Lend: addq $4,$0,$4
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
.Lret: or $25,$31,$0 # return cy
ret $31,($26),1
.end __mpn_add_n
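
The "cy add" / "main add" comments describe a two-step carry recurrence: the incoming carry is folded into the s2 limb first, the s1 limb is added second, and each step's possible carry-out is caught with a cmpult and OR-combined. At most one of the two compares can fire per limb (if the cy add wraps, its result is 0 and the main add cannot wrap again), so a single bit suffices. The same recurrence as a plain, unpipelined C sketch (hypothetical name):

#include <stdint.h>
typedef uint64_t mp_limb_t;

mp_limb_t
mpn_add_n_ref (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
               const mp_limb_t *s2_ptr, long size)
{
  mp_limb_t cy = 0;
  for (long i = 0; i < size; i++)
    {
      mp_limb_t t = s2_ptr[i] + cy;   /* "cy add" */
      mp_limb_t c1 = t < cy;          /* compute cy from last add */
      mp_limb_t r = s1_ptr[i] + t;    /* "main add" */
      mp_limb_t c2 = r < t;           /* compute cy from last add */
      res_ptr[i] = r;
      cy = c1 | c2;                   /* combine cy from the two adds */
    }
  return cy;
}

The pipelined version above simply runs four of these limb steps per iteration, with the loads and stores of neighboring groups interleaved.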


@@ -25,7 +25,7 @@
# size r18
# cnt r19
# This code runs at 4.25 cycles/limb on the EV5.
# This code runs at 3.25 cycles/limb on the EV5.
.set noreorder
.set noat
@@ -44,11 +44,11 @@ __mpn_lshift:
and $18,4-1,$28 # number of limbs in first loop
srl $4,$20,$0 # compute function result
beq $28,L0
beq $28,.L0
subq $18,$28,$18
.align 3
Loop0: ldq $3,-16($17)
.Loop0: ldq $3,-16($17)
subq $16,8,$16
sll $4,$19,$5
subq $17,8,$17
@@ -57,17 +57,17 @@ Loop0: ldq $3,-16($17)
or $3,$3,$4
or $5,$6,$8
stq $8,0($16)
bne $28,Loop0
bne $28,.Loop0
L0: sll $4,$19,$24
beq $18,Lend
.L0: sll $4,$19,$24
beq $18,.Lend
# warm up phase 1
ldq $1,-16($17)
subq $18,4,$18
ldq $2,-24($17)
ldq $3,-32($17)
ldq $4,-40($17)
beq $18,Lcool1
beq $18,.Lend1
# warm up phase 2
srl $1,$20,$7
sll $1,$19,$21
@@ -84,10 +84,10 @@ L0: sll $4,$19,$24
sll $4,$19,$24
ldq $4,-72($17)
subq $18,4,$18
beq $18,Lcool1
beq $18,.Lend2
.align 4
# main loop
Loop: stq $7,-8($16)
.Loop: stq $7,-8($16)
or $5,$22,$5
stq $8,-16($16)
or $6,$23,$6
@@ -113,16 +113,14 @@ Loop: stq $7,-8($16)
subq $16,32,$16
srl $4,$20,$6
ldq $3,-96($17
ldq $3,-96($17)
sll $4,$19,$24
ldq $4,-104($17)
subq $17,32,$17
bne $18,Loop
unop
unop
bne $18,.Loop
# cool down phase 2/1
Lcool1: stq $7,-8($16)
.Lend2: stq $7,-8($16)
or $5,$22,$5
stq $8,-16($16)
or $6,$23,$6
@@ -150,7 +148,7 @@ Lcool1: stq $7,-8($16)
ret $31,($26),1
# cool down phase 1/1
Lcool1: srl $1,$20,$7
.Lend1: srl $1,$20,$7
sll $1,$19,$21
srl $2,$20,$8
sll $2,$19,$22
@@ -170,6 +168,6 @@ Lcool1: srl $1,$20,$7
stq $24,-40($16)
ret $31,($26),1
Lend stq $24,-8($16)
.Lend: stq $24,-8($16)
ret $31,($26),1
.end __mpn_lshift
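
The warm-up, main loop, and cool-down labels mark the phases of a four-limb software pipeline; the operation underneath is short. A C sketch of the contract (hypothetical name; 64-bit limbs, 0 < cnt < 64): the loop runs downward from the most significant limb, which is why a destination overlapping the source at higher addresses is fine, and the function result is the bits shifted out of the top limb. __mpn_rshift below is the mirror image, running upward.

#include <stdint.h>
typedef uint64_t mp_limb_t;

mp_limb_t
mpn_lshift_ref (mp_limb_t *res_ptr, const mp_limb_t *s_ptr,
                long size, unsigned cnt)
{
  mp_limb_t ret = s_ptr[size - 1] >> (64 - cnt);  /* compute function result */
  for (long i = size - 1; i > 0; i--)
    res_ptr[i] = (s_ptr[i] << cnt) | (s_ptr[i - 1] >> (64 - cnt));
  res_ptr[0] = s_ptr[0] << cnt;
  return ret;
}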


@@ -25,7 +25,7 @@
# size r18
# cnt r19
# This code runs at 4.25 cycles/limb on the EV5.
# This code runs at 3.25 cycles/limb on the EV5.
.set noreorder
.set noat
@@ -42,11 +42,11 @@ __mpn_rshift:
and $18,4-1,$28 # number of limbs in first loop
sll $4,$20,$0 # compute function result
beq $28,L0
beq $28,.L0
subq $18,$28,$18
.align 3
Loop0: ldq $3,8($17)
.Loop0: ldq $3,8($17)
addq $16,8,$16
srl $4,$19,$5
addq $17,8,$17
@@ -55,17 +55,17 @@ Loop0: ldq $3,8($17)
or $3,$3,$4
or $5,$6,$8
stq $8,-8($16)
bne $28,Loop0
bne $28,.Loop0
L0: srl $4,$19,$24
beq $18,Lend
.L0: srl $4,$19,$24
beq $18,.Lend
# warm up phase 1
ldq $1,8($17)
subq $18,4,$18
ldq $2,16($17)
ldq $3,24($17)
ldq $4,32($17)
beq $18,Lcool1
beq $18,.Lend1
# warm up phase 2
sll $1,$20,$7
srl $1,$19,$21
@@ -82,10 +82,10 @@ L0: srl $4,$19,$24
srl $4,$19,$24
ldq $4,64($17)
subq $18,4,$18
beq $18,Lcool2
beq $18,.Lend2
.align 4
# main loop
Loop: stq $7,0($16)
.Loop: stq $7,0($16)
or $5,$22,$5
stq $8,8($16)
or $6,$23,$6
@@ -116,11 +116,9 @@ Loop: stq $7,0($16)
ldq $4,96($17)
addq $17,32,$17
bne $18,Loop
unop
unop
bne $18,.Loop
# cool down phase 2/1
Lcool2: stq $7,0($16)
.Lend2: stq $7,0($16)
or $5,$22,$5
stq $8,8($16)
or $6,$23,$6
@@ -148,7 +146,7 @@ Lcool2: stq $7,0($16)
ret $31,($26),1
# cool down phase 1/1
Lcool1: sll $1,$20,$7
.Lend1: sll $1,$20,$7
srl $1,$19,$21
sll $2,$20,$8
srl $2,$19,$22
@@ -168,6 +166,6 @@ Lcool1: sll $1,$20,$7
stq $24,32($16)
ret $31,($26),1
Lend: stq $24,0($16)
.Lend: stq $24,0($16)
ret $31,($26),1
.end __mpn_rshift


@@ -0,0 +1,148 @@
# Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
# store difference in a third limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $16
# s1_ptr $17
# s2_ptr $18
# size $19
.set noreorder
.set noat
.text
.align 3
.globl __mpn_sub_n
.ent __mpn_sub_n
__mpn_sub_n:
.frame $30,0,$26,0
or $31,$31,$25 # clear cy
subq $19,4,$19 # decr loop cnt
blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
# Start software pipeline for 1st loop
ldq $0,0($18)
ldq $1,8($18)
ldq $4,0($17)
ldq $5,8($17)
addq $17,32,$17 # update s1_ptr
ldq $2,16($18)
subq $4,$0,$20 # 1st main sub
ldq $3,24($18)
subq $19,4,$19 # decr loop cnt
ldq $6,-16($17)
cmpult $4,$20,$25 # compute cy from last sub
ldq $7,-8($17)
addq $1,$25,$28 # cy add
addq $18,32,$18 # update s2_ptr
subq $5,$28,$21 # 2nd main sub
cmpult $28,$25,$8 # compute cy from last add
blt $19,.Lend1 # if less than 4 limbs remain, jump
# 1st loop handles groups of 4 limbs in a software pipeline
.align 4
.Loop: cmpult $5,$21,$25 # compute cy from last add
ldq $0,0($18)
or $8,$25,$25 # combine cy from the two adds
ldq $1,8($18)
addq $2,$25,$28 # cy add
ldq $4,0($17)
subq $6,$28,$22 # 3rd main sub
ldq $5,8($17)
cmpult $28,$25,$8 # compute cy from last add
cmpult $6,$22,$25 # compute cy from last add
stq $20,0($16)
or $8,$25,$25 # combine cy from the two adds
stq $21,8($16)
addq $3,$25,$28 # cy add
subq $7,$28,$23 # 4th main sub
cmpult $28,$25,$8 # compute cy from last add
cmpult $7,$23,$25 # compute cy from last add
addq $17,32,$17 # update s1_ptr
or $8,$25,$25 # combine cy from the two adds
addq $16,32,$16 # update res_ptr
addq $0,$25,$28 # cy add
ldq $2,16($18)
subq $4,$28,$20 # 1st main sub
ldq $3,24($18)
cmpult $28,$25,$8 # compute cy from last add
ldq $6,-16($17)
cmpult $4,$20,$25 # compute cy from last add
ldq $7,-8($17)
or $8,$25,$25 # combine cy from the two adds
subq $19,4,$19 # decr loop cnt
stq $22,-16($16)
addq $1,$25,$28 # cy add
stq $23,-8($16)
subq $5,$28,$21 # 2nd main sub
addq $18,32,$18 # update s2_ptr
cmpult $28,$25,$8 # compute cy from last add
bge $19,.Loop
# Finish software pipeline for 1st loop
.Lend1: cmpult $5,$21,$25 # compute cy from last add
or $8,$25,$25 # combine cy from the two adds
addq $2,$25,$28 # cy add
subq $6,$28,$22 # 3rd main sub
cmpult $28,$25,$8 # compute cy from last add
cmpult $6,$22,$25 # compute cy from last add
stq $20,0($16)
or $8,$25,$25 # combine cy from the two adds
stq $21,8($16)
addq $3,$25,$28 # cy add
subq $7,$28,$23 # 4th main sub
cmpult $28,$25,$8 # compute cy from last add
cmpult $7,$23,$25 # compute cy from last add
or $8,$25,$25 # combine cy from the two adds
addq $16,32,$16 # update res_ptr
stq $22,-16($16)
stq $23,-8($16)
.Lend2: addq $19,4,$19 # restore loop cnt
beq $19,.Lret
# Start software pipeline for 2nd loop
ldq $0,0($18)
ldq $4,0($17)
subq $19,1,$19
beq $19,.Lend0
# 2nd loop handles remaining 1-3 limbs
.align 4
.Loop0: addq $0,$25,$28 # cy add
ldq $0,8($18)
subq $4,$28,$20 # main sub
ldq $1,8($17)
addq $18,8,$18
cmpult $28,$25,$8 # compute cy from last add
addq $17,8,$17
stq $20,0($16)
cmpult $4,$20,$25 # compute cy from last add
subq $19,1,$19 # decr loop cnt
or $8,$25,$25 # combine cy from the two adds
addq $16,8,$16
or $1,$31,$4
bne $19,.Loop0
.Lend0: addq $0,$25,$28 # cy add
subq $4,$28,$20 # main sub
cmpult $28,$25,$8 # compute cy from last add
cmpult $4,$20,$25 # compute cy from last add
stq $20,0($16)
or $8,$25,$25 # combine cy from the two adds
.Lret: or $25,$31,$0 # return cy
ret $31,($26),1
.end __mpn_sub_n
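
The new sub_n mirrors add_n's pipeline with subq in place of addq. Borrow detection rests on the fact that an unsigned a - b wraps exactly when the result exceeds a, hence the cmpult of the minuend against the difference (cmpult $4,$20 and friends). As a C sketch (hypothetical name):

#include <stdint.h>
typedef uint64_t mp_limb_t;

mp_limb_t
mpn_sub_n_ref (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
               const mp_limb_t *s2_ptr, long size)
{
  mp_limb_t cy = 0;
  for (long i = 0; i < size; i++)
    {
      mp_limb_t t = s2_ptr[i] + cy;   /* "cy add" */
      mp_limb_t c1 = t < cy;          /* carry from the cy add */
      mp_limb_t r = s1_ptr[i] - t;    /* "main sub" */
      mp_limb_t c2 = s1_ptr[i] < r;   /* cmpult: borrow from the sub */
      res_ptr[i] = r;
      cy = c1 | c2;                   /* at most one can be set */
    }
  return cy;
}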


@@ -53,11 +53,11 @@ __mpn_lshift:
and $18,4-1,$20 # number of limbs in first loop
srl $4,$7,$0 # compute function result
beq $20,L0
beq $20,.L0
subq $18,$20,$18
.align 3
Loop0:
.Loop0:
ldq $3,-8($17)
subq $16,8,$16
subq $17,8,$17
@@ -67,12 +67,12 @@ Loop0:
bis $3,$3,$4
bis $5,$6,$8
stq $8,0($16)
bne $20,Loop0
bne $20,.Loop0
L0: beq $18,Lend
.L0: beq $18,.Lend
.align 3
Loop: ldq $3,-8($17)
.Loop: ldq $3,-8($17)
subq $16,32,$16
subq $18,4,$18
sll $4,$19,$5
@@ -100,9 +100,9 @@ Loop: ldq $3,-8($17)
bis $1,$2,$8
stq $8,0($16)
bgt $18,Loop
bgt $18,.Loop
Lend: sll $4,$19,$8
.Lend: sll $4,$19,$8
stq $8,-8($16)
ret $31,($26),1
.end __mpn_lshift


@@ -1,7 +1,7 @@
# Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
# the result in a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.


@@ -34,7 +34,7 @@
# 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
# 2. Only aligned instruction pairs can be paired.
# 3. The store buffer or silo might not be able to deal with the bandwidth.
.set noreorder
.set noat
.text
@@ -51,11 +51,11 @@ __mpn_rshift:
and $18,4-1,$20 # number of limbs in first loop
sll $4,$7,$0 # compute function result
beq $20,L0
beq $20,.L0
subq $18,$20,$18
.align 3
Loop0:
.Loop0:
ldq $3,0($17)
addq $16,8,$16
addq $17,8,$17
@@ -65,12 +65,12 @@ Loop0:
bis $3,$3,$4
bis $5,$6,$8
stq $8,-8($16)
bne $20,Loop0
bne $20,.Loop0
L0: beq $18,Lend
.L0: beq $18,.Lend
.align 3
Loop: ldq $3,0($17)
.Loop: ldq $3,0($17)
addq $16,32,$16
subq $18,4,$18
srl $4,$19,$5
@@ -98,9 +98,9 @@ Loop: ldq $3,0($17)
bis $1,$2,$8
stq $8,-8($16)
bgt $18,Loop
bgt $18,.Loop
Lend: srl $4,$19,$8
.Lend: srl $4,$19,$8
stq $8,0($16)
ret $31,($26),1
.end __mpn_rshift


@@ -26,16 +26,7 @@
# size r18
# s2_limb r19
# This code runs at 42 cycles/limb on the 21064.
# To improve performance for long multiplications, we would use
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
# these instructions without slowing down the general code: 1. We can
# only have two prefetches in operation at any time in the Alpha
# architecture. 2. There will seldom be any special alignment
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
# loop into an inner and outer loop, having the inner loop handle
# exactly one prefetch block?
# This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
.set noreorder
.set noat
@@ -52,7 +43,7 @@ __mpn_submul_1:
mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
umulh $2,$19,$0 # $0 = prod_high
beq $18,Lend1 # jump if size was == 1
beq $18,.Lend1 # jump if size was == 1
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
@@ -60,10 +51,10 @@ __mpn_submul_1:
cmpult $5,$3,$4
stq $3,0($16)
addq $16,8,$16 # res_ptr++
beq $18,Lend2 # jump if size was == 2
beq $18,.Lend2 # jump if size was == 2
.align 3
Loop: mulq $2,$19,$3 # $3 = prod_low
.Loop: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
@@ -77,9 +68,9 @@ Loop: mulq $2,$19,$3 # $3 = prod_low
stq $3,0($16)
addq $16,8,$16 # res_ptr++
addq $5,$0,$0 # combine carries
bne $18,Loop
bne $18,.Loop
Lend2: mulq $2,$19,$3 # $3 = prod_low
.Lend2: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
@@ -91,7 +82,7 @@ Lend2: mulq $2,$19,$3 # $3 = prod_low
addq $5,$0,$0 # combine carries
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
Lend1: subq $5,$3,$3
.Lend1: subq $5,$3,$3
cmpult $5,$3,$5
stq $3,0($16)
addq $0,$5,$0


@@ -1,6 +1,6 @@
# Alpha 21064 __udiv_qrnnd
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
@@ -21,13 +21,11 @@
.set noreorder
.set noat
.text
.align 3
.globl __udiv_qrnnd
.ent __udiv_qrnnd 0
.align 3
.globl __udiv_qrnnd
.ent __udiv_qrnnd
__udiv_qrnnd:
__udiv_qrnnd..ng:
.frame $30,0,$26,0
.prologue 0
#define cnt $2
@@ -39,9 +37,9 @@ __udiv_qrnnd..ng:
#define qb $20
ldiq cnt,16
blt d,Largedivisor
blt d,.Largedivisor
Loop1: cmplt n0,0,tmp
.Loop1: cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
@@ -74,12 +72,12 @@ Loop1: cmplt n0,0,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
subq cnt,1,cnt
bgt cnt,Loop1
bgt cnt,.Loop1
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
Largedivisor:
.Largedivisor:
and n0,1,$4
srl n0,1,n0
@@ -91,7 +89,7 @@ Largedivisor:
srl d,1,$5
addq $5,$6,$5
Loop2: cmplt n0,0,tmp
.Loop2: cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
@@ -124,27 +122,27 @@ Loop2: cmplt n0,0,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
subq cnt,1,cnt
bgt cnt,Loop2
bgt cnt,.Loop2
addq n1,n1,n1
addq $4,n1,n1
bne $6,Odd
bne $6,.LOdd
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
Odd:
.LOdd:
/* q' in n0. r' in n1 */
addq n1,n0,n1
cmpult n1,n0,tmp # tmp := carry from addq
beq tmp,LLp6
beq tmp,.LLp6
addq n0,1,n0
subq n1,d,n1
LLp6: cmpult n1,d,tmp
bne tmp,LLp7
.LLp6: cmpult n1,d,tmp
bne tmp,.LLp7
addq n0,1,n0
subq n1,d,n1
LLp7:
.LLp7:
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
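
Alpha has no integer divide instruction, so __udiv_qrnnd grinds out the quotient by shift-and-subtract: the Largedivisor path covers divisors with the top bit set (the blt treats d as signed), and a count of 16 for 64 quotient bits implies the unrolled loop retires four bits per trip. A one-bit-per-step C sketch of the contract (hypothetical name; assumes the usual udiv_qrnnd precondition n1 < d, so the quotient fits in one limb):

#include <stdint.h>
typedef uint64_t mp_limb_t;

mp_limb_t
udiv_qrnnd_ref (mp_limb_t *rem_ptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
{
  mp_limb_t q = 0;
  for (int i = 0; i < 64; i++)
    {
      mp_limb_t msb = n1 >> 63;        /* bit about to be shifted out */
      n1 = (n1 << 1) | (n0 >> 63);     /* shift the 128-bit n1:n0 left */
      n0 <<= 1;
      q <<= 1;
      if (msb || n1 >= d)              /* does d go into the top part? */
        {
          n1 -= d;                     /* restoring-division subtract */
          q |= 1;
        }
    }
  *rem_ptr = n1;                       /* remainder stored via rem_ptr */
  return q;                            /* quotient returned in $0 */
}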


@@ -1,7 +1,7 @@
/* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
sum in a third limb vector.
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -27,50 +27,53 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
size (sp + 12)
*/
#include "sysdep.h"
#include "asm-syntax.h"
TEXT
ALIGN
GLOBL ___mpn_add_n
GLOBL C_SYMBOL_NAME(__mpn_add_n)
LAB(___mpn_add_n)
C_SYMBOL_NAME(__mpn_add_n:)
PROLOG(__mpn_add_n)
/* Save used registers on the stack. */
INSN2(move,l ,MEM_PREDEC(sp),d2)
INSN2(move,l ,MEM_PREDEC(sp),a2)
movel R(d2),MEM_PREDEC(sp)
movel R(a2),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
INSN2(move,l ,a2,MEM_DISP(sp,12))
INSN2(move,l ,a0,MEM_DISP(sp,16))
INSN2(move,l ,a1,MEM_DISP(sp,20))
INSN2(move,l ,d2,MEM_DISP(sp,24))
movel MEM_DISP(sp,12),R(a2)
movel MEM_DISP(sp,16),R(a0)
movel MEM_DISP(sp,20),R(a1)
movel MEM_DISP(sp,24),R(d2)
INSN2(eor,w ,d2,#1)
INSN2(lsr,l ,d2,#1)
bcc L1
INSN2(subq,l ,d2,#1) /* clears cy as side effect */
eorw #1,R(d2)
lsrl #1,R(d2)
bcc L(L1)
subql #1,R(d2) /* clears cy as side effect */
LAB(Loop)
INSN2(move,l ,d0,MEM_POSTINC(a0))
INSN2(move,l ,d1,MEM_POSTINC(a1))
INSN2(addx,l ,d0,d1)
INSN2(move,l ,MEM_POSTINC(a2),d0)
LAB(L1) INSN2(move,l ,d0,MEM_POSTINC(a0))
INSN2(move,l ,d1,MEM_POSTINC(a1))
INSN2(addx,l ,d0,d1)
INSN2(move,l ,MEM_POSTINC(a2),d0)
L(Loop:)
movel MEM_POSTINC(a0),R(d0)
movel MEM_POSTINC(a1),R(d1)
addxl R(d1),R(d0)
movel R(d0),MEM_POSTINC(a2)
L(L1:) movel MEM_POSTINC(a0),R(d0)
movel MEM_POSTINC(a1),R(d1)
addxl R(d1),R(d0)
movel R(d0),MEM_POSTINC(a2)
dbf d2,Loop /* loop until 16 lsb of %4 == -1 */
INSN2(subx,l ,d0,d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
INSN2(sub,l ,d2,#0x10000)
bcs L2
INSN2(add,l ,d0,d0) /* restore cy */
bra Loop
dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
subl #0x10000,R(d2)
bcs L(L2)
addl R(d0),R(d0) /* restore cy */
bra L(Loop)
LAB(L2)
INSN1(neg,l ,d0)
L(L2:)
negl R(d0)
/* Restore used registers from stack frame. */
INSN2(move,l ,a2,MEM_POSTINC(sp))
INSN2(move,l ,d2,MEM_POSTINC(sp))
movel MEM_POSTINC(sp),R(a2)
movel MEM_POSTINC(sp),R(d2)
rts
EPILOG(__mpn_add_n)
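
The counter gymnastics around dbf are worth spelling out. The limb loop is unrolled twice; eorw #1 / lsrl #1 / bcc decides whether to enter at the halfway label L1 for an odd count; dbf counts down only the low 16 bits of d2; and the subl #0x10000 / bcs pair extends the count past 65535. The key property is that movel and dbf leave the 68k X (extend) flag alone, so the carry lives in X across the whole loop; only the subl fixup clobbers it, hence the subxl save of X into d0 and the addl d0,d0 restore. Each addxl is, in C terms, roughly this sketch:

#include <stdint.h>
typedef uint32_t mp_limb_t; /* one 32-bit m68k limb */

/* addxl analogue: r = a + b + X, with *x standing in for the X flag */
static inline mp_limb_t
addx (mp_limb_t a, mp_limb_t b, mp_limb_t *x)
{
  mp_limb_t t = a + *x;
  mp_limb_t c1 = t < *x;
  mp_limb_t r = t + b;
  *x = c1 | (r < t);
  return r;
}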

sysdeps/m68k/lshift.S (new file, 150 lines)

@@ -0,0 +1,150 @@
/* mc68020 __mpn_lshift -- Shift left a low-level natural-number integer.
Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
License for more details.
You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/*
INPUT PARAMETERS
res_ptr (sp + 4)
s_ptr (sp + 8)
s_size (sp + 12)
cnt (sp + 16)
*/
#include "sysdep.h"
#include "asm-syntax.h"
#define res_ptr a1
#define s_ptr a0
#define s_size d6
#define cnt d4
TEXT
ALIGN
GLOBL C_SYMBOL_NAME(__mpn_lshift)
C_SYMBOL_NAME(__mpn_lshift:)
PROLOG(__mpn_lshift)
/* Save used registers on the stack. */
moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
/* Copy the arguments to registers. */
movel MEM_DISP(sp,28),R(res_ptr)
movel MEM_DISP(sp,32),R(s_ptr)
movel MEM_DISP(sp,36),R(s_size)
movel MEM_DISP(sp,40),R(cnt)
moveql #1,R(d5)
cmpl R(d5),R(cnt)
bne L(Lnormal)
cmpl R(s_ptr),R(res_ptr)
bls L(Lspecial) /* jump if s_ptr >= res_ptr */
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
lea MEM_INDX1(s_ptr,s_size,l,4),R(a2)
#else /* not mc68020 */
movel R(s_size),R(d0)
asll #2,R(d0)
lea MEM_INDX(s_ptr,d0,l),R(a2)
#endif
cmpl R(res_ptr),R(a2)
bls L(Lspecial) /* jump if res_ptr >= s_ptr + s_size */
L(Lnormal:)
moveql #32,R(d5)
subl R(cnt),R(d5)
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
#else /* not mc68020 */
movel R(s_size),R(d0)
asll #2,R(d0)
addl R(d0),R(s_ptr)
addl R(d0),R(res_ptr)
#endif
movel MEM_PREDEC(s_ptr),R(d2)
movel R(d2),R(d0)
lsrl R(d5),R(d0) /* compute carry limb */
lsll R(cnt),R(d2)
movel R(d2),R(d1)
subql #1,R(s_size)
beq L(Lend)
lsrl #1,R(s_size)
bcs L(L1)
subql #1,R(s_size)
L(Loop:)
movel MEM_PREDEC(s_ptr),R(d2)
movel R(d2),R(d3)
lsrl R(d5),R(d3)
orl R(d3),R(d1)
movel R(d1),MEM_PREDEC(res_ptr)
lsll R(cnt),R(d2)
L(L1:)
movel MEM_PREDEC(s_ptr),R(d1)
movel R(d1),R(d3)
lsrl R(d5),R(d3)
orl R(d3),R(d2)
movel R(d2),MEM_PREDEC(res_ptr)
lsll R(cnt),R(d1)
dbf R(s_size),L(Loop)
subl #0x10000,R(s_size)
bcc L(Loop)
L(Lend:)
movel R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */
/* Restore used registers from stack frame. */
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
rts
/* We loop from least significant end of the arrays, which is only
permissible if the source and destination don't overlap, since the
function is documented to work for overlapping source and destination. */
L(Lspecial:)
clrl R(d0) /* initialize carry */
eorw #1,R(s_size)
lsrl #1,R(s_size)
bcc L(LL1)
subql #1,R(s_size)
L(LLoop:)
movel MEM_POSTINC(s_ptr),R(d2)
addxl R(d2),R(d2)
movel R(d2),MEM_POSTINC(res_ptr)
L(LL1:)
movel MEM_POSTINC(s_ptr),R(d2)
addxl R(d2),R(d2)
movel R(d2),MEM_POSTINC(res_ptr)
dbf R(s_size),L(LLoop)
addxl R(d0),R(d0) /* save cy in lsb */
subl #0x10000,R(s_size)
bcs L(LLend)
lsrl #1,R(d0) /* restore cy */
bra L(LLoop)
L(LLend:)
/* Restore used registers from stack frame. */
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
rts
EPILOG(__mpn_lshift)
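
The nicest trick in the new file is the cnt == 1 special case: when the copy direction allows, the shift runs from the least significant end and addxl d2,d2 does all the work, since adding a register to itself through X is exactly "shift left one bit, carry in the old X, carry out the displaced top bit". Around the dbf fixup the X flag is parked in d0's low bit by addxl d0,d0 and recovered with lsrl #1. (The mpn_rshift counterpart below plays the same game with roxrl #1, rotate right through X.) Per limb, as a C sketch with 32-bit limbs:

#include <stdint.h>

static inline uint32_t
shl1_through_carry (uint32_t x, uint32_t *cy)
{
  uint32_t out = x >> 31;     /* bit displaced off the top */
  x = (x << 1) | *cy;         /* addxl x,x: x + x + X */
  *cy = out;
  return x;
}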


@@ -1,7 +1,7 @@
/* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
the result to a second limb vector.
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -23,58 +23,61 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
INPUT PARAMETERS
res_ptr (sp + 4)
s1_ptr (sp + 8)
size (sp + 12)
s1_size (sp + 12)
s2_limb (sp + 16)
*/
#include "sysdep.h"
#include "asm-syntax.h"
TEXT
ALIGN
GLOBL ___mpn_addmul_1
GLOBL C_SYMBOL_NAME(__mpn_addmul_1)
LAB(___mpn_addmul_1)
C_SYMBOL_NAME(__mpn_addmul_1:)
PROLOG(__mpn_addmul_1)
#define res_ptr a0
#define s1_ptr a1
#define size d2
#define s1_size d2
#define s2_limb d4
/* Save used registers on the stack. */
INSN2(movem,l ,MEM_PREDEC(sp),d2-d5)
moveml R(d2)-R(d5),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
INSN2(move,l ,res_ptr,MEM_DISP(sp,20))
INSN2(move,l ,s1_ptr,MEM_DISP(sp,24))
INSN2(move,l ,size,MEM_DISP(sp,28))
INSN2(move,l ,s2_limb,MEM_DISP(sp,32))
movel MEM_DISP(sp,20),R(res_ptr)
movel MEM_DISP(sp,24),R(s1_ptr)
movel MEM_DISP(sp,28),R(s1_size)
movel MEM_DISP(sp,32),R(s2_limb)
INSN2(eor,w ,size,#1)
INSN1(clr,l ,d1)
INSN1(clr,l ,d5)
INSN2(lsr,l ,size,#1)
bcc L1
INSN2(subq,l ,size,#1)
INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */
eorw #1,R(s1_size)
clrl R(d1)
clrl R(d5)
lsrl #1,R(s1_size)
bcc L(L1)
subql #1,R(s1_size)
subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
LAB(Loop)
INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
INSN2(mulu,l ,d1:d3,s2_limb)
INSN2(addx,l ,d3,d0)
INSN2(addx,l ,d1,d5)
INSN2(add,l ,MEM_POSTINC(res_ptr),d3)
LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
INSN2(mulu,l ,d0:d3,s2_limb)
INSN2(addx,l ,d3,d1)
INSN2(addx,l ,d0,d5)
INSN2(add,l ,MEM_POSTINC(res_ptr),d3)
L(Loop:)
movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d1):R(d3)
addxl R(d0),R(d3)
addxl R(d5),R(d1)
addl R(d3),MEM_POSTINC(res_ptr)
L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d0):R(d3)
addxl R(d1),R(d3)
addxl R(d5),R(d0)
addl R(d3),MEM_POSTINC(res_ptr)
dbf size,Loop
INSN2(addx,l ,d0,d5)
INSN2(sub,l ,size,#0x10000)
bcc Loop
dbf R(s1_size),L(Loop)
addxl R(d5),R(d0)
subl #0x10000,R(s1_size)
bcc L(Loop)
/* Restore used registers from stack frame. */
INSN2(movem,l ,d2-d5,MEM_POSTINC(sp))
moveml MEM_POSTINC(sp),R(d2)-R(d5)
rts
EPILOG(__mpn_addmul_1)


@@ -1,7 +1,7 @@
/* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store
the result in a second limb vector.
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -23,65 +23,68 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
INPUT PARAMETERS
res_ptr (sp + 4)
s1_ptr (sp + 8)
size (sp + 12)
s1_size (sp + 12)
s2_limb (sp + 16)
*/
#include "sysdep.h"
#include "asm-syntax.h"
TEXT
ALIGN
GLOBL ___mpn_mul_1
GLOBL C_SYMBOL_NAME(__mpn_mul_1)
LAB(___mpn_mul_1)
C_SYMBOL_NAME(__mpn_mul_1:)
PROLOG(__mpn_mul_1)
#define res_ptr a0
#define s1_ptr a1
#define size d2
#define s1_size d2
#define s2_limb d4
/* Save used registers on the stack. */
INSN2(movem,l ,MEM_PREDEC(sp),d2-d4)
moveml R(d2)-R(d4),MEM_PREDEC(sp)
#if 0
INSN2(move,l ,MEM_PREDEC(sp),d2)
INSN2(move,l ,MEM_PREDEC(sp),d3)
INSN2(move,l ,MEM_PREDEC(sp),d4)
movel R(d2),MEM_PREDEC(sp)
movel R(d3),MEM_PREDEC(sp)
movel R(d4),MEM_PREDEC(sp)
#endif
/* Copy the arguments to registers. Better use movem? */
INSN2(move,l ,res_ptr,MEM_DISP(sp,16))
INSN2(move,l ,s1_ptr,MEM_DISP(sp,20))
INSN2(move,l ,size,MEM_DISP(sp,24))
INSN2(move,l ,s2_limb,MEM_DISP(sp,28))
movel MEM_DISP(sp,16),R(res_ptr)
movel MEM_DISP(sp,20),R(s1_ptr)
movel MEM_DISP(sp,24),R(s1_size)
movel MEM_DISP(sp,28),R(s2_limb)
INSN2(eor,w ,size,#1)
INSN1(clr,l ,d1)
INSN2(lsr,l ,size,#1)
bcc L1
INSN2(subq,l ,size,#1)
INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */
eorw #1,R(s1_size)
clrl R(d1)
lsrl #1,R(s1_size)
bcc L(L1)
subql #1,R(s1_size)
subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
LAB(Loop)
INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
INSN2(mulu,l ,d1:d3,s2_limb)
INSN2(addx,l ,d3,d0)
INSN2(move,l ,MEM_POSTINC(res_ptr),d3)
LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
INSN2(mulu,l ,d0:d3,s2_limb)
INSN2(addx,l ,d3,d1)
INSN2(move,l ,MEM_POSTINC(res_ptr),d3)
L(Loop:)
movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d1):R(d3)
addxl R(d0),R(d3)
movel R(d3),MEM_POSTINC(res_ptr)
L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d0):R(d3)
addxl R(d1),R(d3)
movel R(d3),MEM_POSTINC(res_ptr)
dbf size,Loop
INSN1(clr,l ,d3)
INSN2(addx,l ,d0,d3)
INSN2(sub,l ,size,#0x10000)
bcc Loop
dbf R(s1_size),L(Loop)
clrl R(d3)
addxl R(d3),R(d0)
subl #0x10000,R(s1_size)
bcc L(Loop)
/* Restore used registers from stack frame. */
INSN2(movem,l ,d2-d4,MEM_POSTINC(sp))
moveml MEM_POSTINC(sp),R(d2)-R(d4)
#if 0
INSN2(move,l ,d4,MEM_POSTINC(sp))
INSN2(move,l ,d3,MEM_POSTINC(sp))
INSN2(move,l ,d2,MEM_POSTINC(sp))
movel MEM_POSTINC(sp),R(d4)
movel MEM_POSTINC(sp),R(d3)
movel MEM_POSTINC(sp),R(d2)
#endif
rts
EPILOG(__mpn_mul_1)


@@ -1,7 +1,7 @@
/* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
the result from a second limb vector.
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -23,58 +23,61 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
INPUT PARAMETERS
res_ptr (sp + 4)
s1_ptr (sp + 8)
size (sp + 12)
s1_size (sp + 12)
s2_limb (sp + 16)
*/
#include "sysdep.h"
#include "asm-syntax.h"
TEXT
ALIGN
GLOBL ___mpn_submul_1
GLOBL C_SYMBOL_NAME(__mpn_submul_1)
LAB(___mpn_submul_1)
C_SYMBOL_NAME(__mpn_submul_1:)
PROLOG(__mpn_submul_1)
#define res_ptr a0
#define s1_ptr a1
#define size d2
#define s1_size d2
#define s2_limb d4
/* Save used registers on the stack. */
INSN2(movem,l ,MEM_PREDEC(sp),d2-d5)
moveml R(d2)-R(d5),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
INSN2(move,l ,res_ptr,MEM_DISP(sp,20))
INSN2(move,l ,s1_ptr,MEM_DISP(sp,24))
INSN2(move,l ,size,MEM_DISP(sp,28))
INSN2(move,l ,s2_limb,MEM_DISP(sp,32))
movel MEM_DISP(sp,20),R(res_ptr)
movel MEM_DISP(sp,24),R(s1_ptr)
movel MEM_DISP(sp,28),R(s1_size)
movel MEM_DISP(sp,32),R(s2_limb)
INSN2(eor,w ,size,#1)
INSN1(clr,l ,d1)
INSN1(clr,l ,d5)
INSN2(lsr,l ,size,#1)
bcc L1
INSN2(subq,l ,size,#1)
INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */
eorw #1,R(s1_size)
clrl R(d1)
clrl R(d5)
lsrl #1,R(s1_size)
bcc L(L1)
subql #1,R(s1_size)
subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
LAB(Loop)
INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
INSN2(mulu,l ,d1:d3,s2_limb)
INSN2(addx,l ,d3,d0)
INSN2(addx,l ,d1,d5)
INSN2(sub,l ,MEM_POSTINC(res_ptr),d3)
LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
INSN2(mulu,l ,d0:d3,s2_limb)
INSN2(addx,l ,d3,d1)
INSN2(addx,l ,d0,d5)
INSN2(sub,l ,MEM_POSTINC(res_ptr),d3)
L(Loop:)
movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d1):R(d3)
addxl R(d0),R(d3)
addxl R(d5),R(d1)
subl R(d3),MEM_POSTINC(res_ptr)
L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d0):R(d3)
addxl R(d1),R(d3)
addxl R(d5),R(d0)
subl R(d3),MEM_POSTINC(res_ptr)
dbf size,Loop
INSN2(addx,l ,d0,d5)
INSN2(sub,l ,size,#0x10000)
bcc Loop
dbf R(s1_size),L(Loop)
addxl R(d5),R(d0)
subl #0x10000,R(s1_size)
bcc L(Loop)
/* Restore used registers from stack frame. */
INSN2(movem,l ,d2-d5,MEM_POSTINC(sp))
moveml MEM_POSTINC(sp),R(d2)-R(d5)
rts
EPILOG(__mpn_submul_1)

sysdeps/m68k/rshift.S (new file, 149 lines)

@@ -0,0 +1,149 @@
/* mc68020 __mpn_rshift -- Shift right a low-level natural-number integer.
Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
License for more details.
You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/*
INPUT PARAMETERS
res_ptr (sp + 4)
s_ptr (sp + 8)
s_size (sp + 12)
cnt (sp + 16)
*/
#include "sysdep.h"
#include "asm-syntax.h"
#define res_ptr a1
#define s_ptr a0
#define s_size d6
#define cnt d4
TEXT
ALIGN
GLOBL C_SYMBOL_NAME(__mpn_rshift)
C_SYMBOL_NAME(__mpn_rshift:)
PROLOG(__mpn_rshift)
/* Save used registers on the stack. */
moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
/* Copy the arguments to registers. */
movel MEM_DISP(sp,28),R(res_ptr)
movel MEM_DISP(sp,32),R(s_ptr)
movel MEM_DISP(sp,36),R(s_size)
movel MEM_DISP(sp,40),R(cnt)
moveql #1,R(d5)
cmpl R(d5),R(cnt)
bne L(Lnormal)
cmpl R(res_ptr),R(s_ptr)
bls L(Lspecial) /* jump if res_ptr >= s_ptr */
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
lea MEM_INDX1(res_ptr,s_size,l,4),R(a2)
#else /* not mc68020 */
movel R(s_size),R(d0)
asll #2,R(d0)
lea MEM_INDX(res_ptr,d0,l),R(a2)
#endif
cmpl R(s_ptr),R(a2)
bls L(Lspecial) /* jump if s_ptr >= res_ptr + s_size */
L(Lnormal:)
moveql #32,R(d5)
subl R(cnt),R(d5)
movel MEM_POSTINC(s_ptr),R(d2)
movel R(d2),R(d0)
lsll R(d5),R(d0) /* compute carry limb */
lsrl R(cnt),R(d2)
movel R(d2),R(d1)
subql #1,R(s_size)
beq L(Lend)
lsrl #1,R(s_size)
bcs L(L1)
subql #1,R(s_size)
L(Loop:)
movel MEM_POSTINC(s_ptr),R(d2)
movel R(d2),R(d3)
lsll R(d5),R(d3)
orl R(d3),R(d1)
movel R(d1),MEM_POSTINC(res_ptr)
lsrl R(cnt),R(d2)
L(L1:)
movel MEM_POSTINC(s_ptr),R(d1)
movel R(d1),R(d3)
lsll R(d5),R(d3)
orl R(d3),R(d2)
movel R(d2),MEM_POSTINC(res_ptr)
lsrl R(cnt),R(d1)
dbf R(s_size),L(Loop)
subl #0x10000,R(s_size)
bcc L(Loop)
L(Lend:)
movel R(d1),MEM(res_ptr) /* store most significant limb */
/* Restore used registers from stack frame. */
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
rts
/* We loop from most significant end of the arrays, which is only
permissible if the source and destination don't overlap, since the
function is documented to work for overlapping source and destination. */
L(Lspecial:)
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
#else /* not mc68020 */
movel R(s_size),R(d0)
asll #2,R(d0)
addl R(d0),R(s_ptr)
addl R(d0),R(res_ptr)
#endif
clrl R(d0) /* initialize carry */
eorw #1,R(s_size)
lsrl #1,R(s_size)
bcc L(LL1)
subql #1,R(s_size)
L(LLoop:)
movel MEM_PREDEC(s_ptr),R(d2)
roxrl #1,R(d2)
movel R(d2),MEM_PREDEC(res_ptr)
L(LL1:)
movel MEM_PREDEC(s_ptr),R(d2)
roxrl #1,R(d2)
movel R(d2),MEM_PREDEC(res_ptr)
dbf R(s_size),L(LLoop)
roxrl #1,R(d0) /* save cy in msb */
subl #0x10000,R(s_size)
bcs L(LLend)
addl R(d0),R(d0) /* restore cy */
bra L(LLoop)
L(LLend:)
/* Restore used registers from stack frame. */
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
rts
EPILOG(__mpn_rshift)


@@ -1,7 +1,7 @@
/* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
store difference in a third limb vector.
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -27,50 +27,53 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
size (sp + 12)
*/
#include "sysdep.h"
#include "asm-syntax.h"
TEXT
ALIGN
GLOBL ___mpn_sub_n
GLOBL C_SYMBOL_NAME(__mpn_sub_n)
LAB(___mpn_sub_n)
C_SYMBOL_NAME(__mpn_sub_n:)
PROLOG(__mpn_sub_n)
/* Save used registers on the stack. */
INSN2(move,l ,MEM_PREDEC(sp),d2)
INSN2(move,l ,MEM_PREDEC(sp),a2)
movel R(d2),MEM_PREDEC(sp)
movel R(a2),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
INSN2(move,l ,a2,MEM_DISP(sp,12))
INSN2(move,l ,a0,MEM_DISP(sp,16))
INSN2(move,l ,a1,MEM_DISP(sp,20))
INSN2(move,l ,d2,MEM_DISP(sp,24))
movel MEM_DISP(sp,12),R(a2)
movel MEM_DISP(sp,16),R(a0)
movel MEM_DISP(sp,20),R(a1)
movel MEM_DISP(sp,24),R(d2)
INSN2(eor,w ,d2,#1)
INSN2(lsr,l ,d2,#1)
bcc L1
INSN2(subq,l ,d2,#1) /* clears cy as side effect */
eorw #1,R(d2)
lsrl #1,R(d2)
bcc L(L1)
subql #1,R(d2) /* clears cy as side effect */
LAB(Loop)
INSN2(move,l ,d0,MEM_POSTINC(a0))
INSN2(move,l ,d1,MEM_POSTINC(a1))
INSN2(subx,l ,d0,d1)
INSN2(move,l ,MEM_POSTINC(a2),d0)
LAB(L1) INSN2(move,l ,d0,MEM_POSTINC(a0))
INSN2(move,l ,d1,MEM_POSTINC(a1))
INSN2(subx,l ,d0,d1)
INSN2(move,l ,MEM_POSTINC(a2),d0)
L(Loop:)
movel MEM_POSTINC(a0),R(d0)
movel MEM_POSTINC(a1),R(d1)
subxl R(d1),R(d0)
movel R(d0),MEM_POSTINC(a2)
L(L1:) movel MEM_POSTINC(a0),R(d0)
movel MEM_POSTINC(a1),R(d1)
subxl R(d1),R(d0)
movel R(d0),MEM_POSTINC(a2)
dbf d2,Loop /* loop until 16 lsb of %4 == -1 */
INSN2(subx,l ,d0,d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
INSN2(sub,l ,d2,#0x10000)
bcs L2
INSN2(add,l ,d0,d0) /* restore cy */
bra Loop
dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
subl #0x10000,R(d2)
bcs L(L2)
addl R(d0),R(d0) /* restore cy */
bra L(Loop)
LAB(L2)
INSN1(neg,l ,d0)
L(L2:)
negl R(d0)
/* Restore used registers from stack frame. */
INSN2(move,l ,a2,MEM_POSTINC(sp))
INSN2(move,l ,d2,MEM_POSTINC(sp))
movel MEM_POSTINC(sp),R(a2)
movel MEM_POSTINC(sp),R(d2)
rts
EPILOG(__mpn_sub_n)


@@ -1,7 +1,7 @@
; mc88100 __mpn_add -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.

sysdeps/m88k/m88110/add_n.S (new file, 199 lines)

@@ -0,0 +1,199 @@
; mc88110 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.
; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
#define res_ptr r2
#define s1_ptr r3
#define s2_ptr r4
#define size r5
#include "sysdep.h"
text
align 16
global C_SYMBOL_NAME(__mpn_add_n)
C_SYMBOL_NAME(__mpn_add_n):
addu.co r0,r0,r0 ; clear cy flag
xor r12,s2_ptr,res_ptr
bb1 2,r12,L1
; ** V1a **
L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned
/* Add least significant limb separately to align res_ptr and s2_ptr */
ld r10,s1_ptr,0
addu s1_ptr,s1_ptr,4
ld r8,s2_ptr,0
addu s2_ptr,s2_ptr,4
subu size,size,1
addu.co r6,r10,r8
st r6,res_ptr,0
addu res_ptr,res_ptr,4
L_v1: cmp r12,size,2
bb1 lt,r12,Lend2
ld r10,s1_ptr,0
ld r12,s1_ptr,4
ld.d r8,s2_ptr,0
subu size,size,10
bcnd lt0,size,Lfin1
/* Add blocks of 8 limbs until less than 8 limbs remain */
align 8
Loop1: subu size,size,8
addu.cio r6,r10,r8
ld r10,s1_ptr,8
addu.cio r7,r12,r9
ld r12,s1_ptr,12
ld.d r8,s2_ptr,8
st.d r6,res_ptr,0
addu.cio r6,r10,r8
ld r10,s1_ptr,16
addu.cio r7,r12,r9
ld r12,s1_ptr,20
ld.d r8,s2_ptr,16
st.d r6,res_ptr,8
addu.cio r6,r10,r8
ld r10,s1_ptr,24
addu.cio r7,r12,r9
ld r12,s1_ptr,28
ld.d r8,s2_ptr,24
st.d r6,res_ptr,16
addu.cio r6,r10,r8
ld r10,s1_ptr,32
addu.cio r7,r12,r9
ld r12,s1_ptr,36
addu s1_ptr,s1_ptr,32
ld.d r8,s2_ptr,32
addu s2_ptr,s2_ptr,32
st.d r6,res_ptr,24
addu res_ptr,res_ptr,32
bcnd ge0,size,Loop1
Lfin1: addu size,size,8-2
bcnd lt0,size,Lend1
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1: addu.cio r6,r10,r8
ld r10,s1_ptr,8
addu.cio r7,r12,r9
ld r12,s1_ptr,12
ld.d r8,s2_ptr,8
st.d r6,res_ptr,0
subu size,size,2
addu s1_ptr,s1_ptr,8
addu s2_ptr,s2_ptr,8
addu res_ptr,res_ptr,8
bcnd ge0,size,Loope1
Lend1: addu.cio r6,r10,r8
addu.cio r7,r12,r9
st.d r6,res_ptr,0
bb0 0,size,Lret1
/* Add last limb */
ld r10,s1_ptr,8
ld r8,s2_ptr,8
addu.cio r6,r10,r8
st r6,res_ptr,8
Lret1: jmp.n r1
addu.ci r2,r0,r0 ; return carry-out from most sign. limb
L1: xor r12,s1_ptr,res_ptr
bb1 2,r12,L2
; ** V1b **
or r12,r0,s2_ptr
or s2_ptr,r0,s1_ptr
or s1_ptr,r0,r12
br L0
; ** V2 **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
alignment of s2_ptr and res_ptr differ. Since there are only two ways
things can be aligned (that we care about) we now know that the alignment
of s1_ptr and s2_ptr are the same. */
L2: cmp r12,size,1
bb1 eq,r12,Ljone
bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned
/* Add least significant limb separately to align res_ptr and s2_ptr */
ld r10,s1_ptr,0
addu s1_ptr,s1_ptr,4
ld r8,s2_ptr,0
addu s2_ptr,s2_ptr,4
subu size,size,1
addu.co r6,r10,r8
st r6,res_ptr,0
addu res_ptr,res_ptr,4
L_v2: subu size,size,8
bcnd lt0,size,Lfin2
/* Add blocks of 8 limbs until less than 8 limbs remain */
align 8
Loop2: subu size,size,8
ld.d r8,s1_ptr,0
ld.d r6,s2_ptr,0
addu.cio r8,r8,r6
st r8,res_ptr,0
addu.cio r9,r9,r7
st r9,res_ptr,4
ld.d r8,s1_ptr,8
ld.d r6,s2_ptr,8
addu.cio r8,r8,r6
st r8,res_ptr,8
addu.cio r9,r9,r7
st r9,res_ptr,12
ld.d r8,s1_ptr,16
ld.d r6,s2_ptr,16
addu.cio r8,r8,r6
st r8,res_ptr,16
addu.cio r9,r9,r7
st r9,res_ptr,20
ld.d r8,s1_ptr,24
ld.d r6,s2_ptr,24
addu.cio r8,r8,r6
st r8,res_ptr,24
addu.cio r9,r9,r7
st r9,res_ptr,28
addu s1_ptr,s1_ptr,32
addu s2_ptr,s2_ptr,32
addu res_ptr,res_ptr,32
bcnd ge0,size,Loop2
Lfin2: addu size,size,8-2
bcnd lt0,size,Lend2
Loope2: ld.d r8,s1_ptr,0
ld.d r6,s2_ptr,0
addu.cio r8,r8,r6
st r8,res_ptr,0
addu.cio r9,r9,r7
st r9,res_ptr,4
subu size,size,2
addu s1_ptr,s1_ptr,8
addu s2_ptr,s2_ptr,8
addu res_ptr,res_ptr,8
bcnd ge0,size,Loope2
Lend2: bb0 0,size,Lret2
/* Add last limb */
Ljone: ld r10,s1_ptr,0
ld r8,s2_ptr,0
addu.cio r6,r10,r8
st r6,res_ptr,0
Lret2: jmp.n r1
addu.ci r2,r0,r0 ; return carry-out from most sign. limb
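
The V1a/V1b/V2 split exists because ld.d and st.d move two 4-byte limbs at a time and want 8-byte alignment, so only bit 2 of each pointer matters. XORing two pointers and testing bit 2 (the bb1 2,r12,... at the top) asks whether their alignments agree, and peeling one limb off the front then fixes both at once. If s2_ptr disagrees with res_ptr but s1_ptr agrees, the code simply swaps the operand pointers (addition commutes) and reuses V1a; failing that, s1_ptr and s2_ptr must agree with each other, which is the V2 case. The test, in C (a sketch):

#include <stdint.h>

/* bb1 2,r12,... with r12 = p ^ q: do p and q have the same alignment mod 8? */
static int
same_dword_alignment (const void *p, const void *q)
{
  return (((uintptr_t) p ^ (uintptr_t) q) & 4) == 0;
}

The sub_n below cannot swap its operands, so it carries a genuine V1b path instead.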


@@ -0,0 +1,60 @@
; mc88110 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
; store the product in a second limb vector.
; Copyright (C) 1996 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr r2
; s1_ptr r3
; size r4
; s2_limb r5
text
align 16
global ___mpn_addmul_1
___mpn_addmul_1:
lda r3,r3[r4]
lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval
subu r4,r0,r4
addu.co r2,r0,r0 ; r2 = cy = 0
ld r6,r3[r4]
addu r4,r4,1
subu r8,r8,4
bcnd.n eq0,r4,Lend
mulu.d r10,r6,r5
Loop: ld r7,r8[r4]
ld r6,r3[r4]
addu.cio r9,r11,r2
addu.ci r2,r10,r0
addu.co r9,r9,r7
st r9,r8[r4]
addu r4,r4,1
mulu.d r10,r6,r5
bcnd ne0,r4,Loop
Lend: ld r7,r8,0
addu.cio r9,r11,r2
addu.ci r2,r10,r0
addu.co r9,r9,r7
st r9,r8,0
jmp.n r1
addu.ci r2,r2,r0
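
mulu.d drops a full 64-bit product into the register pair r10:r11, and the loop then threads two carries at once: the carry limb in r2 and the processor carry bit, which flows from each addu.co into the next iteration's addu.cio. One limb step, restated in C with both carries as variables (a sketch; names are illustrative):

#include <stdint.h>

static uint32_t
addmul_step (uint32_t res_limb, uint32_t p_hi, uint32_t p_lo,
             uint32_t *cy_limb, uint32_t *cy_bit)
{
  uint32_t lo = p_lo + *cy_limb;      /* addu.cio r9,r11,r2 ... */
  uint32_t c = lo < p_lo;
  lo += *cy_bit;                      /* ... including the carry-in bit */
  c |= *cy_bit & (lo == 0);
  *cy_limb = p_hi + c;                /* addu.ci r2,r10,r0 */
  uint32_t r = lo + res_limb;         /* addu.co r9,r9,r7 */
  *cy_bit = r < lo;                   /* carry bit for the next limb */
  return r;
}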


@@ -1,7 +1,7 @@
; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and
; store the product in a second limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
@@ -56,29 +56,3 @@ Lend: addu.cio r9,r11,r2
st r9,r8,4
jmp.n r1
addu.ci r2,r10,r0
; This is the Right Way to do this on '110. 4 cycles / 64-bit limb.
; ld.d r10,
; mulu.d
; addu.cio
; addu.cio
; st.d
; mulu.d ,r11,r5
; ld.d r12,
; mulu.d ,r10,r5
; addu.cio
; addu.cio
; st.d
; mulu.d
; ld.d r10,
; mulu.d
; addu.cio
; addu.cio
; st.d
; mulu.d
; ld.d r10,
; mulu.d
; addu.cio
; addu.cio
; st.d
; mulu.d

sysdeps/m88k/m88110/sub_n.S (new file, 275 lines)

@@ -0,0 +1,275 @@
; mc88110 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
; store difference in a third limb vector.
; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
#define res_ptr r2
#define s1_ptr r3
#define s2_ptr r4
#define size r5
#include "sysdep.h"
text
align 16
global C_SYMBOL_NAME(__mpn_sub_n)
C_SYMBOL_NAME(__mpn_sub_n):
subu.co r0,r0,r0 ; set cy flag
xor r12,s2_ptr,res_ptr
bb1 2,r12,L1
; ** V1a **
L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned
/* Add least significant limb separately to align res_ptr and s2_ptr */
ld r10,s1_ptr,0
addu s1_ptr,s1_ptr,4
ld r8,s2_ptr,0
addu s2_ptr,s2_ptr,4
subu size,size,1
subu.co r6,r10,r8
st r6,res_ptr,0
addu res_ptr,res_ptr,4
L_v1: cmp r12,size,2
bb1 lt,r12,Lend2
ld r10,s1_ptr,0
ld r12,s1_ptr,4
ld.d r8,s2_ptr,0
subu size,size,10
bcnd lt0,size,Lfin1
/* Add blocks of 8 limbs until less than 8 limbs remain */
align 8
Loop1: subu size,size,8
subu.cio r6,r10,r8
ld r10,s1_ptr,8
subu.cio r7,r12,r9
ld r12,s1_ptr,12
ld.d r8,s2_ptr,8
st.d r6,res_ptr,0
subu.cio r6,r10,r8
ld r10,s1_ptr,16
subu.cio r7,r12,r9
ld r12,s1_ptr,20
ld.d r8,s2_ptr,16
st.d r6,res_ptr,8
subu.cio r6,r10,r8
ld r10,s1_ptr,24
subu.cio r7,r12,r9
ld r12,s1_ptr,28
ld.d r8,s2_ptr,24
st.d r6,res_ptr,16
subu.cio r6,r10,r8
ld r10,s1_ptr,32
subu.cio r7,r12,r9
ld r12,s1_ptr,36
addu s1_ptr,s1_ptr,32
ld.d r8,s2_ptr,32
addu s2_ptr,s2_ptr,32
st.d r6,res_ptr,24
addu res_ptr,res_ptr,32
bcnd ge0,size,Loop1
Lfin1: addu size,size,8-2
bcnd lt0,size,Lend1
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1: subu.cio r6,r10,r8
ld r10,s1_ptr,8
subu.cio r7,r12,r9
ld r12,s1_ptr,12
ld.d r8,s2_ptr,8
st.d r6,res_ptr,0
subu size,size,2
addu s1_ptr,s1_ptr,8
addu s2_ptr,s2_ptr,8
addu res_ptr,res_ptr,8
bcnd ge0,size,Loope1
Lend1: subu.cio r6,r10,r8
subu.cio r7,r12,r9
st.d r6,res_ptr,0
bb0 0,size,Lret1
/* Add last limb */
ld r10,s1_ptr,8
ld r8,s2_ptr,8
subu.cio r6,r10,r8
st r6,res_ptr,8
Lret1: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
jmp.n r1
xor r2,r2,1
L1: xor r12,s1_ptr,res_ptr
bb1 2,r12,L2
; ** V1b **
bb0 2,res_ptr,L_v1b ; branch if res_ptr is aligned
/* Add least significant limb separately to align res_ptr and s1_ptr */
ld r10,s2_ptr,0
addu s2_ptr,s2_ptr,4
ld r8,s1_ptr,0
addu s1_ptr,s1_ptr,4
subu size,size,1
subu.co r6,r8,r10
st r6,res_ptr,0
addu res_ptr,res_ptr,4
L_v1b: cmp r12,size,2
bb1 lt,r12,Lend2
ld r10,s2_ptr,0
ld r12,s2_ptr,4
ld.d r8,s1_ptr,0
subu size,size,10
bcnd lt0,size,Lfin1b
/* Add blocks of 8 limbs until less than 8 limbs remain */
align 8
Loop1b: subu size,size,8
subu.cio r6,r8,r10
ld r10,s2_ptr,8
subu.cio r7,r9,r12
ld r12,s2_ptr,12
ld.d r8,s1_ptr,8
st.d r6,res_ptr,0
subu.cio r6,r8,r10
ld r10,s2_ptr,16
subu.cio r7,r9,r12
ld r12,s2_ptr,20
ld.d r8,s1_ptr,16
st.d r6,res_ptr,8
subu.cio r6,r8,r10
ld r10,s2_ptr,24
subu.cio r7,r9,r12
ld r12,s2_ptr,28
ld.d r8,s1_ptr,24
st.d r6,res_ptr,16
subu.cio r6,r8,r10
ld r10,s2_ptr,32
subu.cio r7,r9,r12
ld r12,s2_ptr,36
addu s2_ptr,s2_ptr,32
ld.d r8,s1_ptr,32
addu s1_ptr,s1_ptr,32
st.d r6,res_ptr,24
addu res_ptr,res_ptr,32
bcnd ge0,size,Loop1b
Lfin1b: addu size,size,8-2
bcnd lt0,size,Lend1b
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1b: subu.cio r6,r8,r10
ld r10,s2_ptr,8
subu.cio r7,r9,r12
ld r12,s2_ptr,12
ld.d r8,s1_ptr,8
st.d r6,res_ptr,0
subu size,size,2
addu s1_ptr,s1_ptr,8
addu s2_ptr,s2_ptr,8
addu res_ptr,res_ptr,8
bcnd ge0,size,Loope1b
Lend1b: subu.cio r6,r8,r10
subu.cio r7,r9,r12
st.d r6,res_ptr,0
bb0 0,size,Lret1b
/* Add last limb */
ld r10,s2_ptr,8
ld r8,s1_ptr,8
subu.cio r6,r8,r10
st r6,res_ptr,8
Lret1b: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
jmp.n r1
xor r2,r2,1
; ** V2 **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
alignment of s2_ptr and res_ptr differ. Since there are only two ways
things can be aligned (that we care about) we now know that the alignment
of s1_ptr and s2_ptr are the same. */
L2: cmp r12,size,1
bb1 eq,r12,Ljone
bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned
/* Add least significant limb separately to align res_ptr and s2_ptr */
ld r10,s1_ptr,0
addu s1_ptr,s1_ptr,4
ld r8,s2_ptr,0
addu s2_ptr,s2_ptr,4
subu size,size,1
subu.co r6,r10,r8
st r6,res_ptr,0
addu res_ptr,res_ptr,4
L_v2: subu size,size,8
bcnd lt0,size,Lfin2
/* Add blocks of 8 limbs until less than 8 limbs remain */
align 8
Loop2: subu size,size,8
ld.d r8,s1_ptr,0
ld.d r6,s2_ptr,0
subu.cio r8,r8,r6
st r8,res_ptr,0
subu.cio r9,r9,r7
st r9,res_ptr,4
ld.d r8,s1_ptr,8
ld.d r6,s2_ptr,8
subu.cio r8,r8,r6
st r8,res_ptr,8
subu.cio r9,r9,r7
st r9,res_ptr,12
ld.d r8,s1_ptr,16
ld.d r6,s2_ptr,16
subu.cio r8,r8,r6
st r8,res_ptr,16
subu.cio r9,r9,r7
st r9,res_ptr,20
ld.d r8,s1_ptr,24
ld.d r6,s2_ptr,24
subu.cio r8,r8,r6
st r8,res_ptr,24
subu.cio r9,r9,r7
st r9,res_ptr,28
addu s1_ptr,s1_ptr,32
addu s2_ptr,s2_ptr,32
addu res_ptr,res_ptr,32
bcnd ge0,size,Loop2
Lfin2: addu size,size,8-2
bcnd lt0,size,Lend2
Loope2: ld.d r8,s1_ptr,0
ld.d r6,s2_ptr,0
subu.cio r8,r8,r6
st r8,res_ptr,0
subu.cio r9,r9,r7
st r9,res_ptr,4
subu size,size,2
addu s1_ptr,s1_ptr,8
addu s2_ptr,s2_ptr,8
addu res_ptr,res_ptr,8
bcnd ge0,size,Loope2
Lend2: bb0 0,size,Lret2
/* Add last limb */
Ljone: ld r10,s1_ptr,0
ld r8,s2_ptr,0
subu.cio r6,r10,r8
st r6,res_ptr,0
Lret2: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
jmp.n r1
xor r2,r2,1


@@ -1,7 +1,7 @@
; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and
; store the product in a second limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
@@ -55,14 +55,14 @@ ___mpn_mul_1:
; Make S1_PTR and RES_PTR point at the end of their blocks
; and negate SIZE.
lda r3,r3[r4]
lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval
lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval
subu r4,r0,r4
addu.co r2,r0,r0 ; r2 = cy = 0
addu.co r2,r0,r0 ; r2 = cy = 0
ld r9,r3[r4]
mask r7,r5,0xffff ; r7 = lo(S2_LIMB)
extu r8,r5,16 ; r8 = hi(S2_LIMB)
bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0)
mask r7,r5,0xffff ; r7 = lo(S2_LIMB)
extu r8,r5,16 ; r8 = hi(S2_LIMB)
bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0)
subu r6,r6,4
; General code for any value of S2_LIMB.
@@ -75,28 +75,27 @@ ___mpn_mul_1:
br.n L1
addu r4,r4,1
Loop:
ld r9,r3[r4]
Loop: ld r9,r3[r4]
st r26,r6[r4]
; bcnd ne0,r0,0 ; bubble
; bcnd ne0,r0,0 ; bubble
addu r4,r4,1
L1: mul r26,r9,r5 ; low word of product mul_1 WB ld
mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1
mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
mul r10,r12,r8 ; r10 = prod_1a mul_3
extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1
mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1
mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3
addu r10,r10,r11 ; addu_1 WB extu_2
; bcnd ne0,r0,0 ; bubble WB addu_1
addu.co r10,r10,r12 ; WB mul_4
mask.u r10,r10,0xffff ; move the 16 most significant bits...
addu.ci r10,r10,r0 ; ...to the low half of the word...
rot r10,r10,16 ; ...and put carry in pos 16.
addu.co r26,r26,r2 ; add old carry limb
L1: mul r26,r9,r5 ; low word of product mul_1 WB ld
mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1
mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
mul r10,r12,r8 ; r10 = prod_1a mul_3
extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1
mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1
mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3
addu r10,r10,r11 ; addu_1 WB extu_2
; bcnd ne0,r0,0 ; bubble WB addu_1
addu.co r10,r10,r12 ; WB mul_4
mask.u r10,r10,0xffff ; move the 16 most significant bits...
addu.ci r10,r10,r0 ; ...to the low half of the word...
rot r10,r10,16 ; ...and put carry in pos 16.
addu.co r26,r26,r2 ; add old carry limb
bcnd.n ne0,r4,Loop
addu.ci r2,r25,r10 ; compute new carry limb
addu.ci r2,r25,r10 ; compute new carry limb
st r26,r6[r4]
ld.d r25,r31,8
@@ -109,20 +108,19 @@ Lsmall:
br.n SL1
addu r4,r4,1
SLoop:
ld r9,r3[r4] ;
st r8,r6[r4] ;
addu r4,r4,1 ;
SL1: mul r8,r9,r5 ; low word of product
mask r12,r9,0xffff ; r12 = lo(s1_limb)
extu r13,r9,16 ; r13 = hi(s1_limb)
mul r11,r12,r7 ; r11 = prod_0
mul r12,r13,r7 ; r12 = prod_1b
addu.cio r8,r8,r2 ; add old carry limb
extu r10,r11,16 ; r11 = hi(prod_0)
addu r10,r10,r12 ;
SLoop: ld r9,r3[r4] ;
st r8,r6[r4] ;
addu r4,r4,1 ;
SL1: mul r8,r9,r5 ; low word of product
mask r12,r9,0xffff ; r12 = lo(s1_limb)
extu r13,r9,16 ; r13 = hi(s1_limb)
mul r11,r12,r7 ; r11 = prod_0
mul r12,r13,r7 ; r12 = prod_1b
addu.cio r8,r8,r2 ; add old carry limb
extu r10,r11,16 ; r11 = hi(prod_0)
addu r10,r10,r12 ;
bcnd.n ne0,r4,SLoop
extu r2,r10,16 ; r2 = new carry limb
extu r2,r10,16 ; r2 = new carry limb
jmp.n r1
st r8,r6[r4]
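
On the 88100, mul yields only the low 32 bits of a product, so the scheduled sequence above assembles a full 32x32->64 multiply from 16-bit halves: prod_0 through prod_2 are the four partial products, and the single carry possible when the two middle products are summed is caught with addu.co and folded into the high word (the asm does that with mask.u / addu.ci / rot instead of shifts, but the value is the same). The arithmetic, as a C sketch:

#include <stdint.h>

static void
umul32 (uint32_t a, uint32_t b, uint32_t *hi, uint32_t *lo)
{
  uint32_t al = a & 0xffff, ah = a >> 16;   /* mask / extu */
  uint32_t bl = b & 0xffff, bh = b >> 16;
  uint32_t p0  = al * bl;                   /* prod_0 */
  uint32_t p1a = al * bh;                   /* prod_1a */
  uint32_t p1b = ah * bl;                   /* prod_1b */
  uint32_t p2  = ah * bh;                   /* prod_2 */
  uint32_t mid = (p0 >> 16) + p1a;          /* cannot overflow 32 bits */
  uint32_t cy;
  mid += p1b;                               /* may carry ... */
  cy = mid < p1b;                           /* ... caught like addu.co */
  *lo = (mid << 16) | (p0 & 0xffff);
  *hi = p2 + (mid >> 16) + (cy << 16);
}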


@@ -1,7 +1,7 @@
; mc88100 __mpn_sub -- Subtract two limb vectors of the same length > 0 and
; store difference in a third limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
@@ -41,9 +41,10 @@ ___mpn_sub_n:
extu r10,r5,3
ld r7,r4,0 ; read first limb from s2_ptr
subu.co r5,r0,r5 ; (clear carry as side effect)
subu r5,r0,r5
mak r5,r5,3<4>
bcnd eq0,r5,Lzero
bcnd.n eq0,r5,Lzero
subu.co r0,r0,r0 ; initialize carry
or r12,r0,lo16(Lbase)
or.u r12,r12,hi16(Lbase)


@@ -1,7 +1,7 @@
# MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and
# add the product to a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
@@ -63,7 +63,7 @@ Loop: lw $10,0($4)
addu $2,$2,$10
sw $3,0($4)
addiu $4,$4,4
bne $6,$0,Loop # should be "bnel"
bne $6,$0,Loop
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1


@@ -63,7 +63,7 @@ Loop: ld $10,0($4)
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
bne $6,$0,Loop # should be "bnel"
bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1


@@ -59,7 +59,7 @@ Loop: mflo $10
sltu $2,$10,$2 # carry from previous addition -> $2
sd $10,0($4)
daddiu $4,$4,8
bne $6,$0,Loop # should be "bnel"
bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1


@@ -63,7 +63,7 @@ Loop: ld $10,0($4)
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
bne $6,$0,Loop # should be "bnel"
bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1


@@ -1,7 +1,7 @@
# MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and
# store the product in a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
@@ -59,7 +59,7 @@ Loop: mflo $10
sltu $2,$10,$2 # carry from previous addition -> $2
sw $10,0($4)
addiu $4,$4,4
bne $6,$0,Loop # should be "bnel"
bne $6,$0,Loop
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1


@@ -1,7 +1,7 @@
# MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and
# subtract the product from a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
@@ -63,7 +63,7 @@ Loop: lw $10,0($4)
addu $2,$2,$10
sw $3,0($4)
addiu $4,$4,4
bne $6,$0,Loop # should be "bnel"
bne $6,$0,Loop
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1


@@ -1,6 +1,6 @@
# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.


@@ -1,7 +1,7 @@
# IBM POWER __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
# store difference in a third limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.


@@ -1,6 +1,6 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
This file is part of the GNU MP Library.


@@ -1,7 +1,7 @@
! Z8000 __mpn_mul_1 -- Multiply a limb vector with a limb and store
! the result in a second limb vector.
! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
! Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.