Updated from ../=mpn/gmp-1.906.7

This commit is contained in:
Roland McGrath 1995-10-16 01:18:40 +00:00
parent 3a29975f0a
commit 7def3d92a4
56 changed files with 5238 additions and 0 deletions

119
sysdeps/alpha/add_n.s Normal file
View File

@ -0,0 +1,119 @@
# Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
# store sum in a third limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $16
# s1_ptr $17
# s2_ptr $18
# size $19
.set noreorder
.set noat
.text
.align 3
# __mpn_add_n: add the size-limb vectors at s1_ptr and s2_ptr limb by limb
# and store the sums at res_ptr.
# $0 holds the carry between limbs and the final carry-out at return.
# $3/$5 hold s1 limbs, $4/$6 hold s2 limbs, $1 is scratch for a partial carry.
.globl __mpn_add_n
.ent __mpn_add_n
__mpn_add_n:
.frame $30,0,$26,0
ldq $3,0($17)
ldq $4,0($18)
# The loops handle size-1 limbs; the last limb is always done at .Lend.
subq $19,1,$19
and $19,4-1,$2 # number of limbs in first loop
# Start with no carry ($31 reads as zero).
bis $31,$31,$0
beq $2,.L0 # if multiple of 4 limbs, skip first loop
subq $19,$2,$19
# First loop: one limb per iteration, preloading the next limb pair into
# $5/$6.  Per limb: add the carry to the s2 limb ($1 = carry out of that),
# then add the s1 limb ($0 = carry out of that).  At most one of the two
# carries can be set, so OR-ing them gives the carry into the next limb.
.Loop0: subq $2,1,$2
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
addq $17,8,$17
addq $18,8,$18
# Move the preloaded limbs into place for the next iteration.
bis $5,$5,$3
bis $6,$6,$4
addq $16,8,$16
bne $2,.Loop0
# $19 is now a multiple of 4; zero means only the last limb remains.
.L0: beq $19,.Lend
.align 3
# Main loop: four limbs per iteration, alternating between the register
# pairs $3/$4 and $5/$6 so the next limbs are loaded while the current
# ones are being added.
.Loop: subq $19,4,$19
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
ldq $3,16($17)
addq $6,$0,$6
ldq $4,16($18)
cmpult $6,$0,$1
addq $5,$6,$6
cmpult $6,$5,$0
stq $6,8($16)
or $0,$1,$0
ldq $5,24($17)
addq $4,$0,$4
ldq $6,24($18)
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,16($16)
or $0,$1,$0
ldq $3,32($17)
addq $6,$0,$6
ldq $4,32($18)
cmpult $6,$0,$1
addq $5,$6,$6
cmpult $6,$5,$0
stq $6,24($16)
or $0,$1,$0
addq $17,32,$17
addq $18,32,$18
addq $16,32,$16
bne $19,.Loop
# Last limb (already in $3/$4): add, store, and leave the carry-out in $0.
.Lend: addq $4,$0,$4
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
ret $31,($26),1
.end __mpn_add_n

100
sysdeps/alpha/addmul_1.s Normal file
View File

@ -0,0 +1,100 @@
# Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
# the result to a second limb vector.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# s2_limb r19
# This code runs at 42 cycles/limb on the 21064.
# To improve performance for long multiplications, we would use
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
# these instructions without slowing down the general code: 1. We can
# only have two prefetches in operation at any time in the Alpha
# architecture. 2. There will seldom be any special alignment
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
# loop into an inner and outer loop, having the inner loop handle
# exactly one prefetch block?
.set noreorder
.set noat
.text
.align 3
# __mpn_addmul_1: multiply each of the size limbs at s1_ptr by s2_limb and
# add the products into the limbs at res_ptr.  $0 holds the final carry
# limb (high product word plus carries) at return.
# The code is software pipelined: the next s1 limb is loaded while the
# current product is being accumulated.
.globl __mpn_addmul_1
.ent __mpn_addmul_1 2
__mpn_addmul_1:
.frame $30,0,$26
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
umulh $2,$19,$0 # $0 = prod_high
beq $18,Lend1 # jump if size was == 1
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
# Finish the first limb: add prod_low to *res_ptr, $4 = carry from that add.
addq $5,$3,$3
cmpult $3,$5,$4
stq $3,0($16)
addq $16,8,$16 # res_ptr++
beq $18,Lend2 # jump if size was == 2
.align 3
# Main loop: on entry $2 = current s1 limb and $0 + $4 together hold the
# carry limb owed by the previous limb (high product word plus carries).
Loop: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
umulh $2,$19,$4 # $4 = cy_limb
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
# Add the old result limb; $5 becomes the carry out of that addition.
addq $5,$3,$3
cmpult $3,$5,$5
stq $3,0($16)
addq $16,8,$16 # res_ptr++
addq $5,$0,$0 # combine carries
bne $18,Loop
# Last limb when size >= 2: same as one loop iteration without the
# preload, then fold the final high product word into the return value.
Lend2: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
addq $5,$3,$3
cmpult $3,$5,$5
stq $3,0($16)
addq $5,$0,$0 # combine carries
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
# size == 1: add prod_low to *res_ptr and return prod_high plus the carry.
Lend1: addq $5,$3,$3
cmpult $3,$5,$5
stq $3,0($16)
addq $0,$5,$0
ret $31,($26),1
.end __mpn_addmul_1

View File

@ -0,0 +1,118 @@
# Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
# store sum in a third limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $16
# s1_ptr $17
# s2_ptr $18
# size $19
.set noreorder
.set noat
.text
.align 3
# __mpn_add_n (EV5 scheduling): add the size-limb vectors at s1_ptr and
# s2_ptr limb by limb and store the sums at res_ptr.
# $0 holds the carry between limbs and the final carry-out at return.
# $3/$5 hold s1 limbs, $4/$6 hold s2 limbs, $1 is scratch for a partial
# carry, and $20-$23 collect the four sums of one main-loop iteration.
.globl __mpn_add_n
.ent __mpn_add_n
__mpn_add_n:
.frame $30,0,$26,0
ldq $3,0($17)
ldq $4,0($18)
# The loops handle size-1 limbs; the last limb is always done at .Lend.
subq $19,1,$19
and $19,4-1,$2 # number of limbs in first loop
# Start with no carry ($31 reads as zero).
bis $31,$31,$0
beq $2,.L0 # if multiple of 4 limbs, skip first loop
subq $19,$2,$19
# First loop: one limb per iteration, preloading the next limb pair into
# $5/$6.  Per limb: add the carry to the s2 limb ($1 = carry out of that),
# then add the s1 limb ($0 = carry out of that).  At most one of the two
# carries can be set, so OR-ing them gives the carry into the next limb.
.Loop0: subq $2,1,$2
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
addq $17,8,$17
addq $18,8,$18
# Move the preloaded limbs into place for the next iteration.
bis $5,$5,$3
bis $6,$6,$4
addq $16,8,$16
bne $2,.Loop0
# $19 is now a multiple of 4; zero means only the last limb remains.
.L0: beq $19,.Lend
.align 4
# Main loop: four limbs per iteration.  Unlike the first loop, the carry
# chain runs through $0 itself ($0 = s2 limb + carry, then sum = s1 limb +
# $0), the sums are kept in $20-$23, and all four stores are issued
# together at the end of the iteration.
.Loop: subq $19,4,$19
unop
ldq $6,8($18)
addq $4,$0,$0
ldq $5,8($17)
cmpult $0,$4,$1
ldq $4,16($18)
addq $3,$0,$20
cmpult $20,$3,$0
ldq $3,16($17)
or $0,$1,$0
addq $6,$0,$0
cmpult $0,$6,$1
ldq $6,24($18)
addq $5,$0,$21
cmpult $21,$5,$0
ldq $5,24($17)
or $0,$1,$0
addq $4,$0,$0
cmpult $0,$4,$1
ldq $4,32($18)
addq $3,$0,$22
cmpult $22,$3,$0
ldq $3,32($17)
or $0,$1,$0
addq $6,$0,$0
cmpult $0,$6,$1
addq $5,$0,$23
cmpult $23,$5,$0
or $0,$1,$0
stq $20,0($16)
stq $21,8($16)
stq $22,16($16)
stq $23,24($16)
addq $17,32,$17
addq $18,32,$18
addq $16,32,$16
bne $19,.Loop
# Last limb (already in $3/$4): add, store, and leave the carry-out in $0.
.Lend: addq $4,$0,$4
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
ret $31,($26),1
.end __mpn_add_n

View File

@ -0,0 +1,175 @@
# Alpha EV5 __mpn_lshift --
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# cnt r19
# This code runs at 4.25 cycles/limb on the EV5.
.set noreorder
.set noat
.text
.align 3
# __mpn_lshift (EV5 scheduling): shift the size-limb vector at s1_ptr left
# by cnt bits and store the result at res_ptr, working from the most
# significant limb downwards.  $0 receives the bits shifted out of the
# most significant limb (the function result).
# $19 = cnt, $20 = -cnt; the shift instructions use only the low 6 bits of
# the count, so a right shift by $20 acts as a shift by 64-cnt.
# $24 always holds the left-shifted part of the most recent limb that
# still has to be merged with bits from the next lower limb.
.globl __mpn_lshift
.ent __mpn_lshift
__mpn_lshift:
.frame $30,0,$26,0
s8addq $18,$17,$17 # make r17 point at end of s1
ldq $4,-8($17) # load first limb
subq $31,$19,$20
s8addq $18,$16,$16 # make r16 point at end of RES
# The loops produce size-1 result limbs; the lowest one is stored last.
subq $18,1,$18
and $18,4-1,$28 # number of limbs in first loop
srl $4,$20,$0 # compute function result
beq $28,L0
subq $18,$28,$18
.align 3
# First loop: one result limb per iteration, (size-1) mod 4 times.
Loop0: ldq $3,-16($17)
subq $16,8,$16
sll $4,$19,$5
subq $17,8,$17
subq $28,1,$28
srl $3,$20,$6
or $3,$3,$4
or $5,$6,$8
stq $8,0($16)
bne $28,Loop0
L0: sll $4,$19,$24
beq $18,Lend
# warm up phase 1
ldq $1,-16($17)
subq $18,4,$18
ldq $2,-24($17)
ldq $3,-32($17)
ldq $4,-40($17)
# Only one group of four limbs: drain it with the phase 1 cool down.
beq $18,Lcool1
# warm up phase 2
srl $1,$20,$7
sll $1,$19,$21
srl $2,$20,$8
ldq $1,-48($17)
sll $2,$19,$22
ldq $2,-56($17)
srl $3,$20,$5
or $7,$24,$7
sll $3,$19,$23
or $8,$21,$8
srl $4,$20,$6
ldq $3,-64($17)
sll $4,$19,$24
ldq $4,-72($17)
subq $18,4,$18
# Two groups are in flight here, so the phase 2 cool down must drain them.
beq $18,Lcool2
.align 4
# main loop
Loop: stq $7,-8($16)
or $5,$22,$5
stq $8,-16($16)
or $6,$23,$6
srl $1,$20,$7
subq $18,4,$18
sll $1,$19,$21
unop # ldq $31,-96($17)
srl $2,$20,$8
ldq $1,-80($17)
sll $2,$19,$22
ldq $2,-88($17)
stq $5,-24($16)
or $7,$24,$7
stq $6,-32($16)
or $8,$21,$8
srl $3,$20,$5
unop # ldq $31,-96($17)
sll $3,$19,$23
subq $16,32,$16
srl $4,$20,$6
ldq $3,-96($17)
sll $4,$19,$24
ldq $4,-104($17)
subq $17,32,$17
bne $18,Loop
unop
unop
# cool down phase 2/1
Lcool2: stq $7,-8($16)
or $5,$22,$5
stq $8,-16($16)
or $6,$23,$6
srl $1,$20,$7
sll $1,$19,$21
srl $2,$20,$8
sll $2,$19,$22
stq $5,-24($16)
or $7,$24,$7
stq $6,-32($16)
or $8,$21,$8
srl $3,$20,$5
sll $3,$19,$23
srl $4,$20,$6
sll $4,$19,$24
# cool down phase 2/2
stq $7,-40($16)
or $5,$22,$5
stq $8,-48($16)
or $6,$23,$6
stq $5,-56($16)
stq $6,-64($16)
# cool down phase 2/3
stq $24,-72($16)
ret $31,($26),1
# cool down phase 1/1
Lcool1: srl $1,$20,$7
sll $1,$19,$21
srl $2,$20,$8
sll $2,$19,$22
srl $3,$20,$5
or $7,$24,$7
sll $3,$19,$23
or $8,$21,$8
srl $4,$20,$6
sll $4,$19,$24
# cool down phase 1/2
stq $7,-8($16)
or $5,$22,$5
stq $8,-16($16)
or $6,$23,$6
stq $5,-24($16)
stq $6,-32($16)
stq $24,-40($16)
ret $31,($26),1
# No unrolled groups at all: store the lowest result limb and return.
Lend: stq $24,-8($16)
ret $31,($26),1
.end __mpn_lshift

View File

@ -0,0 +1,173 @@
# Alpha EV5 __mpn_rshift --
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# cnt r19
# This code runs at 4.25 cycles/limb on the EV5.
.set noreorder
.set noat
.text
.align 3
# __mpn_rshift (EV5 scheduling): shift the size-limb vector at s1_ptr right
# by cnt bits and store the result at res_ptr, working from the least
# significant limb upwards.  $0 receives the bits shifted out of the
# least significant limb (the function result).
# $19 = cnt, $20 = -cnt; the shift instructions use only the low 6 bits of
# the count, so a left shift by $20 acts as a shift by 64-cnt.
# $24 always holds the right-shifted part of the most recent limb that
# still has to be merged with bits from the next higher limb.
.globl __mpn_rshift
.ent __mpn_rshift
__mpn_rshift:
.frame $30,0,$26,0
ldq $4,0($17) # load first limb
subq $31,$19,$20
# The loops produce size-1 result limbs; the highest one is stored last.
subq $18,1,$18
and $18,4-1,$28 # number of limbs in first loop
sll $4,$20,$0 # compute function result
beq $28,L0
subq $18,$28,$18
.align 3
# First loop: one result limb per iteration, (size-1) mod 4 times.
Loop0: ldq $3,8($17)
addq $16,8,$16
srl $4,$19,$5
addq $17,8,$17
subq $28,1,$28
sll $3,$20,$6
or $3,$3,$4
or $5,$6,$8
stq $8,-8($16)
bne $28,Loop0
L0: srl $4,$19,$24
beq $18,Lend
# warm up phase 1
ldq $1,8($17)
subq $18,4,$18
ldq $2,16($17)
ldq $3,24($17)
ldq $4,32($17)
# Only one group of four limbs: drain it with the phase 1 cool down.
beq $18,Lcool1
# warm up phase 2
sll $1,$20,$7
srl $1,$19,$21
sll $2,$20,$8
ldq $1,40($17)
srl $2,$19,$22
ldq $2,48($17)
sll $3,$20,$5
or $7,$24,$7
srl $3,$19,$23
or $8,$21,$8
sll $4,$20,$6
ldq $3,56($17)
srl $4,$19,$24
ldq $4,64($17)
subq $18,4,$18
# Two groups are in flight here, so the phase 2 cool down must drain them.
beq $18,Lcool2
.align 4
# main loop
Loop: stq $7,0($16)
or $5,$22,$5
stq $8,8($16)
or $6,$23,$6
sll $1,$20,$7
subq $18,4,$18
srl $1,$19,$21
unop # ldq $31,-96($17)
sll $2,$20,$8
ldq $1,72($17)
srl $2,$19,$22
ldq $2,80($17)
stq $5,16($16)
or $7,$24,$7
stq $6,24($16)
or $8,$21,$8
sll $3,$20,$5
unop # ldq $31,-96($17)
srl $3,$19,$23
addq $16,32,$16
sll $4,$20,$6
ldq $3,88($17)
srl $4,$19,$24
ldq $4,96($17)
addq $17,32,$17
bne $18,Loop
unop
unop
# cool down phase 2/1
Lcool2: stq $7,0($16)
or $5,$22,$5
stq $8,8($16)
or $6,$23,$6
sll $1,$20,$7
srl $1,$19,$21
sll $2,$20,$8
srl $2,$19,$22
stq $5,16($16)
or $7,$24,$7
stq $6,24($16)
or $8,$21,$8
sll $3,$20,$5
srl $3,$19,$23
sll $4,$20,$6
srl $4,$19,$24
# cool down phase 2/2
stq $7,32($16)
or $5,$22,$5
stq $8,40($16)
or $6,$23,$6
stq $5,48($16)
stq $6,56($16)
# cool down phase 2/3
stq $24,64($16)
ret $31,($26),1
# cool down phase 1/1
Lcool1: sll $1,$20,$7
srl $1,$19,$21
sll $2,$20,$8
srl $2,$19,$22
sll $3,$20,$5
or $7,$24,$7
srl $3,$19,$23
or $8,$21,$8
sll $4,$20,$6
srl $4,$19,$24
# cool down phase 1/2
stq $7,0($16)
or $5,$22,$5
stq $8,8($16)
or $6,$23,$6
stq $5,16($16)
stq $6,24($16)
stq $24,32($16)
ret $31,($26),1
# No unrolled groups at all: store the highest result limb and return.
Lend: stq $24,0($16)
ret $31,($26),1
.end __mpn_rshift

108
sysdeps/alpha/lshift.s Normal file
View File

@ -0,0 +1,108 @@
# Alpha 21064 __mpn_lshift --
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# cnt r19
# This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
# it would take 4 cycles/limb. It should be possible to get down to 3
# cycles/limb since both ldq and stq can be paired with the other used
# instructions. But there are many restrictions in the 21064 pipeline that
# make it hard, if not impossible, to get down to 3 cycles/limb:
# 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
# 2. Only aligned instruction pairs can be paired.
# 3. The store buffer or silo might not be able to deal with the bandwidth.
.set noreorder
.set noat
.text
.align 3
# __mpn_lshift (21064 scheduling): shift the size-limb vector at s1_ptr
# left by cnt bits and store the result at res_ptr, working from the most
# significant limb downwards.  $0 receives the bits shifted out of the
# most significant limb (the function result).
# $19 = cnt, $7 = -cnt; the shift instructions use only the low 6 bits of
# the count, so a right shift by $7 acts as a shift by 64-cnt.
# $3/$4 alternate as "current" and "next lower" source limb.
.globl __mpn_lshift
.ent __mpn_lshift
__mpn_lshift:
.frame $30,0,$26,0
s8addq $18,$17,$17 # make r17 point at end of s1
ldq $4,-8($17) # load first limb
subq $17,8,$17
subq $31,$19,$7
s8addq $18,$16,$16 # make r16 point at end of RES
# The loops produce size-1 result limbs; the lowest one is stored at Lend.
subq $18,1,$18
and $18,4-1,$20 # number of limbs in first loop
srl $4,$7,$0 # compute function result
beq $20,L0
subq $18,$20,$18
.align 3
# First loop: one result limb per iteration, (size-1) mod 4 times.  Each
# result limb is (current limb << cnt) | (next lower limb >> (64-cnt)).
Loop0:
ldq $3,-8($17)
subq $16,8,$16
subq $17,8,$17
subq $20,1,$20
sll $4,$19,$5
srl $3,$7,$6
bis $3,$3,$4
bis $5,$6,$8
stq $8,0($16)
bne $20,Loop0
# $18 is now a multiple of 4; zero means only the lowest limb remains.
L0: beq $18,Lend
.align 3
# Main loop: four result limbs per iteration.
Loop: ldq $3,-8($17)
subq $16,32,$16
subq $18,4,$18
sll $4,$19,$5
srl $3,$7,$6
ldq $4,-16($17)
sll $3,$19,$1
bis $5,$6,$8
stq $8,24($16)
srl $4,$7,$2
ldq $3,-24($17)
sll $4,$19,$5
bis $1,$2,$8
stq $8,16($16)
srl $3,$7,$6
ldq $4,-32($17)
sll $3,$19,$1
bis $5,$6,$8
stq $8,8($16)
srl $4,$7,$2
subq $17,32,$17
bis $1,$2,$8
stq $8,0($16)
bgt $18,Loop
# Lowest result limb: only the left-shifted least significant source limb.
Lend: sll $4,$19,$8
stq $8,-8($16)
ret $31,($26),1
.end __mpn_lshift

84
sysdeps/alpha/mul_1.s Normal file
View File

@ -0,0 +1,84 @@
# Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
# the result in a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# s2_limb r19
# This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
# To improve performance for long multiplications, we would use
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
# these instructions without slowing down the general code: 1. We can
# only have two prefetches in operation at any time in the Alpha
# architecture. 2. There will seldom be any special alignment
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
# loop into an inner and outer loop, having the inner loop handle
# exactly one prefetch block?
.set noreorder
.set noat
.text
.align 3
# __mpn_mul_1: multiply each of the size limbs at s1_ptr by s2_limb and
# store the low product words (plus the carry limb from the previous
# limb) at res_ptr.  $0 holds the final carry limb at return.
# The code is software pipelined: the next s1 limb is loaded while the
# current product is being computed.
.globl __mpn_mul_1
.ent __mpn_mul_1 2
__mpn_mul_1:
.frame $30,0,$26
ldq $2,0($17) # $2 = s1_limb
subq $18,1,$18 # size--
mulq $2,$19,$3 # $3 = prod_low
bic $31,$31,$4 # clear cy_limb
umulh $2,$19,$0 # $0 = prod_high
beq $18,Lend1 # jump if size was == 1
ldq $2,8($17) # $2 = s1_limb
subq $18,1,$18 # size--
stq $3,0($16)
beq $18,Lend2 # jump if size was == 2
.align 3
# Main loop: s1_ptr and res_ptr lag one limb behind the limb being
# processed, hence the 16($17) load and 8($16) store offsets.
Loop: mulq $2,$19,$3 # $3 = prod_low
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
umulh $2,$19,$4 # $4 = cy_limb
ldq $2,16($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
addq $3,$0,$3 # $3 = cy_limb + prod_low
stq $3,8($16)
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
addq $16,8,$16 # res_ptr++
bne $18,Loop
# Last limb when size >= 2: same as one loop iteration without the
# preload, then fold the final high product word into the return value.
Lend2: mulq $2,$19,$3 # $3 = prod_low
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
stq $3,8($16)
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
# size == 1: store prod_low; $0 already holds prod_high.
Lend1: stq $3,0($16)
ret $31,($26),1
.end __mpn_mul_1

106
sysdeps/alpha/rshift.s Normal file
View File

@ -0,0 +1,106 @@
# Alpha 21064 __mpn_rshift --
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# cnt r19
# This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
# it would take 4 cycles/limb. It should be possible to get down to 3
# cycles/limb since both ldq and stq can be paired with the other used
# instructions. But there are many restrictions in the 21064 pipeline that
# make it hard, if not impossible, to get down to 3 cycles/limb:
# 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
# 2. Only aligned instruction pairs can be paired.
# 3. The store buffer or silo might not be able to deal with the bandwidth.
.set noreorder
.set noat
.text
.align 3
# __mpn_rshift (21064 scheduling): shift the size-limb vector at s1_ptr
# right by cnt bits and store the result at res_ptr, working from the
# least significant limb upwards.  $0 receives the bits shifted out of the
# least significant limb (the function result).
# $19 = cnt, $7 = -cnt; the shift instructions use only the low 6 bits of
# the count, so a left shift by $7 acts as a shift by 64-cnt.
# $3/$4 alternate as "current" and "next higher" source limb.
.globl __mpn_rshift
.ent __mpn_rshift
__mpn_rshift:
.frame $30,0,$26,0
ldq $4,0($17) # load first limb
addq $17,8,$17
subq $31,$19,$7
# The loops produce size-1 result limbs; the highest one is stored at Lend.
subq $18,1,$18
and $18,4-1,$20 # number of limbs in first loop
sll $4,$7,$0 # compute function result
beq $20,L0
subq $18,$20,$18
.align 3
# First loop: one result limb per iteration, (size-1) mod 4 times.  Each
# result limb is (current limb >> cnt) | (next higher limb << (64-cnt)).
Loop0:
ldq $3,0($17)
addq $16,8,$16
addq $17,8,$17
subq $20,1,$20
srl $4,$19,$5
sll $3,$7,$6
bis $3,$3,$4
bis $5,$6,$8
stq $8,-8($16)
bne $20,Loop0
# $18 is now a multiple of 4; zero means only the highest limb remains.
L0: beq $18,Lend
.align 3
# Main loop: four result limbs per iteration.
Loop: ldq $3,0($17)
addq $16,32,$16
subq $18,4,$18
srl $4,$19,$5
sll $3,$7,$6
ldq $4,8($17)
srl $3,$19,$1
bis $5,$6,$8
stq $8,-32($16)
sll $4,$7,$2
ldq $3,16($17)
srl $4,$19,$5
bis $1,$2,$8
stq $8,-24($16)
sll $3,$7,$6
ldq $4,24($17)
srl $3,$19,$1
bis $5,$6,$8
stq $8,-16($16)
sll $4,$7,$2
addq $17,32,$17
bis $1,$2,$8
stq $8,-8($16)
bgt $18,Loop
# Highest result limb: only the right-shifted most significant source limb.
Lend: srl $4,$19,$8
stq $8,0($16)
ret $31,($26),1
.end __mpn_rshift

119
sysdeps/alpha/sub_n.s Normal file
View File

@ -0,0 +1,119 @@
# Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
# store difference in a third limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $16
# s1_ptr $17
# s2_ptr $18
# size $19
.set noreorder
.set noat
.text
.align 3
# __mpn_sub_n: subtract the size-limb vector at s2_ptr from the one at
# s1_ptr limb by limb and store the differences at res_ptr.
# $0 holds the borrow between limbs and the final borrow-out at return.
# $3/$5 hold s1 limbs, $4/$6 hold s2 limbs, $1 is scratch for a partial
# borrow.
.globl __mpn_sub_n
.ent __mpn_sub_n
__mpn_sub_n:
.frame $30,0,$26,0
ldq $3,0($17)
ldq $4,0($18)
# The loops handle size-1 limbs; the last limb is always done at .Lend.
subq $19,1,$19
and $19,4-1,$2 # number of limbs in first loop
# Start with no borrow ($31 reads as zero).
bis $31,$31,$0
beq $2,.L0 # if multiple of 4 limbs, skip first loop
subq $19,$2,$19
# First loop: one limb per iteration, preloading the next limb pair into
# $5/$6.  Per limb: add the borrow to the s2 limb ($1 = carry out of
# that), subtract the result from the s1 limb ($0 = 1 if the s1 limb is
# smaller than the difference, i.e. the subtraction wrapped), and OR the
# two flags into the borrow for the next limb.
.Loop0: subq $2,1,$2
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,0($16)
or $0,$1,$0
addq $17,8,$17
addq $18,8,$18
# Move the preloaded limbs into place for the next iteration.
bis $5,$5,$3
bis $6,$6,$4
addq $16,8,$16
bne $2,.Loop0
# $19 is now a multiple of 4; zero means only the last limb remains.
.L0: beq $19,.Lend
.align 3
# Main loop: four limbs per iteration, alternating between the register
# pairs $3/$4 and $5/$6 so the next limbs are loaded while the current
# ones are being subtracted.
.Loop: subq $19,4,$19
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,0($16)
or $0,$1,$0
ldq $3,16($17)
addq $6,$0,$6
ldq $4,16($18)
cmpult $6,$0,$1
subq $5,$6,$6
cmpult $5,$6,$0
stq $6,8($16)
or $0,$1,$0
ldq $5,24($17)
addq $4,$0,$4
ldq $6,24($18)
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,16($16)
or $0,$1,$0
ldq $3,32($17)
addq $6,$0,$6
ldq $4,32($18)
cmpult $6,$0,$1
subq $5,$6,$6
cmpult $5,$6,$0
stq $6,24($16)
or $0,$1,$0
addq $17,32,$17
addq $18,32,$18
addq $16,32,$16
bne $19,.Loop
# Last limb (already in $3/$4): subtract, store, and leave the borrow-out
# in $0.
.Lend: addq $4,$0,$4
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,0($16)
or $0,$1,$0
ret $31,($26),1
.end __mpn_sub_n

100
sysdeps/alpha/submul_1.s Normal file
View File

@ -0,0 +1,100 @@
# Alpha 21064 __mpn_submul_1 -- Multiply a limb vector with a limb and
# subtract the result from a second limb vector.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# s2_limb r19
# This code runs at 42 cycles/limb on the 21064.
# To improve performance for long multiplications, we would use
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
# these instructions without slowing down the general code: 1. We can
# only have two prefetches in operation at any time in the Alpha
# architecture. 2. There will seldom be any special alignment
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
# loop into an inner and outer loop, having the inner loop handle
# exactly one prefetch block?
.set noreorder
.set noat
.text
.align 3
# __mpn_submul_1: multiply each of the size limbs at s1_ptr by s2_limb and
# subtract the products from the limbs at res_ptr.  $0 holds the final
# borrow limb (high product word plus borrows) at return.
# The code is software pipelined: the next s1 limb is loaded while the
# current product is being subtracted.
.globl __mpn_submul_1
.ent __mpn_submul_1 2
__mpn_submul_1:
.frame $30,0,$26
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
umulh $2,$19,$0 # $0 = prod_high
beq $18,Lend1 # jump if size was == 1
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
# Finish the first limb: subtract prod_low from *res_ptr; $4 = borrow
# (set when the old limb is smaller than the difference).
subq $5,$3,$3
cmpult $5,$3,$4
stq $3,0($16)
addq $16,8,$16 # res_ptr++
beq $18,Lend2 # jump if size was == 2
.align 3
# Main loop: on entry $2 = current s1 limb and $0 + $4 together hold the
# limb owed by the previous limb (high product word plus borrows).
Loop: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
umulh $2,$19,$4 # $4 = cy_limb
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
# Subtract from the old result limb; $5 becomes the borrow out of that.
subq $5,$3,$3
cmpult $5,$3,$5
stq $3,0($16)
addq $16,8,$16 # res_ptr++
addq $5,$0,$0 # combine carries
bne $18,Loop
# Last limb when size >= 2: same as one loop iteration without the
# preload, then fold the final high product word into the return value.
Lend2: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
subq $5,$3,$3
cmpult $5,$3,$5
stq $3,0($16)
addq $5,$0,$0 # combine carries
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
# size == 1: subtract prod_low from *res_ptr and return prod_high plus the
# borrow.
Lend1: subq $5,$3,$3
cmpult $5,$3,$5
stq $3,0($16)
addq $0,$5,$0
ret $31,($26),1
.end __mpn_submul_1

57
sysdeps/hppa/add_n.s Normal file
View File

@ -0,0 +1,57 @@
; HP-PA __mpn_add_n -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr gr26
; s1_ptr gr25
; s2_ptr gr24
; size gr23
; One might want to unroll this as for other processors, but it turns
; out that the data cache contention after a store makes such
; unrolling useless. We can't come under 5 cycles/limb anyway.
.code
; __mpn_add_n: add the size-limb vectors at s1_ptr (%r25) and s2_ptr (%r24)
; and store the sums at res_ptr (%r26).  %r23 counts the remaining limbs.
; The carry travels between limbs in the processor's carry bit (add/addc);
; at return %r28 holds the final carry (0 or 1).
; NOTE(review): the instruction following each addib/bv is presumably
; executed in the branch delay slot -- confirm against the PA-RISC manual.
.export __mpn_add_n
__mpn_add_n
.proc
.callinfo frame=0,no_calls
.entry
; Load the first limb pair, post-incrementing both source pointers.
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
addib,= -1,%r23,L$end ; check for (SIZE == 1)
add %r20,%r19,%r28 ; add first limbs ignoring cy
; Loop: load the next limb pair, store the previous sum, then add the new
; pair together with the carry from the previous addition.
L$loop ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
addib,<> -1,%r23,L$loop
addc %r20,%r19,%r28
; Store the last sum and turn the carry bit into the value left in %r28.
L$end stws %r28,0(0,%r26)
bv 0(%r2)
addc %r0,%r0,%r28
.exit
.procend

View File

@ -0,0 +1,101 @@
; HP-PA-1.1 __mpn_addmul_1 -- Multiply a limb vector with a limb and
; add the result to a second limb vector.
; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr r26
; s1_ptr r25
; size r24
; s2_limb r23
; This runs at 11 cycles/limb on a PA7000. With the used instructions, it
; can not become faster due to data cache contention after a store. On the
; PA7100 it runs at 10 cycles/limb, and that can not be improved either,
; since only the xmpyu does not need the integer pipeline, so the only
; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
; on the PA7100.
; There are some ideas described in mul_1.s that apply to this code too.
.code
; __mpn_addmul_1: multiply each of the size limbs at s1_ptr (%r25) by
; s2_limb (%r23) and add the products into the limbs at res_ptr (%r26).
; %r24 counts the remaining limbs; at return %r28 holds the high product
; word of the last limb plus the final carry.
; The multiplication is done with xmpyu in the floating-point registers,
; so operands and products are passed through the 64-byte stack frame:
; each 64-bit product is stored with fstds at -16(%r30), then its high
; word is read back from -16(%r30) and its low word from -12(%r30).
; NOTE(review): the instruction following each addib/bv is presumably
; executed in the branch delay slot -- confirm against the PA-RISC manual.
.export __mpn_addmul_1
__mpn_addmul_1
.proc
.callinfo frame=64,no_calls
.entry
; Allocate the frame and start loading the first s1 limb.
ldo 64(%r30),%r30
fldws,ma 4(%r25),%fr5
stw %r23,-16(%r30) ; move s2_limb ...
addib,= -1,%r24,L$just_one_limb
fldws -16(%r30),%fr4 ; ... into fr4
add %r0,%r0,%r0 ; clear carry
; Prime the pipeline: compute the first two products and fetch their words.
xmpyu %fr4,%fr5,%fr6
fldws,ma 4(%r25),%fr7
fstds %fr6,-16(%r30)
xmpyu %fr4,%fr7,%fr8
ldw -12(%r30),%r19 ; least significant limb in product
ldw -16(%r30),%r28
fstds %fr8,-16(%r30)
addib,= -1,%r24,L$end
ldw -12(%r30),%r1
; Main loop
; On entry %r19 = value to add to *res_ptr, %r28 = high word of the
; previous product, %r1 = low word of the current product.
L$loop ldws 0(%r26),%r29
fldws,ma 4(%r25),%fr5
add %r29,%r19,%r19
stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
xmpyu %fr4,%fr5,%fr6
ldw -16(%r30),%r28
fstds %fr6,-16(%r30)
addc %r0,%r28,%r28
addib,<> -1,%r24,L$loop
ldw -12(%r30),%r1
; Drain the pipeline: accumulate the last two limbs, fold the carries into
; %r28 and release the frame while returning.
L$end ldw 0(%r26),%r29
add %r29,%r19,%r19
stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
ldw -16(%r30),%r28
ldws 0(%r26),%r29
addc %r0,%r28,%r28
add %r29,%r19,%r19
stws,ma %r19,4(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
; size == 1: one product; add its low word to *res_ptr and leave the high
; word plus carry in %r28.
L$just_one_limb
xmpyu %fr4,%fr5,%fr6
ldw 0(%r26),%r29
fstds %fr6,-16(%r30)
ldw -12(%r30),%r1
ldw -16(%r30),%r28
add %r29,%r1,%r19
stw %r19,0(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
.exit
.procend

View File

@ -0,0 +1,97 @@
; HP-PA-1.1 __mpn_mul_1 -- Multiply a limb vector with a limb and store
; the result in a second limb vector.
; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr r26
; s1_ptr r25
; size r24
; s2_limb r23
; This runs at 9 cycles/limb on a PA7000. With the used instructions, it can
; not become faster due to data cache contention after a store. On the
; PA7100 it runs at 7 cycles/limb, and that can not be improved either, since
; only the xmpyu does not need the integer pipeline, so the only dual-issue
; we will get are addc+xmpyu. Unrolling would not help either CPU.
; We could use fldds to read two limbs at a time from the S1 array, and that
; could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
; PA7100, respectively. We don't do that since it does not seem worth the
; (alignment) troubles...
; At least the PA7100 is rumored to be able to deal with cache-misses
; without stalling instruction issue. If this is true, and the cache is
; actually also lockup-free, we should use a deeper software pipeline, and
; load from S1 very early! (The loads and stores to -12(sp) will surely be
; in the cache.)
; __mpn_mul_1 (HP-PA 1.1)
; Multiplies each limb read through s1_ptr by s2_limb and stores the low
; limbs of the running result through res_ptr; the final carry limb is
; left in %r28.
; In:  %r26 = res_ptr, %r25 = s1_ptr, %r24 = size, %r23 = s2_limb
; Uses %r1, %r19, %r28 and %fr4-%fr8 as scratch.
; The doubleword at -16(%r30) inside the 64-byte frame is the transfer
; area between FP and integer registers: after "fstds" the high product
; word is at -16(%r30) and the low word at -12(%r30).
; The instruction after each branch sits in that branch's delay slot.
.code
.export __mpn_mul_1
__mpn_mul_1
.proc
.callinfo frame=64,no_calls
.entry
ldo 64(%r30),%r30
fldws,ma 4(%r25),%fr5
stw %r23,-16(%r30) ; move s2_limb ...
addib,= -1,%r24,L$just_one_limb
fldws -16(%r30),%fr4 ; ... into fr4
add %r0,%r0,%r0 ; clear carry
; Pipeline prologue: two products are started before the loop; %r19 holds
; the limb waiting to be stored, %r28 the high limb of that product and
; %r1 the low limb of the following product.
xmpyu %fr4,%fr5,%fr6
fldws,ma 4(%r25),%fr7
fstds %fr6,-16(%r30)
xmpyu %fr4,%fr7,%fr8
ldw -12(%r30),%r19 ; least significant limb in product
ldw -16(%r30),%r28
fstds %fr8,-16(%r30)
addib,= -1,%r24,L$end
ldw -12(%r30),%r1
; Main loop
; Store the pending limb, then form the next one as previous high limb +
; next low limb + carry (the carry is carried from one addc to the next).
L$loop fldws,ma 4(%r25),%fr5
stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
xmpyu %fr4,%fr5,%fr6
ldw -16(%r30),%r28
fstds %fr6,-16(%r30)
addib,<> -1,%r24,L$loop
ldw -12(%r30),%r1
; Pipeline epilogue: store the last two result limbs and return the final
; high limb plus carry in %r28; the frame is popped in the delay slot.
L$end stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
ldw -16(%r30),%r28
stws,ma %r19,4(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
; size == 1: the high product word goes to %r28 via the stack, the low
; word is stored straight from %fr6R in the delay slot of the return.
L$just_one_limb
xmpyu %fr4,%fr5,%fr6
fstds %fr6,-16(%r30)
ldw -16(%r30),%r28
ldo -64(%r30),%r30
bv 0(%r2)
fstws %fr6R,0(%r26)
.exit
.procend

View File

@ -0,0 +1,110 @@
; HP-PA-1.1 __mpn_submul_1 -- Multiply a limb vector with a limb and
; subtract the result from a second limb vector.
; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr r26
; s1_ptr r25
; size r24
; s2_limb r23
; This runs at 12 cycles/limb on a PA7000. With the used instructions, it
; can not become faster due to data cache contention after a store. On the
; PA7100 it runs at 11 cycles/limb, and that can not be improved either,
; since only the xmpyu does not need the integer pipeline, so the only
; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
; on the PA7100.
; There are some ideas described in mul_1.s that apply to this code too.
; It seems possible to make this run as fast as __mpn_addmul_1, if we use
; sub,>>= %r29,%r19,%r22
; addi 1,%r28,%r28
; but that requires reworking the hairy software pipeline...
; __mpn_submul_1 (HP-PA 1.1)
; Multiplies each limb read through s1_ptr by s2_limb and subtracts the
; product from the limb vector at res_ptr; the final borrow limb is left
; in %r28.
; In:  %r26 = res_ptr, %r25 = s1_ptr, %r24 = size, %r23 = s2_limb
; Uses %r1, %r19, %r22, %r28, %r29 and %fr4-%fr8 as scratch.
; The doubleword at -16(%r30) inside the 64-byte frame moves data between
; the FP and integer register files (high word at -16, low word at -12).
; Borrow handling: after "sub %r29,%r19,%r22" the code executes
; "add %r22,%r19,%r0", whose result is discarded; that add produces a
; carry exactly when the subtraction wrapped, so the following addc chain
; can propagate the borrow as an ordinary carry.
; The instruction after each branch sits in that branch's delay slot.
.code
.export __mpn_submul_1
__mpn_submul_1
.proc
.callinfo frame=64,no_calls
.entry
ldo 64(%r30),%r30
fldws,ma 4(%r25),%fr5
stw %r23,-16(%r30) ; move s2_limb ...
addib,= -1,%r24,L$just_one_limb
fldws -16(%r30),%fr4 ; ... into fr4
add %r0,%r0,%r0 ; clear carry
; Pipeline prologue: start two products; %r19 = limb to subtract next,
; %r28 = high limb of that product, %r1 = low limb of the next product.
xmpyu %fr4,%fr5,%fr6
fldws,ma 4(%r25),%fr7
fstds %fr6,-16(%r30)
xmpyu %fr4,%fr7,%fr8
ldw -12(%r30),%r19 ; least significant limb in product
ldw -16(%r30),%r28
fstds %fr8,-16(%r30)
addib,= -1,%r24,L$end
ldw -12(%r30),%r1
; Main loop
L$loop ldws 0(%r26),%r29
fldws,ma 4(%r25),%fr5
sub %r29,%r19,%r22
add %r22,%r19,%r0
stws,ma %r22,4(%r26)
addc %r28,%r1,%r19
xmpyu %fr4,%fr5,%fr6
ldw -16(%r30),%r28
fstds %fr6,-16(%r30)
addc %r0,%r28,%r28
addib,<> -1,%r24,L$loop
ldw -12(%r30),%r1
; Pipeline epilogue: drain the two products still in flight and return the
; accumulated borrow limb in %r28; the frame is popped in the delay slot.
L$end ldw 0(%r26),%r29
sub %r29,%r19,%r22
add %r22,%r19,%r0
stws,ma %r22,4(%r26)
addc %r28,%r1,%r19
ldw -16(%r30),%r28
ldws 0(%r26),%r29
addc %r0,%r28,%r28
sub %r29,%r19,%r22
add %r22,%r19,%r0
stws,ma %r22,4(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
; size == 1: one product, subtract its low limb from *res_ptr and return
; the high limb plus borrow in %r28.
L$just_one_limb
xmpyu %fr4,%fr5,%fr6
ldw 0(%r26),%r29
fstds %fr6,-16(%r30)
ldw -12(%r30),%r1
ldw -16(%r30),%r28
sub %r29,%r1,%r22
add %r22,%r1,%r0
stw %r22,0(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
.exit
.procend

View File

@ -0,0 +1,74 @@
; HP-PA __udiv_qrnnd division support, used from longlong.h.
; This version runs fast on PA 7000 and later.
; Copyright (C) 1993, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; rem_ptr gr26
; n1 gr25
; n0 gr24
; d gr23
; __udiv_qrnnd (HP-PA 1.1, floating-point based)
; Divides the two-word value (n1,n0) by d using a double-precision FP
; divide, stores the remainder through rem_ptr and leaves the quotient in
; %r28.
; In:  %r26 = rem_ptr, %r25 = n1, %r24 = n0, %r23 = d
; Uses %r19-%r22 and %fr4-%fr6 as scratch; -16(%r30) in the temporary
; 64-byte frame is the FP <-> integer transfer doubleword.
; NOTE(review): correctness of the single final adjustment presumably
; relies on preconditions from longlong.h (e.g. n1 < d) -- confirm there.
.code
; L$0000 is the IEEE double 2^64 (0x43f00000 00000000); it is added to
; the converted dividend when the signed conversion saw the top bit set.
L$0000 .word 0x43f00000
.word 0x0
.export __udiv_qrnnd
__udiv_qrnnd
.proc
.callinfo frame=64,no_calls
.entry
ldo 64(%r30),%r30
; Build the 64-bit dividend in memory and load it as one FP doubleword.
stws %r25,-16(0,%r30) ; n_hi
stws %r24,-12(0,%r30) ; n_lo
ldil L'L$0000,%r19
ldo R'L$0000(%r19),%r19
fldds -16(0,%r30),%fr5
; d is parked at -12(%r30) for the later load into %fr6R.
stws %r23,-12(0,%r30)
; Skip the 2^64 correction when n1 is non-negative as a signed word; the
; conversion in the delay slot executes on both paths.
comib,<= 0,%r25,L$1
fcnvxf,dbl,dbl %fr5,%fr5
fldds 0(0,%r19),%fr4
fadd,dbl %fr4,%fr5,%fr5
L$1
; %fr6 = d zero-extended to 64 bits, converted to double in %fr4.
fcpy,sgl %fr0,%fr6L
fldws -12(0,%r30),%fr6R
fcnvxf,dbl,dbl %fr6,%fr4
; Quotient estimate q = (n1,n0) / d, converted back to an integer.
fdiv,dbl %fr5,%fr4,%fr5
fcnvfx,dbl,dbl %fr5,%fr4
fstws %fr4R,-16(%r30)
; q * d as a 64-bit integer product, then remainder = (n1,n0) - q * d.
xmpyu %fr4R,%fr6R,%fr6
ldws -16(%r30),%r28
fstds %fr6,-16(0,%r30)
ldws -12(0,%r30),%r21
ldws -16(0,%r30),%r20
sub %r24,%r21,%r22
subb %r25,%r20,%r19
; If the high word of the difference is zero the estimate is accepted;
; otherwise add d back to the remainder and decrement the quotient.
; The frame is released in the delay slot on both paths.
comib,= 0,%r19,L$2
ldo -64(%r30),%r30
add %r22,%r23,%r22
ldo -1(%r28),%r28
; Return; the remainder is stored through rem_ptr in the delay slot.
L$2 bv 0(%r2)
stws %r22,0(0,%r26)
.exit
.procend

65
sysdeps/hppa/lshift.s Normal file
View File

@ -0,0 +1,65 @@
; HP-PA __mpn_lshift -- Shift a limb vector left by cnt bits and store the
; result in a second limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr gr26
; s_ptr gr25
; size gr24
; cnt gr23
; __mpn_lshift (HP-PA)
; Shifts the size-limb vector at s_ptr left by cnt bits, storing the result
; at res_ptr; the bits shifted out of the top limb are left in %r28.
; In:  %r26 = res_ptr, %r25 = s_ptr, %r24 = size, %r23 = cnt
; Uses %r1, %r20, %r22, %r29 as scratch.
; Both pointers are first advanced past the end of their vectors and the
; limbs are then processed from the most significant end downwards with
; pre-decrementing loads/stores.  Each result limb is a "vshd" of two
; adjacent source limbs with the shift-amount register set to 32 - cnt.
; The loop is unrolled twice, alternating the roles of %r22 and %r29.
; The instruction after each branch sits in that branch's delay slot.
; NOTE(review): .callinfo declares frame=64 although no frame is allocated
; here -- presumably harmless for a leaf routine, confirm if unwinding matters.
.code
.export __mpn_lshift
__mpn_lshift
.proc
.callinfo frame=64,no_calls
.entry
sh2add %r24,%r25,%r25
sh2add %r24,%r26,%r26
ldws,mb -4(0,%r25),%r22
subi 32,%r23,%r1
mtsar %r1
addib,= -1,%r24,L$0004
vshd %r0,%r22,%r28 ; compute carry out limb
ldws,mb -4(0,%r25),%r29
addib,= -1,%r24,L$0002
vshd %r22,%r29,%r20
L$loop ldws,mb -4(0,%r25),%r22
stws,mb %r20,-4(0,%r26)
addib,= -1,%r24,L$0003
vshd %r29,%r22,%r20
ldws,mb -4(0,%r25),%r29
stws,mb %r20,-4(0,%r26)
addib,<> -1,%r24,L$loop
vshd %r22,%r29,%r20
; Exit with the last source limb in %r29: store the pending limb, then the
; lowest limb (shifted against zero) from the delay slot of the return.
L$0002 stws,mb %r20,-4(0,%r26)
vshd %r29,%r0,%r20
bv 0(%r2)
stw %r20,-4(0,%r26)
; Exit with the last source limb in %r22 (L$0004 is also the size == 1 path).
L$0003 stws,mb %r20,-4(0,%r26)
L$0004 vshd %r22,%r0,%r20
bv 0(%r2)
stw %r20,-4(0,%r26)
.exit
.procend

62
sysdeps/hppa/rshift.s Normal file
View File

@ -0,0 +1,62 @@
; HP-PA __mpn_rshift -- Shift a limb vector right by cnt bits and store the
; result in a second limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr gr26
; s_ptr gr25
; size gr24
; cnt gr23
; __mpn_rshift (HP-PA)
; Shifts the size-limb vector at s_ptr right by cnt bits, storing the
; result at res_ptr; the bits shifted out of the lowest limb are left in
; %r28.
; In:  %r26 = res_ptr, %r25 = s_ptr, %r24 = size, %r23 = cnt
; Uses %r20, %r22, %r29 as scratch.
; Limbs are processed from the least significant end upwards with
; post-incrementing loads/stores.  Each result limb is a "vshd" of two
; adjacent source limbs with the shift-amount register set to cnt.
; The loop is unrolled twice, alternating the roles of %r22 and %r29.
; The instruction after each branch sits in that branch's delay slot.
; NOTE(review): .callinfo declares frame=64 although no frame is allocated
; here -- presumably harmless for a leaf routine, confirm if unwinding matters.
.code
.export __mpn_rshift
__mpn_rshift
.proc
.callinfo frame=64,no_calls
.entry
ldws,ma 4(0,%r25),%r22
mtsar %r23
addib,= -1,%r24,L$0004
vshd %r22,%r0,%r28 ; compute carry out limb
ldws,ma 4(0,%r25),%r29
addib,= -1,%r24,L$0002
vshd %r29,%r22,%r20
L$loop ldws,ma 4(0,%r25),%r22
stws,ma %r20,4(0,%r26)
addib,= -1,%r24,L$0003
vshd %r22,%r29,%r20
ldws,ma 4(0,%r25),%r29
stws,ma %r20,4(0,%r26)
addib,<> -1,%r24,L$loop
vshd %r29,%r22,%r20
; Exit with the last source limb in %r29: store the pending limb, then the
; top limb (zero shifted in from above) from the delay slot of the return.
L$0002 stws,ma %r20,4(0,%r26)
vshd %r0,%r29,%r20
bv 0(%r2)
stw %r20,0(0,%r26)
; Exit with the last source limb in %r22 (L$0004 is also the size == 1 path).
L$0003 stws,ma %r20,4(0,%r26)
L$0004 vshd %r0,%r22,%r20
bv 0(%r2)
stw %r20,0(0,%r26)
.exit
.procend

58
sysdeps/hppa/sub_n.s Normal file
View File

@ -0,0 +1,58 @@
; HP-PA __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
; store difference in a third limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr gr26
; s1_ptr gr25
; s2_ptr gr24
; size gr23
; One might want to unroll this as for other processors, but it turns
; out that the data cache contention after a store makes such
; unrolling useless. We can't come under 5 cycles/limb anyway.
; __mpn_sub_n (HP-PA)
; Subtracts the limb vector at s2_ptr from the one at s1_ptr (both of
; length size > 0), stores the difference at res_ptr and leaves the borrow
; out of the most significant limb (0 or 1) in %r28.
; In:  %r26 = res_ptr, %r25 = s1_ptr, %r24 = s2_ptr, %r23 = size
; Uses %r19, %r20 as scratch.
; The borrow state produced by sub/subb must survive the loads, the store
; and the addib that sit between consecutive subb instructions; the loop
; is written on that assumption.
; The instruction after each branch sits in that branch's delay slot.
.code
.export __mpn_sub_n
__mpn_sub_n
.proc
.callinfo frame=0,no_calls
.entry
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
addib,= -1,%r23,L$end ; check for (SIZE == 1)
sub %r20,%r19,%r28 ; subtract first limbs ignoring cy
; Each iteration loads the next limb pair, stores the previous difference
; and subtracts with borrow in the delay slot of the loop branch.
L$loop ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
addib,<> -1,%r23,L$loop
subb %r20,%r19,%r28
; Store the last difference, read the carry bit into %r28 and return
; 1 - carry (computed in the delay slot) as the borrow.
L$end stws %r28,0(0,%r26)
addc %r0,%r0,%r28
bv 0(%r2)
subi 1,%r28,%r28
.exit
.procend

285
sysdeps/hppa/udiv_qrnnd.s Normal file
View File

@ -0,0 +1,285 @@
; HP-PA __udiv_qrnnd division support, used from longlong.h.
; This version runs fast on pre-PA7000 CPUs.
; Copyright (C) 1993, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; rem_ptr gr26
; n1 gr25
; n0 gr24
; d gr23
; The code size is a bit excessive. We could merge the last two ds;addc
; sequences by simply moving the "bb,< Odd" instruction down. The only
; trouble is the FFFFFFFF code that would need some hacking.
; __udiv_qrnnd (HP-PA, divide-step based)
; Divides the two-word value (n1,n0) by d, stores the remainder through
; rem_ptr and leaves the quotient in %r28.
; In:  %r26 = rem_ptr, %r25 = n1, %r24 = n0, %r23 = d
; Uses %r1, %r19, %r21, %r22 as scratch.
; Three code paths, each a fully unrolled chain of 32 "ds" (divide step)
; instructions interleaved with "addc %r24,%r24,..." which shifts the
; dividend left and shifts the quotient bit produced by the preceding ds
; in at the bottom:
;   - d with the top bit clear: divide directly by d;
;   - d with the top bit set and d even: divide (n1,n0)/2 by d/2 and fix
;     up the remainder with the saved low bit of n0;
;   - d with the top bit set and d odd: divide by (d/2 + 1), then correct
;     quotient and remainder; d == 0xFFFFFFFF is handled separately at
;     L$FF.. because d/2 + 1 overflows.
; The instruction after each branch sits in that branch's delay slot
; (unless nullified with ",n").
.code
.export __udiv_qrnnd
__udiv_qrnnd
.proc
.callinfo frame=0,no_calls
.entry
comb,< %r23,0,L$largedivisor
sub %r0,%r23,%r1 ; clear cy as side-effect
; --- d has its top bit clear -------------------------------------------
; Initial ds on zero operands primes the divide-step state.
ds %r0,%r1,%r0
; divide steps 1-8
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
; divide steps 9-16
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
; divide steps 17-24
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
; divide steps 25-32 (the last addc moves the quotient to %r28)
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r28
ds %r25,%r23,%r25
; If the partial remainder went negative, add the divisor back (addl does
; not disturb the carry needed by the final addc).
comclr,>= %r25,%r0,%r0
addl %r25,%r23,%r25
stws %r25,0(0,%r26)
bv 0(%r2)
; Delay slot: shift in the last quotient bit.
addc %r28,%r28,%r28
; --- d has its top bit set -----------------------------------------------
L$largedivisor
extru %r24,31,1,%r19 ; r19 = n0 & 1
bb,< %r23,31,L$odd
extru %r23,30,31,%r22 ; r22 = d >> 1
; d is even: divide (n1,n0) >> 1 by d >> 1.
shd %r25,%r24,1,%r24 ; r24 = new n0
extru %r25,30,31,%r25 ; r25 = new n1
sub %r0,%r22,%r21
ds %r0,%r21,%r0
; divide steps 1-8
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
; divide steps 9-16
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
; divide steps 17-24
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
; divide steps 25-32
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
; Restore a negative partial remainder, then rebuild the true remainder as
; 2 * r + (n0 & 1); the quotient's last bit is shifted in from the delay
; slot of the return.
comclr,>= %r25,%r0,%r0
addl %r25,%r22,%r25
sh1addl %r25,%r19,%r25
stws %r25,0(0,%r26)
bv 0(%r2)
addc %r24,%r24,%r28
; d is odd: r22 becomes d/2 + 1; signed overflow of that increment means
; d == 0xFFFFFFFF and is handled at L$FF.. (the ",n" nullifies the delay
; slot when the branch is taken).
L$odd addib,sv,n 1,%r22,L$FF.. ; r22 = (d / 2 + 1)
shd %r25,%r24,1,%r24 ; r24 = new n0
extru %r25,30,31,%r25 ; r25 = new n1
sub %r0,%r22,%r21
ds %r0,%r21,%r0
; divide steps 1-8
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
; divide steps 9-16
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
; divide steps 17-24
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
; divide steps 25-32
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
; Shift in the last quotient bit (into %r28), restore a negative partial
; remainder and rebuild the remainder as 2 * r + (n0 & 1).
addc %r24,%r24,%r28
comclr,>= %r25,%r0,%r0
addl %r25,%r22,%r25
sh1addl %r25,%r19,%r25
; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
; Correct towards division by d: the remainder becomes r' + q', and each
; time it reaches d (or wraps) d is subtracted (%r1 holds -d from the
; entry delay slot) and the quotient is incremented via the carry.
add,nuv %r28,%r25,%r25
addl %r25,%r1,%r25
addc %r0,%r28,%r28
sub,<< %r25,%r23,%r0
addl %r25,%r1,%r25
stws %r25,0(0,%r26)
bv 0(%r2)
addc %r0,%r28,%r28
; This is just a special case of the code above.
; We come here when d == 0xFFFFFFFF
L$FF.. add,uv %r25,%r24,%r24
sub,<< %r24,%r23,%r0
ldo 1(%r24),%r24
stws %r24,0(0,%r26)
bv 0(%r2)
addc %r0,%r25,%r28
.exit
.procend

21
sysdeps/i960/add_n.s Normal file
View File

@ -0,0 +1,21 @@
# i960 ___mpn_add_n -- add two limb vectors of the same length and store the
# sum in a third limb vector.
# Register use, as shown by the instructions below:
#   g0 = res_ptr (overwritten with the return value at the end)
#   g1 = s1_ptr, g2 = s2_ptr, g3 = size (used as the loop counter)
#   g4, g5 = limb temporaries, g6 = carry saved across the loop branch
# The compare-and-branch at the bottom of the loop changes the condition
# code, so the carry is parked in g6 after each addc and re-created with
# "cmpo g6,1" before the next one.
# NOTE(review): the save/restore pair (subc 0,0,g6 / cmpo g6,1) depends on
# the exact i960 condition-code encoding -- confirm against the i960
# manual that a set carry really survives the round trip.
.text
.align 4
.globl ___mpn_add_n
___mpn_add_n:
mov 0,g6 # clear carry-save register
cmpo 1,0 # clear cy
Loop: subo 1,g3,g3 # update loop counter
ld (g1),g5 # load from s1_ptr
addo 4,g1,g1 # s1_ptr++
ld (g2),g4 # load from s2_ptr
addo 4,g2,g2 # s2_ptr++
cmpo g6,1 # restore cy from g6, relies on cy being 0
addc g4,g5,g4 # main add
subc 0,0,g6 # save cy in g6
st g4,(g0) # store result to res_ptr
addo 4,g0,g0 # res_ptr++
cmpobne 0,g3,Loop # when branch is taken, clears C bit
# Return the saved carry state in g0.
mov g6,g0
ret

26
sysdeps/i960/addmul_1.s Normal file
View File

@ -0,0 +1,26 @@
# i960 ___mpn_addmul_1 -- multiply a limb vector by a single limb and add
# the products into a second limb vector.
# (This file is addmul_1.s; the entry point was previously mislabelled
# ___mpn_mul_1, which clashes with mul_1.s and leaves ___mpn_addmul_1
# undefined.)
# Register use, as shown by the instructions below:
#   g0 = res_ptr on entry; carry limb during the loop and on return
#   g1 = s1_ptr, g2 = size, g3 = s2_limb
#   g13 = res_ptr advanced past the end of the vector
#   g4 = -4 * size (byte offset), g5 = limb temporary
#   g6,g7 = 64-bit product from emul (low word in g6, high word in g7)
# Both pointers are moved to the end of their vectors and g2 is negated, so
# the loop indexes with a negative count that runs up to zero.
.text
.align 4
.globl ___mpn_addmul_1
___mpn_addmul_1:
subo g2,0,g2 # g2 = -size
shlo 2,g2,g4 # g4 = -4 * size
subo g4,g1,g1 # s1_ptr += 4 * size
subo g4,g0,g13 # g13 = res_ptr + 4 * size
mov 0,g0 # carry limb = 0
cmpo 1,0 # clear C bit on AC.cc
Loop: ld (g1)[g2*4],g5 # next s1 limb
emul g3,g5,g6 # g7:g6 = s2_limb * s1 limb
ld (g13)[g2*4],g5 # current result limb
addc g0,g6,g6 # relies on that C bit is clear
addc 0,g7,g7 # propagate carry into the high word
addc g5,g6,g6 # relies on that C bit is clear
st g6,(g13)[g2*4] # store updated result limb
addc 0,g7,g0 # new carry limb = high word + carry
addo g2,1,g2 # advance the (negative) index
cmpobne 0,g2,Loop # when branch is taken, clears C bit
ret

23
sysdeps/i960/mul_1.s Normal file
View File

@ -0,0 +1,23 @@
# i960 ___mpn_mul_1 -- multiply a limb vector by a single limb and store
# the product in a second limb vector.
# Register use, as shown by the instructions below:
#   g0 = res_ptr on entry; carry limb during the loop and on return
#   g1 = s1_ptr, g2 = size, g3 = s2_limb
#   g13 = res_ptr advanced past the end of the vector
#   g4 = -4 * size (byte offset), g5 = limb temporary
#   g6,g7 = 64-bit product from emul (low word in g6, high word in g7)
# Both pointers are moved to the end of their vectors and g2 is negated, so
# the loop indexes with a negative count that runs up to zero.
.text
.align 4
.globl ___mpn_mul_1
___mpn_mul_1:
subo g2,0,g2
shlo 2,g2,g4
subo g4,g1,g1
subo g4,g0,g13
mov 0,g0
cmpo 1,0 # clear C bit on AC.cc
Loop: ld (g1)[g2*4],g5
emul g3,g5,g6
addc g0,g6,g6 # relies on that C bit is clear
st g6,(g13)[g2*4]
# New carry limb = high product word + carry from the low-word add.
addc 0,g7,g0
addo g2,1,g2
cmpobne 0,g2,Loop # when branch is taken, clears C bit
ret

21
sysdeps/i960/sub_n.s Normal file
View File

@ -0,0 +1,21 @@
# i960 ___mpn_sub_n -- subtract two limb vectors of the same length and
# store the difference in a third limb vector.
# Register use, as shown by the instructions below:
#   g0 = res_ptr (overwritten with the return value at the end)
#   g1 = s1_ptr, g2 = s2_ptr, g3 = size (used as the loop counter)
#   g4, g5 = limb temporaries, g6 = carry saved across the loop branch
# The compare-and-branch at the bottom of the loop changes the condition
# code, so the carry is parked in g6 after each subc and re-created with
# "cmpo g6,1" before the next one.
# NOTE(review): the save/restore pair (subc 0,0,g6 / cmpo g6,1), the
# initial value 1 in g6 and the value finally returned in g0 all depend on
# the exact i960 carry/borrow and condition-code conventions -- confirm
# against the i960 manual.
.text
.align 4
.globl ___mpn_sub_n
___mpn_sub_n:
mov 1,g6 # set carry-save register
cmpo 1,0 # clear cy
Loop: subo 1,g3,g3 # update loop counter
ld (g1),g5 # load from s1_ptr
addo 4,g1,g1 # s1_ptr++
ld (g2),g4 # load from s2_ptr
addo 4,g2,g2 # s2_ptr++
cmpo g6,1 # restore cy from g6, relies on cy being 0
subc g4,g5,g4 # main subtract
subc 0,0,g6 # save cy in g6
st g4,(g0) # store result to res_ptr
addo 4,g0,g0 # res_ptr++
cmpobne 0,g3,Loop # when branch is taken, cy will be 0
# Return the saved carry state in g0.
mov g6,g0
ret

103
sysdeps/m88k/m88100/add_n.s Normal file
View File

@ -0,0 +1,103 @@
; mc88100 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr r2
; s1_ptr r3
; s2_ptr r4
; size r5
; This code has been optimized to run one instruction per clock, avoiding
; load stalls and writeback contention. As a result, the instruction
; order is not always natural.
; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
; ___mpn_add_n (mc88100)
; Adds the limb vectors at s1_ptr and s2_ptr (same length, size > 0),
; stores the sum at res_ptr and returns the carry out (0 or 1) in r2.
; In:  r2 = res_ptr, r3 = s1_ptr, r4 = s2_ptr, r5 = size
; Uses r6-r10 and r12 as scratch.
; The loop body is unrolled 8 times, 4 instructions (16 bytes) per limb.
; When size is not a multiple of 8 the code jumps into the middle of the
; unrolled body: r5 becomes ((-size) & 7) * 16, which is added to the
; address of Lbase, and the three pointers are moved back by the matching
; number of limbs so that the fixed offsets in the body line up.
; r10 = size / 8 is the loop counter.  Instructions following ".n"
; branches execute in the branch delay slot.
text
align 16
global ___mpn_add_n
___mpn_add_n:
ld r6,r3,0 ; read first limb from s1_ptr
extu r10,r5,3
ld r7,r4,0 ; read first limb from s2_ptr
subu.co r5,r0,r5 ; (clear carry as side effect)
mak r5,r5,3<4>
bcnd eq0,r5,Lzero
or r12,r0,lo16(Lbase)
or.u r12,r12,hi16(Lbase)
addu r12,r12,r5 ; r12 is address for entering in loop
extu r5,r5,2 ; divide by 4
subu r2,r2,r5 ; adjust res_ptr
subu r3,r3,r5 ; adjust s1_ptr
subu r4,r4,r5 ; adjust s2_ptr
; The body alternates between the register pairs r6/r7 and r8/r9, so the
; first limbs are copied into r8/r9 in case the entry point expects them
; there.
or r8,r6,r0
jmp.n r12
or r9,r7,r0
; Loop top: advance the pointers by 8 limbs, store the limb left pending
; by the previous pass and preload the next pair.
Loop: addu r3,r3,32
st r8,r2,28
addu r4,r4,32
ld r6,r3,0
addu r2,r2,32
ld r7,r4,0
Lzero: subu r10,r10,1 ; add 0 + 8r limbs (adj loop cnt)
Lbase: ld r8,r3,4
addu.cio r6,r6,r7
ld r9,r4,4
st r6,r2,0
ld r6,r3,8 ; add 7 + 8r limbs
addu.cio r8,r8,r9
ld r7,r4,8
st r8,r2,4
ld r8,r3,12 ; add 6 + 8r limbs
addu.cio r6,r6,r7
ld r9,r4,12
st r6,r2,8
ld r6,r3,16 ; add 5 + 8r limbs
addu.cio r8,r8,r9
ld r7,r4,16
st r8,r2,12
ld r8,r3,20 ; add 4 + 8r limbs
addu.cio r6,r6,r7
ld r9,r4,20
st r6,r2,16
ld r6,r3,24 ; add 3 + 8r limbs
addu.cio r8,r8,r9
ld r7,r4,24
st r8,r2,20
ld r8,r3,28 ; add 2 + 8r limbs
addu.cio r6,r6,r7
ld r9,r4,28
st r6,r2,24
bcnd.n ne0,r10,Loop ; add 1 + 8r limbs
addu.cio r8,r8,r9
st r8,r2,28 ; store most significant limb
jmp.n r1
addu.ci r2,r0,r0 ; return carry-out from most sign. limb

128
sysdeps/m88k/m88100/mul_1.s Normal file
View File

@ -0,0 +1,128 @@
; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and
; store the product in a second limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr r2
; s1_ptr r3
; size r4
; s2_limb r5
; Common overhead is about 11 cycles/invocation.
; The speed for S2_LIMB >= 0x10000 is approximately 21 cycles/limb. (The
; pipeline stalls 2 cycles due to WB contention.)
; The speed for S2_LIMB < 0x10000 is approximately 16 cycles/limb. (The
; pipeline stalls 2 cycles due to WB contention and 1 cycle due to latency.)
; To enhance speed:
; 1. Unroll main loop 4-8 times.
; 2. Schedule code to avoid WB contention. It might be tempting to move the
; ld instruction in the loops down to save 2 cycles (less WB contention),
; but that loses because the ultimate value will be read from outside
; the allocated space. But if we handle the ultimate multiplication in
; the tail, we can do this.
; 3. Make the multiplication with fewer instructions. I think the code for
; (S2_LIMB >= 0x10000) is not minimal.
; With these techniques the (S2_LIMB >= 0x10000) case would run in 17 or
; less cycles/limb; the (S2_LIMB < 0x10000) case would run in 11
; cycles/limb. (Assuming infinite unrolling.)
; ___mpn_mul_1 (mc88100)
; Multiplies the limb vector at s1_ptr by s2_limb, stores the low limbs of
; the running result at res_ptr and returns the final carry limb in r2.
; In:  r2 = res_ptr, r3 = s1_ptr, r4 = size, r5 = s2_limb
; Uses r6-r13 as scratch; the general path also saves and restores
; r25/r26 in a 16-byte stack frame.
; The high word of each 32x32 product is assembled from 16x16 partial
; products.  Two loops are provided: a general one and a shorter one
; (Lsmall) used when the upper 16 bits of s2_limb are zero.
; Instructions following ".n" branches execute in the branch delay slot.
text
align 16
global ___mpn_mul_1
___mpn_mul_1:
; Make S1_PTR and RES_PTR point at the end of their blocks
; and negate SIZE.
lda r3,r3[r4]
lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval
subu r4,r0,r4
addu.co r2,r0,r0 ; r2 = cy = 0
ld r9,r3[r4]
mask r7,r5,0xffff ; r7 = lo(S2_LIMB)
extu r8,r5,16 ; r8 = hi(S2_LIMB)
bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0)
subu r6,r6,4
; General code for any value of S2_LIMB.
; Make a stack frame and save r25 and r26
subu r31,r31,16
st.d r25,r31,8
; Enter the loop in the middle
br.n L1
addu r4,r4,1
Loop:
ld r9,r3[r4]
st r26,r6[r4]
; bcnd ne0,r0,0 ; bubble
addu r4,r4,1
L1: mul r26,r9,r5 ; low word of product mul_1 WB ld
mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1
mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
mul r10,r12,r8 ; r10 = prod_1a mul_3
extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1
mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1
mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3
addu r10,r10,r11 ; addu_1 WB extu_2
; bcnd ne0,r0,0 ; bubble WB addu_1
addu.co r10,r10,r12 ; WB mul_4
mask.u r10,r10,0xffff ; move the 16 most significant bits...
addu.ci r10,r10,r0 ; ...to the low half of the word...
rot r10,r10,16 ; ...and put carry in pos 16.
addu.co r26,r26,r2 ; add old carry limb
bcnd.n ne0,r4,Loop
addu.ci r2,r25,r10 ; compute new carry limb
; Store the last result limb, restore r25/r26 and release the frame in the
; delay slot of the return.
st r26,r6[r4]
ld.d r25,r31,8
jmp.n r1
addu r31,r31,16
; Fast code for S2_LIMB < 0x10000
Lsmall:
; Enter the loop in the middle
br.n SL1
addu r4,r4,1
SLoop:
ld r9,r3[r4] ;
st r8,r6[r4] ;
addu r4,r4,1 ;
SL1: mul r8,r9,r5 ; low word of product
mask r12,r9,0xffff ; r12 = lo(s1_limb)
extu r13,r9,16 ; r13 = hi(s1_limb)
mul r11,r12,r7 ; r11 = prod_0
mul r12,r13,r7 ; r12 = prod_1b
addu.cio r8,r8,r2 ; add old carry limb
extu r10,r11,16 ; r10 = hi(prod_0)
addu r10,r10,r12 ;
bcnd.n ne0,r4,SLoop
extu r2,r10,16 ; r2 = new carry limb
; Return; the last result limb is stored from the delay slot.
jmp.n r1
st r8,r6[r4]

104
sysdeps/m88k/m88100/sub_n.s Normal file
View File

@ -0,0 +1,104 @@
; mc88100 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
; store difference in a third limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr r2
; s1_ptr r3
; s2_ptr r4
; size r5
; This code has been optimized to run one instruction per clock, avoiding
; load stalls and writeback contention. As a result, the instruction
; order is not always natural.
; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
; ___mpn_sub_n (mc88100)
; Subtracts the limb vector at s2_ptr from the one at s1_ptr (same length,
; size > 0), stores the difference at res_ptr and returns the borrow out
; (0 or 1) in r2.
; In:  r2 = res_ptr, r3 = s1_ptr, r4 = s2_ptr, r5 = size
; Uses r6-r10 and r12 as scratch.
; The loop body is unrolled 8 times, 4 instructions (16 bytes) per limb.
; When size is not a multiple of 8 the code jumps into the middle of the
; unrolled body: r5 becomes ((-size) & 7) * 16, which is added to the
; address of Lbase, and the three pointers are moved back by the matching
; number of limbs so that the fixed offsets in the body line up.
; r10 = size / 8 is the loop counter.  Instructions following ".n"
; branches execute in the branch delay slot.
; Carry convention: subu.cio computes rs1 + ~rs2 + carry, so carry = 1
; means "no borrow".  The carry must therefore be SET before the first
; limb is subtracted; negating size with subu.co would leave it clear for
; any size > 0 and make the first limb come out one too small.
text
align 16
global ___mpn_sub_n
___mpn_sub_n:
ld r6,r3,0 ; read first limb from s1_ptr
extu r10,r5,3
ld r7,r4,0 ; read first limb from s2_ptr
subu r5,r0,r5 ; r5 = -size (carry untouched)
subu.co r0,r0,r0 ; set carry, i.e. no borrow into the first limb
mak r5,r5,3<4>
bcnd eq0,r5,Lzero
or r12,r0,lo16(Lbase)
or.u r12,r12,hi16(Lbase)
addu r12,r12,r5 ; r12 is address for entering in loop
extu r5,r5,2 ; divide by 4
subu r2,r2,r5 ; adjust res_ptr
subu r3,r3,r5 ; adjust s1_ptr
subu r4,r4,r5 ; adjust s2_ptr
; The body alternates between the register pairs r6/r7 and r8/r9, so the
; first limbs are copied into r8/r9 in case the entry point expects them
; there.
or r8,r6,r0
jmp.n r12
or r9,r7,r0
; Loop top: advance the pointers by 8 limbs, store the limb left pending
; by the previous pass and preload the next pair.
Loop: addu r3,r3,32
st r8,r2,28
addu r4,r4,32
ld r6,r3,0
addu r2,r2,32
ld r7,r4,0
Lzero: subu r10,r10,1 ; subtract 0 + 8r limbs (adj loop cnt)
Lbase: ld r8,r3,4
subu.cio r6,r6,r7
ld r9,r4,4
st r6,r2,0
ld r6,r3,8 ; subtract 7 + 8r limbs
subu.cio r8,r8,r9
ld r7,r4,8
st r8,r2,4
ld r8,r3,12 ; subtract 6 + 8r limbs
subu.cio r6,r6,r7
ld r9,r4,12
st r6,r2,8
ld r6,r3,16 ; subtract 5 + 8r limbs
subu.cio r8,r8,r9
ld r7,r4,16
st r8,r2,12
ld r8,r3,20 ; subtract 4 + 8r limbs
subu.cio r6,r6,r7
ld r9,r4,20
st r6,r2,16
ld r6,r3,24 ; subtract 3 + 8r limbs
subu.cio r8,r8,r9
ld r7,r4,24
st r8,r2,20
ld r8,r3,28 ; subtract 2 + 8r limbs
subu.cio r6,r6,r7
ld r9,r4,28
st r6,r2,24
bcnd.n ne0,r10,Loop ; subtract 1 + 8r limbs
subu.cio r8,r8,r9
st r8,r2,28 ; store most significant limb
addu.ci r2,r0,r0 ; return carry-out from most sign. limb
; The borrow is the complement of the carry; invert it in the delay slot.
jmp.n r1
xor r2,r2,1

View File

@ -0,0 +1,84 @@
; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and
; store the product in a second limb vector.
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; INPUT PARAMETERS
; res_ptr r2
; s1_ptr r3
; size r4
; s2_limb r5
; ___mpn_mul_1 (mc88110)
; Multiplies the limb vector at s1_ptr by s2_limb, stores the low limbs of
; the running result at res_ptr and returns the final carry limb in r2.
; In:  r2 = res_ptr, r3 = s1_ptr, r4 = size, r5 = s2_limb
; Uses r6, r8, r9, r10, r11 as scratch.
; mulu.d writes the 64-bit product to the register pair r10:r11; the code
; below treats r11 as the low word and r10 as the high word.
; The loop is software pipelined by one product: the product of limb i+1
; is started before the result for limb i is stored.
; Instructions following ".n" branches execute in the branch delay slot.
text
align 16
global ___mpn_mul_1
___mpn_mul_1:
; Make S1_PTR and RES_PTR point at the end of their blocks
; and negate SIZE.
lda r3,r3[r4]
lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval
subu r4,r0,r4
addu.co r2,r0,r0 ; r2 = cy = 0
ld r6,r3[r4]
addu r4,r4,1
mulu.d r10,r6,r5
bcnd.n eq0,r4,Lend
; r8 is biased by two limbs so that the scaled-index store in the loop
; (issued after r4 has already been advanced) hits the right slot.
subu r8,r8,8
Loop: ld r6,r3[r4]
addu.cio r9,r11,r2
or r2,r10,r0 ; could be avoided if unrolled
addu r4,r4,1
mulu.d r10,r6,r5
bcnd.n ne0,r4,Loop
st r9,r8[r4]
; Drain the pipeline: add the previous high word into the last low word,
; store it and return the last high word plus carry in r2.
Lend: addu.cio r9,r11,r2
st r9,r8,4
jmp.n r1
addu.ci r2,r10,r0
; This is the Right Way to do this on '110. 4 cycles / 64-bit limb.
; ld.d r10,
; mulu.d
; addu.cio
; addu.cio
; st.d
; mulu.d ,r11,r5
; ld.d r12,
; mulu.d ,r10,r5
; addu.cio
; addu.cio
; st.d
; mulu.d
; ld.d r10,
; mulu.d
; addu.cio
; addu.cio
; st.d
; mulu.d
; ld.d r10,
; mulu.d
; addu.cio
; addu.cio
; st.d
; mulu.d

119
sysdeps/mips/add_n.s Normal file
View File

@ -0,0 +1,119 @@
# MIPS2 __mpn_add_n -- Add two limb vectors of the same length > 0 and
# store sum in a third limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# s1_ptr $5
# s2_ptr $6
# size $7
.text
.align 2
.globl __mpn_add_n
.ent __mpn_add_n
# __mpn_add_n -- add the size-limb vectors at s1_ptr ($5) and s2_ptr ($6),
# store the sum at res_ptr ($4) and return the carry-out (0 or 1) in $2.
# Register use: $10/$12 = s1 limbs, $11/$13 = s2 limbs, $2 = carry,
# $8 = carry out of "s2 limb + incoming carry", $9 = trip count of the
# first loop, $7 = limbs left for the unrolled loop.
# The limbs for the next step are loaded while the current pair is added;
# the last limb pair is always added at .Lend.
__mpn_add_n:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
# Preload the least significant limb pair.
lw $10,0($5)
lw $11,0($6)
addiu $7,$7,-1 # size-1: the last limb pair is handled at .Lend
and $9,$7,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
move $2,$0 # (delay slot) clear carry
subu $7,$7,$9 # $7 = limbs left for the unrolled loop (multiple of 4)
# First loop: one limb per iteration, (size-1) mod 4 iterations.
.Loop0: addiu $9,$9,-1
lw $12,4($5)
addu $11,$11,$2 # s2 limb + carry-in
lw $13,4($6)
sltu $8,$11,$2 # $8 = carry out of that addition
addu $11,$10,$11 # + s1 limb
sltu $2,$11,$10 # $2 = carry out of that addition
sw $11,0($4)
or $2,$2,$8 # combined carry for the next limb
addiu $5,$5,4
addiu $6,$6,4
move $10,$12
move $11,$13
bne $9,$0,.Loop0
addiu $4,$4,4 # (delay slot)
.L0: beq $7,$0,.Lend
nop
# Unrolled loop: four limbs per iteration, alternating between the
# register pairs $10/$11 and $12/$13 with the same add/carry sequence
# as in the first loop.
.Loop: addiu $7,$7,-4
lw $12,4($5)
addu $11,$11,$2
lw $13,4($6)
sltu $8,$11,$2
addu $11,$10,$11
sltu $2,$11,$10
sw $11,0($4)
or $2,$2,$8
lw $10,8($5)
addu $13,$13,$2
lw $11,8($6)
sltu $8,$13,$2
addu $13,$12,$13
sltu $2,$13,$12
sw $13,4($4)
or $2,$2,$8
lw $12,12($5)
addu $11,$11,$2
lw $13,12($6)
sltu $8,$11,$2
addu $11,$10,$11
sltu $2,$11,$10
sw $11,8($4)
or $2,$2,$8
lw $10,16($5)
addu $13,$13,$2
lw $11,16($6)
sltu $8,$13,$2
addu $13,$12,$13
sltu $2,$13,$12
sw $13,12($4)
or $2,$2,$8
addiu $5,$5,16
addiu $6,$6,16
bne $7,$0,.Loop
addiu $4,$4,16 # (delay slot)
# Add the last limb pair (already in $10/$11) and return the carry.
.Lend: addu $11,$11,$2
sltu $8,$11,$2
addu $11,$10,$11
sltu $2,$11,$10
sw $11,0($4)
j $31
or $2,$2,$8 # (delay slot) return value = final carry
.end __mpn_add_n

96
sysdeps/mips/addmul_1.s Normal file
View File

@ -0,0 +1,96 @@
# MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and
# add the product to a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# s1_ptr $5
# size $6
# s2_limb $7
.text
.align 4
.globl __mpn_addmul_1
.ent __mpn_addmul_1
# __mpn_addmul_1 -- multiply the size ($6) limbs at s1_ptr ($5) by
# s2_limb ($7), add the products to the limbs at res_ptr ($4) in place,
# and return the final carry limb in $2.
# Register use: $8 = s1 limb being multiplied, $3 = low product limb and
# then the result limb, $9 = high product limb, $10 = res limb and then
# the carry out of the res addition, $2 = carry limb (cy2).
# The multiply for the next limb is started before the current product
# has been added in; the two "cool down" phases drain that pipeline.
__mpn_addmul_1:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
# warm up phase 0
lw $8,0($5)
# warm up phase 1
addiu $5,$5,4
multu $8,$7
addiu $6,$6,-1
beq $6,$0,$LC0 # size == 1: only one product to add
move $2,$0 # zero cy2
addiu $6,$6,-1
beq $6,$0,$LC1 # size == 2: skip the loop
lw $8,0($5) # load new s1 limb as early as possible
Loop: lw $10,0($4)
mflo $3
mfhi $9
addiu $5,$5,4
addu $3,$3,$2 # add old carry limb to low product limb
multu $8,$7
lw $8,0($5) # load new s1 limb as early as possible
addiu $6,$6,-1 # decrement loop counter
sltu $2,$3,$2 # carry from previous addition -> $2
addu $3,$10,$3 # add the res limb
sltu $10,$3,$10 # carry from that addition -> $10
addu $2,$2,$10 # sum of the two carries
sw $3,0($4)
addiu $4,$4,4
bne $6,$0,Loop # should be "bnel"
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
$LC1: lw $10,0($4)
mflo $3
mfhi $9
addu $3,$3,$2
sltu $2,$3,$2
multu $8,$7 # start the product for the last s1 limb
addu $3,$10,$3
sltu $10,$3,$10
addu $2,$2,$10
sw $3,0($4)
addiu $4,$4,4
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 0
$LC0: lw $10,0($4)
mflo $3
mfhi $9
addu $3,$3,$2
sltu $2,$3,$2
addu $3,$10,$3
sltu $10,$3,$10
addu $2,$2,$10
sw $3,0($4)
j $31
addu $2,$9,$2 # add high product limb and carry from addition
.end __mpn_addmul_1

94
sysdeps/mips/lshift.s Normal file
View File

@ -0,0 +1,94 @@
# MIPS2 __mpn_lshift -- Shift a limb vector left by cnt bits and store the
# result in a second limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# src_ptr $5
# size $6
# cnt $7
.text
.align 2
.globl __mpn_lshift
.ent __mpn_lshift
# __mpn_lshift -- shift the size ($6) limbs at src_ptr ($5) left by
# cnt ($7) bits, store the result at res_ptr ($4), and return in $2 the
# bits shifted out of the most significant limb.
# Limbs are processed from the most significant end downwards.
# Register use: $13 = -cnt, used as the complementary right-shift count;
# $10/$3 = source limbs; $11/$14 = left-shifted parts; $12/$9 =
# right-shifted parts; $8 = limb to store.
# NOTE(review): using -cnt as a shift count relies on the variable shift
# taking the count modulo the register width (i.e. 32-cnt); presumably
# callers guarantee 0 < cnt < 32 -- confirm.
__mpn_lshift:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
sll $2,$6,2 # byte size of the vectors
addu $5,$5,$2 # make r5 point at end of src
lw $10,-4($5) # load first limb
subu $13,$0,$7
addu $4,$4,$2 # make r4 point at end of res
addiu $6,$6,-1 # size-1: the last limb is stored at .Lend
and $9,$6,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
srl $2,$10,$13 # compute function result
subu $6,$6,$9 # $6 = limbs left for the unrolled loop
# First loop: one limb per iteration, (size-1) mod 4 iterations.
.Loop0: lw $3,-8($5)
addiu $4,$4,-4
addiu $5,$5,-4
addiu $9,$9,-1
sll $11,$10,$7 # high part from the current limb
srl $12,$3,$13 # low part from the next lower limb
move $10,$3
or $8,$11,$12
bne $9,$0,.Loop0
sw $8,0($4) # (delay slot)
.L0: beq $6,$0,.Lend
nop
# Unrolled loop: four limbs per iteration, alternating $10 and $3 as the
# current source limb.
.Loop: lw $3,-8($5)
addiu $4,$4,-16
addiu $6,$6,-4
sll $11,$10,$7
srl $12,$3,$13
lw $10,-12($5)
sll $14,$3,$7
or $8,$11,$12
sw $8,12($4)
srl $9,$10,$13
lw $3,-16($5)
sll $11,$10,$7
or $8,$14,$9
sw $8,8($4)
srl $12,$3,$13
lw $10,-20($5)
sll $14,$3,$7
or $8,$11,$12
sw $8,4($4)
srl $9,$10,$13
addiu $5,$5,-16
or $8,$14,$9
bgtz $6,.Loop
sw $8,0($4) # (delay slot)
# The least significant result limb only receives bits from the last
# source limb.
.Lend: sll $8,$10,$7
j $31
sw $8,-4($4) # (delay slot)
.end __mpn_lshift

119
sysdeps/mips/mips3/add_n.s Normal file
View File

@ -0,0 +1,119 @@
# MIPS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and
# store sum in a third limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# s1_ptr $5
# s2_ptr $6
# size $7
.text
.align 2
.globl __mpn_add_n
.ent __mpn_add_n
# __mpn_add_n (64-bit limbs) -- add the size-limb vectors at s1_ptr ($5)
# and s2_ptr ($6), store the sum at res_ptr ($4) and return the carry-out
# (0 or 1) in $2.
# Register use: $10/$12 = s1 limbs, $11/$13 = s2 limbs, $2 = carry,
# $8 = carry out of "s2 limb + incoming carry", $9 = trip count of the
# first loop, $7 = limbs left for the unrolled loop.
# The limbs for the next step are loaded while the current pair is added;
# the last limb pair is always added at .Lend.
__mpn_add_n:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
# Preload the least significant limb pair.
ld $10,0($5)
ld $11,0($6)
daddiu $7,$7,-1 # size-1: the last limb pair is handled at .Lend
and $9,$7,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
move $2,$0 # (delay slot) clear carry
dsubu $7,$7,$9 # $7 = limbs left for the unrolled loop (multiple of 4)
# First loop: one limb per iteration, (size-1) mod 4 iterations.
.Loop0: daddiu $9,$9,-1
ld $12,8($5)
daddu $11,$11,$2 # s2 limb + carry-in
ld $13,8($6)
sltu $8,$11,$2 # $8 = carry out of that addition
daddu $11,$10,$11 # + s1 limb
sltu $2,$11,$10 # $2 = carry out of that addition
sd $11,0($4)
or $2,$2,$8 # combined carry for the next limb
daddiu $5,$5,8
daddiu $6,$6,8
move $10,$12
move $11,$13
bne $9,$0,.Loop0
daddiu $4,$4,8 # (delay slot)
.L0: beq $7,$0,.Lend
nop
# Unrolled loop: four limbs per iteration, alternating between the
# register pairs $10/$11 and $12/$13 with the same add/carry sequence
# as in the first loop.
.Loop: daddiu $7,$7,-4
ld $12,8($5)
daddu $11,$11,$2
ld $13,8($6)
sltu $8,$11,$2
daddu $11,$10,$11
sltu $2,$11,$10
sd $11,0($4)
or $2,$2,$8
ld $10,16($5)
daddu $13,$13,$2
ld $11,16($6)
sltu $8,$13,$2
daddu $13,$12,$13
sltu $2,$13,$12
sd $13,8($4)
or $2,$2,$8
ld $12,24($5)
daddu $11,$11,$2
ld $13,24($6)
sltu $8,$11,$2
daddu $11,$10,$11
sltu $2,$11,$10
sd $11,16($4)
or $2,$2,$8
ld $10,32($5)
daddu $13,$13,$2
ld $11,32($6)
sltu $8,$13,$2
daddu $13,$12,$13
sltu $2,$13,$12
sd $13,24($4)
or $2,$2,$8
daddiu $5,$5,32
daddiu $6,$6,32
bne $7,$0,.Loop
daddiu $4,$4,32 # (delay slot)
# Add the last limb pair (already in $10/$11) and return the carry.
.Lend: daddu $11,$11,$2
sltu $8,$11,$2
daddu $11,$10,$11
sltu $2,$11,$10
sd $11,0($4)
j $31
or $2,$2,$8 # (delay slot) return value = final carry
.end __mpn_add_n

View File

@ -0,0 +1,96 @@
# MIPS3 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
# add the product to a second limb vector.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# s1_ptr $5
# size $6
# s2_limb $7
.text
.align 4
.globl __mpn_addmul_1
.ent __mpn_addmul_1
# __mpn_addmul_1 (64-bit limbs) -- multiply the size ($6) limbs at
# s1_ptr ($5) by s2_limb ($7), add the products to the limbs at
# res_ptr ($4) in place, and return the final carry limb in $2.
# Register use: $8 = s1 limb being multiplied, $3 = low product limb and
# then the result limb, $9 = high product limb, $10 = res limb and then
# the carry out of the res addition, $2 = carry limb (cy2).
# The multiply for the next limb is started before the current product
# has been added in; the two "cool down" phases drain that pipeline.
__mpn_addmul_1:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
# warm up phase 0
ld $8,0($5)
# warm up phase 1
daddiu $5,$5,8
dmultu $8,$7
daddiu $6,$6,-1
beq $6,$0,$LC0 # size == 1: only one product to add
move $2,$0 # zero cy2
daddiu $6,$6,-1
beq $6,$0,$LC1 # size == 2: skip the loop
ld $8,0($5) # load new s1 limb as early as possible
Loop: ld $10,0($4)
mflo $3
mfhi $9
daddiu $5,$5,8
daddu $3,$3,$2 # add old carry limb to low product limb
dmultu $8,$7
ld $8,0($5) # load new s1 limb as early as possible
daddiu $6,$6,-1 # decrement loop counter
sltu $2,$3,$2 # carry from previous addition -> $2
daddu $3,$10,$3 # add the res limb
sltu $10,$3,$10 # carry from that addition -> $10
daddu $2,$2,$10 # sum of the two carries
sd $3,0($4)
daddiu $4,$4,8
bne $6,$0,Loop # should be "bnel"
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
$LC1: ld $10,0($4)
mflo $3
mfhi $9
daddu $3,$3,$2
sltu $2,$3,$2
dmultu $8,$7 # start the product for the last s1 limb
daddu $3,$10,$3
sltu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 0
$LC0: ld $10,0($4)
mflo $3
mfhi $9
daddu $3,$3,$2
sltu $2,$3,$2
daddu $3,$10,$3
sltu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
j $31
daddu $2,$9,$2 # add high product limb and carry from addition
.end __mpn_addmul_1

View File

@ -0,0 +1,94 @@
# MIPS3 __mpn_lshift -- Shift a limb vector left by cnt bits and store the
# result in a second limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# src_ptr $5
# size $6
# cnt $7
.text
.align 2
.globl __mpn_lshift
.ent __mpn_lshift
# __mpn_lshift (64-bit limbs) -- shift the size ($6) limbs at
# src_ptr ($5) left by cnt ($7) bits, store the result at res_ptr ($4),
# and return in $2 the bits shifted out of the most significant limb.
# Limbs are processed from the most significant end downwards.
# Register use: $13 = -cnt, used as the complementary right-shift count;
# $10/$3 = source limbs; $11/$14 = left-shifted parts; $12/$9 =
# right-shifted parts; $8 = limb to store.
# NOTE(review): using -cnt as a shift count relies on the variable shift
# taking the count modulo the register width (i.e. 64-cnt); presumably
# callers guarantee 0 < cnt < 64 -- confirm.
__mpn_lshift:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
dsll $2,$6,3 # byte size of the vectors
daddu $5,$5,$2 # make r5 point at end of src
ld $10,-8($5) # load first limb
dsubu $13,$0,$7
daddu $4,$4,$2 # make r4 point at end of res
daddiu $6,$6,-1 # size-1: the last limb is stored at .Lend
and $9,$6,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
dsrl $2,$10,$13 # compute function result
dsubu $6,$6,$9 # $6 = limbs left for the unrolled loop
# First loop: one limb per iteration, (size-1) mod 4 iterations.
.Loop0: ld $3,-16($5)
daddiu $4,$4,-8
daddiu $5,$5,-8
daddiu $9,$9,-1
dsll $11,$10,$7 # high part from the current limb
dsrl $12,$3,$13 # low part from the next lower limb
move $10,$3
or $8,$11,$12
bne $9,$0,.Loop0
sd $8,0($4) # (delay slot)
.L0: beq $6,$0,.Lend
nop
# Unrolled loop: four limbs per iteration, alternating $10 and $3 as the
# current source limb.
.Loop: ld $3,-16($5)
daddiu $4,$4,-32
daddiu $6,$6,-4
dsll $11,$10,$7
dsrl $12,$3,$13
ld $10,-24($5)
dsll $14,$3,$7
or $8,$11,$12
sd $8,24($4)
dsrl $9,$10,$13
ld $3,-32($5)
dsll $11,$10,$7
or $8,$14,$9
sd $8,16($4)
dsrl $12,$3,$13
ld $10,-40($5)
dsll $14,$3,$7
or $8,$11,$12
sd $8,8($4)
dsrl $9,$10,$13
daddiu $5,$5,-32
or $8,$14,$9
bgtz $6,.Loop
sd $8,0($4) # (delay slot)
# The least significant result limb only receives bits from the last
# source limb.
.Lend: dsll $8,$10,$7
j $31
sd $8,-8($4) # (delay slot)
.end __mpn_lshift

View File

@ -0,0 +1,84 @@
# MIPS3 __mpn_mul_1 -- Multiply a limb vector with a single limb and
# store the product in a second limb vector.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# s1_ptr $5
# size $6
# s2_limb $7
.text
.align 4
.globl __mpn_mul_1
.ent __mpn_mul_1
# __mpn_mul_1 (64-bit limbs) -- multiply the size ($6) limbs at
# s1_ptr ($5) by s2_limb ($7), store the low product limbs at
# res_ptr ($4), and return the final carry limb in $2.
# Register use: $8 = s1 limb being multiplied, $10 = low product limb
# plus carry (the limb stored), $9 = high product limb, $2 = carry limb.
# The multiply for the next limb is started before the current product
# has been stored; the two "cool down" phases drain that pipeline.
__mpn_mul_1:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
# warm up phase 0
ld $8,0($5)
# warm up phase 1
daddiu $5,$5,8
dmultu $8,$7
daddiu $6,$6,-1
beq $6,$0,$LC0 # size == 1: only one product to store
move $2,$0 # zero cy2
daddiu $6,$6,-1
beq $6,$0,$LC1 # size == 2: skip the loop
ld $8,0($5) # load new s1 limb as early as possible
Loop: mflo $10
mfhi $9
daddiu $5,$5,8
daddu $10,$10,$2 # add old carry limb to low product limb
dmultu $8,$7
ld $8,0($5) # load new s1 limb as early as possible
daddiu $6,$6,-1 # decrement loop counter
sltu $2,$10,$2 # carry from previous addition -> $2
sd $10,0($4)
daddiu $4,$4,8
bne $6,$0,Loop # should be "bnel"
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
$LC1: mflo $10
mfhi $9
daddu $10,$10,$2
sltu $2,$10,$2
dmultu $8,$7 # start the product for the last s1 limb
sd $10,0($4)
daddiu $4,$4,8
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 0
$LC0: mflo $10
mfhi $9
daddu $10,$10,$2
sltu $2,$10,$2
sd $10,0($4)
j $31
daddu $2,$9,$2 # add high product limb and carry from addition
.end __mpn_mul_1

View File

@ -0,0 +1,91 @@
# MIPS3 __mpn_rshift -- Shift a limb vector right by cnt bits and store the
# result in a second limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# src_ptr $5
# size $6
# cnt $7
.text
.align 2
.globl __mpn_rshift
.ent __mpn_rshift
# __mpn_rshift (64-bit limbs) -- shift the size ($6) limbs at
# src_ptr ($5) right by cnt ($7) bits, store the result at res_ptr ($4),
# and return in $2 the bits shifted out of the least significant limb
# (left-justified by the dsll below).
# Limbs are processed from the least significant end upwards.
# Register use: $13 = -cnt, used as the complementary left-shift count;
# $10/$3 = source limbs; $11/$14 = right-shifted parts; $12/$9 =
# left-shifted parts; $8 = limb to store.
# NOTE(review): using -cnt as a shift count relies on the variable shift
# taking the count modulo the register width (i.e. 64-cnt); presumably
# callers guarantee 0 < cnt < 64 -- confirm.
__mpn_rshift:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
ld $10,0($5) # load first limb
dsubu $13,$0,$7
daddiu $6,$6,-1 # size-1: the last limb is stored at .Lend
and $9,$6,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
dsll $2,$10,$13 # compute function result
dsubu $6,$6,$9 # $6 = limbs left for the unrolled loop
# First loop: one limb per iteration, (size-1) mod 4 iterations.
.Loop0: ld $3,8($5)
daddiu $4,$4,8
daddiu $5,$5,8
daddiu $9,$9,-1
dsrl $11,$10,$7 # low part from the current limb
dsll $12,$3,$13 # high part from the next higher limb
move $10,$3
or $8,$11,$12
bne $9,$0,.Loop0
sd $8,-8($4) # (delay slot)
.L0: beq $6,$0,.Lend
nop
# Unrolled loop: four limbs per iteration, alternating $10 and $3 as the
# current source limb.
.Loop: ld $3,8($5)
daddiu $4,$4,32
daddiu $6,$6,-4
dsrl $11,$10,$7
dsll $12,$3,$13
ld $10,16($5)
dsrl $14,$3,$7
or $8,$11,$12
sd $8,-32($4)
dsll $9,$10,$13
ld $3,24($5)
dsrl $11,$10,$7
or $8,$14,$9
sd $8,-24($4)
dsll $12,$3,$13
ld $10,32($5)
dsrl $14,$3,$7
or $8,$11,$12
sd $8,-16($4)
dsll $9,$10,$13
daddiu $5,$5,32
or $8,$14,$9
bgtz $6,.Loop
sd $8,-8($4) # (delay slot)
# The most significant result limb only receives bits from the last
# source limb.
.Lend: dsrl $8,$10,$7
j $31
sd $8,0($4) # (delay slot)
.end __mpn_rshift

119
sysdeps/mips/mips3/sub_n.s Normal file
View File

@ -0,0 +1,119 @@
# MIPS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
# store difference in a third limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# s1_ptr $5
# s2_ptr $6
# size $7
.text
.align 2
.globl __mpn_sub_n
.ent __mpn_sub_n
# __mpn_sub_n (64-bit limbs) -- subtract the size-limb vector at
# s2_ptr ($6) from the one at s1_ptr ($5), store the difference at
# res_ptr ($4) and return the borrow-out (0 or 1) in $2.
# Register use: $10/$12 = s1 limbs, $11/$13 = s2 limbs, $2 = borrow,
# $8 = carry out of "s2 limb + incoming borrow", $9 = trip count of the
# first loop, $7 = limbs left for the unrolled loop.
# The limbs for the next step are loaded while the current pair is
# subtracted; the last limb pair is always handled at .Lend.
__mpn_sub_n:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
# Preload the least significant limb pair.
ld $10,0($5)
ld $11,0($6)
daddiu $7,$7,-1 # size-1: the last limb pair is handled at .Lend
and $9,$7,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
move $2,$0 # (delay slot) clear borrow
dsubu $7,$7,$9 # $7 = limbs left for the unrolled loop (multiple of 4)
# First loop: one limb per iteration, (size-1) mod 4 iterations.
.Loop0: daddiu $9,$9,-1
ld $12,8($5)
daddu $11,$11,$2 # s2 limb + borrow-in
ld $13,8($6)
sltu $8,$11,$2 # $8 = carry out of that addition
dsubu $11,$10,$11 # s1 limb - (s2 limb + borrow-in)
sltu $2,$10,$11 # $2 = 1 if the subtraction wrapped around
sd $11,0($4)
or $2,$2,$8 # combined borrow for the next limb
daddiu $5,$5,8
daddiu $6,$6,8
move $10,$12
move $11,$13
bne $9,$0,.Loop0
daddiu $4,$4,8 # (delay slot)
.L0: beq $7,$0,.Lend
nop
# Unrolled loop: four limbs per iteration, alternating between the
# register pairs $10/$11 and $12/$13 with the same subtract/borrow
# sequence as in the first loop.
.Loop: daddiu $7,$7,-4
ld $12,8($5)
daddu $11,$11,$2
ld $13,8($6)
sltu $8,$11,$2
dsubu $11,$10,$11
sltu $2,$10,$11
sd $11,0($4)
or $2,$2,$8
ld $10,16($5)
daddu $13,$13,$2
ld $11,16($6)
sltu $8,$13,$2
dsubu $13,$12,$13
sltu $2,$12,$13
sd $13,8($4)
or $2,$2,$8
ld $12,24($5)
daddu $11,$11,$2
ld $13,24($6)
sltu $8,$11,$2
dsubu $11,$10,$11
sltu $2,$10,$11
sd $11,16($4)
or $2,$2,$8
ld $10,32($5)
daddu $13,$13,$2
ld $11,32($6)
sltu $8,$13,$2
dsubu $13,$12,$13
sltu $2,$12,$13
sd $13,24($4)
or $2,$2,$8
daddiu $5,$5,32
daddiu $6,$6,32
bne $7,$0,.Loop
daddiu $4,$4,32 # (delay slot)
# Subtract the last limb pair (already in $10/$11) and return the borrow.
.Lend: daddu $11,$11,$2
sltu $8,$11,$2
dsubu $11,$10,$11
sltu $2,$10,$11
sd $11,0($4)
j $31
or $2,$2,$8 # (delay slot) return value = final borrow
.end __mpn_sub_n

View File

@ -0,0 +1,96 @@
# MIPS3 __mpn_submul_1 -- Multiply a limb vector with a single limb and
# subtract the product from a second limb vector.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# s1_ptr $5
# size $6
# s2_limb $7
.text
.align 4
.globl __mpn_submul_1
.ent __mpn_submul_1
# __mpn_submul_1 (64-bit limbs) -- multiply the size ($6) limbs at
# s1_ptr ($5) by s2_limb ($7), subtract the products from the limbs at
# res_ptr ($4) in place, and return the final borrow limb in $2.
# Register use: $8 = s1 limb being multiplied, $3 = low product limb and
# then the result limb, $9 = high product limb, $10 = res limb and then
# the borrow out of the subtraction, $2 = carry/borrow limb (cy2).
# The multiply for the next limb is started before the current product
# has been subtracted; the two "cool down" phases drain that pipeline.
__mpn_submul_1:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
# warm up phase 0
ld $8,0($5)
# warm up phase 1
daddiu $5,$5,8
dmultu $8,$7
daddiu $6,$6,-1
beq $6,$0,$LC0 # size == 1: only one product to subtract
move $2,$0 # zero cy2
daddiu $6,$6,-1
beq $6,$0,$LC1 # size == 2: skip the loop
ld $8,0($5) # load new s1 limb as early as possible
Loop: ld $10,0($4)
mflo $3
mfhi $9
daddiu $5,$5,8
daddu $3,$3,$2 # add old carry limb to low product limb
dmultu $8,$7
ld $8,0($5) # load new s1 limb as early as possible
daddiu $6,$6,-1 # decrement loop counter
sltu $2,$3,$2 # carry from previous addition -> $2
dsubu $3,$10,$3 # res limb - (low product + carry)
sgtu $10,$3,$10 # borrow: result is larger than the original res limb
daddu $2,$2,$10 # sum of carry and borrow
sd $3,0($4)
daddiu $4,$4,8
bne $6,$0,Loop # should be "bnel"
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
$LC1: ld $10,0($4)
mflo $3
mfhi $9
daddu $3,$3,$2
sltu $2,$3,$2
dmultu $8,$7 # start the product for the last s1 limb
dsubu $3,$10,$3
sgtu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 0
$LC0: ld $10,0($4)
mflo $3
mfhi $9
daddu $3,$3,$2
sltu $2,$3,$2
dsubu $3,$10,$3
sgtu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
j $31
daddu $2,$9,$2 # add high product limb and carry from addition
.end __mpn_submul_1

84
sysdeps/mips/mul_1.s Normal file
View File

@ -0,0 +1,84 @@
# MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and
# store the product in a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# s1_ptr $5
# size $6
# s2_limb $7
.text
.align 4
.globl __mpn_mul_1
.ent __mpn_mul_1
# __mpn_mul_1 -- multiply the size ($6) limbs at s1_ptr ($5) by
# s2_limb ($7), store the low product limbs at res_ptr ($4), and return
# the final carry limb in $2.
# Register use: $8 = s1 limb being multiplied, $10 = low product limb
# plus carry (the limb stored), $9 = high product limb, $2 = carry limb.
# The multiply for the next limb is started before the current product
# has been stored; the two "cool down" phases drain that pipeline.
__mpn_mul_1:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
# warm up phase 0
lw $8,0($5)
# warm up phase 1
addiu $5,$5,4
multu $8,$7
addiu $6,$6,-1
beq $6,$0,$LC0 # size == 1: only one product to store
move $2,$0 # zero cy2
addiu $6,$6,-1
beq $6,$0,$LC1 # size == 2: skip the loop
lw $8,0($5) # load new s1 limb as early as possible
Loop: mflo $10
mfhi $9
addiu $5,$5,4
addu $10,$10,$2 # add old carry limb to low product limb
multu $8,$7
lw $8,0($5) # load new s1 limb as early as possible
addiu $6,$6,-1 # decrement loop counter
sltu $2,$10,$2 # carry from previous addition -> $2
sw $10,0($4)
addiu $4,$4,4
bne $6,$0,Loop # should be "bnel"
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
$LC1: mflo $10
mfhi $9
addu $10,$10,$2
sltu $2,$10,$2
multu $8,$7 # start the product for the last s1 limb
sw $10,0($4)
addiu $4,$4,4
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 0
$LC0: mflo $10
mfhi $9
addu $10,$10,$2
sltu $2,$10,$2
sw $10,0($4)
j $31
addu $2,$9,$2 # add high product limb and carry from addition
.end __mpn_mul_1

91
sysdeps/mips/rshift.s Normal file
View File

@ -0,0 +1,91 @@
# MIPS2 __mpn_rshift -- Shift a limb vector right by cnt bits and store the
# result in a second limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# src_ptr $5
# size $6
# cnt $7
.text
.align 2
.globl __mpn_rshift
.ent __mpn_rshift
# __mpn_rshift -- shift the size ($6) limbs at src_ptr ($5) right by
# cnt ($7) bits, store the result at res_ptr ($4), and return in $2 the
# bits shifted out of the least significant limb (left-justified by the
# sll below).
# Limbs are processed from the least significant end upwards.
# Register use: $13 = -cnt, used as the complementary left-shift count;
# $10/$3 = source limbs; $11/$14 = right-shifted parts; $12/$9 =
# left-shifted parts; $8 = limb to store.
# NOTE(review): using -cnt as a shift count relies on the variable shift
# taking the count modulo the register width (i.e. 32-cnt); presumably
# callers guarantee 0 < cnt < 32 -- confirm.
__mpn_rshift:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
lw $10,0($5) # load first limb
subu $13,$0,$7
addiu $6,$6,-1 # size-1: the last limb is stored at .Lend
and $9,$6,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
sll $2,$10,$13 # compute function result
subu $6,$6,$9 # $6 = limbs left for the unrolled loop
# First loop: one limb per iteration, (size-1) mod 4 iterations.
.Loop0: lw $3,4($5)
addiu $4,$4,4
addiu $5,$5,4
addiu $9,$9,-1
srl $11,$10,$7 # low part from the current limb
sll $12,$3,$13 # high part from the next higher limb
move $10,$3
or $8,$11,$12
bne $9,$0,.Loop0
sw $8,-4($4) # (delay slot)
.L0: beq $6,$0,.Lend
nop
# Unrolled loop: four limbs per iteration, alternating $10 and $3 as the
# current source limb.
.Loop: lw $3,4($5)
addiu $4,$4,16
addiu $6,$6,-4
srl $11,$10,$7
sll $12,$3,$13
lw $10,8($5)
srl $14,$3,$7
or $8,$11,$12
sw $8,-16($4)
sll $9,$10,$13
lw $3,12($5)
srl $11,$10,$7
or $8,$14,$9
sw $8,-12($4)
sll $12,$3,$13
lw $10,16($5)
srl $14,$3,$7
or $8,$11,$12
sw $8,-8($4)
sll $9,$10,$13
addiu $5,$5,16
or $8,$14,$9
bgtz $6,.Loop
sw $8,-4($4) # (delay slot)
# The most significant result limb only receives bits from the last
# source limb.
.Lend: srl $8,$10,$7
j $31
sw $8,0($4) # (delay slot)
.end __mpn_rshift

119
sysdeps/mips/sub_n.s Normal file
View File

@ -0,0 +1,119 @@
# MIPS2 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
# store difference in a third limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# s1_ptr $5
# s2_ptr $6
# size $7
.text
.align 2
.globl __mpn_sub_n
.ent __mpn_sub_n
# __mpn_sub_n -- subtract the size-limb vector at s2_ptr ($6) from the
# one at s1_ptr ($5), store the difference at res_ptr ($4) and return the
# borrow-out (0 or 1) in $2.
# Register use: $10/$12 = s1 limbs, $11/$13 = s2 limbs, $2 = borrow,
# $8 = carry out of "s2 limb + incoming borrow", $9 = trip count of the
# first loop, $7 = limbs left for the unrolled loop.
# The limbs for the next step are loaded while the current pair is
# subtracted; the last limb pair is always handled at .Lend.
__mpn_sub_n:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
# Preload the least significant limb pair.
lw $10,0($5)
lw $11,0($6)
addiu $7,$7,-1 # size-1: the last limb pair is handled at .Lend
and $9,$7,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if size-1 is a multiple of 4, skip first loop
move $2,$0 # (delay slot) clear borrow
subu $7,$7,$9 # $7 = limbs left for the unrolled loop (multiple of 4)
# First loop: one limb per iteration, (size-1) mod 4 iterations.
.Loop0: addiu $9,$9,-1
lw $12,4($5)
addu $11,$11,$2 # s2 limb + borrow-in
lw $13,4($6)
sltu $8,$11,$2 # $8 = carry out of that addition
subu $11,$10,$11 # s1 limb - (s2 limb + borrow-in)
sltu $2,$10,$11 # $2 = 1 if the subtraction wrapped around
sw $11,0($4)
or $2,$2,$8 # combined borrow for the next limb
addiu $5,$5,4
addiu $6,$6,4
move $10,$12
move $11,$13
bne $9,$0,.Loop0
addiu $4,$4,4 # (delay slot)
.L0: beq $7,$0,.Lend
nop
# Unrolled loop: four limbs per iteration, alternating between the
# register pairs $10/$11 and $12/$13 with the same subtract/borrow
# sequence as in the first loop.
.Loop: addiu $7,$7,-4
lw $12,4($5)
addu $11,$11,$2
lw $13,4($6)
sltu $8,$11,$2
subu $11,$10,$11
sltu $2,$10,$11
sw $11,0($4)
or $2,$2,$8
lw $10,8($5)
addu $13,$13,$2
lw $11,8($6)
sltu $8,$13,$2
subu $13,$12,$13
sltu $2,$12,$13
sw $13,4($4)
or $2,$2,$8
lw $12,12($5)
addu $11,$11,$2
lw $13,12($6)
sltu $8,$11,$2
subu $11,$10,$11
sltu $2,$10,$11
sw $11,8($4)
or $2,$2,$8
lw $10,16($5)
addu $13,$13,$2
lw $11,16($6)
sltu $8,$13,$2
subu $13,$12,$13
sltu $2,$12,$13
sw $13,12($4)
or $2,$2,$8
addiu $5,$5,16
addiu $6,$6,16
bne $7,$0,.Loop
addiu $4,$4,16 # (delay slot)
# Subtract the last limb pair (already in $10/$11) and return the borrow.
.Lend: addu $11,$11,$2
sltu $8,$11,$2
subu $11,$10,$11
sltu $2,$10,$11
sw $11,0($4)
j $31
or $2,$2,$8 # (delay slot) return value = final borrow
.end __mpn_sub_n

96
sysdeps/mips/submul_1.s Normal file
View File

@ -0,0 +1,96 @@
# MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and
# subtract the product from a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr $4
# s1_ptr $5
# size $6
# s2_limb $7
.text
.align 4
.globl __mpn_submul_1
.ent __mpn_submul_1
# __mpn_submul_1 -- multiply the size ($6) limbs at s1_ptr ($5) by
# s2_limb ($7), subtract the products from the limbs at res_ptr ($4) in
# place, and return the final borrow limb in $2.
# Register use: $8 = s1 limb being multiplied, $3 = low product limb and
# then the result limb, $9 = high product limb, $10 = res limb and then
# the borrow out of the subtraction, $2 = carry/borrow limb (cy2).
# The multiply for the next limb is started before the current product
# has been subtracted; the two "cool down" phases drain that pipeline.
__mpn_submul_1:
.set noreorder
.set nomacro
# With noreorder, the instruction written after a branch or jump is its
# delay slot and executes whether or not the branch is taken.
# warm up phase 0
lw $8,0($5)
# warm up phase 1
addiu $5,$5,4
multu $8,$7
addiu $6,$6,-1
beq $6,$0,$LC0 # size == 1: only one product to subtract
move $2,$0 # zero cy2
addiu $6,$6,-1
beq $6,$0,$LC1 # size == 2: skip the loop
lw $8,0($5) # load new s1 limb as early as possible
Loop: lw $10,0($4)
mflo $3
mfhi $9
addiu $5,$5,4
addu $3,$3,$2 # add old carry limb to low product limb
multu $8,$7
lw $8,0($5) # load new s1 limb as early as possible
addiu $6,$6,-1 # decrement loop counter
sltu $2,$3,$2 # carry from previous addition -> $2
subu $3,$10,$3 # res limb - (low product + carry)
sgtu $10,$3,$10 # borrow: result is larger than the original res limb
addu $2,$2,$10 # sum of carry and borrow
sw $3,0($4)
addiu $4,$4,4
bne $6,$0,Loop # should be "bnel"
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
$LC1: lw $10,0($4)
mflo $3
mfhi $9
addu $3,$3,$2
sltu $2,$3,$2
multu $8,$7 # start the product for the last s1 limb
subu $3,$10,$3
sgtu $10,$3,$10
addu $2,$2,$10
sw $3,0($4)
addiu $4,$4,4
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 0
$LC0: lw $10,0($4)
mflo $3
mfhi $9
addu $3,$3,$2
sltu $2,$3,$2
subu $3,$10,$3
sgtu $10,$3,$10
addu $2,$2,$10
sw $3,0($4)
j $31
addu $2,$9,$2 # add high product limb and carry from addition
.end __mpn_submul_1

54
sysdeps/rs6000/add_n.s Normal file
View File

@ -0,0 +1,54 @@
# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# s2_ptr r5
# size r6
# __mpn_add_n -- add the size (r6) limbs at s1_ptr (r4) and s2_ptr (r5),
# store the sum at res_ptr (r3) and return the carry-out in r3.
# Register use: r8 = s1 limb, r0 = s2 limb, r7 = sum limb waiting to be
# stored, CTR = loop counter; the carry is kept in the carry bit between
# the a/ae instructions.
.toc
.extern __mpn_add_n[DS]
.extern .__mpn_add_n
.csect [PR]
.align 2
.globl __mpn_add_n
.globl .__mpn_add_n
# Descriptor csect for the routine: code address, TOC anchor, and a zero word.
.csect __mpn_add_n[DS]
__mpn_add_n:
.long .__mpn_add_n, TOC[tc0], 0
.csect [PR]
# Code entry point.
.__mpn_add_n:
mtctr 6 # copy size into CTR
l 8,0(4) # load least significant s1 limb
l 0,0(5) # load least significant s2 limb
cal 3,-4(3) # offset res_ptr, it's updated before used
a 7,0,8 # add least significant limbs, set cy
bdz Lend # If done, skip loop
# Each iteration stores the previous sum limb and adds the next limb pair.
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
stu 7,4(3) # store previous limb in load latency slot
ae 7,0,8 # add new limbs with cy, set cy
bdn Loop # decrement CTR and loop back
Lend: st 7,4(3) # store ultimate result limb
lil 3,0 # load cy into ...
aze 3,3 # ... return value register
br

122
sysdeps/rs6000/addmul_1.s Normal file
View File

@ -0,0 +1,122 @@
# IBM POWER __mpn_addmul_1 -- Multiply a limb vector with a limb and add
# the result to a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# size r5
# s2_limb r6
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
# obtain that operation, we have to use the 32x32->64 signed multiplication
# instruction, and add the appropriate compensation to the high limb of the
# result. We add the multiplicand if the multiplier has its most significant
# bit set, and we add the multiplier if the multiplicand has its most
# significant bit set. We need to preserve the carry flag between each
# iteration, so we have to compute the compensation carefully (the natural,
# srai+and doesn't work). Since the POWER architecture has a branch unit
# we can branch in zero cycles, so that's how we perform the additions.
# __mpn_addmul_1 (res_ptr r3, s1_ptr r4, size r5, s2_limb r6)
# For each of the size limbs: res[i] += s1[i] * s2_limb, with the high product
# limb carried into the next position. The final carry limb is returned in r3.
# Register use, as seen below:
#   r0      current s1 limb (overwritten with the MQ value in the Lp* paths)
#   r7      scratch: compensation value, res limb, MQ copy in the Ln* paths
#   r8      result limb waiting to be stored
#   r9/r10  high product limb (cy_limb); the two unrolled loop halves
#           alternate between them
# Lploop is used when s2_limb >= 0 and Lnloop when s2_limb < 0. Both are
# unrolled twice; Lend0 is the exit taken while cy_limb lives in r10.
.toc
.csect .__mpn_addmul_1[PR]
.align 2
.globl __mpn_addmul_1
.globl .__mpn_addmul_1
.csect __mpn_addmul_1[DS]
__mpn_addmul_1:
.long .__mpn_addmul_1[PR], TOC[tc0], 0 # descriptor words: code entry, TOC anchor, 0
.csect .__mpn_addmul_1[PR]
.__mpn_addmul_1:
cal 3,-4(3) # offset res_ptr by one limb; accesses below use 4(3)
l 0,0(4) # load first s1 limb
cmpi 0,6,0 # test sign of s2_limb (consumed by blt Lneg below)
mtctr 5 # copy size into CTR
mul 9,0,6 # r9 = high product limb, low limb is left in MQ
srai 7,0,31 # r7 = all ones if the s1 limb is negative, else 0
and 7,7,6 # r7 = s2_limb if the s1 limb is negative, else 0
mfmq 8 # r8 = low product limb
cax 9,9,7 # compensate high limb for a negative s1 limb
l 7,4(3) # load first res limb
a 8,8,7 # add res_limb
blt Lneg # s2_limb < 0: take the Lneg/Lnloop path
Lpos: bdz Lend # only one limb: finish
Lploop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # test sign of the s1 limb (consumed by bge Lp0)
mul 10,0,6 # r10 = new high limb, low limb in MQ
mfmq 0 # r0 = low product limb
ae 8,0,9 # low limb + old_cy_limb + old cy
l 7,4(3) # load res limb for this position
aze 10,10 # propagate cy to new cy_limb
a 8,8,7 # add res_limb
bge Lp0 # s1 limb non-negative: no compensation needed
cax 10,10,6 # adjust high limb for negative limb from s1
Lp0: bdz Lend0 # done, cy_limb is in r10
lu 0,4(4) # second unrolled half: same steps with r9 and r10 swapped
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
mfmq 0
ae 8,0,10
l 7,4(3)
aze 9,9
a 8,8,7
bge Lp1
cax 9,9,6 # adjust high limb for negative limb from s1
Lp1: bdn Lploop # decrement CTR and loop back
b Lend # done, cy_limb is in r9
Lneg: cax 9,9,0 # s2_limb is negative: add the first s1 limb to the high limb
bdz Lend # only one limb: finish
Lnloop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # test sign of the s1 limb (consumed by bge Ln0)
mul 10,0,6 # r10 = new high limb, low limb in MQ
mfmq 7 # r7 = low product limb; r0 keeps the s1 limb for the ae below
ae 8,7,9 # low limb + old_cy_limb + old cy
l 7,4(3) # load res limb for this position
ae 10,10,0 # propagate cy to new cy_limb, adding the s1 limb as compensation
a 8,8,7 # add res_limb
bge Ln0 # s1 limb non-negative: no further compensation
cax 10,10,6 # adjust high limb for negative limb from s1
Ln0: bdz Lend0 # done, cy_limb is in r10
lu 0,4(4) # second unrolled half: same steps with r9 and r10 swapped
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
mfmq 7
ae 8,7,10
l 7,4(3)
ae 9,9,0 # propagate cy to new cy_limb
a 8,8,7 # add res_limb
bge Ln1
cax 9,9,6 # adjust high limb for negative limb from s1
Ln1: bdn Lnloop # decrement CTR and loop back
b Lend # done, cy_limb is in r9
Lend0: cal 9,0(10) # move cy_limb from r10 into r9
Lend: st 8,4(3) # store last result limb
aze 3,9 # return value: cy_limb plus the pending carry
br # return to caller

58
sysdeps/rs6000/lshift.s Normal file
View File

@ -0,0 +1,58 @@
# IBM POWER __mpn_lshift -- Shift a limb vector left by cnt bits.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r3
# s_ptr r4
# size r5
# cnt r6
# __mpn_lshift (res_ptr r3, s_ptr r4, size r5, cnt r6)
# Shifts the size-limb vector at s_ptr left by cnt bits and stores it at
# res_ptr. Both vectors are walked from the most significant limb downwards.
# The bits shifted out of the most significant limb are returned in r3.
# r8 holds 32 - cnt; the MQ register carries the previous limb between the
# sre/sreq steps.
.toc
.extern __mpn_lshift[DS]
.extern .__mpn_lshift
.csect [PR]
.align 2
.globl __mpn_lshift
.globl .__mpn_lshift
.csect __mpn_lshift[DS]
__mpn_lshift:
.long .__mpn_lshift, TOC[tc0], 0 # descriptor words: code entry, TOC anchor, 0
.csect [PR]
.__mpn_lshift:
sli 0,5,2 # r0 = size in bytes
cax 9,3,0 # r9 = just past the end of the result vector
cax 4,4,0 # r4 = just past the end of the source vector
sfi 8,6,32 # r8 = 32 - cnt
mtctr 5 # put limb count in CTR loop register
lu 0,-4(4) # read most significant limb
sre 3,0,8 # compute carry out limb, and init MQ register
bdz Lend2 # if just one limb, skip loop
lu 0,-4(4) # read 2:nd most significant limb
sreq 7,0,8 # compute most significant limb of result
bdz Lend # if just two limb, skip loop
Loop: lu 0,-4(4) # load next lower limb
stu 7,-4(9) # store previous result during read latency
sreq 7,0,8 # compute result limb
bdn Loop # loop back until CTR is zero
Lend: stu 7,-4(9) # store 2:nd least significant limb
Lend2: sle 7,0,6 # compute least significant limb
st 7,-4(9) # store it
br # return to caller

109
sysdeps/rs6000/mul_1.s Normal file
View File

@ -0,0 +1,109 @@
# IBM POWER __mpn_mul_1 -- Multiply a limb vector with a limb and store
# the result in a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# size r5
# s2_limb r6
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
# obtain that operation, we have to use the 32x32->64 signed multiplication
# instruction, and add the appropriate compensation to the high limb of the
# result. We add the multiplicand if the multiplier has its most significant
# bit set, and we add the multiplier if the multiplicand has its most
# significant bit set. We need to preserve the carry flag between each
# iteration, so we have to compute the compensation carefully (the natural,
# srai+and doesn't work). Since the POWER architecture has a branch unit
# we can branch in zero cycles, so that's how we perform the additions.
# __mpn_mul_1 (res_ptr r3, s1_ptr r4, size r5, s2_limb r6)
# For each of the size limbs: res[i] = low limb of s1[i] * s2_limb plus the
# high limb carried over from the previous position. The final carry limb is
# returned in r3.
# Register use, as seen below:
#   r0      current s1 limb, then the low product limb read from MQ
#   r8      result limb waiting to be stored
#   r9/r10  high product limb (cy_limb); the two unrolled loop halves
#           alternate between them
# Lploop is used when s2_limb >= 0 and Lnloop when s2_limb < 0. Both are
# unrolled twice; Lend0 is the exit taken while cy_limb lives in r10.
.toc
.csect .__mpn_mul_1[PR]
.align 2
.globl __mpn_mul_1
.globl .__mpn_mul_1
.csect __mpn_mul_1[DS]
__mpn_mul_1:
.long .__mpn_mul_1[PR], TOC[tc0], 0 # descriptor words: code entry, TOC anchor, 0
.csect .__mpn_mul_1[PR]
.__mpn_mul_1:
cal 3,-4(3) # offset res_ptr by one limb; stores below use 4(3)
l 0,0(4) # load first s1 limb
cmpi 0,6,0 # test sign of s2_limb (consumed by blt Lneg below)
mtctr 5 # copy size into CTR
mul 9,0,6 # r9 = high product limb, low limb is left in MQ
srai 7,0,31 # r7 = all ones if the s1 limb is negative, else 0
and 7,7,6 # r7 = s2_limb if the s1 limb is negative, else 0
mfmq 8 # r8 = low product limb
ai 0,0,0 # reset carry
cax 9,9,7 # compensate high limb for a negative s1 limb
blt Lneg # s2_limb < 0: take the Lneg/Lnloop path
Lpos: bdz Lend # only one limb: finish
Lploop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # test sign of the s1 limb (consumed by bge Lp0)
mul 10,0,6 # r10 = new high limb, low limb in MQ
mfmq 0 # r0 = low product limb
ae 8,0,9 # low limb + old cy_limb + old cy
bge Lp0 # s1 limb non-negative: no compensation needed
cax 10,10,6 # adjust high limb for negative limb from s1
Lp0: bdz Lend0 # done, cy_limb is in r10
lu 0,4(4) # second unrolled half: same steps with r9 and r10 swapped
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
mfmq 0
ae 8,0,10
bge Lp1
cax 9,9,6 # adjust high limb for negative limb from s1
Lp1: bdn Lploop # decrement CTR and loop back
b Lend # done, cy_limb is in r9
Lneg: cax 9,9,0 # s2_limb is negative: add the first s1 limb to the high limb
bdz Lend # only one limb: finish
Lnloop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # test sign of the s1 limb (consumed by bge Ln0)
mul 10,0,6 # r10 = new high limb, low limb in MQ
cax 10,10,0 # adjust high limb for negative s2_limb
mfmq 0 # r0 = low product limb
ae 8,0,9 # low limb + old cy_limb + old cy
bge Ln0 # s1 limb non-negative: no further compensation
cax 10,10,6 # adjust high limb for negative limb from s1
Ln0: bdz Lend0 # done, cy_limb is in r10
lu 0,4(4) # second unrolled half: same steps with r9 and r10 swapped
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
cax 9,9,0 # adjust high limb for negative s2_limb
mfmq 0
ae 8,0,10
bge Ln1
cax 9,9,6 # adjust high limb for negative limb from s1
Ln1: bdn Lnloop # decrement CTR and loop back
b Lend # done, cy_limb is in r9
Lend0: cal 9,0(10) # move cy_limb from r10 into r9
Lend: st 8,4(3) # store last result limb
aze 3,9 # return value: cy_limb plus the pending carry
br # return to caller

56
sysdeps/rs6000/rshift.s Normal file
View File

@ -0,0 +1,56 @@
# IBM POWER __mpn_rshift -- Shift a limb vector right by cnt bits.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r3
# s_ptr r4
# size r5
# cnt r6
# __mpn_rshift (res_ptr r3, s_ptr r4, size r5, cnt r6)
# Shifts the size-limb vector at s_ptr right by cnt bits and stores it at
# res_ptr. Both vectors are walked from the least significant limb upwards.
# The bits shifted out of the least significant limb are returned in r3.
# r8 holds 32 - cnt; the MQ register carries the previous limb between the
# sle/sleq steps.
.toc
.extern __mpn_rshift[DS]
.extern .__mpn_rshift
.csect [PR]
.align 2
.globl __mpn_rshift
.globl .__mpn_rshift
.csect __mpn_rshift[DS]
__mpn_rshift:
.long .__mpn_rshift, TOC[tc0], 0 # descriptor words: code entry, TOC anchor, 0
.csect [PR]
.__mpn_rshift:
sfi 8,6,32 # r8 = 32 - cnt
mtctr 5 # put limb count in CTR loop register
l 0,0(4) # read least significant limb
ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s
sle 3,0,8 # compute carry limb, and init MQ register
bdz Lend2 # if just one limb, skip loop
lu 0,4(4) # read 2:nd least significant limb
sleq 7,0,8 # compute least significant limb of result
bdz Lend # if just two limb, skip loop
Loop: lu 0,4(4) # load next higher limb
stu 7,4(9) # store previous result during read latency
sleq 7,0,8 # compute result limb
bdn Loop # loop back until CTR is zero
Lend: stu 7,4(9) # store 2:nd most significant limb
Lend2: sre 7,0,6 # compute most significant limb
st 7,4(9) # store it
br # return to caller

55
sysdeps/rs6000/sub_n.s Normal file
View File

@ -0,0 +1,55 @@
# IBM POWER __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
# store difference in a third limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# s2_ptr r5
# size r6
# __mpn_sub_n (res_ptr r3, s1_ptr r4, s2_ptr r5, size r6)
# Subtracts the size-limb vector at s2_ptr from the one at s1_ptr limb by
# limb, stores the differences at res_ptr and returns the final borrow in r3.
# The first limb pair is handled before the loop; every difference is stored
# one step after it is computed, so the store sits behind the next loads.
.toc
.extern __mpn_sub_n[DS]
.extern .__mpn_sub_n
.csect [PR]
.align 2
.globl __mpn_sub_n
.globl .__mpn_sub_n
.csect __mpn_sub_n[DS]
__mpn_sub_n:
.long .__mpn_sub_n, TOC[tc0], 0 # descriptor words: code entry, TOC anchor, 0
.csect [PR]
.__mpn_sub_n:
mtctr 6 # copy size into CTR
l 8,0(4) # load least significant s1 limb
l 0,0(5) # load least significant s2 limb
cal 3,-4(3) # offset res_ptr, it's updated before used
sf 7,0,8 # subtract least significant limbs (s1 - s2), set cy
bdz Lend # If done, skip loop
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
stu 7,4(3) # store previous limb in load latency slot
sfe 7,0,8 # subtract new limbs with cy, set cy
bdn Loop # decrement CTR and loop back
Lend: st 7,4(3) # store ultimate result limb
sfe 3,0,0 # load !cy into ...
sfi 3,3,0 # ... return value register
br # return to caller

127
sysdeps/rs6000/submul_1.s Normal file
View File

@ -0,0 +1,127 @@
# IBM POWER __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
# the result from a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# size r5
# s2_limb r6
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
# obtain that operation, we have to use the 32x32->64 signed multiplication
# instruction, and add the appropriate compensation to the high limb of the
# result. We add the multiplicand if the multiplier has its most significant
# bit set, and we add the multiplier if the multiplicand has its most
# significant bit set. We need to preserve the carry flag between each
# iteration, so we have to compute the compensation carefully (the natural,
# srai+and doesn't work). Since the POWER architecture has a branch unit
# we can branch in zero cycles, so that's how we perform the additions.
# __mpn_submul_1 (res_ptr r3, s1_ptr r4, size r5, s2_limb r6)
# For each of the size limbs: res[i] -= s1[i] * s2_limb, with the high product
# limb carried into the next position. The final carry limb is returned in r3.
# Register use, as seen below:
#   r0      current s1 limb (overwritten with the MQ value in the Lp* paths)
#   r7      scratch: compensation value, res limb, MQ copy in the Ln* paths
#   r8      result limb waiting to be stored
#   r9/r10  high product limb (cy_limb); the two unrolled loop halves
#           alternate between them
#   r11     low product limb plus incoming carry; overwritten by the
#           "invert cy" add, whose only purpose is the carry it produces
# Lploop is used when s2_limb >= 0 and Lnloop when s2_limb < 0. Both are
# unrolled twice; Lend0 is the exit taken while cy_limb lives in r10.
.toc
.csect .__mpn_submul_1[PR]
.align 2
.globl __mpn_submul_1
.globl .__mpn_submul_1
.csect __mpn_submul_1[DS]
__mpn_submul_1:
.long .__mpn_submul_1[PR], TOC[tc0], 0 # descriptor words: code entry, TOC anchor, 0
.csect .__mpn_submul_1[PR]
.__mpn_submul_1:
cal 3,-4(3) # offset res_ptr by one limb; accesses below use 4(3)
l 0,0(4) # load first s1 limb
cmpi 0,6,0 # test sign of s2_limb (consumed by blt Lneg below)
mtctr 5 # copy size into CTR
mul 9,0,6 # r9 = high product limb, low limb is left in MQ
srai 7,0,31 # r7 = all ones if the s1 limb is negative, else 0
and 7,7,6 # r7 = s2_limb if the s1 limb is negative, else 0
mfmq 11 # r11 = low product limb
cax 9,9,7 # compensate high limb for a negative s1 limb
l 7,4(3) # load first res limb
sf 8,11,7 # r8 = res_limb - low product limb
a 11,8,11 # invert cy (r11 is junk)
blt Lneg # s2_limb < 0: take the Lneg/Lnloop path
Lpos: bdz Lend # only one limb: finish
Lploop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # test sign of the s1 limb (consumed by bge Lp0)
mul 10,0,6 # r10 = new high limb, low limb in MQ
mfmq 0 # r0 = low product limb
ae 11,0,9 # low limb + old_cy_limb + old cy
l 7,4(3) # load res limb for this position
aze 10,10 # propagate cy to new cy_limb
sf 8,11,7 # r8 = res_limb - r11
a 11,8,11 # invert cy (r11 is junk)
bge Lp0 # s1 limb non-negative: no compensation needed
cax 10,10,6 # adjust high limb for negative limb from s1
Lp0: bdz Lend0 # done, cy_limb is in r10
lu 0,4(4) # second unrolled half: same steps with r9 and r10 swapped
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
mfmq 0
ae 11,0,10
l 7,4(3)
aze 9,9
sf 8,11,7
a 11,8,11 # invert cy (r11 is junk)
bge Lp1
cax 9,9,6 # adjust high limb for negative limb from s1
Lp1: bdn Lploop # decrement CTR and loop back
b Lend # done, cy_limb is in r9
Lneg: cax 9,9,0 # s2_limb is negative: add the first s1 limb to the high limb
bdz Lend # only one limb: finish
Lnloop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # test sign of the s1 limb (consumed by bge Ln0)
mul 10,0,6 # r10 = new high limb, low limb in MQ
mfmq 7 # r7 = low product limb; r0 keeps the s1 limb for the ae below
ae 11,7,9 # low limb + old_cy_limb + old cy
l 7,4(3) # load res limb for this position
ae 10,10,0 # propagate cy to new cy_limb, adding the s1 limb as compensation
sf 8,11,7 # r8 = res_limb - r11
a 11,8,11 # invert cy (r11 is junk)
bge Ln0 # s1 limb non-negative: no further compensation
cax 10,10,6 # adjust high limb for negative limb from s1
Ln0: bdz Lend0 # done, cy_limb is in r10
lu 0,4(4) # second unrolled half: same steps with r9 and r10 swapped
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
mfmq 7
ae 11,7,10
l 7,4(3)
ae 9,9,0 # propagate cy to new cy_limb
sf 8,11,7 # r8 = res_limb - r11
a 11,8,11 # invert cy (r11 is junk)
bge Ln1
cax 9,9,6 # adjust high limb for negative limb from s1
Ln1: bdn Lnloop # decrement CTR and loop back
b Lend # done, cy_limb is in r9
Lend0: cal 9,0(10) # move cy_limb from r10 into r9
Lend: st 8,4(3) # store last result limb
aze 3,9 # return value: cy_limb plus the pending carry
br # return to caller

47
sysdeps/vax/add_n.s Normal file
View File

@ -0,0 +1,47 @@
# VAX __mpn_add_n -- Add two limb vectors of the same length > 0 and store
# sum in a third limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr (sp + 4)
# s1_ptr (sp + 8)
# s2_ptr (sp + 12)
# size (sp + 16)
# ___mpn_add_n: adds two limb vectors of equal length and stores the sum in a
# third; the carry out of the last limb is returned in r0.
# Arguments are read from 4(ap)..16(ap) in the order res_ptr, s1_ptr, s2_ptr,
# size. The carry flag has to survive from one adwc to the next, so the loop
# relies on movl and jsobgtr leaving it untouched.
.text
.align 1
.globl ___mpn_add_n
___mpn_add_n:
.word 0x0 # entry mask: no registers saved
movl 16(ap),r0 # r0 = size (loop counter)
movl 12(ap),r1 # r1 = s2_ptr
movl 8(ap),r2 # r2 = s1_ptr
movl 4(ap),r3 # r3 = res_ptr
subl2 r4,r4 # r4 - r4: clears the carry flag before the first adwc
Loop:
movl (r2)+,r4 # fetch s1 limb, advance s1_ptr
adwc (r1)+,r4 # add s2 limb plus carry, advance s2_ptr
movl r4,(r3)+ # store sum limb, advance res_ptr
jsobgtr r0,Loop # count down size; loop while it is still > 0
adwc r0,r0 # r0 is 0 after the loop, so this leaves r0 = carry out
ret

125
sysdeps/vax/addmul_1.s Normal file
View File

@ -0,0 +1,125 @@
# VAX __mpn_addmul_1 -- Multiply a limb vector with a limb and add
# the result to a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr (sp + 4)
# s1_ptr (sp + 8)
# size (sp + 12)
# s2_limb (sp + 16)
# ___mpn_addmul_1: res[i] += s1[i] * s2_limb for size limbs; the final carry
# limb is returned in r0. Arguments are read from 4(ap)..16(ap) in the order
# res_ptr, s1_ptr, size, s2_limb.
# emul multiplies signed operands, so every path adds the terms needed to get
# the unsigned high limb: s2_limb (r6) when the s1 limb has its msb set, and
# the s1 limb (r1) when s2_limb has its msb set.
# The loop is unrolled twice. The first half keeps its product in r2:r3 and
# takes r11 as incoming carry limb; the second half keeps its product in
# r10:r11 and takes r3. Labels ending in n0/n1 are the variants for an s1 limb
# with its msb set in the first/second half.
.text
.align 1
.globl ___mpn_addmul_1
___mpn_addmul_1:
.word 0xfc0 # entry mask: save r6-r11
movl 12(ap),r4 # r4 = size
movl 8(ap),r8 # r8 = s1_ptr
movl 4(ap),r9 # r9 = res_ptr
movl 16(ap),r6 # r6 = s2_limb
jlss s2_big # s2_limb >= 0x80000000: use the Loop2 variants
clrl r3 # zero carry limb for an entry at L1
incl r4
ashl $-1,r4,r7 # r7 = (size + 1) / 2 = trips through the unrolled loop
jlbc r4,L1 # size is odd: enter at the second half
clrl r11 # zero carry limb for an entry at Loop1
# Loop for S2_LIMB < 0x80000000
Loop1: movl (r8)+,r1 # fetch s1 limb, advance s1_ptr
jlss L1n0 # limb has its msb set
emul r1,r6,$0,r2 # r2:r3 = s1 limb * s2_limb
addl2 r11,r2 # add incoming carry limb to the low limb
adwc $0,r3 # propagate carry into the high limb
addl2 r2,(r9)+ # add low limb into res, advance res_ptr
adwc $0,r3 # propagate carry into the high limb
L1: movl (r8)+,r1 # second half: fetch s1 limb
jlss L1n1 # limb has its msb set
L1p1: emul r1,r6,$0,r10 # r10:r11 = s1 limb * s2_limb
addl2 r3,r10 # add incoming carry limb to the low limb
adwc $0,r11
addl2 r10,(r9)+ # add low limb into res, advance res_ptr
adwc $0,r11
jsobgtr r7,Loop1 # count down trips; loop while > 0
movl r11,r0 # return the final carry limb
ret
L1n0: emul r1,r6,$0,r2 # first half, s1 limb has msb set
addl2 r11,r2
adwc r6,r3 # carry plus s2_limb compensation
addl2 r2,(r9)+
adwc $0,r3
movl (r8)+,r1 # fetch s1 limb for the second half
jgeq L1p1 # msb clear: rejoin the plain second half
L1n1: emul r1,r6,$0,r10 # second half, s1 limb has msb set
addl2 r3,r10
adwc r6,r11 # carry plus s2_limb compensation
addl2 r10,(r9)+
adwc $0,r11
jsobgtr r7,Loop1
movl r11,r0 # return the final carry limb
ret
s2_big: clrl r3 # zero carry limb for an entry at L2
incl r4
ashl $-1,r4,r7 # r7 = (size + 1) / 2 = trips through the unrolled loop
jlbc r4,L2 # size is odd: enter at the second half
clrl r11 # zero carry limb for an entry at Loop2
# Loop for S2_LIMB >= 0x80000000
Loop2: movl (r8)+,r1 # fetch s1 limb, advance s1_ptr
jlss L2n0 # limb has its msb set
emul r1,r6,$0,r2 # r2:r3 = s1 limb * s2_limb
addl2 r11,r2
adwc r1,r3 # carry plus s1 limb compensation
addl2 r2,(r9)+
adwc $0,r3
L2: movl (r8)+,r1 # second half: fetch s1 limb
jlss L2n1 # limb has its msb set
L2p1: emul r1,r6,$0,r10 # r10:r11 = s1 limb * s2_limb
addl2 r3,r10
adwc r1,r11 # carry plus s1 limb compensation
addl2 r10,(r9)+
adwc $0,r11
jsobgtr r7,Loop2 # count down trips; loop while > 0
movl r11,r0 # return the final carry limb
ret
L2n0: emul r1,r6,$0,r2 # first half, both operands have msb set
addl2 r11,r2
adwc r6,r3 # carry plus s2_limb compensation
addl2 r2,(r9)+
adwc r1,r3 # carry plus s1 limb compensation
movl (r8)+,r1 # fetch s1 limb for the second half
jgeq L2p1 # msb clear: rejoin the plain second half
L2n1: emul r1,r6,$0,r10 # second half, both operands have msb set
addl2 r3,r10
adwc r6,r11 # carry plus s2_limb compensation
addl2 r10,(r9)+
adwc r1,r11 # carry plus s1 limb compensation
jsobgtr r7,Loop2
movl r11,r0 # return the final carry limb
ret

122
sysdeps/vax/mul_1.s Normal file
View File

@ -0,0 +1,122 @@
# VAX __mpn_mul_1 -- Multiply a limb vector with a limb and store
# the result in a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr (sp + 4)
# s1_ptr (sp + 8)
# size (sp + 12)
# s2_limb (sp + 16)
# ___mpn_mul_1: res[i] = low limb of s1[i] * s2_limb plus the carry limb from
# the previous position, for size limbs; the final carry limb is returned in
# r0. Arguments are read from 4(ap)..16(ap) in the order res_ptr, s1_ptr,
# size, s2_limb.
# emul multiplies signed operands, so every path adds the terms needed to get
# the unsigned high limb: s2_limb (r6) when the s1 limb has its msb set, and
# the s1 limb (r1) when s2_limb has its msb set.
# The loop is unrolled twice. The first half keeps its product in r2:r3 and
# takes r11 as incoming carry limb; the second half keeps its product in
# r10:r11 and takes r3. Labels ending in n0/n1 are the variants for an s1 limb
# with its msb set in the first/second half.
.text
.align 1
.globl ___mpn_mul_1
___mpn_mul_1:
.word 0xfc0 # entry mask: save r6-r11
movl 12(ap),r4 # r4 = size
movl 8(ap),r8 # r8 = s1_ptr
movl 4(ap),r9 # r9 = res_ptr
movl 16(ap),r6 # r6 = s2_limb
jlss s2_big # s2_limb >= 0x80000000: use the Loop2 variants
# One might want to combine the addl2 and the store below, but that
# is actually just slower according to my timing tests. (VAX 3600)
clrl r3 # zero carry limb for an entry at L1
incl r4
ashl $-1,r4,r7 # r7 = (size + 1) / 2 = trips through the unrolled loop
jlbc r4,L1 # size is odd: enter at the second half
clrl r11 # zero carry limb for an entry at Loop1
# Loop for S2_LIMB < 0x80000000
Loop1: movl (r8)+,r1 # fetch s1 limb, advance s1_ptr
jlss L1n0 # limb has its msb set
emul r1,r6,$0,r2 # r2:r3 = s1 limb * s2_limb
addl2 r11,r2 # add incoming carry limb to the low limb
adwc $0,r3 # propagate carry into the high limb
movl r2,(r9)+ # store result limb, advance res_ptr
L1: movl (r8)+,r1 # second half: fetch s1 limb
jlss L1n1 # limb has its msb set
L1p1: emul r1,r6,$0,r10 # r10:r11 = s1 limb * s2_limb
addl2 r3,r10 # add incoming carry limb to the low limb
adwc $0,r11
movl r10,(r9)+ # store result limb, advance res_ptr
jsobgtr r7,Loop1 # count down trips; loop while > 0
movl r11,r0 # return the final carry limb
ret
L1n0: emul r1,r6,$0,r2 # first half, s1 limb has msb set
addl2 r11,r2
adwc r6,r3 # carry plus s2_limb compensation
movl r2,(r9)+
movl (r8)+,r1 # fetch s1 limb for the second half
jgeq L1p1 # msb clear: rejoin the plain second half
L1n1: emul r1,r6,$0,r10 # second half, s1 limb has msb set
addl2 r3,r10
adwc r6,r11 # carry plus s2_limb compensation
movl r10,(r9)+
jsobgtr r7,Loop1
movl r11,r0 # return the final carry limb
ret
s2_big: clrl r3 # zero carry limb for an entry at L2
incl r4
ashl $-1,r4,r7 # r7 = (size + 1) / 2 = trips through the unrolled loop
jlbc r4,L2 # size is odd: enter at the second half
clrl r11 # zero carry limb for an entry at Loop2
# Loop for S2_LIMB >= 0x80000000
Loop2: movl (r8)+,r1 # fetch s1 limb, advance s1_ptr
jlss L2n0 # limb has its msb set
emul r1,r6,$0,r2 # r2:r3 = s1 limb * s2_limb
addl2 r11,r2
adwc r1,r3 # carry plus s1 limb compensation
movl r2,(r9)+
L2: movl (r8)+,r1 # second half: fetch s1 limb
jlss L2n1 # limb has its msb set
L2p1: emul r1,r6,$0,r10 # r10:r11 = s1 limb * s2_limb
addl2 r3,r10
adwc r1,r11 # carry plus s1 limb compensation
movl r10,(r9)+
jsobgtr r7,Loop2 # count down trips; loop while > 0
movl r11,r0 # return the final carry limb
ret
L2n0: emul r1,r6,$0,r2 # first half, both operands have msb set
addl2 r1,r3 # s1 limb compensation
addl2 r11,r2
adwc r6,r3 # carry plus s2_limb compensation
movl r2,(r9)+
movl (r8)+,r1 # fetch s1 limb for the second half
jgeq L2p1 # msb clear: rejoin the plain second half
L2n1: emul r1,r6,$0,r10 # second half, both operands have msb set
addl2 r1,r11 # s1 limb compensation
addl2 r3,r10
adwc r6,r11 # carry plus s2_limb compensation
movl r10,(r9)+
jsobgtr r7,Loop2
movl r11,r0 # return the final carry limb
ret

47
sysdeps/vax/sub_n.s Normal file
View File

@ -0,0 +1,47 @@
# VAX __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
# difference in a third limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr (sp + 4)
# s1_ptr (sp + 8)
# s2_ptr (sp + 12)
# size (sp + 16)
# ___mpn_sub_n: subtracts the s2 limb vector from the s1 limb vector (equal
# lengths) and stores the difference in a third; the borrow out of the last
# limb is returned in r0.
# Arguments are read from 4(ap)..16(ap) in the order res_ptr, s1_ptr, s2_ptr,
# size. The carry flag has to survive from one sbwc to the next, so the loop
# relies on movl and jsobgtr leaving it untouched.
.text
.align 1
.globl ___mpn_sub_n
___mpn_sub_n:
.word 0x0 # entry mask: no registers saved
movl 16(ap),r0 # r0 = size (loop counter)
movl 12(ap),r1 # r1 = s2_ptr
movl 8(ap),r2 # r2 = s1_ptr
movl 4(ap),r3 # r3 = res_ptr
subl2 r4,r4 # r4 - r4: clears the carry flag before the first sbwc
Loop:
movl (r2)+,r4 # fetch s1 limb, advance s1_ptr
sbwc (r1)+,r4 # subtract s2 limb and borrow, advance s2_ptr
movl r4,(r3)+ # store difference limb, advance res_ptr
jsobgtr r0,Loop # count down size; loop while it is still > 0
adwc r0,r0 # r0 is 0 after the loop, so this leaves r0 = borrow out
ret

125
sysdeps/vax/submul_1.s Normal file
View File

@ -0,0 +1,125 @@
# VAX __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
# the result from a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# INPUT PARAMETERS
# res_ptr (sp + 4)
# s1_ptr (sp + 8)
# size (sp + 12)
# s2_limb (sp + 16)
# ___mpn_submul_1: res[i] -= s1[i] * s2_limb for size limbs; the final carry
# limb is returned in r0. Arguments are read from 4(ap)..16(ap) in the order
# res_ptr, s1_ptr, size, s2_limb.
# emul multiplies signed operands, so every path adds the terms needed to get
# the unsigned high limb: s2_limb (r6) when the s1 limb has its msb set, and
# the s1 limb (r1) when s2_limb has its msb set.
# The loop is unrolled twice. The first half keeps its product in r2:r3 and
# takes r11 as incoming carry limb; the second half keeps its product in
# r10:r11 and takes r3. Labels ending in n0/n1 are the variants for an s1 limb
# with its msb set in the first/second half.
.text
.align 1
.globl ___mpn_submul_1
___mpn_submul_1:
.word 0xfc0 # entry mask: save r6-r11
movl 12(ap),r4 # r4 = size
movl 8(ap),r8 # r8 = s1_ptr
movl 4(ap),r9 # r9 = res_ptr
movl 16(ap),r6 # r6 = s2_limb
jlss s2_big # s2_limb >= 0x80000000: use the Loop2 variants
clrl r3 # zero carry limb for an entry at L1
incl r4
ashl $-1,r4,r7 # r7 = (size + 1) / 2 = trips through the unrolled loop
jlbc r4,L1 # size is odd: enter at the second half
clrl r11 # zero carry limb for an entry at Loop1
# Loop for S2_LIMB < 0x80000000
Loop1: movl (r8)+,r1 # fetch s1 limb, advance s1_ptr
jlss L1n0 # limb has its msb set
emul r1,r6,$0,r2 # r2:r3 = s1 limb * s2_limb
addl2 r11,r2 # add incoming carry limb to the low limb
adwc $0,r3 # propagate carry into the high limb
subl2 r2,(r9)+ # subtract low limb from res, advance res_ptr
adwc $0,r3 # fold the borrow into the high limb
L1: movl (r8)+,r1 # second half: fetch s1 limb
jlss L1n1 # limb has its msb set
L1p1: emul r1,r6,$0,r10 # r10:r11 = s1 limb * s2_limb
addl2 r3,r10 # add incoming carry limb to the low limb
adwc $0,r11
subl2 r10,(r9)+ # subtract low limb from res, advance res_ptr
adwc $0,r11 # fold the borrow into the high limb
jsobgtr r7,Loop1 # count down trips; loop while > 0
movl r11,r0 # return the final carry limb
ret
L1n0: emul r1,r6,$0,r2 # first half, s1 limb has msb set
addl2 r11,r2
adwc r6,r3 # carry plus s2_limb compensation
subl2 r2,(r9)+
adwc $0,r3
movl (r8)+,r1 # fetch s1 limb for the second half
jgeq L1p1 # msb clear: rejoin the plain second half
L1n1: emul r1,r6,$0,r10 # second half, s1 limb has msb set
addl2 r3,r10
adwc r6,r11 # carry plus s2_limb compensation
subl2 r10,(r9)+
adwc $0,r11
jsobgtr r7,Loop1
movl r11,r0 # return the final carry limb
ret
s2_big: clrl r3 # zero carry limb for an entry at L2
incl r4
ashl $-1,r4,r7 # r7 = (size + 1) / 2 = trips through the unrolled loop
jlbc r4,L2 # size is odd: enter at the second half
clrl r11 # zero carry limb for an entry at Loop2
# Loop for S2_LIMB >= 0x80000000
Loop2: movl (r8)+,r1 # fetch s1 limb, advance s1_ptr
jlss L2n0 # limb has its msb set
emul r1,r6,$0,r2 # r2:r3 = s1 limb * s2_limb
addl2 r11,r2
adwc r1,r3 # carry plus s1 limb compensation
subl2 r2,(r9)+
adwc $0,r3
L2: movl (r8)+,r1 # second half: fetch s1 limb
jlss L2n1 # limb has its msb set
L2p1: emul r1,r6,$0,r10 # r10:r11 = s1 limb * s2_limb
addl2 r3,r10
adwc r1,r11 # carry plus s1 limb compensation
subl2 r10,(r9)+
adwc $0,r11
jsobgtr r7,Loop2 # count down trips; loop while > 0
movl r11,r0 # return the final carry limb
ret
L2n0: emul r1,r6,$0,r2 # first half, both operands have msb set
addl2 r11,r2
adwc r6,r3 # carry plus s2_limb compensation
subl2 r2,(r9)+
adwc r1,r3 # borrow plus s1 limb compensation
movl (r8)+,r1 # fetch s1 limb for the second half
jgeq L2p1 # msb clear: rejoin the plain second half
L2n1: emul r1,r6,$0,r10 # second half, both operands have msb set
addl2 r3,r10
adwc r6,r11 # carry plus s2_limb compensation
subl2 r10,(r9)+
adwc r1,r11 # borrow plus s1 limb compensation
jsobgtr r7,Loop2
movl r11,r0 # return the final carry limb
ret

52
sysdeps/z8000/add_n.s Normal file
View File

@ -0,0 +1,52 @@
! Z8000 __mpn_add_n -- Add two limb vectors of equal, non-zero length.
! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! res_ptr r7
! s1_ptr r6
! s2_ptr r5
! size r4
! If we are really crazy, we can use push to write a few result words
! backwards, using push just because it is faster than reg+disp. We'd
! then add 2x the number of words written to r7...
! ___mpn_add_n (res_ptr r7, s1_ptr r6, s2_ptr r5, size r4)
! Adds two limb vectors of equal, non-zero length and stores the sum at
! res_ptr. Limbs are one word wide here (pointers advance by 2). The carry
! out of the last limb is left in r2.
! The first limb pair is added with add; the loop uses adc, so the carry flag
! must not be disturbed between iterations.
unseg
.text
even
global ___mpn_add_n
___mpn_add_n:
pop r0,@r6 ! load first s1 limb, advance s1_ptr
pop r1,@r5 ! load first s2 limb, advance s2_ptr
add r0,r1 ! add least significant limbs, set cy
ld @r7,r0 ! store first result limb
dec r4 ! one limb done
jr eq,Lend ! size was 1: skip the loop
Loop: pop r0,@r6 ! load s1 limb, advance s1_ptr
pop r1,@r5 ! load s2 limb, advance s2_ptr
adc r0,r1 ! add limbs with cy, set cy
inc r7,#2 ! advance res_ptr
ld @r7,r0 ! store result limb
dec r4 ! count down remaining limbs
jr ne,Loop
Lend: ld r2,r4 ! use 0 already in r4
adc r2,r2 ! r2 = 0 + 0 + cy = carry out
ret t

67
sysdeps/z8000/mul_1.s Normal file
View File

@ -0,0 +1,67 @@
! Z8000 __mpn_mul_1 -- Multiply a limb vector with a limb and store
! the result in a second limb vector.
! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! res_ptr r7
! s1_ptr r6
! size r5
! s2_limb r4
! ___mpn_mul_1 (res_ptr r7, s1_ptr r6, size r5, s2_limb r4)
! Multiplies the size-limb vector at s1_ptr by s2_limb and stores the low
! limbs at res_ptr; the final carry limb is left in r2. Limbs are one word
! wide here (pointers advance by 2).
!
! mult is a signed multiply, so the high limb (r8) is corrected to obtain the
! unsigned product:
!   sign_comp1: add the s1 limb  when s2_limb has its msb set
!   sign_comp2: add s2_limb      when the s1 limb has its msb set
! Lpos is the loop for s2_limb with msb clear, Lneg the loop for msb set.
!
! FIX: the sign_comp2 add used to be skipped with "jr mi", i.e. exactly when
! the loaded limb had its msb set, which applied the compensation to the
! wrong limbs. The skip condition is now "pl" in both loops.
unseg
.text
even
global ___mpn_mul_1
___mpn_mul_1:
sub r2,r2 ! zero carry limb
and r4,r4 ! set flags from s2_limb
jr mi,Lneg ! msb of s2_limb set: use the Lneg loop
Lpos: pop r1,@r6 ! load s1 limb, advance s1_ptr
ld r9,r1
mult rr8,r4 ! rr8 = r9 * r4 (r8 = hi_limb, r9 = lo_limb)
and r1,r1 ! set sign flag from loaded limb
jr pl,Lp ! skip compensation unless loaded limb's msb is set
add r8,r4 ! hi_limb += sign_comp2
Lp: add r9,r2 ! lo_limb += cy_limb
xor r2,r2 ! clear r2; the carry from the add above is still needed
adc r2,r8 ! cy_limb = hi_limb + cy
ld @r7,r9 ! store result limb
inc r7,#2 ! advance res_ptr
dec r5 ! count down remaining limbs
jr ne,Lpos
ret t
Lneg: pop r1,@r6 ! load s1 limb, advance s1_ptr
ld r9,r1
mult rr8,r4 ! rr8 = r9 * r4 (r8 = hi_limb, r9 = lo_limb)
add r8,r1 ! hi_limb += sign_comp1
and r1,r1 ! set sign flag from loaded limb
jr pl,Ln ! skip compensation unless loaded limb's msb is set
add r8,r4 ! hi_limb += sign_comp2
Ln: add r9,r2 ! lo_limb += cy_limb
xor r2,r2 ! clear r2; the carry from the add above is still needed
adc r2,r8 ! cy_limb = hi_limb + cy
ld @r7,r9 ! store result limb
inc r7,#2 ! advance res_ptr
dec r5 ! count down remaining limbs
jr ne,Lneg
ret t

53
sysdeps/z8000/sub_n.s Normal file
View File

@ -0,0 +1,53 @@
! Z8000 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! store difference in a third limb vector.
! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
! This file is part of the GNU MP Library.
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! INPUT PARAMETERS
! res_ptr r7
! s1_ptr r6
! s2_ptr r5
! size r4
! If we are really crazy, we can use push to write a few result words
! backwards, using push just because it is faster than reg+disp. We'd
! then add 2x the number of words written to r7...
! ___mpn_sub_n (res_ptr r7, s1_ptr r6, s2_ptr r5, size r4)
! Subtracts the s2 limb vector from the s1 limb vector (equal, non-zero
! length) and stores the difference at res_ptr. Limbs are one word wide here
! (pointers advance by 2). The borrow out of the last limb is left in r2.
! The first limb pair is handled with sub; the loop uses sbc, so the carry
! flag must not be disturbed between iterations.
unseg
.text
even
global ___mpn_sub_n
___mpn_sub_n:
pop r0,@r6 ! load first s1 limb, advance s1_ptr
pop r1,@r5 ! load first s2 limb, advance s2_ptr
sub r0,r1 ! subtract least significant limbs, set cy
ld @r7,r0 ! store first result limb
dec r4 ! one limb done
jr eq,Lend ! size was 1: skip the loop
Loop: pop r0,@r6 ! load s1 limb, advance s1_ptr
pop r1,@r5 ! load s2 limb, advance s2_ptr
sbc r0,r1 ! subtract limbs with cy, set cy
inc r7,#2 ! advance res_ptr
ld @r7,r0 ! store result limb
dec r4 ! count down remaining limbs
jr ne,Loop
Lend: ld r2,r4 ! use 0 already in r4
adc r2,r2 ! r2 = 0 + 0 + cy = borrow out
ret t