Move sysdeps/rs6000 off to ports.

2006-02-28  Roland McGrath  <roland@redhat.com>

	* sysdeps/rs6000: Directory removed, saved in ports repository.
This commit is contained in:
Roland McGrath 2006-03-01 07:33:08 +00:00
parent fd0aa33958
commit 15db4832f4
10 changed files with 4 additions and 768 deletions

View File

@ -1,3 +1,7 @@
2006-02-28 Roland McGrath <roland@redhat.com>
* sysdeps/rs6000: Directory removed, saved in ports repository.
2006-03-01 Kaz Kojima <kkojima@rr.iij4u.or.jp>
* sysdeps/unix/sysv/linux/sh/pread.c: Copy in mips pread.c.

View File

@ -1,81 +0,0 @@
# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# s2_ptr r5
# size r6
.toc
.extern __mpn_add_n[DS]
.extern .__mpn_add_n
.csect [PR]
.align 2
.globl __mpn_add_n
.globl .__mpn_add_n
.csect __mpn_add_n[DS]
__mpn_add_n:
.long .__mpn_add_n, TOC[tc0], 0
.csect [PR]
.__mpn_add_n:
andil. 10,6,1 # odd or even number of limbs?
l 8,0(4) # load least significant s1 limb
l 0,0(5) # load least significant s2 limb
cal 3,-4(3) # offset res_ptr, it's updated before it's used
sri 10,6,1 # count for unrolled loop
a 7,0,8 # add least significant limbs, set cy
mtctr 10 # copy count into CTR
beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
# We have an odd # of limbs. Add the first limbs separately.
cmpi 1,10,0 # is count for unrolled loop zero?
bne 1,L1 # branch if not
st 7,4(3)
aze 3,10 # use the fact that r10 is zero...
br # return
# We added least significant limbs. Now reload the next limbs to enter loop.
L1: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
stu 7,4(3)
ae 7,0,8 # add limbs, set cy
Leven: lu 9,4(4) # load s1 limb and update s1_ptr
lu 10,4(5) # load s2 limb and update s2_ptr
bdz Lend # If done, skip loop
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
ae 11,9,10 # add previous limbs with cy, set cy
stu 7,4(3) #
lu 9,4(4) # load s1 limb and update s1_ptr
lu 10,4(5) # load s2 limb and update s2_ptr
ae 7,0,8 # add previous limbs with cy, set cy
stu 11,4(3) #
bdn Loop # decrement CTR and loop back
Lend: ae 11,9,10 # add limbs with cy, set cy
st 7,4(3) #
st 11,8(3) #
lil 3,0 # load cy into ...
aze 3,3 # ... return value register
br

View File

@ -1,123 +0,0 @@
# IBM POWER __mpn_addmul_1 -- Multiply a limb vector with a limb and add
# the result to a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# size r5
# s2_limb r6
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
# obtain that operation, we have to use the 32x32->64 signed multiplication
# instruction, and add the appropriate compensation to the high limb of the
# result. We add the multiplicand if the multiplier has its most significant
# bit set, and we add the multiplier if the multiplicand has its most
# significant bit set. We need to preserve the carry flag between each
# iteration, so we have to compute the compensation carefully (the natural,
# srai+and doesn't work). Since the POWER architecture has a branch unit
# we can branch in zero cycles, so that's how we perform the additions.
.toc
.csect .__mpn_addmul_1[PR]
.align 2
.globl __mpn_addmul_1
.globl .__mpn_addmul_1
.csect __mpn_addmul_1[DS]
__mpn_addmul_1:
.long .__mpn_addmul_1[PR], TOC[tc0], 0
.csect .__mpn_addmul_1[PR]
.__mpn_addmul_1:
cal 3,-4(3)
l 0,0(4)
cmpi 0,6,0
mtctr 5
mul 9,0,6
srai 7,0,31
and 7,7,6
mfmq 8
cax 9,9,7
l 7,4(3)
a 8,8,7 # add res_limb
blt Lneg
Lpos: bdz Lend
Lploop: lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 10,0,6
mfmq 0
ae 8,0,9 # low limb + old_cy_limb + old cy
l 7,4(3)
aze 10,10 # propagate cy to new cy_limb
a 8,8,7 # add res_limb
bge Lp0
cax 10,10,6 # adjust high limb for negative limb from s1
Lp0: bdz Lend0
lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
mfmq 0
ae 8,0,10
l 7,4(3)
aze 9,9
a 8,8,7
bge Lp1
cax 9,9,6 # adjust high limb for negative limb from s1
Lp1: bdn Lploop
b Lend
Lneg: cax 9,9,0
bdz Lend
Lnloop: lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 10,0,6
mfmq 7
ae 8,7,9
l 7,4(3)
ae 10,10,0 # propagate cy to new cy_limb
a 8,8,7 # add res_limb
bge Ln0
cax 10,10,6 # adjust high limb for negative limb from s1
Ln0: bdz Lend0
lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
mfmq 7
ae 8,7,10
l 7,4(3)
ae 9,9,0 # propagate cy to new cy_limb
a 8,8,7 # add res_limb
bge Ln1
cax 9,9,6 # adjust high limb for negative limb from s1
Ln1: bdn Lnloop
b Lend
Lend0: cal 9,0(10)
Lend: st 8,4(3)
aze 3,9
br

View File

@ -1,42 +0,0 @@
/* ffs -- find first set bit in a word, counted from least significant end.
For IBM rs6000.
Copyright (C) 1991, 1992, 1997, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Torbjorn Granlund (tege@sics.se).
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <string.h>
#undef ffs
#ifdef __GNUC__
int
__ffs (x)
int x;
{
int cnt;
asm ("cntlz %0,%1" : "=r" (cnt) : "r" (x & -x));
return 32 - cnt;
}
weak_alias (__ffs, ffs)
libc_hidden_builtin_def (ffs)
#else
#include <string/ffs.c>
#endif

View File

@ -1,59 +0,0 @@
# IBM POWER __mpn_lshift --
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r3
# s_ptr r4
# size r5
# cnt r6
.toc
.extern __mpn_lshift[DS]
.extern .__mpn_lshift
.csect [PR]
.align 2
.globl __mpn_lshift
.globl .__mpn_lshift
.csect __mpn_lshift[DS]
__mpn_lshift:
.long .__mpn_lshift, TOC[tc0], 0
.csect [PR]
.__mpn_lshift:
sli 0,5,2
cax 9,3,0
cax 4,4,0
sfi 8,6,32
mtctr 5 # put limb count in CTR loop register
lu 0,-4(4) # read most significant limb
sre 3,0,8 # compute carry out limb, and init MQ register
bdz Lend2 # if just one limb, skip loop
lu 0,-4(4) # read 2:nd most significant limb
sreq 7,0,8 # compute most significant limb of result
bdz Lend # if just two limb, skip loop
Loop: lu 0,-4(4) # load next lower limb
stu 7,-4(9) # store previous result during read latency
sreq 7,0,8 # compute result limb
bdn Loop # loop back until CTR is zero
Lend: stu 7,-4(9) # store 2:nd least significant limb
Lend2: sle 7,0,6 # compute least significant limb
st 7,-4(9) # store it" \
br

View File

@ -1,86 +0,0 @@
/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdeps/generic/memcopy.h>
#undef OP_T_THRES
#define OP_T_THRES 32
#undef BYTE_COPY_FWD
#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
do \
{ \
size_t __nbytes = nbytes; \
asm volatile("mtspr 1,%2\n" \
"lsx 6,0,%1\n" \
"stsx 6,0,%0" : /* No outputs. */ : \
"b" (dst_bp), "b" (src_bp), "r" (__nbytes) : \
"6", "7", "8", "9", "10", "11", "12", "13"); \
dst_bp += __nbytes; \
src_bp += __nbytes; \
} while (0)
#undef BYTE_COPY_BWD
#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \
do \
{ \
size_t __nbytes = (nbytes); \
dst_ep -= __nbytes; \
src_ep -= __nbytes; \
asm volatile("mtspr 1,%2\n" \
"lsx 6,0,%1\n" \
"stsx 6,0,%0" : /* No outputs. */ : \
"b" (dst_ep), "b" (src_ep), "r" (__nbytes) : \
"6", "7", "8", "9", "10", "11", "12", "13"); \
} while (0)
#undef WORD_COPY_FWD
#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
do \
{ \
size_t __nblocks = (nbytes) / 32; \
if (__nblocks != 0) \
asm volatile("mtctr %4\n" \
"lsi 6,%1,32\n" \
"ai %1,%1,32\n" \
"stsi 6,%0,32\n" \
"ai %0,%0,32\n" \
"bdn $-16" : \
"=b" (dst_bp), "=b" (src_bp) : \
"0" (dst_bp), "1" (src_bp), "r" (__nblocks) : \
"6", "7", "8", "9", "10", "11", "12", "13"); \
(nbytes_left) = (nbytes) % 32; \
} while (0)
#undef WORD_COPY_BWD
#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \
do \
{ \
size_t __nblocks = (nbytes) / 32; \
if (__nblocks != 0) \
asm volatile("mtctr %4\n" \
"ai %1,%1,-32\n" \
"lsi 6,%1,32\n" \
"ai %0,%0,-32\n" \
"stsi 6,%0,32\n" \
"bdn $-16" : \
"=b" (dst_ep), "=b" (src_ep) : \
"0" (dst_ep), "1" (src_ep), "r" (__nblocks) : \
"6", "7", "8", "9", "10", "11", "12", "13"); \
(nbytes_left) = (nbytes) % 32; \
} while (0)

View File

@ -1,110 +0,0 @@
# IBM POWER __mpn_mul_1 -- Multiply a limb vector with a limb and store
# the result in a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# size r5
# s2_limb r6
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
# obtain that operation, we have to use the 32x32->64 signed multiplication
# instruction, and add the appropriate compensation to the high limb of the
# result. We add the multiplicand if the multiplier has its most significant
# bit set, and we add the multiplier if the multiplicand has its most
# significant bit set. We need to preserve the carry flag between each
# iteration, so we have to compute the compensation carefully (the natural,
# srai+and doesn't work). Since the POWER architecture has a branch unit
# we can branch in zero cycles, so that's how we perform the additions.
.toc
.csect .__mpn_mul_1[PR]
.align 2
.globl __mpn_mul_1
.globl .__mpn_mul_1
.csect __mpn_mul_1[DS]
__mpn_mul_1:
.long .__mpn_mul_1[PR], TOC[tc0], 0
.csect .__mpn_mul_1[PR]
.__mpn_mul_1:
cal 3,-4(3)
l 0,0(4)
cmpi 0,6,0
mtctr 5
mul 9,0,6
srai 7,0,31
and 7,7,6
mfmq 8
ai 0,0,0 # reset carry
cax 9,9,7
blt Lneg
Lpos: bdz Lend
Lploop: lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 10,0,6
mfmq 0
ae 8,0,9
bge Lp0
cax 10,10,6 # adjust high limb for negative limb from s1
Lp0: bdz Lend0
lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
mfmq 0
ae 8,0,10
bge Lp1
cax 9,9,6 # adjust high limb for negative limb from s1
Lp1: bdn Lploop
b Lend
Lneg: cax 9,9,0
bdz Lend
Lnloop: lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 10,0,6
cax 10,10,0 # adjust high limb for negative s2_limb
mfmq 0
ae 8,0,9
bge Ln0
cax 10,10,6 # adjust high limb for negative limb from s1
Ln0: bdz Lend0
lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
cax 9,9,0 # adjust high limb for negative s2_limb
mfmq 0
ae 8,0,10
bge Ln1
cax 9,9,6 # adjust high limb for negative limb from s1
Ln1: bdn Lnloop
b Lend
Lend0: cal 9,0(10)
Lend: st 8,4(3)
aze 3,9
br

View File

@ -1,57 +0,0 @@
# IBM POWER __mpn_rshift --
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r3
# s_ptr r4
# size r5
# cnt r6
.toc
.extern __mpn_rshift[DS]
.extern .__mpn_rshift
.csect [PR]
.align 2
.globl __mpn_rshift
.globl .__mpn_rshift
.csect __mpn_rshift[DS]
__mpn_rshift:
.long .__mpn_rshift, TOC[tc0], 0
.csect [PR]
.__mpn_rshift:
sfi 8,6,32
mtctr 5 # put limb count in CTR loop register
l 0,0(4) # read least significant limb
ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s
sle 3,0,8 # compute carry limb, and init MQ register
bdz Lend2 # if just one limb, skip loop
lu 0,4(4) # read 2:nd least significant limb
sleq 7,0,8 # compute least significant limb of result
bdz Lend # if just two limb, skip loop
Loop: lu 0,4(4) # load next higher limb
stu 7,4(9) # store previous result during read latency
sleq 7,0,8 # compute result limb
bdn Loop # loop back until CTR is zero
Lend: stu 7,4(9) # store 2:nd most significant limb
Lend2: sre 7,0,6 # compute most significant limb
st 7,4(9) # store it" \
br

View File

@ -1,82 +0,0 @@
# IBM POWER __mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# s2_ptr r5
# size r6
.toc
.extern __mpn_sub_n[DS]
.extern .__mpn_sub_n
.csect [PR]
.align 2
.globl __mpn_sub_n
.globl .__mpn_sub_n
.csect __mpn_sub_n[DS]
__mpn_sub_n:
.long .__mpn_sub_n, TOC[tc0], 0
.csect [PR]
.__mpn_sub_n:
andil. 10,6,1 # odd or even number of limbs?
l 8,0(4) # load least significant s1 limb
l 0,0(5) # load least significant s2 limb
cal 3,-4(3) # offset res_ptr, it's updated before it's used
sri 10,6,1 # count for unrolled loop
sf 7,0,8 # subtract least significant limbs, set cy
mtctr 10 # copy count into CTR
beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
# We have an odd # of limbs. Add the first limbs separately.
cmpi 1,10,0 # is count for unrolled loop zero?
bne 1,L1 # branch if not
st 7,4(3)
sfe 3,0,0 # load !cy into ...
sfi 3,3,0 # ... return value register
br # return
# We added least significant limbs. Now reload the next limbs to enter loop.
L1: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
stu 7,4(3)
sfe 7,0,8 # subtract limbs, set cy
Leven: lu 9,4(4) # load s1 limb and update s1_ptr
lu 10,4(5) # load s2 limb and update s2_ptr
bdz Lend # If done, skip loop
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
sfe 11,10,9 # subtract previous limbs with cy, set cy
stu 7,4(3) #
lu 9,4(4) # load s1 limb and update s1_ptr
lu 10,4(5) # load s2 limb and update s2_ptr
sfe 7,0,8 # subtract previous limbs with cy, set cy
stu 11,4(3) #
bdn Loop # decrement CTR and loop back
Lend: sfe 11,10,9 # subtract limbs with cy, set cy
st 7,4(3) #
st 11,8(3) #
sfe 3,0,0 # load !cy into ...
sfi 3,3,0 # ... return value register
br

View File

@ -1,128 +0,0 @@
# IBM POWER __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
# the result from a second limb vector.
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# size r5
# s2_limb r6
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
# obtain that operation, we have to use the 32x32->64 signed multiplication
# instruction, and add the appropriate compensation to the high limb of the
# result. We add the multiplicand if the multiplier has its most significant
# bit set, and we add the multiplier if the multiplicand has its most
# significant bit set. We need to preserve the carry flag between each
# iteration, so we have to compute the compensation carefully (the natural,
# srai+and doesn't work). Since the POWER architecture has a branch unit
# we can branch in zero cycles, so that's how we perform the additions.
.toc
.csect .__mpn_submul_1[PR]
.align 2
.globl __mpn_submul_1
.globl .__mpn_submul_1
.csect __mpn_submul_1[DS]
__mpn_submul_1:
.long .__mpn_submul_1[PR], TOC[tc0], 0
.csect .__mpn_submul_1[PR]
.__mpn_submul_1:
cal 3,-4(3)
l 0,0(4)
cmpi 0,6,0
mtctr 5
mul 9,0,6
srai 7,0,31
and 7,7,6
mfmq 11
cax 9,9,7
l 7,4(3)
sf 8,11,7 # add res_limb
a 11,8,11 # invert cy (r11 is junk)
blt Lneg
Lpos: bdz Lend
Lploop: lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 10,0,6
mfmq 0
ae 11,0,9 # low limb + old_cy_limb + old cy
l 7,4(3)
aze 10,10 # propagate cy to new cy_limb
sf 8,11,7 # add res_limb
a 11,8,11 # invert cy (r11 is junk)
bge Lp0
cax 10,10,6 # adjust high limb for negative limb from s1
Lp0: bdz Lend0
lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
mfmq 0
ae 11,0,10
l 7,4(3)
aze 9,9
sf 8,11,7
a 11,8,11 # invert cy (r11 is junk)
bge Lp1
cax 9,9,6 # adjust high limb for negative limb from s1
Lp1: bdn Lploop
b Lend
Lneg: cax 9,9,0
bdz Lend
Lnloop: lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 10,0,6
mfmq 7
ae 11,7,9
l 7,4(3)
ae 10,10,0 # propagate cy to new cy_limb
sf 8,11,7 # add res_limb
a 11,8,11 # invert cy (r11 is junk)
bge Ln0
cax 10,10,6 # adjust high limb for negative limb from s1
Ln0: bdz Lend0
lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
mfmq 7
ae 11,7,10
l 7,4(3)
ae 9,9,0 # propagate cy to new cy_limb
sf 8,11,7 # add res_limb
a 11,8,11 # invert cy (r11 is junk)
bge Ln1
cax 9,9,6 # adjust high limb for negative limb from s1
Ln1: bdn Lnloop
b Lend
Lend0: cal 9,0(10)
Lend: st 8,4(3)
aze 3,9
br