809201325a
From-SVN: r164756
145 lines
3.5 KiB
NASM
145 lines
3.5 KiB
NASM
###################################-
|
|
#
|
|
# Copyright 2009, 2010 Free Software Foundation, Inc.
|
|
#
|
|
# Contributed by Michael Eager <eager@eagercon.com>.
|
|
#
|
|
# This file is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by the
|
|
# Free Software Foundation; either version 3, or (at your option) any
|
|
# later version.
|
|
#
|
|
# GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
|
# License for more details.
|
|
#
|
|
# Under Section 7 of GPL version 3, you are granted additional
|
|
# permissions described in the GCC Runtime Library Exception, version
|
|
# 3.1, as published by the Free Software Foundation.
|
|
#
|
|
# You should have received a copy of the GNU General Public License and
|
|
# a copy of the GCC Runtime Library Exception along with this program;
|
|
# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
# <http://www.gnu.org/licenses/>.
|
|
#
|
|
# muldi3_hard.asm
|
|
#
|
|
# Multiply operation for 64 bit integers, for devices with hard multiply
|
|
# Input : Operand1[H] in Reg r5
|
|
# Operand1[L] in Reg r6
|
|
# Operand2[H] in Reg r7
|
|
# Operand2[L] in Reg r8
|
|
# Output: Result[H] in Reg r3
|
|
# Result[L] in Reg r4
|
|
#
|
|
# Explanation:
|
|
#
|
|
# Both input numbers are split into four 16-bit pieces as follows
|
|
# op1 = A B C D
|
|
# op2 = E F G H
|
|
# result = D * H
|
|
# + (C * H + D * G) << 16
|
|
# + (B * H + C * G + D * F) << 32
|
|
# + (A * H + B * G + C * F + D * E) << 48
|
|
#
|
|
# Only the low 64 bits of the product are kept
|
|
#
|
|
#######################################
|
|
|
|
# ----------------------------------------------------------------------
# muldi3_hardproc
#
# 64-bit by 64-bit multiply keeping the low 64 bits of the product, built
# from 16x16 partial products with the hardware `mul` instruction.
#
#   In:    r5 = operand 1, high word      r6 = operand 1, low word
#          r7 = operand 2, high word      r8 = operand 2, low word
#   Out:   r3 = result, high word         r4 = result, low word
#   Uses:  r7-r12 as scratch; r20-r27 are saved on entry and restored
#          before returning.
#
# Stack layout (byte offsets from r1 after the 40-byte adjustment):
#    0..28   saved r20..r27
#   32..39   result, assembled as four halfwords:
#            Pos0 @32, Pos1 @34, Pos2 @36, Pos3 @38
#   44..56   copies of r5..r8.  These offsets lie above the 40-byte frame,
#            i.e. in the caller's stack area (presumably the argument
#            home slots of the MicroBlaze ABI -- confirm).
#
# With op1 = A B C D and op2 = E F G H (16 bits each):
#   r20 = A, r21 = B, r22 = C, r23 = D, r24 = E, r25 = F, r26 = G, r27 = H
#
# NOTE(review): reading the halfword at the lowest address of each stored
# word as the most significant piece (A, E, Pos0, Pos2) relies on
# big-endian memory order -- confirm before using on a little-endian
# configuration.
# ----------------------------------------------------------------------
	.globl	muldi3_hardproc
	.ent	muldi3_hardproc
muldi3_hardproc:
	addi	r1,r1,-40		# allocate the 40-byte frame

	# Spill the operands so they can be re-read 16 bits at a time.
	swi	r5,r1,44		# op1 high word
	swi	r6,r1,48		# op1 low word
	swi	r7,r1,52		# op2 high word
	swi	r8,r1,56		# op2 low word

	# Save r20-r27; they hold A..H for the rest of the routine.
	sw	r20,r1,r0
	swi	r21,r1,4
	swi	r22,r1,8
	swi	r23,r1,12
	swi	r24,r1,16
	swi	r25,r1,20
	swi	r26,r1,24
	swi	r27,r1,28

	# Load the eight 16-bit pieces, zero-extended.
	lhui	r20,r1,44		# A
	lhui	r21,r1,46		# B
	lhui	r22,r1,48		# C
	lhui	r23,r1,50		# D
	lhui	r24,r1,52		# E
	lhui	r25,r1,54		# F
	lhui	r26,r1,56		# G
	lhui	r27,r1,58		# H

	# Step 1: D * H.  Low half is the final Pos3; high half is a
	# provisional Pos2 that step 2 folds into its sum.
	mul	r9,r23,r27
	swi	r9,r1,36		# Pos2 and Pos3

	# Step 2: Hi(step 1) + C * H + D * G  ==>  Pos1 and Pos2.
	# r12 collects the carry out of bit 31 of this sum.  That carry has
	# weight 1 << 48 in the result, so it is added to Pos0 at the end.
	lhui	r11,r1,36		# provisional Pos2
	mul	r9,r22,r27		# C * H
	mul	r10,r23,r26		# D * G
	add	r9,r9,r10
	addc	r12,r0,r0		# r12 = carry of C*H + D*G
	add	r9,r9,r11
	addc	r12,r12,r0		# r12 += carry of adding Pos2
	shi	r9,r1,36		# final Pos2 = low half of the sum
	swi	r9,r1,32		# bounce through memory to extract ...
	lhui	r11,r1,32		# ... r11 = high half of the sum
	shi	r11,r1,34		# provisional Pos1 (rewritten by step 3)

	# Step 3: Hi(step 2) + B * H + C * G + D * F  ==>  Pos0 and Pos1.
	# Carries out of bit 31 fall beyond bit 63 of the result and are
	# intentionally dropped.
	mul	r9,r21,r27		# B * H
	mul	r10,r22,r26		# C * G
	mul	r7,r23,r25		# D * F
	add	r9,r9,r11
	add	r9,r9,r10
	add	r9,r9,r7
	swi	r9,r1,32		# provisional Pos0 and final Pos1

	# Step 4: Hi(step 3) + A * H + B * G + C * F + D * E  ==>  Pos0.
	# Only the low halfword of the sum is stored, so the upper bits of
	# r9 need no further treatment.
	lhui	r11,r1,32		# provisional Pos0
	mul	r9,r20,r27		# A * H
	mul	r10,r21,r26		# B * G
	mul	r7,r22,r25		# C * F
	mul	r8,r23,r24		# D * E
	add	r9,r9,r11
	add	r9,r9,r10
	add	r9,r9,r7
	add	r9,r9,r8
	shi	r9,r1,32		# Pos0

	# Fold the step 2 carry into Pos0, then fetch the assembled result.
	lhui	r3,r1,32
	add	r3,r3,r12
	shi	r3,r1,32
	lwi	r3,r1,32		# result high word
	lwi	r4,r1,36		# result low word

	# Restore r20-r27.
	lw	r20,r1,r0
	lwi	r21,r1,4
	lwi	r22,r1,8
	lwi	r23,r1,12
	lwi	r24,r1,16
	lwi	r25,r1,20
	lwi	r26,r1,24
	lwi	r27,r1,28

	# Return.  The frame is released by the instruction in the rtsd
	# delay slot, so the addi must stay directly after the rtsd.
	rtsd	r15,8
	addi	r1,r1,40

	.end	muldi3_hardproc
|
|
|
|
|