lib1funcs.S: Add new wrapper.

2016-07-11  Hale Wang  <hale.wang@arm.com>
	    Andre Vieira  <andre.simoesdiasvieira@arm.com>

	* config/arm/lib1funcs.S: Add new wrapper.

Co-Authored-By: Andre Vieira <andre.simoesdiasvieira@arm.com>

From-SVN: r238215
This commit is contained in:
Hale Wang 2016-07-11 17:11:31 +00:00 committed by Andre Vieira
parent 9a54f10dbb
commit 827424041e
2 changed files with 223 additions and 32 deletions

View File

@ -1,3 +1,8 @@
2016-07-11 Hale Wang <hale.wang@arm.com>
Andre Vieira <andre.simoesdiasvieira@arm.com>
* config/arm/lib1funcs.S: Add new wrapper.
2016-07-07 Thomas Preud'homme <thomas.preudhomme@arm.com>
* config/arm/lib1funcs.S (__ARM_ARCH__): Define to 8 for ARMv8-M.

View File

@ -311,34 +311,13 @@ LSYM(Lend_fde):
#ifdef __ARM_EABI__
.macro THUMB_LDIV0 name signed
#ifdef NOT_ISA_TARGET_32BIT
.ifc \signed, unsigned
cmp r0, #0
beq 1f
push {r0, lr}
mov r0, #0
mvn r0, r0 @ 0xffffffff
1:
.else
cmp r0, #0
beq 2f
blt 3f
mov r0, #0
mvn r0, r0
lsr r0, r0, #1 @ 0x7fffffff
b 2f
3: mov r0, #0x80
lsl r0, r0, #24 @ 0x80000000
2:
.endif
push {r0, r1, r2}
ldr r0, 4f
adr r1, 4f
add r0, r1
str r0, [sp, #8]
bl SYM(__aeabi_idiv0)
@ We know we are not on armv4t, so pop pc is safe.
pop {r0, r1, pc}
.align 2
4:
.word __aeabi_idiv0 - 4b
pop {r1, pc}
#elif defined(__thumb2__)
.syntax unified
.ifc \signed, unsigned
@ -950,7 +929,170 @@ LSYM(Lover7):
add dividend, work
.endif
LSYM(Lgot_result):
.endm
.endm
/* If performance is preferred, the following functions are provided. */
#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
/* BranchToDiv n, label: compute curbit = dividend >> n and branch to
   label if curbit is lower (unsigned) than the divisor.  Used by the
   division bodies to choose where to enter the unrolled DoDiv sequence.
     n     - shift amount, written as an immediate (e.g. #4).
     label - target taken when (dividend >> n) < divisor.
   Overwrites curbit and the condition flags.  */
.macro BranchToDiv n, label
lsr curbit, dividend, \n
cmp curbit, divisor
blo \label
.endm
/* Body of div(n): one step of the unrolled shift-and-subtract division.
   Shift the dividend right by n bits and compare it with the divisor.
   If it is not lower, update the dividend with the subtraction result
   dividend - (divisor << n).  In both cases the final adc shifts the
   carry flag into the low bit of result.
     n - shift amount, written as an immediate (e.g. #3).
   Overwrites curbit and the condition flags; updates dividend and result.  */
.macro DoDiv n
lsr curbit, dividend, \n
cmp curbit, divisor
/* Carry clear here means (dividend >> n) < divisor: skip the subtraction
   and let the adc below shift in a 0.  */
bcc 1f
lsl curbit, divisor, \n
/* NOTE(review): relies on the flag-setting Thumb-1 form of sub leaving
   carry set when the subtraction does not borrow - confirm against the
   architecture manual.  */
sub dividend, dividend, curbit
/* result = (result << 1) + carry.  */
1: adc result, result
.endm
/* The body of division with positive divisor. Unless the divisor is very
   big, shift it up in multiples of four bits, since this is the amount of
   unwinding in the main division loop. Continue shifting until the divisor
   is larger than the dividend.

   On the RET path the quotient is left in the dividend register and the
   remainder in the divisor register; result and curbit are overwritten.
   A divisor that becomes zero after the second pre-shift is routed to
   LSYM(Ldiv0) through LSYM(Ldivbyzero_waypoint).

   The unrolled steps are written as DoDiv, matching the spelling of the
   macro definition, so the code does not depend on the assembler matching
   macro names case-insensitively.  */
.macro THUMB1_Div_Positive
mov result, #0
/* Choose the entry point into the unrolled sequence: enter at the first
   label for which (dividend >> n) < divisor.  */
BranchToDiv #1, LSYM(Lthumb1_div1)
BranchToDiv #4, LSYM(Lthumb1_div4)
BranchToDiv #8, LSYM(Lthumb1_div8)
BranchToDiv #12, LSYM(Lthumb1_div12)
BranchToDiv #16, LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_large_positive):
/* (dividend >> 16) >= divisor.  Pre-shift the divisor by 8 and seed result
   with 0xff000000 (0xff byte-reversed).  The set top bits act as a pass
   counter: see the bcs after the DoDiv #8 step below.  */
mov result, #0xff
lsl divisor, divisor, #8
rev result, result
lsr curbit, dividend, #16
cmp curbit, divisor
blo 1f
/* Still not large enough: shift the divisor by another 8 bits and extend
   the marker to 0xffff0000.  */
asr result, #8
lsl divisor, divisor, #8
/* Z set by the shift means the divisor is now zero.  */
beq LSYM(Ldivbyzero_waypoint)
1: lsr curbit, dividend, #12
cmp curbit, divisor
blo LSYM(Lthumb1_div12)
b LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_loop):
/* Undo one 8-bit pre-shift of the divisor before the next pass.  */
lsr divisor, divisor, #8
LSYM(Lthumb1_div16):
DoDiv #15
DoDiv #14
DoDiv #13
DoDiv #12
LSYM(Lthumb1_div12):
DoDiv #11
DoDiv #10
DoDiv #9
DoDiv #8
/* The carry tested here is the bit that the last adc shifted out of the
   top of result.  It is set only while marker bits seeded above remain,
   in which case another pass is made with the divisor shifted back down.  */
bcs LSYM(Lthumb1_div_loop)
LSYM(Lthumb1_div8):
DoDiv #7
DoDiv #6
DoDiv #5
LSYM(Lthumb1_div5):
DoDiv #4
LSYM(Lthumb1_div4):
DoDiv #3
LSYM(Lthumb1_div3):
DoDiv #2
LSYM(Lthumb1_div2):
DoDiv #1
LSYM(Lthumb1_div1):
/* Final step (shift of 0): divisor = dividend - divisor is the remainder
   when the subtraction does not borrow; otherwise the remainder is the
   dividend itself.  The carry from the sub is still live at the adc.  */
sub divisor, dividend, divisor
bcs 1f
cpy divisor, dividend
1: adc result, result
/* Move the quotient into the dividend register for the caller.  */
cpy dividend, result
RET
LSYM(Ldivbyzero_waypoint):
/* NOTE(review): presumably a trampoline because the conditional branch
   above has a shorter range than an unconditional b - confirm.  */
b LSYM(Ldiv0)
.endm
/* The body of division with negative divisor. Similar to
   THUMB1_Div_Positive except that the shift steps are in multiples
   of six bits.

   Both operands are first made non-negative; their signs are recorded in
   ip and re-applied after the unsigned division.  On the RET path the
   quotient is left in the dividend register and the remainder in the
   divisor register; result, curbit, ip and the flags are overwritten.  */
.macro THUMB1_Div_Negative
/* result = sign bit of the divisor (0 or 1); Z is set when it is 0.  */
lsr result, divisor, #31
beq 1f
neg divisor, divisor
/* The branch below tests the carry left by this shift, i.e. the sign of
   the dividend.  NOTE(review): curbit is expected to become 0 or -1 here
   (shift by 32) - confirm against the architecture manual.  */
1: asr curbit, dividend, #32
bcc 2f
neg dividend, dividend
/* Combine both signs into one word and park it in ip: bit 0 is
   sign(dividend) XOR sign(divisor), the remaining bits follow the sign
   of the dividend.  */
2: eor curbit, result
mov result, #0
cpy ip, curbit
/* Choose the entry point into the unrolled sequence.  */
BranchToDiv #4, LSYM(Lthumb1_div_negative4)
BranchToDiv #8, LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_large):
/* (dividend >> 8) >= divisor.  Pre-shift the divisor six bits at a time
   (up to four times) and seed result with 0xfc000000 (0xfc byte-reversed),
   extended by six more marker bits for each additional shift.  The marker
   bits drive the bcs after the DoDiv #2 step below.  */
mov result, #0xfc
lsl divisor, divisor, #6
rev result, result
lsr curbit, dividend, #8
cmp curbit, divisor
blo LSYM(Lthumb1_div_negative8)
lsl divisor, divisor, #6
asr result, result, #6
cmp curbit, divisor
blo LSYM(Lthumb1_div_negative8)
lsl divisor, divisor, #6
asr result, result, #6
cmp curbit, divisor
blo LSYM(Lthumb1_div_negative8)
lsl divisor, divisor, #6
/* Z set by the shift means the divisor is now zero.  */
beq LSYM(Ldivbyzero_negative)
asr result, result, #6
b LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_negative_loop):
/* Undo one 6-bit pre-shift of the divisor before the next pass.  */
lsr divisor, divisor, #6
LSYM(Lthumb1_div_negative8):
DoDiv #7
DoDiv #6
DoDiv #5
DoDiv #4
LSYM(Lthumb1_div_negative4):
DoDiv #3
DoDiv #2
/* Carry is the bit the last adc shifted out of the top of result; it is
   set only while marker bits seeded above remain.  */
bcs LSYM(Lthumb1_div_negative_loop)
DoDiv #1
/* Final step (shift of 0): divisor = dividend - divisor is the remainder
   when the subtraction does not borrow; otherwise the remainder is the
   dividend itself.  */
sub divisor, dividend, divisor
bcs 1f
cpy divisor, dividend
/* Reload the saved signs.  The adc still consumes the carry produced by
   the sub above.  */
1: cpy curbit, ip
adc result, result
/* Carry = bit 0 of the saved word (quotient is negative); what is left in
   curbit, and the N flag, follow the sign of the original dividend.  */
asr curbit, curbit, #1
cpy dividend, result
bcc 2f
neg dividend, dividend
/* neg changed the flags: re-derive N from the dividend sign.  */
cmp curbit, #0
/* The remainder takes the sign of the original dividend.  */
2: bpl 3f
neg divisor, divisor
3: RET
LSYM(Ldivbyzero_negative):
/* Division by zero: if the dividend was negated on entry (bit 0 of the
   saved word, since the divisor sign is 0 here), negate it back before
   reporting the error.  */
cpy curbit, ip
asr curbit, curbit, #1
bcc LSYM(Ldiv0)
neg dividend, dividend
/* NOTE(review): execution falls off the end of the macro here; presumably
   the code that follows the expansion is the Ldiv0 handler - confirm at
   the expansion site.  */
.endm
#endif /* ARM Thumb version. */
/* ------------------------------------------------------------------------ */
/* Start of the Real Functions */
/* ------------------------------------------------------------------------ */
@ -960,6 +1102,7 @@ LSYM(Lgot_result):
FUNC_START udivsi3
FUNC_ALIAS aeabi_uidiv udivsi3
#if defined(__OPTIMIZE_SIZE__)
cmp divisor, #0
beq LSYM(Ldiv0)
@ -977,6 +1120,14 @@ LSYM(udivsi3_skip_div0_test):
pop { work }
RET
/* Implementation of aeabi_uidiv for ARMv6-M. This version is only
used on ARMv6-M when an efficient implementation is needed. */
#else
LSYM(udivsi3_skip_div0_test):
THUMB1_Div_Positive
#endif /* __OPTIMIZE_SIZE__ */
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START udivsi3
@ -1028,12 +1179,21 @@ LSYM(udivsi3_skip_div0_test):
/* Thumb-1 aeabi_uidivmod: unsigned divide of r0 by r1, leaving the
   quotient in r0 and the remainder in r1.  A zero divisor branches to
   LSYM(Ldiv0).  */
FUNC_START aeabi_uidivmod
cmp r1, #0
beq LSYM(Ldiv0)
# if defined(__OPTIMIZE_SIZE__)
/* Size-optimized path: call the division body for the quotient, then
   compute remainder = dividend - divisor * quotient.  */
push {r0, r1, lr}
bl LSYM(udivsi3_skip_div0_test)
/* r1 = saved dividend, r2 = saved divisor, r3 = saved return address.  */
POP {r1, r2, r3}
mul r2, r0
sub r1, r1, r2
bx r3
# else
/* Both the quotient and remainder are calculated simultaneously
   in THUMB1_Div_Positive. There is no need to calculate the
   remainder again here.  The branch is unconditional and lr is left
   untouched, so the RET at the end of the division body returns
   straight to our caller; no return instruction is needed here.  */
b LSYM(udivsi3_skip_div0_test)
# endif /* __OPTIMIZE_SIZE__ */
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_uidivmod
cmp r1, #0
@ -1089,7 +1249,7 @@ LSYM(Lover10):
RET
#else /* ARM version. */
FUNC_START umodsi3
subs r2, r1, #1 @ compare divisor with 1
@ -1114,8 +1274,9 @@ LSYM(Lover10):
#if defined(__prefer_thumb__)
FUNC_START divsi3
FUNC_START divsi3
FUNC_ALIAS aeabi_idiv divsi3
#if defined(__OPTIMIZE_SIZE__)
cmp divisor, #0
beq LSYM(Ldiv0)
@ -1138,7 +1299,7 @@ LSYM(Lover11):
blo LSYM(Lgot_result)
THUMB_DIV_MOD_BODY 0
mov r0, result
mov work, ip
cmp work, #0
@ -1148,6 +1309,22 @@ LSYM(Lover12):
pop { work }
RET
/* Implementation of aeabi_idiv for ARMv6-M. This version is only
used on ARMv6-M when an efficient implementation is needed. */
#else
LSYM(divsi3_skip_div0_test):
cpy curbit, dividend
orr curbit, divisor
bmi LSYM(Lthumb1_div_negative)
LSYM(Lthumb1_div_positive):
THUMB1_Div_Positive
LSYM(Lthumb1_div_negative):
THUMB1_Div_Negative
#endif /* __OPTIMIZE_SIZE__ */
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START divsi3
@ -1159,8 +1336,8 @@ LSYM(Lover12):
RET
#else /* ARM/Thumb-2 version. */
ARM_FUNC_START divsi3
ARM_FUNC_START divsi3
ARM_FUNC_ALIAS aeabi_idiv divsi3
cmp r1, #0
@ -1214,12 +1391,21 @@ LSYM(divsi3_skip_div0_test):
/* Thumb-1 aeabi_idivmod: signed divide of r0 by r1, leaving the
   quotient in r0 and the remainder in r1.  A zero divisor branches to
   LSYM(Ldiv0).  */
FUNC_START aeabi_idivmod
cmp r1, #0
beq LSYM(Ldiv0)
# if defined(__OPTIMIZE_SIZE__)
/* Size-optimized path: call the division body for the quotient, then
   compute remainder = dividend - divisor * quotient.  */
push {r0, r1, lr}
bl LSYM(divsi3_skip_div0_test)
/* r1 = saved dividend, r2 = saved divisor, r3 = saved return address.  */
POP {r1, r2, r3}
mul r2, r0
sub r1, r1, r2
bx r3
# else
/* Both the quotient and remainder are calculated simultaneously
   in THUMB1_Div_Positive and THUMB1_Div_Negative. There is no
   need to calculate the remainder again here.  The branch is
   unconditional and lr is left untouched, so the RET at the end of the
   division body returns straight to our caller; no return instruction
   is needed here.  */
b LSYM(divsi3_skip_div0_test)
# endif /* __OPTIMIZE_SIZE__ */
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_idivmod
cmp r1, #0