libgcc: arm: convert thumb1 code to unified syntax

Unified syntax has been the official syntax for thumb1 assembly for
over 10 years now.  It's time we prepared for it to become the
default in the assembler.  But before we can start doing that, we
need to clean up some laggards from the olden days.  Libgcc support
for thumb1 is one such example.
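
As a rough illustration (mine, not part of the patch itself): the
conversion is mostly a matter of spelling out the flag-setting
behaviour that the old divided syntax left implicit, e.g.

	.thumb
	.syntax divided
	lsl	r4, #31		@ divided syntax: low-register data ops
				@ set the flags implicitly
	.syntax unified
	lsls	r4, #31		@ unified syntax spells out the 's'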

This patch converts all of the legacy (divided) syntax that I could
find over to unified code.  The identification was done using a trick
version of gas that defaulted to unified mode and faulted whenever
divided syntax was encountered.  The code produced was then compared
against the old code to check for differences.  One such difference
does exist: in unified syntax 'movs rd, rn' is encoded as
'lsls rd, rn, #0' rather than 'adds rd, rn, #0'.  That is a
deliberate change, introduced because the lsls encoding more closely
reflects the behaviour of 'movs' in arm state (where only some of
the condition flags are modified).
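
A sketch of that one difference, with the 16-bit encodings as I
understand them (worth checking against the ARM ARM):

	.thumb
	.syntax divided
	mov	r0, r1		@ encoded as adds r0, r1, #0 (0x1c08);
				@ sets N, Z, C and V
	.syntax unified
	movs	r0, r1		@ encoded as lsls r0, r1, #0 (0x0008);
				@ sets only N and Z, like arm-state movs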

	* config/arm/bpabi-v6m.S (aeabi_lcmp): Convert thumb1 code to unified
	syntax.
	(aeabi_ulcmp, aeabi_ldivmod, aeabi_uldivmod): Likewise.
	(aeabi_frsub, aeabi_cfcmpeq, aeabi_fcmpeq): Likewise.
	(aeabi_fcmp, aeabi_drsub, aeabi_cdrcmple): Likewise.
	(aeabi_cdcmpeq, aeabi_dcmpeq, aeabi_dcmp): Likewise.
	* config/arm/lib1funcs.S (Lend_fde): Convert thumb1 code to unified
	syntax.
	(divsi3, modsi3): Likewise.
	(clzdi2, ctzsi2): Likewise.
	* config/arm/libunwind.S (restore_core_regs): Convert thumb1 code to
	unified syntax.
	(UNWIND_WRAPPER): Likewise.
Author: Richard Earnshaw <rearnsha@arm.com>
Date:   2020-03-03 16:02:24 +00:00
Commit: 6b9ce2b4eb
Parent: 8e6d0dba16

4 changed files with 314 additions and 279 deletions

libgcc/ChangeLog

@@ -1,3 +1,19 @@
2020-03-03  Richard Earnshaw  <rearnsha@arm.com>

	* config/arm/bpabi-v6m.S (aeabi_lcmp): Convert thumb1 code to unified
	syntax.
	(aeabi_ulcmp, aeabi_ldivmod, aeabi_uldivmod): Likewise.
	(aeabi_frsub, aeabi_cfcmpeq, aeabi_fcmpeq): Likewise.
	(aeabi_fcmp, aeabi_drsub, aeabi_cdrcmple): Likewise.
	(aeabi_cdcmpeq, aeabi_dcmpeq, aeabi_dcmp): Likewise.
	* config/arm/lib1funcs.S (Lend_fde): Convert thumb1 code to unified
	syntax.
	(divsi3, modsi3): Likewise.
	(clzdi2, ctzsi2): Likewise.
	* config/arm/libunwind.S (restore_core_regs): Convert thumb1 code to
	unified syntax.
	(UNWIND_WRAPPER): Likewise.

2020-03-02  Martin Liska  <mliska@suse.cz>

	* libgcov-interface.c: Remove duplicate

libgcc/config/arm/bpabi-v6m.S

@@ -39,21 +39,21 @@ FUNC_START aeabi_lcmp
cmp xxh, yyh
beq 1f
bgt 2f
mov r0, #1
neg r0, r0
movs r0, #1
negs r0, r0
RET
2:
mov r0, #1
movs r0, #1
RET
1:
sub r0, xxl, yyl
subs r0, xxl, yyl
beq 1f
bhi 2f
mov r0, #1
neg r0, r0
movs r0, #1
negs r0, r0
RET
2:
mov r0, #1
movs r0, #1
1:
RET
FUNC_END aeabi_lcmp
@@ -65,15 +65,15 @@ FUNC_START aeabi_lcmp
FUNC_START aeabi_ulcmp
cmp xxh, yyh
bne 1f
sub r0, xxl, yyl
subs r0, xxl, yyl
beq 2f
1:
bcs 1f
mov r0, #1
neg r0, r0
movs r0, #1
negs r0, r0
RET
1:
mov r0, #1
movs r0, #1
2:
RET
FUNC_END aeabi_ulcmp
@@ -91,29 +91,29 @@ FUNC_START aeabi_ulcmp
cmp xxl, #0
2:
beq 3f
mov xxh, #0
mvn xxh, xxh @ 0xffffffff
mov xxl, xxh
movs xxh, #0
mvns xxh, xxh @ 0xffffffff
movs xxl, xxh
3:
.else
blt 6f
bgt 4f
cmp xxl, #0
beq 5f
4: mov xxl, #0
mvn xxl, xxl @ 0xffffffff
lsr xxh, xxl, #1 @ 0x7fffffff
4: movs xxl, #0
mvns xxl, xxl @ 0xffffffff
lsrs xxh, xxl, #1 @ 0x7fffffff
b 5f
6: mov xxh, #0x80
lsl xxh, xxh, #24 @ 0x80000000
mov xxl, #0
6: movs xxh, #0x80
lsls xxh, xxh, #24 @ 0x80000000
movs xxl, #0
5:
.endif
@ tailcalls are tricky on v6-m.
push {r0, r1, r2}
ldr r0, 1f
adr r1, 1f
add r0, r1
adds r0, r1
str r0, [sp, #8]
@ We know we are not on armv4t, so pop pc is safe.
pop {r0, r1, pc}
@@ -128,15 +128,15 @@ FUNC_START aeabi_ulcmp
FUNC_START aeabi_ldivmod
test_div_by_zero signed
push {r0, r1}
mov r0, sp
push {r0, lr}
ldr r0, [sp, #8]
bl SYM(__gnu_ldivmod_helper)
ldr r3, [sp, #4]
mov lr, r3
add sp, sp, #8
pop {r2, r3}
push {r0, r1}
mov r0, sp
push {r0, lr}
ldr r0, [sp, #8]
bl SYM(__gnu_ldivmod_helper)
ldr r3, [sp, #4]
mov lr, r3
add sp, sp, #8
pop {r2, r3}
RET
FUNC_END aeabi_ldivmod
@@ -147,15 +147,15 @@ FUNC_START aeabi_ldivmod
FUNC_START aeabi_uldivmod
test_div_by_zero unsigned
push {r0, r1}
mov r0, sp
push {r0, lr}
ldr r0, [sp, #8]
bl SYM(__udivmoddi4)
ldr r3, [sp, #4]
mov lr, r3
add sp, sp, #8
pop {r2, r3}
push {r0, r1}
mov r0, sp
push {r0, lr}
ldr r0, [sp, #8]
bl SYM(__udivmoddi4)
ldr r3, [sp, #4]
mov lr, r3
add sp, sp, #8
pop {r2, r3}
RET
FUNC_END aeabi_uldivmod
@@ -166,9 +166,9 @@ FUNC_START aeabi_uldivmod
FUNC_START aeabi_frsub
push {r4, lr}
mov r4, #1
lsl r4, #31
eor r0, r0, r4
movs r4, #1
lsls r4, #31
eors r0, r0, r4
bl __aeabi_fadd
pop {r4, pc}
@@ -181,7 +181,7 @@ FUNC_START aeabi_frsub
FUNC_START aeabi_cfrcmple
mov ip, r0
mov r0, r1
movs r0, r1
mov r1, ip
b 6f
@@ -196,8 +196,8 @@ FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
cmp r0, #0
@ Clear the C flag if the return value was -1, indicating
@ that the first operand was smaller than the second.
bmi 1f
mov r1, #0
bmi 1f
movs r1, #0
cmn r0, r1
1:
pop {r0, r1, r2, r3, r4, pc}
@@ -210,8 +210,8 @@ FUNC_START aeabi_fcmpeq
push {r4, lr}
bl __eqsf2
neg r0, r0
add r0, r0, #1
negs r0, r0
adds r0, r0, #1
pop {r4, pc}
FUNC_END aeabi_fcmpeq
@@ -223,10 +223,10 @@ FUNC_START aeabi_fcmp\cond
bl __\helper\mode
cmp r0, #0
b\cond 1f
mov r0, #0
movs r0, #0
pop {r4, pc}
1:
mov r0, #1
movs r0, #1
pop {r4, pc}
FUNC_END aeabi_fcmp\cond
@@ -244,9 +244,9 @@ COMPARISON ge, ge
FUNC_START aeabi_drsub
push {r4, lr}
mov r4, #1
lsl r4, #31
eor xxh, xxh, r4
movs r4, #1
lsls r4, #31
eors xxh, xxh, r4
bl __aeabi_dadd
pop {r4, pc}
@@ -259,10 +259,10 @@ FUNC_START aeabi_drsub
FUNC_START aeabi_cdrcmple
mov ip, r0
mov r0, r2
movs r0, r2
mov r2, ip
mov ip, r1
mov r1, r3
movs r1, r3
mov r3, ip
b 6f
@@ -277,8 +277,8 @@ FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
cmp r0, #0
@ Clear the C flag if the return value was -1, indicating
@ that the first operand was smaller than the second.
bmi 1f
mov r1, #0
bmi 1f
movs r1, #0
cmn r0, r1
1:
pop {r0, r1, r2, r3, r4, pc}
@@ -291,8 +291,8 @@ FUNC_START aeabi_dcmpeq
push {r4, lr}
bl __eqdf2
neg r0, r0
add r0, r0, #1
negs r0, r0
adds r0, r0, #1
pop {r4, pc}
FUNC_END aeabi_dcmpeq
@@ -304,10 +304,10 @@ FUNC_START aeabi_dcmp\cond
bl __\helper\mode
cmp r0, #0
b\cond 1f
mov r0, #0
movs r0, #0
pop {r4, pc}
1:
mov r0, #1
movs r0, #1
pop {r4, pc}
FUNC_END aeabi_dcmp\cond
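
An editorial aside on the hunks above (not part of the commit):
thumb1 register moves that involve a high register such as ip or lr
keep the plain 'mov', which never sets the flags, while low-register
moves become 'movs' to preserve the flag-setting behaviour the old
divided-syntax 'mov' had.  Roughly:

	.syntax unified
	.thumb
	mov	ip, r0		@ high register: no flag-setting form
	movs	r0, r1		@ low registers: flags set, matching
				@ the old divided-syntax behaviour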

libgcc/config/arm/lib1funcs.S

@@ -22,6 +22,10 @@ a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Everything in this file should now use unified syntax. */
.syntax unified
/* An executable stack is *not* required for these functions. */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
@@ -270,7 +274,7 @@ LSYM(Lend_fde):
#ifdef NOT_ISA_TARGET_32BIT
push {r0, lr}
mov r0, #0
movs r0, #0
bl SYM(__aeabi_idiv0)
@ We know we are not on armv4t, so pop pc is safe.
pop {r1, pc}
@@ -310,7 +314,7 @@ LSYM(Lend_fde):
push { r1, lr }
98: cfi_push 98b - __\name, 0xe, -0x4, 0x8
bl SYM (__div0)
mov r0, #0 @ About as wrong as it could be.
movs r0, #0 @ About as wrong as it could be.
#if defined (__INTERWORKING__)
pop { r1, r2 }
bx r2
@@ -349,7 +353,7 @@ SYM (\name):
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
# if defined(__thumb2__)
#define THUMB_SYNTAX .syntax divided
#define THUMB_SYNTAX
# else
#define THUMB_SYNTAX
# endif
@@ -725,8 +729,8 @@ pc .req r15
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
@ Load the constant 0x10000000 into our work register.
mov work, #1
lsl work, #28
movs work, #1
lsls work, #28
LSYM(Loop1):
@ Unless the divisor is very big, shift it up in multiples of
@ four bits, since this is the amount of unwinding in the main
@@ -736,12 +740,12 @@ LSYM(Loop1):
bhs LSYM(Lbignum)
cmp divisor, dividend
bhs LSYM(Lbignum)
lsl divisor, #4
lsl curbit, #4
lsls divisor, #4
lsls curbit, #4
b LSYM(Loop1)
LSYM(Lbignum):
@ Set work to 0x80000000
lsl work, #3
lsls work, #3
LSYM(Loop2):
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
@@ -749,8 +753,8 @@ LSYM(Loop2):
bhs LSYM(Loop3)
cmp divisor, dividend
bhs LSYM(Loop3)
lsl divisor, #1
lsl curbit, #1
lsls divisor, #1
lsls curbit, #1
b LSYM(Loop2)
LSYM(Loop3):
@ Test for possible subtractions ...
@@ -758,39 +762,39 @@ LSYM(Loop3):
@ ... On the final pass, this may subtract too much from the dividend,
@ so keep track of which subtractions are done, we can fix them up
@ afterwards.
mov overdone, #0
movs overdone, #0
cmp dividend, divisor
blo LSYM(Lover1)
sub dividend, dividend, divisor
subs dividend, dividend, divisor
LSYM(Lover1):
lsr work, divisor, #1
lsrs work, divisor, #1
cmp dividend, work
blo LSYM(Lover2)
sub dividend, dividend, work
subs dividend, dividend, work
mov ip, curbit
mov work, #1
ror curbit, work
orr overdone, curbit
movs work, #1
rors curbit, work
orrs overdone, curbit
mov curbit, ip
LSYM(Lover2):
lsr work, divisor, #2
lsrs work, divisor, #2
cmp dividend, work
blo LSYM(Lover3)
sub dividend, dividend, work
subs dividend, dividend, work
mov ip, curbit
mov work, #2
ror curbit, work
orr overdone, curbit
movs work, #2
rors curbit, work
orrs overdone, curbit
mov curbit, ip
LSYM(Lover3):
lsr work, divisor, #3
lsrs work, divisor, #3
cmp dividend, work
blo LSYM(Lover4)
sub dividend, dividend, work
subs dividend, dividend, work
mov ip, curbit
mov work, #3
ror curbit, work
orr overdone, curbit
movs work, #3
rors curbit, work
orrs overdone, curbit
mov curbit, ip
LSYM(Lover4):
mov ip, curbit
@@ -800,46 +804,46 @@ LSYM(Lover4):
@ since the "bit" will have been shifted out at the bottom.
cmp dividend, divisor
blo LSYM(Lover1)
sub dividend, dividend, divisor
orr result, result, curbit
subs dividend, dividend, divisor
orrs result, result, curbit
LSYM(Lover1):
lsr work, divisor, #1
lsrs work, divisor, #1
cmp dividend, work
blo LSYM(Lover2)
sub dividend, dividend, work
lsr work, curbit, #1
orr result, work
subs dividend, dividend, work
lsrs work, curbit, #1
orrs result, work
LSYM(Lover2):
lsr work, divisor, #2
lsrs work, divisor, #2
cmp dividend, work
blo LSYM(Lover3)
sub dividend, dividend, work
lsr work, curbit, #2
orr result, work
subs dividend, dividend, work
lsrs work, curbit, #2
orrs result, work
LSYM(Lover3):
lsr work, divisor, #3
lsrs work, divisor, #3
cmp dividend, work
blo LSYM(Lover4)
sub dividend, dividend, work
lsr work, curbit, #3
orr result, work
subs dividend, dividend, work
lsrs work, curbit, #3
orrs result, work
LSYM(Lover4):
.endif
cmp dividend, #0 @ Early termination?
beq LSYM(Lover5)
lsr curbit, #4 @ No, any more bits to do?
lsrs curbit, #4 @ No, any more bits to do?
beq LSYM(Lover5)
lsr divisor, #4
lsrs divisor, #4
b LSYM(Loop3)
LSYM(Lover5):
.if \modulo
@ Any subtractions that we should not have done will be recorded in
@ the top three bits of "overdone".  Exactly which were not needed
@ is governed by the position of the bit, stored in ip.
mov work, #0xe
lsl work, #28
and overdone, work
movs work, #0xe
lsls work, #28
ands overdone, work
beq LSYM(Lgot_result)
@ If we terminated early, because dividend became zero, then the
@@ -849,33 +853,33 @@ LSYM(Lover5):
@ the bit in ip could be in the top two bits which might then match
@ with one of the smaller RORs.
mov curbit, ip
mov work, #0x7
movs work, #0x7
tst curbit, work
beq LSYM(Lgot_result)
mov curbit, ip
mov work, #3
ror curbit, work
movs work, #3
rors curbit, work
tst overdone, curbit
beq LSYM(Lover6)
lsr work, divisor, #3
add dividend, work
lsrs work, divisor, #3
adds dividend, work
LSYM(Lover6):
mov curbit, ip
mov work, #2
ror curbit, work
movs work, #2
rors curbit, work
tst overdone, curbit
beq LSYM(Lover7)
lsr work, divisor, #2
add dividend, work
lsrs work, divisor, #2
adds dividend, work
LSYM(Lover7):
mov curbit, ip
mov work, #1
ror curbit, work
movs work, #1
rors curbit, work
tst overdone, curbit
beq LSYM(Lgot_result)
lsr work, divisor, #1
add dividend, work
lsrs work, divisor, #1
adds dividend, work
.endif
LSYM(Lgot_result):
.endm
@@ -885,7 +889,7 @@ LSYM(Lgot_result):
/* Branch to div(n), and jump to label if curbit is lower (LO) than the divisor. */
.macro BranchToDiv n, label
lsr curbit, dividend, \n
lsrs curbit, dividend, \n
cmp curbit, divisor
blo \label
.endm
@@ -893,13 +897,13 @@ LSYM(Lgot_result):
/* Body of div(n). Shift the divisor left by n bits and compare it with
the dividend. If it fits, update the dividend with the subtraction result. */
.macro DoDiv n
lsr curbit, dividend, \n
lsrs curbit, dividend, \n
cmp curbit, divisor
bcc 1f
lsl curbit, divisor, \n
sub dividend, dividend, curbit
lsls curbit, divisor, \n
subs dividend, dividend, curbit
1: adc result, result
1: adcs result, result
.endm
/* The body of division with positive divisor. Unless the divisor is very
@@ -907,29 +911,29 @@ LSYM(Lgot_result):
unwinding in the main division loop. Continue shifting until the divisor
is larger than the dividend. */
.macro THUMB1_Div_Positive
mov result, #0
movs result, #0
BranchToDiv #1, LSYM(Lthumb1_div1)
BranchToDiv #4, LSYM(Lthumb1_div4)
BranchToDiv #8, LSYM(Lthumb1_div8)
BranchToDiv #12, LSYM(Lthumb1_div12)
BranchToDiv #16, LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_large_positive):
mov result, #0xff
lsl divisor, divisor, #8
movs result, #0xff
lsls divisor, divisor, #8
rev result, result
lsr curbit, dividend, #16
lsrs curbit, dividend, #16
cmp curbit, divisor
blo 1f
asr result, #8
lsl divisor, divisor, #8
asrs result, #8
lsls divisor, divisor, #8
beq LSYM(Ldivbyzero_waypoint)
1: lsr curbit, dividend, #12
1: lsrs curbit, dividend, #12
cmp curbit, divisor
blo LSYM(Lthumb1_div12)
b LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_loop):
lsr divisor, divisor, #8
lsrs divisor, divisor, #8
LSYM(Lthumb1_div16):
Dodiv #15
Dodiv #14
@@ -954,11 +958,11 @@ LSYM(Lthumb1_div3):
LSYM(Lthumb1_div2):
Dodiv #1
LSYM(Lthumb1_div1):
sub divisor, dividend, divisor
subs divisor, dividend, divisor
bcs 1f
cpy divisor, dividend
1: adc result, result
1: adcs result, result
cpy dividend, result
RET
@@ -970,43 +974,43 @@ LSYM(Ldivbyzero_waypoint):
THUMB1_Div_Positive except that the shift steps are in multiples
of six bits. */
.macro THUMB1_Div_Negative
lsr result, divisor, #31
lsrs result, divisor, #31
beq 1f
neg divisor, divisor
negs divisor, divisor
1: asr curbit, dividend, #32
1: asrs curbit, dividend, #32
bcc 2f
neg dividend, dividend
negs dividend, dividend
2: eor curbit, result
mov result, #0
2: eors curbit, result
movs result, #0
cpy ip, curbit
BranchToDiv #4, LSYM(Lthumb1_div_negative4)
BranchToDiv #8, LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_large):
mov result, #0xfc
lsl divisor, divisor, #6
movs result, #0xfc
lsls divisor, divisor, #6
rev result, result
lsr curbit, dividend, #8
lsrs curbit, dividend, #8
cmp curbit, divisor
blo LSYM(Lthumb1_div_negative8)
lsl divisor, divisor, #6
asr result, result, #6
lsls divisor, divisor, #6
asrs result, result, #6
cmp curbit, divisor
blo LSYM(Lthumb1_div_negative8)
lsl divisor, divisor, #6
asr result, result, #6
lsls divisor, divisor, #6
asrs result, result, #6
cmp curbit, divisor
blo LSYM(Lthumb1_div_negative8)
lsl divisor, divisor, #6
lsls divisor, divisor, #6
beq LSYM(Ldivbyzero_negative)
asr result, result, #6
asrs result, result, #6
b LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_negative_loop):
lsr divisor, divisor, #6
lsrs divisor, divisor, #6
LSYM(Lthumb1_div_negative8):
DoDiv #7
DoDiv #6
@@ -1017,28 +1021,28 @@ LSYM(Lthumb1_div_negative4):
DoDiv #2
bcs LSYM(Lthumb1_div_negative_loop)
DoDiv #1
sub divisor, dividend, divisor
subs divisor, dividend, divisor
bcs 1f
cpy divisor, dividend
1: cpy curbit, ip
adc result, result
asr curbit, curbit, #1
adcs result, result
asrs curbit, curbit, #1
cpy dividend, result
bcc 2f
neg dividend, dividend
negs dividend, dividend
cmp curbit, #0
2: bpl 3f
neg divisor, divisor
negs divisor, divisor
3: RET
LSYM(Ldivbyzero_negative):
cpy curbit, ip
asr curbit, curbit, #1
asrs curbit, curbit, #1
bcc LSYM(Ldiv0)
neg dividend, dividend
negs dividend, dividend
.endm
#endif /* ARM Thumb version. */
@@ -1056,8 +1060,8 @@ LSYM(Ldivbyzero_negative):
cmp divisor, #0
beq LSYM(Ldiv0)
LSYM(udivsi3_skip_div0_test):
mov curbit, #1
mov result, #0
movs curbit, #1
movs result, #0
push { work }
cmp dividend, divisor
@@ -1065,7 +1069,7 @@ LSYM(udivsi3_skip_div0_test):
THUMB_DIV_MOD_BODY 0
mov r0, result
movs r0, result
pop { work }
RET
@@ -1184,7 +1188,7 @@ ARM_FUNC_START aeabi_uidivmod
cmp divisor, #0
beq LSYM(Ldiv0)
mov curbit, #1
movs curbit, #1
cmp dividend, divisor
bhs LSYM(Lover10)
RET
@@ -1263,7 +1267,7 @@ LSYM(Lover12):
#else
LSYM(divsi3_skip_div0_test):
cpy curbit, dividend
orr curbit, divisor
orrs curbit, divisor
bmi LSYM(Lthumb1_div_negative)
LSYM(Lthumb1_div_positive):
@@ -1395,11 +1399,11 @@ ARM_FUNC_START aeabi_idivmod
FUNC_START modsi3
mov curbit, #1
movs curbit, #1
cmp divisor, #0
beq LSYM(Ldiv0)
bpl LSYM(Lover10)
neg divisor, divisor @ Loops below use unsigned.
negs divisor, divisor @ Loops below use unsigned.
LSYM(Lover10):
push { work }
@ Need to save the sign of the dividend, unfortunately, we need
@@ -1408,7 +1412,7 @@ LSYM(Lover10):
push { dividend }
cmp dividend, #0
bpl LSYM(Lover11)
neg dividend, dividend
negs dividend, dividend
LSYM(Lover11):
cmp dividend, divisor
blo LSYM(Lgot_result)
@@ -1418,7 +1422,7 @@ LSYM(Lover11):
pop { work }
cmp work, #0
bpl LSYM(Lover12)
neg dividend, dividend
negs dividend, dividend
LSYM(Lover12):
pop { work }
RET
@@ -1540,12 +1544,12 @@ LSYM(Lover12):
address, so just clear pc..pc+1. */
#if defined __thumb__ && !defined __thumb2__
push {r7}
mov r7, #0xf
lsl r7, #16
add r7, #2
movs r7, #0xf
lsls r7, #16
adds r7, #2
adr r0, . + 4
add r1, r0, #1
mov r2, #0
adds r1, r0, #1
movs r2, #0
svc 0
pop {r7}
#else
@@ -1595,17 +1599,17 @@ LSYM(Lover12):
FUNC_ALIAS aeabi_llsr lshrdi3
#ifdef __thumb__
lsr al, r2
mov r3, ah
lsr ah, r2
lsrs al, r2
movs r3, ah
lsrs ah, r2
mov ip, r3
sub r2, #32
lsr r3, r2
orr al, r3
neg r2, r2
subs r2, #32
lsrs r3, r2
orrs al, r3
negs r2, r2
mov r3, ip
lsl r3, r2
orr al, r3
lsls r3, r2
orrs al, r3
RET
#else
subs r3, r2, #32
@@ -1627,21 +1631,21 @@ LSYM(Lover12):
FUNC_ALIAS aeabi_lasr ashrdi3
#ifdef __thumb__
lsr al, r2
mov r3, ah
asr ah, r2
sub r2, #32
lsrs al, r2
movs r3, ah
asrs ah, r2
subs r2, #32
@ If r2 is negative at this point the following step would OR
@ the sign bit into all of AL. That's not what we want...
bmi 1f
mov ip, r3
asr r3, r2
orr al, r3
asrs r3, r2
orrs al, r3
mov r3, ip
1:
neg r2, r2
lsl r3, r2
orr al, r3
negs r2, r2
lsls r3, r2
orrs al, r3
RET
#else
subs r3, r2, #32
@@ -1664,17 +1668,17 @@ LSYM(Lover12):
FUNC_ALIAS aeabi_llsl ashldi3
#ifdef __thumb__
lsl ah, r2
mov r3, al
lsl al, r2
lsls ah, r2
movs r3, al
lsls al, r2
mov ip, r3
sub r2, #32
lsl r3, r2
orr ah, r3
neg r2, r2
subs r2, #32
lsls r3, r2
orrs ah, r3
negs r2, r2
mov r3, ip
lsr r3, r2
orr ah, r3
lsrs r3, r2
orrs ah, r3
RET
#else
subs r3, r2, #32
@@ -1695,26 +1699,26 @@ LSYM(Lover12):
#ifdef L_clzsi2
#ifdef NOT_ISA_TARGET_32BIT
FUNC_START clzsi2
mov r1, #28
mov r3, #1
lsl r3, r3, #16
movs r1, #28
movs r3, #1
lsls r3, r3, #16
cmp r0, r3 /* 0x10000 */
bcc 2f
lsr r0, r0, #16
sub r1, r1, #16
2: lsr r3, r3, #8
lsrs r0, r0, #16
subs r1, r1, #16
2: lsrs r3, r3, #8
cmp r0, r3 /* #0x100 */
bcc 2f
lsr r0, r0, #8
sub r1, r1, #8
2: lsr r3, r3, #4
lsrs r0, r0, #8
subs r1, r1, #8
2: lsrs r3, r3, #4
cmp r0, r3 /* #0x10 */
bcc 2f
lsr r0, r0, #4
sub r1, r1, #4
lsrs r0, r0, #4
subs r1, r1, #4
2: adr r2, 1f
ldrb r0, [r2, r0]
add r0, r0, r1
adds r0, r0, r1
bx lr
.align 2
1:
@@ -1757,34 +1761,49 @@ ARM_FUNC_START clzsi2
# ifdef NOT_ISA_TARGET_32BIT
FUNC_START clzdi2
push {r4, lr}
# else
ARM_FUNC_START clzdi2
do_push {r4, lr}
# endif
cmp xxh, #0
bne 1f
# ifdef __ARMEB__
# ifdef __ARMEB__
movs r0, xxl
bl __clzsi2
adds r0, r0, #32
b 2f
1:
bl __clzsi2
# else
bl __clzsi2
adds r0, r0, #32
b 2f
1:
movs r0, xxh
bl __clzsi2
# endif
2:
pop {r4, pc}
# else /* NOT_ISA_TARGET_32BIT */
ARM_FUNC_START clzdi2
do_push {r4, lr}
cmp xxh, #0
bne 1f
# ifdef __ARMEB__
mov r0, xxl
bl __clzsi2
add r0, r0, #32
b 2f
1:
bl __clzsi2
# else
# else
bl __clzsi2
add r0, r0, #32
b 2f
1:
mov r0, xxh
bl __clzsi2
# endif
# endif
2:
# ifdef NOT_ISA_TARGET_32BIT
pop {r4, pc}
# else
RETLDM r4
# endif
FUNC_END clzdi2
# endif /* NOT_ISA_TARGET_32BIT */
#else /* defined (__ARM_FEATURE_CLZ) */
@@ -1803,28 +1822,28 @@ ARM_FUNC_START clzdi2
#ifdef L_ctzsi2
#ifdef NOT_ISA_TARGET_32BIT
FUNC_START ctzsi2
neg r1, r0
and r0, r0, r1
mov r1, #28
mov r3, #1
lsl r3, r3, #16
negs r1, r0
ands r0, r0, r1
movs r1, #28
movs r3, #1
lsls r3, r3, #16
cmp r0, r3 /* 0x10000 */
bcc 2f
lsr r0, r0, #16
sub r1, r1, #16
2: lsr r3, r3, #8
lsrs r0, r0, #16
subs r1, r1, #16
2: lsrs r3, r3, #8
cmp r0, r3 /* #0x100 */
bcc 2f
lsr r0, r0, #8
sub r1, r1, #8
2: lsr r3, r3, #4
lsrs r0, r0, #8
subs r1, r1, #8
2: lsrs r3, r3, #4
cmp r0, r3 /* #0x10 */
bcc 2f
lsr r0, r0, #4
sub r1, r1, #4
lsrs r0, r0, #4
subs r1, r1, #4
2: adr r2, 1f
ldrb r0, [r2, r0]
sub r0, r0, r1
subs r0, r0, r1
bx lr
.align 2
1:
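
An aside on the division hunks above (not part of the commit): the
step macros rely on 'adcs result, result' to shift the quotient left
one bit and insert the new bit from the carry flag in a single
instruction.  A standalone sketch of one step, in the style of the
DoDiv macro with n = 4 and hypothetical register assignments
(r0 dividend, r1 divisor, r2 scratch, r3 result):

	.syntax unified
	.thumb
	lsrs	r2, r0, #4	@ curbit = dividend >> 4
	cmp	r2, r1		@ carry set iff divisor<<4 fits
	bcc	1f		@ no: quotient bit is 0 (carry clear)
	lsls	r2, r1, #4
	subs	r0, r0, r2	@ yes: subtract; subs leaves carry set
1:	adcs	r3, r3		@ result = (result << 1) | carry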

libgcc/config/arm/libunwind.S

@@ -63,28 +63,28 @@
/* r0 points to a 16-word block. Upload these values to the actual core
state. */
FUNC_START restore_core_regs
mov r1, r0
add r1, r1, #52
ldmia r1!, {r3, r4, r5}
sub r3, r3, #4
mov ip, r3
str r5, [r3]
mov lr, r4
movs r1, r0
adds r1, r1, #52
ldmia r1!, {r3, r4, r5}
subs r3, r3, #4
mov ip, r3
str r5, [r3]
mov lr, r4
/* Restore r8-r11. */
mov r1, r0
add r1, r1, #32
ldmia r1!, {r2, r3, r4, r5}
mov r8, r2
mov r9, r3
mov sl, r4
mov fp, r5
mov r1, r0
add r1, r1, #8
ldmia r1!, {r2, r3, r4, r5, r6, r7}
ldr r1, [r0, #4]
ldr r0, [r0]
mov sp, ip
pop {pc}
movs r1, r0
adds r1, r1, #32
ldmia r1!, {r2, r3, r4, r5}
mov r8, r2
mov r9, r3
mov sl, r4
mov fp, r5
movs r1, r0
adds r1, r1, #8
ldmia r1!, {r2, r3, r4, r5, r6, r7}
ldr r1, [r0, #4]
ldr r0, [r0]
mov sp, ip
pop {pc}
FUNC_END restore_core_regs
UNPREFIX restore_core_regs
@@ -132,38 +132,38 @@ FUNC_START gnu_Unwind_Save_WMMXC
FUNC_START \name
/* Create a phase2_vrs structure. */
/* Save r0 in the PC slot so we can use it as a scratch register. */
push {r0}
add r0, sp, #4
push {r0, lr} /* Push original SP and LR. */
push {r0}
add r0, sp, #4
push {r0, lr} /* Push original SP and LR. */
/* Make space for r8-r12. */
sub sp, sp, #20
sub sp, sp, #20
/* Save low registers. */
push {r0, r1, r2, r3, r4, r5, r6, r7}
push {r0, r1, r2, r3, r4, r5, r6, r7}
/* Save high registers. */
add r0, sp, #32
mov r1, r8
mov r2, r9
mov r3, sl
mov r4, fp
mov r5, ip
stmia r0!, {r1, r2, r3, r4, r5}
add r0, sp, #32
mov r1, r8
mov r2, r9
mov r3, sl
mov r4, fp
mov r5, ip
stmia r0!, {r1, r2, r3, r4, r5}
/* Restore original low register values. */
add r0, sp, #4
ldmia r0!, {r1, r2, r3, r4, r5}
add r0, sp, #4
ldmia r0!, {r1, r2, r3, r4, r5}
/* Restore original r0. */
ldr r0, [sp, #60]
str r0, [sp]
ldr r0, [sp, #60]
str r0, [sp]
/* Demand-save flags, plus an extra word for alignment. */
mov r3, #0
push {r2, r3}
movs r3, #0
push {r2, r3}
/* Point r1 at the block. Pass r[0..nargs) unchanged. */
add r\nargs, sp, #4
add r\nargs, sp, #4
bl SYM (__gnu\name)
bl SYM (__gnu\name)
ldr r3, [sp, #64]
add sp, sp, #72
bx r3
ldr r3, [sp, #64]
add sp, sp, #72
bx r3
FUNC_END \name
UNPREFIX \name