linux/arch/arm/lib/delay-loop.S

69 lines
1.3 KiB
ArmAsm
Raw Normal View History

/*
* linux/arch/arm/lib/delay.S
*
* Copyright (C) 1995, 1996 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/delay.h>
.text
.LC0: .word loops_per_jiffy
.LC1: .word UDELAY_MULT
/*
* r0 <= 2000
* lpj <= 0x01ffffff (max. 3355 bogomips)
* HZ <= 1000
*/
ENTRY(__loop_udelay)
ldr r2, .LC1
mul r0, r2, r0
ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
mov r1, #-1
ldr r2, .LC0
ldr r2, [r2] @ max = 0x01ffffff
add r0, r0, r1, lsr #32-14
mov r0, r0, lsr #14 @ max = 0x0001ffff
add r2, r2, r1, lsr #32-10
mov r2, r2, lsr #10 @ max = 0x00007fff
mul r0, r2, r0 @ max = 2^32-1
add r0, r0, r1, lsr #32-6
movs r0, r0, lsr #6
moveq pc, lr
/*
* loops = r0 * HZ * loops_per_jiffy / 1000000
*/
ARM: 7907/1: lib: delay-loop: Add align directive to fix BogoMIPS calculation Currently mx53 (CortexA8) running at 1GHz reports: Calibrating delay loop... 663.55 BogoMIPS (lpj=3317760) Tom Evans verified that alignments of 0x0 and 0x8 run the two instructions of __loop_delay in one clock cycle (1 clock/loop), while alignments of 0x4 and 0xc take 3 clocks to run the loop twice. (1.5 clock/loop) The original object code looks like this: 00000010 <__loop_const_udelay>: 10: e3e01000 mvn r1, #0 14: e51f201c ldr r2, [pc, #-28] ; 0 <__loop_udelay-0x8> 18: e5922000 ldr r2, [r2] 1c: e0800921 add r0, r0, r1, lsr #18 20: e1a00720 lsr r0, r0, #14 24: e0822b21 add r2, r2, r1, lsr #22 28: e1a02522 lsr r2, r2, #10 2c: e0000092 mul r0, r2, r0 30: e0800d21 add r0, r0, r1, lsr #26 34: e1b00320 lsrs r0, r0, #6 38: 01a0f00e moveq pc, lr 0000003c <__loop_delay>: 3c: e2500001 subs r0, r0, #1 40: 8afffffe bhi 3c <__loop_delay> 44: e1a0f00e mov pc, lr After adding the 'align 3' directive to __loop_delay (align to 8 bytes): 00000010 <__loop_const_udelay>: 10: e3e01000 mvn r1, #0 14: e51f201c ldr r2, [pc, #-28] ; 0 <__loop_udelay-0x8> 18: e5922000 ldr r2, [r2] 1c: e0800921 add r0, r0, r1, lsr #18 20: e1a00720 lsr r0, r0, #14 24: e0822b21 add r2, r2, r1, lsr #22 28: e1a02522 lsr r2, r2, #10 2c: e0000092 mul r0, r2, r0 30: e0800d21 add r0, r0, r1, lsr #26 34: e1b00320 lsrs r0, r0, #6 38: 01a0f00e moveq pc, lr 3c: e320f000 nop {0} 00000040 <__loop_delay>: 40: e2500001 subs r0, r0, #1 44: 8afffffe bhi 40 <__loop_delay> 48: e1a0f00e mov pc, lr 4c: e320f000 nop {0} , which now reports: Calibrating delay loop... 996.14 BogoMIPS (lpj=4980736) Some more test results: On mx31 (ARM1136) running at 532 MHz, before the patch: Calibrating delay loop... 351.43 BogoMIPS (lpj=1757184) On mx31 (ARM1136) running at 532 MHz after the patch: Calibrating delay loop... 528.79 BogoMIPS (lpj=2643968) Also tested on mx6 (CortexA9) and on mx27 (ARM926), which shows the same BogoMIPS value before and after this patch. Reported-by: Tom Evans <tom_usenet@optusnet.com.au> Suggested-by: Tom Evans <tom_usenet@optusnet.com.au> Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2013-11-30 15:24:42 +01:00
.align 3
@ Delay routine
ENTRY(__loop_delay)
subs r0, r0, #1
#if 0
movls pc, lr
subs r0, r0, #1
movls pc, lr
subs r0, r0, #1
movls pc, lr
subs r0, r0, #1
movls pc, lr
subs r0, r0, #1
movls pc, lr
subs r0, r0, #1
movls pc, lr
subs r0, r0, #1
movls pc, lr
subs r0, r0, #1
#endif
bhi __loop_delay
mov pc, lr
ENDPROC(__loop_udelay)
ENDPROC(__loop_const_udelay)
ENDPROC(__loop_delay)