MIPS: memset: Limit excessive `noreorder' assembly mode use
Rewrite to use the `reorder' assembly mode and remove manually scheduled
delay slots except where GAS cannot schedule a delay-slot instruction
due to a data dependency or a section switch (as is the case with the EX
macro). No change in machine code produced.
Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
[paul.burton@mips.com:
  Fix conflict with commit 932afdeec1 ("MIPS: Add Kconfig variable for
  CPUs with unaligned load/store instructions")]
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20834/
Cc: Ralf Baechle <ralf@linux-mips.org>
This commit is contained in:
parent 2f7619ae90
commit 68dec269ee
|
@ -78,7 +78,6 @@
|
||||||
#endif
|
#endif
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.set noreorder
|
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -94,13 +93,16 @@
|
||||||
.endif
|
.endif
|
||||||
|
|
||||||
sltiu t0, a2, STORSIZE /* very small region? */
|
sltiu t0, a2, STORSIZE /* very small region? */
|
||||||
|
.set noreorder
|
||||||
bnez t0, .Lsmall_memset\@
|
bnez t0, .Lsmall_memset\@
|
||||||
andi t0, a0, STORMASK /* aligned? */
|
andi t0, a0, STORMASK /* aligned? */
|
||||||
|
.set reorder
|
||||||
|
|
||||||
#ifdef CONFIG_CPU_MICROMIPS
|
#ifdef CONFIG_CPU_MICROMIPS
|
||||||
move t8, a1 /* used by 'swp' instruction */
|
move t8, a1 /* used by 'swp' instruction */
|
||||||
move t9, a1
|
move t9, a1
|
||||||
#endif
|
#endif
|
||||||
|
.set noreorder
|
||||||
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
|
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
|
||||||
beqz t0, 1f
|
beqz t0, 1f
|
||||||
PTR_SUBU t0, STORSIZE /* alignment in bytes */
|
PTR_SUBU t0, STORSIZE /* alignment in bytes */
|
||||||
|
@ -111,6 +113,7 @@
|
||||||
PTR_SUBU t0, AT /* alignment in bytes */
|
PTR_SUBU t0, AT /* alignment in bytes */
|
||||||
.set at
|
.set at
|
||||||
#endif
|
#endif
|
||||||
|
.set reorder
|
||||||
|
|
||||||
#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
|
#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
|
||||||
R10KCBARRIER(0(ra))
|
R10KCBARRIER(0(ra))
|
||||||
|
@ -125,8 +128,10 @@
|
||||||
#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
|
#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
|
||||||
#define STORE_BYTE(N) \
|
#define STORE_BYTE(N) \
|
||||||
EX(sb, a1, N(a0), .Lbyte_fixup\@); \
|
EX(sb, a1, N(a0), .Lbyte_fixup\@); \
|
||||||
|
.set noreorder; \
|
||||||
beqz t0, 0f; \
|
beqz t0, 0f; \
|
||||||
PTR_ADDU t0, 1;
|
PTR_ADDU t0, 1; \
|
||||||
|
.set reorder;
|
||||||
|
|
||||||
PTR_ADDU a2, t0 /* correct size */
|
PTR_ADDU a2, t0 /* correct size */
|
||||||
PTR_ADDU t0, 1
|
PTR_ADDU t0, 1
|
||||||
|
@ -148,16 +153,14 @@
|
||||||
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
|
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
|
||||||
1: ori t1, a2, 0x3f /* # of full blocks */
|
1: ori t1, a2, 0x3f /* # of full blocks */
|
||||||
xori t1, 0x3f
|
xori t1, 0x3f
|
||||||
beqz t1, .Lmemset_partial\@ /* no block to fill */
|
|
||||||
andi t0, a2, 0x40-STORSIZE
|
andi t0, a2, 0x40-STORSIZE
|
||||||
|
beqz t1, .Lmemset_partial\@ /* no block to fill */
|
||||||
|
|
||||||
PTR_ADDU t1, a0 /* end address */
|
PTR_ADDU t1, a0 /* end address */
|
||||||
.set reorder
|
|
||||||
1: PTR_ADDIU a0, 64
|
1: PTR_ADDIU a0, 64
|
||||||
R10KCBARRIER(0(ra))
|
R10KCBARRIER(0(ra))
|
||||||
f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
|
f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
|
||||||
bne t1, a0, 1b
|
bne t1, a0, 1b
|
||||||
.set noreorder
|
|
||||||
|
|
||||||
.Lmemset_partial\@:
|
.Lmemset_partial\@:
|
||||||
R10KCBARRIER(0(ra))
|
R10KCBARRIER(0(ra))
|
||||||
|
@ -173,20 +176,18 @@
|
||||||
PTR_SUBU t1, AT
|
PTR_SUBU t1, AT
|
||||||
.set at
|
.set at
|
||||||
#endif
|
#endif
|
||||||
jr t1
|
|
||||||
PTR_ADDU a0, t0 /* dest ptr */
|
PTR_ADDU a0, t0 /* dest ptr */
|
||||||
|
jr t1
|
||||||
|
|
||||||
.set push
|
|
||||||
.set noreorder
|
|
||||||
.set nomacro
|
|
||||||
/* ... but first do longs ... */
|
/* ... but first do longs ... */
|
||||||
f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
|
f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
|
||||||
2: .set pop
|
2: andi a2, STORMASK /* At most one long to go */
|
||||||
andi a2, STORMASK /* At most one long to go */
|
|
||||||
|
|
||||||
|
.set noreorder
|
||||||
beqz a2, 1f
|
beqz a2, 1f
|
||||||
#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
|
#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
|
||||||
PTR_ADDU a0, a2 /* What's left */
|
PTR_ADDU a0, a2 /* What's left */
|
||||||
|
.set reorder
|
||||||
R10KCBARRIER(0(ra))
|
R10KCBARRIER(0(ra))
|
||||||
#ifdef __MIPSEB__
|
#ifdef __MIPSEB__
|
||||||
EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
|
EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
|
||||||
|
@ -195,6 +196,7 @@
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
PTR_SUBU t0, $0, a2
|
PTR_SUBU t0, $0, a2
|
||||||
|
.set reorder
|
||||||
move a2, zero /* No remaining longs */
|
move a2, zero /* No remaining longs */
|
||||||
PTR_ADDIU t0, 1
|
PTR_ADDIU t0, 1
|
||||||
STORE_BYTE(0)
|
STORE_BYTE(0)
|
||||||
|
@ -210,20 +212,22 @@
|
||||||
#endif
|
#endif
|
||||||
0:
|
0:
|
||||||
#endif
|
#endif
|
||||||
1: jr ra
|
1: move a2, zero
|
||||||
move a2, zero
|
jr ra
|
||||||
|
|
||||||
.Lsmall_memset\@:
|
.Lsmall_memset\@:
|
||||||
beqz a2, 2f
|
|
||||||
PTR_ADDU t1, a0, a2
|
PTR_ADDU t1, a0, a2
|
||||||
|
beqz a2, 2f
|
||||||
|
|
||||||
1: PTR_ADDIU a0, 1 /* fill bytewise */
|
1: PTR_ADDIU a0, 1 /* fill bytewise */
|
||||||
R10KCBARRIER(0(ra))
|
R10KCBARRIER(0(ra))
|
||||||
|
.set noreorder
|
||||||
bne t1, a0, 1b
|
bne t1, a0, 1b
|
||||||
EX(sb, a1, -1(a0), .Lsmall_fixup\@)
|
EX(sb, a1, -1(a0), .Lsmall_fixup\@)
|
||||||
|
.set reorder
|
||||||
|
|
||||||
2: jr ra /* done */
|
2: move a2, zero
|
||||||
move a2, zero
|
jr ra /* done */
|
||||||
.if __memset == 1
|
.if __memset == 1
|
||||||
END(memset)
|
END(memset)
|
||||||
.set __memset, 0
|
.set __memset, 0
|
||||||
|
@ -237,14 +241,13 @@
|
||||||
* a2 = a2 - t0 + 1
|
* a2 = a2 - t0 + 1
|
||||||
*/
|
*/
|
||||||
PTR_SUBU a2, t0
|
PTR_SUBU a2, t0
|
||||||
jr ra
|
|
||||||
PTR_ADDIU a2, 1
|
PTR_ADDIU a2, 1
|
||||||
|
jr ra
|
||||||
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
|
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
|
||||||
|
|
||||||
.Lfirst_fixup\@:
|
.Lfirst_fixup\@:
|
||||||
/* unset_bytes already in a2 */
|
/* unset_bytes already in a2 */
|
||||||
jr ra
|
jr ra
|
||||||
nop
|
|
||||||
|
|
||||||
.Lfwd_fixup\@:
|
.Lfwd_fixup\@:
|
||||||
/*
|
/*
|
||||||
|
@ -255,8 +258,8 @@
|
||||||
andi a2, 0x3f
|
andi a2, 0x3f
|
||||||
LONG_L t0, THREAD_BUADDR(t0)
|
LONG_L t0, THREAD_BUADDR(t0)
|
||||||
LONG_ADDU a2, t1
|
LONG_ADDU a2, t1
|
||||||
jr ra
|
|
||||||
LONG_SUBU a2, t0
|
LONG_SUBU a2, t0
|
||||||
|
jr ra
|
||||||
|
|
||||||
.Lpartial_fixup\@:
|
.Lpartial_fixup\@:
|
||||||
/*
|
/*
|
||||||
|
@ -267,24 +270,21 @@
|
||||||
andi a2, STORMASK
|
andi a2, STORMASK
|
||||||
LONG_L t0, THREAD_BUADDR(t0)
|
LONG_L t0, THREAD_BUADDR(t0)
|
||||||
LONG_ADDU a2, a0
|
LONG_ADDU a2, a0
|
||||||
jr ra
|
|
||||||
LONG_SUBU a2, t0
|
LONG_SUBU a2, t0
|
||||||
|
jr ra
|
||||||
|
|
||||||
.Llast_fixup\@:
|
.Llast_fixup\@:
|
||||||
/* unset_bytes already in a2 */
|
/* unset_bytes already in a2 */
|
||||||
jr ra
|
jr ra
|
||||||
nop
|
|
||||||
|
|
||||||
.Lsmall_fixup\@:
|
.Lsmall_fixup\@:
|
||||||
/*
|
/*
|
||||||
* unset_bytes = end_addr - current_addr + 1
|
* unset_bytes = end_addr - current_addr + 1
|
||||||
* a2 = t1 - a0 + 1
|
* a2 = t1 - a0 + 1
|
||||||
*/
|
*/
|
||||||
.set reorder
|
|
||||||
PTR_SUBU a2, t1, a0
|
PTR_SUBU a2, t1, a0
|
||||||
PTR_ADDIU a2, 1
|
PTR_ADDIU a2, 1
|
||||||
jr ra
|
jr ra
|
||||||
.set noreorder
|
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
@ -298,8 +298,8 @@
|
||||||
|
|
||||||
LEAF(memset)
|
LEAF(memset)
|
||||||
EXPORT_SYMBOL(memset)
|
EXPORT_SYMBOL(memset)
|
||||||
beqz a1, 1f
|
|
||||||
move v0, a0 /* result */
|
move v0, a0 /* result */
|
||||||
|
beqz a1, 1f
|
||||||
|
|
||||||
andi a1, 0xff /* spread fillword */
|
andi a1, 0xff /* spread fillword */
|
||||||
LONG_SLL t1, a1, 8
|
LONG_SLL t1, a1, 8
|
||||||
|
|
Loading…
Reference in New Issue