MIPS: memset: Limit excessive `noreorder' assembly mode use

Rewrite to use the `reorder' assembly mode and remove manually scheduled
delay slots except where GAS cannot schedule a delay-slot instruction
due to a data dependency or a section switch (as is the case with the EX
macro).  No change in machine code produced.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
[paul.burton@mips.com:
  Fix conflict with commit 932afdeec1 ("MIPS: Add Kconfig variable for
  CPUs with unaligned load/store instructions")]
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20834/
Cc: Ralf Baechle <ralf@linux-mips.org>
commit 68dec269ee
parent 2f7619ae90
Author: Maciej W. Rozycki
Date: 2018-10-02 12:50:16 +01:00
Committed by: Paul Burton
1 changed file with 24 additions and 24 deletions
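To make the diff easier to follow, here is a minimal sketch of the two assembly modes (illustrative only, not part of the patch). In `noreorder' mode the programmer fills the branch delay slot by hand; in `reorder' mode GAS schedules an independent preceding instruction into the slot itself, which is exactly the rewrite applied to the `jr t1' sequence below:

	# `noreorder': the delay slot is filled manually; the
	# instruction after the branch executes unconditionally.
	.set	noreorder
	jr	t1
	 PTR_ADDU	a0, t0		# hand-scheduled delay slot
	.set	reorder

	# `reorder': GAS moves the independent PTR_ADDU into the
	# delay slot of `jr' on its own (jr t1 does not read a0).
	PTR_ADDU	a0, t0
	jr	t1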

@@ -78,7 +78,6 @@
 #endif
 	.endm
 
-	.set	noreorder
 	.align	5
 
 /*
@@ -94,13 +93,16 @@
 	.endif
 
 	sltiu		t0, a2, STORSIZE	/* very small region? */
+	.set		noreorder
 	bnez		t0, .Lsmall_memset\@
 	 andi		t0, a0, STORMASK	/* aligned? */
+	.set		reorder
 
 #ifdef CONFIG_CPU_MICROMIPS
 	move		t8, a1			/* used by 'swp' instruction */
 	move		t9, a1
 #endif
+	.set		noreorder
 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	beqz		t0, 1f
 	 PTR_SUBU	t0, STORSIZE		/* alignment in bytes */
@@ -111,6 +113,7 @@
 	 PTR_SUBU	t0, AT			/* alignment in bytes */
 	.set		at
 #endif
+	.set		reorder
 
 #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
 	R10KCBARRIER(0(ra))
@ -125,8 +128,10 @@
#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#define STORE_BYTE(N) \ #define STORE_BYTE(N) \
EX(sb, a1, N(a0), .Lbyte_fixup\@); \ EX(sb, a1, N(a0), .Lbyte_fixup\@); \
.set noreorder; \
beqz t0, 0f; \ beqz t0, 0f; \
PTR_ADDU t0, 1; PTR_ADDU t0, 1; \
.set reorder;
PTR_ADDU a2, t0 /* correct size */ PTR_ADDU a2, t0 /* correct size */
PTR_ADDU t0, 1 PTR_ADDU t0, 1
@@ -148,16 +153,14 @@
 #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
 1:	ori		t1, a2, 0x3f		/* # of full blocks */
 	xori		t1, 0x3f
+	andi		t0, a2, 0x40-STORSIZE
 	beqz		t1, .Lmemset_partial\@	/* no block to fill */
-	 andi		t0, a2, 0x40-STORSIZE
 
 	PTR_ADDU	t1, a0			/* end address */
-	.set		reorder
 1:	PTR_ADDIU	a0, 64
 	R10KCBARRIER(0(ra))
 	f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
 	bne		t1, a0, 1b
-	.set		noreorder
 
 .Lmemset_partial\@:
 	R10KCBARRIER(0(ra))
@@ -173,20 +176,18 @@
 	PTR_SUBU	t1, AT
 	.set		at
 #endif
+	PTR_ADDU	a0, t0			/* dest ptr */
 	jr		t1
-	 PTR_ADDU	a0, t0			/* dest ptr */
 
-	.set		push
-	.set		noreorder
-	.set		nomacro
 	/* ... but first do longs ... */
 	f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
-2:	.set		pop
-	andi		a2, STORMASK	/* At most one long to go */
+2:	andi		a2, STORMASK	/* At most one long to go */
 
+	.set		noreorder
 	beqz		a2, 1f
 #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
 	 PTR_ADDU	a0, a2		/* What's left */
+	.set		reorder
 	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
 	EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
@@ -195,6 +196,7 @@
 #endif
 #else
 	PTR_SUBU	t0, $0, a2
+	.set		reorder
 	move		a2, zero		/* No remaining longs */
 	PTR_ADDIU	t0, 1
 	STORE_BYTE(0)
@@ -210,20 +212,22 @@
 #endif
 0:
 #endif
-1:	jr		ra
-	 move		a2, zero
+1:	move		a2, zero
+	jr		ra
 
 .Lsmall_memset\@:
+	PTR_ADDU	t1, a0, a2
 	beqz		a2, 2f
-	 PTR_ADDU	t1, a0, a2
 
 1:	PTR_ADDIU	a0, 1			/* fill bytewise */
 	R10KCBARRIER(0(ra))
+	.set		noreorder
 	bne		t1, a0, 1b
 	 EX(sb, a1, -1(a0), .Lsmall_fixup\@)
+	.set		reorder
 
-2:	jr		ra			/* done */
-	 move		a2, zero
+2:	move		a2, zero
+	jr		ra			/* done */
 	.if __memset == 1
 	END(memset)
 	.set __memset, 0
@@ -237,14 +241,13 @@
 	 * a2 = a2 - t0 + 1
 	 */
 	PTR_SUBU	a2, t0
+	PTR_ADDIU	a2, 1
 	jr		ra
-	 PTR_ADDIU	a2, 1
 #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
 
 .Lfirst_fixup\@:
 	/* unset_bytes already in a2 */
 	jr		ra
-	 nop
 
 .Lfwd_fixup\@:
 	/*
@@ -255,8 +258,8 @@
 	andi		a2, 0x3f
 	LONG_L		t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, t1
+	LONG_SUBU	a2, t0
 	jr		ra
-	 LONG_SUBU	a2, t0
 
 .Lpartial_fixup\@:
 	/*
@@ -267,24 +270,21 @@
 	andi		a2, STORMASK
 	LONG_L		t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, a0
+	LONG_SUBU	a2, t0
 	jr		ra
-	 LONG_SUBU	a2, t0
 
 .Llast_fixup\@:
 	/* unset_bytes already in a2 */
 	jr		ra
-	 nop
 
 .Lsmall_fixup\@:
 	/*
 	 * unset_bytes = end_addr - current_addr + 1
 	 * a2 = t1 - a0 + 1
 	 */
-	.set		reorder
 	PTR_SUBU	a2, t1, a0
 	PTR_ADDIU	a2, 1
 	jr		ra
-	.set		noreorder
 
 	.endm
@@ -298,8 +298,8 @@
 LEAF(memset)
 EXPORT_SYMBOL(memset)
+	move		v0, a0			/* result */
 	beqz		a1, 1f
-	 move		v0, a0			/* result */
 
 	andi		a1, 0xff		/* spread fillword */
 	LONG_SLL	t1, a1, 8
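
Conversely, the two situations the commit message cites for keeping `noreorder' both appear in the hunks above; a condensed sketch of the patched sequences, with explanatory comments added:

	# Data dependency: the delay-slot `andi' overwrites t0, the
	# very register `bnez' tests, so GAS cannot schedule the slot
	# itself without changing which value the branch sees.
	.set	noreorder
	bnez	t0, .Lsmall_memset\@
	 andi	t0, a0, STORMASK	/* aligned? */
	.set	reorder

	# Section switch: the EX() macro emits the store plus an
	# exception-table entry in another section, and GAS will not
	# schedule across the section change, so the slot stays manual.
	.set	noreorder
	bne	t1, a0, 1b
	 EX(sb, a1, -1(a0), .Lsmall_fixup\@)
	.set	reorder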