This set of changes clarifies the conditions for the R5900 short loop fix and extends its test with the border cases of six and seven instructions.

* testsuite/gas/mips/r5900.s: Extend the R5900 short loop fix
	test with border cases.
	* testsuite/gas/mips/r5900.d: Add extra expected disassembly.
	* config/tc-mips.c (can_swap_branch_p): Clarify the R5900 short
	loop hardware bug conditions.  Correct note on the R5900
	instruction count short loop fix.
This commit is contained in:
Fredrik Noring 2018-10-19 09:47:55 +01:00 committed by Nick Clifton
parent 08acaf5caf
commit 33d64ca5db
4 changed files with 71 additions and 10 deletions

View File

@ -1,3 +1,12 @@
2018-10-19 Fredrik Noring <noring@nocrew.org>
* testsuite/gas/mips/r5900.s: Extend the R5900 short loop fix
test with border cases.
* testsuite/gas/mips/r5900.d: Add extra expected disassembly.
* config/tc-mips.c (can_swap_branch_p): Clarify the R5900 short
loop hardware bug conditions. Correct note on the R5900
instruction count short loop fix.
2018-10-16 Matthew Malcomson <matthew.malcomson@arm.com>
* testsuite/gas/aarch64/illegal-dotproduct.d: New test.

View File

@ -6982,9 +6982,21 @@ can_swap_branch_p (struct mips_cl_insn *ip, expressionS *address_expr,
&& insn_length (history) != 4)
return FALSE;
/* On R5900 short loops need to be fixed by inserting a nop in
the branch delay slots.
A short loop can be terminated too early. */
/* On the R5900 short loops need to be fixed by inserting a NOP in the
branch delay slot.
Under certain conditions the short loop bug causes loops to execute
only once or twice. We must ensure that the assembler never
generates loops that satisfy all of the following conditions:
- the loop consists of six or fewer instructions
(including the branch delay slot);
- the loop contains only one conditional branch instruction, at the end
of the loop;
- the loop does not contain any other branch or jump instructions;
- the branch delay slot of the loop is not a NOP (EE 2.9 or later).
We need to do this because of a hardware bug in the R5900 chip. */
if (mips_opts.arch == CPU_R5900
/* Check if instruction has a parameter, ignore "j $31". */
&& (address_expr != NULL)
@ -7002,8 +7014,8 @@ can_swap_branch_p (struct mips_cl_insn *ip, expressionS *address_expr,
|| (ip->insn_opcode & 0xffff0000) == 0x04110000)) /* bgezal $0 */
{
int distance;
/* Check if loop is shorter than 6 instructions including
branch and delay slot. */
/* Check if the loop is at most 6 instructions long,
including the branch and its delay slot. */
distance = frag_now_fix () - S_GET_VALUE (address_expr->X_add_symbol);
if (distance <= 20)
{

View File

@ -87,7 +87,23 @@ Disassembly of section \.text:
[0-9a-f]+ <[^>]*> 2403012c li \$3,300
[0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1
[0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1
[0-9a-f]+ <[^>]*> 1460fffd bnez \$3,[0-9a-f]+ <short_loop1>
[0-9a-f]+ <[^>]*> 1460fffd bnez \$3,[0-9a-f]+ <short_loop3>
[0-9a-f]+ <[^>]*> 00000000 nop
[0-9a-f]+ <[^>]*> 2403012c li \$3,300
[0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1
[0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1
[0-9a-f]+ <[^>]*> 20a5ffff addi \$5,\$5,-1
[0-9a-f]+ <[^>]*> 20c6ffff addi \$6,\$6,-1
[0-9a-f]+ <[^>]*> 20e7ffff addi \$7,\$7,-1
[0-9a-f]+ <[^>]*> 1460fffa bnez \$3,[0-9a-f]+ <short_loop6>
[0-9a-f]+ <[^>]*> 00000000 nop
[0-9a-f]+ <[^>]*> 2403012c li \$3,300
[0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1
[0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1
[0-9a-f]+ <[^>]*> 20a5ffff addi \$5,\$5,-1
[0-9a-f]+ <[^>]*> 20c6ffff addi \$6,\$6,-1
[0-9a-f]+ <[^>]*> 20e7ffff addi \$7,\$7,-1
[0-9a-f]+ <[^>]*> 1460fffa bnez \$3,[0-9a-f]+ <short_loop7>
[0-9a-f]+ <[^>]*> 2108ffff addi \$8,\$8,-1
[0-9a-f]+ <[^>]*> 24040003 li \$4,3
\.\.\.

View File

@ -120,13 +120,37 @@ stuff:
.set pop
.set push
.set reorder
# Short loop fix.
# Test the short loop fix with 3 loop instructions.
li $3, 300
short_loop1:
short_loop3:
addi $3, -1
addi $4, -1
# NOP should be inserted in branch delay.
bne $3, $0, short_loop1
# A NOP will be inserted in the branch delay slot.
bne $3, $0, short_loop3
# Test the short loop fix with 6 loop instructions.
li $3, 300
short_loop6:
addi $3, -1
addi $4, -1
addi $5, -1
addi $6, -1
addi $7, -1
# A NOP will be inserted in the branch delay slot.
bne $3, $0, short_loop6
# Test the short loop fix with 7 loop instructions.
li $3, 300
short_loop7:
addi $3, -1
addi $4, -1
addi $5, -1
addi $6, -1
addi $7, -1
addi $8, -1
# The short loop fix does not apply to loops with
# more than 6 instructions.
bne $3, $0, short_loop7
li $4, 3
.set pop