This set of changes clarifies the conditions for the R5900 short loop fix and extends its test with the border cases of six and seven instructions.
* testsuite/gas/mips/r5900.s: Extend the R5900 short loop fix test with border cases. * testsuite/gas/mips/r5900.d: Add extra expected disassembly. * config/tc-mips.c (can_swap_branch_p): Clarify the R5900 short loop hardware bug conditions. Correct note on the R5900 instruction count short loop fix.
This commit is contained in:
parent
08acaf5caf
commit
33d64ca5db
@ -1,3 +1,12 @@
|
||||
2018-10-19 Fredrik Noring <noring@nocrew.org>
|
||||
|
||||
* testsuite/gas/mips/r5900.s: Extend the R5900 short loop fix
|
||||
test with border cases.
|
||||
* testsuite/gas/mips/r5900.d: Add extra expected disassembly.
|
||||
* config/tc-mips.c (can_swap_branch_p): Clarify the R5900 short
|
||||
loop hardware bug conditions. Correct note on the R5900
|
||||
instruction count short loop fix.
|
||||
|
||||
2018-10-16 Matthew Malcomson <matthew.malcomson@arm.com>
|
||||
|
||||
* testsuite/gas/aarch64/illegal-dotproduct.d: New test.
|
||||
|
@ -6982,9 +6982,21 @@ can_swap_branch_p (struct mips_cl_insn *ip, expressionS *address_expr,
|
||||
&& insn_length (history) != 4)
|
||||
return FALSE;
|
||||
|
||||
/* On R5900 short loops need to be fixed by inserting a nop in
|
||||
the branch delay slots.
|
||||
A short loop can be terminated too early. */
|
||||
/* On the R5900 short loops need to be fixed by inserting a NOP in the
|
||||
branch delay slot.
|
||||
|
||||
The short loop bug under certain conditions causes loops to execute
|
||||
only once or twice. We must ensure that the assembler never
|
||||
generates loops that satisfy all of the following conditions:
|
||||
|
||||
- a loop consists of six or fewer instructions
|
||||
(including the branch delay slot);
|
||||
- a loop contains only one conditional branch instruction at the end
|
||||
of the loop;
|
||||
- a loop does not contain any other branch or jump instructions;
|
||||
- the branch delay slot of the loop is not a NOP (EE 2.9 or later).
|
||||
|
||||
We need to do this because of a hardware bug in the R5900 chip. */
|
||||
if (mips_opts.arch == CPU_R5900
|
||||
/* Check if instruction has a parameter, ignore "j $31". */
|
||||
&& (address_expr != NULL)
|
||||
@ -7002,8 +7014,8 @@ can_swap_branch_p (struct mips_cl_insn *ip, expressionS *address_expr,
|
||||
|| (ip->insn_opcode & 0xffff0000) == 0x04110000)) /* bgezal $0 */
|
||||
{
|
||||
int distance;
|
||||
/* Check if loop is shorter than 6 instructions including
|
||||
branch and delay slot. */
|
||||
/* Check if the loop is no longer than 6 instructions
|
||||
including branch and delay slot. */
|
||||
distance = frag_now_fix () - S_GET_VALUE (address_expr->X_add_symbol);
|
||||
if (distance <= 20)
|
||||
{
|
||||
|
@ -87,7 +87,23 @@ Disassembly of section \.text:
|
||||
[0-9a-f]+ <[^>]*> 2403012c li \$3,300
|
||||
[0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1
|
||||
[0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1
|
||||
[0-9a-f]+ <[^>]*> 1460fffd bnez \$3,[0-9a-f]+ <short_loop1>
|
||||
[0-9a-f]+ <[^>]*> 1460fffd bnez \$3,[0-9a-f]+ <short_loop3>
|
||||
[0-9a-f]+ <[^>]*> 00000000 nop
|
||||
[0-9a-f]+ <[^>]*> 2403012c li \$3,300
|
||||
[0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1
|
||||
[0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1
|
||||
[0-9a-f]+ <[^>]*> 20a5ffff addi \$5,\$5,-1
|
||||
[0-9a-f]+ <[^>]*> 20c6ffff addi \$6,\$6,-1
|
||||
[0-9a-f]+ <[^>]*> 20e7ffff addi \$7,\$7,-1
|
||||
[0-9a-f]+ <[^>]*> 1460fffa bnez \$3,[0-9a-f]+ <short_loop6>
|
||||
[0-9a-f]+ <[^>]*> 00000000 nop
|
||||
[0-9a-f]+ <[^>]*> 2403012c li \$3,300
|
||||
[0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1
|
||||
[0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1
|
||||
[0-9a-f]+ <[^>]*> 20a5ffff addi \$5,\$5,-1
|
||||
[0-9a-f]+ <[^>]*> 20c6ffff addi \$6,\$6,-1
|
||||
[0-9a-f]+ <[^>]*> 20e7ffff addi \$7,\$7,-1
|
||||
[0-9a-f]+ <[^>]*> 1460fffa bnez \$3,[0-9a-f]+ <short_loop7>
|
||||
[0-9a-f]+ <[^>]*> 2108ffff addi \$8,\$8,-1
|
||||
[0-9a-f]+ <[^>]*> 24040003 li \$4,3
|
||||
\.\.\.
|
||||
|
@ -120,13 +120,37 @@ stuff:
|
||||
.set pop
|
||||
.set push
|
||||
.set reorder
|
||||
# Short loop fix.
|
||||
# Test the short loop fix with 3 loop instructions.
|
||||
li $3, 300
|
||||
short_loop1:
|
||||
short_loop3:
|
||||
addi $3, -1
|
||||
addi $4, -1
|
||||
# NOP should be inserted in branch delay.
|
||||
bne $3, $0, short_loop1
|
||||
# A NOP will be inserted in the branch delay slot.
|
||||
bne $3, $0, short_loop3
|
||||
|
||||
# Test the short loop fix with 6 loop instructions.
|
||||
li $3, 300
|
||||
short_loop6:
|
||||
addi $3, -1
|
||||
addi $4, -1
|
||||
addi $5, -1
|
||||
addi $6, -1
|
||||
addi $7, -1
|
||||
# A NOP will be inserted in the branch delay slot.
|
||||
bne $3, $0, short_loop6
|
||||
|
||||
# Test the short loop fix with 7 loop instructions.
|
||||
li $3, 300
|
||||
short_loop7:
|
||||
addi $3, -1
|
||||
addi $4, -1
|
||||
addi $5, -1
|
||||
addi $6, -1
|
||||
addi $7, -1
|
||||
addi $8, -1
|
||||
# The short loop fix does not apply for loops with
|
||||
# more than 6 instructions.
|
||||
bne $3, $0, short_loop7
|
||||
|
||||
li $4, 3
|
||||
.set pop
|
||||
|
Loading…
Reference in New Issue
Block a user