sim: bfin: import optimizations from 32bit test into 16bit test

The 32bit allopcodes test had quite a bit of optimization added to it
so that it ran in a reasonable amount of time out of uncached memory.
Port those changes over to the 16bit test so the two share common code.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
This commit is contained in:
Mike Frysinger 2012-03-19 02:49:18 +00:00
parent 410bbc9498
commit 191a54be54
2 changed files with 73 additions and 35 deletions

View File

@ -1,3 +1,7 @@
2012-03-18 Mike Frysinger <vapier@gentoo.org>
* se_all16bitopcodes.S: Merge code from se_all32bitopcodes.S.
2011-09-28 Mike Frysinger <vapier@gentoo.org>
* vit_max2.s: New tests for parallel VIT_MAX insns.

View File

@ -31,24 +31,23 @@
SYSCFG = R0;
/* Lower to the code we want to single step through */
loadsym R1, _usr;
RETI = R1;
loadsym P1, _usr;
RETI = P1;
/* set up pointers to valid data (32Meg), to reduce address violations */
init_p_regs 0x2000000;
init_i_regs 0x2000000;
init_b_regs 0x2000000;
init_l_regs 0;
imm32 fp, 0x2000000;
usp = p0;
/* reset_regs: set up pointers to valid data (0x2000000, i.e. 32Meg) to
   reduce address violations, and clear the L registers.
   Writes: r0, l0-l3, p0-p5, usp, fp, i0-i3, b0-b3.
   r0 is used as the scratch source for every pointer copy, so it is
   clobbered; imm32 is defined elsewhere and presumably leaves r0 holding
   0x2000000 -- confirm against its definition.  */
.macro reset_regs
imm32 r0, 0x2000000;
l0 = 0; l1 = 0; l2 = 0; l3 = 0;
p0 = r0; p1 = r0; p2 = r0; p3 = r0; p4 = r0; p5 = r0;
usp = r0; fp = r0;
i0 = r0; i1 = r0; i2 = r0; i3 = r0;
b0 = r0; b1 = r0; b2 = r0; b3 = r0;
.endm
reset_regs
RTI;
.align 4;
_usr:
.dd 0x0000;
jump fail_lvl;
_evx:
/* Make sure exception reason is single step */
R3 = SEQSTAT;
@ -56,12 +55,14 @@ _evx:
R3 = R3 & R4;
/* find a match */
loadsym P0, _usr;
loadsym P2, _location;
P1 = [P2];
R0 = W[P0];
loadsym P5, _usr;
loadsym P4, _location;
R2 = W[P5];
P1 = [P4];
R0 = R2;
_match:
P2 = P1;
R7 = W[P1++];
R6 = W[P1++];
R5 = W[P1++];
@ -69,7 +70,7 @@ _match:
/* is this the end of the table? */
R4 = 0;
CC = R4 == R7;
IF CC jump _legal_instruction;
IF CC jump _new_instruction;
/* is the opcode (R0) greater than the 2nd entry in the table (R6) */
/* if so look at the next line in the table */
@ -88,14 +89,38 @@ _match:
_match_done:
/* back up, and store the location to search next */
R0 = P1;
/* 3 words back (* 2 bytes per word) */
R0 += (-3 * 2);
[P2] = R0;
[P4] = P2;
/* it matches, so fall through */
jump _next_instruction;
_new_instruction:
jump _legal_instruction;
/* output the insn (R0) and excause (R3) if the excause differs from the last one recorded */
loadsym P0, _last_excause;
R2 = [P0];
CC = R2 == R3;
IF CC jump _next_instruction;
[P0] = R3;
.ifdef BFIN_JTAG_xxxxx
R1 = R0;
R0 = 0x4;
call __emu_out;
R0 = R1 << 16;
R0 = R0 | R3;
call __emu_out;
.else
loadsym P0, _next_location;
P1 = [P0];
W[P1++] = R0;
W[P1++] = R3;
[P0] = P1;
.endif
jump _next_instruction;
_legal_instruction:
R4 = 0x10;
CC = R3 == R4;
@ -104,31 +129,29 @@ _legal_instruction:
_next_instruction:
/* increment, and go again. */
loadsym P0, _usr;
R0 = W[P0];
R0 = R2;
R0 += 1;
W[P0] = R0;
/* finish once the opcode reaches the 32bit limit (0xC000) */
R1 = 0xC000 (Z);
CC = R1 == R0;
IF CC JUMP pass_lvl;
W[P5] = R0;
/* Make sure the opcode isn't in a write buffer */
SSYNC;
loadsym R1, _usr
R1 = P5;
RETX = R1;
/* set up pointers to valid data (32Meg), to reduce address violations */
init_p_regs 0x2000000;
init_i_regs 0x2000000;
init_b_regs 0x2000000;
init_l_regs 0;
imm32 fp, 0x2000000;
usp = p0;
RETS = p0;
RETN = p0;
RETE = p0;
RETI = p0;
reset_regs
RETS = r0;
RETN = r0;
RETE = r0;
RETI = r0;
RTX;
pass_lvl:
@ -136,10 +159,20 @@ pass_lvl:
fail_lvl:
dbg_fail;
/* Opcode slot.  The 16-bit word at _usr is rewritten by the exception
   handler with the next opcode value (W[P5] = R0, with P5 loaded from
   _usr), and RETX is pointed back at it before RTX.  The code that
   follows the slot jumps to fail_lvl.  */
.section .text.usr
.align 4
_usr:
.dw 0x0000;
loadsym P0, fail_lvl;
JUMP (P0);
/* this table must be sorted, and end with zero */
.data
.align 4;
_last_excause:
.dd 0xffff
_next_location:
.dd _table_end
_location:
.dd 0
_table:
@ -442,3 +475,4 @@ _table:
.dw 0x9ef0, 0x9eff, 0x21
.dw 0x9f70, 0x9f7f, 0x21
.dw 0x0000, 0x0000, 0x00
_table_end: