binutils-gdb/sim/rx/rx.c

2251 lines
45 KiB
C

/* rx.c --- opcode semantics for stand-alone RX simulator.
Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
Contributed by Red Hat, Inc.
This file is part of the GNU simulators.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include "opcode/rx.h"
#include "cpu.h"
#include "mem.h"
#include "syscalls.h"
#include "fpu.h"
#include "err.h"
#include "misc.h"
#ifdef CYCLE_STATS
/* Printable names for the RXO_* opcode ids; only compiled when
   CYCLE_STATS is defined.  N_RXO is derived from the size of this table.
   NOTE(review): presumably indexed by opcode id, so the entry order has
   to match the RXO_* enum in opcode/rx.h -- confirm before adding or
   reordering entries.  */
static const char * id_names[] = {
"RXO_unknown",
"RXO_mov", /* d = s (signed) */
"RXO_movbi", /* d = [s,s2] (signed) */
"RXO_movbir", /* [s,s2] = d (signed) */
"RXO_pushm", /* s..s2 */
"RXO_popm", /* s..s2 */
"RXO_xchg", /* s <-> d */
"RXO_stcc", /* d = s if cond(s2) */
"RXO_rtsd", /* rtsd, 1=imm, 2-0 = reg if reg type */
/* These are all either d OP= s or, if s2 is set, d = s OP s2. Note
that d may be "None". */
"RXO_and",
"RXO_or",
"RXO_xor",
"RXO_add",
"RXO_sub",
"RXO_mul",
"RXO_div",
"RXO_divu",
"RXO_shll",
"RXO_shar",
"RXO_shlr",
"RXO_adc", /* d = d + s + carry */
"RXO_sbb", /* d = d - s - ~carry */
"RXO_abs", /* d = |s| */
"RXO_max", /* d = max(d,s) */
"RXO_min", /* d = min(d,s) */
"RXO_emul", /* d:64 = d:32 * s */
"RXO_emulu", /* d:64 = d:32 * s (unsigned) */
"RXO_rolc", /* d <<= 1 through carry */
"RXO_rorc", /* d >>= 1 through carry*/
"RXO_rotl", /* d <<= #s without carry */
"RXO_rotr", /* d >>= #s without carry*/
"RXO_revw", /* d = revw(s) */
"RXO_revl", /* d = revl(s) */
"RXO_branch", /* pc = d if cond(s) */
"RXO_branchrel",/* pc += d if cond(s) */
"RXO_jsr", /* pc = d */
"RXO_jsrrel", /* pc += d */
"RXO_rts",
"RXO_nop",
"RXO_nop2",
"RXO_nop3",
"RXO_scmpu",
"RXO_smovu",
"RXO_smovb",
"RXO_suntil",
"RXO_swhile",
"RXO_smovf",
"RXO_sstr",
"RXO_rmpa",
"RXO_mulhi",
"RXO_mullo",
"RXO_machi",
"RXO_maclo",
"RXO_mvtachi",
"RXO_mvtaclo",
"RXO_mvfachi",
"RXO_mvfacmi",
"RXO_mvfaclo",
"RXO_racw",
"RXO_sat", /* sat(d) */
"RXO_satr",
"RXO_fadd", /* d op= s */
"RXO_fcmp",
"RXO_fsub",
"RXO_ftoi",
"RXO_fmul",
"RXO_fdiv",
"RXO_round",
"RXO_itof",
"RXO_bset", /* d |= (1<<s) */
"RXO_bclr", /* d &= ~(1<<s) */
"RXO_btst", /* s & (1<<s2) */
"RXO_bnot", /* d ^= (1<<s) */
"RXO_bmcc", /* d<s> = cond(s2) */
"RXO_clrpsw", /* flag index in d */
"RXO_setpsw", /* flag index in d */
"RXO_mvtipl", /* new IPL in s */
"RXO_rtfi",
"RXO_rte",
"RXO_rtd", /* undocumented */
"RXO_brk",
"RXO_dbt", /* undocumented */
"RXO_int", /* vector id in s */
"RXO_stop",
"RXO_wait",
"RXO_sccnd", /* d = cond(s) ? 1 : 0 */
};
/* Four-character labels for operand types, used by op_cache_string to
   print an operand-type triple.  N_RXT is derived from the size of this
   table.  NOTE(review): presumably indexed by the RX_Operand_* type
   codes -- confirm the order against opcode/rx.h.  */
static const char * optype_names[] = {
" - ",
"#Imm", /* #addend */
" Rn ", /* Rn */
"[Rn]", /* [Rn + addend] */
"Ps++", /* [Rn+] */
"--Pr", /* [-Rn] */
" cc ", /* eq, gtu, etc */
"Flag", /* [UIOSZC] */
"RbRi" /* [Rb + scale * Ri] */
};
/* Element counts of the two name tables above.  */
#define N_RXO (sizeof(id_names)/sizeof(id_names[0]))
#define N_RXT (sizeof(optype_names)/sizeof(optype_names[0]))
/* Size of op_cache_rev; op_lookup exits once op_cache_idx reaches it.  */
#define N_MAP 30
static unsigned long long benchmark_start_cycle;
static unsigned long long benchmark_end_cycle;
/* op_cache maps an operand-type triple to a small index (0 means "not
   yet assigned"); op_cache_rev maps that index back to the triple packed
   as (a<<8) | (b<<4) | c; op_cache_idx is the last index handed out.  */
static int op_cache[N_RXT][N_RXT][N_RXT];
static int op_cache_rev[N_MAP];
static int op_cache_idx = 0;
/* Return the small index assigned to the operand-type triple (A, B, C),
   handing out the next free index the first time a triple is seen.
   Indices start at 1 because 0 in op_cache means "unassigned".  Prints a
   message and exits with status 1 when the index would reach N_MAP.  */
static int
op_lookup (int a, int b, int c)
{
  int *slot = &op_cache[a][b][c];

  if (*slot == 0)
    {
      if (++op_cache_idx >= N_MAP)
        {
          printf("op_cache_idx exceeds %d\n", N_MAP);
          exit(1);
        }
      *slot = op_cache_idx;
      /* Remember the triple so op_cache_string can unpack it later.  */
      op_cache_rev[op_cache_idx] = (a<<8) | (b<<4) | c;
    }
  return *slot;
}
/* Format the operand-type triple recorded for index MAP (as returned by
   op_lookup) as "<type> <type> <type>".

   The result points into one of five static buffers used in rotation,
   so it stays valid only until four further calls have been made; this
   lets a caller use several results in a single printf.  The write is
   bounded by the buffer size so an unexpectedly long name can truncate
   the text but cannot overrun the buffer.  */
static char *
op_cache_string (int map)
{
  static int ci;
  static char cb[5][20];
  int a, b, c;

  /* Unpack the three 4-bit type codes stored by op_lookup.  */
  map = op_cache_rev[map];
  a = (map >> 8) & 15;
  b = (map >> 4) & 15;
  c = (map >> 0) & 15;

  ci = (ci + 1) % 5;
  snprintf (cb[ci], sizeof cb[ci], "%s %s %s",
            optype_names[a], optype_names[b], optype_names[c]);
  return cb[ci];
}
/* Statistics gathered when CYCLE_STATS is defined.  The two-dimensional
   tables are sized by opcode id and operand-type map index.
   NOTE(review): the code that fills and prints the per-id and per-pair
   tables is outside this part of the file -- confirm their exact
   meaning there.  */
static unsigned long long cycles_per_id[N_RXO][N_MAP];
static unsigned long long times_per_id[N_RXO][N_MAP];
/* Stall and fast-return event counters.  */
static unsigned long long memory_stalls;
static unsigned long long register_stalls;
static unsigned long long branch_stalls;
static unsigned long long branch_alignment_stalls;
static unsigned long long fast_returns;
static unsigned long times_per_pair[N_RXO][N_MAP][N_RXO][N_MAP];
static int prev_opcode_id = RXO_unknown;
static int po0;
/* STATS (x) expands to x when CYCLE_STATS is defined and to nothing
   otherwise.  */
#define STATS(x) x
#else
#define STATS(x)
#endif /* CYCLE_STATS */
#ifdef CYCLE_ACCURATE
/* Register number recorded by RLD for a value loaded from memory; -1
   initially.  */
static int new_rt = -1;
/* Number of cycles to add if an insn spans an 8-byte boundary. */
static int branch_alignment_penalty = 0;
#endif
static int running_benchmark = 1;
/* Trace output: prints only when both trace and running_benchmark are
   nonzero.  Note that this expands to a bare `if', so it must not be
   used as the body of an if that has an else clause.  */
#define tprintf if (trace && running_benchmark) printf
/* longjmp target used by DO_RETURN and generate_access_exception.  */
jmp_buf decode_jmp_buf;
/* Incremented once per decode_opcode call.  */
unsigned int rx_cycles = 0;
#ifdef CYCLE_ACCURATE
/* If nonzero, memory was read at some point and cycle latency might
take effect. */
static int memory_source = 0;
/* If nonzero, memory was written and extra cycles might be
needed. */
static int memory_dest = 0;
/* Charge THROUGHPUT cycles to the simulated cycle counter and report
   the amount in the trace output.  */
static void
cycles (int throughput)
{
  regs.cycle_count += throughput;
  tprintf("%d cycles\n", throughput);
}
/* Number of execution (E) cycles the op uses. For memory sources, we
include the load micro-op stall as two extra E cycles. */
#define E(c) cycles (memory_source ? c + 2 : c)
/* Fixed one- and two-cycle costs that ignore memory_source.  */
#define E1 cycles (1)
#define E2 cycles (2)
/* Cost of a bit operation: 2 cycles with a memory source, else 1.  */
#define EBIT cycles (memory_source ? 2 : 1)
/* Check to see if a read latency must be applied for a given register. */
/* If R is the register recorded in regs.rt, charge one stall cycle and
   clear regs.rt.  Expands to a bare `if' statement.  */
#define RL(r) \
if (regs.rt == r ) \
{ \
tprintf("register %d load stall\n", r); \
regs.cycle_count ++; \
STATS(register_stalls ++); \
regs.rt = -1; \
}
/* When the current insn read memory, record R in new_rt as the register
   that receives the loaded value.  Expands to a bare `if' statement.  */
#define RLD(r) \
if (memory_source) \
{ \
tprintf ("Rt now %d\n", r); \
new_rt = r; \
}
/* Return a size class for V, used to estimate divide latency: half the
   (1-based, rounded up) position of the highest set bit among bits
   31..0.  That is 1 for values needing 1-2 significant bits, 2 for 3-4
   bits, ... up to 16 for 31-32 bits, and 0 when none of those bits is
   set.  If IS_SIGNED is nonzero and bit 31 of V is set, V is negated
   first so that the magnitude is measured.

   The probe mask is built from an unsigned long constant: shifting the
   int constant 1 left by 31 is undefined behaviour and, where unsigned
   long is wider than int, would yield a sign-extended mask.  */
static int
lsb_count (unsigned long v, int is_signed)
{
  int i;

  if (is_signed && (v & 0x80000000UL))
    v = 0UL - v;

  for (i = 31; i >= 0; i--)
    if (v & (1UL << i))
      /* i is 0..31, we want 1=1-2, 2=3-4, 3=5-6, etc.  */
      return (i + 2) / 2;

  return 0;
}
/* Charge the execution cycles for an unsigned divide of NUM by DEN and
   return the cycle count used: 2 when the numerator's size class (see
   lsb_count) is smaller than the denominator's, otherwise 3 plus the
   difference between the two classes.  */
static int
divu_cycles(unsigned long num, unsigned long den)
{
  const int num_class = lsb_count (num, 0);
  const int den_class = lsb_count (den, 0);
  const int rv = (num_class < den_class) ? 2 : 3 + num_class - den_class;

  E (rv);
  return rv;
}
/* Charge the execution cycles for a signed divide of NUM by DEN and
   return the cycle count used: 3 when the numerator's magnitude class
   (see lsb_count) is smaller than the denominator's, otherwise 5 plus
   the difference between the two classes.  */
static int
div_cycles(long num, long den)
{
  const int num_class = lsb_count ((unsigned long)num, 1);
  const int den_class = lsb_count ((unsigned long)den, 1);
  const int rv = (num_class < den_class) ? 3 : 5 + num_class - den_class;

  E (rv);
  return rv;
}
#else /* !CYCLE_ACCURATE */
/* Without CYCLE_ACCURATE all cycle-accounting helpers expand to
   nothing, so their uses compile away to empty statements.  */
#define cycles(t)
#define E(c)
#define E1
#define E2
#define EBIT
#define RL(r)
#define RLD(r)
#define divu_cycles(n,d)
#define div_cycles(n,d)
#endif /* else CYCLE_ACCURATE */
/* Width in bytes for each operand size code; used to step the register
   for pre-decrement/post-increment operands and to scale the index of
   two-register operands.  The table is only ever read, so it is const.
   NOTE(review): presumably indexed by the RX_Size codes in the order
   AnySize, Byte, UByte, SByte, Word, UWord, SWord, 3Byte, Long --
   confirm against opcode/rx.h.  */
static const int size2bytes[] = {
  4, 1, 1, 1, 2, 2, 2, 3, 4
};
/* State handed to rx_get_byte through its opaque pointer argument:
   dpc is the address of the next byte to fetch and is advanced by one
   on every call.  */
typedef struct {
unsigned long dpc;
} RX_Data;
#define rx_abort() _rx_abort(__FILE__, __LINE__)
/* Report where the simulator gave up and abort.  FILE is reduced to the
   part after its last '/' (if any) before it is printed to stderr
   together with LINE.  Does not return.  */
static void
_rx_abort (const char *file, int line)
{
  const char *slash = strrchr (file, '/');
  const char *base = slash ? slash + 1 : file;

  fprintf(stderr, "abort at %s:%d\n", base, line);
  abort();
}
/* Cached lookup state for the page selected by maybe_get_mem_page.  The
   two base pointers are stored with the page address already subtracted,
   so they can be indexed directly with a full address inside that page;
   get_byte_page is the address of the cached page.  */
static unsigned char *get_byte_base;
static RX_Opcode_Decoded **decode_cache_base;
static SI get_byte_page;
/* Forget the cached page lookup so that no stale base pointers remain;
   the cached page address is reset to 0 and both pointers to null.  */
void
reset_decoder (void)
{
  get_byte_page = 0;
  decode_cache_base = NULL;
  get_byte_base = NULL;
}
/* Make the cached page pointers cover the page containing TPC.  The
   lookup is skipped when TPC lies in the page already cached, unless
   enable_counting is set, in which case it is redone on every call.  */
static inline void
maybe_get_mem_page (SI tpc)
{
  if (!enable_counting && ((tpc ^ get_byte_page) & NONPAGE_MASK) == 0)
    return;

  get_byte_page = tpc & NONPAGE_MASK;
  /* Bias both pointers by the page address so callers can index them
     with full addresses.  */
  get_byte_base = rx_mem_ptr (get_byte_page, MPA_READING) - get_byte_page;
  decode_cache_base = rx_mem_decode_cache (get_byte_page) - get_byte_page;
}
/* This gets called a *lot* so optimize it. */
static int
rx_get_byte (void *vdata)
{
RX_Data *rx_data = (RX_Data *)vdata;
SI tpc = rx_data->dpc;
/* See load.c for an explanation of this. */
if (rx_big_endian)
tpc ^= 3;
maybe_get_mem_page (tpc);
rx_data->dpc ++;
return get_byte_base [tpc];
}
/* Fetch the value of operand I of the decoded instruction RD.

   Immediate operands return their addend, condition operands return
   condition_true () for the condition code, and flag operands return 1
   or 0 for the selected PSW bit; these three return directly.  Register
   operands read the register and memory operands load from the computed
   address; those results are then clipped or extended according to the
   operand size before being returned.  Pre-decrement operands adjust
   the register by the operand size before the access, post-increment
   operands after it.  Aborts for an operand of type None or of size
   AnySize.  */
static int
get_op (const RX_Opcode_Decoded *rd, int i)
{
const RX_Opcode_Operand *o = rd->op + i;
int addr, rv = 0;
switch (o->type)
{
case RX_Operand_None:
rx_abort ();
case RX_Operand_Immediate: /* #addend */
return o->addend;
case RX_Operand_Register: /* Rn */
RL (o->reg);
rv = get_reg (o->reg);
break;
case RX_Operand_Predec: /* [-Rn] */
put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
/* fall through */
case RX_Operand_Postinc: /* [Rn+] */
case RX_Operand_Indirect: /* [Rn + addend] */
case RX_Operand_TwoReg: /* [Rn + scale * R2] */
#ifdef CYCLE_ACCURATE
/* Apply load-use stalls for the address registers, then account for a
   pending memory-to-memory stall and mark this insn as reading memory.  */
RL (o->reg);
if (o->type == RX_Operand_TwoReg)
RL (rd->op[2].reg);
regs.rt = -1;
if (regs.m2m == M2M_BOTH)
{
tprintf("src memory stall\n");
#ifdef CYCLE_STATS
memory_stalls ++;
#endif
regs.cycle_count ++;
regs.m2m = 0;
}
memory_source = 1;
#endif
/* NOTE(review): for the two-register form the register in O is the one
   that gets scaled (by the instruction's size) and operand 2's register
   is added unscaled -- confirm this matches how the decoder fills in the
   operands.  */
if (o->type == RX_Operand_TwoReg)
addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
else
addr = get_reg (o->reg) + o->addend;
switch (o->size)
{
case RX_AnySize:
rx_abort ();
case RX_Byte: /* undefined extension */
case RX_UByte:
case RX_SByte:
rv = mem_get_qi (addr);
break;
case RX_Word: /* undefined extension */
case RX_UWord:
case RX_SWord:
rv = mem_get_hi (addr);
break;
case RX_3Byte:
rv = mem_get_psi (addr);
break;
case RX_Long:
rv = mem_get_si (addr);
break;
}
if (o->type == RX_Operand_Postinc)
put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
break;
case RX_Operand_Condition: /* eq, gtu, etc */
return condition_true (o->reg);
case RX_Operand_Flag: /* [UIOSZC] */
return (regs.r_psw & (1 << o->reg)) ? 1 : 0;
}
/* if we've gotten here, we need to clip/extend the value according
to the size. */
switch (o->size)
{
case RX_AnySize:
rx_abort ();
case RX_Byte: /* undefined extension */
rv |= 0xdeadbe00; /* keep them honest */
break;
case RX_UByte:
rv &= 0xff;
break;
case RX_SByte:
rv = sign_ext (rv, 8);
break;
case RX_Word: /* undefined extension */
rv |= 0xdead0000; /* keep them honest */
break;
case RX_UWord:
rv &= 0xffff;
break;
case RX_SWord:
rv = sign_ext (rv, 16);
break;
case RX_3Byte:
rv &= 0xffffff;
break;
case RX_Long:
break;
}
return rv;
}
/* Store V into operand I of the decoded instruction RD.

   V is first clipped or extended according to the operand size (size
   AnySize is accepted only for register operands).  It is then written
   to the register, to memory at the computed address, or into the
   selected PSW bit (set when V is nonzero, cleared otherwise).  An
   operand of type None is silently ignored; immediate and condition
   operands abort.  Pre-decrement operands adjust the register by the
   operand size before the store, post-increment operands after it.  */
static void
put_op (const RX_Opcode_Decoded *rd, int i, int v)
{
const RX_Opcode_Operand *o = rd->op + i;
int addr;
switch (o->size)
{
case RX_AnySize:
if (o->type != RX_Operand_Register)
rx_abort ();
break;
case RX_Byte: /* undefined extension */
v |= 0xdeadbe00; /* keep them honest */
break;
case RX_UByte:
v &= 0xff;
break;
case RX_SByte:
v = sign_ext (v, 8);
break;
case RX_Word: /* undefined extension */
v |= 0xdead0000; /* keep them honest */
break;
case RX_UWord:
v &= 0xffff;
break;
case RX_SWord:
v = sign_ext (v, 16);
break;
case RX_3Byte:
v &= 0xffffff;
break;
case RX_Long:
break;
}
switch (o->type)
{
case RX_Operand_None:
/* Opcodes like TST and CMP use this. */
break;
case RX_Operand_Immediate: /* #addend */
case RX_Operand_Condition: /* eq, gtu, etc */
rx_abort ();
case RX_Operand_Register: /* Rn */
put_reg (o->reg, v);
RLD (o->reg);
break;
case RX_Operand_Predec: /* [-Rn] */
put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
/* fall through */
case RX_Operand_Postinc: /* [Rn+] */
case RX_Operand_Indirect: /* [Rn + addend] */
case RX_Operand_TwoReg: /* [Rn + scale * R2] */
#ifdef CYCLE_ACCURATE
/* Account for a pending memory-to-memory stall and mark this insn as
   writing memory.  */
if (regs.m2m == M2M_BOTH)
{
tprintf("dst memory stall\n");
regs.cycle_count ++;
#ifdef CYCLE_STATS
memory_stalls ++;
#endif
regs.m2m = 0;
}
memory_dest = 1;
#endif
/* Same address computation as in get_op.  */
if (o->type == RX_Operand_TwoReg)
addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
else
addr = get_reg (o->reg) + o->addend;
switch (o->size)
{
case RX_AnySize:
rx_abort ();
case RX_Byte: /* undefined extension */
case RX_UByte:
case RX_SByte:
mem_put_qi (addr, v);
break;
case RX_Word: /* undefined extension */
case RX_UWord:
case RX_SWord:
mem_put_hi (addr, v);
break;
case RX_3Byte:
mem_put_psi (addr, v);
break;
case RX_Long:
mem_put_si (addr, v);
break;
}
if (o->type == RX_Operand_Postinc)
put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
break;
case RX_Operand_Flag: /* [UIOSZC] */
if (v)
regs.r_psw |= (1 << o->reg);
else
regs.r_psw &= ~(1 << o->reg);
break;
}
}
#define PD(x) put_op (opcode, 0, x)
#define PS(x) put_op (opcode, 1, x)
#define PS2(x) put_op (opcode, 2, x)
#define GD() get_op (opcode, 0)
#define GS() get_op (opcode, 1)
#define GS2() get_op (opcode, 2)
#define DSZ() size2bytes[opcode->op[0].size]
#define SSZ() size2bytes[opcode->op[0].size]
#define S2SZ() size2bytes[opcode->op[0].size]
/* "Universal" sources. */
#define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
#define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
static void
push(int val)
{
int rsp = get_reg (sp);
rsp -= 4;
put_reg (sp, rsp);
mem_put_si (rsp, val);
}
/* Just like the above, but tag the memory as "pushed pc" so if anyone
tries to write to it, it will cause an error. */
static void
pushpc(int val)
{
int rsp = get_reg (sp);
rsp -= 4;
put_reg (sp, rsp);
mem_put_si (rsp, val);
mem_set_content_range (rsp, rsp+3, MC_PUSHED_PC);
}
static int
pop()
{
int rv;
int rsp = get_reg (sp);
rv = mem_get_si (rsp);
rsp += 4;
put_reg (sp, rsp);
return rv;
}
static int
poppc()
{
int rv;
int rsp = get_reg (sp);
if (mem_get_content_type (rsp) != MC_PUSHED_PC)
execution_error (SIM_ERR_CORRUPT_STACK, rsp);
rv = mem_get_si (rsp);
mem_set_content_range (rsp, rsp+3, MC_UNINIT);
rsp += 4;
put_reg (sp, rsp);
return rv;
}
/* Arithmetic helper for add/adc/sbb-style opcodes.  Computes
   US1() vop US2() vop c twice: once on the raw unsigned operands (ll)
   and once on the operands sign-extended from the destination width
   (sll).  The flags are set from sll via set_oszc, sll is stored with
   PD and one E cycle is charged.  The carry argument passed to set_oszc
   is (ll > b2mask[DSZ()]) when vop is `+' (1 + 1 is nonzero) and
   ((long long) ll > -1) when vop is `-' (1 - 1 is zero).  Relies on the
   caller's locals uma, umb, ma, mb, ll and sll.  */
#define MATH_OP(vop,c) \
{ \
umb = US2(); \
uma = US1(); \
ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
ma = sign_ext (uma, DSZ() * 8); \
mb = sign_ext (umb, DSZ() * 8); \
sll = (long long) ma vop (long long) mb vop c; \
tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
PD (sll); \
E (1); \
}
/* Bitwise helper for and/or/xor-style opcodes: computes
   US1() vop US2(), sets the S and Z flags from the result for the
   destination width, stores it with PD and charges one E cycle.  Relies
   on the caller's locals ma, mb and v.  */
#define LOGIC_OP(vop) \
{ \
mb = US2(); \
ma = US1(); \
v = ma vop mb; \
tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
set_sz (v, DSZ()); \
PD(v); \
E (1); \
}
/* Shift helper.  Loads the shift amount from US2() into COUNT and the
   operand from US1(), cast to TYPE, into VAL, then applies `VAL OP 1'
   COUNT times.  Before each step the bit selected by CARRY_MASK is
   saved, so after the loop c holds the last bit shifted out.  Flags are
   updated through set_oszc only when COUNT is nonzero; the result is
   always stored with PD.  No cycles are charged here.  */
#define SHIFT_OP(val, type, count, OP, carry_mask) \
{ \
int i, c=0; \
count = US2(); \
val = (type)US1(); \
tprintf("%lld " #OP " %d\n", val, count); \
for (i = 0; i < count; i ++) \
{ \
c = val & carry_mask; \
val OP 1; \
} \
if (count) \
set_oszc (val, 4, c); \
PD (val); \
}
/* Overlay of an int and a float, used to reinterpret the bits of one
   as the other without any numeric conversion.  */
typedef union {
  int i;
  float f;
} FloatInt;

/* Return the bit pattern of F as an int.  */
static inline int
float2int (float f)
{
  const FloatInt pun = { .f = f };

  return pun.i;
}

/* Return the float whose bit pattern is I.  */
static inline float
int2float (int i)
{
  const FloatInt pun = { .i = i };

  return pun.f;
}
/* FLOAT_OP backend for fadd: store rxfp_add (S1, S2) in *D.  Always
   returns 1, which FLOAT_OP takes to mean "write the result back".  */
static int
fop_fadd (fp_t s1, fp_t s2, fp_t *d)
{
  fp_t sum = rxfp_add (s1, s2);

  *d = sum;
  return 1;
}
/* FLOAT_OP backend for fmul: store rxfp_mul (S1, S2) in *D.  Always
   returns 1, which FLOAT_OP takes to mean "write the result back".  */
static int
fop_fmul (fp_t s1, fp_t s2, fp_t *d)
{
  fp_t product = rxfp_mul (s1, s2);

  *d = product;
  return 1;
}
/* FLOAT_OP backend for fdiv: store rxfp_div (S1, S2) in *D.  Always
   returns 1, which FLOAT_OP takes to mean "write the result back".  */
static int
fop_fdiv (fp_t s1, fp_t s2, fp_t *d)
{
  fp_t quotient = rxfp_div (s1, s2);

  *d = quotient;
  return 1;
}
/* FLOAT_OP backend for fsub: store rxfp_sub (S1, S2) in *D.  Always
   returns 1, which FLOAT_OP takes to mean "write the result back".  */
static int
fop_fsub (fp_t s1, fp_t s2, fp_t *d)
{
  fp_t difference = rxfp_sub (s1, s2);

  *d = difference;
  return 1;
}
/* Nonzero when regs.r_fpsw has FPSWBITS_CE set, or has a bit within
   FPSWBITS_FMASK set whose counterpart in (regs.r_fpsw << FPSW_EFSH) is
   also set.  NOTE(review): the shift presumably lines the exception
   enable bits up with the corresponding flag bits -- confirm against
   the FPSW layout in cpu.h.  */
#define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
/* Keep only the FPSW bits selected by FPSWBITS_CLEAR.  */
#define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
/* Return from the enclosing function through do_fp_exception when an
   FP exception is pending.  Expands to a bare `if' and needs opcode_pc
   in scope.  */
#define FPCHECK() \
if (FPPENDING()) \
return do_fp_exception (opcode_pc)
/* Two-operand floating-point helper.  Clears the FP status, fetches the
   source (GS) and destination (GD) operands, and calls fop_<func> to
   compute the result.  If that left an FP exception pending, FPCHECK
   returns from the enclosing function before anything is stored.
   Otherwise the result is written back with PD when fop_<func> returned
   nonzero, and the S and Z flags are set from the result's sign bit and
   from its remaining bits all being zero.  Relies on the caller's local
   mb and on opcode_pc.  */
#define FLOAT_OP(func) \
{ \
int do_store; \
fp_t fa, fb, fc; \
FPCLEAR(); \
fb = GS (); \
fa = GD (); \
do_store = fop_##func (fa, fb, &fc); \
tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
FPCHECK(); \
if (do_store) \
PD (fc); \
mb = 0; \
if ((fc & 0x80000000UL) != 0) \
mb |= FLAGBIT_S; \
if ((fc & 0x7fffffffUL) == 0) \
mb |= FLAGBIT_Z; \
set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
}
/* Current carry flag normalised to 0 or 1.  */
#define carry (FLAG_C ? 1 : 0)
/* Per-exception data used by generate_exception, indexed by the EX_*
   constants: VADDR is the address from which the handler address is
   read, STR names the exception in messages, and SIGNAL is the signal
   reported when the simulator runs under gdb and no handler is
   installed.  */
static struct {
unsigned long vaddr;
const char *str;
int signal;
} exception_info[] = {
{ 0xFFFFFFD0UL, "priviledged opcode", SIGILL },
{ 0xFFFFFFD4UL, "access violation", SIGSEGV },
{ 0xFFFFFFDCUL, "undefined opcode", SIGILL },
{ 0xFFFFFFE4UL, "floating point", SIGFPE }
};
/* Indices into exception_info.  */
#define EX_PRIVILEDGED 0
#define EX_ACCESS 1
#define EX_UNDEFINED 2
#define EX_FLOATING 3
/* Return from the enclosing function with the result of raising
   exception N at opcode_pc (which must be in scope).  */
#define EXCEPTION(n) \
return generate_exception (n, opcode_pc)
/* Raise the privileged-opcode exception when FLAG_PM is set.  Expands
   to a bare `if', so do not follow it with an else.  */
#define PRIVILEDGED() \
if (FLAG_PM) \
EXCEPTION (EX_PRIVILEDGED)
/* Raise exception TYPE (one of the EX_* indices) for the instruction at
   OPCODE_PC and return a simulator status code.

   The handler address is read from the exception's vector.  When a
   handler is installed, I, U and PM are cleared in the PSW, the pc is
   set to the handler, and the old PSW followed by OPCODE_PC are pushed;
   the result is "stepped".  When no handler is installed the result is
   "stopped" with the exception's signal under gdb; otherwise a
   diagnostic (listing the pending FP exceptions for EX_FLOATING) is
   printed to stderr and the result is "exited" with status 1.  */
static int
generate_exception (unsigned long type, SI opcode_pc)
{
  SI saved_psw, handler;

  handler = mem_get_si (exception_info[type].vaddr);

  /* 0x00020000 is the value used to initialise the known
     exception vectors (see rx.ld), but it is a reserved
     area of memory so do not try to access it, and if the
     value has not been changed by the program then the
     vector has not been installed. */
  if (handler != 0 && handler != 0x00020000)
    {
      tprintf ("Triggering %s exception\n", exception_info[type].str);
      saved_psw = regs.r_psw;
      regs.r_psw &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
      regs.r_pc = handler;
      pushpc (saved_psw);
      pushpc (opcode_pc);
      return RX_MAKE_STEPPED ();
    }

  /* No handler installed.  */
  if (rx_in_gdb)
    return RX_MAKE_STOPPED (exception_info[type].signal);

  fprintf(stderr, "Unhandled %s exception at pc = %#lx\n",
          exception_info[type].str, (unsigned long) opcode_pc);

  if (type == EX_FLOATING)
    {
      const struct
      {
        unsigned long bit;
        const char *label;
      } fp_names[] = {
        { FPSWBITS_FV, " Invalid" },
        { FPSWBITS_FO, " Overflow" },
        { FPSWBITS_FZ, " Division-by-zero" },
        { FPSWBITS_FU, " Underflow" },
        { FPSWBITS_FX, " Inexact" },
        { FPSWBITS_CE, " Unimplemented" }
      };
      int pending = FPPENDING ();
      unsigned int k;

      fprintf (stderr, "Pending FP exceptions:");
      for (k = 0; k < sizeof (fp_names) / sizeof (fp_names[0]); k++)
        if (pending & fp_names[k].bit)
          fputs (fp_names[k].label, stderr);
      fprintf(stderr, "\n");
    }

  return RX_MAKE_EXITED (1);
}
/* Raise an access-violation exception at the current pc.  If that
   results in an "exited" status, unwind to decode_jmp_buf with it;
   otherwise simply return to the caller.  */
void
generate_access_exception (void)
{
  const int status = generate_exception (EX_ACCESS, regs.r_pc);

  if (!RX_EXITED (status))
    return;

  longjmp (decode_jmp_buf, status);
}
/* Raise the floating-point exception for the instruction at OPCODE_PC
   when one is pending and return the resulting status; when none is
   pending, return "stepped".  */
static int
do_fp_exception (unsigned long opcode_pc)
{
  if (FPPENDING())
    return generate_exception (EX_FLOATING, opcode_pc);

  return RX_MAKE_STEPPED ();
}
/* Return 1 when operand I of RD is a pre-decrement, post-increment or
   indirect operand, and 0 for every other operand type (including the
   two-register form).  */
static int
op_is_memory (const RX_Opcode_Decoded *rd, int i)
{
  const int kind = rd->op[i].type;

  return (kind == RX_Operand_Predec
          || kind == RX_Operand_Postinc
          || kind == RX_Operand_Indirect);
}
/* Whether operand I of the current opcode is a memory operand, as
   defined by op_is_memory.  */
#define OM(i) op_is_memory (opcode, i)
/* Leave instruction decoding by unwinding to decode_jmp_buf with status
   X instead of returning normally.  */
#define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
int
decode_opcode ()
{
unsigned int uma=0, umb=0;
int ma=0, mb=0;
int opcode_size, v;
unsigned long long ll;
long long sll;
unsigned long opcode_pc;
RX_Data rx_data;
const RX_Opcode_Decoded *opcode;
#ifdef CYCLE_STATS
unsigned long long prev_cycle_count;
#endif
#ifdef CYCLE_ACCURATE
unsigned int tx;
#endif
#ifdef CYCLE_STATS
prev_cycle_count = regs.cycle_count;
#endif
#ifdef CYCLE_ACCURATE
memory_source = 0;
memory_dest = 0;
#endif
rx_cycles ++;
maybe_get_mem_page (regs.r_pc);
opcode_pc = regs.r_pc;
/* Note that we don't word-swap this point, there's no point. */
if (decode_cache_base[opcode_pc] == NULL)
{
RX_Opcode_Decoded *opcode_w;
rx_data.dpc = opcode_pc;
opcode_w = decode_cache_base[opcode_pc] = calloc (1, sizeof (RX_Opcode_Decoded));
opcode_size = rx_decode_opcode (opcode_pc, opcode_w,
rx_get_byte, &rx_data);
opcode = opcode_w;
}
else
{
opcode = decode_cache_base[opcode_pc];
opcode_size = opcode->n_bytes;
}
#ifdef CYCLE_ACCURATE
if (branch_alignment_penalty)
{
if ((regs.r_pc ^ (regs.r_pc + opcode_size - 1)) & ~7)
{
tprintf("1 cycle branch alignment penalty\n");
cycles (branch_alignment_penalty);
#ifdef CYCLE_STATS
branch_alignment_stalls ++;
#endif
}
branch_alignment_penalty = 0;
}
#endif
regs.r_pc += opcode_size;
rx_flagmask = opcode->flags_s;
rx_flagand = ~(int)opcode->flags_0;
rx_flagor = opcode->flags_1;
switch (opcode->id)
{
case RXO_abs:
sll = GS ();
tprintf("|%lld| = ", sll);
if (sll < 0)
sll = -sll;
tprintf("%lld\n", sll);
PD (sll);
set_osz (sll, 4);
E (1);
break;
case RXO_adc:
MATH_OP (+,carry);
break;
case RXO_add:
MATH_OP (+,0);
break;
case RXO_and:
LOGIC_OP (&);
break;
case RXO_bclr:
ma = GD ();
mb = GS ();
if (opcode->op[0].type == RX_Operand_Register)
mb &= 0x1f;
else
mb &= 0x07;
ma &= ~(1 << mb);
PD (ma);
EBIT;
break;
case RXO_bmcc:
ma = GD ();
mb = GS ();
if (opcode->op[0].type == RX_Operand_Register)
mb &= 0x1f;
else
mb &= 0x07;
if (GS2 ())
ma |= (1 << mb);
else
ma &= ~(1 << mb);
PD (ma);
EBIT;
break;
case RXO_bnot:
ma = GD ();
mb = GS ();
if (opcode->op[0].type == RX_Operand_Register)
mb &= 0x1f;
else
mb &= 0x07;
ma ^= (1 << mb);
PD (ma);
EBIT;
break;
case RXO_branch:
if (opcode->op[1].type == RX_Operand_None || GS())
{
#ifdef CYCLE_ACCURATE
SI old_pc = regs.r_pc;
int delta;
#endif
regs.r_pc = GD();
#ifdef CYCLE_ACCURATE
delta = regs.r_pc - old_pc;
if (delta >= 0 && delta < 16
&& opcode_size > 1)
{
tprintf("near forward branch bonus\n");
cycles (2);
}
else
{
cycles (3);
branch_alignment_penalty = 1;
}
#ifdef CYCLE_STATS
branch_stalls ++;
#endif
#endif
}
#ifdef CYCLE_ACCURATE
else
cycles (1);
#endif
break;
case RXO_branchrel:
if (opcode->op[1].type == RX_Operand_None || GS())
{
int delta = GD();
regs.r_pc = opcode_pc + delta;
#ifdef CYCLE_ACCURATE
/* Note: specs say 3, chip says 2. */
if (delta >= 0 && delta < 16
&& opcode_size > 1)
{
tprintf("near forward branch bonus\n");
cycles (2);
}
else
{
cycles (3);
branch_alignment_penalty = 1;
}
#ifdef CYCLE_STATS
branch_stalls ++;
#endif
#endif
}
#ifdef CYCLE_ACCURATE
else
cycles (1);
#endif
break;
case RXO_brk:
{
int old_psw = regs.r_psw;
if (rx_in_gdb)
DO_RETURN (RX_MAKE_HIT_BREAK ());
if (regs.r_intb == 0)
{
tprintf("BREAK hit, no vector table.\n");
DO_RETURN (RX_MAKE_EXITED(1));
}
regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
pushpc (old_psw);
pushpc (regs.r_pc);
regs.r_pc = mem_get_si (regs.r_intb);
cycles(6);
}
break;
case RXO_bset:
ma = GD ();
mb = GS ();
if (opcode->op[0].type == RX_Operand_Register)
mb &= 0x1f;
else
mb &= 0x07;
ma |= (1 << mb);
PD (ma);
EBIT;
break;
case RXO_btst:
ma = GS ();
mb = GS2 ();
if (opcode->op[1].type == RX_Operand_Register)
mb &= 0x1f;
else
mb &= 0x07;
umb = ma & (1 << mb);
set_zc (! umb, umb);
EBIT;
break;
case RXO_clrpsw:
v = 1 << opcode->op[0].reg;
if (FLAG_PM
&& (v == FLAGBIT_I
|| v == FLAGBIT_U))
break;
regs.r_psw &= ~v;
cycles (1);
break;
case RXO_div: /* d = d / s */
ma = GS();
mb = GD();
tprintf("%d / %d = ", mb, ma);
if (ma == 0 || (ma == -1 && (unsigned int) mb == 0x80000000))
{
tprintf("#NAN\n");
set_flags (FLAGBIT_O, FLAGBIT_O);
cycles (3);
}
else
{
v = mb/ma;
tprintf("%d\n", v);
set_flags (FLAGBIT_O, 0);
PD (v);
div_cycles (mb, ma);
}
break;
case RXO_divu: /* d = d / s */
uma = GS();
umb = GD();
tprintf("%u / %u = ", umb, uma);
if (uma == 0)
{
tprintf("#NAN\n");
set_flags (FLAGBIT_O, FLAGBIT_O);
cycles (2);
}
else
{
v = umb / uma;
tprintf("%u\n", v);
set_flags (FLAGBIT_O, 0);
PD (v);
divu_cycles (umb, uma);
}
break;
case RXO_emul:
ma = GD ();
mb = GS ();
sll = (long long)ma * (long long)mb;
tprintf("%d * %d = %lld\n", ma, mb, sll);
put_reg (opcode->op[0].reg, sll);
put_reg (opcode->op[0].reg + 1, sll >> 32);
E2;
break;
case RXO_emulu:
uma = GD ();
umb = GS ();
ll = (long long)uma * (long long)umb;
tprintf("%#x * %#x = %#llx\n", uma, umb, ll);
put_reg (opcode->op[0].reg, ll);
put_reg (opcode->op[0].reg + 1, ll >> 32);
E2;
break;
case RXO_fadd:
FLOAT_OP (fadd);
E (4);
break;
case RXO_fcmp:
ma = GD();
mb = GS();
FPCLEAR ();
rxfp_cmp (ma, mb);
FPCHECK ();
E (1);
break;
case RXO_fdiv:
FLOAT_OP (fdiv);
E (16);
break;
case RXO_fmul:
FLOAT_OP (fmul);
E (3);
break;
case RXO_rtfi:
PRIVILEDGED ();
regs.r_psw = regs.r_bpsw;
regs.r_pc = regs.r_bpc;
#ifdef CYCLE_ACCURATE
regs.fast_return = 0;
cycles(3);
#endif
break;
case RXO_fsub:
FLOAT_OP (fsub);
E (4);
break;
case RXO_ftoi:
ma = GS ();
FPCLEAR ();
mb = rxfp_ftoi (ma, FPRM_ZERO);
FPCHECK ();
PD (mb);
tprintf("(int) %g = %d\n", int2float(ma), mb);
set_sz (mb, 4);
E (2);
break;
case RXO_int:
v = GS ();
if (v == 255)
{
int rc = rx_syscall (regs.r[5]);
if (! RX_STEPPED (rc))
DO_RETURN (rc);
}
else
{
int old_psw = regs.r_psw;
regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
pushpc (old_psw);
pushpc (regs.r_pc);
regs.r_pc = mem_get_si (regs.r_intb + 4 * v);
}
cycles (6);
break;
case RXO_itof:
ma = GS ();
FPCLEAR ();
mb = rxfp_itof (ma, regs.r_fpsw);
FPCHECK ();
tprintf("(float) %d = %x\n", ma, mb);
PD (mb);
set_sz (ma, 4);
E (2);
break;
case RXO_jsr:
case RXO_jsrrel:
{
#ifdef CYCLE_ACCURATE
int delta;
regs.m2m = 0;
#endif
v = GD ();
#ifdef CYCLE_ACCURATE
regs.link_register = regs.r_pc;
#endif
pushpc (get_reg (pc));
if (opcode->id == RXO_jsrrel)
v += regs.r_pc;
#ifdef CYCLE_ACCURATE
delta = v - regs.r_pc;
#endif
put_reg (pc, v);
#ifdef CYCLE_ACCURATE
/* Note: docs say 3, chip says 2 */
if (delta >= 0 && delta < 16)
{
tprintf ("near forward jsr bonus\n");
cycles (2);
}
else
{
branch_alignment_penalty = 1;
cycles (3);
}
regs.fast_return = 1;
#endif
}
break;
case RXO_machi:
ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
ll <<= 16;
put_reg64 (acc64, ll + regs.r_acc);
E1;
break;
case RXO_maclo:
ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
ll <<= 16;
put_reg64 (acc64, ll + regs.r_acc);
E1;
break;
case RXO_max:
mb = GS();
ma = GD();
if (ma > mb)
PD (ma);
else
PD (mb);
E (1);
break;
case RXO_min:
mb = GS();
ma = GD();
if (ma < mb)
PD (ma);
else
PD (mb);
E (1);
break;
case RXO_mov:
v = GS ();
if (opcode->op[1].type == RX_Operand_Register
&& opcode->op[1].reg == 17 /* PC */)
{
/* Special case. We want the address of the insn, not the
address of the next insn. */
v = opcode_pc;
}
if (opcode->op[0].type == RX_Operand_Register
&& opcode->op[0].reg == 16 /* PSW */)
{
/* Special case, LDC and POPC can't ever modify PM. */
int pm = regs.r_psw & FLAGBIT_PM;
v &= ~ FLAGBIT_PM;
v |= pm;
if (pm)
{
v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
v |= pm;
}
}
if (FLAG_PM)
{
/* various things can't be changed in user mode. */
if (opcode->op[0].type == RX_Operand_Register)
if (opcode->op[0].reg == 32)
{
v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
v |= regs.r_psw & (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
}
if (opcode->op[0].reg == 34 /* ISP */
|| opcode->op[0].reg == 37 /* BPSW */
|| opcode->op[0].reg == 39 /* INTB */
|| opcode->op[0].reg == 38 /* VCT */)
/* These are ignored. */
break;
}
if (OM(0) && OM(1))
cycles (2);
else
cycles (1);
PD (v);
#ifdef CYCLE_ACCURATE
if ((opcode->op[0].type == RX_Operand_Predec
&& opcode->op[1].type == RX_Operand_Register)
|| (opcode->op[0].type == RX_Operand_Postinc
&& opcode->op[1].type == RX_Operand_Register))
{
/* Special case: push reg doesn't cause a memory stall. */
memory_dest = 0;
tprintf("push special case\n");
}
#endif
set_sz (v, DSZ());
break;
case RXO_movbi:
PD (GS ());
cycles (1);
break;
case RXO_movbir:
PS (GD ());
cycles (1);
break;
case RXO_mul:
v = US2 ();
ll = (unsigned long long) US1() * (unsigned long long) v;
PD(ll);
E (1);
break;
case RXO_mulhi:
v = GS2 ();
ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v >> 16);
ll <<= 16;
put_reg64 (acc64, ll);
E1;
break;
case RXO_mullo:
v = GS2 ();
ll = (long long)(signed short)(GS()) * (long long)(signed short)(v);
ll <<= 16;
put_reg64 (acc64, ll);
E1;
break;
case RXO_mvfachi:
PD (get_reg (acchi));
E1;
break;
case RXO_mvfaclo:
PD (get_reg (acclo));
E1;
break;
case RXO_mvfacmi:
PD (get_reg (accmi));
E1;
break;
case RXO_mvtachi:
put_reg (acchi, GS ());
E1;
break;
case RXO_mvtaclo:
put_reg (acclo, GS ());
E1;
break;
case RXO_mvtipl:
regs.r_psw &= ~ FLAGBITS_IPL;
regs.r_psw |= (GS () << FLAGSHIFT_IPL) & FLAGBITS_IPL;
E1;
break;
case RXO_nop:
case RXO_nop2:
case RXO_nop3:
E1;
break;
case RXO_or:
LOGIC_OP (|);
break;
case RXO_popm:
/* POPM cannot pop R0 (sp). */
if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
EXCEPTION (EX_UNDEFINED);
if (opcode->op[1].reg >= opcode->op[2].reg)
{
regs.r_pc = opcode_pc;
DO_RETURN (RX_MAKE_STOPPED (SIGILL));
}
for (v = opcode->op[1].reg; v <= opcode->op[2].reg; v++)
{
cycles (1);
RLD (v);
put_reg (v, pop ());
}
break;
case RXO_pushm:
/* PUSHM cannot push R0 (sp). */
if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
EXCEPTION (EX_UNDEFINED);
if (opcode->op[1].reg >= opcode->op[2].reg)
{
regs.r_pc = opcode_pc;
return RX_MAKE_STOPPED (SIGILL);
}
for (v = opcode->op[2].reg; v >= opcode->op[1].reg; v--)
{
RL (v);
push (get_reg (v));
}
cycles (opcode->op[2].reg - opcode->op[1].reg + 1);
break;
case RXO_racw:
ll = get_reg64 (acc64) << GS ();
ll += 0x80000000ULL;
if ((signed long long)ll > (signed long long)0x00007fff00000000ULL)
ll = 0x00007fff00000000ULL;
else if ((signed long long)ll < (signed long long)0xffff800000000000ULL)
ll = 0xffff800000000000ULL;
else
ll &= 0xffffffff00000000ULL;
put_reg64 (acc64, ll);
E1;
break;
case RXO_rte:
PRIVILEDGED ();
regs.r_pc = poppc ();
regs.r_psw = poppc ();
if (FLAG_PM)
regs.r_psw |= FLAGBIT_U;
#ifdef CYCLE_ACCURATE
regs.fast_return = 0;
cycles (6);
#endif
break;
case RXO_revl:
uma = GS ();
umb = (((uma >> 24) & 0xff)
| ((uma >> 8) & 0xff00)
| ((uma << 8) & 0xff0000)
| ((uma << 24) & 0xff000000UL));
PD (umb);
E1;
break;
case RXO_revw:
uma = GS ();
umb = (((uma >> 8) & 0x00ff00ff)
| ((uma << 8) & 0xff00ff00UL));
PD (umb);
E1;
break;
case RXO_rmpa:
RL(4);
RL(5);
#ifdef CYCLE_ACCURATE
tx = regs.r[3];
#endif
while (regs.r[3] != 0)
{
long long tmp;
switch (opcode->size)
{
case RX_Long:
ma = mem_get_si (regs.r[1]);
mb = mem_get_si (regs.r[2]);
regs.r[1] += 4;
regs.r[2] += 4;
break;
case RX_Word:
ma = sign_ext (mem_get_hi (regs.r[1]), 16);
mb = sign_ext (mem_get_hi (regs.r[2]), 16);
regs.r[1] += 2;
regs.r[2] += 2;
break;
case RX_Byte:
ma = sign_ext (mem_get_qi (regs.r[1]), 8);
mb = sign_ext (mem_get_qi (regs.r[2]), 8);
regs.r[1] += 1;
regs.r[2] += 1;
break;
default:
abort ();
}
/* We do the multiply as a signed value. */
sll = (long long)ma * (long long)mb;
tprintf(" %016llx = %d * %d\n", sll, ma, mb);
/* but we do the sum as unsigned, while sign extending the operands. */
tmp = regs.r[4] + (sll & 0xffffffffUL);
regs.r[4] = tmp & 0xffffffffUL;
tmp >>= 32;
sll >>= 32;
tmp += regs.r[5] + (sll & 0xffffffffUL);
regs.r[5] = tmp & 0xffffffffUL;
tmp >>= 32;
sll >>= 32;
tmp += regs.r[6] + (sll & 0xffffffffUL);
regs.r[6] = tmp & 0xffffffffUL;
tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
(unsigned long) regs.r[6],
(unsigned long) regs.r[5],
(unsigned long) regs.r[4]);
regs.r[3] --;
}
if (regs.r[6] & 0x00008000)
regs.r[6] |= 0xffff0000UL;
else
regs.r[6] &= 0x0000ffff;
ma = (regs.r[6] & 0x80000000UL) ? FLAGBIT_S : 0;
if (regs.r[6] != 0 && regs.r[6] != 0xffffffffUL)
set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O);
else
set_flags (FLAGBIT_O|FLAGBIT_S, ma);
#ifdef CYCLE_ACCURATE
switch (opcode->size)
{
case RX_Long:
cycles (6 + 4 * tx);
break;
case RX_Word:
cycles (6 + 5 * (tx / 2) + 4 * (tx % 2));
break;
case RX_Byte:
cycles (6 + 7 * (tx / 4) + 4 * (tx % 4));
break;
default:
abort ();
}
#endif
break;
case RXO_rolc:
v = GD ();
ma = v & 0x80000000UL;
v <<= 1;
v |= carry;
set_szc (v, 4, ma);
PD (v);
E1;
break;
case RXO_rorc:
uma = GD ();
mb = uma & 1;
uma >>= 1;
uma |= (carry ? 0x80000000UL : 0);
set_szc (uma, 4, mb);
PD (uma);
E1;
break;
case RXO_rotl:
mb = GS ();
uma = GD ();
if (mb)
{
uma = (uma << mb) | (uma >> (32-mb));
mb = uma & 1;
}
set_szc (uma, 4, mb);
PD (uma);
E1;
break;
case RXO_rotr:
mb = GS ();
uma = GD ();
if (mb)
{
uma = (uma >> mb) | (uma << (32-mb));
mb = uma & 0x80000000;
}
set_szc (uma, 4, mb);
PD (uma);
E1;
break;
case RXO_round:
ma = GS ();
FPCLEAR ();
mb = rxfp_ftoi (ma, regs.r_fpsw);
FPCHECK ();
PD (mb);
tprintf("(int) %g = %d\n", int2float(ma), mb);
set_sz (mb, 4);
E (2);
break;
case RXO_rts:
{
#ifdef CYCLE_ACCURATE
int cyc = 5;
#endif
regs.r_pc = poppc ();
#ifdef CYCLE_ACCURATE
/* Note: specs say 5, chip says 3. */
if (regs.fast_return && regs.link_register == regs.r_pc)
{
#ifdef CYCLE_STATS
fast_returns ++;
#endif
tprintf("fast return bonus\n");
cyc -= 2;
}
cycles (cyc);
regs.fast_return = 0;
branch_alignment_penalty = 1;
#endif
}
break;
case RXO_rtsd:
if (opcode->op[2].type == RX_Operand_Register)
{
int i;
/* RTSD cannot pop R0 (sp). */
put_reg (0, get_reg (0) + GS() - (opcode->op[0].reg-opcode->op[2].reg+1)*4);
if (opcode->op[2].reg == 0)
EXCEPTION (EX_UNDEFINED);
#ifdef CYCLE_ACCURATE
tx = opcode->op[0].reg - opcode->op[2].reg + 1;
#endif
for (i = opcode->op[2].reg; i <= opcode->op[0].reg; i ++)
{
RLD (i);
put_reg (i, pop ());
}
}
else
{
#ifdef CYCLE_ACCURATE
tx = 0;
#endif
put_reg (0, get_reg (0) + GS());
}
put_reg (pc, poppc());
#ifdef CYCLE_ACCURATE
if (regs.fast_return && regs.link_register == regs.r_pc)
{
tprintf("fast return bonus\n");
#ifdef CYCLE_STATS
fast_returns ++;
#endif
cycles (tx < 3 ? 3 : tx + 1);
}
else
{
cycles (tx < 5 ? 5 : tx + 1);
}
regs.fast_return = 0;
branch_alignment_penalty = 1;
#endif
break;
case RXO_sat:
if (FLAG_O && FLAG_S)
PD (0x7fffffffUL);
else if (FLAG_O && ! FLAG_S)
PD (0x80000000UL);
E1;
break;
case RXO_sbb:
MATH_OP (-, ! carry);
break;
case RXO_sccnd:
if (GS())
PD (1);
else
PD (0);
E1;
break;
case RXO_scmpu:
#ifdef CYCLE_ACCURATE
tx = regs.r[3];
#endif
while (regs.r[3] != 0)
{
uma = mem_get_qi (regs.r[1] ++);
umb = mem_get_qi (regs.r[2] ++);
regs.r[3] --;
if (uma != umb || uma == 0)
break;
}
if (uma == umb)
set_zc (1, 1);
else
set_zc (0, ((int)uma - (int)umb) >= 0);
cycles (2 + 4 * (tx / 4) + 4 * (tx % 4));
break;
case RXO_setpsw:
v = 1 << opcode->op[0].reg;
if (FLAG_PM
&& (v == FLAGBIT_I
|| v == FLAGBIT_U))
break;
regs.r_psw |= v;
cycles (1);
break;
case RXO_smovb:
RL (3);
#ifdef CYCLE_ACCURATE
tx = regs.r[3];
#endif
while (regs.r[3])
{
uma = mem_get_qi (regs.r[2] --);
mem_put_qi (regs.r[1]--, uma);
regs.r[3] --;
}
#ifdef CYCLE_ACCURATE
if (tx > 3)
cycles (6 + 3 * (tx / 4) + 3 * (tx % 4));
else
cycles (2 + 3 * (tx % 4));
#endif
break;
case RXO_smovf:
RL (3);
#ifdef CYCLE_ACCURATE
tx = regs.r[3];
#endif
while (regs.r[3])
{
uma = mem_get_qi (regs.r[2] ++);
mem_put_qi (regs.r[1]++, uma);
regs.r[3] --;
}
cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
break;
case RXO_smovu:
#ifdef CYCLE_ACCURATE
tx = regs.r[3];
#endif
while (regs.r[3] != 0)
{
uma = mem_get_qi (regs.r[2] ++);
mem_put_qi (regs.r[1]++, uma);
regs.r[3] --;
if (uma == 0)
break;
}
cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
break;
case RXO_shar: /* d = ma >> mb */
SHIFT_OP (sll, int, mb, >>=, 1);
E (1);
break;
case RXO_shll: /* d = ma << mb */
SHIFT_OP (ll, int, mb, <<=, 0x80000000UL);
E (1);
break;
case RXO_shlr: /* d = ma >> mb */
SHIFT_OP (ll, unsigned int, mb, >>=, 1);
E (1);
break;
case RXO_sstr:
RL (3);
#ifdef CYCLE_ACCURATE
tx = regs.r[3];
#endif
switch (opcode->size)
{
case RX_Long:
while (regs.r[3] != 0)
{
mem_put_si (regs.r[1], regs.r[2]);
regs.r[1] += 4;
regs.r[3] --;
}
cycles (2 + tx);
break;
case RX_Word:
while (regs.r[3] != 0)
{
mem_put_hi (regs.r[1], regs.r[2]);
regs.r[1] += 2;
regs.r[3] --;
}
cycles (2 + (int)(tx / 2) + tx % 2);
break;
case RX_Byte:
while (regs.r[3] != 0)
{
mem_put_qi (regs.r[1], regs.r[2]);
regs.r[1] ++;
regs.r[3] --;
}
cycles (2 + (int)(tx / 4) + tx % 4);
break;
default:
abort ();
}
break;
case RXO_stcc:
if (GS2())
PD (GS ());
E1;
break;
case RXO_stop:
PRIVILEDGED ();
regs.r_psw |= FLAGBIT_I;
DO_RETURN (RX_MAKE_STOPPED(0));
case RXO_sub:
MATH_OP (-, 0);
break;
case RXO_suntil:
RL(3);
#ifdef CYCLE_ACCURATE
tx = 0;
#endif
if (regs.r[3] == 0)
{
cycles (3);
break;
}
switch (opcode->size)
{
case RX_Long:
uma = get_reg (2);
while (regs.r[3] != 0)
{
regs.r[3] --;
umb = mem_get_si (get_reg (1));
regs.r[1] += 4;
#ifdef CYCLE_ACCURATE
tx ++;
#endif
if (umb == uma)
break;
}
#ifdef CYCLE_ACCURATE
cycles (3 + 3 * tx);
#endif
break;
case RX_Word:
uma = get_reg (2) & 0xffff;
while (regs.r[3] != 0)
{
regs.r[3] --;
umb = mem_get_hi (get_reg (1));
regs.r[1] += 2;
#ifdef CYCLE_ACCURATE
tx ++;
#endif
if (umb == uma)
break;
}
#ifdef CYCLE_ACCURATE
cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
#endif
break;
case RX_Byte:
uma = get_reg (2) & 0xff;
while (regs.r[3] != 0)
{
regs.r[3] --;
umb = mem_get_qi (regs.r[1]);
regs.r[1] += 1;
#ifdef CYCLE_ACCURATE
tx ++;
#endif
if (umb == uma)
break;
}
#ifdef CYCLE_ACCURATE
cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
#endif
break;
default:
abort();
}
if (uma == umb)
set_zc (1, 1);
else
set_zc (0, ((int)uma - (int)umb) >= 0);
break;
case RXO_swhile:
RL(3);
#ifdef CYCLE_ACCURATE
tx = 0;
#endif
if (regs.r[3] == 0)
break;
switch (opcode->size)
{
case RX_Long:
uma = get_reg (2);
while (regs.r[3] != 0)
{
regs.r[3] --;
umb = mem_get_si (get_reg (1));
regs.r[1] += 4;
#ifdef CYCLE_ACCURATE
tx ++;
#endif
if (umb != uma)
break;
}
#ifdef CYCLE_ACCURATE
cycles (3 + 3 * tx);
#endif
break;
case RX_Word:
uma = get_reg (2) & 0xffff;
while (regs.r[3] != 0)
{
regs.r[3] --;
umb = mem_get_hi (get_reg (1));
regs.r[1] += 2;
#ifdef CYCLE_ACCURATE
tx ++;
#endif
if (umb != uma)
break;
}
#ifdef CYCLE_ACCURATE
cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
#endif
break;
case RX_Byte:
uma = get_reg (2) & 0xff;
while (regs.r[3] != 0)
{
regs.r[3] --;
umb = mem_get_qi (regs.r[1]);
regs.r[1] += 1;
#ifdef CYCLE_ACCURATE
tx ++;
#endif
if (umb != uma)
break;
}
#ifdef CYCLE_ACCURATE
cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
#endif
break;
default:
abort();
}
if (uma == umb)
set_zc (1, 1);
else
set_zc (0, ((int)uma - (int)umb) >= 0);
break;
case RXO_wait:
PRIVILEDGED ();
regs.r_psw |= FLAGBIT_I;
DO_RETURN (RX_MAKE_STOPPED(0));
case RXO_xchg:
#ifdef CYCLE_ACCURATE
regs.m2m = 0;
#endif
v = GS (); /* This is the memory operand, if any. */
PS (GD ()); /* and this may change the address register. */
PD (v);
E2;
#ifdef CYCLE_ACCURATE
/* all M cycles happen during xchg's cycles. */
memory_dest = 0;
memory_source = 0;
#endif
break;
case RXO_xor:
LOGIC_OP (^);
break;
default:
EXCEPTION (EX_UNDEFINED);
}
#ifdef CYCLE_ACCURATE
regs.m2m = 0;
if (memory_source)
regs.m2m |= M2M_SRC;
if (memory_dest)
regs.m2m |= M2M_DST;
regs.rt = new_rt;
new_rt = -1;
#endif
#ifdef CYCLE_STATS
if (prev_cycle_count == regs.cycle_count)
{
printf("Cycle count not updated! id %s\n", id_names[opcode->id]);
abort ();
}
#endif
#ifdef CYCLE_STATS
if (running_benchmark)
{
int omap = op_lookup (opcode->op[0].type, opcode->op[1].type, opcode->op[2].type);
cycles_per_id[opcode->id][omap] += regs.cycle_count - prev_cycle_count;
times_per_id[opcode->id][omap] ++;
times_per_pair[prev_opcode_id][po0][opcode->id][omap] ++;
prev_opcode_id = opcode->id;
po0 = omap;
}
#endif
return RX_MAKE_STEPPED ();
}
#ifdef CYCLE_STATS
/* Zero all accumulated pipeline statistics, then flag a benchmark as
   running and record the current cycle count as its starting point.  */
void
reset_pipeline_stats (void)
{
  /* Scalar stall / event counters.  */
  fast_returns = 0;
  branch_alignment_stalls = 0;
  branch_stalls = 0;
  register_stalls = 0;
  memory_stalls = 0;

  /* Per-opcode and per-opcode-pair tables.  */
  memset (times_per_pair, 0, sizeof times_per_pair);
  memset (times_per_id, 0, sizeof times_per_id);
  memset (cycles_per_id, 0, sizeof cycles_per_id);

  /* Begin a fresh benchmark window.  */
  benchmark_start_cycle = regs.cycle_count;
  running_benchmark = 1;
}
/* Record the current cycle count as the end of the benchmark window
   and clear the running-benchmark flag.  */
void
halt_pipeline_stats (void)
{
  benchmark_end_cycle = regs.cycle_count;
  running_benchmark = 0;
}
#endif
/* Print cycle and pipeline statistics to stdout.

   With CYCLE_ACCURATE defined, the total cycle count is printed first;
   when `verbose' is exactly 1 only the raw number is printed and the
   function returns immediately.

   With CYCLE_STATS defined, the following are then printed: the
   benchmark cycle span (only when benchmark_start_cycle is nonzero),
   one line per (opcode id, operand map) combination that was counted
   at least once, one line per consecutive pair of such combinations
   that was counted at least once, and finally the stall and
   fast-return counters.  */
void
pipeline_stats (void)
{
#ifdef CYCLE_STATS
  int id, map;
  int prev_id, prev_map;
#endif

#ifdef CYCLE_ACCURATE
  if (verbose != 1)
    printf ("cycles: %13s\n", comma (regs.cycle_count));
  else
    {
      /* Terse mode: just the plain number, nothing else.  */
      printf ("cycles: %llu\n", regs.cycle_count);
      return;
    }
#endif

#ifdef CYCLE_STATS
  if (benchmark_start_cycle)
    printf ("bmark: %13s\n", comma (benchmark_end_cycle - benchmark_start_cycle));

  /* Per-opcode table: total cycles, hit count, average cycles.  */
  printf("\n");
  for (id = 0; id < N_RXO; id++)
    {
      for (map = 0; map < N_MAP; map ++)
	{
	  if (times_per_id[id][map] == 0)
	    continue;

	  /* The "+4" skips the leading "RXO_" of the name.  */
	  printf("%13s %13s %7.2f %s %s\n",
		 comma (cycles_per_id[id][map]),
		 comma (times_per_id[id][map]),
		 (double)cycles_per_id[id][map] / times_per_id[id][map],
		 op_cache_string(map),
		 id_names[id]+4);
	}
    }

  /* Opcode-pair table: how often one combination followed another.  */
  printf("\n");
  for (prev_id = 0; prev_id < N_RXO; prev_id ++)
    {
      for (prev_map = 0; prev_map < N_MAP; prev_map ++)
	{
	  for (id = 0; id < N_RXO; id ++)
	    {
	      for (map = 0; map < N_MAP; map ++)
		{
		  if (times_per_pair[prev_id][prev_map][id][map] == 0)
		    continue;

		  printf("%13s %s %-9s -> %s %s\n",
			 comma (times_per_pair[prev_id][prev_map][id][map]),
			 op_cache_string(prev_map),
			 id_names[prev_id]+4,
			 op_cache_string(map),
			 id_names[id]+4);
		}
	    }
	}
    }

  /* Stall and fast-return counters.  */
  printf("\n");
  printf("%13s memory stalls\n", comma (memory_stalls));
  printf("%13s register stalls\n", comma (register_stalls));
  printf("%13s branches taken (non-return)\n", comma (branch_stalls));
  printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls));
  printf("%13s fast returns\n", comma (fast_returns));
#endif
}