Peter Maydell da97f52cb3 target-arm: Implement VFPv4 fused multiply-accumulate insns
Implement the fused multiply-accumulate instructions (VFMA, VFMS,
VFNMA, VFNMS) which are new in VFPv4.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2011-10-19 16:14:07 +00:00

539 lines
18 KiB
C

/*
* ARM virtual CPU header
*
* Copyright (c) 2003 Fabrice Bellard
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef CPU_ARM_H
#define CPU_ARM_H
#define TARGET_LONG_BITS 32
#define ELF_MACHINE EM_ARM
#define CPUState struct CPUARMState
#include "config.h"
#include "qemu-common.h"
#include "cpu-defs.h"
#include "softfloat.h"
#define TARGET_HAS_ICE 1
#define EXCP_UDEF 1 /* undefined instruction */
#define EXCP_SWI 2 /* software interrupt */
#define EXCP_PREFETCH_ABORT 3
#define EXCP_DATA_ABORT 4
#define EXCP_IRQ 5
#define EXCP_FIQ 6
#define EXCP_BKPT 7
#define EXCP_EXCEPTION_EXIT 8 /* Return from v7M exception. */
#define EXCP_KERNEL_TRAP 9 /* Jumped to kernel code page. */
#define EXCP_STREX 10
#define ARMV7M_EXCP_RESET 1
#define ARMV7M_EXCP_NMI 2
#define ARMV7M_EXCP_HARD 3
#define ARMV7M_EXCP_MEM 4
#define ARMV7M_EXCP_BUS 5
#define ARMV7M_EXCP_USAGE 6
#define ARMV7M_EXCP_SVC 11
#define ARMV7M_EXCP_DEBUG 12
#define ARMV7M_EXCP_PENDSV 14
#define ARMV7M_EXCP_SYSTICK 15
/* ARM-specific interrupt pending bits. */
#define CPU_INTERRUPT_FIQ CPU_INTERRUPT_TGT_EXT_1
typedef void ARMWriteCPFunc(void *opaque, int cp_info,
int srcreg, int operand, uint32_t value);
typedef uint32_t ARMReadCPFunc(void *opaque, int cp_info,
int dstreg, int operand);
struct arm_boot_info;
#define NB_MMU_MODES 2
/* We currently assume float and double are IEEE single and double
precision respectively.
Doing runtime conversions is tricky because VFP registers may contain
integer values (eg. as the result of a FTOSI instruction).
s<2n> maps to the least significant half of d<n>
s<2n+1> maps to the most significant half of d<n>
*/
typedef struct CPUARMState {
/* Regs for current mode. */
uint32_t regs[16];
/* Frequently accessed CPSR bits are stored separately for efficiently.
This contains all the other bits. Use cpsr_{read,write} to access
the whole CPSR. */
uint32_t uncached_cpsr;
uint32_t spsr;
/* Banked registers. */
uint32_t banked_spsr[6];
uint32_t banked_r13[6];
uint32_t banked_r14[6];
/* These hold r8-r12. */
uint32_t usr_regs[5];
uint32_t fiq_regs[5];
/* cpsr flag cache for faster execution */
uint32_t CF; /* 0 or 1 */
uint32_t VF; /* V is the bit 31. All other bits are undefined */
uint32_t NF; /* N is bit 31. All other bits are undefined. */
uint32_t ZF; /* Z set if zero. */
uint32_t QF; /* 0 or 1 */
uint32_t GE; /* cpsr[19:16] */
uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
uint32_t condexec_bits; /* IT bits. cpsr[15:10,26:25]. */
/* System control coprocessor (cp15) */
struct {
uint32_t c0_cpuid;
uint32_t c0_cachetype;
uint32_t c0_ccsid[16]; /* Cache size. */
uint32_t c0_clid; /* Cache level. */
uint32_t c0_cssel; /* Cache size selection. */
uint32_t c0_c1[8]; /* Feature registers. */
uint32_t c0_c2[8]; /* Instruction set registers. */
uint32_t c1_sys; /* System control register. */
uint32_t c1_coproc; /* Coprocessor access register. */
uint32_t c1_xscaleauxcr; /* XScale auxiliary control register. */
uint32_t c2_base0; /* MMU translation table base 0. */
uint32_t c2_base1; /* MMU translation table base 1. */
uint32_t c2_control; /* MMU translation table base control. */
uint32_t c2_mask; /* MMU translation table base selection mask. */
uint32_t c2_base_mask; /* MMU translation table base 0 mask. */
uint32_t c2_data; /* MPU data cachable bits. */
uint32_t c2_insn; /* MPU instruction cachable bits. */
uint32_t c3; /* MMU domain access control register
MPU write buffer control. */
uint32_t c5_insn; /* Fault status registers. */
uint32_t c5_data;
uint32_t c6_region[8]; /* MPU base/size registers. */
uint32_t c6_insn; /* Fault address registers. */
uint32_t c6_data;
uint32_t c7_par; /* Translation result. */
uint32_t c9_insn; /* Cache lockdown registers. */
uint32_t c9_data;
uint32_t c9_pmcr; /* performance monitor control register */
uint32_t c9_pmcnten; /* perf monitor counter enables */
uint32_t c9_pmovsr; /* perf monitor overflow status */
uint32_t c9_pmxevtyper; /* perf monitor event type */
uint32_t c9_pmuserenr; /* perf monitor user enable */
uint32_t c9_pminten; /* perf monitor interrupt enables */
uint32_t c13_fcse; /* FCSE PID. */
uint32_t c13_context; /* Context ID. */
uint32_t c13_tls1; /* User RW Thread register. */
uint32_t c13_tls2; /* User RO Thread register. */
uint32_t c13_tls3; /* Privileged Thread register. */
uint32_t c15_cpar; /* XScale Coprocessor Access Register */
uint32_t c15_ticonfig; /* TI925T configuration byte. */
uint32_t c15_i_max; /* Maximum D-cache dirty line index. */
uint32_t c15_i_min; /* Minimum D-cache dirty line index. */
uint32_t c15_threadid; /* TI debugger thread-ID. */
} cp15;
struct {
uint32_t other_sp;
uint32_t vecbase;
uint32_t basepri;
uint32_t control;
int current_sp;
int exception;
int pending_exception;
} v7m;
/* Thumb-2 EE state. */
uint32_t teecr;
uint32_t teehbr;
/* Internal CPU feature flags. */
uint32_t features;
/* VFP coprocessor state. */
struct {
float64 regs[32];
uint32_t xregs[16];
/* We store these fpcsr fields separately for convenience. */
int vec_len;
int vec_stride;
/* scratch space when Tn are not sufficient. */
uint32_t scratch[8];
/* fp_status is the "normal" fp status. standard_fp_status retains
* values corresponding to the ARM "Standard FPSCR Value", ie
* default-NaN, flush-to-zero, round-to-nearest and is used by
* any operations (generally Neon) which the architecture defines
* as controlled by the standard FPSCR value rather than the FPSCR.
*
* To avoid having to transfer exception bits around, we simply
* say that the FPSCR cumulative exception flags are the logical
* OR of the flags in the two fp statuses. This relies on the
* only thing which needs to read the exception flags being
* an explicit FPSCR read.
*/
float_status fp_status;
float_status standard_fp_status;
} vfp;
uint32_t exclusive_addr;
uint32_t exclusive_val;
uint32_t exclusive_high;
#if defined(CONFIG_USER_ONLY)
uint32_t exclusive_test;
uint32_t exclusive_info;
#endif
/* iwMMXt coprocessor state. */
struct {
uint64_t regs[16];
uint64_t val;
uint32_t cregs[16];
} iwmmxt;
#if defined(CONFIG_USER_ONLY)
/* For usermode syscall translation. */
int eabi;
#endif
CPU_COMMON
/* These fields after the common ones so they are preserved on reset. */
/* Coprocessor IO used by peripherals */
struct {
ARMReadCPFunc *cp_read;
ARMWriteCPFunc *cp_write;
void *opaque;
} cp[15];
void *nvic;
const struct arm_boot_info *boot_info;
} CPUARMState;
CPUARMState *cpu_arm_init(const char *cpu_model);
void arm_translate_init(void);
int cpu_arm_exec(CPUARMState *s);
void cpu_arm_close(CPUARMState *s);
void do_interrupt(CPUARMState *);
void switch_mode(CPUARMState *, int);
uint32_t do_arm_semihosting(CPUARMState *env);
/* you can call this signal handler from your SIGBUS and SIGSEGV
signal handlers to inform the virtual CPU of exceptions. non zero
is returned if the signal was handled by the virtual CPU. */
int cpu_arm_signal_handler(int host_signum, void *pinfo,
void *puc);
int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
int mmu_idx);
#define cpu_handle_mmu_fault cpu_arm_handle_mmu_fault
static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
{
env->cp15.c13_tls2 = newtls;
}
#define CPSR_M (0x1f)
#define CPSR_T (1 << 5)
#define CPSR_F (1 << 6)
#define CPSR_I (1 << 7)
#define CPSR_A (1 << 8)
#define CPSR_E (1 << 9)
#define CPSR_IT_2_7 (0xfc00)
#define CPSR_GE (0xf << 16)
#define CPSR_RESERVED (0xf << 20)
#define CPSR_J (1 << 24)
#define CPSR_IT_0_1 (3 << 25)
#define CPSR_Q (1 << 27)
#define CPSR_V (1 << 28)
#define CPSR_C (1 << 29)
#define CPSR_Z (1 << 30)
#define CPSR_N (1 << 31)
#define CPSR_NZCV (CPSR_N | CPSR_Z | CPSR_C | CPSR_V)
#define CPSR_IT (CPSR_IT_0_1 | CPSR_IT_2_7)
#define CACHED_CPSR_BITS (CPSR_T | CPSR_GE | CPSR_IT | CPSR_Q | CPSR_NZCV)
/* Bits writable in user mode. */
#define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE)
/* Execution state bits. MRS read as zero, MSR writes ignored. */
#define CPSR_EXEC (CPSR_T | CPSR_IT | CPSR_J)
/* Return the current CPSR value. */
uint32_t cpsr_read(CPUARMState *env);
/* Set the CPSR. Note that some bits of mask must be all-set or all-clear. */
void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask);
/* Return the current xPSR value. */
static inline uint32_t xpsr_read(CPUARMState *env)
{
int ZF;
ZF = (env->ZF == 0);
return (env->NF & 0x80000000) | (ZF << 30)
| (env->CF << 29) | ((env->VF & 0x80000000) >> 3) | (env->QF << 27)
| (env->thumb << 24) | ((env->condexec_bits & 3) << 25)
| ((env->condexec_bits & 0xfc) << 8)
| env->v7m.exception;
}
/* Set the xPSR. Note that some bits of mask must be all-set or all-clear. */
static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
{
if (mask & CPSR_NZCV) {
env->ZF = (~val) & CPSR_Z;
env->NF = val;
env->CF = (val >> 29) & 1;
env->VF = (val << 3) & 0x80000000;
}
if (mask & CPSR_Q)
env->QF = ((val & CPSR_Q) != 0);
if (mask & (1 << 24))
env->thumb = ((val & (1 << 24)) != 0);
if (mask & CPSR_IT_0_1) {
env->condexec_bits &= ~3;
env->condexec_bits |= (val >> 25) & 3;
}
if (mask & CPSR_IT_2_7) {
env->condexec_bits &= 3;
env->condexec_bits |= (val >> 8) & 0xfc;
}
if (mask & 0x1ff) {
env->v7m.exception = val & 0x1ff;
}
}
/* Return the current FPSCR value. */
uint32_t vfp_get_fpscr(CPUARMState *env);
void vfp_set_fpscr(CPUARMState *env, uint32_t val);
enum arm_cpu_mode {
ARM_CPU_MODE_USR = 0x10,
ARM_CPU_MODE_FIQ = 0x11,
ARM_CPU_MODE_IRQ = 0x12,
ARM_CPU_MODE_SVC = 0x13,
ARM_CPU_MODE_ABT = 0x17,
ARM_CPU_MODE_UND = 0x1b,
ARM_CPU_MODE_SYS = 0x1f
};
/* VFP system registers. */
#define ARM_VFP_FPSID 0
#define ARM_VFP_FPSCR 1
#define ARM_VFP_MVFR1 6
#define ARM_VFP_MVFR0 7
#define ARM_VFP_FPEXC 8
#define ARM_VFP_FPINST 9
#define ARM_VFP_FPINST2 10
/* iwMMXt coprocessor control registers. */
#define ARM_IWMMXT_wCID 0
#define ARM_IWMMXT_wCon 1
#define ARM_IWMMXT_wCSSF 2
#define ARM_IWMMXT_wCASF 3
#define ARM_IWMMXT_wCGR0 8
#define ARM_IWMMXT_wCGR1 9
#define ARM_IWMMXT_wCGR2 10
#define ARM_IWMMXT_wCGR3 11
enum arm_features {
ARM_FEATURE_VFP,
ARM_FEATURE_AUXCR, /* ARM1026 Auxiliary control register. */
ARM_FEATURE_XSCALE, /* Intel XScale extensions. */
ARM_FEATURE_IWMMXT, /* Intel iwMMXt extension. */
ARM_FEATURE_V6,
ARM_FEATURE_V6K,
ARM_FEATURE_V7,
ARM_FEATURE_THUMB2,
ARM_FEATURE_MPU, /* Only has Memory Protection Unit, not full MMU. */
ARM_FEATURE_VFP3,
ARM_FEATURE_VFP_FP16,
ARM_FEATURE_NEON,
ARM_FEATURE_THUMB_DIV, /* divide supported in Thumb encoding */
ARM_FEATURE_M, /* Microcontroller profile. */
ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling. */
ARM_FEATURE_THUMB2EE,
ARM_FEATURE_V7MP, /* v7 Multiprocessing Extensions */
ARM_FEATURE_V4T,
ARM_FEATURE_V5,
ARM_FEATURE_STRONGARM,
ARM_FEATURE_VAPA, /* cp15 VA to PA lookups */
ARM_FEATURE_ARM_DIV, /* divide supported in ARM encoding */
ARM_FEATURE_VFP4, /* VFPv4 (implies that NEON is v2) */
};
static inline int arm_feature(CPUARMState *env, int feature)
{
return (env->features & (1u << feature)) != 0;
}
void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
/* Interface between CPU and Interrupt controller. */
void armv7m_nvic_set_pending(void *opaque, int irq);
int armv7m_nvic_acknowledge_irq(void *opaque);
void armv7m_nvic_complete_irq(void *opaque, int irq);
void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
ARMReadCPFunc *cp_read, ARMWriteCPFunc *cp_write,
void *opaque);
/* Does the core conform to the the "MicroController" profile. e.g. Cortex-M3.
Note the M in older cores (eg. ARM7TDMI) stands for Multiply. These are
conventional cores (ie. Application or Realtime profile). */
#define IS_M(env) arm_feature(env, ARM_FEATURE_M)
#define ARM_CPUID(env) (env->cp15.c0_cpuid)
#define ARM_CPUID_ARM1026 0x4106a262
#define ARM_CPUID_ARM926 0x41069265
#define ARM_CPUID_ARM946 0x41059461
#define ARM_CPUID_TI915T 0x54029152
#define ARM_CPUID_TI925T 0x54029252
#define ARM_CPUID_SA1100 0x4401A11B
#define ARM_CPUID_SA1110 0x6901B119
#define ARM_CPUID_PXA250 0x69052100
#define ARM_CPUID_PXA255 0x69052d00
#define ARM_CPUID_PXA260 0x69052903
#define ARM_CPUID_PXA261 0x69052d05
#define ARM_CPUID_PXA262 0x69052d06
#define ARM_CPUID_PXA270 0x69054110
#define ARM_CPUID_PXA270_A0 0x69054110
#define ARM_CPUID_PXA270_A1 0x69054111
#define ARM_CPUID_PXA270_B0 0x69054112
#define ARM_CPUID_PXA270_B1 0x69054113
#define ARM_CPUID_PXA270_C0 0x69054114
#define ARM_CPUID_PXA270_C5 0x69054117
#define ARM_CPUID_ARM1136 0x4117b363
#define ARM_CPUID_ARM1136_R2 0x4107b362
#define ARM_CPUID_ARM1176 0x410fb767
#define ARM_CPUID_ARM11MPCORE 0x410fb022
#define ARM_CPUID_CORTEXA8 0x410fc080
#define ARM_CPUID_CORTEXA9 0x410fc090
#define ARM_CPUID_CORTEXM3 0x410fc231
#define ARM_CPUID_ANY 0xffffffff
#if defined(CONFIG_USER_ONLY)
#define TARGET_PAGE_BITS 12
#else
/* The ARM MMU allows 1k pages. */
/* ??? Linux doesn't actually use these, and they're deprecated in recent
architecture revisions. Maybe a configure option to disable them. */
#define TARGET_PAGE_BITS 10
#endif
#define TARGET_PHYS_ADDR_SPACE_BITS 32
#define TARGET_VIRT_ADDR_SPACE_BITS 32
#define cpu_init cpu_arm_init
#define cpu_exec cpu_arm_exec
#define cpu_gen_code cpu_arm_gen_code
#define cpu_signal_handler cpu_arm_signal_handler
#define cpu_list arm_cpu_list
#define CPU_SAVE_VERSION 4
/* MMU modes definitions */
#define MMU_MODE0_SUFFIX _kernel
#define MMU_MODE1_SUFFIX _user
#define MMU_USER_IDX 1
static inline int cpu_mmu_index (CPUState *env)
{
return (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR ? 1 : 0;
}
#if defined(CONFIG_USER_ONLY)
static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
{
if (newsp)
env->regs[13] = newsp;
env->regs[0] = 0;
}
#endif
#include "cpu-all.h"
/* Bit usage in the TB flags field: */
#define ARM_TBFLAG_THUMB_SHIFT 0
#define ARM_TBFLAG_THUMB_MASK (1 << ARM_TBFLAG_THUMB_SHIFT)
#define ARM_TBFLAG_VECLEN_SHIFT 1
#define ARM_TBFLAG_VECLEN_MASK (0x7 << ARM_TBFLAG_VECLEN_SHIFT)
#define ARM_TBFLAG_VECSTRIDE_SHIFT 4
#define ARM_TBFLAG_VECSTRIDE_MASK (0x3 << ARM_TBFLAG_VECSTRIDE_SHIFT)
#define ARM_TBFLAG_PRIV_SHIFT 6
#define ARM_TBFLAG_PRIV_MASK (1 << ARM_TBFLAG_PRIV_SHIFT)
#define ARM_TBFLAG_VFPEN_SHIFT 7
#define ARM_TBFLAG_VFPEN_MASK (1 << ARM_TBFLAG_VFPEN_SHIFT)
#define ARM_TBFLAG_CONDEXEC_SHIFT 8
#define ARM_TBFLAG_CONDEXEC_MASK (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
/* Bits 31..16 are currently unused. */
/* some convenience accessor macros */
#define ARM_TBFLAG_THUMB(F) \
(((F) & ARM_TBFLAG_THUMB_MASK) >> ARM_TBFLAG_THUMB_SHIFT)
#define ARM_TBFLAG_VECLEN(F) \
(((F) & ARM_TBFLAG_VECLEN_MASK) >> ARM_TBFLAG_VECLEN_SHIFT)
#define ARM_TBFLAG_VECSTRIDE(F) \
(((F) & ARM_TBFLAG_VECSTRIDE_MASK) >> ARM_TBFLAG_VECSTRIDE_SHIFT)
#define ARM_TBFLAG_PRIV(F) \
(((F) & ARM_TBFLAG_PRIV_MASK) >> ARM_TBFLAG_PRIV_SHIFT)
#define ARM_TBFLAG_VFPEN(F) \
(((F) & ARM_TBFLAG_VFPEN_MASK) >> ARM_TBFLAG_VFPEN_SHIFT)
#define ARM_TBFLAG_CONDEXEC(F) \
(((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)
static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
target_ulong *cs_base, int *flags)
{
int privmode;
*pc = env->regs[15];
*cs_base = 0;
*flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
| (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
| (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
| (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT);
if (arm_feature(env, ARM_FEATURE_M)) {
privmode = !((env->v7m.exception == 0) && (env->v7m.control & 1));
} else {
privmode = (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR;
}
if (privmode) {
*flags |= ARM_TBFLAG_PRIV_MASK;
}
if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
*flags |= ARM_TBFLAG_VFPEN_MASK;
}
}
static inline bool cpu_has_work(CPUState *env)
{
return env->interrupt_request &
(CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB);
}
#include "exec-all.h"
static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
{
env->regs[15] = tb->pc;
}
#endif