4f9a4cd37e
1) Performance improvement: Add pkt and insn to DisasContext. Many functions need information from all 3 structures, so merge them together. 2) Bug fix: Fix predicated assignment to .tmp and .cur. 3) Performance improvement: Add overrides for S2_asr_r_r_sat/S2_asl_r_r_sat. These functions will not be handled by idef-parser. 4-11) The final 8 patches improve change-of-flow handling. Currently, we set the PC to a new address before exiting a TB. The ultimate goal is to use direct block chaining. However, several steps are needed along the way. 4) When a packet has more than one change-of-flow (COF) instruction, only the first one taken is considered. The runtime bookkeeping is only needed when there is more than one COF instruction in a packet. 5, 6) Remove PC and next_PC from the runtime state and always use a translation-time constant. Note that next_PC is used by call instructions to set LR and by conditional COF instructions to set the fall-through address. 7, 8, 9) Add helper overrides for COF instructions. In particular, we must distinguish those that use a PC-relative address for the destination. These are candidates for direct block chaining later. 10) Use direct block chaining for packets that have a single PC-relative COF instruction. Instead of generating the code while processing the instruction, we record the effect in DisasContext and generate the code during gen_end_tb. 11) Use direct block chaining for tight loops. We look for TBs that end with an endloop0 that will branch back to the TB start address. 
12-21) Instruction definition parser (idef-parser) from rev.ng Parses the instruction semantics and generates TCG -----BEGIN PGP SIGNATURE----- iQEzBAABCgAdFiEENjXHiM5iuR/UxZq0ewJE+xLeRCIFAmOc2BEACgkQewJE+xLe RCKqFwf/U/uWaQiF59OXyLHj9PR/bTf7PmZL12g8MTrntzmtIpRiTQb7ajJaLwyn TcCG9j9Ss6kWBq+LH5TBvstnSN9/3qEgnj2b26y6EAn85mSh6fai4foUPjXFUy7m 2Of0kuc2WKmwxN9C2iw6Hm6pbL3FSnYzKtBuSFzYyAIS0doLFT97zE97XnBtTQ4C 49JdNgQW9CKt7cCpKTcQA4N3ZO8LdARdvOtTShX1++qd4Trm0haTGRdaygSrTlS7 Eeqs4nbakKEE6VH2iltPGKX+KHbMCf2ZW7lefxHi+EuzE0DBIVoM64UnalyFfcSU hVMGF15HgAIAjecim0Y4AbPB/zVlEw== =PC9+ -----END PGP SIGNATURE----- Merge tag 'pull-hex-20221216-1' of https://github.com/quic/qemu into staging 1) Performance improvement Add pkt and insn to DisasContext Many functions need information from all 3 structures, so merge them together. 2) Bug fix Fix predicated assignment to .tmp and .cur 3) Performance improvement Add overrides for S2_asr_r_r_sat/S2_asl_r_r_sat These functions will not be handled by idef-parser 4-11) The final 8 patches improve change-of-flow handling. Currently, we set the PC to a new address before exiting a TB. The ultimate goal is to use direct block chaining. However, several steps are needed along the way. 4) When a packet has more than one change-of-flow (COF) instruction, only the first one taken is considered. The runtime bookkeeping is only needed when there is more than one COF instruction in a packet. 5, 6) Remove PC and next_PC from the runtime state and always use a translation-time constant. Note that next_PC is used by call instructions to set LR and by conditional COF instructions to set the fall-through address. 7, 8, 9) Add helper overrides for COF instructions. In particular, we must distinguish those that use a PC-relative address for the destination. These are candidates for direct block chaining later. 10) Use direct block chaining for packets that have a single PC-relative COF instruction. 
Instead of generating the code while processing the instruction, we record the effect in DisasContext and generate the code during gen_end_tb. 11) Use direct block chaining for tight loops. We look for TBs that end with an endloop0 that will branch back to the TB start address. 12-21) Instruction definition parser (idef-parser) from rev.ng Parses the instruction semantics and generates TCG # gpg: Signature made Fri 16 Dec 2022 20:41:53 GMT # gpg: using RSA key 3635C788CE62B91FD4C59AB47B0244FB12DE4422 # gpg: Good signature from "Taylor Simpson (Rock on) <tsimpson@quicinc.com>" [undefined] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 3635 C788 CE62 B91F D4C5 9AB4 7B02 44FB 12DE 4422 * tag 'pull-hex-20221216-1' of https://github.com/quic/qemu: (21 commits) target/hexagon: import additional tests target/hexagon: call idef-parser functions target/hexagon: import parser for idef-parser target/hexagon: import lexer for idef-parser target/hexagon: prepare input for the idef-parser target/hexagon: introduce new helper functions target/hexagon: make helper functions non-static target/hexagon: make slot number an unsigned target/hexagon: import README for idef-parser target/hexagon: update MAINTAINERS for idef-parser Hexagon (target/hexagon) Use direct block chaining for tight loops Hexagon (target/hexagon) Use direct block chaining for direct jump/branch Hexagon (target/hexagon) Add overrides for various forms of jump Hexagon (target/hexagon) Add overrides for compound compare and jump Hexagon (target/hexagon) Add overrides for direct call instructions Hexagon (target/hexagon) Remove next_PC from runtime state Hexagon (target/hexagon) Remove PC from the runtime state Hexagon (target/hexagon) Only use branch_taken when packet has multi cof Hexagon (target/hexagon) Add overrides for S2_asr_r_r_sat/S2_asl_r_r_sat Hexagon (target/hexagon) Fix predicated 
assignment to .tmp and .cur ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
186 lines
5.0 KiB
C
186 lines
5.0 KiB
C
/*
|
|
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#ifndef HEXAGON_CPU_H
|
|
#define HEXAGON_CPU_H
|
|
|
|
#include "fpu/softfloat-types.h"
|
|
|
|
#include "exec/cpu-defs.h"
|
|
#include "hex_regs.h"
|
|
#include "mmvec/mmvec.h"
|
|
#include "qom/object.h"
|
|
#include "hw/core/cpu.h"
|
|
#include "hw/registerfields.h"
|
|
|
|
/* Number of predicate registers; sizes pred[] and new_pred_value[]. */
#define NUM_PREGS 4
/* Per-thread register count; sizes gpr[], new_value[] and reg_written[]. */
#define TOTAL_PER_THREAD_REGS 64

#define SLOTS_MAX 4
/* Capacity of the scalar store log (mem_log_stores[]). */
#define STORES_MAX 2
#define REG_WRITES_MAX 32
#define PRED_WRITES_MAX 5 /* 4 insns + endloop */
/* Capacity of the vector store log (vstore[] and vstore_pending[]). */
#define VSTORES_MAX 2

/* Type-name string of the Hexagon CPU. */
#define TYPE_HEXAGON_CPU "hexagon-cpu"

/*
 * Model-specific type names have the form "<model>-hexagon-cpu".
 * The argument of HEXAGON_CPU_TYPE_NAME() must be a string literal: it is
 * joined to the suffix by adjacent string-literal concatenation.
 */
#define HEXAGON_CPU_TYPE_SUFFIX "-" TYPE_HEXAGON_CPU
#define HEXAGON_CPU_TYPE_NAME(name) (name HEXAGON_CPU_TYPE_SUFFIX)
#define CPU_RESOLVING_TYPE TYPE_HEXAGON_CPU

#define TYPE_HEXAGON_CPU_V67 HEXAGON_CPU_TYPE_NAME("v67")

/* MMU index returned by cpu_mmu_index() for user-mode emulation. */
#define MMU_USER_IDX 0
|
|
|
|
typedef struct {
|
|
target_ulong va;
|
|
uint8_t width;
|
|
uint32_t data32;
|
|
uint64_t data64;
|
|
} MemLog;
|
|
|
|
typedef struct {
|
|
target_ulong va;
|
|
int size;
|
|
DECLARE_BITMAP(mask, MAX_VEC_SIZE_BYTES) QEMU_ALIGNED(16);
|
|
MMVector data QEMU_ALIGNED(16);
|
|
} VStoreLog;
|
|
|
|
/* Bit flags of an execution status word. */
#define EXEC_STATUS_OK          0x0000
#define EXEC_STATUS_STOP        0x0002
#define EXEC_STATUS_REPLAY      0x0010
#define EXEC_STATUS_LOCKED      0x0020
#define EXEC_STATUS_EXCEPTION   0x0100

/*
 * Test/update helpers for the exception and replay bits.  Each one expands
 * to an access of `env->status`, so a pointer named `env` must be in scope
 * wherever they are used.
 *
 * NOTE(review): CPUHexagonState as declared in this header has no `status`
 * member, so these cannot be expanded against it.  They look unused -
 * confirm, and consider removing them.
 */
#define EXCEPTION_DETECTED      (env->status & EXEC_STATUS_EXCEPTION)
#define REPLAY_DETECTED         (env->status & EXEC_STATUS_REPLAY)
#define CLEAR_EXCEPTION         (env->status &= (~EXEC_STATUS_EXCEPTION))
#define SET_EXCEPTION           (env->status |= EXEC_STATUS_EXCEPTION)

/* Maximum number of vector temps in a packet */
#define VECTOR_TEMPS_MAX            4
|
|
|
|
typedef struct CPUArchState {
|
|
target_ulong gpr[TOTAL_PER_THREAD_REGS];
|
|
target_ulong pred[NUM_PREGS];
|
|
target_ulong branch_taken;
|
|
|
|
/* For comparing with LLDB on target - see adjust_stack_ptrs function */
|
|
target_ulong last_pc_dumped;
|
|
target_ulong stack_start;
|
|
|
|
uint8_t slot_cancelled;
|
|
target_ulong new_value[TOTAL_PER_THREAD_REGS];
|
|
|
|
/*
|
|
* Only used when HEX_DEBUG is on, but unconditionally included
|
|
* to reduce recompile time when turning HEX_DEBUG on/off.
|
|
*/
|
|
target_ulong this_PC;
|
|
target_ulong reg_written[TOTAL_PER_THREAD_REGS];
|
|
|
|
target_ulong new_pred_value[NUM_PREGS];
|
|
target_ulong pred_written;
|
|
|
|
MemLog mem_log_stores[STORES_MAX];
|
|
target_ulong pkt_has_store_s1;
|
|
target_ulong dczero_addr;
|
|
|
|
float_status fp_status;
|
|
|
|
target_ulong llsc_addr;
|
|
target_ulong llsc_val;
|
|
uint64_t llsc_val_i64;
|
|
|
|
MMVector VRegs[NUM_VREGS] QEMU_ALIGNED(16);
|
|
MMVector future_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16);
|
|
MMVector tmp_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16);
|
|
|
|
VRegMask VRegs_updated;
|
|
|
|
MMQReg QRegs[NUM_QREGS] QEMU_ALIGNED(16);
|
|
MMQReg future_QRegs[NUM_QREGS] QEMU_ALIGNED(16);
|
|
QRegMask QRegs_updated;
|
|
|
|
/* Temporaries used within instructions */
|
|
MMVectorPair VuuV QEMU_ALIGNED(16);
|
|
MMVectorPair VvvV QEMU_ALIGNED(16);
|
|
MMVectorPair VxxV QEMU_ALIGNED(16);
|
|
MMVector vtmp QEMU_ALIGNED(16);
|
|
MMQReg qtmp QEMU_ALIGNED(16);
|
|
|
|
VStoreLog vstore[VSTORES_MAX];
|
|
target_ulong vstore_pending[VSTORES_MAX];
|
|
bool vtcm_pending;
|
|
VTCMStoreLog vtcm_log;
|
|
} CPUHexagonState;
|
|
|
|
OBJECT_DECLARE_CPU_TYPE(HexagonCPU, HexagonCPUClass, HEXAGON_CPU)
|
|
|
|
typedef struct HexagonCPUClass {
|
|
/*< private >*/
|
|
CPUClass parent_class;
|
|
/*< public >*/
|
|
DeviceRealize parent_realize;
|
|
ResettablePhases parent_phases;
|
|
} HexagonCPUClass;
|
|
|
|
struct ArchCPU {
|
|
/*< private >*/
|
|
CPUState parent_obj;
|
|
/*< public >*/
|
|
CPUNegativeOffsetState neg;
|
|
CPUHexagonState env;
|
|
|
|
bool lldb_compat;
|
|
target_ulong lldb_stack_adjust;
|
|
};
|
|
|
|
#include "cpu_bits.h"
|
|
|
|
/* TB_FLAGS bit 0 (1 bit wide): IS_TIGHT_LOOP. */
FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1)

/*
 * Report the values that identify the translation block to execute next.
 *
 * @env:     CPU state to read.
 * @pc:      out - receives gpr[HEX_REG_PC].
 * @cs_base: out - always set to 0.
 * @flags:   out - TB flags; IS_TIGHT_LOOP is set when the PC equals
 *           gpr[HEX_REG_SA0] (presumably the loop-0 start address -
 *           confirm against hex_regs.h), otherwise 0.
 */
static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, target_ulong *pc,
                                        target_ulong *cs_base, uint32_t *flags)
{
    uint32_t hex_flags = 0;

    *pc = env->gpr[HEX_REG_PC];
    *cs_base = 0;
    if (*pc == env->gpr[HEX_REG_SA0]) {
        hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP, 1);
    }
    *flags = hex_flags;
}
|
|
|
|
/*
 * Return the MMU index to use for memory accesses.
 *
 * Only user-mode emulation is supported: the build is rejected with #error
 * unless CONFIG_USER_ONLY is defined, and the result is always MMU_USER_IDX.
 * Neither @env nor @ifetch is consulted.
 */
static inline int cpu_mmu_index(CPUHexagonState *env, bool ifetch)
{
#ifndef CONFIG_USER_ONLY
#error System mode not supported on Hexagon yet
#endif
    return MMU_USER_IDX;
}
|
|
|
|
/* Make ArchCPU usable as a plain type name for the Hexagon CPU object. */
typedef HexagonCPU ArchCPU;

/*
 * Defined outside this header.
 * NOTE(review): presumably performs one-time translator setup - confirm at
 * the definition.
 */
void hexagon_translate_init(void);
|
|
|
|
#include "exec/cpu-all.h"
|
|
|
|
#endif /* HEXAGON_CPU_H */
|