Hexagon HVX (target/hexagon) TCG generation

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
This commit is contained in:
Taylor Simpson 2021-09-30 14:29:00 -05:00
parent 33e9ed11d5
commit a82dd54862
3 changed files with 311 additions and 4 deletions

View File

@ -165,6 +165,9 @@ static inline void gen_read_ctrl_reg(DisasContext *ctx, const int reg_num,
} else if (reg_num == HEX_REG_QEMU_INSN_CNT) {
tcg_gen_addi_tl(dest, hex_gpr[HEX_REG_QEMU_INSN_CNT],
ctx->num_insns);
} else if (reg_num == HEX_REG_QEMU_HVX_CNT) {
tcg_gen_addi_tl(dest, hex_gpr[HEX_REG_QEMU_HVX_CNT],
ctx->num_hvx_insns);
} else {
tcg_gen_mov_tl(dest, hex_gpr[reg_num]);
}
@ -191,6 +194,12 @@ static inline void gen_read_ctrl_reg_pair(DisasContext *ctx, const int reg_num,
tcg_gen_concat_i32_i64(dest, pkt_cnt, insn_cnt);
tcg_temp_free(pkt_cnt);
tcg_temp_free(insn_cnt);
} else if (reg_num == HEX_REG_QEMU_HVX_CNT) {
TCGv hvx_cnt = tcg_temp_new();
tcg_gen_addi_tl(hvx_cnt, hex_gpr[HEX_REG_QEMU_HVX_CNT],
ctx->num_hvx_insns);
tcg_gen_concat_i32_i64(dest, hvx_cnt, hex_gpr[reg_num + 1]);
tcg_temp_free(hvx_cnt);
} else {
tcg_gen_concat_i32_i64(dest,
hex_gpr[reg_num],
@ -226,6 +235,9 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num,
if (reg_num == HEX_REG_QEMU_INSN_CNT) {
ctx->num_insns = 0;
}
if (reg_num == HEX_REG_QEMU_HVX_CNT) {
ctx->num_hvx_insns = 0;
}
}
}
@ -247,6 +259,9 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num,
ctx->num_packets = 0;
ctx->num_insns = 0;
}
if (reg_num == HEX_REG_QEMU_HVX_CNT) {
ctx->num_hvx_insns = 0;
}
}
}

View File

@ -19,6 +19,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/cpu_ldst.h"
#include "exec/log.h"
#include "internal.h"
@ -47,11 +48,60 @@ TCGv hex_dczero_addr;
TCGv hex_llsc_addr;
TCGv hex_llsc_val;
TCGv_i64 hex_llsc_val_i64;
TCGv hex_VRegs_updated;
TCGv hex_QRegs_updated;
TCGv hex_vstore_addr[VSTORES_MAX];
TCGv hex_vstore_size[VSTORES_MAX];
TCGv hex_vstore_pending[VSTORES_MAX];
static const char * const hexagon_prednames[] = {
"p0", "p1", "p2", "p3"
};
intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
int num, bool alloc_ok)
{
intptr_t offset;
/* See if it is already allocated */
for (int i = 0; i < ctx->future_vregs_idx; i++) {
if (ctx->future_vregs_num[i] == regnum) {
return offsetof(CPUHexagonState, future_VRegs[i]);
}
}
g_assert(alloc_ok);
offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]);
for (int i = 0; i < num; i++) {
ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++;
}
ctx->future_vregs_idx += num;
g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX);
return offset;
}
intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
int num, bool alloc_ok)
{
intptr_t offset;
/* See if it is already allocated */
for (int i = 0; i < ctx->tmp_vregs_idx; i++) {
if (ctx->tmp_vregs_num[i] == regnum) {
return offsetof(CPUHexagonState, tmp_VRegs[i]);
}
}
g_assert(alloc_ok);
offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]);
for (int i = 0; i < num; i++) {
ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++;
}
ctx->tmp_vregs_idx += num;
g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX);
return offset;
}
static void gen_exception_raw(int excp)
{
gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp));
@ -63,6 +113,8 @@ static void gen_exec_counters(DisasContext *ctx)
hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
}
static void gen_end_tb(DisasContext *ctx)
@ -167,11 +219,19 @@ static void gen_start_packet(DisasContext *ctx, Packet *pkt)
bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
ctx->preg_log_idx = 0;
bitmap_zero(ctx->pregs_written, NUM_PREGS);
ctx->future_vregs_idx = 0;
ctx->tmp_vregs_idx = 0;
ctx->vreg_log_idx = 0;
bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
bitmap_zero(ctx->vregs_updated, NUM_VREGS);
bitmap_zero(ctx->vregs_select, NUM_VREGS);
ctx->qreg_log_idx = 0;
for (i = 0; i < STORES_MAX; i++) {
ctx->store_width[i] = 0;
}
tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
ctx->s1_store_processed = false;
ctx->pre_commit = true;
if (HEX_DEBUG) {
/* Handy place to set a breakpoint before the packet executes */
@ -193,6 +253,26 @@ static void gen_start_packet(DisasContext *ctx, Packet *pkt)
if (need_pred_written(pkt)) {
tcg_gen_movi_tl(hex_pred_written, 0);
}
if (pkt->pkt_has_hvx) {
tcg_gen_movi_tl(hex_VRegs_updated, 0);
tcg_gen_movi_tl(hex_QRegs_updated, 0);
}
}
bool is_gather_store_insn(Insn *insn, Packet *pkt)
{
if (GET_ATTRIB(insn->opcode, A_CVI_NEW) &&
insn->new_value_producer_slot == 1) {
/* Look for gather instruction */
for (int i = 0; i < pkt->num_insns; i++) {
Insn *in = &pkt->insn[i];
if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) {
return true;
}
}
}
return false;
}
/*
@ -448,10 +528,98 @@ static void process_dczeroa(DisasContext *ctx, Packet *pkt)
}
}
static bool pkt_has_hvx_store(Packet *pkt)
{
int i;
for (i = 0; i < pkt->num_insns; i++) {
int opcode = pkt->insn[i].opcode;
if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) {
return true;
}
}
return false;
}
static void gen_commit_hvx(DisasContext *ctx, Packet *pkt)
{
int i;
/*
* for (i = 0; i < ctx->vreg_log_idx; i++) {
* int rnum = ctx->vreg_log[i];
* if (ctx->vreg_is_predicated[i]) {
* if (env->VRegs_updated & (1 << rnum)) {
* env->VRegs[rnum] = env->future_VRegs[rnum];
* }
* } else {
* env->VRegs[rnum] = env->future_VRegs[rnum];
* }
* }
*/
for (i = 0; i < ctx->vreg_log_idx; i++) {
int rnum = ctx->vreg_log[i];
bool is_predicated = ctx->vreg_is_predicated[i];
intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
size_t size = sizeof(MMVector);
if (is_predicated) {
TCGv cmp = tcg_temp_new();
TCGLabel *label_skip = gen_new_label();
tcg_gen_andi_tl(cmp, hex_VRegs_updated, 1 << rnum);
tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
tcg_temp_free(cmp);
tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
gen_set_label(label_skip);
} else {
tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
}
}
/*
* for (i = 0; i < ctx->qreg_log_idx; i++) {
* int rnum = ctx->qreg_log[i];
* if (ctx->qreg_is_predicated[i]) {
* if (env->QRegs_updated) & (1 << rnum)) {
* env->QRegs[rnum] = env->future_QRegs[rnum];
* }
* } else {
* env->QRegs[rnum] = env->future_QRegs[rnum];
* }
* }
*/
for (i = 0; i < ctx->qreg_log_idx; i++) {
int rnum = ctx->qreg_log[i];
bool is_predicated = ctx->qreg_is_predicated[i];
intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
size_t size = sizeof(MMQReg);
if (is_predicated) {
TCGv cmp = tcg_temp_new();
TCGLabel *label_skip = gen_new_label();
tcg_gen_andi_tl(cmp, hex_QRegs_updated, 1 << rnum);
tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
tcg_temp_free(cmp);
tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
gen_set_label(label_skip);
} else {
tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
}
}
if (pkt_has_hvx_store(pkt)) {
gen_helper_commit_hvx_stores(cpu_env);
}
}
static void update_exec_counters(DisasContext *ctx, Packet *pkt)
{
int num_insns = pkt->num_insns;
int num_real_insns = 0;
int num_hvx_insns = 0;
for (int i = 0; i < num_insns; i++) {
if (!pkt->insn[i].is_endloop &&
@ -459,13 +627,18 @@ static void update_exec_counters(DisasContext *ctx, Packet *pkt)
!GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) {
num_real_insns++;
}
if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) {
num_hvx_insns++;
}
}
ctx->num_packets++;
ctx->num_insns += num_real_insns;
ctx->num_hvx_insns += num_hvx_insns;
}
static void gen_commit_packet(DisasContext *ctx, Packet *pkt)
static void gen_commit_packet(CPUHexagonState *env, DisasContext *ctx,
Packet *pkt)
{
/*
* If there is more than one store in a packet, make sure they are all OK
@ -474,6 +647,10 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt)
* dczeroa has to be the only store operation in the packet, so we go
* ahead and process that first.
*
* When there is an HVX store, there can also be a scalar store in either
* slot 0 or slot1, so we create a mask for the helper to indicate what
* work to do.
*
* When there are two scalar stores, we probe the one in slot 0.
*
* Note that we don't call the probe helper for packets with only one
@ -482,13 +659,35 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt)
*/
bool has_store_s0 = pkt->pkt_has_store_s0;
bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed);
bool has_hvx_store = pkt_has_hvx_store(pkt);
if (pkt->pkt_has_dczeroa) {
/*
* The dczeroa will be the store in slot 0, check that we don't have
* a store in slot 1.
* a store in slot 1 or an HVX store.
*/
g_assert(has_store_s0 && !has_store_s1);
g_assert(has_store_s0 && !has_store_s1 && !has_hvx_store);
process_dczeroa(ctx, pkt);
} else if (has_hvx_store) {
TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
if (!has_store_s0 && !has_store_s1) {
gen_helper_probe_hvx_stores(cpu_env, mem_idx);
} else {
int mask = 0;
TCGv mask_tcgv;
if (has_store_s0) {
mask |= (1 << 0);
}
if (has_store_s1) {
mask |= (1 << 1);
}
if (has_hvx_store) {
mask |= (1 << 2);
}
mask_tcgv = tcg_constant_tl(mask);
gen_helper_probe_pkt_scalar_hvx_stores(cpu_env, mask_tcgv, mem_idx);
}
} else if (has_store_s0 && has_store_s1) {
/*
* process_store_log will execute the slot 1 store first,
@ -502,6 +701,9 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt)
gen_reg_writes(ctx);
gen_pred_writes(ctx, pkt);
if (pkt->pkt_has_hvx) {
gen_commit_hvx(ctx, pkt);
}
update_exec_counters(ctx, pkt);
if (HEX_DEBUG) {
TCGv has_st0 =
@ -513,6 +715,11 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt)
gen_helper_debug_commit_end(cpu_env, has_st0, has_st1);
}
if (pkt->vhist_insn != NULL) {
ctx->pre_commit = false;
pkt->vhist_insn->generate(env, ctx, pkt->vhist_insn, pkt);
}
if (pkt->pkt_has_cof) {
gen_end_tb(ctx);
}
@ -537,7 +744,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
for (i = 0; i < pkt.num_insns; i++) {
gen_insn(env, ctx, &pkt.insn[i], &pkt);
}
gen_commit_packet(ctx, &pkt);
gen_commit_packet(env, ctx, &pkt);
ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
} else {
gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
@ -552,6 +759,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
ctx->mem_idx = MMU_USER_IDX;
ctx->num_packets = 0;
ctx->num_insns = 0;
ctx->num_hvx_insns = 0;
}
static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
@ -656,6 +864,9 @@ static char store_addr_names[STORES_MAX][NAME_LEN];
static char store_width_names[STORES_MAX][NAME_LEN];
static char store_val32_names[STORES_MAX][NAME_LEN];
static char store_val64_names[STORES_MAX][NAME_LEN];
static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
static char vstore_size_names[VSTORES_MAX][NAME_LEN];
static char vstore_pending_names[VSTORES_MAX][NAME_LEN];
void hexagon_translate_init(void)
{
@ -718,6 +929,10 @@ void hexagon_translate_init(void)
offsetof(CPUHexagonState, llsc_val), "llsc_val");
hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env,
offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
hex_VRegs_updated = tcg_global_mem_new(cpu_env,
offsetof(CPUHexagonState, VRegs_updated), "VRegs_updated");
hex_QRegs_updated = tcg_global_mem_new(cpu_env,
offsetof(CPUHexagonState, QRegs_updated), "QRegs_updated");
for (i = 0; i < STORES_MAX; i++) {
snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
hex_store_addr[i] = tcg_global_mem_new(cpu_env,
@ -739,4 +954,20 @@ void hexagon_translate_init(void)
offsetof(CPUHexagonState, mem_log_stores[i].data64),
store_val64_names[i]);
}
for (int i = 0; i < VSTORES_MAX; i++) {
snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i);
hex_vstore_addr[i] = tcg_global_mem_new(cpu_env,
offsetof(CPUHexagonState, vstore[i].va),
vstore_addr_names[i]);
snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i);
hex_vstore_size[i] = tcg_global_mem_new(cpu_env,
offsetof(CPUHexagonState, vstore[i].size),
vstore_size_names[i]);
snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i);
hex_vstore_pending[i] = tcg_global_mem_new(cpu_env,
offsetof(CPUHexagonState, vstore_pending[i]),
vstore_pending_names[i]);
}
}

View File

@ -29,6 +29,7 @@ typedef struct DisasContext {
uint32_t mem_idx;
uint32_t num_packets;
uint32_t num_insns;
uint32_t num_hvx_insns;
int reg_log[REG_WRITES_MAX];
int reg_log_idx;
DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
@ -37,6 +38,20 @@ typedef struct DisasContext {
DECLARE_BITMAP(pregs_written, NUM_PREGS);
uint8_t store_width[STORES_MAX];
bool s1_store_processed;
int future_vregs_idx;
int future_vregs_num[VECTOR_TEMPS_MAX];
int tmp_vregs_idx;
int tmp_vregs_num[VECTOR_TEMPS_MAX];
int vreg_log[NUM_VREGS];
bool vreg_is_predicated[NUM_VREGS];
int vreg_log_idx;
DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
DECLARE_BITMAP(vregs_updated, NUM_VREGS);
DECLARE_BITMAP(vregs_select, NUM_VREGS);
int qreg_log[NUM_QREGS];
bool qreg_is_predicated[NUM_QREGS];
int qreg_log_idx;
bool pre_commit;
} DisasContext;
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)
@ -67,6 +82,46 @@ static inline bool is_preloaded(DisasContext *ctx, int num)
return test_bit(num, ctx->regs_written);
}
intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
int num, bool alloc_ok);
intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
int num, bool alloc_ok);
static inline void ctx_log_vreg_write(DisasContext *ctx,
int rnum, VRegWriteType type,
bool is_predicated)
{
if (type != EXT_TMP) {
ctx->vreg_log[ctx->vreg_log_idx] = rnum;
ctx->vreg_is_predicated[ctx->vreg_log_idx] = is_predicated;
ctx->vreg_log_idx++;
set_bit(rnum, ctx->vregs_updated);
}
if (type == EXT_NEW) {
set_bit(rnum, ctx->vregs_select);
}
if (type == EXT_TMP) {
set_bit(rnum, ctx->vregs_updated_tmp);
}
}
static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
int rnum, VRegWriteType type,
bool is_predicated)
{
ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated);
ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated);
}
static inline void ctx_log_qreg_write(DisasContext *ctx,
int rnum, bool is_predicated)
{
ctx->qreg_log[ctx->qreg_log_idx] = rnum;
ctx->qreg_is_predicated[ctx->qreg_log_idx] = is_predicated;
ctx->qreg_log_idx++;
}
extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
extern TCGv hex_pred[NUM_PREGS];
extern TCGv hex_next_PC;
@ -85,6 +140,12 @@ extern TCGv hex_dczero_addr;
extern TCGv hex_llsc_addr;
extern TCGv hex_llsc_val;
extern TCGv_i64 hex_llsc_val_i64;
extern TCGv hex_VRegs_updated;
extern TCGv hex_QRegs_updated;
extern TCGv hex_vstore_addr[VSTORES_MAX];
extern TCGv hex_vstore_size[VSTORES_MAX];
extern TCGv hex_vstore_pending[VSTORES_MAX];
bool is_gather_store_insn(Insn *insn, Packet *pkt);
void process_store(DisasContext *ctx, Packet *pkt, int slot_num);
#endif