90bbf9d9db
This gets especially confusing if you start plugging in host addresses from a trace and you wonder why the output keeps changing. Report when read_memory_func fails instead of blindly disassembling the buffer contents. Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Message-Id: <20220929114231.583801-35-alex.bennee@linaro.org>
340 lines
9.5 KiB
C
340 lines
9.5 KiB
C
/*
|
|
* Interface to the capstone disassembler.
|
|
* SPDX-License-Identifier: GPL-2.0-or-later
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qemu/bswap.h"
|
|
#include "disas/dis-asm.h"
|
|
#include "disas/capstone.h"
|
|
|
|
|
|
/*
|
|
* Temporary storage for the capstone library. This will be alloced via
|
|
* malloc with a size private to the library; thus there's no reason not
|
|
* to share this across calls and across host vs target disassembly.
|
|
*/
|
|
static __thread cs_insn *cap_insn;
|
|
|
|
/*
|
|
* The capstone library always skips 2 bytes for S390X.
|
|
* This is less than ideal, since we can tell from the first two bits
|
|
* the size of the insn and thus stay in sync with the insn stream.
|
|
*/
|
|
static size_t CAPSTONE_API
|
|
cap_skipdata_s390x_cb(const uint8_t *code, size_t code_size,
|
|
size_t offset, void *user_data)
|
|
{
|
|
size_t ilen;
|
|
|
|
/* See get_ilen() in target/s390x/internal.h. */
|
|
switch (code[offset] >> 6) {
|
|
case 0:
|
|
ilen = 2;
|
|
break;
|
|
case 1:
|
|
case 2:
|
|
ilen = 4;
|
|
break;
|
|
default:
|
|
ilen = 6;
|
|
break;
|
|
}
|
|
|
|
return ilen;
|
|
}
|
|
|
|
static const cs_opt_skipdata cap_skipdata_s390x = {
|
|
.mnemonic = ".byte",
|
|
.callback = cap_skipdata_s390x_cb
|
|
};
|
|
|
|
/*
|
|
* Initialize the Capstone library.
|
|
*
|
|
* ??? It would be nice to cache this. We would need one handle for the
|
|
* host and one for the target. For most targets we can reset specific
|
|
* parameters via cs_option(CS_OPT_MODE, new_mode), but we cannot change
|
|
* CS_ARCH_* in this way. Thus we would need to be able to close and
|
|
* re-open the target handle with a different arch for the target in order
|
|
* to handle AArch64 vs AArch32 mode switching.
|
|
*/
|
|
static cs_err cap_disas_start(disassemble_info *info, csh *handle)
|
|
{
|
|
cs_mode cap_mode = info->cap_mode;
|
|
cs_err err;
|
|
|
|
cap_mode += (info->endian == BFD_ENDIAN_BIG ? CS_MODE_BIG_ENDIAN
|
|
: CS_MODE_LITTLE_ENDIAN);
|
|
|
|
err = cs_open(info->cap_arch, cap_mode, handle);
|
|
if (err != CS_ERR_OK) {
|
|
return err;
|
|
}
|
|
|
|
/* "Disassemble" unknown insns as ".byte W,X,Y,Z". */
|
|
cs_option(*handle, CS_OPT_SKIPDATA, CS_OPT_ON);
|
|
|
|
switch (info->cap_arch) {
|
|
case CS_ARCH_SYSZ:
|
|
cs_option(*handle, CS_OPT_SKIPDATA_SETUP,
|
|
(uintptr_t)&cap_skipdata_s390x);
|
|
break;
|
|
|
|
case CS_ARCH_X86:
|
|
/*
|
|
* We don't care about errors (if for some reason the library
|
|
* is compiled without AT&T syntax); the user will just have
|
|
* to deal with the Intel syntax.
|
|
*/
|
|
cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
|
|
break;
|
|
}
|
|
|
|
/* Allocate temp space for cs_disasm_iter. */
|
|
if (cap_insn == NULL) {
|
|
cap_insn = cs_malloc(*handle);
|
|
if (cap_insn == NULL) {
|
|
cs_close(handle);
|
|
return CS_ERR_MEM;
|
|
}
|
|
}
|
|
return CS_ERR_OK;
|
|
}
|
|
|
|
/*
 * Print bytes [I, N) of INSN as hex, one unit at a time.
 *
 * The unit size is info->cap_insn_unit: 4 and 2 byte units are loaded
 * according to info->endian, any other value prints single bytes.
 * Every unit is preceded by a space.
 */
static void cap_dump_insn_units(disassemble_info *info, cs_insn *insn,
                                int i, int n)
{
    fprintf_function emit = info->fprintf_func;
    FILE *out = info->stream;
    const uint8_t *bytes = insn->bytes;
    const bool big = info->endian == BFD_ENDIAN_BIG;

    switch (info->cap_insn_unit) {
    case 4:
        while (i < n) {
            emit(out, " %08x",
                 big ? ldl_be_p(bytes + i) : ldl_le_p(bytes + i));
            i += 4;
        }
        break;

    case 2:
        while (i < n) {
            emit(out, " %04x",
                 big ? lduw_be_p(bytes + i) : lduw_le_p(bytes + i));
            i += 2;
        }
        break;

    default:
        while (i < n) {
            emit(out, " %02x", bytes[i]);
            i++;
        }
        break;
    }
}
|
|
|
|
/*
 * Print one disassembled insn: address, raw bytes, mnemonic and operands.
 *
 * At most info->cap_insn_split bytes are dumped on the first line.  Shorter
 * insns are padded so that mnemonics line up; longer insns have their
 * remaining bytes dumped on subsequent lines, each starting with the
 * address of its first byte.
 */
static void cap_dump_insn(disassemble_info *info, cs_insn *insn)
{
    fprintf_function emit = info->fprintf_func;
    FILE *out = info->stream;
    const int len = insn->size;
    const int split = info->cap_insn_split;
    int off;

    emit(out, "0x%08" PRIx64 ": ", insn->address);

    /* Dump the first SPLIT bytes of the instruction. */
    cap_dump_insn_units(info, insn, 0, MIN(len, split));

    /* Add padding up to SPLIT so that mnemonics line up. */
    if (len < split) {
        /* Each missing unit would have printed a space plus 2 digits/byte. */
        int pad = (split - len) / info->cap_insn_unit
                  * (2 * info->cap_insn_unit + 1);

        emit(out, "%*s", pad, "");
    }

    /* Print the actual instruction. */
    emit(out, " %-8s %s\n", insn->mnemonic, insn->op_str);

    /* Dump any remaining part of the insn on subsequent lines. */
    off = split;
    while (off < len) {
        emit(out, "0x%08" PRIx64 ": ", insn->address + off);
        cap_dump_insn_units(info, insn, off, MIN(len, off + split));
        emit(out, "\n");
        off += split;
    }
}
|
|
|
|
/* Disassemble SIZE bytes at PC for the target. */
|
|
bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size)
|
|
{
|
|
uint8_t cap_buf[1024];
|
|
csh handle;
|
|
cs_insn *insn;
|
|
size_t csize = 0;
|
|
|
|
if (cap_disas_start(info, &handle) != CS_ERR_OK) {
|
|
return false;
|
|
}
|
|
insn = cap_insn;
|
|
|
|
while (1) {
|
|
size_t tsize = MIN(sizeof(cap_buf) - csize, size);
|
|
const uint8_t *cbuf = cap_buf;
|
|
|
|
if (info->read_memory_func(pc + csize, cap_buf + csize, tsize, info) == 0) {
|
|
csize += tsize;
|
|
size -= tsize;
|
|
|
|
while (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
|
|
cap_dump_insn(info, insn);
|
|
}
|
|
|
|
/* If the target memory is not consumed, go back for more... */
|
|
if (size != 0) {
|
|
/*
|
|
* ... taking care to move any remaining fractional insn
|
|
* to the beginning of the buffer.
|
|
*/
|
|
if (csize != 0) {
|
|
memmove(cap_buf, cbuf, csize);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Since the target memory is consumed, we should not have
|
|
* a remaining fractional insn.
|
|
*/
|
|
if (csize != 0) {
|
|
info->fprintf_func(info->stream,
|
|
"Disassembler disagrees with translator "
|
|
"over instruction decoding\n"
|
|
"Please report this to qemu-devel@nongnu.org\n");
|
|
}
|
|
break;
|
|
|
|
} else {
|
|
info->fprintf_func(info->stream,
|
|
"0x%08" PRIx64 ": unable to read memory\n", pc);
|
|
break;
|
|
}
|
|
}
|
|
|
|
cs_close(&handle);
|
|
return true;
|
|
}
|
|
|
|
/* Disassemble SIZE bytes at CODE for the host. */
|
|
bool cap_disas_host(disassemble_info *info, const void *code, size_t size)
|
|
{
|
|
csh handle;
|
|
const uint8_t *cbuf;
|
|
cs_insn *insn;
|
|
uint64_t pc;
|
|
|
|
if (cap_disas_start(info, &handle) != CS_ERR_OK) {
|
|
return false;
|
|
}
|
|
insn = cap_insn;
|
|
|
|
cbuf = code;
|
|
pc = (uintptr_t)code;
|
|
|
|
while (cs_disasm_iter(handle, &cbuf, &size, &pc, insn)) {
|
|
cap_dump_insn(info, insn);
|
|
}
|
|
if (size != 0) {
|
|
info->fprintf_func(info->stream,
|
|
"Disassembler disagrees with TCG over instruction encoding\n"
|
|
"Please report this to qemu-devel@nongnu.org\n");
|
|
}
|
|
|
|
cs_close(&handle);
|
|
return true;
|
|
}
|
|
|
|
/* Disassemble COUNT insns at PC for the target. */
|
|
bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)
|
|
{
|
|
uint8_t cap_buf[32];
|
|
csh handle;
|
|
cs_insn *insn;
|
|
size_t csize = 0;
|
|
|
|
if (cap_disas_start(info, &handle) != CS_ERR_OK) {
|
|
return false;
|
|
}
|
|
insn = cap_insn;
|
|
|
|
while (1) {
|
|
/*
|
|
* We want to read memory for one insn, but generically we do not
|
|
* know how much memory that is. We have a small buffer which is
|
|
* known to be sufficient for all supported targets. Try to not
|
|
* read beyond the page, Just In Case. For even more simplicity,
|
|
* ignore the actual target page size and use a 1k boundary. If
|
|
* that turns out to be insufficient, we'll come back around the
|
|
* loop and read more.
|
|
*/
|
|
uint64_t epc = QEMU_ALIGN_UP(pc + csize + 1, 1024);
|
|
size_t tsize = MIN(sizeof(cap_buf) - csize, epc - pc);
|
|
const uint8_t *cbuf = cap_buf;
|
|
|
|
/* Make certain that we can make progress. */
|
|
assert(tsize != 0);
|
|
if (info->read_memory_func(pc + csize, cap_buf + csize,
|
|
tsize, info) == 0)
|
|
{
|
|
csize += tsize;
|
|
|
|
if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
|
|
cap_dump_insn(info, insn);
|
|
if (--count <= 0) {
|
|
break;
|
|
}
|
|
}
|
|
memmove(cap_buf, cbuf, csize);
|
|
} else {
|
|
info->fprintf_func(info->stream,
|
|
"0x%08" PRIx64 ": unable to read memory\n", pc);
|
|
break;
|
|
}
|
|
}
|
|
|
|
cs_close(&handle);
|
|
return true;
|
|
}
|
|
|
|
/* Disassemble a single instruction directly into plugin output */
|
|
bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size)
|
|
{
|
|
uint8_t cap_buf[32];
|
|
const uint8_t *cbuf = cap_buf;
|
|
csh handle;
|
|
|
|
if (cap_disas_start(info, &handle) != CS_ERR_OK) {
|
|
return false;
|
|
}
|
|
|
|
assert(size < sizeof(cap_buf));
|
|
info->read_memory_func(pc, cap_buf, size, info);
|
|
|
|
if (cs_disasm_iter(handle, &cbuf, &size, &pc, cap_insn)) {
|
|
info->fprintf_func(info->stream, "%s %s",
|
|
cap_insn->mnemonic, cap_insn->op_str);
|
|
}
|
|
|
|
cs_close(&handle);
|
|
return true;
|
|
}
|