5d25fcb702
One problem with flaky tests is that they often only fail under CI conditions, which makes them hard to debug. We add an optional allow_fail job so developers can trigger only the flaky tests in the CI environment if they are debugging. Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Acked-by: Stefan Hajnoczi <stefanha@redhat.com> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Message-Id: <20231201093633.2551497-8-alex.bennee@linaro.org>
277 lines
9.2 KiB
Python
277 lines
9.2 KiB
Python
# Reverse debugging test
|
|
#
|
|
# Copyright (c) 2020 ISP RAS
|
|
#
|
|
# Author:
|
|
# Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
|
|
#
|
|
# This work is licensed under the terms of the GNU GPL, version 2 or
|
|
# later. See the COPYING file in the top-level directory.
|
|
import os
|
|
import logging
|
|
|
|
from avocado import skipUnless
|
|
from avocado_qemu import BUILD_DIR
|
|
from avocado.utils import datadrainer
|
|
from avocado.utils import gdb
|
|
from avocado.utils import process
|
|
from avocado.utils.network.ports import find_free_port
|
|
from avocado.utils.path import find_command
|
|
from boot_linux_console import LinuxKernelTest
|
|
|
|
class ReverseDebugging(LinuxKernelTest):
    """
    Test GDB reverse debugging commands: reverse step and reverse continue.

    Recording saves the execution of some instructions and makes an initial
    VM snapshot to allow reverse execution.
    Replay saves the order of the first instructions and then checks that they
    are executed backwards in the correct order.
    After that the execution is replayed to the end, and reverse continue
    command is checked by setting several breakpoints, and asserting
    that the execution is stopped at the last of them.

    Concrete subclasses must define REG_PC, the register number that is sent
    in the gdbstub 'p' packet to read the program counter.
    """

    # test timeout consumed by the test framework (presumably in seconds)
    timeout = 10
    # number of instructions that are single-stepped during replay; the
    # recording phase runs until the instruction count exceeds this value
    STEPS = 10
    # selects get_reg_le() or get_reg_be() when decoding register replies
    endian_is_le = True

    def run_vm(self, record, shift, args, replay_path, image_path, port):
        """
        Launch a VM in record or replay mode and start draining its console.

        :param record: True to record the execution, False to replay it
        :param shift: value for the 'shift' option of -icount
        :param args: extra command line arguments (iterable) or None
        :param replay_path: path of the record/replay log file
        :param image_path: path of the qcow2 image holding the VM snapshots
        :param port: TCP port of the gdbstub (only used when replaying)
        :returns: the launched VM object
        """
        logger = logging.getLogger('replay')
        vm = self.get_vm()
        vm.set_console()
        if record:
            logger.info('recording the execution...')
            mode = 'record'
        else:
            logger.info('replaying the execution...')
            mode = 'replay'
            # Only the replay run is started paused with a gdbstub attached;
            # the recording run must execute freely so that its instruction
            # counter advances.
            vm.add_args('-gdb', 'tcp::%d' % port, '-S')
        vm.add_args('-icount', 'shift=%s,rr=%s,rrfile=%s,rrsnapshot=init' %
                    (shift, mode, replay_path),
                    '-net', 'none')
        vm.add_args('-drive', 'file=%s,if=none' % image_path)
        if args:
            vm.add_args(*args)
        vm.launch()
        # forward the guest console to the test log until the VM stops
        console_drainer = datadrainer.LineLogger(
            vm.console_socket.fileno(),
            logger=self.log.getChild('console'),
            stop_check=(lambda: not vm.is_running()))
        console_drainer.start()
        return vm

    @staticmethod
    def get_reg_le(g, reg):
        """
        Read register number reg through g and decode the hex reply as a
        little-endian value (the last byte pair is the most significant).
        """
        res = g.cmd(b'p%x' % reg)
        num = 0
        # walk the two-character hex byte pairs from the end to the start
        for i in range(len(res) - 2, -1, -2):
            num = 0x100 * num + int(res[i:i + 2], 16)
        return num

    @staticmethod
    def get_reg_be(g, reg):
        """
        Read register number reg through g and decode the hex reply as a
        big-endian value.
        """
        res = g.cmd(b'p%x' % reg)
        return int(res, 16)

    def get_reg(self, g, reg):
        """Read register number reg honouring self.endian_is_le."""
        # value may be encoded in BE or LE order
        if self.endian_is_le:
            return self.get_reg_le(g, reg)
        return self.get_reg_be(g, reg)

    def get_pc(self, g):
        """Return the current program counter read through g."""
        return self.get_reg(g, self.REG_PC)

    def check_pc(self, g, addr):
        """Fail the test unless the current program counter equals addr."""
        pc = self.get_pc(g)
        if pc != addr:
            self.fail('Invalid PC (read %x instead of %x)' % (pc, addr))

    @staticmethod
    def gdb_step(g):
        """Single-step forward ('s' packet) and expect a T05 stop reply."""
        g.cmd(b's', b'T05thread:01;')

    @staticmethod
    def gdb_bstep(g):
        """Single-step backward ('bs' packet) and expect a T05 stop reply."""
        g.cmd(b'bs', b'T05thread:01;')

    @staticmethod
    def vm_get_icount(vm):
        """Return the 'icount' field of the QMP 'query-replay' reply."""
        return vm.qmp('query-replay')['return']['icount']

    def _create_snapshot_image(self):
        """Create the temporary qcow2 image for snapshots; return its path."""
        logger = logging.getLogger('replay')
        logger.info('creating qcow2 image for VM snapshots')
        image_path = os.path.join(self.workdir, 'disk.qcow2')
        qemu_img = os.path.join(BUILD_DIR, 'qemu-img')
        if not os.path.exists(qemu_img):
            # fall back to a qemu-img found by find_command()
            qemu_img = find_command('qemu-img', False)
            if qemu_img is False:
                self.cancel('Could not find "qemu-img", which is required to '
                            'create the temporary qcow2 image')
        cmd = '%s create -f qcow2 %s 128M' % (qemu_img, image_path)
        process.run(cmd)
        return image_path

    def _record_execution(self, shift, args, replay_path, image_path, port):
        """Record more than STEPS instructions; return the final icount."""
        vm = self.run_vm(True, shift, args, replay_path, image_path, port)
        # poll until the guest has executed more than STEPS instructions
        while self.vm_get_icount(vm) <= self.STEPS:
            pass
        last_icount = self.vm_get_icount(vm)
        vm.shutdown()
        return last_icount

    def _connect_gdb(self, port):
        """Connect to the gdbstub and require reverse execution support."""
        g = gdb.GDBRemote('127.0.0.1', port, False, False)
        g.connect()
        r = g.cmd(b'qSupported')
        if b'qXfer:features:read+' in r:
            g.cmd(b'qXfer:features:read:target.xml:0,ffb')
        if b'ReverseStep+' not in r:
            self.fail('Reverse step is not supported by QEMU')
        if b'ReverseContinue+' not in r:
            self.fail('Reverse continue is not supported by QEMU')
        return g

    def reverse_debugging(self, shift=7, args=None):
        """
        Record a short execution, then replay it under GDB and verify
        reverse step and reverse continue.

        :param shift: value for the 'shift' option of -icount
        :param args: extra QEMU command line arguments (iterable) or None
        """
        logger = logging.getLogger('replay')

        # create qcow2 for snapshots
        image_path = self._create_snapshot_image()

        replay_path = os.path.join(self.workdir, 'replay.bin')
        port = find_free_port()
        if port is None:
            # without a port the gdbstub argument cannot even be formatted
            self.cancel('Could not find a free port for the gdbstub')

        # record the log
        last_icount = self._record_execution(shift, args, replay_path,
                                             image_path, port)
        logger.info("recorded log with %s+ steps", last_icount)

        # replay and run debug commands
        vm = self.run_vm(False, shift, args, replay_path, image_path, port)
        logger.info('connecting to gdbstub')
        g = self._connect_gdb(port)

        logger.info('stepping forward')
        steps = []
        # record first instruction addresses
        for _ in range(self.STEPS):
            pc = self.get_pc(g)
            logger.info('saving position %x', pc)
            steps.append(pc)
            self.gdb_step(g)

        # visit the recorded instruction in reverse order
        logger.info('stepping backward')
        for addr in reversed(steps):
            self.gdb_bstep(g)
            self.check_pc(g, addr)
            logger.info('found position %x', addr)

        # visit the recorded instruction in forward order
        logger.info('stepping forward')
        for addr in steps:
            self.check_pc(g, addr)
            self.gdb_step(g)
            logger.info('found position %x', addr)

        # set breakpoints for the instructions just stepped over
        logger.info('setting breakpoints')
        for addr in steps:
            # hardware breakpoint at addr with len=1
            g.cmd(b'Z1,%x,1' % addr, b'OK')

        # this may hit a breakpoint if first instructions are executed
        # again
        logger.info('continuing execution')
        vm.qmp('replay-break', icount=last_icount - 1)
        # continue - will return after pausing
        # This could stop at the end and get a T02 return, or by
        # re-executing one of the breakpoints and get a T05 return.
        g.cmd(b'c')
        if self.vm_get_icount(vm) == last_icount - 1:
            logger.info('reached the end (icount %s)', last_icount - 1)
        else:
            logger.info('hit a breakpoint again at %x (icount %s)',
                        self.get_pc(g), self.vm_get_icount(vm))

        logger.info('running reverse continue to reach %x', steps[-1])
        # reverse continue - will return after stopping at the breakpoint
        g.cmd(b'bc', b'T05thread:01;')

        # assume that none of the first instructions is executed again
        # breaking the order of the breakpoints
        self.check_pc(g, steps[-1])
        logger.info('successfully reached %x', steps[-1])

        logger.info('exitting gdb and qemu')
        vm.shutdown()
|
|
|
|
class ReverseDebugging_X86_64(ReverseDebugging):
    """
    Reverse debugging test for the x86_64 'pc' machine.

    :avocado: tags=accel:tcg
    """

    # register numbers used with the gdbstub 'p' packet
    REG_PC = 0x10
    REG_CS = 0x12

    def get_pc(self, g):
        """
        Return the program counter combined with the code segment as
        CS * 0x10 + PC (presumably the real-mode linear address while the
        BIOS is running -- confirm).
        """
        return self.get_reg_le(g, self.REG_PC) \
            + self.get_reg_le(g, self.REG_CS) * 0x10

    # unidentified gitlab timeout problem
    @skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'Test is unstable on GitLab')
    def test_x86_64_pc(self):
        """
        :avocado: tags=arch:x86_64
        :avocado: tags=machine:pc
        :avocado: tags=flaky
        """
        # The 'flaky' tag mirrors the other QEMU_TEST_FLAKY_TESTS-gated
        # tests in this file, so tag-based selection of flaky tests also
        # picks up this one.
        # start with BIOS only
        self.reverse_debugging()
|
|
|
|
class ReverseDebugging_AArch64(ReverseDebugging):
    """
    Reverse debugging test for the aarch64 'virt' machine.

    :avocado: tags=accel:tcg
    """

    # register number used with the gdbstub 'p' packet to read the PC
    REG_PC = 32

    # unidentified gitlab timeout problem
    @skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'Test is unstable on GitLab')
    def test_aarch64_virt(self):
        """
        :avocado: tags=arch:aarch64
        :avocado: tags=machine:virt
        :avocado: tags=cpu:cortex-a53
        :avocado: tags=flaky
        """
        # The 'flaky' tag mirrors the other QEMU_TEST_FLAKY_TESTS-gated
        # tests in this file, so tag-based selection of flaky tests also
        # picks up this one.
        kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
                      '/linux/releases/29/Everything/aarch64/os/images/pxeboot'
                      '/vmlinuz')
        kernel_hash = '8c73e469fc6ea06a58dc83a628fc695b693b8493'
        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)

        # boot the fetched kernel directly instead of firmware only
        self.reverse_debugging(
            args=('-kernel', kernel_path))
|
|
|
|
class ReverseDebugging_ppc64(ReverseDebugging):
    """
    Reverse debugging tests for the ppc64 'pseries' and 'powernv' machines.

    :avocado: tags=accel:tcg
    """

    # register number used with the gdbstub 'p' packet to read the PC
    REG_PC = 0x40

    def _reverse_debugging_big_endian(self):
        """Switch register decoding to big-endian, then run the scenario."""
        self.endian_is_le = False
        self.reverse_debugging()

    # unidentified gitlab timeout problem
    @skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'),
                'Test is unstable on GitLab')
    def test_ppc64_pseries(self):
        """
        :avocado: tags=arch:ppc64
        :avocado: tags=machine:pseries
        :avocado: tags=flaky
        """
        # SLOF branches back to its entry point, so this machine goes through
        # the 'hit a breakpoint again' path of the scenario. That is expected
        # and merely differs slightly from the other machines.
        self._reverse_debugging_big_endian()

    # See https://gitlab.com/qemu-project/qemu/-/issues/1992
    @skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'),
                'Test is unstable on GitLab')
    def test_ppc64_powernv(self):
        """
        :avocado: tags=arch:ppc64
        :avocado: tags=machine:powernv
        :avocado: tags=flaky
        """
        self._reverse_debugging_big_endian()
|