Migration fixes 2021-07-26

Peter's fix for a bunch of races
  -> Seem to fix the occasional crash seen by Peter
 
 Wei's fix for migration with free page hinting
  -> Bug has been around for a while, but makes a huge difference
 
 My fix for OpenBSD test corner case
 
 Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEERfXHG0oMt/uXep+pBRYzHrxb/ecFAmD+ragACgkQBRYzHrxb
 /efqKxAAqaor4INEZTqlK+zxSpt+if3v/3WcyMlghmFO30soz0nGuiq5qBLkOMrN
 LB+1gRxIa7bg8SgDVyGdPlOTa3SQjhM2CJFM789Y0V+8HfMIyFXlVNEZmRxegLtL
 9M5muf1VO+gG8YPTskAKEjeHNWPspv4+wjpHnY/MxRt4TZ0sPq6Q7glzfPU94Ajm
 P8e3Sx8zYhKV342M/wy6F8LSn6q5+amRV8NeO+1VtTnic/Uft2Q7Jp91kH8jbYKs
 gPy6Ly3UGZHDxjBWcUj7lA57fKTYVuAPpZm5Q+ZJLW9iwo1Mik6cYBkdaX/Wf9yD
 yQBNkDcYQoKmRJoiDEFjqmp6RbRnmD4pCXOQBzyjIu1fQXAXy3PmETAePfdk3mkR
 ssmWRJoxCG1sR/rpU738oNZFYAe8FSTPiiBHVJor/iqMIHJaoRoNlfU9bV67ak7f
 XEWU6GmP2yko34zlXCSpctuXJYkME6e74PKhS6Vh9RDIXc+6UBQwSKQMJg8oDezd
 Dqu3nOT9XO6OyD2/XWM3K+DVqQU+SOhKTvN3Gpl9BfkhDDpcpnmnySjbmj5tU70Z
 EaCqhTFuH6krbk2SnukhRyOt/uFkNhIsLahqMkdxfFMxTMZ5P0xWPC+EOvZFZjOP
 l4IrrdIxBGmP5j6rGlzWRzfz5VsN560QvMo/C6b/HzquGanPWp8=
 =jNI0
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-migration-20210726a' into staging

Migration fixes 2021-07-26

Peter's fix for a bunch of races
 -> Seem to fix the occasional crash seen by Peter

Wei's fix for migration with free page hinting
 -> Bug has been around for a while, but makes a huge difference

My fix for OpenBSD test corner case

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

# gpg: Signature made Mon 26 Jul 2021 13:42:16 BST
# gpg:                using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert-gitlab/tags/pull-migration-20210726a:
  migration: clear the memory region dirty bitmap when skipping free pages
  migration: Move the yank unregister of channel_close out
  migration: Teach QEMUFile to be QIOChannel-aware
  migration: Introduce migration_ioc_[un]register_yank()
  migration: Make from_dst_file accesses thread-safe
  migration: Fix missing join() of rp_thread
  tests/qtest/migration-test.c: use 127.0.0.1 instead of 0

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-07-27 10:55:50 +01:00
commit ca4b5ef371
12 changed files with 196 additions and 68 deletions

View File

@ -44,13 +44,7 @@ void migration_channel_process_incoming(QIOChannel *ioc)
TYPE_QIO_CHANNEL_TLS)) {
migration_tls_channel_process_incoming(s, ioc, &local_err);
} else {
if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) ||
object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) {
yank_register_function(MIGRATION_YANK_INSTANCE,
migration_yank_iochannel,
QIO_CHANNEL(ioc));
}
migration_ioc_register_yank(ioc);
migration_ioc_process_incoming(ioc, &local_err);
}
@ -94,12 +88,7 @@ void migration_channel_connect(MigrationState *s,
} else {
QEMUFile *f = qemu_fopen_channel_output(ioc);
if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) ||
object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) {
yank_register_function(MIGRATION_YANK_INSTANCE,
migration_yank_iochannel,
QIO_CHANNEL(ioc));
}
migration_ioc_register_yank(ioc);
qemu_mutex_lock(&s->qemu_file_lock);
s->to_dst_file = f;

View File

@ -59,6 +59,7 @@
#include "multifd.h"
#include "qemu/yank.h"
#include "sysemu/cpus.h"
#include "yank_functions.h"
#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */
@ -273,6 +274,7 @@ void migration_incoming_state_destroy(void)
}
if (mis->from_src_file) {
migration_ioc_unregister_yank_from_file(mis->from_src_file);
qemu_fclose(mis->from_src_file);
mis->from_src_file = NULL;
}
@ -1811,6 +1813,7 @@ static void migrate_fd_cleanup(MigrationState *s)
* Close the file handle without the lock to make sure the
* critical section won't block for long.
*/
migration_ioc_unregister_yank_from_file(tmp);
qemu_fclose(tmp);
}
@ -1879,9 +1882,11 @@ static void migrate_fd_cancel(MigrationState *s)
QEMUFile *f = migrate_get_current()->to_dst_file;
trace_migrate_fd_cancel();
if (s->rp_state.from_dst_file) {
/* shutdown the rp socket, so causing the rp thread to shutdown */
qemu_file_shutdown(s->rp_state.from_dst_file);
WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
if (s->rp_state.from_dst_file) {
/* shutdown the rp socket, so causing the rp thread to shutdown */
qemu_file_shutdown(s->rp_state.from_dst_file);
}
}
do {
@ -2686,6 +2691,23 @@ static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
return 0;
}
/*
 * Release ms->rp_state.from_dst_file in a safe way.
 *
 * The pointer is cleared under qemu_file_lock, but the actual close happens
 * outside the lock, because the qemu_file_lock critical section must never
 * block (it is also taken in OOB command handlers).
 */
static void migration_release_from_dst_file(MigrationState *ms)
{
    QEMUFile *file;

    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
        /*
         * Reset the from_dst_file pointer first before releasing it, as we
         * can't block within lock section
         */
        file = ms->rp_state.from_dst_file;
        ms->rp_state.from_dst_file = NULL;
    }

    /* Now close outside the critical section, where blocking is allowed */
    qemu_fclose(file);
}
/*
* Handles messages sent on the return path towards the source VM
*
@ -2827,11 +2849,13 @@ out:
* Maybe there is something we can do: it looks like a
* network down issue, and we pause for a recovery.
*/
qemu_fclose(rp);
ms->rp_state.from_dst_file = NULL;
migration_release_from_dst_file(ms);
rp = NULL;
if (postcopy_pause_return_path_thread(ms)) {
/* Reload rp, reset the rest */
/*
* Reload rp, reset the rest. Referencing it is safe since
* it's reset only by us above, or when migration completes
*/
rp = ms->rp_state.from_dst_file;
ms->rp_state.error = false;
goto retry;
@ -2843,8 +2867,7 @@ out:
}
trace_source_return_path_thread_end();
ms->rp_state.from_dst_file = NULL;
qemu_fclose(rp);
migration_release_from_dst_file(ms);
rcu_unregister_thread();
return NULL;
}
@ -2852,7 +2875,6 @@ out:
static int open_return_path_on_source(MigrationState *ms,
bool create_thread)
{
ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
if (!ms->rp_state.from_dst_file) {
return -1;
@ -2867,6 +2889,7 @@ static int open_return_path_on_source(MigrationState *ms,
qemu_thread_create(&ms->rp_state.rp_thread, "return path",
source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
ms->rp_state.rp_thread_created = true;
trace_open_return_path_on_source_continue();
@ -2891,6 +2914,7 @@ static int await_return_path_close_on_source(MigrationState *ms)
}
trace_await_return_path_close_on_source_joining();
qemu_thread_join(&ms->rp_state.rp_thread);
ms->rp_state.rp_thread_created = false;
trace_await_return_path_close_on_source_close();
return ms->rp_state.error;
}
@ -3170,7 +3194,7 @@ static void migration_completion(MigrationState *s)
* it will wait for the destination to send it's status in
* a SHUT command).
*/
if (s->rp_state.from_dst_file) {
if (s->rp_state.rp_thread_created) {
int rp_error;
trace_migration_return_path_end_before();
rp_error = await_return_path_close_on_source(s);
@ -3330,8 +3354,17 @@ static MigThrError postcopy_pause(MigrationState *s)
while (true) {
QEMUFile *file;
/* Current channel is possibly broken. Release it. */
/*
* Current channel is possibly broken. Release it. Note that this is
* guaranteed even without lock because to_dst_file should only be
* modified by the migration thread. That also guarantees that the
* unregister of yank is safe too without the lock. It should be safe
* even to be within the qemu_file_lock, but we didn't do that to avoid
* taking more mutex (yank_lock) within qemu_file_lock. TL;DR: we make
* the qemu_file_lock critical section as small as possible.
*/
assert(s->to_dst_file);
migration_ioc_unregister_yank_from_file(s->to_dst_file);
qemu_mutex_lock(&s->qemu_file_lock);
file = s->to_dst_file;
s->to_dst_file = NULL;
@ -3744,7 +3777,7 @@ static void *migration_thread(void *opaque)
* If we opened the return path, we need to make sure dst has it
* opened as well.
*/
if (s->rp_state.from_dst_file) {
if (s->rp_state.rp_thread_created) {
/* Now tell the dest that it should open its end so it can reply */
qemu_savevm_send_open_return_path(s->to_dst_file);

View File

@ -154,12 +154,13 @@ struct MigrationState {
QemuThread thread;
QEMUBH *vm_start_bh;
QEMUBH *cleanup_bh;
/* Protected by qemu_file_lock */
QEMUFile *to_dst_file;
QIOChannelBuffer *bioc;
/*
* Protects to_dst_file pointer. We need to make sure we won't
* yield or hang during the critical section, since this lock will
* be used in OOB command handler.
* Protects to_dst_file/from_dst_file pointers. We need to make sure we
* won't yield or hang during the critical section, since this lock will be
* used in OOB command handler.
*/
QemuMutex qemu_file_lock;
@ -192,9 +193,17 @@ struct MigrationState {
/* State related to return path */
struct {
/* Protected by qemu_file_lock */
QEMUFile *from_dst_file;
QemuThread rp_thread;
bool error;
/*
* We can also check non-zero of rp_thread, but there's no "official"
* way to do this, so this bool makes it slightly more elegant.
* Checking from_dst_file for this is racy because from_dst_file will
* be cleared in the rp_thread!
*/
bool rp_thread_created;
QemuSemaphore rp_sem;
} rp_state;

View File

@ -987,12 +987,8 @@ int multifd_load_cleanup(Error **errp)
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDRecvParams *p = &multifd_recv_state->params[i];
if ((object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_SOCKET) ||
object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_TLS))
&& OBJECT(p->c)->ref == 1) {
yank_unregister_function(MIGRATION_YANK_INSTANCE,
migration_yank_iochannel,
QIO_CHANNEL(p->c));
if (OBJECT(p->c)->ref == 1) {
migration_ioc_unregister_yank(p->c);
}
object_unref(OBJECT(p->c));

View File

@ -107,13 +107,6 @@ static int channel_close(void *opaque, Error **errp)
int ret;
QIOChannel *ioc = QIO_CHANNEL(opaque);
ret = qio_channel_close(ioc, errp);
if ((object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) ||
object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS))
&& OBJECT(ioc)->ref == 1) {
yank_unregister_function(MIGRATION_YANK_INSTANCE,
migration_yank_iochannel,
QIO_CHANNEL(ioc));
}
object_unref(OBJECT(ioc));
return ret;
}
@ -191,11 +184,11 @@ static const QEMUFileOps channel_output_ops = {
QEMUFile *qemu_fopen_channel_input(QIOChannel *ioc)
{
object_ref(OBJECT(ioc));
return qemu_fopen_ops(ioc, &channel_input_ops);
return qemu_fopen_ops(ioc, &channel_input_ops, true);
}
QEMUFile *qemu_fopen_channel_output(QIOChannel *ioc)
{
object_ref(OBJECT(ioc));
return qemu_fopen_ops(ioc, &channel_output_ops);
return qemu_fopen_ops(ioc, &channel_output_ops, true);
}

View File

@ -55,6 +55,8 @@ struct QEMUFile {
Error *last_error_obj;
/* has the file has been shutdown */
bool shutdown;
/* Whether opaque points to a QIOChannel */
bool has_ioc;
};
/*
@ -101,7 +103,7 @@ bool qemu_file_mode_is_not_valid(const char *mode)
return false;
}
QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc)
{
QEMUFile *f;
@ -109,6 +111,7 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
f->opaque = opaque;
f->ops = ops;
f->has_ioc = has_ioc;
return f;
}
@ -851,3 +854,15 @@ void qemu_file_set_blocking(QEMUFile *f, bool block)
f->ops->set_blocking(f->opaque, block, NULL);
}
}
/*
 * Return the QIOChannel behind @file when it wraps a migration channel.
 *
 * Note: this can legitimately be NULL for non-migration qemufiles, e.g.
 * the ones created by qemu_fopen_bdrv() and used by load_snapshot().
 * Callers must therefore check the result against NULL before using it;
 * otherwise e.g. migration_incoming_state_destroy() could fail for
 * load_snapshot().
 */
QIOChannel *qemu_file_get_ioc(QEMUFile *file)
{
    if (!file->has_ioc) {
        return NULL;
    }

    return QIO_CHANNEL(file->opaque);
}

View File

@ -27,6 +27,7 @@
#include <zlib.h>
#include "exec/cpu-common.h"
#include "io/channel.h"
/* Read a chunk of data from a file at the given position. The pos argument
* can be ignored if the file is only be used for streaming. The number of
@ -119,7 +120,7 @@ typedef struct QEMUFileHooks {
QEMURamSaveFunc *save_page;
} QEMUFileHooks;
QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc);
void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
int qemu_get_fd(QEMUFile *f);
int qemu_fclose(QEMUFile *f);
@ -179,5 +180,6 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
ram_addr_t offset, size_t size,
uint64_t *bytes_sent);
QIOChannel *qemu_file_get_ioc(QEMUFile *file);
#endif

View File

@ -550,7 +550,7 @@ static int compress_threads_save_setup(void)
/* comp_param[i].file is just used as a dummy buffer to save data,
* set its ops to empty.
*/
comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops, false);
comp_param[i].done = true;
comp_param[i].quit = false;
qemu_mutex_init(&comp_param[i].mutex);
@ -789,6 +789,53 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
return find_next_bit(bitmap, size, start);
}
/*
 * Clear the dirty bitmap of the backing memory region for the whole
 * clear_bmap chunk that contains @page.  The chunk is only cleared the
 * first time it is hit: clear_bmap_test_and_clear() returns false on
 * subsequent calls for the same chunk, and we return early.  Blocks
 * without a clear_bmap are a no-op.
 */
static void migration_clear_memory_region_dirty_bitmap(RAMState *rs,
                                                       RAMBlock *rb,
                                                       unsigned long page)
{
    uint8_t shift;
    hwaddr size, start;

    /* Nothing to do if the chunk's bit was already consumed */
    if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
        return;
    }

    shift = rb->clear_bmap_shift;
    /*
     * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
     * can make things easier sometimes since then start address
     * of the small chunk will always be 64 pages aligned so the
     * bitmap will always be aligned to unsigned long. We should
     * even be able to remove this restriction but I'm simply
     * keeping it.
     */
    assert(shift >= 6);

    /* Chunk size in bytes, and @page's address aligned down to the chunk */
    size = 1ULL << (TARGET_PAGE_BITS + shift);
    start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size);
    trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
    memory_region_clear_dirty_bitmap(rb->mr, start, size);
}
/*
 * Clear the memory-region dirty bitmap for every clear_bmap chunk that
 * overlaps the page range [@start, @start + @npages).  The end boundary
 * is exclusive.
 */
static void
migration_clear_memory_region_dirty_bitmap_range(RAMState *rs,
                                                 RAMBlock *rb,
                                                 unsigned long start,
                                                 unsigned long npages)
{
    unsigned long chunk_pages = 1UL << rb->clear_bmap_shift;
    /* Round the range outwards to whole chunks */
    unsigned long first_chunk = QEMU_ALIGN_DOWN(start, chunk_pages);
    unsigned long end_chunk = QEMU_ALIGN_UP(start + npages, chunk_pages);
    unsigned long cur;

    for (cur = first_chunk; cur < end_chunk; cur += chunk_pages) {
        migration_clear_memory_region_dirty_bitmap(rs, rb, cur);
    }
}
static inline bool migration_bitmap_clear_dirty(RAMState *rs,
RAMBlock *rb,
unsigned long page)
@ -803,26 +850,9 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
* the page in the chunk we clear the remote dirty bitmap for all.
* Clearing it earlier won't be a problem, but too late will.
*/
if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
uint8_t shift = rb->clear_bmap_shift;
hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size);
/*
* CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
* can make things easier sometimes since then start address
* of the small chunk will always be 64 pages aligned so the
* bitmap will always be aligned to unsigned long. We should
* even be able to remove this restriction but I'm simply
* keeping it.
*/
assert(shift >= 6);
trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
memory_region_clear_dirty_bitmap(rb->mr, start, size);
}
migration_clear_memory_region_dirty_bitmap(rs, rb, page);
ret = test_and_clear_bit(page, rb->bmap);
if (ret) {
rs->migration_dirty_pages--;
}
@ -2741,6 +2771,14 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
npages = used_len >> TARGET_PAGE_BITS;
qemu_mutex_lock(&ram_state->bitmap_mutex);
/*
* The skipped free pages are equivalent to being sent from clear_bmap's
* perspective, so clear the bits from the memory region bitmap which
* are initially set. Otherwise those skipped pages will be sent in
* the next round after syncing from the memory region bitmap.
*/
migration_clear_memory_region_dirty_bitmap_range(ram_state, block,
start, npages);
ram_state->migration_dirty_pages -=
bitmap_count_one_with_offset(block->bmap, start, npages);
bitmap_clear(block->bmap, start, npages);
@ -4012,6 +4050,7 @@ static void ram_dirty_bitmap_reload_notify(MigrationState *s)
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
{
int ret = -EINVAL;
/* from_dst_file is always valid because we're within rp_thread */
QEMUFile *file = s->rp_state.from_dst_file;
unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
uint64_t local_size = DIV_ROUND_UP(nbits, 8);

View File

@ -65,6 +65,7 @@
#include "qemu/bitmap.h"
#include "net/announce.h"
#include "qemu/yank.h"
#include "yank_functions.h"
const unsigned int postcopy_ram_discard_version;
@ -168,9 +169,9 @@ static const QEMUFileOps bdrv_write_ops = {
static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
{
if (is_writable) {
return qemu_fopen_ops(bs, &bdrv_write_ops);
return qemu_fopen_ops(bs, &bdrv_write_ops, false);
}
return qemu_fopen_ops(bs, &bdrv_read_ops);
return qemu_fopen_ops(bs, &bdrv_read_ops, false);
}
@ -2568,6 +2569,12 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
/* Clear the triggered bit to allow one recovery */
mis->postcopy_recover_triggered = false;
/*
* Unregister yank with either from/to src would work, since ioc behind it
* is the same
*/
migration_ioc_unregister_yank_from_file(mis->from_src_file);
assert(mis->from_src_file);
qemu_file_shutdown(mis->from_src_file);
qemu_fclose(mis->from_src_file);

View File

@ -11,6 +11,10 @@
#include "qapi/error.h"
#include "io/channel.h"
#include "yank_functions.h"
#include "qemu/yank.h"
#include "io/channel-socket.h"
#include "io/channel-tls.h"
#include "qemu-file.h"
void migration_yank_iochannel(void *opaque)
{
@ -18,3 +22,41 @@ void migration_yank_iochannel(void *opaque)
qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
}
/* Return whether yank is supported on this ioc */
static bool migration_ioc_yank_supported(QIOChannel *ioc)
{
    /* Only socket and TLS channels can usefully be yanked */
    if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET)) {
        return true;
    }
    if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) {
        return true;
    }
    return false;
}
/* Register the yank shutdown hook for @ioc, if the channel type supports it */
void migration_ioc_register_yank(QIOChannel *ioc)
{
    if (!migration_ioc_yank_supported(ioc)) {
        return;
    }

    yank_register_function(MIGRATION_YANK_INSTANCE,
                           migration_yank_iochannel,
                           QIO_CHANNEL(ioc));
}
/* Drop the yank shutdown hook for @ioc, if the channel type supports it */
void migration_ioc_unregister_yank(QIOChannel *ioc)
{
    if (!migration_ioc_yank_supported(ioc)) {
        return;
    }

    yank_unregister_function(MIGRATION_YANK_INSTANCE,
                             migration_yank_iochannel,
                             QIO_CHANNEL(ioc));
}
/* Unregister the yank hook for the QIOChannel backing @file, if any */
void migration_ioc_unregister_yank_from_file(QEMUFile *file)
{
    QIOChannel *ioc = qemu_file_get_ioc(file);

    /*
     * Non-migration qemufiles (e.g. savevm/loadvm ones) have no ioc and
     * never registered yank, so there is nothing to do for them.  For
     * migration qemufiles we'll always get an ioc here.
     */
    if (!ioc) {
        return;
    }

    migration_ioc_unregister_yank(ioc);
}

View File

@ -15,3 +15,6 @@
* @opaque: QIOChannel to shutdown
*/
void migration_yank_iochannel(void *opaque);
void migration_ioc_register_yank(QIOChannel *ioc);
void migration_ioc_unregister_yank(QIOChannel *ioc);
void migration_ioc_unregister_yank_from_file(QEMUFile *file);

View File

@ -787,10 +787,10 @@ static void test_baddest(void)
args->hide_stderr = true;
if (test_migrate_start(&from, &to, "tcp:0:0", args)) {
if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", args)) {
return;
}
migrate_qmp(from, "tcp:0:0", "{}");
migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
wait_for_migration_fail(from, false);
test_migrate_end(from, to, false);
}