Migration Pull request (20231017)

Hi

Same as yesterday's one, except:

- rebased to latest (clean rebase)
- fixed 64-bit read on big-endian host

CI: https://gitlab.com/juan.quintela/qemu/-/pipelines/1039214198

Please apply.

Merge tag 'migration-20231017-pull-request' of https://gitlab.com/juan.quintela/qemu into staging

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmUuReUACgkQ9IfvGFhy
# 1yO+FQ/+Nx2botbrUVJb3vLeG6f+x5xeWJjB0boOqhk7227cKmAA33Oqwx5l4UtL
# oLOHA6P4ThqacpaluGOMMp44BSr/jOMDC/HUDVJtSplTD+droPiklIIGUfYScLbA
# oYx6lXfSB2jMpSuSU19STbjwBRvd4bjJix3zDGwEIgXYqYt0tY0FY/nnGTmImnM1
# KDjRerf1lg4Rt0vvwg7I0onIDvh3CKX26Sj5a3wSRaLoocUe3jpsuBNH7MMqroHs
# WpocBIsLiBAf/CbeLZsQlhbVeOi1R+kSAR5hDPvvJCPWHIrd2wf8+3NXjcFepb7d
# M4wE2jLjCvHhzwYwSc0ir4n74jwD22IirEPQs8ONHrjLCb5VoBKYV5bqsFUHF55N
# SbFvcZIzJFiOm2anEWiiqiNTLtYAdQCKtUvbyJ7Mq4ck6icIInLdX9zrm4voofYJ
# 02lX/IIGlT3C3dGSz09LBoJ6E82zmQWNHmov8A90+3RYvMF9uSpxi0z40lhj6jWC
# 6Q2AHxrJJ040ZboeOfJQG78BtvZ/9PQ2ORhJ3ceRDND4kSTDtfe/TSNAZ3thM33y
# Sv99o+F/HaqrKnxK8eTJrvIEWxojDu3lnqJERWAm2AOxTnQ+6mgGtsCfLEdrv5D1
# xVsY2QczB1quRjaU2ml/7Cxe4Q1urTtfl82IEXGded6UL+cmF/I=
# =br93
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 17 Oct 2023 04:29:25 EDT
# gpg:                using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg:                 aka "Juan Quintela <quintela@trasno.org>" [full]
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723

* tag 'migration-20231017-pull-request' of https://gitlab.com/juan.quintela/qemu: (38 commits)
  migration/multifd: Clarify Error usage in multifd_channel_connect
  migration/multifd: Unify multifd_send_thread error paths
  migration/multifd: Remove direct "socket" references
  migration/ram: Merge save_zero_page functions
  migration/ram: Move xbzrle zero page handling into save_zero_page
  migration/ram: Stop passing QEMUFile around in save_zero_page
  migration/ram: Remove RAMState from xbzrle_cache_zero_page
  migration/ram: Refactor precopy ram loading code
  multifd: reset next_packet_len after sending pages
  multifd: fix counters in multifd_send_thread
  migration: check for rate_limit_max for RATE_LIMIT_DISABLED
  migration: Improve json and formatting
  migration/rdma: Remove all "ret" variables that are used only once
  migration/rdma: Declare for index variables local
  migration/rdma: Use i as for index instead of idx
  migration/rdma: Check sooner if we are in postcopy for save_page()
  migration/rdma: Remove qemu_ prefix from exported functions
  migration/rdma: Move rdma constants from qemu-file.h to rdma.h
  qemu-file: Remove QEMUFileHooks
  migration/rdma: Create rdma_control_save_page()
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
commit ec6f9f135d
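The cover letter's "fixed 64-bit read on big-endian host" refers to a CI fix that is not part of the hunks below. As a general illustration only (not the actual patch): decoding a 64-bit big-endian value byte by byte gives the same result on little- and big-endian hosts; in QEMU, qemu_get_be64(), used throughout the diff below, performs the equivalent byte-order handling for the migration stream.

    #include <stdint.h>

    /* Illustrative sketch, not code from this series. */
    static uint64_t read_be64(const uint8_t *buf)
    {
        return ((uint64_t)buf[0] << 56) | ((uint64_t)buf[1] << 48) |
               ((uint64_t)buf[2] << 40) | ((uint64_t)buf[3] << 32) |
               ((uint64_t)buf[4] << 24) | ((uint64_t)buf[5] << 16) |
               ((uint64_t)buf[6] << 8)  | ((uint64_t)buf[7]);
    }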
include/migration/register.h:

@@ -25,6 +25,7 @@ typedef struct SaveVMHandlers {
      * used to perform early checks.
      */
     int (*save_prepare)(void *opaque, Error **errp);
+    int (*save_setup)(QEMUFile *f, void *opaque);
     void (*save_cleanup)(void *opaque);
     int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
     int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
@@ -50,7 +51,6 @@ typedef struct SaveVMHandlers {
     int (*save_live_iterate)(QEMUFile *f, void *opaque);

     /* This runs outside the iothread lock! */
-    int (*save_setup)(QEMUFile *f, void *opaque);
     /* Note for save_live_pending:
      * must_precopy:
      *  - must be migrated in precopy or in stopped state
migration/block-dirty-bitmap.c:

@@ -1214,9 +1214,7 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
     DBMSaveState *s = &((DBMState *)opaque)->save;
     SaveBitmapState *dbms = NULL;

-    qemu_mutex_lock_iothread();
     if (init_dirty_bitmap_migration(s) < 0) {
-        qemu_mutex_unlock_iothread();
         return -1;
     }

@@ -1224,7 +1222,6 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
         send_bitmap_start(f, s, dbms);
     }
     qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
-    qemu_mutex_unlock_iothread();
     return 0;
 }

migration/block.c:

@@ -731,18 +731,13 @@ static int block_save_setup(QEMUFile *f, void *opaque)
     trace_migration_block_save("setup", block_mig_state.submitted,
                                block_mig_state.transferred);

-    qemu_mutex_lock_iothread();
     ret = init_blk_migration(f);
     if (ret < 0) {
-        qemu_mutex_unlock_iothread();
         return ret;
     }

     /* start track dirty blocks */
     ret = set_dirty_tracking();
-
-    qemu_mutex_unlock_iothread();
-
     if (ret) {
         return ret;
     }
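The two hunks above, together with the register.h change, move iothread-lock ownership out of the individual save_setup() implementations and into their caller (see the migration_thread hunks later, which wrap qemu_savevm_state_setup() in lock/unlock). A minimal sketch of that convention change, with pthread standing in for QEMU's iothread lock (hypothetical names):

    #include <pthread.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Before: each setup handler took and dropped the lock itself. */
    static int handler_setup_old(void)
    {
        pthread_mutex_lock(&big_lock);
        /* ... initialize migration state ... */
        pthread_mutex_unlock(&big_lock);
        return 0;
    }

    /* After: handlers run with the lock already held ... */
    static int handler_setup_new(void)
    {
        /* caller already holds big_lock */
        /* ... initialize migration state ... */
        return 0;
    }

    /* ... because the caller wraps all of them once. */
    static int run_setup_handlers(void)
    {
        pthread_mutex_lock(&big_lock);
        int ret = handler_setup_new();
        pthread_mutex_unlock(&big_lock);
        return ret;
    }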
migration/migration-hmp-cmds.c:

@@ -321,6 +321,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
         monitor_printf(mon, "%s: %" PRIu64 " bytes/second\n",
             MigrationParameter_str(MIGRATION_PARAMETER_MAX_BANDWIDTH),
             params->max_bandwidth);
+        assert(params->has_avail_switchover_bandwidth);
+        monitor_printf(mon, "%s: %" PRIu64 " bytes/second\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_AVAIL_SWITCHOVER_BANDWIDTH),
+            params->avail_switchover_bandwidth);
         assert(params->has_downtime_limit);
         monitor_printf(mon, "%s: %" PRIu64 " ms\n",
             MigrationParameter_str(MIGRATION_PARAMETER_DOWNTIME_LIMIT),
@@ -574,6 +578,16 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
         }
         p->max_bandwidth = valuebw;
         break;
+    case MIGRATION_PARAMETER_AVAIL_SWITCHOVER_BANDWIDTH:
+        p->has_avail_switchover_bandwidth = true;
+        ret = qemu_strtosz_MiB(valuestr, NULL, &valuebw);
+        if (ret < 0 || valuebw > INT64_MAX
+            || (size_t)valuebw != valuebw) {
+            error_setg(&err, "Invalid size %s", valuestr);
+            break;
+        }
+        p->avail_switchover_bandwidth = valuebw;
+        break;
     case MIGRATION_PARAMETER_DOWNTIME_LIMIT:
         p->has_downtime_limit = true;
         visit_type_size(v, param, &p->downtime_limit, &err);
migration/migration-stats.c:

@@ -24,14 +24,15 @@ bool migration_rate_exceeded(QEMUFile *f)
         return true;
     }

-    uint64_t rate_limit_start = stat64_get(&mig_stats.rate_limit_start);
-    uint64_t rate_limit_current = migration_transferred_bytes(f);
-    uint64_t rate_limit_used = rate_limit_current - rate_limit_start;
-    uint64_t rate_limit_max = stat64_get(&mig_stats.rate_limit_max);
-
+    uint64_t rate_limit_max = migration_rate_get();
     if (rate_limit_max == RATE_LIMIT_DISABLED) {
         return false;
     }
+
+    uint64_t rate_limit_start = stat64_get(&mig_stats.rate_limit_start);
+    uint64_t rate_limit_current = migration_transferred_bytes(f);
+    uint64_t rate_limit_used = rate_limit_current - rate_limit_start;
+
     if (rate_limit_max > 0 && rate_limit_used > rate_limit_max) {
         return true;
     }
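The reordering above tests the RATE_LIMIT_DISABLED sentinel before reading any of the transfer counters, so usage is only computed when a limit is actually in force. The guard-first shape in isolation, as a sketch with hypothetical names:

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch: test the "disabled" sentinel before doing any work. */
    static bool limit_exceeded(uint64_t used, uint64_t max, uint64_t disabled)
    {
        if (max == disabled) {
            return false;   /* rate limiting is off */
        }
        return max > 0 && used > max;
    }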
migration/migration.c:

@@ -99,7 +99,7 @@ static int migration_maybe_pause(MigrationState *s,
                                  int *current_active_state,
                                  int new_state);
 static void migrate_fd_cancel(MigrationState *s);
-static int await_return_path_close_on_source(MigrationState *s);
+static int close_return_path_on_source(MigrationState *s);

 static bool migration_needs_multiple_sockets(void)
 {
@@ -1191,7 +1191,7 @@ static void migrate_fd_cleanup(MigrationState *s)
      * We already cleaned up to_dst_file, so errors from the return
      * path might be due to that, ignore them.
      */
-    await_return_path_close_on_source(s);
+    close_return_path_on_source(s);

     assert(!migration_is_active(s));

@@ -1442,6 +1442,7 @@ int migrate_init(MigrationState *s, Error **errp)
     error_free(s->error);
     s->error = NULL;
     s->hostname = NULL;
+    s->vmdesc = NULL;

     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

@@ -1451,6 +1452,7 @@ int migrate_init(MigrationState *s, Error **errp)
     s->iteration_initial_bytes = 0;
     s->threshold_size = 0;
     s->switchover_acked = false;
+    s->rdma_migration = false;
     /*
      * set mig_stats compression_counters memory to zero for a
      * new migration
@@ -2049,8 +2051,7 @@ static int open_return_path_on_source(MigrationState *ms)
     return 0;
 }

-/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
-static int await_return_path_close_on_source(MigrationState *ms)
+static int close_return_path_on_source(MigrationState *ms)
 {
     int ret;

@@ -2317,90 +2318,65 @@ static int migration_maybe_pause(MigrationState *s,
     return s->state == new_state ? 0 : -EINVAL;
 }

-/**
- * migration_completion: Used by migration_thread when there's not much left.
- * The caller 'breaks' the loop when this returns.
- *
- * @s: Current migration state
- */
-static void migration_completion(MigrationState *s)
+static int migration_completion_precopy(MigrationState *s,
+                                        int *current_active_state)
 {
     int ret;
-    int current_active_state = s->state;

-    if (s->state == MIGRATION_STATUS_ACTIVE) {
-        qemu_mutex_lock_iothread();
-        s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+    qemu_mutex_lock_iothread();
+    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);

-        s->vm_old_state = runstate_get();
-        global_state_store();
+    s->vm_old_state = runstate_get();
+    global_state_store();

-        ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
-        trace_migration_completion_vm_stop(ret);
-        if (ret >= 0) {
-            ret = migration_maybe_pause(s, &current_active_state,
-                                        MIGRATION_STATUS_DEVICE);
-        }
-        if (ret >= 0) {
-            /*
-             * Inactivate disks except in COLO, and track that we
-             * have done so in order to remember to reactivate
-             * them if migration fails or is cancelled.
-             */
-            s->block_inactive = !migrate_colo();
-            migration_rate_set(RATE_LIMIT_DISABLED);
-            ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
-                                                     s->block_inactive);
-        }
-
-        qemu_mutex_unlock_iothread();
-
-        if (ret < 0) {
-            goto fail;
-        }
-    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
-        trace_migration_completion_postcopy_end();
-
-        qemu_mutex_lock_iothread();
-        qemu_savevm_state_complete_postcopy(s->to_dst_file);
-        qemu_mutex_unlock_iothread();
-
-        /*
-         * Shutdown the postcopy fast path thread. This is only needed
-         * when dest QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need
-         * this.
-         */
-        if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
-            postcopy_preempt_shutdown_file(s);
-        }
-
-        trace_migration_completion_postcopy_end_after_complete();
-    } else {
-        goto fail;
+    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+    trace_migration_completion_vm_stop(ret);
+    if (ret < 0) {
+        goto out_unlock;
     }

-    if (await_return_path_close_on_source(s)) {
-        goto fail;
+    ret = migration_maybe_pause(s, current_active_state,
+                                MIGRATION_STATUS_DEVICE);
+    if (ret < 0) {
+        goto out_unlock;
     }

-    if (qemu_file_get_error(s->to_dst_file)) {
-        trace_migration_completion_file_err();
-        goto fail;
+    /*
+     * Inactivate disks except in COLO, and track that we have done so in order
+     * to remember to reactivate them if migration fails or is cancelled.
+     */
+    s->block_inactive = !migrate_colo();
+    migration_rate_set(RATE_LIMIT_DISABLED);
+    ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
+                                             s->block_inactive);
+out_unlock:
+    qemu_mutex_unlock_iothread();
+    return ret;
+}
+
+static void migration_completion_postcopy(MigrationState *s)
+{
+    trace_migration_completion_postcopy_end();
+
+    qemu_mutex_lock_iothread();
+    qemu_savevm_state_complete_postcopy(s->to_dst_file);
+    qemu_mutex_unlock_iothread();
+
+    /*
+     * Shutdown the postcopy fast path thread. This is only needed when dest
+     * QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need this.
+     */
+    if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
+        postcopy_preempt_shutdown_file(s);
     }

-    if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) {
-        /* COLO does not support postcopy */
-        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
-                          MIGRATION_STATUS_COLO);
-    } else {
-        migrate_set_state(&s->state, current_active_state,
-                          MIGRATION_STATUS_COMPLETED);
-    }
-
-    return;
-
-fail:
+    trace_migration_completion_postcopy_end_after_complete();
+}
+
+static void migration_completion_failed(MigrationState *s,
+                                        int current_active_state)
+{
     if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
                               s->state == MIGRATION_STATUS_DEVICE)) {
         /*
@@ -2423,6 +2399,53 @@ fail:
                           MIGRATION_STATUS_FAILED);
     }

+/**
+ * migration_completion: Used by migration_thread when there's not much left.
+ * The caller 'breaks' the loop when this returns.
+ *
+ * @s: Current migration state
+ */
+static void migration_completion(MigrationState *s)
+{
+    int ret = 0;
+    int current_active_state = s->state;
+
+    if (s->state == MIGRATION_STATUS_ACTIVE) {
+        ret = migration_completion_precopy(s, &current_active_state);
+    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+        migration_completion_postcopy(s);
+    } else {
+        ret = -1;
+    }
+
+    if (ret < 0) {
+        goto fail;
+    }
+
+    if (close_return_path_on_source(s)) {
+        goto fail;
+    }
+
+    if (qemu_file_get_error(s->to_dst_file)) {
+        trace_migration_completion_file_err();
+        goto fail;
+    }
+
+    if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) {
+        /* COLO does not support postcopy */
+        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+                          MIGRATION_STATUS_COLO);
+    } else {
+        migrate_set_state(&s->state, current_active_state,
+                          MIGRATION_STATUS_COMPLETED);
+    }
+
+    return;
+
+fail:
+    migration_completion_failed(s, current_active_state);
+}
+
 /**
  * bg_migration_completion: Used by bg_migration_thread when after all the
  * RAM has been saved. The caller 'breaks' the loop when this returns.
@@ -2563,7 +2586,7 @@ static MigThrError postcopy_pause(MigrationState *s)
          * path and just wait for the thread to finish. It will be
          * re-created when we resume.
          */
-        await_return_path_close_on_source(s);
+        close_return_path_on_source(s);

         migrate_set_state(&s->state, s->state,
                           MIGRATION_STATUS_POSTCOPY_PAUSED);
@@ -2689,17 +2712,33 @@ static void migration_update_counters(MigrationState *s,
 {
     uint64_t transferred, transferred_pages, time_spent;
     uint64_t current_bytes; /* bytes transferred since the beginning */
+    uint64_t switchover_bw;
+    /* Expected bandwidth when switching over to destination QEMU */
+    double expected_bw_per_ms;
     double bandwidth;

     if (current_time < s->iteration_start_time + BUFFER_DELAY) {
         return;
     }

+    switchover_bw = migrate_avail_switchover_bandwidth();
     current_bytes = migration_transferred_bytes(s->to_dst_file);
     transferred = current_bytes - s->iteration_initial_bytes;
     time_spent = current_time - s->iteration_start_time;
     bandwidth = (double)transferred / time_spent;
-    s->threshold_size = bandwidth * migrate_downtime_limit();

+    if (switchover_bw) {
+        /*
+         * If the user specified a switchover bandwidth, let's trust the
+         * user so that can be more accurate than what we estimated.
+         */
+        expected_bw_per_ms = switchover_bw / 1000;
+    } else {
+        /* If the user doesn't specify bandwidth, we use the estimated */
+        expected_bw_per_ms = bandwidth;
+    }
+
+    s->threshold_size = expected_bw_per_ms * migrate_downtime_limit();
+
     s->mbps = (((double) transferred * 8.0) /
                ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
@@ -2716,7 +2755,7 @@ static void migration_update_counters(MigrationState *s,
     if (stat64_get(&mig_stats.dirty_pages_rate) &&
         transferred > 10000) {
         s->expected_downtime =
-            stat64_get(&mig_stats.dirty_bytes_last_sync) / bandwidth;
+            stat64_get(&mig_stats.dirty_bytes_last_sync) / expected_bw_per_ms;
     }

     migration_rate_reset(s->to_dst_file);
@@ -2724,7 +2763,9 @@ static void migration_update_counters(MigrationState *s,
     update_iteration_initial_status(s);

     trace_migrate_transferred(transferred, time_spent,
-                              bandwidth, s->threshold_size);
+                              /* Both in unit bytes/ms */
+                              bandwidth, switchover_bw / 1000,
+                              s->threshold_size);
 }

 static bool migration_can_switchover(MigrationState *s)
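The migration_update_counters() hunks above derive the switchover threshold from the user-supplied avail-switchover-bandwidth when it is set, falling back to the measured bandwidth otherwise. The core arithmetic, as a standalone sketch (hypothetical wrapper; values in bytes, milliseconds, and bytes/second):

    #include <stdint.h>

    /* Sketch: switchover_bw of 0 means "not configured by the user". */
    static double threshold_bytes(uint64_t transferred, uint64_t time_spent_ms,
                                  uint64_t switchover_bw, uint64_t downtime_ms)
    {
        double measured_bw_per_ms = (double)transferred / time_spent_ms;
        double expected_bw_per_ms =
            switchover_bw ? switchover_bw / 1000.0 : measured_bw_per_ms;

        return expected_bw_per_ms * downtime_ms;
    }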
@@ -2980,7 +3021,9 @@ static void *migration_thread(void *opaque)
     object_ref(OBJECT(s));
     update_iteration_initial_status(s);

+    qemu_mutex_lock_iothread();
     qemu_savevm_state_header(s->to_dst_file);
+    qemu_mutex_unlock_iothread();

     /*
      * If we opened the return path, we need to make sure dst has it
@@ -3008,7 +3051,9 @@ static void *migration_thread(void *opaque)
         qemu_savevm_send_colo_enable(s->to_dst_file);
     }

+    qemu_mutex_lock_iothread();
     qemu_savevm_state_setup(s->to_dst_file);
+    qemu_mutex_unlock_iothread();

     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                             MIGRATION_STATUS_ACTIVE);
@@ -3119,8 +3164,10 @@ static void *bg_migration_thread(void *opaque)
     ram_write_tracking_prepare();
 #endif

+    qemu_mutex_lock_iothread();
     qemu_savevm_state_header(s->to_dst_file);
     qemu_savevm_state_setup(s->to_dst_file);
+    qemu_mutex_unlock_iothread();

     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                             MIGRATION_STATUS_ACTIVE);
migration/migration.h:

@@ -294,7 +294,7 @@ struct MigrationState {
     /*
      * The final stage happens when the remaining data is smaller than
      * this threshold; it's calculated from the requested downtime and
-     * measured bandwidth
+     * measured bandwidth, or avail-switchover-bandwidth if specified.
      */
     int64_t threshold_size;

@@ -469,6 +469,8 @@ struct MigrationState {
      * switchover has been received.
      */
     bool switchover_acked;
+    /* Is this a rdma migration */
+    bool rdma_migration;
 };

 void migrate_set_state(int *state, int old_state, int new_state);
migration/multifd.c:

@@ -510,6 +510,11 @@ static void multifd_send_terminate_threads(Error *err)
     }
 }

+static int multifd_send_channel_destroy(QIOChannel *send)
+{
+    return socket_send_channel_destroy(send);
+}
+
 void multifd_save_cleanup(void)
 {
     int i;
@@ -532,7 +537,7 @@ void multifd_save_cleanup(void)
         if (p->registered_yank) {
             migration_ioc_unregister_yank(p->c);
         }
-        socket_send_channel_destroy(p->c);
+        multifd_send_channel_destroy(p->c);
         p->c = NULL;
         qemu_mutex_destroy(&p->mutex);
         qemu_sem_destroy(&p->sem);
@@ -714,8 +719,6 @@ static void *multifd_send_thread(void *opaque)
             if (ret != 0) {
                 break;
             }
-            stat64_add(&mig_stats.multifd_bytes, p->packet_len);
-            stat64_add(&mig_stats.transferred, p->packet_len);
         } else {
             /* Send header using the same writev call */
             p->iov[0].iov_len = p->packet_len;
@@ -728,8 +731,11 @@ static void *multifd_send_thread(void *opaque)
                 break;
             }

-            stat64_add(&mig_stats.multifd_bytes, p->next_packet_size);
-            stat64_add(&mig_stats.transferred, p->next_packet_size);
+            stat64_add(&mig_stats.multifd_bytes,
+                       p->next_packet_size + p->packet_len);
+            stat64_add(&mig_stats.transferred,
+                       p->next_packet_size + p->packet_len);
+            p->next_packet_size = 0;
             qemu_mutex_lock(&p->mutex);
             p->pending_job--;
             qemu_mutex_unlock(&p->mutex);
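With the change above, a data packet's header bytes (p->packet_len) and payload (p->next_packet_size) are accounted in one stat64_add() per counter, instead of header-only in one branch and payload-only in the other, and the payload size is reset once the packet is on the wire. The shape of the fix, as a self-contained sketch with hypothetical types:

    #include <stdint.h>

    struct packet_stats { uint64_t multifd_bytes; uint64_t transferred; };

    /* Sketch: count header + payload together, then reset the payload. */
    static void account_packet(struct packet_stats *s,
                               uint64_t header_len, uint64_t *payload_len)
    {
        uint64_t bytes = header_len + *payload_len;

        s->multifd_bytes += bytes;
        s->transferred += bytes;
        *payload_len = 0;   /* reset once the packet is sent */
    }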
@@ -747,19 +753,13 @@ static void *multifd_send_thread(void *opaque)
     }

 out:
-    if (local_err) {
+    if (ret) {
+        assert(local_err);
         trace_multifd_send_error(p->id);
         multifd_send_terminate_threads(local_err);
-        error_free(local_err);
-    }
-
-    /*
-     * Error happen, I will exit, but I can't just leave, tell
-     * who pay attention to me.
-     */
-    if (ret != 0) {
         qemu_sem_post(&p->sem_sync);
         qemu_sem_post(&multifd_send_state->channels_ready);
+        error_free(local_err);
     }

     qemu_mutex_lock(&p->mutex);
@@ -775,7 +775,7 @@ out:

 static bool multifd_channel_connect(MultiFDSendParams *p,
                                     QIOChannel *ioc,
-                                    Error *error);
+                                    Error **errp);

 static void multifd_tls_outgoing_handshake(QIOTask *task,
                                            gpointer opaque)
@@ -784,21 +784,22 @@ static void multifd_tls_outgoing_handshake(QIOTask *task,
     QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task));
     Error *err = NULL;

-    if (qio_task_propagate_error(task, &err)) {
-        trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err));
-    } else {
+    if (!qio_task_propagate_error(task, &err)) {
         trace_multifd_tls_outgoing_handshake_complete(ioc);
+        if (multifd_channel_connect(p, ioc, &err)) {
+            return;
+        }
     }

-    if (!multifd_channel_connect(p, ioc, err)) {
-        /*
-         * Error happen, mark multifd_send_thread status as 'quit' although it
-         * is not created, and then tell who pay attention to me.
-         */
-        p->quit = true;
-        qemu_sem_post(&multifd_send_state->channels_ready);
-        qemu_sem_post(&p->sem_sync);
-    }
+    trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err));
+
+    /*
+     * Error happen, mark multifd_send_thread status as 'quit' although it
+     * is not created, and then tell who pay attention to me.
+     */
+    p->quit = true;
+    qemu_sem_post(&multifd_send_state->channels_ready);
+    qemu_sem_post(&p->sem_sync);
 }

 static void *multifd_tls_handshake_thread(void *opaque)
@@ -814,7 +815,7 @@ static void *multifd_tls_handshake_thread(void *opaque)
     return NULL;
 }

-static void multifd_tls_channel_connect(MultiFDSendParams *p,
+static bool multifd_tls_channel_connect(MultiFDSendParams *p,
                                         QIOChannel *ioc,
                                         Error **errp)
 {
@@ -824,7 +825,7 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p,

     tioc = migration_tls_client_create(ioc, hostname, errp);
     if (!tioc) {
-        return;
+        return false;
     }

     object_unref(OBJECT(ioc));
@@ -834,31 +835,25 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p,
     qemu_thread_create(&p->thread, "multifd-tls-handshake-worker",
                        multifd_tls_handshake_thread, p,
                        QEMU_THREAD_JOINABLE);
+    return true;
 }

 static bool multifd_channel_connect(MultiFDSendParams *p,
                                     QIOChannel *ioc,
-                                    Error *error)
+                                    Error **errp)
 {
     trace_multifd_set_outgoing_channel(
         ioc, object_get_typename(OBJECT(ioc)),
-        migrate_get_current()->hostname, error);
+        migrate_get_current()->hostname);

-    if (error) {
-        return false;
-    }
     if (migrate_channel_requires_tls_upgrade(ioc)) {
-        multifd_tls_channel_connect(p, ioc, &error);
-        if (!error) {
-            /*
-             * tls_channel_connect will call back to this
-             * function after the TLS handshake,
-             * so we mustn't call multifd_send_thread until then
-             */
-            return true;
-        } else {
-            return false;
-        }
+        /*
+         * tls_channel_connect will call back to this
+         * function after the TLS handshake,
+         * so we mustn't call multifd_send_thread until then
+         */
+        return multifd_tls_channel_connect(p, ioc, errp);
+
     } else {
         migration_ioc_register_yank(ioc);
         p->registered_yank = true;
@@ -889,20 +884,26 @@ static void multifd_new_send_channel_cleanup(MultiFDSendParams *p,
 static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
 {
     MultiFDSendParams *p = opaque;
-    QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
+    QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task));
     Error *local_err = NULL;

     trace_multifd_new_send_channel_async(p->id);
     if (!qio_task_propagate_error(task, &local_err)) {
-        p->c = sioc;
+        p->c = ioc;
         qio_channel_set_delay(p->c, false);
         p->running = true;
-        if (multifd_channel_connect(p, sioc, local_err)) {
+        if (multifd_channel_connect(p, ioc, &local_err)) {
             return;
         }
     }

-    multifd_new_send_channel_cleanup(p, sioc, local_err);
+    trace_multifd_new_send_channel_async_error(p->id, local_err);
+    multifd_new_send_channel_cleanup(p, ioc, local_err);
+}
+
+static void multifd_new_send_channel_create(gpointer opaque)
+{
+    socket_send_channel_create(multifd_new_send_channel_async, opaque);
 }

 int multifd_save_setup(Error **errp)
@@ -951,7 +952,7 @@ int multifd_save_setup(Error **errp)
            p->write_flags = 0;
        }

-        socket_send_channel_create(multifd_new_send_channel_async, p);
+        multifd_new_send_channel_create(p);
    }

    for (i = 0; i < thread_count; i++) {
migration/options.c:

@@ -125,6 +125,8 @@ Property migration_properties[] = {
                       parameters.cpu_throttle_tailslow, false),
     DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
                      parameters.max_bandwidth, MAX_THROTTLE),
+    DEFINE_PROP_SIZE("avail-switchover-bandwidth", MigrationState,
+                     parameters.avail_switchover_bandwidth, 0),
     DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
                        parameters.downtime_limit,
                        DEFAULT_MIGRATE_SET_DOWNTIME),
@@ -376,6 +378,13 @@ bool migrate_postcopy(void)
     return migrate_postcopy_ram() || migrate_dirty_bitmaps();
 }

+bool migrate_rdma(void)
+{
+    MigrationState *s = migrate_get_current();
+
+    return s->rdma_migration;
+}
+
 bool migrate_tls(void)
 {
     MigrationState *s = migrate_get_current();
@@ -780,6 +789,13 @@ uint64_t migrate_max_bandwidth(void)
     return s->parameters.max_bandwidth;
 }

+uint64_t migrate_avail_switchover_bandwidth(void)
+{
+    MigrationState *s = migrate_get_current();
+
+    return s->parameters.avail_switchover_bandwidth;
+}
+
 uint64_t migrate_max_postcopy_bandwidth(void)
 {
     MigrationState *s = migrate_get_current();
@@ -917,6 +933,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
                                  s->parameters.tls_authz : "");
     params->has_max_bandwidth = true;
     params->max_bandwidth = s->parameters.max_bandwidth;
+    params->has_avail_switchover_bandwidth = true;
+    params->avail_switchover_bandwidth = s->parameters.avail_switchover_bandwidth;
     params->has_downtime_limit = true;
     params->downtime_limit = s->parameters.downtime_limit;
     params->has_x_checkpoint_delay = true;
@@ -1056,6 +1074,15 @@ bool migrate_params_check(MigrationParameters *params, Error **errp)
         return false;
     }

+    if (params->has_avail_switchover_bandwidth &&
+        (params->avail_switchover_bandwidth > SIZE_MAX)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                   "avail_switchover_bandwidth",
+                   "an integer in the range of 0 to "stringify(SIZE_MAX)
+                   " bytes/second");
+        return false;
+    }
+
     if (params->has_downtime_limit &&
         (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
@@ -1225,6 +1252,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
         dest->max_bandwidth = params->max_bandwidth;
     }

+    if (params->has_avail_switchover_bandwidth) {
+        dest->avail_switchover_bandwidth = params->avail_switchover_bandwidth;
+    }
+
     if (params->has_downtime_limit) {
         dest->downtime_limit = params->downtime_limit;
     }
@@ -1341,6 +1372,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
         }
     }

+    if (params->has_avail_switchover_bandwidth) {
+        s->parameters.avail_switchover_bandwidth = params->avail_switchover_bandwidth;
+    }
+
     if (params->has_downtime_limit) {
         s->parameters.downtime_limit = params->downtime_limit;
     }
migration/options.h:

@@ -56,6 +56,7 @@ bool migrate_zero_copy_send(void);

 bool migrate_multifd_flush_after_each_section(void);
 bool migrate_postcopy(void);
+bool migrate_rdma(void);
 bool migrate_tls(void);

 /* capabilities helpers */
@@ -80,6 +81,7 @@ int migrate_decompress_threads(void);
 uint64_t migrate_downtime_limit(void);
 uint8_t migrate_max_cpu_throttle(void);
 uint64_t migrate_max_bandwidth(void);
+uint64_t migrate_avail_switchover_bandwidth(void);
 uint64_t migrate_max_postcopy_bandwidth(void);
 int migrate_multifd_channels(void);
 MultiFDCompression migrate_multifd_compression(void);
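For reference, the new parameter is set like any other migration parameter; the HMP path above parses the value with qemu_strtosz_MiB(), so plain byte counts and size suffixes both work. An illustrative (hypothetical) monitor session:

    (qemu) migrate_set_parameter avail-switchover-bandwidth 100M
    (qemu) info migrate_parameters
    ...
    avail-switchover-bandwidth: 104857600 bytes/second
    ...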
migration/qemu-file.c:

@@ -32,12 +32,12 @@
 #include "trace.h"
 #include "options.h"
 #include "qapi/error.h"
+#include "rdma.h"

 #define IO_BUF_SIZE 32768
 #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)

 struct QEMUFile {
-    const QEMUFileHooks *hooks;
     QIOChannel *ioc;
     bool is_writable;

@@ -132,11 +132,6 @@ QEMUFile *qemu_file_new_input(QIOChannel *ioc)
     return qemu_file_new_impl(ioc, false);
 }

-void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
-{
-    f->hooks = hooks;
-}
-
 /*
  * Get last error for stream f with optional Error*
  *
@@ -297,60 +292,6 @@ void qemu_fflush(QEMUFile *f)
     f->iovcnt = 0;
 }

-void ram_control_before_iterate(QEMUFile *f, uint64_t flags)
-{
-    int ret = 0;
-
-    if (f->hooks && f->hooks->before_ram_iterate) {
-        ret = f->hooks->before_ram_iterate(f, flags, NULL);
-        if (ret < 0) {
-            qemu_file_set_error(f, ret);
-        }
-    }
-}
-
-void ram_control_after_iterate(QEMUFile *f, uint64_t flags)
-{
-    int ret = 0;
-
-    if (f->hooks && f->hooks->after_ram_iterate) {
-        ret = f->hooks->after_ram_iterate(f, flags, NULL);
-        if (ret < 0) {
-            qemu_file_set_error(f, ret);
-        }
-    }
-}
-
-void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data)
-{
-    if (f->hooks && f->hooks->hook_ram_load) {
-        int ret = f->hooks->hook_ram_load(f, flags, data);
-        if (ret < 0) {
-            qemu_file_set_error(f, ret);
-        }
-    }
-}
-
-int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                          ram_addr_t offset, size_t size)
-{
-    if (f->hooks && f->hooks->save_page) {
-        int ret = f->hooks->save_page(f, block_offset, offset, size);
-        /*
-         * RAM_SAVE_CONTROL_* are negative values
-         */
-        if (ret != RAM_SAVE_CONTROL_DELAYED &&
-            ret != RAM_SAVE_CONTROL_NOT_SUPP) {
-            if (ret < 0) {
-                qemu_file_set_error(f, ret);
-            }
-        }
-        return ret;
-    }
-
-    return RAM_SAVE_CONTROL_NOT_SUPP;
-}
-
 /*
  * Attempt to fill the buffer from the underlying file
  * Returns the number of bytes read, or negative value for an error.
migration/qemu-file.h:

@@ -29,41 +29,8 @@
 #include "exec/cpu-common.h"
 #include "io/channel.h"

-/*
- * This function provides hooks around different
- * stages of RAM migration.
- * 'data' is call specific data associated with the 'flags' value
- */
-typedef int (QEMURamHookFunc)(QEMUFile *f, uint64_t flags, void *data);
-
-/*
- * Constants used by ram_control_* hooks
- */
-#define RAM_CONTROL_SETUP     0
-#define RAM_CONTROL_ROUND     1
-#define RAM_CONTROL_HOOK      2
-#define RAM_CONTROL_FINISH    3
-#define RAM_CONTROL_BLOCK_REG 4
-
-/*
- * This function allows override of where the RAM page
- * is saved (such as RDMA, for example.)
- */
-typedef int (QEMURamSaveFunc)(QEMUFile *f,
-                              ram_addr_t block_offset,
-                              ram_addr_t offset,
-                              size_t size);
-
-typedef struct QEMUFileHooks {
-    QEMURamHookFunc *before_ram_iterate;
-    QEMURamHookFunc *after_ram_iterate;
-    QEMURamHookFunc *hook_ram_load;
-    QEMURamSaveFunc *save_page;
-} QEMUFileHooks;
-
 QEMUFile *qemu_file_new_input(QIOChannel *ioc);
 QEMUFile *qemu_file_new_output(QIOChannel *ioc);
-void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
 int qemu_fclose(QEMUFile *f);

 /*
@@ -127,22 +94,6 @@ void qemu_fflush(QEMUFile *f);
 void qemu_file_set_blocking(QEMUFile *f, bool block);
 int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size);

-void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
-void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
-void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
-
-/* Whenever this is found in the data stream, the flags
- * will be passed to ram_control_load_hook in the incoming-migration
- * side. This lets before_ram_iterate/after_ram_iterate add
- * transport-specific sections to the RAM migration data.
- */
-#define RAM_SAVE_FLAG_HOOK 0x80
-
-#define RAM_SAVE_CONTROL_NOT_SUPP -1000
-#define RAM_SAVE_CONTROL_DELAYED  -2000
-
-int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                          ram_addr_t offset, size_t size);
 QIOChannel *qemu_file_get_ioc(QEMUFile *file);

 #endif
migration/ram.c (306 lines changed):
@@ -59,6 +59,7 @@
 #include "qemu/iov.h"
 #include "multifd.h"
 #include "sysemu/runstate.h"
+#include "rdma.h"
 #include "options.h"
 #include "sysemu/dirtylimit.h"
 #include "sysemu/kvm.h"
@@ -88,7 +89,7 @@
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
 #define RAM_SAVE_FLAG_XBZRLE   0x40
-/* 0x80 is reserved in qemu-file.h for RAM_SAVE_FLAG_HOOK */
+/* 0x80 is reserved in rdma.h for RAM_SAVE_FLAG_HOOK */
 #define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
 #define RAM_SAVE_FLAG_MULTIFD_FLUSH    0x200
 /* We can't use any flag that is bigger than 0x200 */
@@ -569,7 +570,6 @@ void mig_throttle_counter_reset(void)
 /**
  * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
  *
- * @rs: current RAM state
  * @current_addr: address for the zero page
  *
  * Update the xbzrle cache to reflect a page that's been sent as all 0.
@@ -578,7 +578,7 @@ void mig_throttle_counter_reset(void)
  * As a bonus, if the page wasn't in the cache it gets added so that
  * when a small write is made into the 0'd page it gets XBZRLE sent.
  */
-static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
+static void xbzrle_cache_zero_page(ram_addr_t current_addr)
 {
     /* We don't care if this fails to allocate a new cache page
      * as long as it updated an old one */
@@ -1137,51 +1137,46 @@ void ram_release_page(const char *rbname, uint64_t offset)
     ram_discard_range(rbname, offset, TARGET_PAGE_SIZE);
 }

-/**
- * save_zero_page_to_file: send the zero page to the file
- *
- * Returns the size of data written to the file, 0 means the page is not
- * a zero page
- *
- * @pss: current PSS channel
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- */
-static int save_zero_page_to_file(PageSearchStatus *pss, QEMUFile *file,
-                                  RAMBlock *block, ram_addr_t offset)
-{
-    uint8_t *p = block->host + offset;
-    int len = 0;
-
-    if (buffer_is_zero(p, TARGET_PAGE_SIZE)) {
-        len += save_page_header(pss, file, block, offset | RAM_SAVE_FLAG_ZERO);
-        qemu_put_byte(file, 0);
-        len += 1;
-        ram_release_page(block->idstr, offset);
-    }
-    return len;
-}
-
 /**
  * save_zero_page: send the zero page to the stream
  *
  * Returns the number of pages written.
  *
+ * @rs: current RAM state
  * @pss: current PSS channel
  * @block: block that contains the page we want to send
  * @offset: offset inside the block for the page
  */
-static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block,
+static int save_zero_page(RAMState *rs, PageSearchStatus *pss, RAMBlock *block,
                           ram_addr_t offset)
 {
-    int len = save_zero_page_to_file(pss, f, block, offset);
+    uint8_t *p = block->host + offset;
+    QEMUFile *file = pss->pss_channel;
+    int len = 0;

-    if (len) {
-        stat64_add(&mig_stats.zero_pages, 1);
-        ram_transferred_add(len);
-        return 1;
+    if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
+        return 0;
     }
-    return -1;
+
+    len += save_page_header(pss, file, block, offset | RAM_SAVE_FLAG_ZERO);
+    qemu_put_byte(file, 0);
+    len += 1;
+    ram_release_page(block->idstr, offset);
+
+    stat64_add(&mig_stats.zero_pages, 1);
+    ram_transferred_add(len);
+
+    /*
+     * Must let xbzrle know, otherwise a previous (now 0'd) cached
+     * page would be stale.
+     */
+    if (rs->xbzrle_started) {
+        XBZRLE_cache_lock();
+        xbzrle_cache_zero_page(block->offset + offset);
+        XBZRLE_cache_unlock();
+    }
+
+    return len;
 }

 /*
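The merged save_zero_page() above follows an early-return shape: bail out for non-zero pages, then emit the page header plus a single zero byte, update the stats, and refresh the XBZRLE cache. The wire cost of a zero page is therefore a header and one byte, independent of TARGET_PAGE_SIZE. QEMU's buffer_is_zero() is a heavily optimized primitive; a portable equivalent of the detection step, as a sketch with a hypothetical helper:

    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>

    /* Sketch: a page is all zeroes iff its first byte is zero and every
     * byte equals the one before it (self-overlapping memcmp idiom). */
    static bool page_is_zero(const unsigned char *p, size_t size)
    {
        return p[0] == 0 && memcmp(p, p + 1, size - 1) == 0;
    }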
@@ -1196,8 +1191,8 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block,
 {
     int ret;

-    ret = ram_control_save_page(pss->pss_channel, block->offset, offset,
-                                TARGET_PAGE_SIZE);
+    ret = rdma_control_save_page(pss->pss_channel, block->offset, offset,
+                                 TARGET_PAGE_SIZE);
     if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
         return false;
     }
@@ -1395,7 +1390,8 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss)
         pss->page = 0;
         pss->block = QLIST_NEXT_RCU(pss->block, next);
         if (!pss->block) {
-            if (!migrate_multifd_flush_after_each_section()) {
+            if (migrate_multifd() &&
+                !migrate_multifd_flush_after_each_section()) {
                 QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel;
                 int ret = multifd_send_sync_main(f);
                 if (ret < 0) {
@@ -2137,17 +2133,8 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
         return 1;
     }

-    res = save_zero_page(pss, pss->pss_channel, block, offset);
-    if (res > 0) {
-        /* Must let xbzrle know, otherwise a previous (now 0'd) cached
-         * page would be stale
-         */
-        if (rs->xbzrle_started) {
-            XBZRLE_cache_lock();
-            xbzrle_cache_zero_page(rs, block->offset + offset);
-            XBZRLE_cache_unlock();
-        }
-        return res;
+    if (save_zero_page(rs, pss, block, offset)) {
+        return 1;
     }

     /*
@@ -2891,8 +2878,6 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)

 static void ram_init_bitmaps(RAMState *rs)
 {
-    /* For memory_global_dirty_log_start below. */
-    qemu_mutex_lock_iothread();
     qemu_mutex_lock_ramlist();

     WITH_RCU_READ_LOCK_GUARD() {
@@ -2904,7 +2889,6 @@ static void ram_init_bitmaps(RAMState *rs)
         }
     }
     qemu_mutex_unlock_ramlist();
-    qemu_mutex_unlock_iothread();

     /*
      * After an eventual first bitmap sync, fixup the initial bitmap
@@ -3062,17 +3046,27 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
         }
     }

-    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
-    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
+    ret = rdma_registration_start(f, RAM_CONTROL_SETUP);
+    if (ret < 0) {
+        qemu_file_set_error(f, ret);
+    }
+
+    ret = rdma_registration_stop(f, RAM_CONTROL_SETUP);
+    if (ret < 0) {
+        qemu_file_set_error(f, ret);
+    }

     migration_ops = g_malloc0(sizeof(MigrationOps));
     migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+
+    qemu_mutex_unlock_iothread();
     ret = multifd_send_sync_main(f);
+    qemu_mutex_lock_iothread();
     if (ret < 0) {
         return ret;
     }

-    if (!migrate_multifd_flush_after_each_section()) {
+    if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) {
         qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
     }

@@ -3122,7 +3116,10 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     /* Read version before ram_list.blocks */
     smp_rmb();

-    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
+    ret = rdma_registration_start(f, RAM_CONTROL_ROUND);
+    if (ret < 0) {
+        qemu_file_set_error(f, ret);
+    }

     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     i = 0;
@@ -3179,12 +3176,15 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
      * Must occur before EOS (or any QEMUFile operation)
      * because of RDMA protocol.
      */
-    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
+    ret = rdma_registration_stop(f, RAM_CONTROL_ROUND);
+    if (ret < 0) {
+        qemu_file_set_error(f, ret);
+    }

 out:
     if (ret >= 0
         && migration_is_setup_or_active(migrate_get_current()->state)) {
-        if (migrate_multifd_flush_after_each_section()) {
+        if (migrate_multifd() && migrate_multifd_flush_after_each_section()) {
             ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel);
             if (ret < 0) {
                 return ret;
@@ -3227,7 +3227,10 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
         migration_bitmap_sync_precopy(rs, true);
     }

-    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
+    ret = rdma_registration_start(f, RAM_CONTROL_FINISH);
+    if (ret < 0) {
+        qemu_file_set_error(f, ret);
+    }

     /* try transferring iterative blocks of memory */

@@ -3249,7 +3252,11 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
         qemu_mutex_unlock(&rs->bitmap_mutex);

         ram_flush_compressed_data(rs);
-        ram_control_after_iterate(f, RAM_CONTROL_FINISH);
+
+        int ret = rdma_registration_stop(f, RAM_CONTROL_FINISH);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
     }

     if (ret < 0) {
@@ -3261,7 +3268,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
         return ret;
     }

-    if (!migrate_multifd_flush_after_each_section()) {
+    if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) {
         qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
     }
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
@@ -3768,7 +3775,8 @@ int ram_load_postcopy(QEMUFile *f, int channel)
             break;
         case RAM_SAVE_FLAG_EOS:
             /* normal exit */
-            if (migrate_multifd_flush_after_each_section()) {
+            if (migrate_multifd() &&
+                migrate_multifd_flush_after_each_section()) {
                 multifd_recv_sync_main();
             }
             break;
@@ -3861,6 +3869,85 @@ void colo_flush_ram_cache(void)
     trace_colo_flush_ram_cache_end();
 }

+static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length)
+{
+    int ret = 0;
+    /* ADVISE is earlier, it shows the source has the postcopy capability on */
+    bool postcopy_advised = migration_incoming_postcopy_advised();
+
+    assert(block);
+
+    if (!qemu_ram_is_migratable(block)) {
+        error_report("block %s should not be migrated !", block->idstr);
+        return -EINVAL;
+    }
+
+    if (length != block->used_length) {
+        Error *local_err = NULL;
+
+        ret = qemu_ram_resize(block, length, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+        }
+    }
+    /* For postcopy we need to check hugepage sizes match */
+    if (postcopy_advised && migrate_postcopy_ram() &&
+        block->page_size != qemu_host_page_size) {
+        uint64_t remote_page_size = qemu_get_be64(f);
+        if (remote_page_size != block->page_size) {
+            error_report("Mismatched RAM page size %s "
+                         "(local) %zd != %" PRId64, block->idstr,
+                         block->page_size, remote_page_size);
+            ret = -EINVAL;
+        }
+    }
+    if (migrate_ignore_shared()) {
+        hwaddr addr = qemu_get_be64(f);
+        if (migrate_ram_is_ignored(block) &&
+            block->mr->addr != addr) {
+            error_report("Mismatched GPAs for block %s "
+                         "%" PRId64 "!= %" PRId64, block->idstr,
+                         (uint64_t)addr, (uint64_t)block->mr->addr);
+            ret = -EINVAL;
+        }
+    }
+    ret = rdma_block_notification_handle(f, block->idstr);
+    if (ret < 0) {
+        qemu_file_set_error(f, ret);
+    }
+
+    return ret;
+}
+
+static int parse_ramblocks(QEMUFile *f, ram_addr_t total_ram_bytes)
+{
+    int ret = 0;
+
+    /* Synchronize RAM block list */
+    while (!ret && total_ram_bytes) {
+        RAMBlock *block;
+        char id[256];
+        ram_addr_t length;
+        int len = qemu_get_byte(f);
+
+        qemu_get_buffer(f, (uint8_t *)id, len);
+        id[len] = 0;
+        length = qemu_get_be64(f);
+
+        block = qemu_ram_block_by_name(id);
+        if (block) {
+            ret = parse_ramblock(f, block, length);
+        } else {
+            error_report("Unknown ramblock \"%s\", cannot accept "
+                         "migration", id);
+            ret = -EINVAL;
+        }
+        total_ram_bytes -= length;
+    }
+
+    return ret;
+}
+
 /**
  * ram_load_precopy: load pages in precopy case
  *
@@ -3875,14 +3962,13 @@ static int ram_load_precopy(QEMUFile *f)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
     int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
-    /* ADVISE is earlier, it shows the source has the postcopy capability on */
-    bool postcopy_advised = migration_incoming_postcopy_advised();

     if (!migrate_compress()) {
         invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
     }

     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
-        ram_addr_t addr, total_ram_bytes;
+        ram_addr_t addr;
         void *host = NULL, *host_bak = NULL;
         uint8_t ch;

@@ -3953,65 +4039,7 @@ static int ram_load_precopy(QEMUFile *f)

         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
         case RAM_SAVE_FLAG_MEM_SIZE:
-            /* Synchronize RAM block list */
-            total_ram_bytes = addr;
-            while (!ret && total_ram_bytes) {
-                RAMBlock *block;
-                char id[256];
-                ram_addr_t length;
-
-                len = qemu_get_byte(f);
-                qemu_get_buffer(f, (uint8_t *)id, len);
-                id[len] = 0;
-                length = qemu_get_be64(f);
-
-                block = qemu_ram_block_by_name(id);
-                if (block && !qemu_ram_is_migratable(block)) {
-                    error_report("block %s should not be migrated !", id);
-                    ret = -EINVAL;
-                } else if (block) {
-                    if (length != block->used_length) {
-                        Error *local_err = NULL;
-
-                        ret = qemu_ram_resize(block, length,
-                                              &local_err);
-                        if (local_err) {
-                            error_report_err(local_err);
-                        }
-                    }
-                    /* For postcopy we need to check hugepage sizes match */
-                    if (postcopy_advised && migrate_postcopy_ram() &&
-                        block->page_size != qemu_host_page_size) {
-                        uint64_t remote_page_size = qemu_get_be64(f);
-                        if (remote_page_size != block->page_size) {
-                            error_report("Mismatched RAM page size %s "
-                                         "(local) %zd != %" PRId64,
-                                         id, block->page_size,
-                                         remote_page_size);
-                            ret = -EINVAL;
-                        }
-                    }
-                    if (migrate_ignore_shared()) {
-                        hwaddr addr2 = qemu_get_be64(f);
-                        if (migrate_ram_is_ignored(block) &&
-                            block->mr->addr != addr2) {
-                            error_report("Mismatched GPAs for block %s "
-                                         "%" PRId64 "!= %" PRId64,
-                                         id, (uint64_t)addr2,
-                                         (uint64_t)block->mr->addr);
-                            ret = -EINVAL;
-                        }
-                    }
-                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
-                                          block->idstr);
-                } else {
-                    error_report("Unknown ramblock \"%s\", cannot "
-                                 "accept migration", id);
-                    ret = -EINVAL;
-                }
-
-                total_ram_bytes -= length;
-            }
+            ret = parse_ramblocks(f, addr);
             break;

         case RAM_SAVE_FLAG_ZERO:
@@ -4046,12 +4074,16 @@ static int ram_load_precopy(QEMUFile *f)
             break;
         case RAM_SAVE_FLAG_EOS:
             /* normal exit */
-            if (migrate_multifd_flush_after_each_section()) {
+            if (migrate_multifd() &&
+                migrate_multifd_flush_after_each_section()) {
                 multifd_recv_sync_main();
             }
             break;
         case RAM_SAVE_FLAG_HOOK:
-            ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
+            ret = rdma_registration_handle(f);
+            if (ret < 0) {
+                qemu_file_set_error(f, ret);
+            }
             break;
         default:
             error_report("Unknown combination of migration flags: 0x%x", flags);
@@ -4159,7 +4191,8 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
     int ret = -EINVAL;
     /* from_dst_file is always valid because we're within rp_thread */
     QEMUFile *file = s->rp_state.from_dst_file;
-    unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
+    g_autofree unsigned long *le_bitmap = NULL;
+    unsigned long nbits = block->used_length >> TARGET_PAGE_BITS;
     uint64_t local_size = DIV_ROUND_UP(nbits, 8);
     uint64_t size, end_mark;
     RAMState *rs = ram_state;
@@ -4188,8 +4221,7 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
         error_report("%s: ramblock '%s' bitmap size mismatch "
                      "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
                      block->idstr, size, local_size);
-        ret = -EINVAL;
-        goto out;
+        return -EINVAL;
     }

     size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
@@ -4200,15 +4232,13 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
         error_report("%s: read bitmap failed for ramblock '%s': %d"
                      " (size 0x%"PRIx64", got: 0x%"PRIx64")",
                      __func__, block->idstr, ret, local_size, size);
-        ret = -EIO;
-        goto out;
+        return -EIO;
     }

     if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
         error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
                      __func__, block->idstr, end_mark);
-        ret = -EINVAL;
-        goto out;
+        return -EINVAL;
     }

     /*
@@ -4240,10 +4270,7 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
      */
     migration_rp_kick(s);

-    ret = 0;
-out:
-    g_free(le_bitmap);
-    return ret;
+    return 0;
 }

 static int ram_resume_prepare(MigrationState *s, void *opaque)
@@ -4290,6 +4317,11 @@ static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
     RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset);
     Error *err = NULL;

+    if (!rb) {
+        error_report("RAM block not found");
+        return;
+    }
+
     if (migrate_ram_is_ignored(rb)) {
         return;
     }
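The ram_dirty_bitmap_reload() hunks above replace the ret/out:/g_free() cleanup pattern with GLib's g_autofree, which releases le_bitmap on every return path and lets error cases return directly. The idiom in isolation, as a sketch:

    #include <glib.h>

    /* Sketch: the buffer is freed automatically on every return path. */
    static int load_bitmap_sketch(gsize size, gboolean fail)
    {
        g_autofree guint8 *bitmap = g_malloc0(size);

        if (fail) {
            return -1;      /* bitmap freed here automatically */
        }

        /* ... fill and consume bitmap ... */
        return 0;           /* ... and here */
    }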
migration/rdma.c (259 lines changed):
@@ -559,10 +559,8 @@ static void rdma_add_block(RDMAContext *rdma, const char *block_name,
     local->block = g_new0(RDMALocalBlock, local->nb_blocks + 1);

     if (local->nb_blocks) {
-        int x;
-
         if (rdma->blockmap) {
-            for (x = 0; x < local->nb_blocks; x++) {
+            for (int x = 0; x < local->nb_blocks; x++) {
                 g_hash_table_remove(rdma->blockmap,
                                     (void *)(uintptr_t)old[x].offset);
                 g_hash_table_insert(rdma->blockmap,
@@ -649,15 +647,12 @@ static void rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block)
 {
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
     RDMALocalBlock *old = local->block;
-    int x;

     if (rdma->blockmap) {
         g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)block->offset);
     }
     if (block->pmr) {
-        int j;
-
-        for (j = 0; j < block->nb_chunks; j++) {
+        for (int j = 0; j < block->nb_chunks; j++) {
             if (!block->pmr[j]) {
                 continue;
             }
@@ -687,7 +682,7 @@ static void rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block)
     block->block_name = NULL;

     if (rdma->blockmap) {
-        for (x = 0; x < local->nb_blocks; x++) {
+        for (int x = 0; x < local->nb_blocks; x++) {
             g_hash_table_remove(rdma->blockmap,
                                 (void *)(uintptr_t)old[x].offset);
         }
@@ -705,7 +700,7 @@ static void rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block)
         memcpy(local->block + block->index, old + (block->index + 1),
                sizeof(RDMALocalBlock) *
                    (local->nb_blocks - (block->index + 1)));
-        for (x = block->index; x < local->nb_blocks - 1; x++) {
+        for (int x = block->index; x < local->nb_blocks - 1; x++) {
             local->block[x].index--;
         }
     }
@@ -725,7 +720,7 @@ static void rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block)
     local->nb_blocks--;

     if (local->nb_blocks && rdma->blockmap) {
-        for (x = 0; x < local->nb_blocks; x++) {
+        for (int x = 0; x < local->nb_blocks; x++) {
             g_hash_table_insert(rdma->blockmap,
                                 (void *)(uintptr_t)local->block[x].offset,
                                 &local->block[x]);
@@ -828,12 +823,12 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
      * Otherwise, there are no guarantees until the bug is fixed in linux.
      */
     if (!verbs) {
-        int num_devices, x;
+        int num_devices;
         struct ibv_device **dev_list = ibv_get_device_list(&num_devices);
         bool roce_found = false;
         bool ib_found = false;

-        for (x = 0; x < num_devices; x++) {
+        for (int x = 0; x < num_devices; x++) {
             verbs = ibv_open_device(dev_list[x]);
             /*
              * ibv_open_device() is not documented to set errno. If
@@ -925,7 +920,6 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
     char port_str[16];
     struct rdma_cm_event *cm_event;
     char ip[40] = "unknown";
-    struct rdma_addrinfo *e;

     if (rdma->host == NULL || !strcmp(rdma->host, "")) {
         error_setg(errp, "RDMA ERROR: RDMA hostname has not been set");
@@ -957,7 +951,7 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
     }

     /* Try all addresses, saving the first error in @err */
-    for (e = res; e != NULL; e = e->ai_next) {
+    for (struct rdma_addrinfo *e = res; e != NULL; e = e->ai_next) {
         Error **local_errp = err ? NULL : &err;

         inet_ntop(e->ai_family,
@@ -1113,7 +1107,6 @@ err_alloc_pd_cq:
 static int qemu_rdma_alloc_qp(RDMAContext *rdma)
 {
     struct ibv_qp_init_attr attr = { 0 };
-    int ret;

     attr.cap.max_send_wr = RDMA_SIGNALED_SEND_MAX;
     attr.cap.max_recv_wr = 3;
@@ -1123,8 +1116,7 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
     attr.recv_cq = rdma->recv_cq;
     attr.qp_type = IBV_QPT_RC;

-    ret = rdma_create_qp(rdma->cm_id, rdma->pd, &attr);
-    if (ret < 0) {
+    if (rdma_create_qp(rdma->cm_id, rdma->pd, &attr) < 0) {
         return -1;
     }

@@ -1136,8 +1128,8 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
 static bool rdma_support_odp(struct ibv_context *dev)
 {
     struct ibv_device_attr_ex attr = {0};
-    int ret = ibv_query_device_ex(dev, NULL, &attr);
-    if (ret) {
+
+    if (ibv_query_device_ex(dev, NULL, &attr)) {
         return false;
     }
@@ -1514,7 +1506,6 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
                                        struct ibv_comp_channel *comp_channel)
 {
     struct rdma_cm_event *cm_event;
-    int ret;

     /*
      * Coroutine doesn't start until migration_fd_process_incoming()
@@ -1550,8 +1541,7 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
             }

             if (pfds[1].revents) {
-                ret = rdma_get_cm_event(rdma->channel, &cm_event);
-                if (ret < 0) {
+                if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
                     return -1;
                 }
@@ -2323,12 +2313,10 @@ static int qemu_rdma_write(RDMAContext *rdma,
     uint64_t current_addr = block_offset + offset;
     uint64_t index = rdma->current_index;
     uint64_t chunk = rdma->current_chunk;
-    int ret;

     /* If we cannot merge it, we flush the current buffer first. */
     if (!qemu_rdma_buffer_mergeable(rdma, current_addr, len)) {
-        ret = qemu_rdma_write_flush(rdma, errp);
-        if (ret < 0) {
+        if (qemu_rdma_write_flush(rdma, errp) < 0) {
             return -1;
         }
         rdma->current_length = 0;
@@ -2354,7 +2342,6 @@ static int qemu_rdma_write(RDMAContext *rdma,
 static void qemu_rdma_cleanup(RDMAContext *rdma)
 {
     Error *err = NULL;
-    int idx;

     if (rdma->cm_id && rdma->connected) {
         if ((rdma->errored ||
@@ -2381,12 +2368,12 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
     g_free(rdma->dest_blocks);
     rdma->dest_blocks = NULL;

-    for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
-        if (rdma->wr_data[idx].control_mr) {
+    for (int i = 0; i < RDMA_WRID_MAX; i++) {
+        if (rdma->wr_data[i].control_mr) {
             rdma->total_registrations--;
-            ibv_dereg_mr(rdma->wr_data[idx].control_mr);
+            ibv_dereg_mr(rdma->wr_data[i].control_mr);
         }
-        rdma->wr_data[idx].control_mr = NULL;
+        rdma->wr_data[i].control_mr = NULL;
     }

     if (rdma->local_ram_blocks.block) {
@@ -2452,7 +2439,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)

 static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp)
 {
-    int ret, idx;
+    int ret;

     /*
      * Will be validated against destination's actual capabilities
@@ -2480,18 +2467,17 @@ static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp)

     /* Build the hash that maps from offset to RAMBlock */
     rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
-    for (idx = 0; idx < rdma->local_ram_blocks.nb_blocks; idx++) {
+    for (int i = 0; i < rdma->local_ram_blocks.nb_blocks; i++) {
         g_hash_table_insert(rdma->blockmap,
-            (void *)(uintptr_t)rdma->local_ram_blocks.block[idx].offset,
-            &rdma->local_ram_blocks.block[idx]);
+            (void *)(uintptr_t)rdma->local_ram_blocks.block[i].offset,
+            &rdma->local_ram_blocks.block[i]);
     }

-    for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
-        ret = qemu_rdma_reg_control(rdma, idx);
+    for (int i = 0; i < RDMA_WRID_MAX; i++) {
+        ret = qemu_rdma_reg_control(rdma, i);
         if (ret < 0) {
-            error_setg(errp,
-                "RDMA ERROR: rdma migration: error registering %d control!",
-                idx);
+            error_setg(errp, "RDMA ERROR: rdma migration: error "
+                       "registering %d control!", i);
             goto err_rdma_source_init;
         }
     }
@@ -2625,16 +2611,16 @@ err_rdma_source_connect:
 static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
 {
     Error *err = NULL;
-    int ret, idx;
+    int ret;
     struct rdma_cm_id *listen_id;
     char ip[40] = "unknown";
     struct rdma_addrinfo *res, *e;
     char port_str[16];
     int reuse = 1;

-    for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
-        rdma->wr_data[idx].control_len = 0;
-        rdma->wr_data[idx].control_curr = NULL;
+    for (int i = 0; i < RDMA_WRID_MAX; i++) {
+        rdma->wr_data[i].control_len = 0;
+        rdma->wr_data[i].control_curr = NULL;
     }

     if (!rdma->host || !rdma->host[0]) {
@@ -2723,11 +2709,9 @@ err_dest_init_create_listen_id:
 static void qemu_rdma_return_path_dest_init(RDMAContext *rdma_return_path,
                                             RDMAContext *rdma)
 {
-    int idx;
-
-    for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
-        rdma_return_path->wr_data[idx].control_len = 0;
-        rdma_return_path->wr_data[idx].control_curr = NULL;
+    for (int i = 0; i < RDMA_WRID_MAX; i++) {
+        rdma_return_path->wr_data[i].control_len = 0;
+        rdma_return_path->wr_data[i].control_curr = NULL;
     }

     /*the CM channel and CM id is shared*/
@@ -2781,7 +2765,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
     RDMAContext *rdma;
     int ret;
     ssize_t done = 0;
-    size_t i, len;
+    size_t len;

     RCU_READ_LOCK_GUARD();
     rdma = qatomic_rcu_read(&rioc->rdmaout);
@@ -2807,7 +2791,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
         return -1;
     }

-    for (i = 0; i < niov; i++) {
+    for (int i = 0; i < niov; i++) {
         size_t remaining = iov[i].iov_len;
         uint8_t * data = (void *)iov[i].iov_base;
         while (remaining) {
@@ -2870,7 +2854,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
     RDMAControlHeader head;
     int ret;
     ssize_t done = 0;
-    size_t i, len;
+    size_t len;

     RCU_READ_LOCK_GUARD();
     rdma = qatomic_rcu_read(&rioc->rdmain);
@@ -2886,7 +2870,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
         return -1;
     }

-    for (i = 0; i < niov; i++) {
+    for (int i = 0; i < niov; i++) {
         size_t want = iov[i].iov_len;
         uint8_t *data = (void *)iov[i].iov_base;

@@ -2946,7 +2930,6 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
 static int qemu_rdma_drain_cq(RDMAContext *rdma)
 {
     Error *err = NULL;
-    int ret;

     if (qemu_rdma_write_flush(rdma, &err) < 0) {
         error_report_err(err);
@@ -2954,8 +2937,7 @@ static int qemu_rdma_drain_cq(RDMAContext *rdma)
     }

     while (rdma->nb_sent) {
-        ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL);
-        if (ret < 0) {
+        if (qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL) < 0) {
             error_report("rdma migration: complete polling error!");
             return -1;
         }
@@ -3240,10 +3222,6 @@ static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset,
     RDMAContext *rdma;
     int ret;

-    if (migration_in_postcopy()) {
-        return RAM_SAVE_CONTROL_NOT_SUPP;
-    }
-
     RCU_READ_LOCK_GUARD();
     rdma = qatomic_rcu_read(&rioc->rdmaout);

@@ -3314,17 +3292,33 @@ err:
     return -1;
 }

+int rdma_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                           ram_addr_t offset, size_t size)
+{
+    if (!migrate_rdma() || migration_in_postcopy()) {
+        return RAM_SAVE_CONTROL_NOT_SUPP;
+    }
+
+    int ret = qemu_rdma_save_page(f, block_offset, offset, size);
+
+    if (ret != RAM_SAVE_CONTROL_DELAYED &&
+        ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    }
+    return ret;
+}
+
 static void rdma_accept_incoming_migration(void *opaque);

 static void rdma_cm_poll_handler(void *opaque)
 {
     RDMAContext *rdma = opaque;
-    int ret;
     struct rdma_cm_event *cm_event;
     MigrationIncomingState *mis = migration_incoming_get_current();

-    ret = rdma_get_cm_event(rdma->channel, &cm_event);
-    if (ret < 0) {
+    if (rdma_get_cm_event(rdma->channel, &cm_event) < 0) {
         error_report("get_cm_event failed %d", errno);
         return;
     }
@@ -3362,7 +3356,6 @@ static int qemu_rdma_accept(RDMAContext *rdma)
     struct rdma_cm_event *cm_event;
     struct ibv_context *verbs;
     int ret;
-    int idx;

     ret = rdma_get_cm_event(rdma->channel, &cm_event);
     if (ret < 0) {
@@ -3448,10 +3441,10 @@ static int qemu_rdma_accept(RDMAContext *rdma)

     qemu_rdma_init_ram_blocks(rdma);

-    for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
-        ret = qemu_rdma_reg_control(rdma, idx);
+    for (int i = 0; i < RDMA_WRID_MAX; i++) {
+        ret = qemu_rdma_reg_control(rdma, i);
         if (ret < 0) {
-            error_report("rdma: error registering %d control", idx);
+            error_report("rdma: error registering %d control", i);
             goto err_rdma_dest_wait;
         }
     }
@@ -3522,7 +3515,7 @@ static int dest_ram_sort_func(const void *a, const void *b)
  *
  * Keep doing this until the source tells us to stop.
  */
-static int qemu_rdma_registration_handle(QEMUFile *f)
+int rdma_registration_handle(QEMUFile *f)
 {
     RDMAControlHeader reg_resp = { .len = sizeof(RDMARegisterResult),
                                    .type = RDMA_CONTROL_REGISTER_RESULT,
@@ -3534,7 +3527,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
     };
     RDMAControlHeader blocks = { .type = RDMA_CONTROL_RAM_BLOCKS_RESULT,
                                  .repeat = 1 };
-    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
+    QIOChannelRDMA *rioc;
     Error *err = NULL;
     RDMAContext *rdma;
     RDMALocalBlocks *local;
@@ -3547,10 +3540,13 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
     void *host_addr;
     int ret;
     int idx = 0;
-    int count = 0;
-    int i = 0;
+
+    if (!migrate_rdma()) {
+        return 0;
+    }

     RCU_READ_LOCK_GUARD();
+    rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
     rdma = qatomic_rcu_read(&rioc->rdmain);

     if (!rdma) {
|
||||
|
||||
local = &rdma->local_ram_blocks;
|
||||
do {
|
||||
trace_qemu_rdma_registration_handle_wait();
|
||||
trace_rdma_registration_handle_wait();
|
||||
|
||||
ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_NONE, &err);
|
||||
|
||||
@ -3583,9 +3579,9 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
|
||||
comp = (RDMACompress *) rdma->wr_data[idx].control_curr;
|
||||
network_to_compress(comp);
|
||||
|
||||
trace_qemu_rdma_registration_handle_compress(comp->length,
|
||||
comp->block_idx,
|
||||
comp->offset);
|
||||
trace_rdma_registration_handle_compress(comp->length,
|
||||
comp->block_idx,
|
||||
comp->offset);
|
||||
if (comp->block_idx >= rdma->local_ram_blocks.nb_blocks) {
|
||||
error_report("rdma: 'compress' bad block index %u (vs %d)",
|
||||
(unsigned int)comp->block_idx,
|
||||
@ -3601,11 +3597,11 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
|
||||
break;
|
||||
|
||||
case RDMA_CONTROL_REGISTER_FINISHED:
|
||||
trace_qemu_rdma_registration_handle_finished();
|
||||
trace_rdma_registration_handle_finished();
|
||||
return 0;
|
||||
|
||||
case RDMA_CONTROL_RAM_BLOCKS_REQUEST:
|
||||
trace_qemu_rdma_registration_handle_ram_blocks();
|
||||
trace_rdma_registration_handle_ram_blocks();
|
||||
|
||||
/* Sort our local RAM Block list so it's the same as the source,
|
||||
* we can do this since we've filled in a src_index in the list
|
||||
@ -3614,7 +3610,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
|
||||
qsort(rdma->local_ram_blocks.block,
|
||||
rdma->local_ram_blocks.nb_blocks,
|
||||
sizeof(RDMALocalBlock), dest_ram_sort_func);
|
||||
for (i = 0; i < local->nb_blocks; i++) {
|
||||
for (int i = 0; i < local->nb_blocks; i++) {
|
||||
local->block[i].index = i;
|
||||
}
|
||||
|
||||
@ -3632,7 +3628,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
|
||||
* Both sides use the "remote" structure to communicate and update
|
||||
* their "local" descriptions with what was sent.
|
||||
*/
|
||||
for (i = 0; i < local->nb_blocks; i++) {
|
||||
for (int i = 0; i < local->nb_blocks; i++) {
|
||||
rdma->dest_blocks[i].remote_host_addr =
|
||||
(uintptr_t)(local->block[i].local_host_addr);
|
||||
|
||||
@ -3644,7 +3640,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
|
||||
rdma->dest_blocks[i].length = local->block[i].length;
|
||||
|
||||
dest_block_to_network(&rdma->dest_blocks[i]);
|
||||
trace_qemu_rdma_registration_handle_ram_blocks_loop(
|
||||
trace_rdma_registration_handle_ram_blocks_loop(
|
||||
local->block[i].block_name,
|
||||
local->block[i].offset,
|
||||
local->block[i].length,
|
||||
@ -3667,12 +3663,12 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
|
||||
|
||||
break;
|
||||
case RDMA_CONTROL_REGISTER_REQUEST:
|
||||
trace_qemu_rdma_registration_handle_register(head.repeat);
|
||||
trace_rdma_registration_handle_register(head.repeat);
|
||||
|
||||
reg_resp.repeat = head.repeat;
|
||||
registers = (RDMARegister *) rdma->wr_data[idx].control_curr;
|
||||
|
||||
for (count = 0; count < head.repeat; count++) {
|
||||
for (int count = 0; count < head.repeat; count++) {
|
||||
uint64_t chunk;
|
||||
uint8_t *chunk_start, *chunk_end;
|
||||
|
||||
@ -3681,7 +3677,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
|
||||
|
||||
reg_result = &results[count];
|
||||
|
||||
trace_qemu_rdma_registration_handle_register_loop(count,
|
||||
trace_rdma_registration_handle_register_loop(count,
|
||||
reg->current_index, reg->key.current_addr, reg->chunks);
|
||||
|
||||
if (reg->current_index >= rdma->local_ram_blocks.nb_blocks) {
|
||||
@ -3729,8 +3725,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
|
||||
|
||||
reg_result->host_addr = (uintptr_t)block->local_host_addr;
|
||||
|
||||
trace_qemu_rdma_registration_handle_register_rkey(
|
||||
reg_result->rkey);
|
||||
trace_rdma_registration_handle_register_rkey(reg_result->rkey);
|
||||
|
||||
result_to_network(reg_result);
|
||||
}
|
||||
@ -3744,15 +3739,15 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
|
||||
}
|
||||
break;
|
||||
case RDMA_CONTROL_UNREGISTER_REQUEST:
|
||||
trace_qemu_rdma_registration_handle_unregister(head.repeat);
|
||||
trace_rdma_registration_handle_unregister(head.repeat);
|
||||
unreg_resp.repeat = head.repeat;
|
||||
registers = (RDMARegister *) rdma->wr_data[idx].control_curr;
|
||||
|
||||
for (count = 0; count < head.repeat; count++) {
|
||||
for (int count = 0; count < head.repeat; count++) {
|
||||
reg = ®isters[count];
|
||||
network_to_register(reg);
|
||||
|
||||
trace_qemu_rdma_registration_handle_unregister_loop(count,
|
||||
trace_rdma_registration_handle_unregister_loop(count,
|
||||
reg->current_index, reg->key.chunk);
|
||||
|
||||
block = &(rdma->local_ram_blocks.block[reg->current_index]);
|
||||
@ -3768,8 +3763,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
|
||||
|
||||
rdma->total_registrations--;
|
||||
|
||||
trace_qemu_rdma_registration_handle_unregister_success(
|
||||
reg->key.chunk);
|
||||
trace_rdma_registration_handle_unregister_success(reg->key.chunk);
|
||||
}
|
||||
|
||||
ret = qemu_rdma_post_send_control(rdma, NULL, &unreg_resp, &err);
|
||||
@@ -3794,22 +3788,23 @@ err:
 }

 /* Destination:
- * Called via a ram_control_load_hook during the initial RAM load section which
- * lists the RAMBlocks by name.  This lets us know the order of the RAMBlocks
- * on the source.
- * We've already built our local RAMBlock list, but not yet sent the list to
- * the source.
+ * Called during the initial RAM load section which lists the
+ * RAMBlocks by name.  This lets us know the order of the RAMBlocks on
+ * the source.  We've already built our local RAMBlock list, but not
+ * yet sent the list to the source.
  */
-static int
-rdma_block_notification_handle(QEMUFile *f, const char *name)
+int rdma_block_notification_handle(QEMUFile *f, const char *name)
 {
-    RDMAContext *rdma;
-    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
     int curr;
     int found = -1;

+    if (!migrate_rdma()) {
+        return 0;
+    }
+
     RCU_READ_LOCK_GUARD();
-    rdma = qatomic_rcu_read(&rioc->rdmain);
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
+    RDMAContext *rdma = qatomic_rcu_read(&rioc->rdmain);

     if (!rdma) {
         return -1;
@@ -3835,33 +3830,15 @@ rdma_block_notification_handle(QEMUFile *f, const char *name)
     return 0;
 }

-static int rdma_load_hook(QEMUFile *f, uint64_t flags, void *data)
-{
-    switch (flags) {
-    case RAM_CONTROL_BLOCK_REG:
-        return rdma_block_notification_handle(f, data);
-
-    case RAM_CONTROL_HOOK:
-        return qemu_rdma_registration_handle(f);
-
-    default:
-        /* Shouldn't be called with any other values */
-        abort();
-    }
-}
-
-static int qemu_rdma_registration_start(QEMUFile *f,
-                                        uint64_t flags, void *data)
+int rdma_registration_start(QEMUFile *f, uint64_t flags)
 {
-    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
-    RDMAContext *rdma;
-
-    if (migration_in_postcopy()) {
+    if (!migrate_rdma() || migration_in_postcopy()) {
         return 0;
     }

+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
     RCU_READ_LOCK_GUARD();
-    rdma = qatomic_rcu_read(&rioc->rdmaout);
+    RDMAContext *rdma = qatomic_rcu_read(&rioc->rdmaout);
     if (!rdma) {
         return -1;
     }
@@ -3870,7 +3847,7 @@ static int qemu_rdma_registration_start(QEMUFile *f,
         return -1;
     }

-    trace_qemu_rdma_registration_start(flags);
+    trace_rdma_registration_start(flags);
     qemu_put_be64(f, RAM_SAVE_FLAG_HOOK);
     qemu_fflush(f);

@@ -3881,20 +3858,20 @@ static int qemu_rdma_registration_start(QEMUFile *f,
  * Inform dest that dynamic registrations are done for now.
  * First, flush writes, if any.
  */
-static int qemu_rdma_registration_stop(QEMUFile *f,
-                                       uint64_t flags, void *data)
+int rdma_registration_stop(QEMUFile *f, uint64_t flags)
 {
-    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
+    QIOChannelRDMA *rioc;
     Error *err = NULL;
     RDMAContext *rdma;
     RDMAControlHeader head = { .len = 0, .repeat = 1 };
     int ret;

-    if (migration_in_postcopy()) {
+    if (!migrate_rdma() || migration_in_postcopy()) {
         return 0;
     }

     RCU_READ_LOCK_GUARD();
+    rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
     rdma = qatomic_rcu_read(&rioc->rdmaout);
     if (!rdma) {
         return -1;
@@ -3914,10 +3891,10 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
     if (flags == RAM_CONTROL_SETUP) {
         RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT };
         RDMALocalBlocks *local = &rdma->local_ram_blocks;
-        int reg_result_idx, i, nb_dest_blocks;
+        int reg_result_idx, nb_dest_blocks;

         head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
-        trace_qemu_rdma_registration_stop_ram();
+        trace_rdma_registration_stop_ram();

         /*
          * Make sure that we parallelize the pinning on both sides.
@@ -3962,7 +3939,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
         qemu_rdma_move_header(rdma, reg_result_idx, &resp);
         memcpy(rdma->dest_blocks,
             rdma->wr_data[reg_result_idx].control_curr, resp.len);
-        for (i = 0; i < nb_dest_blocks; i++) {
+        for (int i = 0; i < nb_dest_blocks; i++) {
             network_to_dest_block(&rdma->dest_blocks[i]);

             /* We require that the blocks are in the same order */
@@ -3981,7 +3958,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
         }
     }

-    trace_qemu_rdma_registration_stop(flags);
+    trace_rdma_registration_stop(flags);

     head.type = RDMA_CONTROL_REGISTER_FINISHED;
     ret = qemu_rdma_exchange_send(rdma, &head, NULL, NULL, NULL, NULL, &err);
@@ -3997,17 +3974,6 @@ err:
     return -1;
 }

-static const QEMUFileHooks rdma_read_hooks = {
-    .hook_ram_load = rdma_load_hook,
-};
-
-static const QEMUFileHooks rdma_write_hooks = {
-    .before_ram_iterate = qemu_rdma_registration_start,
-    .after_ram_iterate = qemu_rdma_registration_stop,
-    .save_page = qemu_rdma_save_page,
-};
-
-
 static void qio_channel_rdma_finalize(Object *obj)
 {
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(obj);
@@ -4059,7 +4025,6 @@ static QEMUFile *rdma_new_input(RDMAContext *rdma)
     rioc->file = qemu_file_new_input(QIO_CHANNEL(rioc));
     rioc->rdmain = rdma;
     rioc->rdmaout = rdma->return_path;
-    qemu_file_set_hooks(rioc->file, &rdma_read_hooks);

     return rioc->file;
 }
@@ -4071,7 +4036,6 @@ static QEMUFile *rdma_new_output(RDMAContext *rdma)
     rioc->file = qemu_file_new_output(QIO_CHANNEL(rioc));
     rioc->rdmaout = rdma;
     rioc->rdmain = rdma->return_path;
-    qemu_file_set_hooks(rioc->file, &rdma_write_hooks);

     return rioc->file;
 }
@@ -4079,14 +4043,11 @@ static QEMUFile *rdma_new_output(RDMAContext *rdma)
 static void rdma_accept_incoming_migration(void *opaque)
 {
     RDMAContext *rdma = opaque;
-    int ret;
     QEMUFile *f;
     Error *local_err = NULL;

     trace_qemu_rdma_accept_incoming_migration();
-    ret = qemu_rdma_accept(rdma);
-
-    if (ret < 0) {
+    if (qemu_rdma_accept(rdma) < 0) {
         error_report("RDMA ERROR: Migration initialization failed");
         return;
     }
@@ -4113,6 +4074,7 @@ static void rdma_accept_incoming_migration(void *opaque)

 void rdma_start_incoming_migration(const char *host_port, Error **errp)
 {
+    MigrationState *s = migrate_get_current();
     int ret;
     RDMAContext *rdma;

@@ -4144,7 +4106,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp)
     }

     trace_rdma_start_incoming_migration_after_rdma_listen();
-
+    s->rdma_migration = true;
     qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
                         NULL, (void *)(intptr_t)rdma);
     return;
@@ -4220,6 +4182,7 @@ void rdma_start_outgoing_migration(void *opaque,
     trace_rdma_start_outgoing_migration_after_rdma_connect();

     s->to_dst_file = rdma_new_output(rdma);
+    s->rdma_migration = true;
     migrate_fd_connect(s, NULL);
     return;
 return_path_err:
migration/rdma.h

@@ -17,9 +17,51 @@
 #ifndef QEMU_MIGRATION_RDMA_H
 #define QEMU_MIGRATION_RDMA_H

+#include "exec/memory.h"
+
 void rdma_start_outgoing_migration(void *opaque, const char *host_port,
                                    Error **errp);

 void rdma_start_incoming_migration(const char *host_port, Error **errp);

+/*
+ * Constants used by rdma return codes
+ */
+#define RAM_CONTROL_SETUP     0
+#define RAM_CONTROL_ROUND     1
+#define RAM_CONTROL_FINISH    3
+
+/*
+ * Whenever this is found in the data stream, the flags
+ * will be passed to rdma functions in the incoming-migration
+ * side.
+ */
+#define RAM_SAVE_FLAG_HOOK     0x80
+
+#define RAM_SAVE_CONTROL_NOT_SUPP -1000
+#define RAM_SAVE_CONTROL_DELAYED  -2000
+
+#ifdef CONFIG_RDMA
+int rdma_registration_handle(QEMUFile *f);
+int rdma_registration_start(QEMUFile *f, uint64_t flags);
+int rdma_registration_stop(QEMUFile *f, uint64_t flags);
+int rdma_block_notification_handle(QEMUFile *f, const char *name);
+int rdma_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                           ram_addr_t offset, size_t size);
+#else
+static inline
+int rdma_registration_handle(QEMUFile *f) { return 0; }
+static inline
+int rdma_registration_start(QEMUFile *f, uint64_t flags) { return 0; }
+static inline
+int rdma_registration_stop(QEMUFile *f, uint64_t flags) { return 0; }
+static inline
+int rdma_block_notification_handle(QEMUFile *f, const char *name) { return 0; }
+static inline
+int rdma_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                           ram_addr_t offset, size_t size)
+{
+    return RAM_SAVE_CONTROL_NOT_SUPP;
+}
+#endif
 #endif
migration/savevm.c

@@ -1217,13 +1217,27 @@ void qemu_savevm_non_migratable_list(strList **reasons)

 void qemu_savevm_state_header(QEMUFile *f)
 {
+    MigrationState *s = migrate_get_current();
+
+    s->vmdesc = json_writer_new(false);
+
     trace_savevm_state_header();
     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
     qemu_put_be32(f, QEMU_VM_FILE_VERSION);

-    if (migrate_get_current()->send_configuration) {
+    if (s->send_configuration) {
         qemu_put_byte(f, QEMU_VM_CONFIGURATION);
-        vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
+
+        /*
+         * This starts the main json object and is paired with the
+         * json_writer_end_object in
+         * qemu_savevm_state_complete_precopy_non_iterable
+         */
+        json_writer_start_object(s->vmdesc, NULL);
+
+        json_writer_start_object(s->vmdesc, "configuration");
+        vmstate_save_state(f, &vmstate_configuration, &savevm_state, s->vmdesc);
+        json_writer_end_object(s->vmdesc);
     }
 }

@@ -1272,8 +1286,6 @@ void qemu_savevm_state_setup(QEMUFile *f)
     Error *local_err = NULL;
     int ret;

-    ms->vmdesc = json_writer_new(false);
-    json_writer_start_object(ms->vmdesc, NULL);
     json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
     json_writer_start_array(ms->vmdesc, "devices");

@@ -1660,10 +1672,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
     }
     ms->to_dst_file = f;

-    qemu_mutex_unlock_iothread();
     qemu_savevm_state_header(f);
     qemu_savevm_state_setup(f);
-    qemu_mutex_lock_iothread();

     while (qemu_file_get_error(f) == 0) {
         if (qemu_savevm_state_iterate(f, false) > 0) {
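With the configuration section now included, the JSON machine description (vmdesc) carried by the stream takes roughly the shape sketched below. This is a hand-written illustration, not output from a real stream; the capability names and page size are placeholder values.

    # Illustrative shape of the vmdesc JSON after this change; values are placeholders.
    vmdesc = {
        "configuration": {"capabilities": ["x-ignore-shared", "validate-uuid"]},
        "page_size": 4096,
        "devices": [],
    }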
migration/trace-events

@@ -125,6 +125,7 @@ postcopy_preempt_reset_channel(void) ""

 # multifd.c
 multifd_new_send_channel_async(uint8_t id) "channel %u"
+multifd_new_send_channel_async_error(uint8_t id, void *err) "channel=%u err=%p"
 multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u"
 multifd_recv_new_channel(uint8_t id) "channel %u"
 multifd_recv_sync_main(long packet_num) "packet num %ld"
@@ -144,7 +145,7 @@ multifd_send_thread_start(uint8_t id) "%u"
 multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s"
 multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s"
 multifd_tls_outgoing_handshake_complete(void *ioc) "ioc=%p"
-multifd_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname, void *err) "ioc=%p ioctype=%s hostname=%s err=%p"
+multifd_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname) "ioc=%p ioctype=%s hostname=%s"

 # migration.c
 await_return_path_close_on_source_close(void) ""
@@ -186,7 +187,7 @@ source_return_path_thread_shut(uint32_t val) "0x%x"
 source_return_path_thread_resume_ack(uint32_t v) "%"PRIu32
 source_return_path_thread_switchover_acked(void) ""
 migration_thread_low_pending(uint64_t pending) "%" PRIu64
-migrate_transferred(uint64_t transferred, uint64_t time_spent, uint64_t bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %" PRIu64 " max_size %" PRId64
+migrate_transferred(uint64_t transferred, uint64_t time_spent, uint64_t bandwidth, uint64_t avail_bw, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %" PRIu64 " switchover_bw %" PRIu64 " max_size %" PRId64
 process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
 process_incoming_migration_co_postcopy_end_main(void) ""
 postcopy_preempt_enabled(bool value) "%d"
@@ -231,20 +232,6 @@ qemu_rdma_post_send_control(const char *desc) "CONTROL: sending %s.."
 qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering %" PRIu64 " bytes @ %p"
 qemu_rdma_register_odp_mr(const char *name) "Try to register On-Demand Paging memory region: %s"
 qemu_rdma_advise_mr(const char *name, uint32_t len, uint64_t addr, const char *res) "Try to advise block %s prefetch at %" PRIu32 "@0x%" PRIx64 ": %s"
-qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64
-qemu_rdma_registration_handle_finished(void) ""
-qemu_rdma_registration_handle_ram_blocks(void) ""
-qemu_rdma_registration_handle_ram_blocks_loop(const char *name, uint64_t offset, uint64_t length, void *local_host_addr, unsigned int src_index) "%s: @0x%" PRIx64 "/%" PRIu64 " host:@%p src_index: %u"
-qemu_rdma_registration_handle_register(int requests) "%d requests"
-qemu_rdma_registration_handle_register_loop(int req, int index, uint64_t addr, uint64_t chunks) "Registration request (%d): index %d, current_addr %" PRIu64 " chunks: %" PRIu64
-qemu_rdma_registration_handle_register_rkey(int rkey) "0x%x"
-qemu_rdma_registration_handle_unregister(int requests) "%d requests"
-qemu_rdma_registration_handle_unregister_loop(int count, int index, uint64_t chunk) "Unregistration request (%d): index %d, chunk %" PRIu64
-qemu_rdma_registration_handle_unregister_success(uint64_t chunk) "%" PRIu64
-qemu_rdma_registration_handle_wait(void) ""
-qemu_rdma_registration_start(uint64_t flags) "%" PRIu64
-qemu_rdma_registration_stop(uint64_t flags) "%" PRIu64
-qemu_rdma_registration_stop_ram(void) ""
 qemu_rdma_resolve_host_trying(const char *host, const char *ip) "Trying %s => %s"
 qemu_rdma_signal_unregister_append(uint64_t chunk, int pos) "Appending unregister chunk %" PRIu64 " at position %d"
 qemu_rdma_signal_unregister_already(uint64_t chunk) "Unregister chunk %" PRIu64 " already in queue"
@@ -263,6 +250,20 @@ qemu_rdma_write_one_zero(uint64_t chunk, int len, int index, int64_t offset) "En
 rdma_add_block(const char *block_name, int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: '%s':%d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
 rdma_block_notification_handle(const char *name, int index) "%s at %d"
 rdma_delete_block(void *block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %p, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
+rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64
+rdma_registration_handle_finished(void) ""
+rdma_registration_handle_ram_blocks(void) ""
+rdma_registration_handle_ram_blocks_loop(const char *name, uint64_t offset, uint64_t length, void *local_host_addr, unsigned int src_index) "%s: @0x%" PRIx64 "/%" PRIu64 " host:@%p src_index: %u"
+rdma_registration_handle_register(int requests) "%d requests"
+rdma_registration_handle_register_loop(int req, int index, uint64_t addr, uint64_t chunks) "Registration request (%d): index %d, current_addr %" PRIu64 " chunks: %" PRIu64
+rdma_registration_handle_register_rkey(int rkey) "0x%x"
+rdma_registration_handle_unregister(int requests) "%d requests"
+rdma_registration_handle_unregister_loop(int count, int index, uint64_t chunk) "Unregistration request (%d): index %d, chunk %" PRIu64
+rdma_registration_handle_unregister_success(uint64_t chunk) "%" PRIu64
+rdma_registration_handle_wait(void) ""
+rdma_registration_start(uint64_t flags) "%" PRIu64
+rdma_registration_stop(uint64_t flags) "%" PRIu64
+rdma_registration_stop_ram(void) ""
 rdma_start_incoming_migration(void) ""
 rdma_start_incoming_migration_after_dest_init(void) ""
 rdma_start_incoming_migration_after_rdma_listen(void) ""
qapi/migration.json

@@ -73,7 +73,7 @@
 { 'struct': 'MigrationStats',
   'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' ,
            'duplicate': 'int',
-           'skipped': { 'type': 'int', 'features': ['deprecated'] },
+           'skipped': { 'type': 'int', 'features': [ 'deprecated' ] },
            'normal': 'int',
            'normal-bytes': 'int', 'dirty-pages-rate': 'int',
            'mbps': 'number', 'dirty-sync-count': 'int',
@@ -440,10 +440,9 @@
 #     compress and xbzrle are both on, compress only takes effect in
 #     the ram bulk stage, after that, it will be disabled and only
 #     xbzrle takes effect, this can help to minimize migration
-#     traffic.  The feature is disabled by default.  (since 2.4 )
+#     traffic.  The feature is disabled by default.  (since 2.4)
 #
-# @events: generate events for each migration state change (since 2.4
-#     )
+# @events: generate events for each migration state change (since 2.4)
 #
 # @auto-converge: If enabled, QEMU will automatically throttle down
 #     the guest to speed up convergence of RAM migration.  (since 1.6)
@@ -758,6 +757,16 @@
 # @max-bandwidth: to set maximum speed for migration.  maximum speed
 #     in bytes per second.  (Since 2.8)
 #
+# @avail-switchover-bandwidth: to set the available bandwidth that
+#     migration can use during switchover phase.  NOTE!  This does not
+#     limit the bandwidth during switchover, but only for calculations when
+#     making decisions to switchover.  By default, this value is zero,
+#     which means QEMU will estimate the bandwidth automatically.  This can
+#     be set when the estimated value is not accurate, while the user is
+#     able to guarantee such bandwidth is available when switching over.
+#     When specified correctly, this can make the switchover decision much
+#     more accurate.  (Since 8.2)
+#
 # @downtime-limit: set maximum tolerated downtime for migration.
 #     maximum downtime in milliseconds (Since 2.8)
 #
@@ -839,7 +848,7 @@
             'cpu-throttle-initial', 'cpu-throttle-increment',
             'cpu-throttle-tailslow',
             'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth',
-            'downtime-limit',
+            'avail-switchover-bandwidth', 'downtime-limit',
             { 'name': 'x-checkpoint-delay', 'features': [ 'unstable' ] },
             'block-incremental',
             'multifd-channels',
@@ -924,6 +933,16 @@
 # @max-bandwidth: to set maximum speed for migration.  maximum speed
 #     in bytes per second.  (Since 2.8)
 #
+# @avail-switchover-bandwidth: to set the available bandwidth that
+#     migration can use during switchover phase.  NOTE!  This does not
+#     limit the bandwidth during switchover, but only for calculations when
+#     making decisions to switchover.  By default, this value is zero,
+#     which means QEMU will estimate the bandwidth automatically.  This can
+#     be set when the estimated value is not accurate, while the user is
+#     able to guarantee such bandwidth is available when switching over.
+#     When specified correctly, this can make the switchover decision much
+#     more accurate.  (Since 8.2)
+#
 # @downtime-limit: set maximum tolerated downtime for migration.
 #     maximum downtime in milliseconds (Since 2.8)
 #
@@ -1017,6 +1036,7 @@
             '*tls-hostname': 'StrOrNull',
             '*tls-authz': 'StrOrNull',
             '*max-bandwidth': 'size',
+            '*avail-switchover-bandwidth': 'size',
             '*downtime-limit': 'uint64',
             '*x-checkpoint-delay': { 'type': 'uint32',
                                      'features': [ 'unstable' ] },
@@ -1127,6 +1147,16 @@
 # @max-bandwidth: to set maximum speed for migration.  maximum speed
 #     in bytes per second.  (Since 2.8)
 #
+# @avail-switchover-bandwidth: to set the available bandwidth that
+#     migration can use during switchover phase.  NOTE!  This does not
+#     limit the bandwidth during switchover, but only for calculations when
+#     making decisions to switchover.  By default, this value is zero,
+#     which means QEMU will estimate the bandwidth automatically.  This can
+#     be set when the estimated value is not accurate, while the user is
+#     able to guarantee such bandwidth is available when switching over.
+#     When specified correctly, this can make the switchover decision much
+#     more accurate.  (Since 8.2)
+#
 # @downtime-limit: set maximum tolerated downtime for migration.
 #     maximum downtime in milliseconds (Since 2.8)
 #
@@ -1217,6 +1247,7 @@
             '*tls-hostname': 'str',
             '*tls-authz': 'str',
             '*max-bandwidth': 'size',
+            '*avail-switchover-bandwidth': 'size',
             '*downtime-limit': 'uint64',
             '*x-checkpoint-delay': { 'type': 'uint32',
                                      'features': [ 'unstable' ] },
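The new avail-switchover-bandwidth parameter is set over QMP like any other migration parameter. A minimal sketch of the command shape follows; the 1 GiB/s value is purely illustrative (the parameter takes bytes per second):

    import json

    # Hypothetical value; pick one matching the bandwidth you can actually
    # guarantee for the switchover phase.
    cmd = {
        "execute": "migrate-set-parameters",
        "arguments": {"avail-switchover-bandwidth": 1024 * 1024 * 1024},
    }
    print(json.dumps(cmd, indent=2))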
scripts/analyze-migration.py

@@ -38,13 +38,13 @@ class MigrationFile(object):
         self.file = open(self.filename, "rb")

     def read64(self):
-        return int.from_bytes(self.file.read(8), byteorder='big', signed=True)
+        return int.from_bytes(self.file.read(8), byteorder='big', signed=False)

     def read32(self):
-        return int.from_bytes(self.file.read(4), byteorder='big', signed=True)
+        return int.from_bytes(self.file.read(4), byteorder='big', signed=False)

     def read16(self):
-        return int.from_bytes(self.file.read(2), byteorder='big', signed=True)
+        return int.from_bytes(self.file.read(2), byteorder='big', signed=False)

     def read8(self):
         return int.from_bytes(self.file.read(1), byteorder='big', signed=True)
@@ -123,6 +123,7 @@ class RamSection(object):
         self.TARGET_PAGE_SIZE = ramargs['page_size']
         self.dump_memory = ramargs['dump_memory']
         self.write_memory = ramargs['write_memory']
+        self.ignore_shared = ramargs['ignore_shared']
         self.sizeinfo = collections.OrderedDict()
         self.data = collections.OrderedDict()
         self.data['section sizes'] = self.sizeinfo
@@ -169,6 +170,8 @@ class RamSection(object):
                         f.truncate(0)
                         f.truncate(len)
                         self.files[self.name] = f
+                if self.ignore_shared:
+                    mr_addr = self.file.read64()
                 flags &= ~self.RAM_SAVE_FLAG_MEM_SIZE

             if flags & self.RAM_SAVE_FLAG_COMPRESS:
@@ -261,12 +264,41 @@ class HTABSection(object):


 class ConfigurationSection(object):
-    def __init__(self, file):
+    def __init__(self, file, desc):
         self.file = file
+        self.desc = desc
+        self.caps = []
+
+    def parse_capabilities(self, vmsd_caps):
+        if not vmsd_caps:
+            return
+
+        ncaps = vmsd_caps.data['caps_count'].data
+        self.caps = vmsd_caps.data['capabilities']
+
+        if type(self.caps) != list:
+            self.caps = [self.caps]
+
+        if len(self.caps) != ncaps:
+            raise Exception("Number of capabilities doesn't match "
+                            "caps_count field")
+
+    def has_capability(self, cap):
+        return any([str(c) == cap for c in self.caps])

     def read(self):
-        name_len = self.file.read32()
-        name = self.file.readstr(len = name_len)
+        if self.desc:
+            version_id = self.desc['version']
+            section = VMSDSection(self.file, version_id, self.desc,
+                                  'configuration')
+            section.read()
+            self.parse_capabilities(
+                section.data.get("configuration/capabilities"))
+        else:
+            # backward compatibility for older streams that don't have
+            # the configuration section in the json
+            name_len = self.file.read32()
+            name = self.file.readstr(len = name_len)

 class VMSDFieldGeneric(object):
     def __init__(self, desc, file):
@@ -288,6 +320,23 @@ class VMSDFieldGeneric(object):
         self.data = self.file.readvar(size)
         return self.data

+class VMSDFieldCap(object):
+    def __init__(self, desc, file):
+        self.file = file
+        self.desc = desc
+        self.data = ""
+
+    def __repr__(self):
+        return self.data
+
+    def __str__(self):
+        return self.data
+
+    def read(self):
+        len = self.file.read8()
+        self.data = self.file.readstr(len)
+
+
 class VMSDFieldInt(VMSDFieldGeneric):
     def __init__(self, desc, file):
         super(VMSDFieldInt, self).__init__(desc, file)
@@ -462,6 +511,7 @@ vmsd_field_readers = {
     "unused_buffer" : VMSDFieldGeneric,
     "bitmap" : VMSDFieldGeneric,
     "struct" : VMSDFieldStruct,
+    "capability": VMSDFieldCap,
     "unknown" : VMSDFieldGeneric,
 }

@@ -525,6 +575,7 @@ class MigrationDump(object):
         ramargs['page_size'] = self.vmsd_desc['page_size']
         ramargs['dump_memory'] = dump_memory
         ramargs['write_memory'] = write_memory
+        ramargs['ignore_shared'] = False
         self.section_classes[('ram',0)][1] = ramargs

         while True:
@@ -532,8 +583,10 @@ class MigrationDump(object):
             if section_type == self.QEMU_VM_EOF:
                 break
             elif section_type == self.QEMU_VM_CONFIGURATION:
-                section = ConfigurationSection(file)
+                config_desc = self.vmsd_desc.get('configuration')
+                section = ConfigurationSection(file, config_desc)
                 section.read()
+                ramargs['ignore_shared'] = section.has_capability('x-ignore-shared')
             elif section_type == self.QEMU_VM_SECTION_START or section_type == self.QEMU_VM_SECTION_FULL:
                 section_id = file.read32()
                 name = file.readstr()
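The signed=False change in read64()/read32()/read16() matters because stream fields such as 64-bit addresses and flag words can have their top bit set, and decoding those as signed big-endian integers yields negative values. A minimal sketch of the difference:

    # An 8-byte field with the most-significant bit set, as an address/flags
    # word in the stream might be.
    word = bytes([0x80, 0, 0, 0, 0, 0, 0, 0x10])

    print(int.from_bytes(word, byteorder='big', signed=True))   # negative (old behaviour)
    print(int.from_bytes(word, byteorder='big', signed=False))  # intended unsigned value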
tests/qtest/meson.build

@@ -357,6 +357,8 @@ foreach dir : target_dirs
       test_deps += [qsd]
     endif

+    qtest_env.set('PYTHON', python.full_path())
+
     foreach test : target_qtests
       # Executables are shared across targets, declare them only the first time we
       # encounter them
tests/qtest/migration-test.c

@@ -66,6 +66,12 @@ static bool got_dst_resume;
  */
 #define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */

+#define ANALYZE_SCRIPT "scripts/analyze-migration.py"
+
+#define QEMU_VM_FILE_MAGIC 0x5145564d
+#define FILE_TEST_FILENAME "migfile"
+#define FILE_TEST_OFFSET 0x1000
+
 #if defined(__linux__)
 #include <sys/syscall.h>
 #include <sys/vfs.h>
@@ -882,6 +888,7 @@ static void test_migrate_end(QTestState *from, QTestState *to, bool test_dest)
     cleanup("migsocket");
     cleanup("src_serial");
     cleanup("dest_serial");
+    cleanup(FILE_TEST_FILENAME);
 }

 #ifdef CONFIG_GNUTLS
@@ -1501,6 +1508,61 @@ static void test_baddest(void)
     test_migrate_end(from, to, false);
 }

+#ifndef _WIN32
+static void test_analyze_script(void)
+{
+    MigrateStart args = {
+        .opts_source = "-uuid 11111111-1111-1111-1111-111111111111",
+    };
+    QTestState *from, *to;
+    g_autofree char *uri = NULL;
+    g_autofree char *file = NULL;
+    int pid, wstatus;
+    const char *python = g_getenv("PYTHON");
+
+    if (!python) {
+        g_test_skip("PYTHON variable not set");
+        return;
+    }
+
+    /* dummy url */
+    if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", &args)) {
+        return;
+    }
+
+    /*
+     * Setting these two capabilities causes the "configuration"
+     * vmstate to include subsections for them. The script needs to
+     * parse those subsections properly.
+     */
+    migrate_set_capability(from, "validate-uuid", true);
+    migrate_set_capability(from, "x-ignore-shared", true);
+
+    file = g_strdup_printf("%s/migfile", tmpfs);
+    uri = g_strdup_printf("exec:cat > %s", file);
+
+    migrate_ensure_converge(from);
+    migrate_qmp(from, uri, "{}");
+    wait_for_migration_complete(from);
+
+    pid = fork();
+    if (!pid) {
+        close(1);
+        open("/dev/null", O_WRONLY);
+        execl(python, python, ANALYZE_SCRIPT, "-f", file, NULL);
+        g_assert_not_reached();
+    }
+
+    g_assert(waitpid(pid, &wstatus, 0) == pid);
+    if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
+        g_test_message("Failed to analyze the migration stream");
+        g_test_fail();
+    }
+    test_migrate_end(from, to, false);
+    cleanup("migfile");
+}
+#endif
+
 static void test_precopy_common(MigrateCommon *args)
 {
     QTestState *from, *to;
@@ -1610,6 +1672,70 @@ finish:
     test_migrate_end(from, to, args->result == MIG_TEST_SUCCEED);
 }

+static void test_file_common(MigrateCommon *args, bool stop_src)
+{
+    QTestState *from, *to;
+    void *data_hook = NULL;
+    g_autofree char *connect_uri = g_strdup(args->connect_uri);
+
+    if (test_migrate_start(&from, &to, args->listen_uri, &args->start)) {
+        return;
+    }
+
+    /*
+     * File migration is never live. We can keep the source VM running
+     * during migration, but the destination will not be running
+     * concurrently.
+     */
+    g_assert_false(args->live);
+
+    if (args->start_hook) {
+        data_hook = args->start_hook(from, to);
+    }
+
+    migrate_ensure_converge(from);
+    wait_for_serial("src_serial");
+
+    if (stop_src) {
+        qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}");
+        if (!got_src_stop) {
+            qtest_qmp_eventwait(from, "STOP");
+        }
+    }
+
+    if (args->result == MIG_TEST_QMP_ERROR) {
+        migrate_qmp_fail(from, connect_uri, "{}");
+        goto finish;
+    }
+
+    migrate_qmp(from, connect_uri, "{}");
+    wait_for_migration_complete(from);
+
+    /*
+     * We need to wait for the source to finish before starting the
+     * destination.
+     */
+    migrate_incoming_qmp(to, connect_uri, "{}");
+    wait_for_migration_complete(to);
+
+    if (stop_src) {
+        qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}");
+    }
+
+    if (!got_dst_resume) {
+        qtest_qmp_eventwait(to, "RESUME");
+    }
+
+    wait_for_serial("dest_serial");
+
+finish:
+    if (args->finish_hook) {
+        args->finish_hook(from, to, data_hook);
+    }
+
+    test_migrate_end(from, to, args->result == MIG_TEST_SUCCEED);
+}
+
 static void test_precopy_unix_plain(void)
 {
     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
@@ -1805,6 +1931,76 @@ static void test_precopy_unix_compress_nowait(void)
     test_precopy_common(&args);
 }

+static void test_precopy_file(void)
+{
+    g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs,
+                                           FILE_TEST_FILENAME);
+    MigrateCommon args = {
+        .connect_uri = uri,
+        .listen_uri = "defer",
+    };
+
+    test_file_common(&args, true);
+}
+
+static void file_offset_finish_hook(QTestState *from, QTestState *to,
+                                    void *opaque)
+{
+#if defined(__linux__)
+    g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME);
+    size_t size = FILE_TEST_OFFSET + sizeof(QEMU_VM_FILE_MAGIC);
+    uintptr_t *addr, *p;
+    int fd;
+
+    fd = open(path, O_RDONLY);
+    g_assert(fd != -1);
+    addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
+    g_assert(addr != MAP_FAILED);
+
+    /*
+     * Ensure the skipped offset contains zeros and the migration
+     * stream starts at the right place.
+     */
+    p = addr;
+    while (p < addr + FILE_TEST_OFFSET / sizeof(uintptr_t)) {
+        g_assert(*p == 0);
+        p++;
+    }
+    g_assert_cmpint(cpu_to_be64(*p) >> 32, ==, QEMU_VM_FILE_MAGIC);
+
+    munmap(addr, size);
+    close(fd);
+#endif
+}
+
+static void test_precopy_file_offset(void)
+{
+    g_autofree char *uri = g_strdup_printf("file:%s/%s,offset=%d", tmpfs,
+                                           FILE_TEST_FILENAME,
+                                           FILE_TEST_OFFSET);
+    MigrateCommon args = {
+        .connect_uri = uri,
+        .listen_uri = "defer",
+        .finish_hook = file_offset_finish_hook,
+    };
+
+    test_file_common(&args, false);
+}
+
+static void test_precopy_file_offset_bad(void)
+{
+    /* using a value not supported by qemu_strtosz() */
+    g_autofree char *uri = g_strdup_printf("file:%s/%s,offset=0x20M",
+                                           tmpfs, FILE_TEST_FILENAME);
+    MigrateCommon args = {
+        .connect_uri = uri,
+        .listen_uri = "defer",
+        .result = MIG_TEST_QMP_ERROR,
+    };
+
+    test_file_common(&args, false);
+}
+
 static void test_precopy_tcp_plain(void)
 {
     MigrateCommon args = {
@@ -2837,6 +3033,9 @@ int main(int argc, char **argv)
     }

     qtest_add_func("/migration/bad_dest", test_baddest);
+#ifndef _WIN32
+    qtest_add_func("/migration/analyze-script", test_analyze_script);
+#endif
     qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain);
     qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle);
     /*
@@ -2849,6 +3048,14 @@ int main(int argc, char **argv)
         qtest_add_func("/migration/precopy/unix/compress/nowait",
                        test_precopy_unix_compress_nowait);
     }

+    qtest_add_func("/migration/precopy/file",
+                   test_precopy_file);
+    qtest_add_func("/migration/precopy/file/offset",
+                   test_precopy_file_offset);
+    qtest_add_func("/migration/precopy/file/offset/bad",
+                   test_precopy_file_offset_bad);
+
 #ifdef CONFIG_GNUTLS
     qtest_add_func("/migration/precopy/unix/tls/psk",
                    test_precopy_unix_tls_psk);
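For reference, the URI format these tests exercise is "file:<path>[,offset=<n>]", with the offset parsed by qemu_strtosz(); per the test comment above, "0x20M" is not a value qemu_strtosz() accepts, which is what the bad-offset test checks. A small sketch of how such URIs are shaped (the helper name is made up for illustration):

    # Hypothetical helper mirroring the g_strdup_printf() calls in the tests above.
    def file_migration_uri(path, offset=None):
        uri = "file:" + path
        if offset is not None:
            uri += ",offset=" + str(offset)
        return uri

    print(file_migration_uri("/tmp/migfile"))          # file:/tmp/migfile
    print(file_migration_uri("/tmp/migfile", 0x1000))  # file:/tmp/migfile,offset=4096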