Merge remote-tracking branch 'quintela/migration.next' into staging
# By Chegu Vinod # Via Juan Quintela * quintela/migration.next: Force auto-convegence of live migration Add 'auto-converge' migration capability Introduce async_run_on_cpu() Message-id: 1373664508-5404-1-git-send-email-quintela@redhat.com Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
commit
6453a3a694
81
arch_init.c
81
arch_init.c
@ -104,6 +104,9 @@ int graphic_depth = 32;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
const uint32_t arch_type = QEMU_ARCH;
|
const uint32_t arch_type = QEMU_ARCH;
|
||||||
|
static bool mig_throttle_on;
|
||||||
|
static int dirty_rate_high_cnt;
|
||||||
|
static void check_guest_throttling(void);
|
||||||
|
|
||||||
/***********************************************************/
|
/***********************************************************/
|
||||||
/* ram save/restore */
|
/* ram save/restore */
|
||||||
@ -378,8 +381,14 @@ static void migration_bitmap_sync(void)
|
|||||||
uint64_t num_dirty_pages_init = migration_dirty_pages;
|
uint64_t num_dirty_pages_init = migration_dirty_pages;
|
||||||
MigrationState *s = migrate_get_current();
|
MigrationState *s = migrate_get_current();
|
||||||
static int64_t start_time;
|
static int64_t start_time;
|
||||||
|
static int64_t bytes_xfer_prev;
|
||||||
static int64_t num_dirty_pages_period;
|
static int64_t num_dirty_pages_period;
|
||||||
int64_t end_time;
|
int64_t end_time;
|
||||||
|
int64_t bytes_xfer_now;
|
||||||
|
|
||||||
|
if (!bytes_xfer_prev) {
|
||||||
|
bytes_xfer_prev = ram_bytes_transferred();
|
||||||
|
}
|
||||||
|
|
||||||
if (!start_time) {
|
if (!start_time) {
|
||||||
start_time = qemu_get_clock_ms(rt_clock);
|
start_time = qemu_get_clock_ms(rt_clock);
|
||||||
@ -404,6 +413,25 @@ static void migration_bitmap_sync(void)
|
|||||||
|
|
||||||
/* more than 1 second = 1000 milliseconds */
|
/* more than 1 second = 1000 milliseconds */
|
||||||
if (end_time > start_time + 1000) {
|
if (end_time > start_time + 1000) {
|
||||||
|
if (migrate_auto_converge()) {
|
||||||
|
/* The following detection logic can be refined later. For now:
|
||||||
|
Check to see if the dirtied bytes exceed 50% of the approx.
|
||||||
|
amount of bytes that just got transferred since the last time we
|
||||||
|
were in this routine. If that happens >N times (for now N==4)
|
||||||
|
we turn on the throttle down logic */
|
||||||
|
bytes_xfer_now = ram_bytes_transferred();
|
||||||
|
if (s->dirty_pages_rate &&
|
||||||
|
(num_dirty_pages_period * TARGET_PAGE_SIZE >
|
||||||
|
(bytes_xfer_now - bytes_xfer_prev)/2) &&
|
||||||
|
(dirty_rate_high_cnt++ > 4)) {
|
||||||
|
trace_migration_throttle();
|
||||||
|
mig_throttle_on = true;
|
||||||
|
dirty_rate_high_cnt = 0;
|
||||||
|
}
|
||||||
|
bytes_xfer_prev = bytes_xfer_now;
|
||||||
|
} else {
|
||||||
|
mig_throttle_on = false;
|
||||||
|
}
|
||||||
s->dirty_pages_rate = num_dirty_pages_period * 1000
|
s->dirty_pages_rate = num_dirty_pages_period * 1000
|
||||||
/ (end_time - start_time);
|
/ (end_time - start_time);
|
||||||
s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
|
s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
|
||||||
@ -573,6 +601,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
|
|||||||
migration_bitmap = bitmap_new(ram_pages);
|
migration_bitmap = bitmap_new(ram_pages);
|
||||||
bitmap_set(migration_bitmap, 0, ram_pages);
|
bitmap_set(migration_bitmap, 0, ram_pages);
|
||||||
migration_dirty_pages = ram_pages;
|
migration_dirty_pages = ram_pages;
|
||||||
|
mig_throttle_on = false;
|
||||||
|
dirty_rate_high_cnt = 0;
|
||||||
|
|
||||||
if (migrate_use_xbzrle()) {
|
if (migrate_use_xbzrle()) {
|
||||||
XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
|
XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
|
||||||
@ -635,6 +665,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
|
|||||||
}
|
}
|
||||||
total_sent += bytes_sent;
|
total_sent += bytes_sent;
|
||||||
acct_info.iterations++;
|
acct_info.iterations++;
|
||||||
|
check_guest_throttling();
|
||||||
/* we want to check in the 1st loop, just in case it was the 1st time
|
/* we want to check in the 1st loop, just in case it was the 1st time
|
||||||
and we had to sync the dirty bitmap.
|
and we had to sync the dirty bitmap.
|
||||||
qemu_get_clock_ns() is a bit expensive, so we only check each some
|
qemu_get_clock_ns() is a bit expensive, so we only check each some
|
||||||
@ -1110,3 +1141,53 @@ TargetInfo *qmp_query_target(Error **errp)
|
|||||||
|
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Work item that mig_throttle_cpu_down() queues on a vCPU through
 * async_run_on_cpu().  It releases the iothread lock, sleeps, and takes
 * the lock again before returning.  The @opq argument is not used.
 */
static void mig_sleep_cpu(void *opq)
{
    /* g_usleep() takes microseconds: 30 * 1000 us == 30 ms. */
    enum { THROTTLE_SLEEP_US = 30 * 1000 };

    qemu_mutex_unlock_iothread();
    g_usleep(THROTTLE_SLEEP_US);
    qemu_mutex_lock_iothread();
}
|
||||||
|
|
||||||
|
/* To reduce the dirty rate explicitly disallow the VCPUs from spending
|
||||||
|
much time in the VM. The migration thread will try to catchup.
|
||||||
|
Workload will experience a performance drop.
|
||||||
|
*/
|
||||||
|
static void mig_throttle_cpu_down(CPUState *cpu, void *data)
|
||||||
|
{
|
||||||
|
async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mig_throttle_guest_down(void)
|
||||||
|
{
|
||||||
|
qemu_mutex_lock_iothread();
|
||||||
|
qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
|
||||||
|
qemu_mutex_unlock_iothread();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void check_guest_throttling(void)
|
||||||
|
{
|
||||||
|
static int64_t t0;
|
||||||
|
int64_t t1;
|
||||||
|
|
||||||
|
if (!mig_throttle_on) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!t0) {
|
||||||
|
t0 = qemu_get_clock_ns(rt_clock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
t1 = qemu_get_clock_ns(rt_clock);
|
||||||
|
|
||||||
|
/* If it has been more than 40 ms since the last time the guest
|
||||||
|
* was throttled then do it again.
|
||||||
|
*/
|
||||||
|
if (40 < (t1-t0)/1000000) {
|
||||||
|
mig_throttle_guest_down();
|
||||||
|
t0 = t1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
29
cpus.c
29
cpus.c
@ -652,6 +652,7 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
|
|||||||
|
|
||||||
wi.func = func;
|
wi.func = func;
|
||||||
wi.data = data;
|
wi.data = data;
|
||||||
|
wi.free = false;
|
||||||
if (cpu->queued_work_first == NULL) {
|
if (cpu->queued_work_first == NULL) {
|
||||||
cpu->queued_work_first = &wi;
|
cpu->queued_work_first = &wi;
|
||||||
} else {
|
} else {
|
||||||
@ -670,6 +671,31 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Schedule @func(@data) to run on @cpu without waiting for completion.
 *
 * When called from @cpu's own thread the function is simply invoked
 * directly.  Otherwise a heap-allocated work item is appended to the
 * vCPU's work queue and the vCPU is kicked; the item is marked with
 * free = true so that flush_queued_work() releases it after running it.
 */
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *item;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    item = g_malloc0(sizeof(*item));
    item->func = func;
    item->data = data;
    item->free = true;

    /* Append to the tail of the per-vCPU singly linked work list. */
    if (cpu->queued_work_first != NULL) {
        cpu->queued_work_last->next = item;
    } else {
        cpu->queued_work_first = item;
    }
    cpu->queued_work_last = item;
    item->next = NULL;
    item->done = false;

    qemu_cpu_kick(cpu);
}
|
||||||
|
|
||||||
static void flush_queued_work(CPUState *cpu)
|
static void flush_queued_work(CPUState *cpu)
|
||||||
{
|
{
|
||||||
struct qemu_work_item *wi;
|
struct qemu_work_item *wi;
|
||||||
@ -682,6 +708,9 @@ static void flush_queued_work(CPUState *cpu)
|
|||||||
cpu->queued_work_first = wi->next;
|
cpu->queued_work_first = wi->next;
|
||||||
wi->func(wi->data);
|
wi->func(wi->data);
|
||||||
wi->done = true;
|
wi->done = true;
|
||||||
|
if (wi->free) {
|
||||||
|
g_free(wi);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
cpu->queued_work_last = NULL;
|
cpu->queued_work_last = NULL;
|
||||||
qemu_cond_broadcast(&qemu_work_cond);
|
qemu_cond_broadcast(&qemu_work_cond);
|
||||||
|
@ -125,6 +125,8 @@ void migrate_del_blocker(Error *reason);
|
|||||||
|
|
||||||
bool migrate_rdma_pin_all(void);
|
bool migrate_rdma_pin_all(void);
|
||||||
|
|
||||||
|
bool migrate_auto_converge(void);
|
||||||
|
|
||||||
int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
|
int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
|
||||||
uint8_t *dst, int dlen);
|
uint8_t *dst, int dlen);
|
||||||
int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
|
int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
|
||||||
|
@ -293,6 +293,7 @@ struct qemu_work_item {
|
|||||||
void (*func)(void *data);
|
void (*func)(void *data);
|
||||||
void *data;
|
void *data;
|
||||||
int done;
|
int done;
|
||||||
|
bool free;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -378,6 +378,16 @@ bool cpu_is_stopped(CPUState *cpu);
|
|||||||
*/
|
*/
|
||||||
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
|
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* async_run_on_cpu:
|
||||||
|
* @cpu: The vCPU to run on.
|
||||||
|
* @func: The function to be executed.
|
||||||
|
* @data: Data to pass to the function.
|
||||||
|
*
|
||||||
|
* Schedules the function @func for execution on the vCPU @cpu asynchronously.
|
||||||
|
*/
|
||||||
|
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* qemu_for_each_cpu:
|
* qemu_for_each_cpu:
|
||||||
* @func: The function to be executed.
|
* @func: The function to be executed.
|
||||||
|
@ -484,6 +484,15 @@ bool migrate_rdma_pin_all(void)
|
|||||||
return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
|
return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool migrate_auto_converge(void)
|
||||||
|
{
|
||||||
|
MigrationState *s;
|
||||||
|
|
||||||
|
s = migrate_get_current();
|
||||||
|
|
||||||
|
return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
|
||||||
|
}
|
||||||
|
|
||||||
int migrate_use_xbzrle(void)
|
int migrate_use_xbzrle(void)
|
||||||
{
|
{
|
||||||
MigrationState *s;
|
MigrationState *s;
|
||||||
|
@ -616,7 +616,7 @@
|
|||||||
# Since: 1.2
|
# Since: 1.2
|
||||||
##
|
##
|
||||||
{ 'enum': 'MigrationCapability',
|
{ 'enum': 'MigrationCapability',
|
||||||
'data': ['xbzrle', 'x-rdma-pin-all'] }
|
'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge'] }
|
||||||
|
|
||||||
##
|
##
|
||||||
# @MigrationCapabilityStatus
|
# @MigrationCapabilityStatus
|
||||||
|
@ -1036,6 +1036,7 @@ savevm_section_end(unsigned int section_id) "section_id %u"
|
|||||||
# arch_init.c
|
# arch_init.c
|
||||||
migration_bitmap_sync_start(void) ""
|
migration_bitmap_sync_start(void) ""
|
||||||
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
|
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
|
||||||
|
migration_throttle(void) ""
|
||||||
|
|
||||||
# hw/qxl.c
|
# hw/qxl.c
|
||||||
disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
|
disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
|
||||||
|
Loading…
Reference in New Issue
Block a user