Merge remote-tracking branch 'quintela/migration.next' into staging

# By Chegu Vinod
# Via Juan Quintela
* quintela/migration.next:
  Force auto-convergence of live migration
  Add 'auto-converge' migration capability
  Introduce async_run_on_cpu()

Message-id: 1373664508-5404-1-git-send-email-quintela@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Anthony Liguori, 2013-07-15 14:49:15 -05:00
commit 6453a3a694
8 changed files with 134 additions and 1 deletion

arch_init.c

@@ -104,6 +104,9 @@ int graphic_depth = 32;
 #endif
 
 const uint32_t arch_type = QEMU_ARCH;
+static bool mig_throttle_on;
+static int dirty_rate_high_cnt;
+static void check_guest_throttling(void);
 
 /***********************************************************/
 /* ram save/restore */
@@ -378,8 +381,14 @@ static void migration_bitmap_sync(void)
     uint64_t num_dirty_pages_init = migration_dirty_pages;
     MigrationState *s = migrate_get_current();
     static int64_t start_time;
+    static int64_t bytes_xfer_prev;
     static int64_t num_dirty_pages_period;
     int64_t end_time;
+    int64_t bytes_xfer_now;
+
+    if (!bytes_xfer_prev) {
+        bytes_xfer_prev = ram_bytes_transferred();
+    }
 
     if (!start_time) {
         start_time = qemu_get_clock_ms(rt_clock);
@@ -404,6 +413,25 @@ static void migration_bitmap_sync(void)
 
     /* more than 1 second = 1000 milliseconds */
     if (end_time > start_time + 1000) {
+        if (migrate_auto_converge()) {
+            /* The following detection logic can be refined later. For now:
+               Check to see if the dirtied bytes are 50% more than the approx.
+               amount of bytes that just got transferred since the last time we
+               were in this routine. If that happens >N times (for now N==4)
+               we turn on the throttle-down logic */
+            bytes_xfer_now = ram_bytes_transferred();
+            if (s->dirty_pages_rate &&
+                (num_dirty_pages_period * TARGET_PAGE_SIZE >
+                 (bytes_xfer_now - bytes_xfer_prev)/2) &&
+                (dirty_rate_high_cnt++ > 4)) {
+                trace_migration_throttle();
+                mig_throttle_on = true;
+                dirty_rate_high_cnt = 0;
+            }
+            bytes_xfer_prev = bytes_xfer_now;
+        } else {
+            mig_throttle_on = false;
+        }
         s->dirty_pages_rate = num_dirty_pages_period * 1000
             / (end_time - start_time);
         s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
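
To make the trigger concrete, here is a standalone sketch of the arithmetic above with illustrative numbers; the 4096-byte page size, the sample values, and the main() wrapper are assumptions for the example, not part of the series:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TARGET_PAGE_SIZE 4096            /* assumed page size */

    int main(void)
    {
        /* Illustrative values for one ~1 s bitmap-sync period. */
        uint64_t num_dirty_pages_period = 40000;      /* pages dirtied */
        int64_t bytes_xfer_prev = 0;                  /* bytes sent before */
        int64_t bytes_xfer_now = 100 * 1024 * 1024;   /* bytes sent now */
        static int dirty_rate_high_cnt;
        bool mig_throttle_on = false;

        /* Same comparison as migration_bitmap_sync(): the dirtied bytes
         * (~156 MiB here) exceed half of the transferred bytes (50 MiB),
         * so the high-rate counter advances; the throttle itself only
         * engages once this has happened more than four times. */
        if (num_dirty_pages_period * TARGET_PAGE_SIZE >
            (uint64_t)(bytes_xfer_now - bytes_xfer_prev) / 2 &&
            dirty_rate_high_cnt++ > 4) {
            mig_throttle_on = true;
            dirty_rate_high_cnt = 0;
        }

        printf("throttle on: %d, high-rate count: %d\n",
               mig_throttle_on, dirty_rate_high_cnt);
        return 0;
    }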
@@ -573,6 +601,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     migration_bitmap = bitmap_new(ram_pages);
     bitmap_set(migration_bitmap, 0, ram_pages);
     migration_dirty_pages = ram_pages;
+    mig_throttle_on = false;
+    dirty_rate_high_cnt = 0;
 
     if (migrate_use_xbzrle()) {
         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@@ -635,6 +665,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         }
         total_sent += bytes_sent;
         acct_info.iterations++;
+        check_guest_throttling();
         /* we want to check in the 1st loop, just in case it was the 1st time
            and we had to sync the dirty bitmap.
            qemu_get_clock_ns() is a bit expensive, so we only check each some
@@ -1110,3 +1141,53 @@ TargetInfo *qmp_query_target(Error **errp)
 
     return info;
 }
+
+/* Stub function that gets run on the vcpu when it's brought out of the
+   VM to run inside qemu via async_run_on_cpu() */
+static void mig_sleep_cpu(void *opq)
+{
+    qemu_mutex_unlock_iothread();
+    g_usleep(30*1000);
+    qemu_mutex_lock_iothread();
+}
+
+/* To reduce the dirty rate, explicitly disallow the VCPUs from spending
+   much time in the VM. The migration thread will try to catch up.
+   The workload will experience a performance drop.
+*/
+static void mig_throttle_cpu_down(CPUState *cpu, void *data)
+{
+    async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
+}
+
+static void mig_throttle_guest_down(void)
+{
+    qemu_mutex_lock_iothread();
+    qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
+    qemu_mutex_unlock_iothread();
+}
+
+static void check_guest_throttling(void)
+{
+    static int64_t t0;
+    int64_t t1;
+
+    if (!mig_throttle_on) {
+        return;
+    }
+
+    if (!t0) {
+        t0 = qemu_get_clock_ns(rt_clock);
+        return;
+    }
+
+    t1 = qemu_get_clock_ns(rt_clock);
+
+    /* If it has been more than 40 ms since the last time the guest
+     * was throttled then do it again.
+     */
+    if (40 < (t1 - t0) / 1000000) {
+        mig_throttle_guest_down();
+        t0 = t1;
+    }
+}
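
For intuition about the constants: each throttle event parks a vCPU for 30 ms, and check_guest_throttling() fires at most once per 40 ms, so a throttled guest keeps roughly a quarter of its CPU time. A back-of-envelope sketch (not from the series; actual behavior depends on scheduling and iothread contention):

    #include <stdio.h>

    int main(void)
    {
        const double sleep_ms  = 30.0;  /* g_usleep(30*1000) in mig_sleep_cpu() */
        const double period_ms = 40.0;  /* spacing in check_guest_throttling() */

        /* Fraction of each throttle window left to the guest. */
        printf("guest CPU share while throttled: ~%.0f%%\n",
               100.0 * (period_ms - sleep_ms) / period_ms);
        return 0;
    }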

cpus.c

@@ -652,6 +652,7 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
 
     wi.func = func;
     wi.data = data;
+    wi.free = false;
     if (cpu->queued_work_first == NULL) {
         cpu->queued_work_first = &wi;
     } else {
@@ -670,6 +671,31 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
     }
 }
 
+void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
+{
+    struct qemu_work_item *wi;
+
+    if (qemu_cpu_is_self(cpu)) {
+        func(data);
+        return;
+    }
+
+    wi = g_malloc0(sizeof(struct qemu_work_item));
+    wi->func = func;
+    wi->data = data;
+    wi->free = true;
+    if (cpu->queued_work_first == NULL) {
+        cpu->queued_work_first = wi;
+    } else {
+        cpu->queued_work_last->next = wi;
+    }
+    cpu->queued_work_last = wi;
+    wi->next = NULL;
+    wi->done = false;
+
+    qemu_cpu_kick(cpu);
+}
+
 static void flush_queued_work(CPUState *cpu)
 {
     struct qemu_work_item *wi;
@@ -682,6 +708,9 @@ static void flush_queued_work(CPUState *cpu)
         cpu->queued_work_first = wi->next;
         wi->func(wi->data);
         wi->done = true;
+        if (wi->free) {
+            g_free(wi);
+        }
     }
 
     cpu->queued_work_last = NULL;
     qemu_cond_broadcast(&qemu_work_cond);
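
The wi->free flag separates the two queueing paths: run_on_cpu() stack-allocates its work item and blocks on qemu_work_cond until the vCPU has run it, while async_run_on_cpu() heap-allocates the item and returns at once, leaving flush_queued_work() to free it in the vCPU thread. A hedged usage sketch, mirroring mig_throttle_guest_down() above (the poke_* names are illustrative, not part of the series):

    static void poke_cpu(void *data)
    {
        /* Runs in the vCPU thread the next time that CPU drops back
         * into QEMU; the caller is not waiting for it. */
    }

    static void poke_one(CPUState *cpu, void *data)
    {
        async_run_on_cpu(cpu, poke_cpu, data);   /* returns immediately */
    }

    static void poke_all_cpus(void)
    {
        qemu_for_each_cpu(poke_one, NULL);
    }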

include/migration/migration.h

@@ -125,6 +125,8 @@ void migrate_del_blocker(Error *reason);
 
 bool migrate_rdma_pin_all(void);
 
+bool migrate_auto_converge(void);
+
 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
                          uint8_t *dst, int dlen);
 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);

include/qemu-common.h

@@ -293,6 +293,7 @@ struct qemu_work_item {
     void (*func)(void *data);
     void *data;
     int done;
+    bool free;
 };

include/qom/cpu.h

@@ -378,6 +378,16 @@ bool cpu_is_stopped(CPUState *cpu);
  */
 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
 
+/**
+ * async_run_on_cpu:
+ * @cpu: The vCPU to run on.
+ * @func: The function to be executed.
+ * @data: Data to pass to the function.
+ *
+ * Schedules the function @func for execution on the vCPU @cpu asynchronously.
+ */
+void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
+
 /**
  * qemu_for_each_cpu:
  * @func: The function to be executed.

migration.c

@@ -484,6 +484,15 @@ bool migrate_rdma_pin_all(void)
 
     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
 }
 
+bool migrate_auto_converge(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
+}
+
 int migrate_use_xbzrle(void)
 {
     MigrationState *s;

qapi-schema.json

@@ -616,7 +616,7 @@
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
-  'data': ['xbzrle', 'x-rdma-pin-all'] }
+  'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge'] }
 
 ##
 # @MigrationCapabilityStatus
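
With the enum member in place, the capability can be toggled at runtime through the existing migrate-set-capabilities QMP command; a usage sketch:

    { "execute": "migrate-set-capabilities",
      "arguments": { "capabilities": [
        { "capability": "auto-converge", "state": true } ] } }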

trace-events

@@ -1036,6 +1036,7 @@ savevm_section_end(unsigned int section_id) "section_id %u"
 # arch_init.c
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
+migration_throttle(void) ""
 
 # hw/qxl.c
 disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"