Merge remote-tracking branch 'quintela/migration.next' into staging

# By Chegu Vinod
# Via Juan Quintela
* quintela/migration.next:
  Force auto-convergence of live migration
  Add 'auto-converge' migration capability
  Introduce async_run_on_cpu()

Message-id: 1373664508-5404-1-git-send-email-quintela@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Anthony Liguori, 2013-07-15 14:49:15 -05:00
commit 6453a3a694
8 changed files with 134 additions and 1 deletion

arch_init.c

@@ -104,6 +104,9 @@ int graphic_depth = 32;
 #endif
 
 const uint32_t arch_type = QEMU_ARCH;
+static bool mig_throttle_on;
+static int dirty_rate_high_cnt;
+static void check_guest_throttling(void);
 
 /***********************************************************/
 /* ram save/restore */
@@ -378,8 +381,14 @@ static void migration_bitmap_sync(void)
     uint64_t num_dirty_pages_init = migration_dirty_pages;
     MigrationState *s = migrate_get_current();
     static int64_t start_time;
+    static int64_t bytes_xfer_prev;
     static int64_t num_dirty_pages_period;
     int64_t end_time;
+    int64_t bytes_xfer_now;
+
+    if (!bytes_xfer_prev) {
+        bytes_xfer_prev = ram_bytes_transferred();
+    }
 
     if (!start_time) {
         start_time = qemu_get_clock_ms(rt_clock);
@@ -404,6 +413,25 @@ static void migration_bitmap_sync(void)
 
     /* more than 1 second = 1000 milliseconds */
     if (end_time > start_time + 1000) {
+        if (migrate_auto_converge()) {
+            /* The following detection logic can be refined later. For now:
+               Check to see if the dirtied bytes are 50% more than the approx.
+               amount of bytes that just got transferred since the last time we
+               were in this routine. If that happens >N times (for now N==4)
+               we turn on the throttle-down logic */
+            bytes_xfer_now = ram_bytes_transferred();
+            if (s->dirty_pages_rate &&
+                (num_dirty_pages_period * TARGET_PAGE_SIZE >
+                 (bytes_xfer_now - bytes_xfer_prev)/2) &&
+                (dirty_rate_high_cnt++ > 4)) {
+                trace_migration_throttle();
+                mig_throttle_on = true;
+                dirty_rate_high_cnt = 0;
+            }
+            bytes_xfer_prev = bytes_xfer_now;
+        } else {
+            mig_throttle_on = false;
+        }
         s->dirty_pages_rate = num_dirty_pages_period * 1000
             / (end_time - start_time);
         s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
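
To make the trigger concrete, here is a standalone sketch of the arithmetic above with illustrative numbers; the 4096-byte page size, the sample values, and the main() wrapper are assumptions for the example, not part of the series:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TARGET_PAGE_SIZE 4096            /* assumed page size */

    int main(void)
    {
        /* Illustrative values for one ~1 s bitmap-sync period. */
        uint64_t num_dirty_pages_period = 40000;      /* pages dirtied */
        int64_t bytes_xfer_prev = 0;                  /* bytes sent before */
        int64_t bytes_xfer_now = 100 * 1024 * 1024;   /* bytes sent now */
        static int dirty_rate_high_cnt;
        bool mig_throttle_on = false;

        /* Same comparison as migration_bitmap_sync(): the dirtied bytes
         * (~156 MiB here) exceed half of the transferred bytes (50 MiB),
         * so the high-rate counter advances; the throttle itself only
         * engages once this has happened more than four times. */
        if (num_dirty_pages_period * TARGET_PAGE_SIZE >
            (uint64_t)(bytes_xfer_now - bytes_xfer_prev) / 2 &&
            dirty_rate_high_cnt++ > 4) {
            mig_throttle_on = true;
            dirty_rate_high_cnt = 0;
        }

        printf("throttle on: %d, high-rate count: %d\n",
               mig_throttle_on, dirty_rate_high_cnt);
        return 0;
    }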
@@ -573,6 +601,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     migration_bitmap = bitmap_new(ram_pages);
     bitmap_set(migration_bitmap, 0, ram_pages);
     migration_dirty_pages = ram_pages;
+    mig_throttle_on = false;
+    dirty_rate_high_cnt = 0;
 
     if (migrate_use_xbzrle()) {
         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@@ -635,6 +665,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         }
         total_sent += bytes_sent;
         acct_info.iterations++;
+        check_guest_throttling();
         /* we want to check in the 1st loop, just in case it was the 1st time
            and we had to sync the dirty bitmap.
            qemu_get_clock_ns() is a bit expensive, so we only check each some
@@ -1110,3 +1141,53 @@ TargetInfo *qmp_query_target(Error **errp)
 
     return info;
 }
+
+/* Stub function that gets run on the vcpu when it's brought out of the
+   VM to run inside qemu via async_run_on_cpu() */
+static void mig_sleep_cpu(void *opq)
+{
+    qemu_mutex_unlock_iothread();
+    g_usleep(30*1000);
+    qemu_mutex_lock_iothread();
+}
+
+/* To reduce the dirty rate, explicitly disallow the VCPUs from spending
+   much time in the VM. The migration thread will try to catch up.
+   The workload will experience a performance drop.
+*/
+static void mig_throttle_cpu_down(CPUState *cpu, void *data)
+{
+    async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
+}
+
+static void mig_throttle_guest_down(void)
+{
+    qemu_mutex_lock_iothread();
+    qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
+    qemu_mutex_unlock_iothread();
+}
+
+static void check_guest_throttling(void)
+{
+    static int64_t t0;
+    int64_t t1;
+
+    if (!mig_throttle_on) {
+        return;
+    }
+
+    if (!t0) {
+        t0 = qemu_get_clock_ns(rt_clock);
+        return;
+    }
+
+    t1 = qemu_get_clock_ns(rt_clock);
+
+    /* If it has been more than 40 ms since the last time the guest
+     * was throttled then do it again.
+     */
+    if (40 < (t1 - t0) / 1000000) {
+        mig_throttle_guest_down();
+        t0 = t1;
+    }
+}
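
For intuition about the constants: each throttle event parks a vCPU for 30 ms, and check_guest_throttling() fires at most once per 40 ms, so a throttled guest keeps roughly a quarter of its CPU time. A back-of-envelope sketch (not from the series; actual behavior depends on scheduling and iothread contention):

    #include <stdio.h>

    int main(void)
    {
        const double sleep_ms  = 30.0;  /* g_usleep(30*1000) in mig_sleep_cpu() */
        const double period_ms = 40.0;  /* spacing in check_guest_throttling() */

        /* Fraction of each throttle window left to the guest. */
        printf("guest CPU share while throttled: ~%.0f%%\n",
               100.0 * (period_ms - sleep_ms) / period_ms);
        return 0;
    }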

cpus.c

@@ -652,6 +652,7 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
 
     wi.func = func;
     wi.data = data;
+    wi.free = false;
     if (cpu->queued_work_first == NULL) {
         cpu->queued_work_first = &wi;
     } else {
@@ -670,6 +671,31 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
     }
 }
 
+void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
+{
+    struct qemu_work_item *wi;
+
+    if (qemu_cpu_is_self(cpu)) {
+        func(data);
+        return;
+    }
+
+    wi = g_malloc0(sizeof(struct qemu_work_item));
+    wi->func = func;
+    wi->data = data;
+    wi->free = true;
+    if (cpu->queued_work_first == NULL) {
+        cpu->queued_work_first = wi;
+    } else {
+        cpu->queued_work_last->next = wi;
+    }
+    cpu->queued_work_last = wi;
+    wi->next = NULL;
+    wi->done = false;
+
+    qemu_cpu_kick(cpu);
+}
+
 static void flush_queued_work(CPUState *cpu)
 {
     struct qemu_work_item *wi;
@@ -682,6 +708,9 @@ static void flush_queued_work(CPUState *cpu)
         cpu->queued_work_first = wi->next;
         wi->func(wi->data);
         wi->done = true;
+        if (wi->free) {
+            g_free(wi);
+        }
     }
 
     cpu->queued_work_last = NULL;
     qemu_cond_broadcast(&qemu_work_cond);
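
The wi->free flag separates the two queueing paths: run_on_cpu() stack-allocates its work item and blocks on qemu_work_cond until the vCPU has run it, while async_run_on_cpu() heap-allocates the item and returns at once, leaving flush_queued_work() to free it in the vCPU thread. A hedged usage sketch, mirroring mig_throttle_guest_down() above (the poke_* names are illustrative, not part of the series):

    static void poke_cpu(void *data)
    {
        /* Runs in the vCPU thread the next time that CPU drops back
         * into QEMU; the caller is not waiting for it. */
    }

    static void poke_one(CPUState *cpu, void *data)
    {
        async_run_on_cpu(cpu, poke_cpu, data);   /* returns immediately */
    }

    static void poke_all_cpus(void)
    {
        qemu_for_each_cpu(poke_one, NULL);
    }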

include/migration/migration.h

@@ -125,6 +125,8 @@ void migrate_del_blocker(Error *reason);
 
 bool migrate_rdma_pin_all(void);
 
+bool migrate_auto_converge(void);
+
 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
                          uint8_t *dst, int dlen);
 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);

include/qemu-common.h

@@ -293,6 +293,7 @@ struct qemu_work_item {
     void (*func)(void *data);
     void *data;
     int done;
+    bool free;
 };

include/qom/cpu.h

@@ -378,6 +378,16 @@ bool cpu_is_stopped(CPUState *cpu);
  */
 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
 
+/**
+ * async_run_on_cpu:
+ * @cpu: The vCPU to run on.
+ * @func: The function to be executed.
+ * @data: Data to pass to the function.
+ *
+ * Schedules the function @func for execution on the vCPU @cpu asynchronously.
+ */
+void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
+
 /**
  * qemu_for_each_cpu:
  * @func: The function to be executed.

migration.c

@@ -484,6 +484,15 @@ bool migrate_rdma_pin_all(void)
 
     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
 }
 
+bool migrate_auto_converge(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
+}
+
 int migrate_use_xbzrle(void)
 {
     MigrationState *s;

qapi-schema.json

@@ -616,7 +616,7 @@
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
-  'data': ['xbzrle', 'x-rdma-pin-all'] }
+  'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge'] }
 
 ##
 # @MigrationCapabilityStatus
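
With the enum member in place, the capability can be toggled at runtime through the existing migrate-set-capabilities QMP command; a usage sketch:

    { "execute": "migrate-set-capabilities",
      "arguments": { "capabilities": [
        { "capability": "auto-converge", "state": true } ] } }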

trace-events

@@ -1036,6 +1036,7 @@ savevm_section_end(unsigned int section_id) "section_id %u"
 # arch_init.c
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
+migration_throttle(void) ""
 
 # hw/qxl.c
 disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"