diff --git a/Makefile b/Makefile index fe5d868e58..489c5a074e 100644 --- a/Makefile +++ b/Makefile @@ -138,6 +138,7 @@ obj-y += qemu-char.o aio.o savevm.o obj-y += msmouse.o ps2.o obj-y += qdev.o qdev-properties.o obj-y += qint.o qstring.o qdict.o qlist.o qemu-config.o +obj-y += block-migration.o obj-$(CONFIG_BRLAPI) += baum.o obj-$(CONFIG_POSIX) += migration-exec.o migration-unix.o migration-fd.o diff --git a/block-migration.c b/block-migration.c new file mode 100644 index 0000000000..4b4eddf62a --- /dev/null +++ b/block-migration.c @@ -0,0 +1,558 @@ +/* + * QEMU live block migration + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Liran Schour + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "block_int.h" +#include "hw/hw.h" +#include "block-migration.h" +#include +#include + +#define SECTOR_BITS 9 +#define SECTOR_SIZE (1 << SECTOR_BITS) +#define SECTOR_MASK ~(SECTOR_SIZE - 1); + +#define BLOCK_SIZE (block_mig_state->sectors_per_block << SECTOR_BITS) + +#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01 +#define BLK_MIG_FLAG_EOS 0x02 + +#define MAX_IS_ALLOCATED_SEARCH 65536 +#define MAX_BLOCKS_READ 10000 +#define BLOCKS_READ_CHANGE 100 +#define INITIAL_BLOCKS_READ 100 + +//#define DEBUG_BLK_MIGRATION + +#ifdef DEBUG_BLK_MIGRATION +#define dprintf(fmt, ...) \ + do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0) +#else +#define dprintf(fmt, ...) \ + do { } while (0) +#endif + +typedef struct BlkMigBlock { + uint8_t *buf; + BlkMigDevState *bmds; + int64_t sector; + struct iovec iov; + QEMUIOVector qiov; + BlockDriverAIOCB *aiocb; + int ret; + struct BlkMigBlock *next; +} BlkMigBlock; + +typedef struct BlkMigState { + int bulk_completed; + int blk_enable; + int shared_base; + int no_dirty; + QEMUFile *load_file; + BlkMigDevState *bmds_first; + int sectors_per_block; + BlkMigBlock *first_blk; + BlkMigBlock *last_blk; + int submitted; + int read_done; + int transferred; + int64_t print_completion; +} BlkMigState; + +static BlkMigState *block_mig_state = NULL; + +static void blk_mig_read_cb(void *opaque, int ret) +{ + BlkMigBlock *blk = opaque; + + blk->ret = ret; + + /* insert at the end */ + if(block_mig_state->last_blk == NULL) { + block_mig_state->first_blk = blk; + block_mig_state->last_blk = blk; + } else { + block_mig_state->last_blk->next = blk; + block_mig_state->last_blk = blk; + } + + block_mig_state->submitted--; + block_mig_state->read_done++; + assert(block_mig_state->submitted >= 0); + + return; +} + +static int mig_read_device_bulk(QEMUFile *f, BlkMigDevState *bms) +{ + int nr_sectors; + int64_t total_sectors, cur_sector = 0; + BlockDriverState *bs = bms->bs; + BlkMigBlock *blk; + + blk = qemu_malloc(sizeof(BlkMigBlock)); + blk->buf = qemu_malloc(BLOCK_SIZE); + + cur_sector = bms->cur_sector; + total_sectors = bdrv_getlength(bs) >> SECTOR_BITS; + + if(bms->shared_base) { + while(cur_sector < bms->total_sectors && + !bdrv_is_allocated(bms->bs, cur_sector, + MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) { + cur_sector += nr_sectors; + } + } + + if(cur_sector >= total_sectors) { + bms->cur_sector = total_sectors; + qemu_free(blk->buf); + qemu_free(blk); + return 1; + } + + if(cur_sector >= block_mig_state->print_completion) { + printf("Completed %" PRId64 " %%\r", cur_sector * 100 / total_sectors); + fflush(stdout); + block_mig_state->print_completion += + (block_mig_state->sectors_per_block * 10000); + } + + /* we going to transfder BLOCK_SIZE any way even if it is not allocated */ + nr_sectors = block_mig_state->sectors_per_block; + + cur_sector &= ~((int64_t)block_mig_state->sectors_per_block -1); + + if(total_sectors - cur_sector < block_mig_state->sectors_per_block) { + nr_sectors = (total_sectors - cur_sector); + } + + bms->cur_sector = cur_sector + nr_sectors; + blk->sector = cur_sector; + blk->bmds = bms; + blk->next = NULL; + + blk->iov.iov_base = blk->buf; + blk->iov.iov_len = nr_sectors * SECTOR_SIZE; + qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); + + blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov, + nr_sectors, blk_mig_read_cb, blk); + + if(!blk->aiocb) { + printf("Error reading sector %" PRId64 "\n", cur_sector); + qemu_free(blk->buf); + qemu_free(blk); + return 0; + } + + bdrv_reset_dirty(bms->bs, cur_sector, nr_sectors); + block_mig_state->submitted++; + + return (bms->cur_sector >= total_sectors); +} + +static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) +{ + int len, nr_sectors; + int64_t total_sectors = bmds->total_sectors, cur_sector = 0; + uint8_t *tmp_buf = NULL; + BlockDriverState *bs = bmds->bs; + + tmp_buf = qemu_malloc(BLOCK_SIZE); + + cur_sector = bmds->cur_sector; + + if(bmds->shared_base) { + while(cur_sector < bmds->total_sectors && + !bdrv_is_allocated(bmds->bs, cur_sector, + MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) { + cur_sector += nr_sectors; + } + } + + if(cur_sector >= total_sectors) { + bmds->cur_sector = total_sectors; + qemu_free(tmp_buf); + return 1; + } + + if(cur_sector >= block_mig_state->print_completion) { + printf("Completed %" PRId64 " %%\r", cur_sector * 100 / total_sectors); + fflush(stdout); + block_mig_state->print_completion += + (block_mig_state->sectors_per_block * 10000); + } + + cur_sector &= ~((int64_t)block_mig_state->sectors_per_block -1); + + /* we going to transfer + BLOCK_SIZE + any way even if it is not allocated */ + nr_sectors = block_mig_state->sectors_per_block; + + if(total_sectors - cur_sector < block_mig_state->sectors_per_block) { + nr_sectors = (total_sectors - cur_sector); + } + + if(bdrv_read(bs, cur_sector, tmp_buf, nr_sectors) < 0) { + printf("Error reading sector %" PRId64 "\n", cur_sector); + } + + bdrv_reset_dirty(bs, cur_sector, nr_sectors); + + /* Device name */ + qemu_put_be64(f,(cur_sector << SECTOR_BITS) | BLK_MIG_FLAG_DEVICE_BLOCK); + + len = strlen(bs->device_name); + qemu_put_byte(f, len); + qemu_put_buffer(f, (uint8_t *)bs->device_name, len); + + qemu_put_buffer(f, tmp_buf, + BLOCK_SIZE); + + bmds->cur_sector = cur_sector + block_mig_state->sectors_per_block; + + qemu_free(tmp_buf); + + return (bmds->cur_sector >= total_sectors); +} + +static void send_blk(QEMUFile *f, BlkMigBlock * blk) +{ + int len; + + /* Device name */ + qemu_put_be64(f,(blk->sector << SECTOR_BITS) | BLK_MIG_FLAG_DEVICE_BLOCK); + + len = strlen(blk->bmds->bs->device_name); + qemu_put_byte(f, len); + qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len); + + qemu_put_buffer(f, blk->buf, + BLOCK_SIZE); + + return; +} + +static void blk_mig_save_dev_info(QEMUFile *f, BlkMigDevState *bmds) +{ +} + +static void set_dirty_tracking(int enable) +{ + BlkMigDevState *bmds; + for(bmds = block_mig_state->bmds_first; bmds != NULL; bmds = bmds->next) { + bdrv_set_dirty_tracking(bmds->bs,enable); + } + + return; +} + +static void init_blk_migration(QEMUFile *f) +{ + BlkMigDevState **pbmds, *bmds; + BlockDriverState *bs; + + for (bs = bdrv_first; bs != NULL; bs = bs->next) { + if(bs->type == BDRV_TYPE_HD) { + bmds = qemu_mallocz(sizeof(BlkMigDevState)); + bmds->bs = bs; + bmds->bulk_completed = 0; + bmds->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS; + bmds->shared_base = block_mig_state->shared_base; + + if(bmds->shared_base) { + printf("Start migration for %s with shared base image\n", + bs->device_name); + } else { + printf("Start full migration for %s\n", bs->device_name); + } + + /* insert at the end */ + pbmds = &block_mig_state->bmds_first; + while (*pbmds != NULL) + pbmds = &(*pbmds)->next; + *pbmds = bmds; + + blk_mig_save_dev_info(f, bmds); + + } + } + + block_mig_state->sectors_per_block = bdrv_get_sectors_per_chunk(); + + return; +} + +static int blk_mig_save_bulked_block(QEMUFile *f, int is_async) +{ + BlkMigDevState *bmds; + + for (bmds = block_mig_state->bmds_first; bmds != NULL; bmds = bmds->next) { + if(bmds->bulk_completed == 0) { + if(is_async) { + if(mig_read_device_bulk(f, bmds) == 1) { + /* completed bulk section for this device */ + bmds->bulk_completed = 1; + } + } else { + if(mig_save_device_bulk(f,bmds) == 1) { + /* completed bulk section for this device */ + bmds->bulk_completed = 1; + } + } + return 1; + } + } + + /* we reached here means bulk is completed */ + block_mig_state->bulk_completed = 1; + + return 0; + +} + +#define MAX_NUM_BLOCKS 4 + +static void blk_mig_save_dirty_blocks(QEMUFile *f) +{ + BlkMigDevState *bmds; + uint8_t buf[BLOCK_SIZE]; + int64_t sector; + int len; + + for(bmds = block_mig_state->bmds_first; bmds != NULL; bmds = bmds->next) { + for(sector = 0; sector < bmds->cur_sector;) { + + if(bdrv_get_dirty(bmds->bs,sector)) { + + if(bdrv_read(bmds->bs, sector, buf, + block_mig_state->sectors_per_block) < 0) { + } + + /* device name */ + qemu_put_be64(f,(sector << SECTOR_BITS) + | BLK_MIG_FLAG_DEVICE_BLOCK); + + len = strlen(bmds->bs->device_name); + + qemu_put_byte(f, len); + qemu_put_buffer(f, (uint8_t *)bmds->bs->device_name, len); + + qemu_put_buffer(f, buf, + (block_mig_state->sectors_per_block * + SECTOR_SIZE)); + + bdrv_reset_dirty(bmds->bs, sector, + block_mig_state->sectors_per_block); + + sector += block_mig_state->sectors_per_block; + } else { + /* sector is clean */ + sector += block_mig_state->sectors_per_block; + } + } + } + + return; +} + +static void flush_blks(QEMUFile* f) +{ + BlkMigBlock *blk, *tmp; + + dprintf("%s Enter submitted %d read_done %d transfered\n", __FUNCTION__, + submitted, read_done, transfered); + + for(blk = block_mig_state->first_blk; + blk != NULL && !qemu_file_rate_limit(f); blk = tmp) { + send_blk(f, blk); + + tmp = blk->next; + qemu_free(blk->buf); + qemu_free(blk); + + block_mig_state->read_done--; + block_mig_state->transferred++; + assert(block_mig_state->read_done >= 0); + } + block_mig_state->first_blk = blk; + + if(block_mig_state->first_blk == NULL) { + block_mig_state->last_blk = NULL; + } + + dprintf("%s Exit submitted %d read_done %d transferred%d\n", __FUNCTION__, + block_mig_state->submitted, block_mig_state->read_done, + block_mig_state->transferred); + + return; +} + +static int is_stage2_completed(void) +{ + BlkMigDevState *bmds; + + if(block_mig_state->submitted > 0) { + return 0; + } + + for (bmds = block_mig_state->bmds_first; bmds != NULL; bmds = bmds->next) { + if(bmds->bulk_completed == 0) { + return 0; + } + } + + return 1; +} + +static int block_save_live(QEMUFile *f, int stage, void *opaque) +{ + int ret = 1; + + dprintf("Enter save live stage %d submitted %d transferred %d\n", stage, + submitted, transferred); + + if(block_mig_state->blk_enable != 1) { + /* no need to migrate storage */ + + qemu_put_be64(f,BLK_MIG_FLAG_EOS); + return 1; + } + + if(stage == 1) { + init_blk_migration(f); + + /* start track dirty blocks */ + set_dirty_tracking(1); + + } + + flush_blks(f); + + /* control the rate of transfer */ + while ((block_mig_state->submitted + block_mig_state->read_done) * + (BLOCK_SIZE) < + (qemu_file_get_rate_limit(f))) { + + ret = blk_mig_save_bulked_block(f, 1); + + if (ret == 0) /* no more bulk blocks for now*/ + break; + } + + flush_blks(f); + + if(stage == 3) { + + while(blk_mig_save_bulked_block(f, 0) != 0); + + blk_mig_save_dirty_blocks(f); + + /* stop track dirty blocks */ + set_dirty_tracking(0);; + + printf("\nBlock migration completed\n"); + } + + qemu_put_be64(f,BLK_MIG_FLAG_EOS); + + return ((stage == 2) && is_stage2_completed()); +} + +static int block_load(QEMUFile *f, void *opaque, int version_id) +{ + int len, flags; + char device_name[256]; + int64_t addr; + BlockDriverState *bs; + uint8_t *buf; + + block_mig_state->sectors_per_block = bdrv_get_sectors_per_chunk(); + buf = qemu_malloc(BLOCK_SIZE); + + do { + + addr = qemu_get_be64(f); + + flags = addr & ~SECTOR_MASK; + addr &= SECTOR_MASK; + + if(flags & BLK_MIG_FLAG_DEVICE_BLOCK) { + + /* get device name */ + len = qemu_get_byte(f); + + qemu_get_buffer(f, (uint8_t *)device_name, len); + device_name[len] = '\0'; + + bs = bdrv_find(device_name); + + qemu_get_buffer(f, buf, + BLOCK_SIZE); + if(bs != NULL) { + + bdrv_write(bs, (addr >> SECTOR_BITS), + buf, block_mig_state->sectors_per_block); + } else { + printf("Error unknown block device %s\n", device_name); + } + } else if(flags & BLK_MIG_FLAG_EOS) { + + } else { + printf("Unknown flags\n"); + } + } while(!(flags & BLK_MIG_FLAG_EOS)); + + qemu_free(buf); + + return 0; +} + +static void block_set_params(int blk_enable, int shared_base, void *opaque) +{ + assert(opaque == block_mig_state); + + block_mig_state->blk_enable = blk_enable; + block_mig_state->shared_base = shared_base; + + /* shared base means that blk_enable = 1 */ + block_mig_state->blk_enable |= shared_base; + + return; +} + +void blk_mig_info(void) +{ + BlockDriverState *bs; + + for (bs = bdrv_first; bs != NULL; bs = bs->next) { + printf("Device %s\n", bs->device_name); + if(bs->type == BDRV_TYPE_HD) { + printf("device %s format %s\n", + bs->device_name, bs->drv->format_name); + } + } +} + +void blk_mig_init(void) +{ + + block_mig_state = qemu_mallocz(sizeof(BlkMigState)); + + register_savevm_live("block", 0, 1, block_set_params, block_save_live, + NULL, block_load, block_mig_state); + + +} diff --git a/block-migration.h b/block-migration.h new file mode 100644 index 0000000000..c33d3cbf05 --- /dev/null +++ b/block-migration.h @@ -0,0 +1,29 @@ +/* + * QEMU live block migration + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Liran Schour + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef BLOCK_MIGRATION_H +#define BLOCK_MIGRATION_H + +typedef struct BlkMigDevState { + BlockDriverState *bs; + int bulk_completed; + int shared_base; + struct BlkMigDevState *next; + int64_t cur_sector; + int64_t total_sectors; + int64_t dirty; +} BlkMigDevState; + +void blk_mig_init(void); +void blk_mig_info(void); +#endif /* BLOCK_MIGRATION_H */ diff --git a/buffered_file.c b/buffered_file.c index 56b4418ed9..217f75e60e 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -211,6 +211,13 @@ out: return s->xfer_limit; } +static size_t buffered_get_rate_limit(void *opaque) +{ + QEMUFileBuffered *s = opaque; + + return s->xfer_limit; +} + static void buffered_rate_tick(void *opaque) { QEMUFileBuffered *s = opaque; @@ -251,7 +258,8 @@ QEMUFile *qemu_fopen_ops_buffered(void *opaque, s->file = qemu_fopen_ops(s, buffered_put_buffer, NULL, buffered_close, buffered_rate_limit, - buffered_set_rate_limit); + buffered_set_rate_limit, + buffered_get_rate_limit); s->timer = qemu_new_timer(rt_clock, buffered_rate_tick, s); diff --git a/hw/hw.h b/hw/hw.h index 1d4af2a9df..7889aa3cee 100644 --- a/hw/hw.h +++ b/hw/hw.h @@ -43,12 +43,14 @@ typedef int (QEMUFileRateLimit)(void *opaque); * the old rate otherwise */ typedef size_t (QEMUFileSetRateLimit)(void *opaque, size_t new_rate); +typedef size_t (QEMUFileGetRateLimit)(void *opaque); QEMUFile *qemu_fopen_ops(void *opaque, QEMUFilePutBufferFunc *put_buffer, QEMUFileGetBufferFunc *get_buffer, QEMUFileCloseFunc *close, QEMUFileRateLimit *rate_limit, - QEMUFileSetRateLimit *set_rate_limit); + QEMUFileSetRateLimit *set_rate_limit, + QEMUFileGetRateLimit *get_rate_limit); QEMUFile *qemu_fopen(const char *filename, const char *mode); QEMUFile *qemu_fdopen(int fd, const char *mode); QEMUFile *qemu_fopen_socket(int fd); @@ -85,6 +87,7 @@ unsigned int qemu_get_be32(QEMUFile *f); uint64_t qemu_get_be64(QEMUFile *f); int qemu_file_rate_limit(QEMUFile *f); size_t qemu_file_set_rate_limit(QEMUFile *f, size_t new_rate); +size_t qemu_file_get_rate_limit(QEMUFile *f); int qemu_file_has_error(QEMUFile *f); void qemu_file_set_error(QEMUFile *f); @@ -239,6 +242,7 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv) int64_t qemu_ftell(QEMUFile *f); int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence); +typedef void SaveSetParamsHandler(int blk_enable, int shared, void * opaque); typedef void SaveStateHandler(QEMUFile *f, void *opaque); typedef int SaveLiveStateHandler(QEMUFile *f, int stage, void *opaque); typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id); @@ -253,7 +257,8 @@ int register_savevm(const char *idstr, int register_savevm_live(const char *idstr, int instance_id, int version_id, - SaveLiveStateHandler *save_live_state, + SaveSetParamsHandler *set_params, + SaveLiveStateHandler *save_live_state, SaveStateHandler *save_state, LoadStateHandler *load_state, void *opaque); diff --git a/migration-exec.c b/migration-exec.c index b45c83385a..c83066959a 100644 --- a/migration-exec.c +++ b/migration-exec.c @@ -53,8 +53,10 @@ static int exec_close(FdMigrationState *s) } MigrationState *exec_start_outgoing_migration(const char *command, - int64_t bandwidth_limit, - int detach) + int64_t bandwidth_limit, + int detach, + int blk, + int inc) { FdMigrationState *s; FILE *f; @@ -84,6 +86,9 @@ MigrationState *exec_start_outgoing_migration(const char *command, s->mig_state.get_status = migrate_fd_get_status; s->mig_state.release = migrate_fd_release; + s->mig_state.blk = blk; + s->mig_state.shared = inc; + s->state = MIG_STATE_ACTIVE; s->mon_resume = NULL; s->bandwidth_limit = bandwidth_limit; diff --git a/migration-fd.c b/migration-fd.c index 15b44158ff..587f9d8e84 100644 --- a/migration-fd.c +++ b/migration-fd.c @@ -54,7 +54,9 @@ static int fd_close(FdMigrationState *s) MigrationState *fd_start_outgoing_migration(Monitor *mon, const char *fdname, int64_t bandwidth_limit, - int detach) + int detach, + int blk, + int inc) { FdMigrationState *s; @@ -78,6 +80,9 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon, s->mig_state.get_status = migrate_fd_get_status; s->mig_state.release = migrate_fd_release; + s->mig_state.blk = blk; + s->mig_state.shared = inc; + s->state = MIG_STATE_ACTIVE; s->mon_resume = NULL; s->bandwidth_limit = bandwidth_limit; diff --git a/migration-tcp.c b/migration-tcp.c index 9ed92b4427..efa7c74c67 100644 --- a/migration-tcp.c +++ b/migration-tcp.c @@ -78,7 +78,9 @@ static void tcp_wait_for_connect(void *opaque) MigrationState *tcp_start_outgoing_migration(const char *host_port, int64_t bandwidth_limit, - int detach) + int detach, + int blk, + int inc) { struct sockaddr_in addr; FdMigrationState *s; @@ -96,6 +98,9 @@ MigrationState *tcp_start_outgoing_migration(const char *host_port, s->mig_state.get_status = migrate_fd_get_status; s->mig_state.release = migrate_fd_release; + s->mig_state.blk = blk; + s->mig_state.shared = inc; + s->state = MIG_STATE_ACTIVE; s->mon_resume = NULL; s->bandwidth_limit = bandwidth_limit; diff --git a/migration-unix.c b/migration-unix.c index a26587a3ea..25cd6d3d2d 100644 --- a/migration-unix.c +++ b/migration-unix.c @@ -77,7 +77,9 @@ static void unix_wait_for_connect(void *opaque) MigrationState *unix_start_outgoing_migration(const char *path, int64_t bandwidth_limit, - int detach) + int detach, + int blk, + int inc) { FdMigrationState *s; struct sockaddr_un addr; @@ -95,6 +97,9 @@ MigrationState *unix_start_outgoing_migration(const char *path, s->mig_state.get_status = migrate_fd_get_status; s->mig_state.release = migrate_fd_release; + s->mig_state.blk = blk; + s->mig_state.shared = inc; + s->state = MIG_STATE_ACTIVE; s->mon_resume = NULL; s->bandwidth_limit = bandwidth_limit; diff --git a/migration.c b/migration.c index b20beb7302..3ae0be86c8 100644 --- a/migration.c +++ b/migration.c @@ -58,16 +58,24 @@ void do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data) const char *p; int detach = qdict_get_int(qdict, "detach"); const char *uri = qdict_get_str(qdict, "uri"); - + if (strstart(uri, "tcp:", &p)) - s = tcp_start_outgoing_migration(p, max_throttle, detach); + s = tcp_start_outgoing_migration(p, max_throttle, detach, + (int)qdict_get_int(qdict, "blk"), + (int)qdict_get_int(qdict, "inc")); #if !defined(WIN32) else if (strstart(uri, "exec:", &p)) - s = exec_start_outgoing_migration(p, max_throttle, detach); + s = exec_start_outgoing_migration(p, max_throttle, detach, + (int)qdict_get_int(qdict, "blk"), + (int)qdict_get_int(qdict, "inc")); else if (strstart(uri, "unix:", &p)) - s = unix_start_outgoing_migration(p, max_throttle, detach); + s = unix_start_outgoing_migration(p, max_throttle, detach, + (int)qdict_get_int(qdict, "blk"), + (int)qdict_get_int(qdict, "inc")); else if (strstart(uri, "fd:", &p)) - s = fd_start_outgoing_migration(mon, p, max_throttle, detach); + s = fd_start_outgoing_migration(mon, p, max_throttle, detach, + (int)qdict_get_int(qdict, "blk"), + (int)qdict_get_int(qdict, "inc")); #endif else monitor_printf(mon, "unknown migration protocol: %s\n", uri); @@ -251,13 +259,14 @@ void migrate_fd_connect(FdMigrationState *s) migrate_fd_close); dprintf("beginning savevm\n"); - ret = qemu_savevm_state_begin(s->file); + ret = qemu_savevm_state_begin(s->file, s->mig_state.blk, + s->mig_state.shared); if (ret < 0) { dprintf("failed, %d\n", ret); migrate_fd_error(s); return; } - + migrate_fd_put_ready(s); } diff --git a/migration.h b/migration.h index 2d28b8f395..56adf05e6f 100644 --- a/migration.h +++ b/migration.h @@ -30,6 +30,8 @@ struct MigrationState void (*cancel)(MigrationState *s); int (*get_status)(MigrationState *s); void (*release)(MigrationState *s); + int blk; + int shared; }; typedef struct FdMigrationState FdMigrationState; @@ -65,27 +67,35 @@ void do_info_migrate(Monitor *mon); int exec_start_incoming_migration(const char *host_port); MigrationState *exec_start_outgoing_migration(const char *host_port, - int64_t bandwidth_limit, - int detach); + int64_t bandwidth_limit, + int detach, + int blk, + int inc); int tcp_start_incoming_migration(const char *host_port); MigrationState *tcp_start_outgoing_migration(const char *host_port, int64_t bandwidth_limit, - int detach); + int detach, + int blk, + int inc); int unix_start_incoming_migration(const char *path); MigrationState *unix_start_outgoing_migration(const char *path, int64_t bandwidth_limit, - int detach); + int detach, + int blk, + int inc); int fd_start_incoming_migration(const char *path); MigrationState *fd_start_outgoing_migration(Monitor *mon, const char *fdname, int64_t bandwidth_limit, - int detach); + int detach, + int blk, + int inc); void migrate_fd_monitor_suspend(FdMigrationState *s); diff --git a/savevm.c b/savevm.c index 74f2c66d29..4668843c3f 100644 --- a/savevm.c +++ b/savevm.c @@ -169,6 +169,7 @@ struct QEMUFile { QEMUFileCloseFunc *close; QEMUFileRateLimit *rate_limit; QEMUFileSetRateLimit *set_rate_limit; + QEMUFileGetRateLimit *get_rate_limit; void *opaque; int is_write; @@ -264,9 +265,11 @@ QEMUFile *qemu_popen(FILE *stdio_file, const char *mode) s->stdio_file = stdio_file; if(mode[0] == 'r') { - s->file = qemu_fopen_ops(s, NULL, stdio_get_buffer, stdio_pclose, NULL, NULL); + s->file = qemu_fopen_ops(s, NULL, stdio_get_buffer, stdio_pclose, + NULL, NULL, NULL); } else { - s->file = qemu_fopen_ops(s, stdio_put_buffer, NULL, stdio_pclose, NULL, NULL); + s->file = qemu_fopen_ops(s, stdio_put_buffer, NULL, stdio_pclose, + NULL, NULL, NULL); } return s->file; } @@ -311,9 +314,11 @@ QEMUFile *qemu_fdopen(int fd, const char *mode) goto fail; if(mode[0] == 'r') { - s->file = qemu_fopen_ops(s, NULL, stdio_get_buffer, stdio_fclose, NULL, NULL); + s->file = qemu_fopen_ops(s, NULL, stdio_get_buffer, stdio_fclose, + NULL, NULL, NULL); } else { - s->file = qemu_fopen_ops(s, stdio_put_buffer, NULL, stdio_fclose, NULL, NULL); + s->file = qemu_fopen_ops(s, stdio_put_buffer, NULL, stdio_fclose, + NULL, NULL, NULL); } return s->file; @@ -327,7 +332,8 @@ QEMUFile *qemu_fopen_socket(int fd) QEMUFileSocket *s = qemu_mallocz(sizeof(QEMUFileSocket)); s->fd = fd; - s->file = qemu_fopen_ops(s, NULL, socket_get_buffer, socket_close, NULL, NULL); + s->file = qemu_fopen_ops(s, NULL, socket_get_buffer, socket_close, + NULL, NULL, NULL); return s->file; } @@ -363,11 +369,13 @@ QEMUFile *qemu_fopen(const char *filename, const char *mode) s->stdio_file = fopen(filename, mode); if (!s->stdio_file) goto fail; - + if(mode[0] == 'w') { - s->file = qemu_fopen_ops(s, file_put_buffer, NULL, stdio_fclose, NULL, NULL); + s->file = qemu_fopen_ops(s, file_put_buffer, NULL, stdio_fclose, + NULL, NULL, NULL); } else { - s->file = qemu_fopen_ops(s, NULL, file_get_buffer, stdio_fclose, NULL, NULL); + s->file = qemu_fopen_ops(s, NULL, file_get_buffer, stdio_fclose, + NULL, NULL, NULL); } return s->file; fail: @@ -395,15 +403,17 @@ static int bdrv_fclose(void *opaque) static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable) { if (is_writable) - return qemu_fopen_ops(bs, block_put_buffer, NULL, bdrv_fclose, NULL, NULL); - return qemu_fopen_ops(bs, NULL, block_get_buffer, bdrv_fclose, NULL, NULL); + return qemu_fopen_ops(bs, block_put_buffer, NULL, bdrv_fclose, + NULL, NULL, NULL); + return qemu_fopen_ops(bs, NULL, block_get_buffer, bdrv_fclose, NULL, NULL, NULL); } QEMUFile *qemu_fopen_ops(void *opaque, QEMUFilePutBufferFunc *put_buffer, QEMUFileGetBufferFunc *get_buffer, QEMUFileCloseFunc *close, QEMUFileRateLimit *rate_limit, - QEMUFileSetRateLimit *set_rate_limit) + QEMUFileSetRateLimit *set_rate_limit, + QEMUFileGetRateLimit *get_rate_limit) { QEMUFile *f; @@ -415,6 +425,7 @@ QEMUFile *qemu_fopen_ops(void *opaque, QEMUFilePutBufferFunc *put_buffer, f->close = close; f->rate_limit = rate_limit; f->set_rate_limit = set_rate_limit; + f->get_rate_limit = get_rate_limit; f->is_write = 0; return f; @@ -592,6 +603,14 @@ int qemu_file_rate_limit(QEMUFile *f) return 0; } +size_t qemu_file_get_rate_limit(QEMUFile *f) +{ + if (f->get_rate_limit) + return f->get_rate_limit(f->opaque); + + return 0; +} + size_t qemu_file_set_rate_limit(QEMUFile *f, size_t new_rate) { /* any failed or completed migration keeps its state to allow probing of @@ -964,6 +983,7 @@ typedef struct SaveStateEntry { int instance_id; int version_id; int section_id; + SaveSetParamsHandler *set_params; SaveLiveStateHandler *save_live_state; SaveStateHandler *save_state; LoadStateHandler *load_state; @@ -971,6 +991,7 @@ typedef struct SaveStateEntry { void *opaque; } SaveStateEntry; + static QTAILQ_HEAD(savevm_handlers, SaveStateEntry) savevm_handlers = QTAILQ_HEAD_INITIALIZER(savevm_handlers); static int global_section_id; @@ -996,6 +1017,7 @@ static int calculate_new_instance_id(const char *idstr) int register_savevm_live(const char *idstr, int instance_id, int version_id, + SaveSetParamsHandler *set_params, SaveLiveStateHandler *save_live_state, SaveStateHandler *save_state, LoadStateHandler *load_state, @@ -1003,10 +1025,11 @@ int register_savevm_live(const char *idstr, { SaveStateEntry *se; - se = qemu_malloc(sizeof(SaveStateEntry)); + se = qemu_mallocz(sizeof(SaveStateEntry)); pstrcpy(se->idstr, sizeof(se->idstr), idstr); se->version_id = version_id; se->section_id = global_section_id++; + se->set_params = set_params; se->save_live_state = save_live_state; se->save_state = save_state; se->load_state = load_state; @@ -1031,7 +1054,7 @@ int register_savevm(const char *idstr, void *opaque) { return register_savevm_live(idstr, instance_id, version_id, - NULL, save_state, load_state, opaque); + NULL, NULL, save_state, load_state, opaque); } void unregister_savevm(const char *idstr, void *opaque) @@ -1051,7 +1074,7 @@ int vmstate_register(int instance_id, const VMStateDescription *vmsd, { SaveStateEntry *se; - se = qemu_malloc(sizeof(SaveStateEntry)); + se = qemu_mallocz(sizeof(SaveStateEntry)); pstrcpy(se->idstr, sizeof(se->idstr), vmsd->name); se->version_id = vmsd->version_id; se->section_id = global_section_id++; @@ -1213,10 +1236,17 @@ static void vmstate_save(QEMUFile *f, SaveStateEntry *se) #define QEMU_VM_SECTION_END 0x03 #define QEMU_VM_SECTION_FULL 0x04 -int qemu_savevm_state_begin(QEMUFile *f) +int qemu_savevm_state_begin(QEMUFile *f, int blk_enable, int shared) { SaveStateEntry *se; + QTAILQ_FOREACH(se, &savevm_handlers, entry) { + if(se->set_params == NULL) { + continue; + } + se->set_params(blk_enable, shared, se->opaque); + } + qemu_put_be32(f, QEMU_VM_FILE_MAGIC); qemu_put_be32(f, QEMU_VM_FILE_VERSION); @@ -1326,7 +1356,7 @@ int qemu_savevm_state(QEMUFile *f) bdrv_flush_all(); - ret = qemu_savevm_state_begin(f); + ret = qemu_savevm_state_begin(f, 0, 0); if (ret < 0) goto out; diff --git a/sysemu.h b/sysemu.h index 96804b497b..b1887ef1f4 100644 --- a/sysemu.h +++ b/sysemu.h @@ -61,7 +61,7 @@ void qemu_announce_self(void); void main_loop_wait(int timeout); -int qemu_savevm_state_begin(QEMUFile *f); +int qemu_savevm_state_begin(QEMUFile *f, int blk_enable, int shared); int qemu_savevm_state_iterate(QEMUFile *f); int qemu_savevm_state_complete(QEMUFile *f); int qemu_savevm_state(QEMUFile *f); diff --git a/vl.c b/vl.c index 18353f289e..c3f3c8f195 100644 --- a/vl.c +++ b/vl.c @@ -148,6 +148,8 @@ int main(int argc, char **argv) #include "qemu-char.h" #include "cache-utils.h" #include "block.h" +#include "block_int.h" +#include "block-migration.h" #include "dma.h" #include "audio/audio.h" #include "migration.h" @@ -2972,9 +2974,7 @@ static int ram_save_live(QEMUFile *f, int stage, void *opaque) bwidth = 0.000001; /* try transferring iterative blocks of memory */ - if (stage == 3) { - /* flush all remaining blocks regardless of rate limiting */ while (ram_save_block(f) != 0) { bytes_transferred += TARGET_PAGE_SIZE; @@ -5553,6 +5553,8 @@ int main(int argc, char **argv, char **envp) bdrv_init_with_whitelist(); + blk_mig_init(); + /* we always create the cdrom drive, even if no disk is there */ drive_add(NULL, CDROM_ALIAS); @@ -5569,7 +5571,8 @@ int main(int argc, char **argv, char **envp) exit(1); vmstate_register(0, &vmstate_timers ,&timers_state); - register_savevm_live("ram", 0, 3, ram_save_live, NULL, ram_load, NULL); + register_savevm_live("ram", 0, 3, NULL, ram_save_live, NULL, + ram_load, NULL); /* Maintain compatibility with multiple stdio monitors */ if (!strcmp(monitor_devices[0],"stdio")) {