migration/next for 20140225
-----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABCAAGBQJTDKLvAAoJEPSH7xhYctcjhA8QAKDslw9iovAHU4c0NgQxp3yE 08dAD6bznHPkc6ENZEbV4+Yx9AvtGwYeKE4IlVqxDaSCBQ1T/lGr6Di/X/Yuwjo9 80/av6cFpFsO9fw4fhFRNjU0n8xKeN2S/kjCQhz07Zky2mD2fEoLnTrhmjBRCsVN tVCWOYzbkNbIFUCsJB0OBfC/qH0r5RuB2/SuNnwk4NwT5r7+UxMtfZ+BIE4Kez3n l6G4L1XO3julErp/8BQmIChnHH7QtTfQzBahJIlBsiLiqHhX1f1v6Q0CRln+A9S1 jfAK/1zqpYVOAb59R2u0FCgB793sV0P+aa71ORRP1g57lFC5KsGJghQq0OoWr1YA OHrOFPm2YHdTBsU7BG3ndMSbNgZspVAxns6mcSkcDWEH0JDv+FhK08+45tDqkAOu 9hWuYA5p6hodOEBLprNit7lK+7coAKDCkIM4hzPMVZxGCucDqRmtI0oHadjar1Wi nTbxeDqsh67mr6+QXSR8PRQ3y0TDsuBS6Sm2+Bchv1Nt5GiAKaMySiPuXGQlMSS1 3ohy77Ltz42ci1+mFSp6aVaZO8hEkakaN8Hg53T57IVTSqy4B9t/R3bvi+SsysCt BMaHONUnOuloKtA5dnOd6Q+hLE8tw3UNGFB71VZoj1tEbXj48WpIZ1IpQYbVAoyQ DR2+Wccft0O3GVAgLAo0 =yrmU -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20140225' into staging migration/next for 20140225 # gpg: Signature made Tue 25 Feb 2014 14:04:31 GMT using RSA key ID 5872D723 # gpg: Can't check signature: public key not found * remotes/juanquintela/tags/migration/20140225: rdma: rename 'x-rdma' => 'rdma' Fix two XBZRLE corruption issues Fix vmstate_info_int32_le comparison/assign qemu_file: use fwrite() correctly Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
6f6831f61a
64
arch_init.c
64
arch_init.c
@ -122,7 +122,6 @@ static void check_guest_throttling(void);
|
||||
#define RAM_SAVE_FLAG_XBZRLE 0x40
|
||||
/* 0x80 is reserved in migration.h; start with 0x100 next */
|
||||
|
||||
|
||||
static struct defconfig_file {
|
||||
const char *filename;
|
||||
/* Indicates it is a user config file (disabled by -no-user-config) */
|
||||
@ -133,6 +132,7 @@ static struct defconfig_file {
|
||||
{ NULL }, /* end of list */
|
||||
};
|
||||
|
||||
static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
|
||||
|
||||
int qemu_read_default_config_files(bool userconfig)
|
||||
{
|
||||
@ -273,6 +273,34 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
|
||||
return size;
|
||||
}
|
||||
|
||||
/* This is the last block that we have visited searching for dirty pages
|
||||
*/
|
||||
static RAMBlock *last_seen_block;
|
||||
/* This is the last block from where we have sent data */
|
||||
static RAMBlock *last_sent_block;
|
||||
static ram_addr_t last_offset;
|
||||
static unsigned long *migration_bitmap;
|
||||
static uint64_t migration_dirty_pages;
|
||||
static uint32_t last_version;
|
||||
static bool ram_bulk_stage;
|
||||
|
||||
/* Update the xbzrle cache to reflect a page that's been sent as all 0.
|
||||
* The important thing is that a stale (not-yet-0'd) page be replaced
|
||||
* by the new data.
|
||||
* As a bonus, if the page wasn't in the cache it gets added so that
|
||||
* when a small write is made into the 0'd page it gets XBZRLE sent
|
||||
*/
|
||||
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
|
||||
{
|
||||
if (ram_bulk_stage || !migrate_use_xbzrle()) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* We don't care if this fails to allocate a new cache page
|
||||
* as long as it updated an old one */
|
||||
cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
|
||||
}
|
||||
|
||||
#define ENCODING_FLAG_XBZRLE 0x1
|
||||
|
||||
static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
|
||||
@ -329,18 +357,6 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
|
||||
return bytes_sent;
|
||||
}
|
||||
|
||||
|
||||
/* This is the last block that we have visited searching for dirty pages
|
||||
*/
|
||||
static RAMBlock *last_seen_block;
|
||||
/* This is the last block from where we have sent data */
|
||||
static RAMBlock *last_sent_block;
|
||||
static ram_addr_t last_offset;
|
||||
static unsigned long *migration_bitmap;
|
||||
static uint64_t migration_dirty_pages;
|
||||
static uint32_t last_version;
|
||||
static bool ram_bulk_stage;
|
||||
|
||||
static inline
|
||||
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
|
||||
ram_addr_t start)
|
||||
@ -512,6 +528,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
|
||||
} else {
|
||||
int ret;
|
||||
uint8_t *p;
|
||||
bool send_async = true;
|
||||
int cont = (block == last_sent_block) ?
|
||||
RAM_SAVE_FLAG_CONTINUE : 0;
|
||||
|
||||
@ -522,6 +539,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
|
||||
ret = ram_control_save_page(f, block->offset,
|
||||
offset, TARGET_PAGE_SIZE, &bytes_sent);
|
||||
|
||||
current_addr = block->offset + offset;
|
||||
if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
|
||||
if (ret != RAM_SAVE_CONTROL_DELAYED) {
|
||||
if (bytes_sent > 0) {
|
||||
@ -536,19 +554,35 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
|
||||
RAM_SAVE_FLAG_COMPRESS);
|
||||
qemu_put_byte(f, 0);
|
||||
bytes_sent++;
|
||||
/* Must let xbzrle know, otherwise a previous (now 0'd) cached
|
||||
* page would be stale
|
||||
*/
|
||||
xbzrle_cache_zero_page(current_addr);
|
||||
} else if (!ram_bulk_stage && migrate_use_xbzrle()) {
|
||||
current_addr = block->offset + offset;
|
||||
bytes_sent = save_xbzrle_page(f, p, current_addr, block,
|
||||
offset, cont, last_stage);
|
||||
if (!last_stage) {
|
||||
/* We must send exactly what's in the xbzrle cache
|
||||
* even if the page wasn't xbzrle compressed, so that
|
||||
* it's right next time.
|
||||
*/
|
||||
p = get_cached_data(XBZRLE.cache, current_addr);
|
||||
|
||||
/* Can't send this cached data async, since the cache page
|
||||
* might get updated before it gets to the wire
|
||||
*/
|
||||
send_async = false;
|
||||
}
|
||||
}
|
||||
|
||||
/* XBZRLE overflow or normal page */
|
||||
if (bytes_sent == -1) {
|
||||
bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
|
||||
qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
|
||||
if (send_async) {
|
||||
qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
|
||||
} else {
|
||||
qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
|
||||
}
|
||||
bytes_sent += TARGET_PAGE_SIZE;
|
||||
acct_info.norm_pages++;
|
||||
}
|
||||
|
@ -66,7 +66,7 @@ bulk-phase round of the migration and can be enabled for extremely
|
||||
high-performance RDMA hardware using the following command:
|
||||
|
||||
QEMU Monitor Command:
|
||||
$ migrate_set_capability x-rdma-pin-all on # disabled by default
|
||||
$ migrate_set_capability rdma-pin-all on # disabled by default
|
||||
|
||||
Performing this action will cause all 8GB to be pinned, so if that's
|
||||
not what you want, then please ignore this step altogether.
|
||||
@ -93,12 +93,12 @@ $ migrate_set_speed 40g # or whatever is the MAX of your RDMA device
|
||||
|
||||
Next, on the destination machine, add the following to the QEMU command line:
|
||||
|
||||
qemu ..... -incoming x-rdma:host:port
|
||||
qemu ..... -incoming rdma:host:port
|
||||
|
||||
Finally, perform the actual migration on the source machine:
|
||||
|
||||
QEMU Monitor Command:
|
||||
$ migrate -d x-rdma:host:port
|
||||
$ migrate -d rdma:host:port
|
||||
|
||||
PERFORMANCE
|
||||
===========
|
||||
@ -120,8 +120,8 @@ For example, in the same 8GB RAM example with all 8GB of memory in
|
||||
active use and the VM itself is completely idle using the same 40 gbps
|
||||
infiniband link:
|
||||
|
||||
1. x-rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
|
||||
2. x-rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
|
||||
1. rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
|
||||
2. rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
|
||||
|
||||
These numbers would of course scale up to whatever size virtual machine
|
||||
you have to migrate using RDMA.
|
||||
@ -407,18 +407,14 @@ socket is broken during a non-RDMA based migration.
|
||||
|
||||
TODO:
|
||||
=====
|
||||
1. 'migrate x-rdma:host:port' and '-incoming x-rdma' options will be
|
||||
renamed to 'rdma' after the experimental phase of this work has
|
||||
completed upstream.
|
||||
2. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
|
||||
1. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
|
||||
are not compatible with infiniband memory pinning and will result in
|
||||
an aborted migration (but with the source VM left unaffected).
|
||||
3. Use of the recent /proc/<pid>/pagemap would likely speed up
|
||||
2. Use of the recent /proc/<pid>/pagemap would likely speed up
|
||||
the use of KSM and ballooning while using RDMA.
|
||||
4. Also, some form of balloon-device usage tracking would also
|
||||
3. Also, some form of balloon-device usage tracking would also
|
||||
help alleviate some issues.
|
||||
5. Move UNREGISTER requests to a separate thread.
|
||||
6. Use LRU to provide more fine-grained direction of UNREGISTER
|
||||
4. Use LRU to provide more fine-grained direction of UNREGISTER
|
||||
requests for unpinning memory in an overcommitted environment.
|
||||
7. Expose UNREGISTER support to the user by way of workload-specific
|
||||
5. Expose UNREGISTER support to the user by way of workload-specific
|
||||
hints about application behavior.
|
||||
|
@ -66,7 +66,7 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr);
|
||||
* @addr: page address
|
||||
* @pdata: pointer to the page
|
||||
*/
|
||||
int cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata);
|
||||
int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata);
|
||||
|
||||
/**
|
||||
* cache_resize: resize the page cache. In case of size reduction the extra
|
||||
|
@ -3412,7 +3412,7 @@ void rdma_start_outgoing_migration(void *opaque,
|
||||
}
|
||||
|
||||
ret = qemu_rdma_source_init(rdma, &local_err,
|
||||
s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL]);
|
||||
s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]);
|
||||
|
||||
if (ret) {
|
||||
goto err;
|
||||
|
@ -82,7 +82,7 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
|
||||
if (strstart(uri, "tcp:", &p))
|
||||
tcp_start_incoming_migration(p, errp);
|
||||
#ifdef CONFIG_RDMA
|
||||
else if (strstart(uri, "x-rdma:", &p))
|
||||
else if (strstart(uri, "rdma:", &p))
|
||||
rdma_start_incoming_migration(p, errp);
|
||||
#endif
|
||||
#if !defined(WIN32)
|
||||
@ -438,7 +438,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
|
||||
if (strstart(uri, "tcp:", &p)) {
|
||||
tcp_start_outgoing_migration(s, p, &local_err);
|
||||
#ifdef CONFIG_RDMA
|
||||
} else if (strstart(uri, "x-rdma:", &p)) {
|
||||
} else if (strstart(uri, "rdma:", &p)) {
|
||||
rdma_start_outgoing_migration(s, p, &local_err);
|
||||
#endif
|
||||
#if !defined(WIN32)
|
||||
@ -532,7 +532,7 @@ bool migrate_rdma_pin_all(void)
|
||||
|
||||
s = migrate_get_current();
|
||||
|
||||
return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
|
||||
return s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL];
|
||||
}
|
||||
|
||||
bool migrate_auto_converge(void)
|
||||
|
@ -150,7 +150,7 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr)
|
||||
return cache_get_by_addr(cache, addr)->it_data;
|
||||
}
|
||||
|
||||
int cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata)
|
||||
int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata)
|
||||
{
|
||||
|
||||
CacheItem *it = NULL;
|
||||
|
@ -751,10 +751,9 @@
|
||||
# This feature allows us to minimize migration traffic for certain work
|
||||
# loads, by sending compressed difference of the pages
|
||||
#
|
||||
# @x-rdma-pin-all: Controls whether or not the entire VM memory footprint is
|
||||
# @rdma-pin-all: Controls whether or not the entire VM memory footprint is
|
||||
# mlock()'d on demand or all at once. Refer to docs/rdma.txt for usage.
|
||||
# Disabled by default. Experimental: may (or may not) be renamed after
|
||||
# further testing is complete. (since 1.6)
|
||||
# Disabled by default. (since 2.0)
|
||||
#
|
||||
# @zero-blocks: During storage migration encode blocks of zeroes efficiently. This
|
||||
# essentially saves 1MB of zeroes per block on the wire. Enabling requires
|
||||
@ -768,7 +767,7 @@
|
||||
# Since: 1.2
|
||||
##
|
||||
{ 'enum': 'MigrationCapability',
|
||||
'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge', 'zero-blocks'] }
|
||||
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks'] }
|
||||
|
||||
##
|
||||
# @MigrationCapabilityStatus
|
||||
|
@ -100,7 +100,14 @@ static int stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos,
|
||||
int size)
|
||||
{
|
||||
QEMUFileStdio *s = opaque;
|
||||
return fwrite(buf, 1, size, s->stdio_file);
|
||||
int res;
|
||||
|
||||
res = fwrite(buf, 1, size, s->stdio_file);
|
||||
|
||||
if (res != size) {
|
||||
return -EIO; /* fake errno value */
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static int stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
|
||||
|
15
vmstate.c
15
vmstate.c
@ -321,23 +321,24 @@ const VMStateInfo vmstate_info_int32_equal = {
|
||||
.put = put_int32,
|
||||
};
|
||||
|
||||
/* 32 bit int. See that the received value is the less or the same
|
||||
than the one in the field */
|
||||
/* 32 bit int. Check that the received value is less than or equal to
|
||||
the one in the field */
|
||||
|
||||
static int get_int32_le(QEMUFile *f, void *pv, size_t size)
|
||||
{
|
||||
int32_t *old = pv;
|
||||
int32_t new;
|
||||
qemu_get_sbe32s(f, &new);
|
||||
int32_t *cur = pv;
|
||||
int32_t loaded;
|
||||
qemu_get_sbe32s(f, &loaded);
|
||||
|
||||
if (*old <= new) {
|
||||
if (loaded <= *cur) {
|
||||
*cur = loaded;
|
||||
return 0;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
const VMStateInfo vmstate_info_int32_le = {
|
||||
.name = "int32 equal",
|
||||
.name = "int32 le",
|
||||
.get = get_int32_le,
|
||||
.put = put_int32,
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user