migration/next for 20140225
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQIcBAABCAAGBQJTDKLvAAoJEPSH7xhYctcjhA8QAKDslw9iovAHU4c0NgQxp3yE
08dAD6bznHPkc6ENZEbV4+Yx9AvtGwYeKE4IlVqxDaSCBQ1T/lGr6Di/X/Yuwjo9
80/av6cFpFsO9fw4fhFRNjU0n8xKeN2S/kjCQhz07Zky2mD2fEoLnTrhmjBRCsVN
tVCWOYzbkNbIFUCsJB0OBfC/qH0r5RuB2/SuNnwk4NwT5r7+UxMtfZ+BIE4Kez3n
l6G4L1XO3julErp/8BQmIChnHH7QtTfQzBahJIlBsiLiqHhX1f1v6Q0CRln+A9S1
jfAK/1zqpYVOAb59R2u0FCgB793sV0P+aa71ORRP1g57lFC5KsGJghQq0OoWr1YA
OHrOFPm2YHdTBsU7BG3ndMSbNgZspVAxns6mcSkcDWEH0JDv+FhK08+45tDqkAOu
9hWuYA5p6hodOEBLprNit7lK+7coAKDCkIM4hzPMVZxGCucDqRmtI0oHadjar1Wi
nTbxeDqsh67mr6+QXSR8PRQ3y0TDsuBS6Sm2+Bchv1Nt5GiAKaMySiPuXGQlMSS1
3ohy77Ltz42ci1+mFSp6aVaZO8hEkakaN8Hg53T57IVTSqy4B9t/R3bvi+SsysCt
BMaHONUnOuloKtA5dnOd6Q+hLE8tw3UNGFB71VZoj1tEbXj48WpIZ1IpQYbVAoyQ
DR2+Wccft0O3GVAgLAo0
=yrmU
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20140225' into staging

migration/next for 20140225

# gpg: Signature made Tue 25 Feb 2014 14:04:31 GMT using RSA key ID 5872D723
# gpg: Can't check signature: public key not found

* remotes/juanquintela/tags/migration/20140225:
  rdma: rename 'x-rdma' => 'rdma'
  Fix two XBZRLE corruption issues
  Fix vmstate_info_int32_le comparison/assign
  qemu_file: use fwrite() correctly

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 6f6831f61a
arch_init.c | 64
@@ -122,7 +122,6 @@ static void check_guest_throttling(void);
 #define RAM_SAVE_FLAG_XBZRLE   0x40
 /* 0x80 is reserved in migration.h start with 0x100 next */
 
-
 static struct defconfig_file {
     const char *filename;
     /* Indicates it is an user config file (disabled by -no-user-config) */
@@ -133,6 +132,7 @@ static struct defconfig_file {
     { NULL }, /* end of list */
 };
 
+static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
 
 int qemu_read_default_config_files(bool userconfig)
 {
@@ -273,6 +273,34 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
     return size;
 }
 
+/* This is the last block that we have visited serching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+/* Update the xbzrle cache to reflect a page that's been sent as all 0.
+ * The important thing is that a stale (not-yet-0'd) page be replaced
+ * by the new data.
+ * As a bonus, if the page wasn't in the cache it gets added so that
+ * when a small write is made into the 0'd page it gets XBZRLE sent
+ */
+static void xbzrle_cache_zero_page(ram_addr_t current_addr)
+{
+    if (ram_bulk_stage || !migrate_use_xbzrle()) {
+        return;
+    }
+
+    /* We don't care if this fails to allocate a new cache page
+     * as long as it updated an old one */
+    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
+}
+
 #define ENCODING_FLAG_XBZRLE 0x1
 
 static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
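The hunk above adds xbzrle_cache_zero_page(), which keeps the XBZRLE cache coherent with the zero-page fast path. As a rough standalone illustration of the corruption this prevents (all names and sizes below are invented for the sketch; none of it is QEMU code):

    /* Toy model of the stale-cache bug: build with cc demo.c && ./a.out,
     * set FIXED to 0 to reproduce the corruption. */
    #include <stdio.h>
    #include <string.h>

    #define PAGE  8      /* toy page size */
    #define FIXED 1      /* 0 = behave like the code before the fix */

    int main(void)
    {
        unsigned char page[PAGE]  = "AAAAAAA";  /* guest page on the source */
        unsigned char cache[PAGE];              /* source-side XBZRLE cache */
        unsigned char dest[PAGE];               /* destination's copy */

        /* round 1: page sent in full; cache and destination agree */
        memcpy(cache, page, PAGE);
        memcpy(dest, page, PAGE);

        /* round 2: guest zeroes the page; the zero-page shortcut is taken,
         * so the destination zeroes its copy without any XBZRLE delta */
        memset(page, 0, PAGE);
        memset(dest, 0, PAGE);
    #if FIXED
        memcpy(cache, page, PAGE);  /* the fix: cache must track what was sent */
    #endif

        /* round 3: guest restores the old contents; the source encodes only
         * bytes that differ from ITS cache, the destination applies them
         * against ITS copy; if the two disagree, the pages diverge */
        memcpy(page, "AAAAAAA", PAGE);
        for (int i = 0; i < PAGE; i++) {
            if (page[i] != cache[i]) {
                dest[i] = page[i];  /* this byte travels in the delta */
            }
        }

        printf("destination %s the source page\n",
               memcmp(dest, page, PAGE) ? "is CORRUPTED relative to" : "matches");
        return 0;
    }

With FIXED set to 0 the round-3 delta is empty (the stale cache still holds the old contents), so the destination keeps an all-zero page while the guest sees the restored data.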
@@ -329,18 +357,6 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
     return bytes_sent;
 }
 
-
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
 static inline
 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                  ram_addr_t start)
@@ -512,6 +528,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
         } else {
             int ret;
             uint8_t *p;
+            bool send_async = true;
            int cont = (block == last_sent_block) ?
                 RAM_SAVE_FLAG_CONTINUE : 0;
 
@@ -522,6 +539,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
             ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_sent);
 
+            current_addr = block->offset + offset;
             if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                 if (ret != RAM_SAVE_CONTROL_DELAYED) {
                     if (bytes_sent > 0) {
@@ -536,19 +554,35 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                                         RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, 0);
                 bytes_sent++;
+                /* Must let xbzrle know, otherwise a previous (now 0'd) cached
+                 * page would be stale
+                 */
+                xbzrle_cache_zero_page(current_addr);
             } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
-                current_addr = block->offset + offset;
                 bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                               offset, cont, last_stage);
                 if (!last_stage) {
+                    /* We must send exactly what's in the xbzrle cache
+                     * even if the page wasn't xbzrle compressed, so that
+                     * it's right next time.
+                     */
                     p = get_cached_data(XBZRLE.cache, current_addr);
+
+                    /* Can't send this cached data async, since the cache page
+                     * might get updated before it gets to the wire
+                     */
+                    send_async = false;
                 }
             }
 
             /* XBZRLE overflow or normal page */
             if (bytes_sent == -1) {
                 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                if (send_async) {
+                    qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                } else {
+                    qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                }
                 bytes_sent += TARGET_PAGE_SIZE;
                 acct_info.norm_pages++;
             }
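The send_async flag above exists because get_cached_data() returns a pointer into the cache itself; queueing that pointer for a deferred write would let a later cache update rewrite the bytes before they reach the wire. A toy model of the hazard, assuming nothing about QEMU's real buffering (every function below is invented for the sketch):

    #include <stdio.h>
    #include <string.h>

    #define LEN 16

    static const unsigned char *queued;   /* "async" path: remembers a pointer */
    static unsigned char copied[LEN];     /* "sync" path: copies right away    */

    static void put_buffer_async(const unsigned char *p) { queued = p; }
    static void put_buffer(const unsigned char *p) { memcpy(copied, p, LEN); }

    int main(void)
    {
        unsigned char cache_slot[LEN] = "version-1";

        put_buffer_async(cache_slot);     /* keeps a pointer into the cache */
        put_buffer(cache_slot);           /* takes a private copy           */

        /* the same cache slot is updated before the async queue is flushed */
        memcpy(cache_slot, "version-2", sizeof "version-2");

        printf("async path would send: %s (slot changed under it, wrong)\n",
               (const char *)queued);
        printf("sync  path would send: %s (copied in time, right)\n",
               (const char *)copied);
        return 0;
    }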

docs/rdma.txt
@@ -66,7 +66,7 @@ bulk-phase round of the migration and can be enabled for extremely
 high-performance RDMA hardware using the following command:
 
 QEMU Monitor Command:
-$ migrate_set_capability x-rdma-pin-all on # disabled by default
+$ migrate_set_capability rdma-pin-all on # disabled by default
 
 Performing this action will cause all 8GB to be pinned, so if that's
 not what you want, then please ignore this step altogether.
@@ -93,12 +93,12 @@ $ migrate_set_speed 40g # or whatever is the MAX of your RDMA device
 
 Next, on the destination machine, add the following to the QEMU command line:
 
-qemu ..... -incoming x-rdma:host:port
+qemu ..... -incoming rdma:host:port
 
 Finally, perform the actual migration on the source machine:
 
 QEMU Monitor Command:
-$ migrate -d x-rdma:host:port
+$ migrate -d rdma:host:port
 
 PERFORMANCE
 ===========
@@ -120,8 +120,8 @@ For example, in the same 8GB RAM example with all 8GB of memory in
 active use and the VM itself is completely idle using the same 40 gbps
 infiniband link:
 
-1. x-rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
-2. x-rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
+1. rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
+2. rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
 
 These numbers would of course scale up to whatever size virtual machine
 you have to migrate using RDMA.
@@ -407,18 +407,14 @@ socket is broken during a non-RDMA based migration.
 
 TODO:
 =====
-1. 'migrate x-rdma:host:port' and '-incoming x-rdma' options will be
-   renamed to 'rdma' after the experimental phase of this work has
-   completed upstream.
-2. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
+1. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
    are not compatible with infinband memory pinning and will result in
    an aborted migration (but with the source VM left unaffected).
-3. Use of the recent /proc/<pid>/pagemap would likely speed up
+2. Use of the recent /proc/<pid>/pagemap would likely speed up
    the use of KSM and ballooning while using RDMA.
-4. Also, some form of balloon-device usage tracking would also
+3. Also, some form of balloon-device usage tracking would also
    help alleviate some issues.
-5. Move UNREGISTER requests to a separate thread.
-6. Use LRU to provide more fine-grained direction of UNREGISTER
+4. Use LRU to provide more fine-grained direction of UNREGISTER
    requests for unpinning memory in an overcommitted environment.
-7. Expose UNREGISTER support to the user by way of workload-specific
+5. Expose UNREGISTER support to the user by way of workload-specific
    hints about application behavior.

include/migration/page_cache.h
@@ -66,7 +66,7 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr);
  * @addr: page address
  * @pdata: pointer to the page
  */
-int cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata);
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata);
 
 /**
  * cache_resize: resize the page cache. In case of size reduction the extra
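Presumably the parameter becomes const so the new zero-page path can pass the read-only ZERO_TARGET_PAGE array without a cast. A minimal sketch of that constraint (names below are invented, not the QEMU declarations):

    #include <stdint.h>

    static const uint8_t ZERO_PAGE[4096];       /* read-only, all zeroes */

    int insert_old(uint8_t *pdata)       { (void)pdata; return 0; }
    int insert_new(const uint8_t *pdata) { (void)pdata; return 0; }

    int main(void)
    {
        /* insert_old(ZERO_PAGE);   warning: discards 'const' qualifier */
        return insert_new(ZERO_PAGE);  /* fine: callee promises not to write */
    }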

migration-rdma.c
@@ -3412,7 +3412,7 @@ void rdma_start_outgoing_migration(void *opaque,
     }
 
     ret = qemu_rdma_source_init(rdma, &local_err,
-        s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL]);
+        s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]);
 
     if (ret) {
         goto err;

migration.c
@@ -82,7 +82,7 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
     if (strstart(uri, "tcp:", &p))
         tcp_start_incoming_migration(p, errp);
 #ifdef CONFIG_RDMA
-    else if (strstart(uri, "x-rdma:", &p))
+    else if (strstart(uri, "rdma:", &p))
         rdma_start_incoming_migration(p, errp);
 #endif
 #if !defined(WIN32)
@@ -438,7 +438,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     if (strstart(uri, "tcp:", &p)) {
         tcp_start_outgoing_migration(s, p, &local_err);
 #ifdef CONFIG_RDMA
-    } else if (strstart(uri, "x-rdma:", &p)) {
+    } else if (strstart(uri, "rdma:", &p)) {
         rdma_start_outgoing_migration(s, p, &local_err);
 #endif
 #if !defined(WIN32)
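Both call sites dispatch on the URI scheme prefix. A runnable sketch of that dispatch, with a local reimplementation of a strstart()-style helper (this file stands alone and only models the "tcp:" and "rdma:" schemes):

    #include <stdio.h>
    #include <string.h>

    /* returns non-zero if str begins with prefix; *rest points past it */
    static int strstart(const char *str, const char *prefix, const char **rest)
    {
        size_t n = strlen(prefix);
        if (strncmp(str, prefix, n) != 0) {
            return 0;
        }
        if (rest) {
            *rest = str + n;
        }
        return 1;
    }

    int main(void)
    {
        const char *uri = "rdma:192.168.0.2:4444";
        const char *p;

        if (strstart(uri, "tcp:", &p)) {
            printf("tcp transport, endpoint %s\n", p);
        } else if (strstart(uri, "rdma:", &p)) { /* "x-rdma:" before the rename */
            printf("rdma transport, endpoint %s\n", p);
        } else {
            printf("unknown transport\n");
        }
        return 0;
    }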
@@ -532,7 +532,7 @@ bool migrate_rdma_pin_all(void)
 
     s = migrate_get_current();
 
-    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL];
 }
 
 bool migrate_auto_converge(void)

page_cache.c
@@ -150,7 +150,7 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr)
     return cache_get_by_addr(cache, addr)->it_data;
 }
 
-int cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata)
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata)
 {
 
     CacheItem *it = NULL;

qapi-schema.json
@@ -751,10 +751,9 @@
 # This feature allows us to minimize migration traffic for certain work
 # loads, by sending compressed difference of the pages
 #
-# @x-rdma-pin-all: Controls whether or not the entire VM memory footprint is
+# @rdma-pin-all: Controls whether or not the entire VM memory footprint is
 #          mlock()'d on demand or all at once. Refer to docs/rdma.txt for usage.
-#          Disabled by default. Experimental: may (or may not) be renamed after
-#          further testing is complete. (since 1.6)
+#          Disabled by default. (since 2.0)
 #
 # @zero-blocks: During storage migration encode blocks of zeroes efficiently. This
 #               essentially saves 1MB of zeroes per block on the wire. Enabling requires
@@ -768,7 +767,7 @@
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
-  'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge', 'zero-blocks'] }
+  'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks'] }
 
 ##
 # @MigrationCapabilityStatus

qemu-file.c
@@ -100,7 +100,14 @@ static int stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos,
                             int size)
 {
     QEMUFileStdio *s = opaque;
-    return fwrite(buf, 1, size, s->stdio_file);
+    int res;
+
+    res = fwrite(buf, 1, size, s->stdio_file);
+
+    if (res != size) {
+        return -EIO; /* fake errno value */
+    }
+    return res;
 }
 
 static int stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
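fwrite() reports a short item count on error rather than a negative value, so the old code returned a too-small count as if it were a success status. A self-contained sketch of the corrected pattern (the helper name is invented; only the check mirrors the commit):

    #include <errno.h>
    #include <stdio.h>

    static int fwrite_checked(FILE *fp, const void *buf, size_t size)
    {
        size_t res = fwrite(buf, 1, size, fp);

        if (res != size) {
            /* stdio does not hand back the real errno here, so the
             * commit substitutes a fake one */
            return -EIO;
        }
        return (int)res;
    }

    int main(void)
    {
        const char msg[] = "checked write\n";
        return fwrite_checked(stdout, msg, sizeof(msg) - 1) < 0 ? 1 : 0;
    }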

vmstate.c | 15
@@ -321,23 +321,24 @@ const VMStateInfo vmstate_info_int32_equal = {
     .put = put_int32,
 };
 
-/* 32 bit int. See that the received value is the less or the same
-   than the one in the field */
+/* 32 bit int. Check that the received value is less than or equal to
+   the one in the field */
 
 static int get_int32_le(QEMUFile *f, void *pv, size_t size)
 {
-    int32_t *old = pv;
-    int32_t new;
-    qemu_get_sbe32s(f, &new);
+    int32_t *cur = pv;
+    int32_t loaded;
+    qemu_get_sbe32s(f, &loaded);
 
-    if (*old <= new) {
+    if (loaded <= *cur) {
+        *cur = loaded;
         return 0;
     }
     return -EINVAL;
 }
 
 const VMStateInfo vmstate_info_int32_le = {
-    .name = "int32 equal",
+    .name = "int32 le",
     .get = get_int32_le,
     .put = put_int32,
 };
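Before this fix, get_int32_le() compared in the wrong direction and never stored the incoming value into the field. A standalone sketch of the corrected rule, outside the VMState machinery (names below are invented):

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* accept and store the incoming value only if it does not exceed
     * the value already in the field */
    static int load_int32_le(int32_t *field, int32_t loaded)
    {
        if (loaded <= *field) {
            *field = loaded;   /* the old code omitted this assignment */
            return 0;
        }
        return -EINVAL;        /* incoming state needs more than we have */
    }

    int main(void)
    {
        int32_t field = 100;

        printf("load 50:  %s (field=%d)\n",
               load_int32_le(&field, 50) ? "rejected" : "accepted", field);
        printf("load 200: %s (field=%d)\n",
               load_int32_le(&field, 200) ? "rejected" : "accepted", field);
        return 0;
    }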