multifd: Copy pages before compressing them with zlib

zlib_send_prepare() compresses pages of a running VM, so a page may be
modified while it is being compressed. zlib does not make any
thread-safety guarantees with respect to changing the deflate() input
concurrently with deflate() [1].

One can observe problems due to this with a zlib build that supports the
IBM zEnterprise Data Compression accelerator [2]. When the hardware
acceleration is enabled, the migration/multifd/tcp/plain/zlib test fails
intermittently [3] due to sliding window corruption. The accelerator's
architecture explicitly discourages concurrent accesses [4]:

    Page 26-57, "Other Conditions":

    As observed by this CPU, other CPUs, and channel
    programs, references to the parameter block, first,
    second, and third operands may be multiple-access
    references, accesses to these storage locations are
    not necessarily block-concurrent, and the sequence
    of these accesses or references is undefined.

Mark Adler pointed out that vanilla zlib performs double fetches under
certain circumstances as well [5]; therefore, we need to copy data
before passing it to deflate().

[1] https://zlib.net/manual.html
[2] https://github.com/madler/zlib/pull/410
[3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html
[4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf
[5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20220705203559.2960949-1-iii@linux.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
This commit is contained in:
Ilya Leoshkevich 2022-07-05 22:35:59 +02:00 committed by Dr. David Alan Gilbert
parent 8aff6f501d
commit 007e179ef0

View File

@ -27,6 +27,8 @@ struct zlib_data {
uint8_t *zbuff; uint8_t *zbuff;
/* size of compressed buffer */ /* size of compressed buffer */
uint32_t zbuff_len; uint32_t zbuff_len;
/* uncompressed buffer of size qemu_target_page_size() */
uint8_t *buf;
}; };
/* Multifd zlib compression */ /* Multifd zlib compression */
@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
{ {
struct zlib_data *z = g_new0(struct zlib_data, 1); struct zlib_data *z = g_new0(struct zlib_data, 1);
z_stream *zs = &z->zs; z_stream *zs = &z->zs;
const char *err_msg;
zs->zalloc = Z_NULL; zs->zalloc = Z_NULL;
zs->zfree = Z_NULL; zs->zfree = Z_NULL;
zs->opaque = Z_NULL; zs->opaque = Z_NULL;
if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) {
g_free(z); err_msg = "deflate init failed";
error_setg(errp, "multifd %u: deflate init failed", p->id); goto err_free_z;
return -1;
} }
/* This is the maximum size of the compressed buffer */ /* This is the maximum size of the compressed buffer */
z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE); z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE);
z->zbuff = g_try_malloc(z->zbuff_len); z->zbuff = g_try_malloc(z->zbuff_len);
if (!z->zbuff) { if (!z->zbuff) {
deflateEnd(&z->zs); err_msg = "out of memory for zbuff";
g_free(z); goto err_deflate_end;
error_setg(errp, "multifd %u: out of memory for zbuff", p->id); }
return -1; z->buf = g_try_malloc(qemu_target_page_size());
if (!z->buf) {
err_msg = "out of memory for buf";
goto err_free_zbuff;
} }
p->data = z; p->data = z;
return 0; return 0;
err_free_zbuff:
g_free(z->zbuff);
err_deflate_end:
deflateEnd(&z->zs);
err_free_z:
g_free(z);
error_setg(errp, "multifd %u: %s", p->id, err_msg);
return -1;
} }
/** /**
@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
deflateEnd(&z->zs); deflateEnd(&z->zs);
g_free(z->zbuff); g_free(z->zbuff);
z->zbuff = NULL; z->zbuff = NULL;
g_free(z->buf);
z->buf = NULL;
g_free(p->data); g_free(p->data);
p->data = NULL; p->data = NULL;
} }
@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
flush = Z_SYNC_FLUSH; flush = Z_SYNC_FLUSH;
} }
/*
* Since the VM might be running, the page may be changing concurrently
* with compression. zlib does not guarantee that this is safe,
* therefore copy the page before calling deflate().
*/
memcpy(z->buf, p->pages->block->host + p->normal[i], page_size);
zs->avail_in = page_size; zs->avail_in = page_size;
zs->next_in = p->pages->block->host + p->normal[i]; zs->next_in = z->buf;
zs->avail_out = available; zs->avail_out = available;
zs->next_out = z->zbuff + out_size; zs->next_out = z->zbuff + out_size;