f5f48a7891
Multifd provides a threaded model for processing jobs. On the sender side, there can be two kinds of job: (1) a list of pages to send, or (2) a sync request.

The sync request is a very special kind of job. It never contains a page array, but only a multifd packet telling the dest side to synchronize with sent pages.

Before this patch, both requests use the pending_job field: no matter what the request is, it will boost pending_job, while the multifd sender thread will decrement it after it finishes one job.

However this should be racy, because SYNC is special in that it needs to set p->flags with MULTIFD_FLAG_SYNC, showing that this is a sync request. Consider a sequence of operations where:

  - migration thread enqueues a job to send some pages, pending_job++ (0->1)
  - [...before the selected multifd sender thread wakes up...]
  - migration thread enqueues another job to sync, pending_job++ (1->2), and sets up p->flags=MULTIFD_FLAG_SYNC
  - multifd sender thread wakes up, finds pending_job==2
    - sends the 1st packet with MULTIFD_FLAG_SYNC and the list of pages
    - sends the 2nd packet with flags==0 and no pages

This is not expected, because MULTIFD_FLAG_SYNC should hopefully be done after all the pages are received. Meanwhile, the 2nd packet will be completely useless, since it contains zero information.

I didn't verify the above, but I think this issue is still benign in that at least on the recv side we always receive pages before handling MULTIFD_FLAG_SYNC. However that's not always guaranteed and just tricky.

One other reason I want to separate it is that using p->flags to communicate between the two threads is also not clearly defined; it's very hard to read and understand why accessing p->flags is always safe; see the current impl of multifd_send_thread() where we tried to cache only p->flags. It doesn't need to be that complicated.

This patch introduces pending_sync, a separate flag just to show that the requester needs a sync.
Alongside, we remove the tricky caching of p->flags, because after this patch p->flags should only be used by the multifd sender thread, which will be crystal clear. So it is always thread safe to access p->flags.

With that, we can also safely convert pending_job into a boolean, because we don't support >1 pending jobs anyway.

Always use atomic ops to access both flags to make sure there is no cache effect.

While at it, drop the initial setting of "pending_job = 0" because it's always allocated using g_new0().

Reviewed-by: Fabiano Rosas <farosas@suse.de>
Link: https://lore.kernel.org/r/20240202102857.110210-7-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
214 lines
6.1 KiB
C
214 lines
6.1 KiB
C
/*
 * Multifd common functions
 *
 * Copyright (c) 2019-2020 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
|
|
|
|
/* Include guard: the matching #endif closes the header. */
#ifndef QEMU_MIGRATION_MULTIFD_H
#define QEMU_MIGRATION_MULTIFD_H
|
|
|
|
int multifd_save_setup(Error **errp);
|
|
void multifd_save_cleanup(void);
|
|
int multifd_load_setup(Error **errp);
|
|
void multifd_load_cleanup(void);
|
|
void multifd_load_shutdown(void);
|
|
bool multifd_recv_all_channels_created(void);
|
|
void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
|
|
void multifd_recv_sync_main(void);
|
|
int multifd_send_sync_main(void);
|
|
int multifd_queue_page(RAMBlock *block, ram_addr_t offset);
|
|
|
|
/*
 * Multifd flags.
 *
 * Bit 0 is the sync flag; bits 1-3 (MULTIFD_FLAG_COMPRESSION_MASK) select
 * the compression method.
 */
#define MULTIFD_FLAG_SYNC (1 << 0)

/* We reserve 3 bits for compression methods */
#define MULTIFD_FLAG_COMPRESSION_MASK (7 << 1)
/* we need to be compatible. Before compression value was 0 */
#define MULTIFD_FLAG_NOCOMP (0 << 1)
#define MULTIFD_FLAG_ZLIB (1 << 1)
#define MULTIFD_FLAG_ZSTD (2 << 1)

/* This value needs to be a multiple of qemu_target_page_size() */
#define MULTIFD_PACKET_SIZE (512 * 1024)
|
|
|
|
/*
 * Fixed header of a multifd packet, followed by a variable number of
 * offsets.
 *
 * The struct is packed, so the fields are laid out back to back without
 * padding.  It ends in a flexible array member: sizeof() covers only the
 * fixed part, and room for the offset[] entries has to be allocated on top
 * of it.
 *
 * NOTE(review): presumably this is the layout exchanged with the peer, in
 * which case fields must not be reordered or resized -- confirm.
 */
typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    /* non zero pages */
    uint32_t normal_pages;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    uint64_t unused[4];    /* Reserved for future use */
    char ramblock[256];
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;
|
|
|
|
typedef struct {
|
|
/* number of used pages */
|
|
uint32_t num;
|
|
/* number of allocated pages */
|
|
uint32_t allocated;
|
|
/* offset of each page */
|
|
ram_addr_t *offset;
|
|
RAMBlock *block;
|
|
} MultiFDPages_t;
|
|
|
|
typedef struct {
|
|
/* Fields are only written at creating/deletion time */
|
|
/* No lock required for them, they are read only */
|
|
|
|
/* channel number */
|
|
uint8_t id;
|
|
/* channel thread name */
|
|
char *name;
|
|
/* channel thread id */
|
|
QemuThread thread;
|
|
/* communication channel */
|
|
QIOChannel *c;
|
|
/* is the yank function registered */
|
|
bool registered_yank;
|
|
/* packet allocated len */
|
|
uint32_t packet_len;
|
|
/* guest page size */
|
|
uint32_t page_size;
|
|
/* number of pages in a full packet */
|
|
uint32_t page_count;
|
|
/* multifd flags for sending ram */
|
|
int write_flags;
|
|
|
|
/* sem where to wait for more work */
|
|
QemuSemaphore sem;
|
|
/* syncs main thread and channels */
|
|
QemuSemaphore sem_sync;
|
|
|
|
/* this mutex protects the following parameters */
|
|
QemuMutex mutex;
|
|
/* is this channel thread running */
|
|
bool running;
|
|
/* multifd flags for each packet */
|
|
uint32_t flags;
|
|
/* global number of generated multifd packets */
|
|
uint64_t packet_num;
|
|
/*
|
|
* The sender thread has work to do if either of below boolean is set.
|
|
*
|
|
* @pending_job: a job is pending
|
|
* @pending_sync: a sync request is pending
|
|
*
|
|
* For both of these fields, they're only set by the requesters, and
|
|
* cleared by the multifd sender threads.
|
|
*/
|
|
bool pending_job;
|
|
bool pending_sync;
|
|
/* array of pages to sent.
|
|
* The owner of 'pages' depends of 'pending_job' value:
|
|
* pending_job == 0 -> migration_thread can use it.
|
|
* pending_job != 0 -> multifd_channel can use it.
|
|
*/
|
|
MultiFDPages_t *pages;
|
|
|
|
/* thread local variables. No locking required */
|
|
|
|
/* pointer to the packet */
|
|
MultiFDPacket_t *packet;
|
|
/* size of the next packet that contains pages */
|
|
uint32_t next_packet_size;
|
|
/* packets sent through this channel */
|
|
uint64_t num_packets;
|
|
/* non zero pages sent through this channel */
|
|
uint64_t total_normal_pages;
|
|
/* buffers to send */
|
|
struct iovec *iov;
|
|
/* number of iovs used */
|
|
uint32_t iovs_num;
|
|
/* used for compression methods */
|
|
void *data;
|
|
} MultiFDSendParams;
|
|
|
|
typedef struct {
|
|
/* Fields are only written at creating/deletion time */
|
|
/* No lock required for them, they are read only */
|
|
|
|
/* channel number */
|
|
uint8_t id;
|
|
/* channel thread name */
|
|
char *name;
|
|
/* channel thread id */
|
|
QemuThread thread;
|
|
/* communication channel */
|
|
QIOChannel *c;
|
|
/* packet allocated len */
|
|
uint32_t packet_len;
|
|
/* guest page size */
|
|
uint32_t page_size;
|
|
/* number of pages in a full packet */
|
|
uint32_t page_count;
|
|
|
|
/* syncs main thread and channels */
|
|
QemuSemaphore sem_sync;
|
|
|
|
/* this mutex protects the following parameters */
|
|
QemuMutex mutex;
|
|
/* is this channel thread running */
|
|
bool running;
|
|
/* should this thread finish */
|
|
bool quit;
|
|
/* multifd flags for each packet */
|
|
uint32_t flags;
|
|
/* global number of generated multifd packets */
|
|
uint64_t packet_num;
|
|
|
|
/* thread local variables. No locking required */
|
|
|
|
/* pointer to the packet */
|
|
MultiFDPacket_t *packet;
|
|
/* size of the next packet that contains pages */
|
|
uint32_t next_packet_size;
|
|
/* packets sent through this channel */
|
|
uint64_t num_packets;
|
|
/* ramblock */
|
|
RAMBlock *block;
|
|
/* ramblock host address */
|
|
uint8_t *host;
|
|
/* non zero pages recv through this channel */
|
|
uint64_t total_normal_pages;
|
|
/* buffers to recv */
|
|
struct iovec *iov;
|
|
/* Pages that are not zero */
|
|
ram_addr_t *normal;
|
|
/* num of non zero pages */
|
|
uint32_t normal_num;
|
|
/* used for de-compression methods */
|
|
void *data;
|
|
} MultiFDRecvParams;
|
|
|
|
typedef struct {
|
|
/* Setup for sending side */
|
|
int (*send_setup)(MultiFDSendParams *p, Error **errp);
|
|
/* Cleanup for sending side */
|
|
void (*send_cleanup)(MultiFDSendParams *p, Error **errp);
|
|
/* Prepare the send packet */
|
|
int (*send_prepare)(MultiFDSendParams *p, Error **errp);
|
|
/* Setup for receiving side */
|
|
int (*recv_setup)(MultiFDRecvParams *p, Error **errp);
|
|
/* Cleanup for receiving side */
|
|
void (*recv_cleanup)(MultiFDRecvParams *p);
|
|
/* Read all pages */
|
|
int (*recv_pages)(MultiFDRecvParams *p, Error **errp);
|
|
} MultiFDMethods;
|
|
|
|
/*
 * Register @ops as the callback table for @method.
 *
 * NOTE(review): @method is presumably a compression-method identifier and
 * @ops is presumably kept by reference (so it must outlive the call) --
 * neither is visible in this header; confirm against the definition.
 */
void multifd_register_ops(int method, MultiFDMethods *ops);
|
|
|
|
#endif
|
|
|