postcopy: Allow registering of fd handler

Allow other userfaultfd's to be registered into the fault thread
so that handlers for shared memory can get responses.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
Dr. David Alan Gilbert 2018-03-12 17:21:04 +00:00 committed by Michael S. Tsirkin
parent 2a84ffc0be
commit 00fa4fc85b
5 changed files with 186 additions and 52 deletions

View File

@ -155,6 +155,8 @@ MigrationIncomingState *migration_incoming_get_current(void)
if (!once) {
mis_current.state = MIGRATION_STATUS_NONE;
memset(&mis_current, 0, sizeof(MigrationIncomingState));
mis_current.postcopy_remote_fds = g_array_new(FALSE, TRUE,
sizeof(struct PostCopyFD));
qemu_mutex_init(&mis_current.rp_mutex);
qemu_event_init(&mis_current.main_thread_load_event, false);
once = true;
@ -177,6 +179,10 @@ void migration_incoming_state_destroy(void)
qemu_fclose(mis->from_src_file);
mis->from_src_file = NULL;
}
if (mis->postcopy_remote_fds) {
g_array_free(mis->postcopy_remote_fds, TRUE);
mis->postcopy_remote_fds = NULL;
}
qemu_event_reset(&mis->main_thread_load_event);
}

View File

@ -51,6 +51,8 @@ struct MigrationIncomingState {
QemuMutex rp_mutex; /* We send replies from multiple threads */
void *postcopy_tmp_page;
void *postcopy_tmp_zero_page;
/* PostCopyFD's for external userfaultfds & handlers of shared memory */
GArray *postcopy_remote_fds;
QEMUBH *bh;

View File

@ -533,29 +533,44 @@ static void *postcopy_ram_fault_thread(void *opaque)
MigrationIncomingState *mis = opaque;
struct uffd_msg msg;
int ret;
size_t index;
RAMBlock *rb = NULL;
RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
trace_postcopy_ram_fault_thread_entry();
qemu_sem_post(&mis->fault_thread_sem);
struct pollfd *pfd;
size_t pfd_len = 2 + mis->postcopy_remote_fds->len;
pfd = g_new0(struct pollfd, pfd_len);
pfd[0].fd = mis->userfault_fd;
pfd[0].events = POLLIN;
pfd[1].fd = mis->userfault_event_fd;
pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd);
for (index = 0; index < mis->postcopy_remote_fds->len; index++) {
struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds,
struct PostCopyFD, index);
pfd[2 + index].fd = pcfd->fd;
pfd[2 + index].events = POLLIN;
trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr,
pcfd->fd);
}
while (true) {
ram_addr_t rb_offset;
struct pollfd pfd[2];
int poll_result;
/*
* We're mainly waiting for the kernel to give us a faulting HVA,
* however we can be told to quit via userfault_quit_fd which is
* an eventfd
*/
pfd[0].fd = mis->userfault_fd;
pfd[0].events = POLLIN;
pfd[0].revents = 0;
pfd[1].fd = mis->userfault_event_fd;
pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
pfd[1].revents = 0;
if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
poll_result = poll(pfd, pfd_len, -1 /* Wait forever */);
if (poll_result == -1) {
error_report("%s: userfault poll: %s", __func__, strerror(errno));
break;
}
@ -575,57 +590,117 @@ static void *postcopy_ram_fault_thread(void *opaque)
}
}
ret = read(mis->userfault_fd, &msg, sizeof(msg));
if (ret != sizeof(msg)) {
if (errno == EAGAIN) {
/*
* if a wake up happens on the other thread just after
* the poll, there is nothing to read.
*/
continue;
if (pfd[0].revents) {
poll_result--;
ret = read(mis->userfault_fd, &msg, sizeof(msg));
if (ret != sizeof(msg)) {
if (errno == EAGAIN) {
/*
* if a wake up happens on the other thread just after
* the poll, there is nothing to read.
*/
continue;
}
if (ret < 0) {
error_report("%s: Failed to read full userfault "
"message: %s",
__func__, strerror(errno));
break;
} else {
error_report("%s: Read %d bytes from userfaultfd "
"expected %zd",
__func__, ret, sizeof(msg));
break; /* Lost alignment, don't know what we'd read next */
}
}
if (ret < 0) {
error_report("%s: Failed to read full userfault message: %s",
__func__, strerror(errno));
if (msg.event != UFFD_EVENT_PAGEFAULT) {
error_report("%s: Read unexpected event %ud from userfaultfd",
__func__, msg.event);
continue; /* It's not a page fault, shouldn't happen */
}
rb = qemu_ram_block_from_host(
(void *)(uintptr_t)msg.arg.pagefault.address,
true, &rb_offset);
if (!rb) {
error_report("postcopy_ram_fault_thread: Fault outside guest: %"
PRIx64, (uint64_t)msg.arg.pagefault.address);
break;
} else {
error_report("%s: Read %d bytes from userfaultfd expected %zd",
__func__, ret, sizeof(msg));
break; /* Lost alignment, don't know what we'd read next */
}
}
if (msg.event != UFFD_EVENT_PAGEFAULT) {
error_report("%s: Read unexpected event %ud from userfaultfd",
__func__, msg.event);
continue; /* It's not a page fault, shouldn't happen */
}
rb = qemu_ram_block_from_host(
(void *)(uintptr_t)msg.arg.pagefault.address,
true, &rb_offset);
if (!rb) {
error_report("postcopy_ram_fault_thread: Fault outside guest: %"
PRIx64, (uint64_t)msg.arg.pagefault.address);
break;
}
rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
qemu_ram_get_idstr(rb),
rb_offset);
/*
* Send the request to the source - we want to request one
* of our host page sizes (which is >= TPS)
*/
if (rb != last_rb) {
last_rb = rb;
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
rb_offset, qemu_ram_pagesize(rb));
} else {
/* Save some space */
migrate_send_rp_req_pages(mis, NULL,
rb_offset, qemu_ram_pagesize(rb));
}
}
/*
* Send the request to the source - we want to request one
* of our host page sizes (which is >= TPS)
*/
if (rb != last_rb) {
last_rb = rb;
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
rb_offset, qemu_ram_pagesize(rb));
} else {
/* Save some space */
migrate_send_rp_req_pages(mis, NULL,
rb_offset, qemu_ram_pagesize(rb));
/* Now handle any requests from external processes on shared memory */
/* TODO: May need to handle devices deregistering during postcopy */
for (index = 2; index < pfd_len && poll_result; index++) {
if (pfd[index].revents) {
struct PostCopyFD *pcfd =
&g_array_index(mis->postcopy_remote_fds,
struct PostCopyFD, index - 2);
poll_result--;
if (pfd[index].revents & POLLERR) {
error_report("%s: POLLERR on poll %zd fd=%d",
__func__, index, pcfd->fd);
pfd[index].events = 0;
continue;
}
ret = read(pcfd->fd, &msg, sizeof(msg));
if (ret != sizeof(msg)) {
if (errno == EAGAIN) {
/*
* if a wake up happens on the other thread just after
* the poll, there is nothing to read.
*/
continue;
}
if (ret < 0) {
error_report("%s: Failed to read full userfault "
"message: %s (shared) revents=%d",
__func__, strerror(errno),
pfd[index].revents);
/*TODO: Could just disable this sharer */
break;
} else {
error_report("%s: Read %d bytes from userfaultfd "
"expected %zd (shared)",
__func__, ret, sizeof(msg));
/*TODO: Could just disable this sharer */
break; /*Lost alignment,don't know what we'd read next*/
}
}
if (msg.event != UFFD_EVENT_PAGEFAULT) {
error_report("%s: Read unexpected event %ud "
"from userfaultfd (shared)",
__func__, msg.event);
continue; /* It's not a page fault, shouldn't happen */
}
/* Call the device handler registered with us */
ret = pcfd->handler(pcfd, &msg);
if (ret) {
error_report("%s: Failed to resolve shared fault on %zd/%s",
__func__, index, pcfd->idstr);
/* TODO: Fail? Disable this sharer? */
}
}
}
}
trace_postcopy_ram_fault_thread_exit();
@ -970,3 +1045,31 @@ PostcopyState postcopy_state_set(PostcopyState new_state)
{
return atomic_xchg(&incoming_postcopy_state, new_state);
}
/* Register a handler for external shared memory postcopy
* called on the destination.
*/
void postcopy_register_shared_ufd(struct PostCopyFD *pcfd)
{
MigrationIncomingState *mis = migration_incoming_get_current();
mis->postcopy_remote_fds = g_array_append_val(mis->postcopy_remote_fds,
*pcfd);
}
/*
 * Unregister a handler for external shared memory postcopy.
 *
 * Removes the first registered entry whose fd equals pcfd->fd. If no entry
 * matches, or the array of registered fds no longer exists, this is a no-op.
 */
void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
{
    guint i;
    MigrationIncomingState *mis = migration_incoming_get_current();
    GArray *pcrfds = mis->postcopy_remote_fds;

    if (!pcrfds) {
        /*
         * migration_incoming_state_destroy() frees the array and clears
         * the pointer; a late unregister must not dereference it.
         */
        return;
    }

    for (i = 0; i < pcrfds->len; i++) {
        struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i);

        /* Entries are matched on the fd alone, not on handler or data */
        if (cur->fd == pcfd->fd) {
            mis->postcopy_remote_fds = g_array_remove_index(pcrfds, i);
            return;
        }
    }
}

View File

@ -143,4 +143,25 @@ void postcopy_remove_notifier(NotifierWithReturn *n);
/* Call the notifier list set by postcopy_add_start_notifier */
int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp);
/* Forward declaration so the handler typedef can take a PostCopyFD pointer */
struct PostCopyFD;
/*
 * Callback type for a registered shared-memory userfaultfd.
 * pcfd is the registered entry the message was read from.
 * ufd is a pointer to the struct uffd_msg *TODO: more Portable!
 * The fault thread reports a non-zero return value as a failure to
 * resolve the shared fault.
 */
typedef int (*pcfdhandler)(struct PostCopyFD *pcfd, void *ufd);
/*
 * One externally owned userfaultfd serviced by the postcopy fault thread.
 * Registration stores a copy of this struct in the incoming state's
 * postcopy_remote_fds array.
 */
struct PostCopyFD {
/* The fd the fault thread polls for POLLIN and reads messages from */
int fd;
/* Data to pass to handler */
void *data;
/*
 * Handler called by the fault thread once a page-fault message
 * (UFFD_EVENT_PAGEFAULT) has been read from fd
 */
pcfdhandler handler;
/* A string to use in error messages and trace output */
const char *idstr;
};
/*
 * Register a userfaultfd owned by an external process for
 * shared memory. The contents of *pcfd are copied into the list of
 * fds associated with the incoming migration state.
 */
void postcopy_register_shared_ufd(struct PostCopyFD *pcfd);
/*
 * Unregister a previously registered userfaultfd; the entry to remove
 * is found by comparing pcfd->fd against the registered fds.
 */
void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd);
#endif

View File

@ -190,6 +190,8 @@ postcopy_place_page_zero(void *host_addr) "host=%p"
postcopy_ram_enable_notify(void) ""
postcopy_ram_fault_thread_entry(void) ""
postcopy_ram_fault_thread_exit(void) ""
postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d"
postcopy_ram_fault_thread_fds_extra(size_t index, const char *name, int fd) "%zd/%s: %d"
postcopy_ram_fault_thread_quit(void) ""
postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx"
postcopy_ram_incoming_cleanup_closeuf(void) ""