From 0ff0fad23d3693ecf7a0c462cdb48f0e60f93808 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 19 Jan 2016 11:14:28 +0000 Subject: [PATCH 01/49] char: remove fixed length filename allocation A variety of places were snprintf()ing into a fixed length filename buffer. Some of the buffers were stack allocated, while another was heap allocated with g_malloc(). Switch them all to heap allocated using g_strdup_printf() avoiding arbitrary length restrictions. This also facilitates later patches which will want to populate the filename by calling external functions which do not support use of a pre-allocated buffer. Signed-off-by: Daniel P. Berrange Message-Id: <1453202071-10289-2-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- qemu-char.c | 88 +++++++++++++++++++++++++++-------------------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/qemu-char.c b/qemu-char.c index e133f4fc35..d818adbcd9 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -88,39 +88,37 @@ #define READ_BUF_LEN 4096 #define READ_RETRIES 10 -#define CHR_MAX_FILENAME_SIZE 256 #define TCP_MAX_FDS 16 /***********************************************************/ /* Socket address helpers */ -static int SocketAddress_to_str(char *dest, int max_len, - const char *prefix, SocketAddress *addr, - bool is_listen, bool is_telnet) +static char *SocketAddress_to_str(const char *prefix, SocketAddress *addr, + bool is_listen, bool is_telnet) { switch (addr->type) { case SOCKET_ADDRESS_KIND_INET: - return snprintf(dest, max_len, "%s%s:%s:%s%s", prefix, - is_telnet ? "telnet" : "tcp", addr->u.inet->host, - addr->u.inet->port, is_listen ? ",server" : ""); + return g_strdup_printf("%s%s:%s:%s%s", prefix, + is_telnet ? "telnet" : "tcp", addr->u.inet->host, + addr->u.inet->port, is_listen ? ",server" : ""); break; case SOCKET_ADDRESS_KIND_UNIX: - return snprintf(dest, max_len, "%sunix:%s%s", prefix, - addr->u.q_unix->path, is_listen ? 
",server" : ""); + return g_strdup_printf("%sunix:%s%s", prefix, + addr->u.q_unix->path, + is_listen ? ",server" : ""); break; case SOCKET_ADDRESS_KIND_FD: - return snprintf(dest, max_len, "%sfd:%s%s", prefix, addr->u.fd->str, - is_listen ? ",server" : ""); + return g_strdup_printf("%sfd:%s%s", prefix, addr->u.fd->str, + is_listen ? ",server" : ""); break; default: abort(); } } -static int sockaddr_to_str(char *dest, int max_len, - struct sockaddr_storage *ss, socklen_t ss_len, - struct sockaddr_storage *ps, socklen_t ps_len, - bool is_listen, bool is_telnet) +static char *sockaddr_to_str(struct sockaddr_storage *ss, socklen_t ss_len, + struct sockaddr_storage *ps, socklen_t ps_len, + bool is_listen, bool is_telnet) { char shost[NI_MAXHOST], sserv[NI_MAXSERV]; char phost[NI_MAXHOST], pserv[NI_MAXSERV]; @@ -129,9 +127,9 @@ static int sockaddr_to_str(char *dest, int max_len, switch (ss->ss_family) { #ifndef _WIN32 case AF_UNIX: - return snprintf(dest, max_len, "unix:%s%s", - ((struct sockaddr_un *)(ss))->sun_path, - is_listen ? ",server" : ""); + return g_strdup_printf("unix:%s%s", + ((struct sockaddr_un *)(ss))->sun_path, + is_listen ? ",server" : ""); #endif case AF_INET6: left = "["; @@ -142,14 +140,14 @@ static int sockaddr_to_str(char *dest, int max_len, sserv, sizeof(sserv), NI_NUMERICHOST | NI_NUMERICSERV); getnameinfo((struct sockaddr *) ps, ps_len, phost, sizeof(phost), pserv, sizeof(pserv), NI_NUMERICHOST | NI_NUMERICSERV); - return snprintf(dest, max_len, "%s:%s%s%s:%s%s <-> %s%s%s:%s", - is_telnet ? "telnet" : "tcp", - left, shost, right, sserv, - is_listen ? ",server" : "", - left, phost, right, pserv); + return g_strdup_printf("%s:%s%s%s:%s%s <-> %s%s%s:%s", + is_telnet ? "telnet" : "tcp", + left, shost, right, sserv, + is_listen ? 
",server" : "", + left, phost, right, pserv); default: - return snprintf(dest, max_len, "unknown"); + return g_strdup_printf("unknown"); } } @@ -923,6 +921,7 @@ static GIOChannel *io_channel_from_socket(int fd) return chan; } +#ifndef _WIN32 static int io_channel_send(GIOChannel *fd, const void *buf, size_t len) { size_t offset = 0; @@ -953,7 +952,6 @@ static int io_channel_send(GIOChannel *fd, const void *buf, size_t len) return -1; } -#ifndef _WIN32 typedef struct FDCharDriver { CharDriverState *chr; @@ -1074,15 +1072,18 @@ static CharDriverState *qemu_chr_open_pipe(const char *id, { ChardevHostdev *opts = backend->u.pipe; int fd_in, fd_out; - char filename_in[CHR_MAX_FILENAME_SIZE]; - char filename_out[CHR_MAX_FILENAME_SIZE]; + char *filename_in; + char *filename_out; const char *filename = opts->device; ChardevCommon *common = qapi_ChardevHostdev_base(backend->u.pipe); - snprintf(filename_in, CHR_MAX_FILENAME_SIZE, "%s.in", filename); - snprintf(filename_out, CHR_MAX_FILENAME_SIZE, "%s.out", filename); + + filename_in = g_strdup_printf("%s.in", filename); + filename_out = g_strdup_printf("%s.out", filename); TFR(fd_in = qemu_open(filename_in, O_RDWR | O_BINARY)); TFR(fd_out = qemu_open(filename_out, O_RDWR | O_BINARY)); + g_free(filename_in); + g_free(filename_out); if (fd_in < 0 || fd_out < 0) { if (fd_in >= 0) close(fd_in); @@ -2115,7 +2116,7 @@ static int win_chr_pipe_init(CharDriverState *chr, const char *filename, OVERLAPPED ov; int ret; DWORD size; - char openname[CHR_MAX_FILENAME_SIZE]; + char *openname; s->fpipe = TRUE; @@ -2130,11 +2131,12 @@ static int win_chr_pipe_init(CharDriverState *chr, const char *filename, goto fail; } - snprintf(openname, sizeof(openname), "\\\\.\\pipe\\%s", filename); + openname = g_strdup_printf("\\\\.\\pipe\\%s", filename); s->hcom = CreateNamedPipe(openname, PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED, PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT, MAXCONNECT, NSENDBUF, NRECVBUF, NTIMEOUT, NULL); + g_free(openname); if 
(s->hcom == INVALID_HANDLE_VALUE) { error_setg(errp, "Failed CreateNamedPipe (%lu)", GetLastError()); s->hcom = NULL; @@ -2913,8 +2915,9 @@ static void tcp_chr_disconnect(CharDriverState *chr) s->chan = NULL; closesocket(s->fd); s->fd = -1; - SocketAddress_to_str(chr->filename, CHR_MAX_FILENAME_SIZE, - "disconnected:", s->addr, s->is_listen, s->is_telnet); + g_free(chr->filename); + chr->filename = SocketAddress_to_str("disconnected:", s->addr, + s->is_listen, s->is_telnet); qemu_chr_be_event(chr, CHR_EVENT_CLOSED); if (s->reconnect_time) { qemu_chr_socket_restart_timer(chr); @@ -2989,16 +2992,16 @@ static void tcp_chr_connect(void *opaque) socklen_t ss_len = sizeof(ss), ps_len = sizeof(ps); memset(&ss, 0, ss_len); + g_free(chr->filename); if (getsockname(s->fd, (struct sockaddr *) &ss, &ss_len) != 0) { - snprintf(chr->filename, CHR_MAX_FILENAME_SIZE, - "Error in getsockname: %s\n", strerror(errno)); + chr->filename = g_strdup_printf("Error in getsockname: %s\n", + strerror(errno)); } else if (getpeername(s->fd, (struct sockaddr *) &ps, &ps_len) != 0) { - snprintf(chr->filename, CHR_MAX_FILENAME_SIZE, - "Error in getpeername: %s\n", strerror(errno)); + chr->filename = g_strdup_printf("Error in getpeername: %s\n", + strerror(errno)); } else { - sockaddr_to_str(chr->filename, CHR_MAX_FILENAME_SIZE, - &ss, ss_len, &ps, ps_len, - s->is_listen, s->is_telnet); + chr->filename = sockaddr_to_str(&ss, ss_len, &ps, ps_len, + s->is_listen, s->is_telnet); } s->connected = 1; @@ -4335,9 +4338,8 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, /* be isn't opened until we get a connection */ chr->explicit_be_open = true; - chr->filename = g_malloc(CHR_MAX_FILENAME_SIZE); - SocketAddress_to_str(chr->filename, CHR_MAX_FILENAME_SIZE, "disconnected:", - addr, is_listen, is_telnet); + chr->filename = SocketAddress_to_str("disconnected:", + addr, is_listen, is_telnet); if (is_listen) { if (is_telnet) { From 9894dc0cdcc397ee5b26370bc53da6d360a363c2 Mon Sep 17 00:00:00 
2001 From: "Daniel P. Berrange" Date: Tue, 19 Jan 2016 11:14:29 +0000 Subject: [PATCH 02/49] char: convert from GIOChannel to QIOChannel In preparation for introducing TLS support to the TCP chardev backend, convert existing chardev code from using GIOChannel to QIOChannel. This simplifies the chardev code by removing most of the OS platform conditional code for dealing with file descriptor passing. Signed-off-by: Daniel P. Berrange Message-Id: <1453202071-10289-3-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- qemu-char.c | 652 +++++++++++++++++++------------------------------ tests/Makefile | 2 +- 2 files changed, 256 insertions(+), 398 deletions(-) diff --git a/qemu-char.c b/qemu-char.c index d818adbcd9..4fc874628b 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -33,6 +33,8 @@ #include "qapi/qmp-output-visitor.h" #include "qapi-visit.h" #include "qemu/base64.h" +#include "io/channel-socket.h" +#include "io/channel-file.h" #include #include @@ -766,7 +768,7 @@ typedef struct IOWatchPoll { GSource parent; - GIOChannel *channel; + QIOChannel *ioc; GSource *src; IOCanReadHandler *fd_can_read; @@ -789,8 +791,8 @@ static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_) } if (now_active) { - iwp->src = g_io_create_watch(iwp->channel, - G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); + iwp->src = qio_channel_create_watch( + iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL); g_source_attach(iwp->src, NULL); } else { @@ -836,9 +838,9 @@ static GSourceFuncs io_watch_poll_funcs = { }; /* Can only be used for read */ -static guint io_add_watch_poll(GIOChannel *channel, +static guint io_add_watch_poll(QIOChannel *ioc, IOCanReadHandler *fd_can_read, - GIOFunc fd_read, + QIOChannelFunc fd_read, gpointer user_data) { IOWatchPoll *iwp; @@ -847,7 +849,7 @@ static guint io_add_watch_poll(GIOChannel *channel, iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, 
sizeof(IOWatchPoll)); iwp->fd_can_read = fd_can_read; iwp->opaque = user_data; - iwp->channel = channel; + iwp->ioc = ioc; iwp->fd_read = (GSourceFunc) fd_read; iwp->src = NULL; @@ -883,79 +885,50 @@ static void remove_fd_in_watch(CharDriverState *chr) } } -#ifndef _WIN32 -static GIOChannel *io_channel_from_fd(int fd) -{ - GIOChannel *chan; - if (fd == -1) { - return NULL; - } - - chan = g_io_channel_unix_new(fd); - - g_io_channel_set_encoding(chan, NULL, NULL); - g_io_channel_set_buffered(chan, FALSE); - - return chan; -} -#endif - -static GIOChannel *io_channel_from_socket(int fd) -{ - GIOChannel *chan; - - if (fd == -1) { - return NULL; - } - -#ifdef _WIN32 - chan = g_io_channel_win32_new_socket(fd); -#else - chan = g_io_channel_unix_new(fd); -#endif - - g_io_channel_set_encoding(chan, NULL, NULL); - g_io_channel_set_buffered(chan, FALSE); - - return chan; -} - -#ifndef _WIN32 -static int io_channel_send(GIOChannel *fd, const void *buf, size_t len) +static int io_channel_send_full(QIOChannel *ioc, + const void *buf, size_t len, + int *fds, size_t nfds) { size_t offset = 0; - GIOStatus status = G_IO_STATUS_NORMAL; - while (offset < len && status == G_IO_STATUS_NORMAL) { - gsize bytes_written = 0; + while (offset < len) { + ssize_t ret = 0; + struct iovec iov = { .iov_base = (char *)buf + offset, + .iov_len = len - offset }; - status = g_io_channel_write_chars(fd, buf + offset, len - offset, - &bytes_written, NULL); - offset += bytes_written; + ret = qio_channel_writev_full( + ioc, &iov, 1, + fds, nfds, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + errno = EAGAIN; + return -1; + } else if (ret < 0) { + if (offset) { + return offset; + } + + errno = EINVAL; + return -1; + } + + offset += ret; } - if (offset > 0) { - return offset; - } - switch (status) { - case G_IO_STATUS_NORMAL: - g_assert(len == 0); - return 0; - case G_IO_STATUS_AGAIN: - errno = EAGAIN; - return -1; - default: - break; - } - errno = EINVAL; - return -1; + return offset; +} + + +#ifndef _WIN32 
+static int io_channel_send(QIOChannel *ioc, const void *buf, size_t len) +{ + return io_channel_send_full(ioc, buf, len, NULL, 0); } typedef struct FDCharDriver { CharDriverState *chr; - GIOChannel *fd_in, *fd_out; + QIOChannel *ioc_in, *ioc_out; int max_size; } FDCharDriver; @@ -964,17 +937,16 @@ static int fd_chr_write(CharDriverState *chr, const uint8_t *buf, int len) { FDCharDriver *s = chr->opaque; - return io_channel_send(s->fd_out, buf, len); + return io_channel_send(s->ioc_out, buf, len); } -static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) { CharDriverState *chr = opaque; FDCharDriver *s = chr->opaque; int len; uint8_t buf[READ_BUF_LEN]; - GIOStatus status; - gsize bytes_read; + ssize_t ret; len = sizeof(buf); if (len > s->max_size) { @@ -984,15 +956,15 @@ static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) return TRUE; } - status = g_io_channel_read_chars(chan, (gchar *)buf, - len, &bytes_read, NULL); - if (status == G_IO_STATUS_EOF) { + ret = qio_channel_read( + chan, (gchar *)buf, len, NULL); + if (ret == 0) { remove_fd_in_watch(chr); qemu_chr_be_event(chr, CHR_EVENT_CLOSED); return FALSE; } - if (status == G_IO_STATUS_NORMAL) { - qemu_chr_be_write(chr, buf, bytes_read); + if (ret > 0) { + qemu_chr_be_write(chr, buf, ret); } return TRUE; @@ -1010,7 +982,7 @@ static int fd_chr_read_poll(void *opaque) static GSource *fd_chr_add_watch(CharDriverState *chr, GIOCondition cond) { FDCharDriver *s = chr->opaque; - return g_io_create_watch(s->fd_out, cond); + return qio_channel_create_watch(s->ioc_out, cond); } static void fd_chr_update_read_handler(CharDriverState *chr) @@ -1018,8 +990,9 @@ static void fd_chr_update_read_handler(CharDriverState *chr) FDCharDriver *s = chr->opaque; remove_fd_in_watch(chr); - if (s->fd_in) { - chr->fd_in_tag = io_add_watch_poll(s->fd_in, fd_chr_read_poll, + if (s->ioc_in) { + chr->fd_in_tag = 
io_add_watch_poll(s->ioc_in, + fd_chr_read_poll, fd_chr_read, chr); } } @@ -1029,11 +1002,11 @@ static void fd_chr_close(struct CharDriverState *chr) FDCharDriver *s = chr->opaque; remove_fd_in_watch(chr); - if (s->fd_in) { - g_io_channel_unref(s->fd_in); + if (s->ioc_in) { + object_unref(OBJECT(s->ioc_in)); } - if (s->fd_out) { - g_io_channel_unref(s->fd_out); + if (s->ioc_out) { + object_unref(OBJECT(s->ioc_out)); } g_free(s); @@ -1052,8 +1025,8 @@ static CharDriverState *qemu_chr_open_fd(int fd_in, int fd_out, return NULL; } s = g_new0(FDCharDriver, 1); - s->fd_in = io_channel_from_fd(fd_in); - s->fd_out = io_channel_from_fd(fd_out); + s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in)); + s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out)); qemu_set_nonblock(fd_out); s->chr = chr; chr->opaque = s; @@ -1196,7 +1169,7 @@ static CharDriverState *qemu_chr_open_stdio(const char *id, #define HAVE_CHARDEV_PTY 1 typedef struct { - GIOChannel *fd; + QIOChannel *ioc; int read_bytes; /* Protected by the CharDriverState chr_write_lock. 
*/ @@ -1247,8 +1220,9 @@ static void pty_chr_update_read_handler_locked(CharDriverState *chr) PtyCharDriver *s = chr->opaque; GPollFD pfd; int rc; + QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc); - pfd.fd = g_io_channel_unix_get_fd(s->fd); + pfd.fd = fioc->fd; pfd.events = G_IO_OUT; pfd.revents = 0; do { @@ -1282,7 +1256,7 @@ static int pty_chr_write(CharDriverState *chr, const uint8_t *buf, int len) return 0; } } - return io_channel_send(s->fd, buf, len); + return io_channel_send(s->ioc, buf, len); } static GSource *pty_chr_add_watch(CharDriverState *chr, GIOCondition cond) @@ -1291,7 +1265,7 @@ static GSource *pty_chr_add_watch(CharDriverState *chr, GIOCondition cond) if (!s->connected) { return NULL; } - return g_io_create_watch(s->fd, cond); + return qio_channel_create_watch(s->ioc, cond); } static int pty_chr_read_poll(void *opaque) @@ -1303,13 +1277,13 @@ static int pty_chr_read_poll(void *opaque) return s->read_bytes; } -static gboolean pty_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean pty_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) { CharDriverState *chr = opaque; PtyCharDriver *s = chr->opaque; - gsize size, len; + gsize len; uint8_t buf[READ_BUF_LEN]; - GIOStatus status; + ssize_t ret; len = sizeof(buf); if (len > s->read_bytes) @@ -1317,13 +1291,13 @@ static gboolean pty_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) if (len == 0) { return TRUE; } - status = g_io_channel_read_chars(s->fd, (gchar *)buf, len, &size, NULL); - if (status != G_IO_STATUS_NORMAL) { + ret = qio_channel_read(s->ioc, (char *)buf, len, NULL); + if (ret <= 0) { pty_chr_state(chr, 0); return FALSE; } else { pty_chr_state(chr, 1); - qemu_chr_be_write(chr, buf, size); + qemu_chr_be_write(chr, buf, ret); } return TRUE; } @@ -1365,7 +1339,8 @@ static void pty_chr_state(CharDriverState *chr, int connected) s->open_tag = g_idle_add(qemu_chr_be_generic_open_func, chr); } if (!chr->fd_in_tag) { - chr->fd_in_tag = 
io_add_watch_poll(s->fd, pty_chr_read_poll, + chr->fd_in_tag = io_add_watch_poll(s->ioc, + pty_chr_read_poll, pty_chr_read, chr); } } @@ -1374,13 +1349,10 @@ static void pty_chr_state(CharDriverState *chr, int connected) static void pty_chr_close(struct CharDriverState *chr) { PtyCharDriver *s = chr->opaque; - int fd; qemu_mutex_lock(&chr->chr_write_lock); pty_chr_state(chr, 0); - fd = g_io_channel_unix_get_fd(s->fd); - g_io_channel_unref(s->fd); - close(fd); + object_unref(OBJECT(s->ioc)); if (s->timer_tag) { g_source_remove(s->timer_tag); s->timer_tag = 0; @@ -1431,7 +1403,7 @@ static CharDriverState *qemu_chr_open_pty(const char *id, chr->chr_add_watch = pty_chr_add_watch; chr->explicit_be_open = true; - s->fd = io_channel_from_fd(master_fd); + s->ioc = QIO_CHANNEL(qio_channel_file_new_fd(master_fd)); s->timer_tag = 0; return chr; @@ -1555,12 +1527,13 @@ static void tty_serial_init(int fd, int speed, static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) { FDCharDriver *s = chr->opaque; + QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc_in); switch(cmd) { case CHR_IOCTL_SERIAL_SET_PARAMS: { QEMUSerialSetParams *ssp = arg; - tty_serial_init(g_io_channel_unix_get_fd(s->fd_in), + tty_serial_init(fioc->fd, ssp->speed, ssp->parity, ssp->data_bits, ssp->stop_bits); } @@ -1569,7 +1542,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) { int enable = *(int *)arg; if (enable) { - tcsendbreak(g_io_channel_unix_get_fd(s->fd_in), 1); + tcsendbreak(fioc->fd, 1); } } break; @@ -1577,7 +1550,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) { int sarg = 0; int *targ = (int *)arg; - ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMGET, &sarg); + ioctl(fioc->fd, TIOCMGET, &sarg); *targ = 0; if (sarg & TIOCM_CTS) *targ |= CHR_TIOCM_CTS; @@ -1597,7 +1570,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) { int sarg = *(int *)arg; int targ = 0; - ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMGET, 
&targ); + ioctl(fioc->fd, TIOCMGET, &targ); targ &= ~(CHR_TIOCM_CTS | CHR_TIOCM_CAR | CHR_TIOCM_DSR | CHR_TIOCM_RI | CHR_TIOCM_DTR | CHR_TIOCM_RTS); if (sarg & CHR_TIOCM_CTS) @@ -1612,7 +1585,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) targ |= TIOCM_DTR; if (sarg & CHR_TIOCM_RTS) targ |= TIOCM_RTS; - ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMSET, &targ); + ioctl(fioc->fd, TIOCMSET, &targ); } break; default: @@ -1623,18 +1596,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) static void qemu_chr_close_tty(CharDriverState *chr) { - FDCharDriver *s = chr->opaque; - int fd = -1; - - if (s) { - fd = g_io_channel_unix_get_fd(s->fd_in); - } - fd_chr_close(chr); - - if (fd >= 0) { - close(fd); - } } static CharDriverState *qemu_chr_open_tty_fd(int fd, @@ -2456,8 +2418,7 @@ err1: /* UDP Net console */ typedef struct { - int fd; - GIOChannel *chan; + QIOChannel *ioc; uint8_t buf[READ_BUF_LEN]; int bufcnt; int bufptr; @@ -2468,17 +2429,9 @@ typedef struct { static int udp_chr_write(CharDriverState *chr, const uint8_t *buf, int len) { NetCharDriver *s = chr->opaque; - gsize bytes_written; - GIOStatus status; - status = g_io_channel_write_chars(s->chan, (const gchar *)buf, len, &bytes_written, NULL); - if (status == G_IO_STATUS_EOF) { - return 0; - } else if (status != G_IO_STATUS_NORMAL) { - return -1; - } - - return bytes_written; + return qio_channel_write( + s->ioc, (const char *)buf, len, NULL); } static int udp_chr_read_poll(void *opaque) @@ -2499,24 +2452,22 @@ static int udp_chr_read_poll(void *opaque) return s->max_size; } -static gboolean udp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) { CharDriverState *chr = opaque; NetCharDriver *s = chr->opaque; - gsize bytes_read = 0; - GIOStatus status; + ssize_t ret; if (s->max_size == 0) { return TRUE; } - status = g_io_channel_read_chars(s->chan, (gchar *)s->buf, 
sizeof(s->buf), - &bytes_read, NULL); - s->bufcnt = bytes_read; - s->bufptr = s->bufcnt; - if (status != G_IO_STATUS_NORMAL) { + ret = qio_channel_read( + s->ioc, (char *)s->buf, sizeof(s->buf), NULL); + if (ret <= 0) { remove_fd_in_watch(chr); return FALSE; } + s->bufcnt = ret; s->bufptr = 0; while (s->max_size > 0 && s->bufptr < s->bufcnt) { @@ -2533,8 +2484,9 @@ static void udp_chr_update_read_handler(CharDriverState *chr) NetCharDriver *s = chr->opaque; remove_fd_in_watch(chr); - if (s->chan) { - chr->fd_in_tag = io_add_watch_poll(s->chan, udp_chr_read_poll, + if (s->ioc) { + chr->fd_in_tag = io_add_watch_poll(s->ioc, + udp_chr_read_poll, udp_chr_read, chr); } } @@ -2544,17 +2496,16 @@ static void udp_chr_close(CharDriverState *chr) NetCharDriver *s = chr->opaque; remove_fd_in_watch(chr); - if (s->chan) { - g_io_channel_unref(s->chan); - closesocket(s->fd); + if (s->ioc) { + object_unref(OBJECT(s->ioc)); } g_free(s); qemu_chr_be_event(chr, CHR_EVENT_CLOSED); } -static CharDriverState *qemu_chr_open_udp_fd(int fd, - ChardevCommon *backend, - Error **errp) +static CharDriverState *qemu_chr_open_udp(QIOChannelSocket *sioc, + ChardevCommon *backend, + Error **errp) { CharDriverState *chr = NULL; NetCharDriver *s = NULL; @@ -2565,8 +2516,7 @@ static CharDriverState *qemu_chr_open_udp_fd(int fd, } s = g_new0(NetCharDriver, 1); - s->fd = fd; - s->chan = io_channel_from_socket(s->fd); + s->ioc = QIO_CHANNEL(sioc); s->bufcnt = 0; s->bufptr = 0; chr->opaque = s; @@ -2582,19 +2532,18 @@ static CharDriverState *qemu_chr_open_udp_fd(int fd, /* TCP Net console */ typedef struct { - - GIOChannel *chan, *listen_chan; + QIOChannel *ioc; + QIOChannelSocket *listen_ioc; guint listen_tag; - int fd, listen_fd; int connected; int max_size; int do_telnetopt; int do_nodelay; int is_unix; int *read_msgfds; - int read_msgfds_num; + size_t read_msgfds_num; int *write_msgfds; - int write_msgfds_num; + size_t write_msgfds_num; SocketAddress *addr; bool is_listen; @@ -2628,68 +2577,27 @@ 
static void check_report_connect_error(CharDriverState *chr, qemu_chr_socket_restart_timer(chr); } -static gboolean tcp_chr_accept(GIOChannel *chan, GIOCondition cond, void *opaque); - -#ifndef _WIN32 -static int unix_send_msgfds(CharDriverState *chr, const uint8_t *buf, int len) -{ - TCPCharDriver *s = chr->opaque; - struct msghdr msgh; - struct iovec iov; - int r; - - size_t fd_size = s->write_msgfds_num * sizeof(int); - char control[CMSG_SPACE(fd_size)]; - struct cmsghdr *cmsg; - - memset(&msgh, 0, sizeof(msgh)); - memset(control, 0, sizeof(control)); - - /* set the payload */ - iov.iov_base = (uint8_t *) buf; - iov.iov_len = len; - - msgh.msg_iov = &iov; - msgh.msg_iovlen = 1; - - msgh.msg_control = control; - msgh.msg_controllen = sizeof(control); - - cmsg = CMSG_FIRSTHDR(&msgh); - - cmsg->cmsg_len = CMSG_LEN(fd_size); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - memcpy(CMSG_DATA(cmsg), s->write_msgfds, fd_size); - - do { - r = sendmsg(s->fd, &msgh, 0); - } while (r < 0 && errno == EINTR); - - /* free the written msgfds, no matter what */ - if (s->write_msgfds_num) { - g_free(s->write_msgfds); - s->write_msgfds = 0; - s->write_msgfds_num = 0; - } - - return r; -} -#endif +static gboolean tcp_chr_accept(QIOChannel *chan, + GIOCondition cond, + void *opaque); /* Called with chr_write_lock held. */ static int tcp_chr_write(CharDriverState *chr, const uint8_t *buf, int len) { TCPCharDriver *s = chr->opaque; if (s->connected) { -#ifndef _WIN32 - if (s->is_unix && s->write_msgfds_num) { - return unix_send_msgfds(chr, buf, len); - } else -#endif - { - return io_channel_send(s->chan, buf, len); + int ret = io_channel_send_full(s->ioc, buf, len, + s->write_msgfds, + s->write_msgfds_num); + + /* free the written msgfds, no matter what */ + if (s->write_msgfds_num) { + g_free(s->write_msgfds); + s->write_msgfds = 0; + s->write_msgfds_num = 0; } + + return ret; } else { /* XXX: indicate an error ? 
*/ return len; @@ -2785,6 +2693,10 @@ static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num) { TCPCharDriver *s = chr->opaque; + if (!qio_channel_has_feature(s->ioc, + QIO_CHANNEL_FEATURE_FD_PASS)) { + return -1; + } /* clear old pending fd array */ g_free(s->write_msgfds); @@ -2798,27 +2710,26 @@ static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num) return 0; } -#ifndef _WIN32 -static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg) +static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len) { TCPCharDriver *s = chr->opaque; - struct cmsghdr *cmsg; + struct iovec iov = { .iov_base = buf, .iov_len = len }; + int ret; + size_t i; + int *msgfds = NULL; + size_t msgfds_num = 0; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { - int fd_size, i; - - if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) || - cmsg->cmsg_level != SOL_SOCKET || - cmsg->cmsg_type != SCM_RIGHTS) { - continue; - } - - fd_size = cmsg->cmsg_len - CMSG_LEN(0); - - if (!fd_size) { - continue; - } + if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + &msgfds, &msgfds_num, + NULL); + } else { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + NULL, NULL, + NULL); + } + if (msgfds_num) { /* close and clean read_msgfds */ for (i = 0; i < s->read_msgfds_num; i++) { close(s->read_msgfds[i]); @@ -2828,77 +2739,31 @@ static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg) g_free(s->read_msgfds); } - s->read_msgfds_num = fd_size / sizeof(int); - s->read_msgfds = g_malloc(fd_size); - memcpy(s->read_msgfds, CMSG_DATA(cmsg), fd_size); + s->read_msgfds = msgfds; + s->read_msgfds_num = msgfds_num; + } - for (i = 0; i < s->read_msgfds_num; i++) { - int fd = s->read_msgfds[i]; - if (fd < 0) { - continue; - } - - /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */ - qemu_set_block(fd); - - #ifndef MSG_CMSG_CLOEXEC - qemu_set_cloexec(fd); - #endif + for (i = 
0; i < s->read_msgfds_num; i++) { + int fd = s->read_msgfds[i]; + if (fd < 0) { + continue; } - } -} -static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len) -{ - TCPCharDriver *s = chr->opaque; - struct msghdr msg = { NULL, }; - struct iovec iov[1]; - union { - struct cmsghdr cmsg; - char control[CMSG_SPACE(sizeof(int) * TCP_MAX_FDS)]; - } msg_control; - int flags = 0; - ssize_t ret; + /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */ + qemu_set_block(fd); - iov[0].iov_base = buf; - iov[0].iov_len = len; - - msg.msg_iov = iov; - msg.msg_iovlen = 1; - msg.msg_control = &msg_control; - msg.msg_controllen = sizeof(msg_control); - -#ifdef MSG_CMSG_CLOEXEC - flags |= MSG_CMSG_CLOEXEC; +#ifndef MSG_CMSG_CLOEXEC + qemu_set_cloexec(fd); #endif - do { - ret = recvmsg(s->fd, &msg, flags); - } while (ret == -1 && errno == EINTR); - - if (ret > 0 && s->is_unix) { - unix_process_msgfd(chr, &msg); } return ret; } -#else -static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len) -{ - TCPCharDriver *s = chr->opaque; - ssize_t ret; - - do { - ret = qemu_recv(s->fd, buf, len, 0); - } while (ret == -1 && socket_error() == EINTR); - - return ret; -} -#endif static GSource *tcp_chr_add_watch(CharDriverState *chr, GIOCondition cond) { TCPCharDriver *s = chr->opaque; - return g_io_create_watch(s->chan, cond); + return qio_channel_create_watch(s->ioc, cond); } static void tcp_chr_disconnect(CharDriverState *chr) @@ -2906,15 +2771,13 @@ static void tcp_chr_disconnect(CharDriverState *chr) TCPCharDriver *s = chr->opaque; s->connected = 0; - if (s->listen_chan) { - s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, - tcp_chr_accept, chr); + if (s->listen_ioc) { + s->listen_tag = qio_channel_add_watch( + QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); } remove_fd_in_watch(chr); - g_io_channel_unref(s->chan); - s->chan = NULL; - closesocket(s->fd); - s->fd = -1; + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; 
g_free(chr->filename); chr->filename = SocketAddress_to_str("disconnected:", s->addr, s->is_listen, s->is_telnet); @@ -2924,7 +2787,7 @@ static void tcp_chr_disconnect(CharDriverState *chr) } } -static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) { CharDriverState *chr = opaque; TCPCharDriver *s = chr->opaque; @@ -2938,9 +2801,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) if (len > s->max_size) len = s->max_size; size = tcp_chr_recv(chr, (void *)buf, len); - if (size == 0 || - (size < 0 && - socket_error() != EAGAIN && socket_error() != EWOULDBLOCK)) { + if (size == 0 || size == -1) { /* connection closed */ tcp_chr_disconnect(chr); } else if (size > 0) { @@ -2988,25 +2849,17 @@ static void tcp_chr_connect(void *opaque) { CharDriverState *chr = opaque; TCPCharDriver *s = chr->opaque; - struct sockaddr_storage ss, ps; - socklen_t ss_len = sizeof(ss), ps_len = sizeof(ps); + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(s->ioc); - memset(&ss, 0, ss_len); g_free(chr->filename); - if (getsockname(s->fd, (struct sockaddr *) &ss, &ss_len) != 0) { - chr->filename = g_strdup_printf("Error in getsockname: %s\n", - strerror(errno)); - } else if (getpeername(s->fd, (struct sockaddr *) &ps, &ps_len) != 0) { - chr->filename = g_strdup_printf("Error in getpeername: %s\n", - strerror(errno)); - } else { - chr->filename = sockaddr_to_str(&ss, ss_len, &ps, ps_len, - s->is_listen, s->is_telnet); - } + chr->filename = sockaddr_to_str(&sioc->localAddr, sioc->localAddrLen, + &sioc->remoteAddr, sioc->remoteAddrLen, + s->is_listen, s->is_telnet); s->connected = 1; - if (s->chan) { - chr->fd_in_tag = io_add_watch_poll(s->chan, tcp_chr_read_poll, + if (s->ioc) { + chr->fd_in_tag = io_add_watch_poll(s->ioc, + tcp_chr_read_poll, tcp_chr_read, chr); } qemu_chr_be_generic_open(chr); @@ -3017,38 +2870,41 @@ static void 
tcp_chr_update_read_handler(CharDriverState *chr) TCPCharDriver *s = chr->opaque; remove_fd_in_watch(chr); - if (s->chan) { - chr->fd_in_tag = io_add_watch_poll(s->chan, tcp_chr_read_poll, + if (s->ioc) { + chr->fd_in_tag = io_add_watch_poll(s->ioc, + tcp_chr_read_poll, tcp_chr_read, chr); } } #define IACSET(x,a,b,c) x[0] = a; x[1] = b; x[2] = c; -static void tcp_chr_telnet_init(int fd) +static void tcp_chr_telnet_init(QIOChannel *ioc) { char buf[3]; /* Send the telnet negotion to put telnet in binary, no echo, single char mode */ IACSET(buf, 0xff, 0xfb, 0x01); /* IAC WILL ECHO */ - send(fd, (char *)buf, 3, 0); + qio_channel_write(ioc, buf, 3, NULL); IACSET(buf, 0xff, 0xfb, 0x03); /* IAC WILL Suppress go ahead */ - send(fd, (char *)buf, 3, 0); + qio_channel_write(ioc, buf, 3, NULL); IACSET(buf, 0xff, 0xfb, 0x00); /* IAC WILL Binary */ - send(fd, (char *)buf, 3, 0); + qio_channel_write(ioc, buf, 3, NULL); IACSET(buf, 0xff, 0xfd, 0x00); /* IAC DO Binary */ - send(fd, (char *)buf, 3, 0); + qio_channel_write(ioc, buf, 3, NULL); } -static int tcp_chr_add_client(CharDriverState *chr, int fd) +static int tcp_chr_new_client(CharDriverState *chr, QIOChannelSocket *sioc) { TCPCharDriver *s = chr->opaque; - if (s->fd != -1) + if (s->ioc != NULL) { return -1; + } - qemu_set_nonblock(fd); - if (s->do_nodelay) - socket_set_nodelay(fd); - s->fd = fd; - s->chan = io_channel_from_socket(fd); + s->ioc = QIO_CHANNEL(sioc); + object_ref(OBJECT(sioc)); + + if (s->do_nodelay) { + qio_channel_set_delay(s->ioc, false); + } if (s->listen_tag) { g_source_remove(s->listen_tag); s->listen_tag = 0; @@ -3058,41 +2914,43 @@ static int tcp_chr_add_client(CharDriverState *chr, int fd) return 0; } -static gboolean tcp_chr_accept(GIOChannel *channel, GIOCondition cond, void *opaque) + +static int tcp_chr_add_client(CharDriverState *chr, int fd) +{ + int ret; + QIOChannelSocket *sioc; + + sioc = qio_channel_socket_new_fd(fd, NULL); + if (!sioc) { + return -1; + } + 
qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL); + ret = tcp_chr_new_client(chr, sioc); + object_unref(OBJECT(sioc)); + return ret; +} + +static gboolean tcp_chr_accept(QIOChannel *channel, + GIOCondition cond, + void *opaque) { CharDriverState *chr = opaque; TCPCharDriver *s = chr->opaque; - struct sockaddr_in saddr; -#ifndef _WIN32 - struct sockaddr_un uaddr; -#endif - struct sockaddr *addr; - socklen_t len; - int fd; + QIOChannelSocket *sioc; - for(;;) { -#ifndef _WIN32 - if (s->is_unix) { - len = sizeof(uaddr); - addr = (struct sockaddr *)&uaddr; - } else -#endif - { - len = sizeof(saddr); - addr = (struct sockaddr *)&saddr; - } - fd = qemu_accept(s->listen_fd, addr, &len); - if (fd < 0 && errno != EINTR) { - s->listen_tag = 0; - return FALSE; - } else if (fd >= 0) { - if (s->do_telnetopt) - tcp_chr_telnet_init(fd); - break; - } + sioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(channel), + NULL); + if (!sioc) { + return TRUE; } - if (tcp_chr_add_client(chr, fd) < 0) - close(fd); + + if (s->do_telnetopt) { + tcp_chr_telnet_init(QIO_CHANNEL(sioc)); + } + + tcp_chr_new_client(chr, sioc); + + object_unref(OBJECT(sioc)); return TRUE; } @@ -3107,22 +2965,16 @@ static void tcp_chr_close(CharDriverState *chr) s->reconnect_timer = 0; } qapi_free_SocketAddress(s->addr); - if (s->fd >= 0) { - remove_fd_in_watch(chr); - if (s->chan) { - g_io_channel_unref(s->chan); - } - closesocket(s->fd); + remove_fd_in_watch(chr); + if (s->ioc) { + object_unref(OBJECT(s->ioc)); } - if (s->listen_fd >= 0) { - if (s->listen_tag) { - g_source_remove(s->listen_tag); - s->listen_tag = 0; - } - if (s->listen_chan) { - g_io_channel_unref(s->listen_chan); - } - closesocket(s->listen_fd); + if (s->listen_tag) { + g_source_remove(s->listen_tag); + s->listen_tag = 0; + } + if (s->listen_ioc) { + object_unref(OBJECT(s->listen_ioc)); } if (s->read_msgfds_num) { for (i = 0; i < s->read_msgfds_num; i++) { @@ -3137,57 +2989,63 @@ static void tcp_chr_close(CharDriverState *chr) 
qemu_chr_be_event(chr, CHR_EVENT_CLOSED); } -static void qemu_chr_finish_socket_connection(CharDriverState *chr, int fd) +static void qemu_chr_finish_socket_connection(CharDriverState *chr, + QIOChannelSocket *sioc) { TCPCharDriver *s = chr->opaque; if (s->is_listen) { - s->listen_fd = fd; - s->listen_chan = io_channel_from_socket(s->listen_fd); - s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, - tcp_chr_accept, chr); + s->listen_ioc = sioc; + s->listen_tag = qio_channel_add_watch( + QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); } else { - s->connected = 1; - s->fd = fd; - socket_set_nodelay(fd); - s->chan = io_channel_from_socket(s->fd); - tcp_chr_connect(chr); + tcp_chr_new_client(chr, sioc); + object_unref(OBJECT(sioc)); } } -static void qemu_chr_socket_connected(int fd, Error *err, void *opaque) +static void qemu_chr_socket_connected(Object *src, Error *err, void *opaque) { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(src); CharDriverState *chr = opaque; TCPCharDriver *s = chr->opaque; - if (fd < 0) { + if (err) { check_report_connect_error(chr, err); + object_unref(src); return; } s->connect_err_reported = false; - qemu_chr_finish_socket_connection(chr, fd); + qemu_chr_finish_socket_connection(chr, sioc); } static bool qemu_chr_open_socket_fd(CharDriverState *chr, Error **errp) { TCPCharDriver *s = chr->opaque; - int fd; + QIOChannelSocket *sioc = qio_channel_socket_new(); if (s->is_listen) { - fd = socket_listen(s->addr, errp); + if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) { + goto fail; + } + qemu_chr_finish_socket_connection(chr, sioc); } else if (s->reconnect_time) { - fd = socket_connect(s->addr, errp, qemu_chr_socket_connected, chr); - return fd >= 0; + qio_channel_socket_connect_async(sioc, s->addr, + qemu_chr_socket_connected, + chr, NULL); } else { - fd = socket_connect(s->addr, errp, NULL, NULL); - } - if (fd < 0) { - return false; + if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) { + goto 
fail; + } + qemu_chr_finish_socket_connection(chr, sioc); } - qemu_chr_finish_socket_connection(chr, fd); return true; + + fail: + object_unref(OBJECT(sioc)); + return false; } /*********************************************************/ @@ -4318,8 +4176,6 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, } s = g_new0(TCPCharDriver, 1); - s->fd = -1; - s->listen_fd = -1; s->is_unix = addr->type == SOCKET_ADDRESS_KIND_UNIX; s->is_listen = is_listen; s->is_telnet = is_telnet; @@ -4360,8 +4216,8 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, if (is_listen && is_waitconnect) { fprintf(stderr, "QEMU waiting for connection on: %s\n", chr->filename); - tcp_chr_accept(s->listen_chan, G_IO_IN, chr); - qemu_set_nonblock(s->listen_fd); + tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr); + qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL); } return chr; @@ -4374,13 +4230,15 @@ static CharDriverState *qmp_chardev_open_udp(const char *id, { ChardevUdp *udp = backend->u.udp; ChardevCommon *common = qapi_ChardevUdp_base(backend->u.udp); - int fd; + QIOChannelSocket *sioc = qio_channel_socket_new(); - fd = socket_dgram(udp->remote, udp->local, errp); - if (fd < 0) { + if (qio_channel_socket_dgram_sync(sioc, + udp->remote, udp->local, + errp) < 0) { + object_unref(OBJECT(sioc)); return NULL; } - return qemu_chr_open_udp_fd(fd, common, errp); + return qemu_chr_open_udp(sioc, common, errp); } ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, diff --git a/tests/Makefile b/tests/Makefile index b7352f1a35..650e654ec2 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -558,7 +558,7 @@ tests/usb-hcd-uhci-test$(EXESUF): tests/usb-hcd-uhci-test.o $(libqos-usb-obj-y) tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y) tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y) tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o -tests/vhost-user-test$(EXESUF): 
tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) +tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y) tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y) From f2001a7e0555b66d6db25a3ff1801540814045bb Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 19 Jan 2016 11:14:30 +0000 Subject: [PATCH 03/49] char: don't assume telnet initialization will not block The current code for doing telnet initialization is writing to a socket without checking the return status. While it is highly unlikely to be a problem when writing to a bare socket, as the buffers are large enough to prevent blocking, this cannot be assumed safe with TLS sockets. So write the telnet initialization code into a memory buffer and then use an I/O watch to fully send the data. Signed-off-by: Daniel P. 
Berrange Message-Id: <1453202071-10289-4-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- qemu-char.c | 87 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/qemu-char.c b/qemu-char.c index 4fc874628b..55440bde5a 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -2877,19 +2877,70 @@ static void tcp_chr_update_read_handler(CharDriverState *chr) } } -#define IACSET(x,a,b,c) x[0] = a; x[1] = b; x[2] = c; -static void tcp_chr_telnet_init(QIOChannel *ioc) +typedef struct { + CharDriverState *chr; + char buf[12]; + size_t buflen; +} TCPCharDriverTelnetInit; + +static gboolean tcp_chr_telnet_init_io(QIOChannel *ioc, + GIOCondition cond G_GNUC_UNUSED, + gpointer user_data) { - char buf[3]; - /* Send the telnet negotion to put telnet in binary, no echo, single char mode */ - IACSET(buf, 0xff, 0xfb, 0x01); /* IAC WILL ECHO */ - qio_channel_write(ioc, buf, 3, NULL); - IACSET(buf, 0xff, 0xfb, 0x03); /* IAC WILL Suppress go ahead */ - qio_channel_write(ioc, buf, 3, NULL); - IACSET(buf, 0xff, 0xfb, 0x00); /* IAC WILL Binary */ - qio_channel_write(ioc, buf, 3, NULL); - IACSET(buf, 0xff, 0xfd, 0x00); /* IAC DO Binary */ - qio_channel_write(ioc, buf, 3, NULL); + TCPCharDriverTelnetInit *init = user_data; + ssize_t ret; + + ret = qio_channel_write(ioc, init->buf, init->buflen, NULL); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + ret = 0; + } else { + tcp_chr_disconnect(init->chr); + return FALSE; + } + } + init->buflen -= ret; + + if (init->buflen == 0) { + tcp_chr_connect(init->chr); + return FALSE; + } + + memmove(init->buf, init->buf + ret, init->buflen); + + return TRUE; +} + +static void tcp_chr_telnet_init(CharDriverState *chr) +{ + TCPCharDriver *s = chr->opaque; + TCPCharDriverTelnetInit *init = + g_new0(TCPCharDriverTelnetInit, 1); + size_t n = 0; + + init->chr = chr; + init->buflen = 12; + +#define IACSET(x, a, b, c) \ + do { \ + x[n++] = a; \ + x[n++] = b; \ + x[n++] = c; \ + } 
while (0) + + /* Prep the telnet negotiation to put telnet in binary, + * no echo, single char mode */ + IACSET(init->buf, 0xff, 0xfb, 0x01); /* IAC WILL ECHO */ + IACSET(init->buf, 0xff, 0xfb, 0x03); /* IAC WILL Suppress go ahead */ + IACSET(init->buf, 0xff, 0xfb, 0x00); /* IAC WILL Binary */ + IACSET(init->buf, 0xff, 0xfd, 0x00); /* IAC DO Binary */ + +#undef IACSET + + qio_channel_add_watch( + s->ioc, G_IO_OUT, + tcp_chr_telnet_init_io, + init, NULL); } static int tcp_chr_new_client(CharDriverState *chr, QIOChannelSocket *sioc) @@ -2909,7 +2960,12 @@ static int tcp_chr_new_client(CharDriverState *chr, QIOChannelSocket *sioc) g_source_remove(s->listen_tag); s->listen_tag = 0; } - tcp_chr_connect(chr); + + if (s->do_telnetopt) { + tcp_chr_telnet_init(chr); + } else { + tcp_chr_connect(chr); + } return 0; } @@ -2935,7 +2991,6 @@ static gboolean tcp_chr_accept(QIOChannel *channel, void *opaque) { CharDriverState *chr = opaque; - TCPCharDriver *s = chr->opaque; QIOChannelSocket *sioc; sioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(channel), @@ -2944,10 +2999,6 @@ static gboolean tcp_chr_accept(QIOChannel *channel, return TRUE; } - if (s->do_telnetopt) { - tcp_chr_telnet_init(QIO_CHANNEL(sioc)); - } - tcp_chr_new_client(chr, sioc); object_unref(OBJECT(sioc)); From a8fb542705ac7e0dcf00908bc47bf49cdd058abe Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 19 Jan 2016 11:14:31 +0000 Subject: [PATCH 04/49] char: introduce support for TLS encrypted TCP chardev backend This integrates support for QIOChannelTLS object in the TCP chardev backend. If the 'tls-creds=NAME' option is passed with the '-chardev tcp' argument, then it will set up the chardev such that the client is required to establish a TLS handshake when connecting. There is no support for checking the client certificate against ACLs in this initial patch. This is pending work to QOM-ify the ACL object code.
A complete invocation to run QEMU as the server for a TLS encrypted serial dev might be $ qemu-system-x86_64 \ -nodefconfig -nodefaults -device sga -display none \ -chardev socket,id=s0,host=127.0.0.1,port=9000,tls-creds=tls0,server \ -device isa-serial,chardev=s0 \ -object tls-creds-x509,id=tls0,endpoint=server,verify-peer=off,\ dir=/home/berrange/security/qemutls To test with the gnutls-cli tool as the client: $ gnutls-cli --priority=NORMAL -p 9000 \ --x509cafile=/home/berrange/security/qemutls/ca-cert.pem \ 127.0.0.1 If QEMU was told to use 'anon' credential type, then use the priority string 'NORMAL:+ANON-DH' with gnutls-cli. Alternatively, if setting up a chardev to operate as a client, then the TLS credentials registered must be for the client endpoint. First a TLS server must be set up, which can be done with the gnutls-serv tool $ gnutls-serv --priority=NORMAL -p 9000 --echo \ --x509cafile=/home/berrange/security/qemutls/ca-cert.pem \ --x509certfile=/home/berrange/security/qemutls/server-cert.pem \ --x509keyfile=/home/berrange/security/qemutls/server-key.pem Then QEMU can connect with $ qemu-system-x86_64 \ -nodefconfig -nodefaults -device sga -display none \ -chardev socket,id=s0,host=127.0.0.1,port=9000,tls-creds=tls0 \ -device isa-serial,chardev=s0 \ -object tls-creds-x509,id=tls0,endpoint=client,\ dir=/home/berrange/security/qemutls Signed-off-by: Daniel P.
Berrange Message-Id: <1453202071-10289-5-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- qapi-schema.json | 2 + qemu-char.c | 136 +++++++++++++++++++++++++++++++++++++++++++---- qemu-options.hx | 9 +++- 3 files changed, 134 insertions(+), 13 deletions(-) diff --git a/qapi-schema.json b/qapi-schema.json index b3038b215a..8d04897922 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3146,6 +3146,7 @@ # # @addr: socket address to listen on (server=true) # or connect to (server=false) +# @tls-creds: #optional the ID of the TLS credentials object (since 2.6) # @server: #optional create server socket (default: true) # @wait: #optional wait for incoming connection on server # sockets (default: false). @@ -3160,6 +3161,7 @@ # Since: 1.4 ## { 'struct': 'ChardevSocket', 'data': { 'addr' : 'SocketAddress', + '*tls-creds' : 'str', '*server' : 'bool', '*wait' : 'bool', '*nodelay' : 'bool', diff --git a/qemu-char.c b/qemu-char.c index 55440bde5a..b9e5547ce1 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -35,6 +35,7 @@ #include "qemu/base64.h" #include "io/channel-socket.h" #include "io/channel-file.h" +#include "io/channel-tls.h" #include #include @@ -2532,9 +2533,11 @@ static CharDriverState *qemu_chr_open_udp(QIOChannelSocket *sioc, /* TCP Net console */ typedef struct { - QIOChannel *ioc; + QIOChannel *ioc; /* Client I/O channel */ + QIOChannelSocket *sioc; /* Client master channel */ QIOChannelSocket *listen_ioc; guint listen_tag; + QCryptoTLSCreds *tls_creds; int connected; int max_size; int do_telnetopt; @@ -2776,6 +2779,8 @@ static void tcp_chr_disconnect(CharDriverState *chr) QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); } remove_fd_in_watch(chr); + object_unref(OBJECT(s->sioc)); + s->sioc = NULL; object_unref(OBJECT(s->ioc)); s->ioc = NULL; g_free(chr->filename); @@ -2849,12 +2854,12 @@ static void tcp_chr_connect(void *opaque) { CharDriverState *chr = opaque; TCPCharDriver *s = chr->opaque; - QIOChannelSocket *sioc = 
QIO_CHANNEL_SOCKET(s->ioc); g_free(chr->filename); - chr->filename = sockaddr_to_str(&sioc->localAddr, sioc->localAddrLen, - &sioc->remoteAddr, sioc->remoteAddrLen, - s->is_listen, s->is_telnet); + chr->filename = sockaddr_to_str( + &s->sioc->localAddr, s->sioc->localAddrLen, + &s->sioc->remoteAddr, s->sioc->remoteAddrLen, + s->is_listen, s->is_telnet); s->connected = 1; if (s->ioc) { @@ -2943,6 +2948,57 @@ static void tcp_chr_telnet_init(CharDriverState *chr) init, NULL); } + +static void tcp_chr_tls_handshake(Object *source, + Error *err, + gpointer user_data) +{ + CharDriverState *chr = user_data; + TCPCharDriver *s = chr->opaque; + + if (err) { + tcp_chr_disconnect(chr); + } else { + if (s->do_telnetopt) { + tcp_chr_telnet_init(chr); + } else { + tcp_chr_connect(chr); + } + } +} + + +static void tcp_chr_tls_init(CharDriverState *chr) +{ + TCPCharDriver *s = chr->opaque; + QIOChannelTLS *tioc; + Error *err = NULL; + + if (s->is_listen) { + tioc = qio_channel_tls_new_server( + s->ioc, s->tls_creds, + NULL, /* XXX Use an ACL */ + &err); + } else { + tioc = qio_channel_tls_new_client( + s->ioc, s->tls_creds, + s->addr->u.inet->host, + &err); + } + if (tioc == NULL) { + error_free(err); + tcp_chr_disconnect(chr); + } + object_unref(OBJECT(s->ioc)); + s->ioc = QIO_CHANNEL(tioc); + + qio_channel_tls_handshake(tioc, + tcp_chr_tls_handshake, + chr, + NULL); +} + + static int tcp_chr_new_client(CharDriverState *chr, QIOChannelSocket *sioc) { TCPCharDriver *s = chr->opaque; @@ -2952,6 +3008,8 @@ static int tcp_chr_new_client(CharDriverState *chr, QIOChannelSocket *sioc) s->ioc = QIO_CHANNEL(sioc); object_ref(OBJECT(sioc)); + s->sioc = sioc; + object_ref(OBJECT(sioc)); if (s->do_nodelay) { qio_channel_set_delay(s->ioc, false); @@ -2961,10 +3019,14 @@ static int tcp_chr_new_client(CharDriverState *chr, QIOChannelSocket *sioc) s->listen_tag = 0; } - if (s->do_telnetopt) { - tcp_chr_telnet_init(chr); + if (s->tls_creds) { + tcp_chr_tls_init(chr); } else { - 
tcp_chr_connect(chr); + if (s->do_telnetopt) { + tcp_chr_telnet_init(chr); + } else { + tcp_chr_connect(chr); + } } return 0; @@ -3033,6 +3095,9 @@ static void tcp_chr_close(CharDriverState *chr) } g_free(s->read_msgfds); } + if (s->tls_creds) { + object_unref(OBJECT(s->tls_creds)); + } if (s->write_msgfds_num) { g_free(s->write_msgfds); } @@ -3563,6 +3628,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, const char *path = qemu_opt_get(opts, "path"); const char *host = qemu_opt_get(opts, "host"); const char *port = qemu_opt_get(opts, "port"); + const char *tls_creds = qemu_opt_get(opts, "tls-creds"); SocketAddress *addr; if (!path) { @@ -3574,6 +3640,11 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, error_setg(errp, "chardev: socket: no port given"); return; } + } else { + if (tls_creds) { + error_setg(errp, "TLS can only be used over TCP socket"); + return; + } } backend->u.socket = g_new0(ChardevSocket, 1); @@ -3589,6 +3660,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, backend->u.socket->wait = is_waitconnect; backend->u.socket->has_reconnect = true; backend->u.socket->reconnect = reconnect; + backend->u.socket->tls_creds = g_strdup(tls_creds); addr = g_new0(SocketAddress, 1); if (path) { @@ -4015,6 +4087,9 @@ QemuOptsList qemu_chardev_opts = { },{ .name = "telnet", .type = QEMU_OPT_BOOL, + },{ + .name = "tls-creds", + .type = QEMU_OPT_STRING, },{ .name = "width", .type = QEMU_OPT_NUMBER, @@ -4231,6 +4306,39 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, s->is_listen = is_listen; s->is_telnet = is_telnet; s->do_nodelay = do_nodelay; + if (sock->tls_creds) { + Object *creds; + creds = object_resolve_path_component( + object_get_objects_root(), sock->tls_creds); + if (!creds) { + error_setg(errp, "No TLS credentials with id '%s'", + sock->tls_creds); + goto error; + } + s->tls_creds = (QCryptoTLSCreds *) + object_dynamic_cast(creds, + 
TYPE_QCRYPTO_TLS_CREDS); + if (!s->tls_creds) { + error_setg(errp, "Object with id '%s' is not TLS credentials", + sock->tls_creds); + goto error; + } + object_ref(OBJECT(s->tls_creds)); + if (is_listen) { + if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + error_setg(errp, "%s", + "Expected TLS credentials for server endpoint"); + goto error; + } + } else { + if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { + error_setg(errp, "%s", + "Expected TLS credentials for client endpoint"); + goto error; + } + } + } + qapi_copy_SocketAddress(&s->addr, sock->addr); chr->opaque = s; @@ -4259,9 +4367,7 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, if (s->reconnect_time) { socket_try_connect(chr); } else if (!qemu_chr_open_socket_fd(chr, errp)) { - g_free(s); - qemu_chr_free_common(chr); - return NULL; + goto error; } if (is_listen && is_waitconnect) { @@ -4272,6 +4378,14 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, } return chr; + + error: + if (s->tls_creds) { + object_unref(OBJECT(s->tls_creds)); + } + g_free(s); + qemu_chr_free_common(chr); + return NULL; } static CharDriverState *qmp_chardev_open_udp(const char *id, diff --git a/qemu-options.hx b/qemu-options.hx index b4763ba226..f31a240bed 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2092,7 +2092,7 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev, "-chardev null,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" "-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4][,ipv6][,nodelay][,reconnect=seconds]\n" " [,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off]\n" - " [,logfile=PATH][,logappend=on|off] (tcp)\n" + " [,logfile=PATH][,logappend=on|off][,tls-creds=ID] (tcp)\n" "-chardev socket,id=id,path=path[,server][,nowait][,telnet][,reconnect=seconds]\n" " [,mux=on|off][,logfile=PATH][,logappend=on|off] (unix)\n" "-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr]\n" @@ -2172,7 +2172,7 @@ Further options to 
each backend are described below. A void device. This device will not emit any data, and will drop any data it receives. The null backend does not take any options. -@item -chardev socket ,id=@var{id} [@var{TCP options} or @var{unix options}] [,server] [,nowait] [,telnet] [,reconnect=@var{seconds}] +@item -chardev socket ,id=@var{id} [@var{TCP options} or @var{unix options}] [,server] [,nowait] [,telnet] [,reconnect=@var{seconds}] [,tls-creds=@var{id}] Create a two-way stream socket, which can be either a TCP or a unix socket. A unix socket will be created if @option{path} is specified. Behaviour is @@ -2190,6 +2190,11 @@ escape sequences. the remote end goes away. qemu will delay this many seconds and then attempt to reconnect. Zero disables reconnecting, and is the default. +@option{tls-creds} requests enablement of the TLS protocol for encryption, +and specifies the id of the TLS credentials to use for the handshake. The +credentials must be previously created with the @option{-object tls-creds} +argument. 
+ TCP and unix socket options are given below: @table @option From 8485140fa07f839aef65f7f782a958804d745615 Mon Sep 17 00:00:00 2001 From: Sitsofe Wheeler Date: Wed, 13 Jan 2016 20:50:26 +0000 Subject: [PATCH 05/49] docs: Style the command and its options in the synopsis Signed-off-by: Sitsofe Wheeler Message-Id: <1452718226-25001-1-git-send-email-sitsofe@yahoo.com> Signed-off-by: Paolo Bonzini --- fsdev/virtfs-proxy-helper.texi | 2 +- qemu-doc.texi | 8 ++++---- qemu-ga.texi | 2 +- qemu-img.texi | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fsdev/virtfs-proxy-helper.texi b/fsdev/virtfs-proxy-helper.texi index e60e3b9465..9a25d7ecf4 100644 --- a/fsdev/virtfs-proxy-helper.texi +++ b/fsdev/virtfs-proxy-helper.texi @@ -1,6 +1,6 @@ @example @c man begin SYNOPSIS -usage: virtfs-proxy-helper options +@command{virtfs-proxy-helper} @var{options} @c man end @end example diff --git a/qemu-doc.texi b/qemu-doc.texi index 7bc388231f..ca4d9de15e 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -259,7 +259,7 @@ Linux should boot and give you a prompt. @example @c man begin SYNOPSIS -usage: qemu-system-i386 [options] [@var{disk_image}] +@command{qemu-system-i386} [@var{options}] [@var{disk_image}] @c man end @end example @@ -1406,7 +1406,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}. This USB device implements the USB Transport Layer of HCI. Example usage: @example -qemu-system-i386 [...OPTIONS...] -usbdevice bt:hci,vlan=3 -bt device:keyboard,vlan=3 +@command{qemu-system-i386} [...@var{OPTIONS}...] @option{-usbdevice} bt:hci,vlan=3 @option{-bt} device:keyboard,vlan=3 @end example @end table @@ -2755,7 +2755,7 @@ qemu-i386 /usr/local/qemu-i386/wine/bin/wine \ @subsection Command line options @example -usage: qemu-i386 [-h] [-d] [-L path] [-s size] [-cpu model] [-g port] [-B offset] [-R size] program [arguments...] 
+@command{qemu-i386} [@option{-h}] [@option{-d}] [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-cpu} @var{model}] [@option{-g} @var{port}] [@option{-B} @var{offset}] [@option{-R} @var{size}] @var{program} [@var{arguments}...] @end example @table @option @@ -2897,7 +2897,7 @@ qemu-sparc64 /bin/ls @subsection Command line options @example -usage: qemu-sparc64 [-h] [-d] [-L path] [-s size] [-bsd type] program [arguments...] +@command{qemu-sparc64} [@option{-h}] [@option{-d}] [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-bsd} @var{type}] @var{program} [@var{arguments}...] @end example @table @option diff --git a/qemu-ga.texi b/qemu-ga.texi index 536a9b5241..0e53bf6b2c 100644 --- a/qemu-ga.texi +++ b/qemu-ga.texi @@ -1,6 +1,6 @@ @example @c man begin SYNOPSIS -usage: qemu-ga [OPTIONS] +@command{qemu-ga} [@var{OPTIONS}] @c man end @end example diff --git a/qemu-img.texi b/qemu-img.texi index 55c6be391d..7163a108e2 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -1,6 +1,6 @@ @example @c man begin SYNOPSIS -usage: qemu-img command [command options] +@command{qemu-img} @var{command} [@var{command} @var{options}] @c man end @end example From 27ef9cb0e77eda46618ea084adffa63ebde5be80 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Jan 2016 11:25:45 +0100 Subject: [PATCH 06/49] qemu-char: avoid leak in qemu_chr_open_pp_fd drv leaks if qemu_chr_alloc returns an error.
Signed-off-by: Paolo Bonzini --- qemu-char.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/qemu-char.c b/qemu-char.c index b9e5547ce1..ca53e8c376 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -1740,18 +1740,19 @@ static CharDriverState *qemu_chr_open_pp_fd(int fd, return NULL; } - drv = g_new0(ParallelCharDriver, 1); - drv->fd = fd; - drv->mode = IEEE1284_MODE_COMPAT; - chr = qemu_chr_alloc(backend, errp); if (!chr) { return NULL; } + + drv = g_new0(ParallelCharDriver, 1); + chr->opaque = drv; chr->chr_write = null_chr_write; chr->chr_ioctl = pp_ioctl; chr->chr_close = pp_close; - chr->opaque = drv; + + drv->fd = fd; + drv->mode = IEEE1284_MODE_COMPAT; return chr; } From c81ab0ac90ce50d85a50934f7a519a8bc68cb155 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:31 +0100 Subject: [PATCH 07/49] scripts/kvm/kvm_stat: Cleanup of multiple imports Removed multiple imports of the same module and moved all imports to the top. It is not necessary to import a module each time one of its functions/classes is used. For readability each import should get its own line. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-2-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 7e5d25612b..3fadbfba93 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -12,8 +12,16 @@ # the COPYING file in the top-level directory. 
import curses -import sys, os, time, optparse, ctypes -from ctypes import * +import sys +import os +import time +import optparse +import ctypes +import fcntl +import resource +import struct +import re +from collections import defaultdict class DebugfsProvider(object): def __init__(self): @@ -285,12 +293,10 @@ filters['kvm_userspace_exit'] = ('reason', invert(userspace_exit_reasons)) if exit_reasons: filters['kvm_exit'] = ('exit_reason', invert(exit_reasons)) -import struct, array - libc = ctypes.CDLL('libc.so.6') syscall = libc.syscall get_errno = libc.__errno_location -get_errno.restype = POINTER(c_int) +get_errno.restype = ctypes.POINTER(ctypes.c_int) class perf_event_attr(ctypes.Structure): _fields_ = [('type', ctypes.c_uint32), @@ -334,8 +340,6 @@ PERF_FORMAT_TOTAL_TIME_RUNNING = 1 << 1 PERF_FORMAT_ID = 1 << 2 PERF_FORMAT_GROUP = 1 << 3 -import re - sys_tracing = '/sys/kernel/debug/tracing' class Group(object): @@ -378,17 +382,13 @@ class Event(object): err = get_errno()[0] raise Exception('perf_event_open failed, errno = ' + err.__str__()) if filter: - import fcntl fcntl.ioctl(fd, ioctl_numbers['SET_FILTER'], filter) self.fd = fd def enable(self): - import fcntl fcntl.ioctl(self.fd, ioctl_numbers['ENABLE'], 0) def disable(self): - import fcntl fcntl.ioctl(self.fd, ioctl_numbers['DISABLE'], 0) def reset(self): - import fcntl fcntl.ioctl(self.fd, ioctl_numbers['RESET'], 0) class TracepointProvider(object): @@ -426,7 +426,6 @@ class TracepointProvider(object): def _setup(self, _fields): self._fields = _fields cpus = self._online_cpus() - import resource nfiles = len(cpus) * 1000 resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles)) events = [] @@ -454,7 +453,6 @@ class TracepointProvider(object): else: event.disable() def read(self): - from collections import defaultdict ret = defaultdict(int) for group in self.group_leaders: for name, val in group.read().iteritems(): @@ -468,7 +466,6 @@ class Stats: self._update() def _update(self): def wanted(key): - 
import re if not self.fields_filter: return True return re.match(self.fields_filter, key) is not None @@ -640,7 +637,6 @@ stats = Stats(providers, fields = options.fields) if options.log: log(stats) elif not options.once: - import curses.wrapper curses.wrapper(tui, stats) else: batch(stats) From 6590045e5dd2fb0b1d7cdc047ae0c52fd4bb5276 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:32 +0100 Subject: [PATCH 08/49] scripts/kvm/kvm_stat: Replaced os.listdir with os.walk Os.walk gives back lists of directories and files, no need to filter directories from the list that listdir gives back. To make it better understandable a wrapper with docstring was introduced. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-3-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 3fadbfba93..63232767f3 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -26,7 +26,7 @@ from collections import defaultdict class DebugfsProvider(object): def __init__(self): self.base = '/sys/kernel/debug/kvm' - self._fields = os.listdir(self.base) + self._fields = walkdir(self.base)[2] def fields(self): return self._fields def select(self, fields): @@ -285,6 +285,15 @@ def detect_platform(): detect_platform() + +def walkdir(path): + """Returns os.walk() data for specified directory. + + As it is only a wrapper it returns the same 3-tuple of (dirpath, + dirnames, filenames). 
+ """ + return next(os.walk(path)) + def invert(d): return dict((x[1], x[0]) for x in d.iteritems()) @@ -394,9 +403,7 @@ class Event(object): class TracepointProvider(object): def __init__(self): path = os.path.join(sys_tracing, 'events', 'kvm') - fields = [f - for f in os.listdir(path) - if os.path.isdir(os.path.join(path, f))] + fields = walkdir(path)[1] extra = [] for f in fields: if f in filters: From db3e5d9a22cb533d4834746b82a5572ad8e3a540 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:33 +0100 Subject: [PATCH 09/49] scripts/kvm/kvm_stat: Make constants uppercase Constants should be uppercase with separating underscores, as requested in PEP8. This helps identifying them when reading the code. Reviewed-by: Jason J. Herne Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-4-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 64 ++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 63232767f3..c4bf900e1b 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -36,7 +36,7 @@ class DebugfsProvider(object): return int(file(self.base + '/' + key).read()) return dict([(key, val(key)) for key in self._fields]) -vmx_exit_reasons = { +VMX_EXIT_REASONS = { 0: 'EXCEPTION_NMI', 1: 'EXTERNAL_INTERRUPT', 2: 'TRIPLE_FAULT', @@ -78,7 +78,7 @@ vmx_exit_reasons = { 58: 'INVPCID', } -svm_exit_reasons = { +SVM_EXIT_REASONS = { 0x000: 'READ_CR0', 0x003: 'READ_CR3', 0x004: 'READ_CR4', @@ -154,7 +154,7 @@ svm_exit_reasons = { } # EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h) -aarch64_exit_reasons = { +AARCH64_EXIT_REASONS = { 0x00: 'UNKNOWN', 0x01: 'WFI', 0x03: 'CP15_32', @@ -193,7 +193,7 @@ aarch64_exit_reasons = { } # From include/uapi/linux/kvm.h, KVM_EXIT_xxx -userspace_exit_reasons = { +USERSPACE_EXIT_REASONS = { 0: 'UNKNOWN', 1: 'EXCEPTION', 2: 'IO', @@ -221,15 +221,15 @@ 
userspace_exit_reasons = { 24: 'SYSTEM_EVENT', } -x86_exit_reasons = { - 'vmx': vmx_exit_reasons, - 'svm': svm_exit_reasons, +X86_EXIT_REASONS = { + 'vmx': VMX_EXIT_REASONS, + 'svm': SVM_EXIT_REASONS, } -sc_perf_evt_open = None -exit_reasons = None +SC_PERF_EVT_OPEN = None +EXIT_REASONS = None -ioctl_numbers = { +IOCTL_NUMBERS = { 'SET_FILTER' : 0x40082406, 'ENABLE' : 0x00002400, 'DISABLE' : 0x00002401, @@ -238,19 +238,19 @@ ioctl_numbers = { def x86_init(flag): globals().update({ - 'sc_perf_evt_open' : 298, - 'exit_reasons' : x86_exit_reasons[flag], + 'SC_PERF_EVT_OPEN' : 298, + 'EXIT_REASONS' : X86_EXIT_REASONS[flag], }) def s390_init(): globals().update({ - 'sc_perf_evt_open' : 331 + 'SC_PERF_EVT_OPEN' : 331 }) def ppc_init(): globals().update({ - 'sc_perf_evt_open' : 319, - 'ioctl_numbers' : { + 'SC_PERF_EVT_OPEN' : 319, + 'IOCTL_NUMBERS' : { 'SET_FILTER' : 0x80002406 | (ctypes.sizeof(ctypes.c_char_p) << 16), 'ENABLE' : 0x20002400, 'DISABLE' : 0x20002401, @@ -259,8 +259,8 @@ def ppc_init(): def aarch64_init(): globals().update({ - 'sc_perf_evt_open' : 241, - 'exit_reasons' : aarch64_exit_reasons, + 'SC_PERF_EVT_OPEN' : 241, + 'EXIT_REASONS' : AARCH64_EXIT_REASONS, }) def detect_platform(): @@ -274,7 +274,7 @@ def detect_platform(): for line in file('/proc/cpuinfo').readlines(): if line.startswith('flags'): for flag in line.split(): - if flag in x86_exit_reasons: + if flag in X86_EXIT_REASONS: x86_init(flag) return elif line.startswith('vendor_id'): @@ -298,9 +298,9 @@ def invert(d): return dict((x[1], x[0]) for x in d.iteritems()) filters = {} -filters['kvm_userspace_exit'] = ('reason', invert(userspace_exit_reasons)) -if exit_reasons: - filters['kvm_exit'] = ('exit_reason', invert(exit_reasons)) +filters['kvm_userspace_exit'] = ('reason', invert(USERSPACE_EXIT_REASONS)) +if EXIT_REASONS: + filters['kvm_exit'] = ('exit_reason', invert(EXIT_REASONS)) libc = ctypes.CDLL('libc.so.6') syscall = libc.syscall @@ -321,7 +321,7 @@ class 
perf_event_attr(ctypes.Structure): ('bp_len', ctypes.c_uint64), ] def _perf_event_open(attr, pid, cpu, group_fd, flags): - return syscall(sc_perf_evt_open, ctypes.pointer(attr), ctypes.c_int(pid), + return syscall(SC_PERF_EVT_OPEN, ctypes.pointer(attr), ctypes.c_int(pid), ctypes.c_int(cpu), ctypes.c_int(group_fd), ctypes.c_long(flags)) @@ -391,14 +391,14 @@ class Event(object): err = get_errno()[0] raise Exception('perf_event_open failed, errno = ' + err.__str__()) if filter: - fcntl.ioctl(fd, ioctl_numbers['SET_FILTER'], filter) + fcntl.ioctl(fd, IOCTL_NUMBERS['SET_FILTER'], filter) self.fd = fd def enable(self): - fcntl.ioctl(self.fd, ioctl_numbers['ENABLE'], 0) + fcntl.ioctl(self.fd, IOCTL_NUMBERS['ENABLE'], 0) def disable(self): - fcntl.ioctl(self.fd, ioctl_numbers['DISABLE'], 0) + fcntl.ioctl(self.fd, IOCTL_NUMBERS['DISABLE'], 0) def reset(self): - fcntl.ioctl(self.fd, ioctl_numbers['RESET'], 0) + fcntl.ioctl(self.fd, IOCTL_NUMBERS['RESET'], 0) class TracepointProvider(object): def __init__(self): @@ -505,8 +505,8 @@ if not os.access('/sys/kernel/debug/kvm', os.F_OK): print "and ensure the kvm modules are loaded" sys.exit(1) -label_width = 40 -number_width = 10 +LABEL_WIDTH = 40 +NUMBER_WIDTH = 10 def tui(screen, stats): curses.use_default_colors() @@ -524,8 +524,8 @@ def tui(screen, stats): screen.erase() screen.addstr(0, 0, 'kvm statistics') screen.addstr(2, 1, 'Event') - screen.addstr(2, 1 + label_width + number_width - len('Total'), 'Total') - screen.addstr(2, 1 + label_width + number_width + 8 - len('Current'), 'Current') + screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - len('Total'), 'Total') + screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 - len('Current'), 'Current') row = 3 s = stats.get() def sortkey(x): @@ -541,9 +541,9 @@ def tui(screen, stats): break col = 1 screen.addstr(row, col, key) - col += label_width + col += LABEL_WIDTH screen.addstr(row, col, '%10d' % (values[0],)) - col += number_width + col += NUMBER_WIDTH if values[1] is not 
None: screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) row += 1 From fff51233b75b745fe6d34f30f39049818d56944b Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:34 +0100 Subject: [PATCH 10/49] scripts/kvm/kvm_stat: Removed unneeded PERF constants Only two of the constants are actually needed to set up the events, so the others were removed. All variables that used them were also removed. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-5-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index c4bf900e1b..7a8617dced 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -325,29 +325,8 @@ def _perf_event_open(attr, pid, cpu, group_fd, flags): ctypes.c_int(cpu), ctypes.c_int(group_fd), ctypes.c_long(flags)) -PERF_TYPE_HARDWARE = 0 -PERF_TYPE_SOFTWARE = 1 -PERF_TYPE_TRACEPOINT = 2 -PERF_TYPE_HW_CACHE = 3 -PERF_TYPE_RAW = 4 -PERF_TYPE_BREAKPOINT = 5 - -PERF_SAMPLE_IP = 1 << 0 -PERF_SAMPLE_TID = 1 << 1 -PERF_SAMPLE_TIME = 1 << 2 -PERF_SAMPLE_ADDR = 1 << 3 -PERF_SAMPLE_READ = 1 << 4 -PERF_SAMPLE_CALLCHAIN = 1 << 5 -PERF_SAMPLE_ID = 1 << 6 -PERF_SAMPLE_CPU = 1 << 7 -PERF_SAMPLE_PERIOD = 1 << 8 -PERF_SAMPLE_STREAM_ID = 1 << 9 -PERF_SAMPLE_RAW = 1 << 10 - -PERF_FORMAT_TOTAL_TIME_ENABLED = 1 << 0 -PERF_FORMAT_TOTAL_TIME_RUNNING = 1 << 1 -PERF_FORMAT_ID = 1 << 2 -PERF_FORMAT_GROUP = 1 << 3 +PERF_TYPE_TRACEPOINT = 2 +PERF_FORMAT_GROUP = 1 << 3 sys_tracing = '/sys/kernel/debug/tracing' @@ -378,9 +357,6 @@ class Event(object): tracepoint, 'id') id = int(file(id_path).read()) attr.config = id - attr.sample_type = (PERF_SAMPLE_RAW - | PERF_SAMPLE_TIME - | PERF_SAMPLE_CPU) attr.sample_period = 1 attr.read_format = PERF_FORMAT_GROUP group_leader = -1 From dbedce0ebc88c9b4d3edc5eb1d1dca8e949141ef Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 
11 Jan 2016 16:17:35 +0100 Subject: [PATCH 11/49] scripts/kvm/kvm_stat: Mark globals in functions Updating globals over the globals().update() method is not the standard way of changing globals. Marking variables as global and modifying them the standard way is better readable. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-6-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 7a8617dced..83450bebc5 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -237,31 +237,34 @@ IOCTL_NUMBERS = { } def x86_init(flag): - globals().update({ - 'SC_PERF_EVT_OPEN' : 298, - 'EXIT_REASONS' : X86_EXIT_REASONS[flag], - }) + global SC_PERF_EVT_OPEN + global EXIT_REASONS + + SC_PERF_EVT_OPEN = 298 + EXIT_REASONS = X86_EXIT_REASONS[flag] def s390_init(): - globals().update({ - 'SC_PERF_EVT_OPEN' : 331 - }) + global SC_PERF_EVT_OPEN + + SC_PERF_EVT_OPEN = 331 def ppc_init(): - globals().update({ - 'SC_PERF_EVT_OPEN' : 319, - 'IOCTL_NUMBERS' : { - 'SET_FILTER' : 0x80002406 | (ctypes.sizeof(ctypes.c_char_p) << 16), - 'ENABLE' : 0x20002400, - 'DISABLE' : 0x20002401, - } - }) + global SC_PERF_EVT_OPEN + global IOCTL_NUMBERS + + SC_PERF_EVT_OPEN = 319 + + IOCTL_NUMBERS['ENABLE'] = 0x20002400 + IOCTL_NUMBERS['DISABLE'] = 0x20002401 + IOCTL_NUMBERS['SET_FILTER'] = 0x80002406 | (ctypes.sizeof(ctypes.c_char_p) + << 16) def aarch64_init(): - globals().update({ - 'SC_PERF_EVT_OPEN' : 241, - 'EXIT_REASONS' : AARCH64_EXIT_REASONS, - }) + global SC_PERF_EVT_OPEN + global EXIT_REASONS + + SC_PERF_EVT_OPEN = 241 + EXIT_REASONS = AARCH64_EXIT_REASONS def detect_platform(): if os.uname()[4].startswith('ppc'): From a6ad61f98742b881b0bce77efb5664d461735c6b Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:36 +0100 Subject: [PATCH 12/49] scripts/kvm/kvm_stat: 
Invert dictionaries The exit reasons dictionaries were defined number -> value but later on were accessed the other way around. Therefore an invert function inverted them. Defining them the right way removes the need to invert them and therefore also speeds up the script's setup process. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-7-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 349 +++++++++++++++++++++---------------------- 1 file changed, 173 insertions(+), 176 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 83450bebc5..d53945ee0c 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -37,188 +37,188 @@ class DebugfsProvider(object): return dict([(key, val(key)) for key in self._fields]) VMX_EXIT_REASONS = { - 0: 'EXCEPTION_NMI', - 1: 'EXTERNAL_INTERRUPT', - 2: 'TRIPLE_FAULT', - 7: 'PENDING_INTERRUPT', - 8: 'NMI_WINDOW', - 9: 'TASK_SWITCH', - 10: 'CPUID', - 12: 'HLT', - 14: 'INVLPG', - 15: 'RDPMC', - 16: 'RDTSC', - 18: 'VMCALL', - 19: 'VMCLEAR', - 20: 'VMLAUNCH', - 21: 'VMPTRLD', - 22: 'VMPTRST', - 23: 'VMREAD', - 24: 'VMRESUME', - 25: 'VMWRITE', - 26: 'VMOFF', - 27: 'VMON', - 28: 'CR_ACCESS', - 29: 'DR_ACCESS', - 30: 'IO_INSTRUCTION', - 31: 'MSR_READ', - 32: 'MSR_WRITE', - 33: 'INVALID_STATE', - 36: 'MWAIT_INSTRUCTION', - 39: 'MONITOR_INSTRUCTION', - 40: 'PAUSE_INSTRUCTION', - 41: 'MCE_DURING_VMENTRY', - 43: 'TPR_BELOW_THRESHOLD', - 44: 'APIC_ACCESS', - 48: 'EPT_VIOLATION', - 49: 'EPT_MISCONFIG', - 54: 'WBINVD', - 55: 'XSETBV', - 56: 'APIC_WRITE', - 58: 'INVPCID', + 'EXCEPTION_NMI': 0, + 'EXTERNAL_INTERRUPT': 1, + 'TRIPLE_FAULT': 2, + 'PENDING_INTERRUPT': 7, + 'NMI_WINDOW': 8, + 'TASK_SWITCH': 9, + 'CPUID': 10, + 'HLT': 12, + 'INVLPG': 14, + 'RDPMC': 15, + 'RDTSC': 16, + 'VMCALL': 18, + 'VMCLEAR': 19, + 'VMLAUNCH': 20, + 'VMPTRLD': 21, + 'VMPTRST': 22, + 'VMREAD': 23, + 'VMRESUME': 24, + 'VMWRITE': 25, + 'VMOFF': 26, + 'VMON': 27, + 'CR_ACCESS': 28, +
'DR_ACCESS': 29, + 'IO_INSTRUCTION': 30, + 'MSR_READ': 31, + 'MSR_WRITE': 32, + 'INVALID_STATE': 33, + 'MWAIT_INSTRUCTION': 36, + 'MONITOR_INSTRUCTION': 39, + 'PAUSE_INSTRUCTION': 40, + 'MCE_DURING_VMENTRY': 41, + 'TPR_BELOW_THRESHOLD': 43, + 'APIC_ACCESS': 44, + 'EPT_VIOLATION': 48, + 'EPT_MISCONFIG': 49, + 'WBINVD': 54, + 'XSETBV': 55, + 'APIC_WRITE': 56, + 'INVPCID': 58, } SVM_EXIT_REASONS = { - 0x000: 'READ_CR0', - 0x003: 'READ_CR3', - 0x004: 'READ_CR4', - 0x008: 'READ_CR8', - 0x010: 'WRITE_CR0', - 0x013: 'WRITE_CR3', - 0x014: 'WRITE_CR4', - 0x018: 'WRITE_CR8', - 0x020: 'READ_DR0', - 0x021: 'READ_DR1', - 0x022: 'READ_DR2', - 0x023: 'READ_DR3', - 0x024: 'READ_DR4', - 0x025: 'READ_DR5', - 0x026: 'READ_DR6', - 0x027: 'READ_DR7', - 0x030: 'WRITE_DR0', - 0x031: 'WRITE_DR1', - 0x032: 'WRITE_DR2', - 0x033: 'WRITE_DR3', - 0x034: 'WRITE_DR4', - 0x035: 'WRITE_DR5', - 0x036: 'WRITE_DR6', - 0x037: 'WRITE_DR7', - 0x040: 'EXCP_BASE', - 0x060: 'INTR', - 0x061: 'NMI', - 0x062: 'SMI', - 0x063: 'INIT', - 0x064: 'VINTR', - 0x065: 'CR0_SEL_WRITE', - 0x066: 'IDTR_READ', - 0x067: 'GDTR_READ', - 0x068: 'LDTR_READ', - 0x069: 'TR_READ', - 0x06a: 'IDTR_WRITE', - 0x06b: 'GDTR_WRITE', - 0x06c: 'LDTR_WRITE', - 0x06d: 'TR_WRITE', - 0x06e: 'RDTSC', - 0x06f: 'RDPMC', - 0x070: 'PUSHF', - 0x071: 'POPF', - 0x072: 'CPUID', - 0x073: 'RSM', - 0x074: 'IRET', - 0x075: 'SWINT', - 0x076: 'INVD', - 0x077: 'PAUSE', - 0x078: 'HLT', - 0x079: 'INVLPG', - 0x07a: 'INVLPGA', - 0x07b: 'IOIO', - 0x07c: 'MSR', - 0x07d: 'TASK_SWITCH', - 0x07e: 'FERR_FREEZE', - 0x07f: 'SHUTDOWN', - 0x080: 'VMRUN', - 0x081: 'VMMCALL', - 0x082: 'VMLOAD', - 0x083: 'VMSAVE', - 0x084: 'STGI', - 0x085: 'CLGI', - 0x086: 'SKINIT', - 0x087: 'RDTSCP', - 0x088: 'ICEBP', - 0x089: 'WBINVD', - 0x08a: 'MONITOR', - 0x08b: 'MWAIT', - 0x08c: 'MWAIT_COND', - 0x08d: 'XSETBV', - 0x400: 'NPF', + 'READ_CR0': 0x000, + 'READ_CR3': 0x003, + 'READ_CR4': 0x004, + 'READ_CR8': 0x008, + 'WRITE_CR0': 0x010, + 'WRITE_CR3': 0x013, + 'WRITE_CR4': 0x014, + 
'WRITE_CR8': 0x018, + 'READ_DR0': 0x020, + 'READ_DR1': 0x021, + 'READ_DR2': 0x022, + 'READ_DR3': 0x023, + 'READ_DR4': 0x024, + 'READ_DR5': 0x025, + 'READ_DR6': 0x026, + 'READ_DR7': 0x027, + 'WRITE_DR0': 0x030, + 'WRITE_DR1': 0x031, + 'WRITE_DR2': 0x032, + 'WRITE_DR3': 0x033, + 'WRITE_DR4': 0x034, + 'WRITE_DR5': 0x035, + 'WRITE_DR6': 0x036, + 'WRITE_DR7': 0x037, + 'EXCP_BASE': 0x040, + 'INTR': 0x060, + 'NMI': 0x061, + 'SMI': 0x062, + 'INIT': 0x063, + 'VINTR': 0x064, + 'CR0_SEL_WRITE': 0x065, + 'IDTR_READ': 0x066, + 'GDTR_READ': 0x067, + 'LDTR_READ': 0x068, + 'TR_READ': 0x069, + 'IDTR_WRITE': 0x06a, + 'GDTR_WRITE': 0x06b, + 'LDTR_WRITE': 0x06c, + 'TR_WRITE': 0x06d, + 'RDTSC': 0x06e, + 'RDPMC': 0x06f, + 'PUSHF': 0x070, + 'POPF': 0x071, + 'CPUID': 0x072, + 'RSM': 0x073, + 'IRET': 0x074, + 'SWINT': 0x075, + 'INVD': 0x076, + 'PAUSE': 0x077, + 'HLT': 0x078, + 'INVLPG': 0x079, + 'INVLPGA': 0x07a, + 'IOIO': 0x07b, + 'MSR': 0x07c, + 'TASK_SWITCH': 0x07d, + 'FERR_FREEZE': 0x07e, + 'SHUTDOWN': 0x07f, + 'VMRUN': 0x080, + 'VMMCALL': 0x081, + 'VMLOAD': 0x082, + 'VMSAVE': 0x083, + 'STGI': 0x084, + 'CLGI': 0x085, + 'SKINIT': 0x086, + 'RDTSCP': 0x087, + 'ICEBP': 0x088, + 'WBINVD': 0x089, + 'MONITOR': 0x08a, + 'MWAIT': 0x08b, + 'MWAIT_COND': 0x08c, + 'XSETBV': 0x08d, + 'NPF': 0x400, } # EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h) AARCH64_EXIT_REASONS = { - 0x00: 'UNKNOWN', - 0x01: 'WFI', - 0x03: 'CP15_32', - 0x04: 'CP15_64', - 0x05: 'CP14_MR', - 0x06: 'CP14_LS', - 0x07: 'FP_ASIMD', - 0x08: 'CP10_ID', - 0x0C: 'CP14_64', - 0x0E: 'ILL_ISS', - 0x11: 'SVC32', - 0x12: 'HVC32', - 0x13: 'SMC32', - 0x15: 'SVC64', - 0x16: 'HVC64', - 0x17: 'SMC64', - 0x18: 'SYS64', - 0x20: 'IABT', - 0x21: 'IABT_HYP', - 0x22: 'PC_ALIGN', - 0x24: 'DABT', - 0x25: 'DABT_HYP', - 0x26: 'SP_ALIGN', - 0x28: 'FP_EXC32', - 0x2C: 'FP_EXC64', - 0x2F: 'SERROR', - 0x30: 'BREAKPT', - 0x31: 'BREAKPT_HYP', - 0x32: 'SOFTSTP', - 0x33: 'SOFTSTP_HYP', - 0x34: 'WATCHPT', - 0x35: 'WATCHPT_HYP', - 0x38: 'BKPT32', - 
0x3A: 'VECTOR32', - 0x3C: 'BRK64', + 'UNKNOWN': 0x00, + 'WFI': 0x01, + 'CP15_32': 0x03, + 'CP15_64': 0x04, + 'CP14_MR': 0x05, + 'CP14_LS': 0x06, + 'FP_ASIMD': 0x07, + 'CP10_ID': 0x08, + 'CP14_64': 0x0C, + 'ILL_ISS': 0x0E, + 'SVC32': 0x11, + 'HVC32': 0x12, + 'SMC32': 0x13, + 'SVC64': 0x15, + 'HVC64': 0x16, + 'SMC64': 0x17, + 'SYS64': 0x18, + 'IABT': 0x20, + 'IABT_HYP': 0x21, + 'PC_ALIGN': 0x22, + 'DABT': 0x24, + 'DABT_HYP': 0x25, + 'SP_ALIGN': 0x26, + 'FP_EXC32': 0x28, + 'FP_EXC64': 0x2C, + 'SERROR': 0x2F, + 'BREAKPT': 0x30, + 'BREAKPT_HYP': 0x31, + 'SOFTSTP': 0x32, + 'SOFTSTP_HYP': 0x33, + 'WATCHPT': 0x34, + 'WATCHPT_HYP': 0x35, + 'BKPT32': 0x38, + 'VECTOR32': 0x3A, + 'BRK64': 0x3C, } # From include/uapi/linux/kvm.h, KVM_EXIT_xxx USERSPACE_EXIT_REASONS = { - 0: 'UNKNOWN', - 1: 'EXCEPTION', - 2: 'IO', - 3: 'HYPERCALL', - 4: 'DEBUG', - 5: 'HLT', - 6: 'MMIO', - 7: 'IRQ_WINDOW_OPEN', - 8: 'SHUTDOWN', - 9: 'FAIL_ENTRY', - 10: 'INTR', - 11: 'SET_TPR', - 12: 'TPR_ACCESS', - 13: 'S390_SIEIC', - 14: 'S390_RESET', - 15: 'DCR', - 16: 'NMI', - 17: 'INTERNAL_ERROR', - 18: 'OSI', - 19: 'PAPR_HCALL', - 20: 'S390_UCONTROL', - 21: 'WATCHDOG', - 22: 'S390_TSCH', - 23: 'EPR', - 24: 'SYSTEM_EVENT', + 'UNKNOWN': 0, + 'EXCEPTION': 1, + 'IO': 2, + 'HYPERCALL': 3, + 'DEBUG': 4, + 'HLT': 5, + 'MMIO': 6, + 'IRQ_WINDOW_OPEN': 7, + 'SHUTDOWN': 8, + 'FAIL_ENTRY': 9, + 'INTR': 10, + 'SET_TPR': 11, + 'TPR_ACCESS': 12, + 'S390_SIEIC': 13, + 'S390_RESET': 14, + 'DCR': 15, + 'NMI': 16, + 'INTERNAL_ERROR': 17, + 'OSI': 18, + 'PAPR_HCALL': 19, + 'S390_UCONTROL': 20, + 'WATCHDOG': 21, + 'S390_TSCH': 22, + 'EPR': 23, + 'SYSTEM_EVENT': 24, } X86_EXIT_REASONS = { @@ -297,13 +297,10 @@ def walkdir(path): """ return next(os.walk(path)) -def invert(d): - return dict((x[1], x[0]) for x in d.iteritems()) - filters = {} -filters['kvm_userspace_exit'] = ('reason', invert(USERSPACE_EXIT_REASONS)) +filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) if EXIT_REASONS: - filters['kvm_exit'] = 
('exit_reason', invert(EXIT_REASONS)) + filters['kvm_exit'] = ('exit_reason', EXIT_REASONS) libc = ctypes.CDLL('libc.so.6') syscall = libc.syscall From 6fbff649d75137fd808aaab1e78504a786bcdd29 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:37 +0100 Subject: [PATCH 13/49] scripts/kvm/kvm_stat: Cleanup of path variables Paths to debugfs and trace dirs are now specified globally to remove redundancies in the code. Reviewed-by: Jason J. Herne Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-8-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index d53945ee0c..5ca09f46d8 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -25,15 +25,14 @@ from collections import defaultdict class DebugfsProvider(object): def __init__(self): - self.base = '/sys/kernel/debug/kvm' - self._fields = walkdir(self.base)[2] + self._fields = walkdir(PATH_DEBUGFS_KVM)[2] def fields(self): return self._fields def select(self, fields): self._fields = fields def read(self): def val(key): - return int(file(self.base + '/' + key).read()) + return int(file(PATH_DEBUGFS_KVM + '/' + key).read()) return dict([(key, val(key)) for key in self._fields]) VMX_EXIT_REASONS = { @@ -328,7 +327,8 @@ def _perf_event_open(attr, pid, cpu, group_fd, flags): PERF_TYPE_TRACEPOINT = 2 PERF_FORMAT_GROUP = 1 << 3 -sys_tracing = '/sys/kernel/debug/tracing' +PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' +PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' class Group(object): def __init__(self, cpu): @@ -353,7 +353,7 @@ class Event(object): attr = perf_event_attr() attr.type = PERF_TYPE_TRACEPOINT attr.size = ctypes.sizeof(attr) - id_path = os.path.join(sys_tracing, 'events', event_set, + id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', event_set, tracepoint, 'id') id = int(file(id_path).read()) 
attr.config = id @@ -378,7 +378,7 @@ class Event(object): class TracepointProvider(object): def __init__(self): - path = os.path.join(sys_tracing, 'events', 'kvm') + path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm') fields = walkdir(path)[1] extra = [] for f in fields: @@ -476,7 +476,7 @@ class Stats: if not os.access('/sys/kernel/debug', os.F_OK): print 'Please enable CONFIG_DEBUG_FS in your kernel' sys.exit(1) -if not os.access('/sys/kernel/debug/kvm', os.F_OK): +if not os.access(PATH_DEBUGFS_KVM, os.F_OK): print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')" print "and ensure the kvm modules are loaded" sys.exit(1) From 7aa4ee5a60c6ac651fd2e60b783630e35a7e3ff4 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:38 +0100 Subject: [PATCH 14/49] scripts/kvm/kvm_stat: Improve debugfs access checking Access checking with F_OK was replaced with the better readable os.path.exists(). On Linux exists() returns False when the user doesn't have sufficient permissions for statting the directory. Therefore the error message now states that sufficient rights are needed when the check fails. Also added check for /sys/kernel/debug/tracing/. 
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-9-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 5ca09f46d8..6f0692d60a 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -473,12 +473,18 @@ class Stats: self.values[key] = (newval, newdelta) return self.values -if not os.access('/sys/kernel/debug', os.F_OK): - print 'Please enable CONFIG_DEBUG_FS in your kernel' +if not os.path.exists('/sys/kernel/debug'): + sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.') sys.exit(1) -if not os.access(PATH_DEBUGFS_KVM, os.F_OK): - print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')" - print "and ensure the kvm modules are loaded" +if not os.path.exists(PATH_DEBUGFS_KVM): + sys.stderr.write("Please make sure, that debugfs is mounted and " + "readable by the current user:\n" + "('mount -t debugfs debugfs /sys/kernel/debug')\n" + "Also ensure, that the kvm modules are loaded.\n") + sys.exit(1) +if not os.path.exists(PATH_DEBUGFS_TRACING): + sys.stderr.write("Please make {0} readable by the current user.\n" + .format(PATH_DEBUGFS_TRACING)) sys.exit(1) LABEL_WIDTH = 40 From 639ce1831082084af80290c79f06a5794a3caa0b Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:39 +0100 Subject: [PATCH 15/49] scripts/kvm/kvm_stat: Introduce main function The main function should be the main location for initialization and helps encapsulating variables into a scope. This way they don't have to be global and might be mistaken for local ones. As the providers variable is scoped now it can't be accessed from within the Stats class. Hence, the global access to the variable was changed to a local one. Reviewed-by: Jason J. 
Herne Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-10-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 145 +++++++++++++++++++++++-------------------- 1 file changed, 78 insertions(+), 67 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 6f0692d60a..9f943ef202 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -285,8 +285,6 @@ def detect_platform(): s390_init() return -detect_platform() - def walkdir(path): """Returns os.walk() data for specified directory. @@ -453,7 +451,7 @@ class Stats: return True return re.match(self.fields_filter, key) is not None self.values = dict() - for d in providers: + for d in self.providers: provider_fields = [key for key in d.fields() if wanted(key)] for key in provider_fields: self.values[key] = None @@ -462,7 +460,7 @@ class Stats: self.fields_filter = fields_filter self._update() def get(self): - for d in providers: + for d in self.providers: new = d.read() for key in d.fields(): oldval = self.values.get(key, (0, 0)) @@ -473,20 +471,6 @@ class Stats: self.values[key] = (newval, newdelta) return self.values -if not os.path.exists('/sys/kernel/debug'): - sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.') - sys.exit(1) -if not os.path.exists(PATH_DEBUGFS_KVM): - sys.stderr.write("Please make sure, that debugfs is mounted and " - "readable by the current user:\n" - "('mount -t debugfs debugfs /sys/kernel/debug')\n" - "Also ensure, that the kvm modules are loaded.\n") - sys.exit(1) -if not os.path.exists(PATH_DEBUGFS_TRACING): - sys.stderr.write("Please make {0} readable by the current user.\n" - .format(PATH_DEBUGFS_TRACING)) - sys.exit(1) - LABEL_WIDTH = 40 NUMBER_WIDTH = 10 @@ -576,56 +560,83 @@ def log(stats): statline() line += 1 -options = optparse.OptionParser() -options.add_option('-1', '--once', '--batch', - action = 'store_true', - default = False, - dest = 'once', - help = 'run in batch mode for one second', 
- ) -options.add_option('-l', '--log', - action = 'store_true', - default = False, - dest = 'log', - help = 'run in logging mode (like vmstat)', - ) -options.add_option('-t', '--tracepoints', - action = 'store_true', - default = False, - dest = 'tracepoints', - help = 'retrieve statistics from tracepoints', - ) -options.add_option('-d', '--debugfs', - action = 'store_true', - default = False, - dest = 'debugfs', - help = 'retrieve statistics from debugfs', - ) -options.add_option('-f', '--fields', - action = 'store', - default = None, - dest = 'fields', - help = 'fields to display (regex)', - ) -(options, args) = options.parse_args(sys.argv) +def get_options(): + optparser = optparse.OptionParser() + optparser.add_option('-1', '--once', '--batch', + action = 'store_true', + default = False, + dest = 'once', + help = 'run in batch mode for one second', + ) + optparser.add_option('-l', '--log', + action = 'store_true', + default = False, + dest = 'log', + help = 'run in logging mode (like vmstat)', + ) + optparser.add_option('-t', '--tracepoints', + action = 'store_true', + default = False, + dest = 'tracepoints', + help = 'retrieve statistics from tracepoints', + ) + optparser.add_option('-d', '--debugfs', + action = 'store_true', + default = False, + dest = 'debugfs', + help = 'retrieve statistics from debugfs', + ) + optparser.add_option('-f', '--fields', + action = 'store', + default = None, + dest = 'fields', + help = 'fields to display (regex)', + ) + (options, _) = optparser.parse_args(sys.argv) + return options -providers = [] -if options.tracepoints: - providers.append(TracepointProvider()) -if options.debugfs: - providers.append(DebugfsProvider()) +def get_providers(options): + providers = [] -if len(providers) == 0: - try: - providers = [TracepointProvider()] - except: - providers = [DebugfsProvider()] + if options.tracepoints: + providers.append(TracepointProvider()) + if options.debugfs: + providers.append(DebugfsProvider()) + if len(providers) == 0: + 
providers.append(TracepointProvider()) -stats = Stats(providers, fields = options.fields) + return providers -if options.log: - log(stats) -elif not options.once: - curses.wrapper(tui, stats) -else: - batch(stats) +def check_access(): + if not os.path.exists('/sys/kernel/debug'): + sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.') + sys.exit(1) + + if not os.path.exists(PATH_DEBUGFS_KVM): + sys.stderr.write("Please make sure, that debugfs is mounted and " + "readable by the current user:\n" + "('mount -t debugfs debugfs /sys/kernel/debug')\n" + "Also ensure, that the kvm modules are loaded.\n") + sys.exit(1) + + if not os.path.exists(PATH_DEBUGFS_TRACING): + sys.stderr.write("Please make {0} readable by the current user.\n" + .format(PATH_DEBUGFS_TRACING)) + sys.exit(1) + +def main(): + check_access() + detect_platform() + options = get_options() + providers = get_providers(options) + stats = Stats(providers, fields = options.fields) + + if options.log: + log(stats) + elif not options.once: + curses.wrapper(tui, stats) + else: + batch(stats) + +if __name__ == "__main__": + main() From e02d896e45d5dbb2855a591f8f1e05876df3334b Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:40 +0100 Subject: [PATCH 16/49] scripts/kvm/kvm_stat: Fix spaces around keyword assignments Keyword assignments should not have spaces around the equal character according to PEP8. Reviewed-by: Jason J.
Herne Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-11-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 62 ++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 9f943ef202..b1e5853b4d 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -333,10 +333,10 @@ class Group(object): self.events = [] self.group_leader = None self.cpu = cpu - def add_event(self, name, event_set, tracepoint, filter = None): - self.events.append(Event(group = self, - name = name, event_set = event_set, - tracepoint = tracepoint, filter = filter)) + def add_event(self, name, event_set, tracepoint, filter=None): + self.events.append(Event(group=self, + name=name, event_set=event_set, + tracepoint=tracepoint, filter=filter)) if len(self.events) == 1: self.file = os.fdopen(self.events[0].fd) def read(self): @@ -346,7 +346,7 @@ class Group(object): struct.unpack(fmt, self.file.read(bytes)))) class Event(object): - def __init__(self, group, name, event_set, tracepoint, filter = None): + def __init__(self, group, name, event_set, tracepoint, filter=None): self.name = name attr = perf_event_attr() attr.type = PERF_TYPE_TRACEPOINT @@ -421,9 +421,9 @@ class TracepointProvider(object): tracepoint, sub = m.groups() filter = '%s==%d\0' % (filters[tracepoint][0], filters[tracepoint][1][sub]) - event = group.add_event(name, event_set = 'kvm', - tracepoint = tracepoint, - filter = filter) + event = group.add_event(name, event_set='kvm', + tracepoint=tracepoint, + filter=filter) self.group_leaders.append(group) def select(self, fields): for group in self.group_leaders: @@ -441,7 +441,7 @@ class TracepointProvider(object): return ret class Stats: - def __init__(self, providers, fields = None): + def __init__(self, providers, fields=None): self.providers = providers self.fields_filter = fields self._update() @@ -499,7 +499,7 @@ def 
tui(screen, stats): return (-s[x][1], -s[x][0]) else: return (0, -s[x][0]) - for key in sorted(s.keys(), key = sortkey): + for key in sorted(s.keys(), key=sortkey): if row >= screen.getmaxyx()[0]: break values = s[key] @@ -563,34 +563,34 @@ def log(stats): def get_options(): optparser = optparse.OptionParser() optparser.add_option('-1', '--once', '--batch', - action = 'store_true', - default = False, - dest = 'once', - help = 'run in batch mode for one second', + action='store_true', + default=False, + dest='once', + help='run in batch mode for one second', ) optparser.add_option('-l', '--log', - action = 'store_true', - default = False, - dest = 'log', - help = 'run in logging mode (like vmstat)', + action='store_true', + default=False, + dest='log', + help='run in logging mode (like vmstat)', ) optparser.add_option('-t', '--tracepoints', - action = 'store_true', - default = False, - dest = 'tracepoints', - help = 'retrieve statistics from tracepoints', + action='store_true', + default=False, + dest='tracepoints', + help='retrieve statistics from tracepoints', ) optparser.add_option('-d', '--debugfs', - action = 'store_true', - default = False, - dest = 'debugfs', - help = 'retrieve statistics from debugfs', + action='store_true', + default=False, + dest='debugfs', + help='retrieve statistics from debugfs', ) optparser.add_option('-f', '--fields', - action = 'store', - default = None, - dest = 'fields', - help = 'fields to display (regex)', + action='store', + default=None, + dest='fields', + help='fields to display (regex)', ) (options, _) = optparser.parse_args(sys.argv) return options @@ -629,7 +629,7 @@ def main(): detect_platform() options = get_options() providers = get_providers(options) - stats = Stats(providers, fields = options.fields) + stats = Stats(providers, fields=options.fields) if options.log: log(stats) From a4b2be204b5034f077c45048e5420c9daf1944b1 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:41 +0100 Subject: [PATCH 
17/49] scripts/kvm/kvm_stat: Rename variables that redefine globals Filter, id and byte are builtin python modules which should not be redefined by local variables. Reviewed-by: Jason J. Herne Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-12-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index b1e5853b4d..98e1ec74b0 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -333,20 +333,21 @@ class Group(object): self.events = [] self.group_leader = None self.cpu = cpu - def add_event(self, name, event_set, tracepoint, filter=None): + def add_event(self, name, event_set, tracepoint, tracefilter=None): self.events.append(Event(group=self, name=name, event_set=event_set, - tracepoint=tracepoint, filter=filter)) + tracepoint=tracepoint, + tracefilter=tracefilter)) if len(self.events) == 1: self.file = os.fdopen(self.events[0].fd) def read(self): - bytes = 8 * (1 + len(self.events)) + length = 8 * (1 + len(self.events)) fmt = 'xxxxxxxx' + 'q' * len(self.events) return dict(zip([event.name for event in self.events], - struct.unpack(fmt, self.file.read(bytes)))) + struct.unpack(fmt, self.file.read(length)))) class Event(object): - def __init__(self, group, name, event_set, tracepoint, filter=None): + def __init__(self, group, name, event_set, tracepoint, tracefilter=None): self.name = name attr = perf_event_attr() attr.type = PERF_TYPE_TRACEPOINT @@ -364,8 +365,8 @@ class Event(object): if fd == -1: err = get_errno()[0] raise Exception('perf_event_open failed, errno = ' + err.__str__()) - if filter: - fcntl.ioctl(fd, IOCTL_NUMBERS['SET_FILTER'], filter) + if tracefilter: + fcntl.ioctl(fd, IOCTL_NUMBERS['SET_FILTER'], tracefilter) self.fd = fd def enable(self): fcntl.ioctl(self.fd, IOCTL_NUMBERS['ENABLE'], 0) @@ -415,15 +416,15 @@ class TracepointProvider(object): group = 
Group(cpu) for name in _fields: tracepoint = name - filter = None + tracefilter = None m = re.match(r'(.*)\((.*)\)', name) if m: tracepoint, sub = m.groups() - filter = '%s==%d\0' % (filters[tracepoint][0], - filters[tracepoint][1][sub]) + tracefilter = '%s==%d\0' % (filters[tracepoint][0], + filters[tracepoint][1][sub]) event = group.add_event(name, event_set='kvm', tracepoint=tracepoint, - filter=filter) + tracefilter=tracefilter) self.group_leaders.append(group) def select(self, fields): for group in self.group_leaders: From f4109dba216f2df61a6098fdd7a6f2d2be4ac848 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:42 +0100 Subject: [PATCH 18/49] scripts/kvm/kvm_stat: Moved DebugfsProvider When it is next to the TracepointProvider less scrolling is needed to change related, surrounding code. Reviewed-by: Jason J. Herne Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-13-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 98e1ec74b0..b5422f86ab 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -23,18 +23,6 @@ import struct import re from collections import defaultdict -class DebugfsProvider(object): - def __init__(self): - self._fields = walkdir(PATH_DEBUGFS_KVM)[2] - def fields(self): - return self._fields - def select(self, fields): - self._fields = fields - def read(self): - def val(key): - return int(file(PATH_DEBUGFS_KVM + '/' + key).read()) - return dict([(key, val(key)) for key in self._fields]) - VMX_EXIT_REASONS = { 'EXCEPTION_NMI': 0, 'EXTERNAL_INTERRUPT': 1, @@ -441,6 +429,18 @@ class TracepointProvider(object): ret[name] += val return ret +class DebugfsProvider(object): + def __init__(self): + self._fields = walkdir(PATH_DEBUGFS_KVM)[2] + def fields(self): + return self._fields + def select(self, fields): + self._fields 
= fields + def read(self): + def val(key): + return int(file(PATH_DEBUGFS_KVM + '/' + key).read()) + return dict([(key, val(key)) for key in self._fields]) + class Stats: def __init__(self, providers, fields=None): self.providers = providers From 400b3cb519a286421e61692058b980adb7554e1e Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:43 +0100 Subject: [PATCH 19/49] scripts/kvm/kvm_stat: Fixup syscall error reporting In 2008 a patch was written that introduced ctypes.get_errno() and set_errno() as official interfaces to the libc errno variable. Using them we can avoid accessing private libc variables. The patch was included in python 2.6. Also we need to raise the right exception, with the right parameters and a helpful message. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-14-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index b5422f86ab..457624d9f3 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -287,10 +287,8 @@ filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) if EXIT_REASONS: filters['kvm_exit'] = ('exit_reason', EXIT_REASONS) -libc = ctypes.CDLL('libc.so.6') +libc = ctypes.CDLL('libc.so.6', use_errno=True) syscall = libc.syscall -get_errno = libc.__errno_location -get_errno.restype = ctypes.POINTER(ctypes.c_int) class perf_event_attr(ctypes.Structure): _fields_ = [('type', ctypes.c_uint32), @@ -351,8 +349,9 @@ class Event(object): group_leader = group.events[0].fd fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0) if fd == -1: - err = get_errno()[0] - raise Exception('perf_event_open failed, errno = ' + err.__str__()) + err = ctypes.get_errno() + raise OSError(err, os.strerror(err), + 'while calling sys_perf_event_open().') if tracefilter: fcntl.ioctl(fd, IOCTL_NUMBERS['SET_FILTER'], tracefilter) self.fd = fd From 
392a7fa3ca3fbd157cec6b58283c51cbd894d4fb Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:44 +0100 Subject: [PATCH 20/49] scripts/kvm/kvm_stat: Set sensible no. files rlimit As num cpus * 1000 is NOT a sensible rlimit, we need to calculate a more accurate rlimit. The number of open files is directly dependent on the cpu count and on the number of trace points per cpu. An additional constant works as a buffer for files that are needed by python or do get opened when the script runs. Hence we have: cpus * traces + constant Reviewed-by: Jason J. Herne Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-15-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 457624d9f3..93b5ea7aac 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -395,8 +395,15 @@ class TracepointProvider(object): def _setup(self, _fields): self._fields = _fields cpus = self._online_cpus() - nfiles = len(cpus) * 1000 - resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles)) + + # The constant is needed as a buffer for python libs, std + # streams and other files that the script opens. + rlimit = len(cpus) * len(_fields) + 50 + try: + resource.setrlimit(resource.RLIMIT_NOFILE, (rlimit, rlimit)) + except ValueError: + sys.exit("NOFILE rlimit could not be raised to {0}".format(rlimit)) + events = [] self.group_leaders = [] for cpu in cpus: From 8d3b5ddc4e10eeda435c8f9ceecd15cd9ab284e7 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:45 +0100 Subject: [PATCH 21/49] scripts/kvm/kvm_stat: Cleanup of platform detection s390 machines can also be detected via uname -m, i.e. python's os.uname, no need for more complicated checks. Calling uname once and saving its value for multiple checks is perfectly sufficient.
We don't expect the machine's architecture to change when the script is running anyway. On multi-cpu systems x86_init currently will get called multiple times, returning makes sure we don't waste cycles on that. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-16-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 93b5ea7aac..5b6742a6b7 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -254,24 +254,21 @@ def aarch64_init(): EXIT_REASONS = AARCH64_EXIT_REASONS def detect_platform(): - if os.uname()[4].startswith('ppc'): - ppc_init() - return - elif os.uname()[4].startswith('aarch64'): - aarch64_init() - return + machine = os.uname()[4] - for line in file('/proc/cpuinfo').readlines(): - if line.startswith('flags'): - for flag in line.split(): - if flag in X86_EXIT_REASONS: - x86_init(flag) - return - elif line.startswith('vendor_id'): - for flag in line.split(): - if flag == 'IBM/S390': - s390_init() - return + if machine.startswith('ppc'): + ppc_init() + elif machine.startswith('aarch64'): + aarch64_init() + elif machine.startswith('s390'): + s390_init() + else: + for line in file('/proc/cpuinfo').readlines(): + if line.startswith('flags'): + for flag in line.split(): + if flag in X86_EXIT_REASONS: + x86_init(flag) + return def walkdir(path): From 3e46a5c272c71930d3da7353cdc1f912528f1da8 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:46 +0100 Subject: [PATCH 22/49] scripts/kvm/kvm_stat: Make cpu detection a function The online cpus detection method is in the Stats class but does not use any class variables. Moving it out of the class to the platform detection function makes the Stats class more readable.
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-17-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 5b6742a6b7..af24f2d6c7 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -279,6 +279,20 @@ def walkdir(path): """ return next(os.walk(path)) + +def get_online_cpus(): + cpulist = [] + pattern = r'cpu([0-9]+)' + basedir = '/sys/devices/system/cpu' + for entry in os.listdir(basedir): + match = re.match(pattern, entry) + if not match: + continue + path = os.path.join(basedir, entry, 'online') + if os.path.isfile(path) and open(path).read().strip() == '1': + cpulist.append(int(match.group(1))) + return cpulist + filters = {} filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) if EXIT_REASONS: @@ -375,23 +389,9 @@ class TracepointProvider(object): def fields(self): return self._fields - def _online_cpus(self): - l = [] - pattern = r'cpu([0-9]+)' - basedir = '/sys/devices/system/cpu' - for entry in os.listdir(basedir): - match = re.match(pattern, entry) - if not match: - continue - path = os.path.join(basedir, entry, 'online') - if os.path.exists(path) and open(path).read().strip() != '1': - continue - l.append(int(match.group(1))) - return l - def _setup(self, _fields): self._fields = _fields - cpus = self._online_cpus() + cpus = get_online_cpus() # The constant is needed as a buffer for python libs, std # streams and other files that the script opens. From 312bf62b7cb37928ef992518df9a0a5e38a2c69a Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:47 +0100 Subject: [PATCH 23/49] scripts/kvm/kvm_stat: Rename _perf_event_open The underscore in front of the function name does not comply with the python coding guidelines. Reviewed-by: Jason J. 
Herne Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-18-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index af24f2d6c7..66dfed6ec3 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -314,7 +314,7 @@ class perf_event_attr(ctypes.Structure): ('bp_addr', ctypes.c_uint64), ('bp_len', ctypes.c_uint64), ] -def _perf_event_open(attr, pid, cpu, group_fd, flags): +def perf_event_open(attr, pid, cpu, group_fd, flags): return syscall(SC_PERF_EVT_OPEN, ctypes.pointer(attr), ctypes.c_int(pid), ctypes.c_int(cpu), ctypes.c_int(group_fd), ctypes.c_long(flags)) @@ -358,7 +358,7 @@ class Event(object): group_leader = -1 if group.events: group_leader = group.events[0].fd - fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0) + fd = perf_event_open(attr, -1, group.cpu, group_leader, 0) if fd == -1: err = ctypes.get_errno() raise OSError(err, os.strerror(err), From a90b87bf25d4d96a8c31a78eefe4633a0e9da3ad Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:48 +0100 Subject: [PATCH 24/49] scripts/kvm/kvm_stat: Introduce properties for providers As previous commit authors used a mixture of setters/getters and direct access to class variables consolidating them the python way improved readability. Properties allow us to assign a value to a class variable through a setter without the need to call the setter ourselves. Reviewed-by: Jason J. Herne Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-19-git-send-email-frankja@linux.vnet.ibm.com> [prop.setter is new in Python 2.6, which is the earliest supported version. 
- Paolo] Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 66dfed6ec3..c4e22d0536 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -385,9 +385,7 @@ class TracepointProvider(object): extra.append(f + '(' + name + ')') fields += extra self._setup(fields) - self.select(fields) - def fields(self): - return self._fields + self.fields = fields def _setup(self, _fields): self._fields = _fields @@ -417,7 +415,14 @@ class TracepointProvider(object): tracepoint=tracepoint, tracefilter=tracefilter) self.group_leaders.append(group) - def select(self, fields): + + @property + def fields(self): + return self._fields + + @fields.setter + def fields(self, fields): + self._fields = fields for group in self.group_leaders: for event in group.events: if event.name in fields: @@ -425,6 +430,7 @@ class TracepointProvider(object): event.enable() else: event.disable() + def read(self): ret = defaultdict(int) for group in self.group_leaders: @@ -435,10 +441,15 @@ class TracepointProvider(object): class DebugfsProvider(object): def __init__(self): self._fields = walkdir(PATH_DEBUGFS_KVM)[2] + + @property def fields(self): return self._fields - def select(self, fields): + + @fields.setter + def fields(self, fields): self._fields = fields + def read(self): def val(key): return int(file(PATH_DEBUGFS_KVM + '/' + key).read()) @@ -456,17 +467,17 @@ class Stats: return re.match(self.fields_filter, key) is not None self.values = dict() for d in self.providers: - provider_fields = [key for key in d.fields() if wanted(key)] + provider_fields = [key for key in d.fields if wanted(key)] for key in provider_fields: self.values[key] = None - d.select(provider_fields) + d.fields = provider_fields def set_fields_filter(self, fields_filter): self.fields_filter = fields_filter self._update() def get(self): for d in self.providers: new = d.read() - 
for key in d.fields(): + for key in d.fields: oldval = self.values.get(key, (0, 0)) newval = new[key] newdelta = None From e06715a3632e0b454dca32ffb9bfefe8fd106a7d Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:49 +0100 Subject: [PATCH 25/49] scripts/kvm/kvm_stat: Cleanup of TracepointProvider Variables with bad names like f and m were renamed to their full name, so it is clearer which data they contain. Unneeded variables were removed and the field generating code was moved in an own function. dict.iteritems() was removed as directly iterating over a dictionary also yields the needed keys. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-20-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index c4e22d0536..032e491f6b 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -375,45 +375,47 @@ class Event(object): class TracepointProvider(object): def __init__(self): + self.group_leaders = [] + self._fields = self.get_available_fields() + self.setup_traces() + self.fields = self._fields + + def get_available_fields(self): path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm') fields = walkdir(path)[1] extra = [] - for f in fields: - if f in filters: - subfield, values = filters[f] - for name, number in values.iteritems(): - extra.append(f + '(' + name + ')') + for field in fields: + if field in filters: + filter_name_, filter_dicts = filters[field] + for name in filter_dicts: + extra.append(field + '(' + name + ')') fields += extra - self._setup(fields) - self.fields = fields + return fields - def _setup(self, _fields): - self._fields = _fields + def setup_traces(self): cpus = get_online_cpus() # The constant is needed as a buffer for python libs, std # streams and other files that the script opens. 
- rlimit = len(cpus) * len(_fields) + 50 + rlimit = len(cpus) * len(self._fields) + 50 try: resource.setrlimit(resource.RLIMIT_NOFILE, (rlimit, rlimit)) except ValueError: sys.exit("NOFILE rlimit could not be raised to {0}".format(rlimit)) - events = [] - self.group_leaders = [] for cpu in cpus: group = Group(cpu) - for name in _fields: + for name in self._fields: tracepoint = name tracefilter = None - m = re.match(r'(.*)\((.*)\)', name) - if m: - tracepoint, sub = m.groups() + match = re.match(r'(.*)\((.*)\)', name) + if match: + tracepoint, sub = match.groups() tracefilter = '%s==%d\0' % (filters[tracepoint][0], filters[tracepoint][1][sub]) - event = group.add_event(name, event_set='kvm', - tracepoint=tracepoint, - tracefilter=tracefilter) + group.add_event(name, event_set='kvm', + tracepoint=tracepoint, + tracefilter=tracefilter) self.group_leaders.append(group) @property From 357bc1e74fc8af96530148d52dd9ccc8e626f000 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:50 +0100 Subject: [PATCH 26/49] scripts/kvm/kvm_stat: Cleanup cpu list retrieval Reading /sys/devices/system/cpu/online makes opening the cpu directories unnecessary and works on more/older systems. 
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-21-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 032e491f6b..083dd2f6a3 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -280,18 +280,27 @@ def walkdir(path): return next(os.walk(path)) +def parse_int_list(list_string): + """Returns an int list from a string of comma separated integers and + integer ranges.""" + integers = [] + members = list_string.split(',') + + for member in members: + if '-' not in member: + integers.append(int(member)) + else: + int_range = member.split('-') + integers.extend(range(int(int_range[0]), + int(int_range[1]) + 1)) + + return integers + + def get_online_cpus(): - cpulist = [] - pattern = r'cpu([0-9]+)' - basedir = '/sys/devices/system/cpu' - for entry in os.listdir(basedir): - match = re.match(pattern, entry) - if not match: - continue - path = os.path.join(basedir, entry, 'online') - if os.path.isfile(path) and open(path).read().strip() == '1': - cpulist.append(int(match.group(1))) - return cpulist + with open('/sys/devices/system/cpu/online') as cpu_list: + cpu_string = cpu_list.readline() + return parse_int_list(cpu_string) filters = {} filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) From dd0b6a4e101e57f0495b71b03be8217c0df1af14 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:51 +0100 Subject: [PATCH 27/49] scripts/kvm/kvm_stat: Encapsulate filters variable The variable was only used in one class but still was defined globally. Additionally the detect_platform routine which prepares the data that goes into the variable was called on each start of the script, no matter if the class was needed.
To make the variable local to the TracepointProvider class, a new function that calls detect_platform and returns the filters was introduced. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-22-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 083dd2f6a3..7837f40ea6 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -302,10 +302,14 @@ def get_online_cpus(): cpu_string = cpu_list.readline() return parse_int_list(cpu_string) -filters = {} -filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) -if EXIT_REASONS: - filters['kvm_exit'] = ('exit_reason', EXIT_REASONS) + +def get_filters(): + detect_platform() + filters = {} + filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) + if EXIT_REASONS: + filters['kvm_exit'] = ('exit_reason', EXIT_REASONS) + return filters libc = ctypes.CDLL('libc.so.6', use_errno=True) syscall = libc.syscall @@ -385,6 +389,7 @@ class Event(object): class TracepointProvider(object): def __init__(self): self.group_leaders = [] + self.filters = get_filters() self._fields = self.get_available_fields() self.setup_traces() self.fields = self._fields @@ -394,8 +399,8 @@ class TracepointProvider(object): fields = walkdir(path)[1] extra = [] for field in fields: - if field in filters: - filter_name_, filter_dicts = filters[field] + if field in self.filters: + filter_name_, filter_dicts = self.filters[field] for name in filter_dicts: extra.append(field + '(' + name + ')') fields += extra @@ -420,8 +425,9 @@ class TracepointProvider(object): match = re.match(r'(.*)\((.*)\)', name) if match: tracepoint, sub = match.groups() - tracefilter = '%s==%d\0' % (filters[tracepoint][0], - filters[tracepoint][1][sub]) + tracefilter = ('%s==%d\0' % + (self.filters[tracepoint][0], + self.filters[tracepoint][1][sub])) 
group.add_event(name, event_set='kvm', tracepoint=tracepoint, tracefilter=tracefilter) @@ -652,7 +658,6 @@ def check_access(): def main(): check_access() - detect_platform() options = get_options() providers = get_providers(options) stats = Stats(providers, fields=options.fields) From e75a36abb42e8ae30bb4ed6fee74699a510ea6e6 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:52 +0100 Subject: [PATCH 28/49] scripts/kvm/kvm_stat: Cleanup of Stats class Converted class definition to new style and renamed improper named variables. Introduced property for fields_filter. Moved member variable declaration to init, so one can see all class variables when reading the init method. Completely clear the values dict, as we don't need to keep single values. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-23-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 52 ++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 7837f40ea6..203873e913 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -472,31 +472,41 @@ class DebugfsProvider(object): return int(file(PATH_DEBUGFS_KVM + '/' + key).read()) return dict([(key, val(key)) for key in self._fields]) -class Stats: +class Stats(object): def __init__(self, providers, fields=None): self.providers = providers - self.fields_filter = fields - self._update() - def _update(self): + self._fields_filter = fields + self.values = {} + self.update_provider_filters() + + def update_provider_filters(self): def wanted(key): - if not self.fields_filter: + if not self._fields_filter: return True - return re.match(self.fields_filter, key) is not None - self.values = dict() - for d in self.providers: - provider_fields = [key for key in d.fields if wanted(key)] - for key in provider_fields: - self.values[key] = None - d.fields = provider_fields - def 
set_fields_filter(self, fields_filter): - self.fields_filter = fields_filter - self._update() + return re.match(self._fields_filter, key) is not None + + # As we reset the counters when updating the fields we can + # also clear the cache of old values. + self.values = {} + for provider in self.providers: + provider_fields = [key for key in provider.fields if wanted(key)] + provider.fields = provider_fields + + @property + def fields_filter(self): + return self._fields_filter + + @fields_filter.setter + def fields_filter(self, fields_filter): + self._fields_filter = fields_filter + self.update_provider_filters() + def get(self): - for d in self.providers: - new = d.read() - for key in d.fields: + for provider in self.providers: + new = provider.read() + for key in provider.fields: oldval = self.values.get(key, (0, 0)) - newval = new[key] + newval = new.get(key, 0) newdelta = None if oldval is not None: newdelta = newval - oldval[0] @@ -514,9 +524,9 @@ def tui(screen, stats): def update_drilldown(): if not fields_filter: if drilldown: - stats.set_fields_filter(None) + stats.fields_filter = None else: - stats.set_fields_filter(r'^[^\(]*$') + stats.fields_filter = r'^[^\(]*$' update_drilldown() def refresh(sleeptime): screen.erase() From fc9fdeebd5f092d746438ab8eb5f8f8dc5030002 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:53 +0100 Subject: [PATCH 29/49] scripts/kvm/kvm_stat: Cleanup of Groups class Introduced separating newlines for readability and removed special treatment/variable of the group leader. Renamed fmt to read_format. The group leader's file descriptor will not be turned into a file object anymore, instead os.read is used to read from the descriptor. 
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-24-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 203873e913..91054e5efa 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -341,20 +341,20 @@ PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' class Group(object): def __init__(self, cpu): self.events = [] - self.group_leader = None self.cpu = cpu + def add_event(self, name, event_set, tracepoint, tracefilter=None): self.events.append(Event(group=self, name=name, event_set=event_set, tracepoint=tracepoint, tracefilter=tracefilter)) - if len(self.events) == 1: - self.file = os.fdopen(self.events[0].fd) + def read(self): length = 8 * (1 + len(self.events)) - fmt = 'xxxxxxxx' + 'q' * len(self.events) + read_format = 'xxxxxxxx' + 'q' * len(self.events) return dict(zip([event.name for event in self.events], - struct.unpack(fmt, self.file.read(length)))) + struct.unpack(read_format, + os.read(self.events[0].fd, length)))) class Event(object): def __init__(self, group, name, event_set, tracepoint, tracefilter=None): From d895493b7c8bf22305ebcf74b3e64d1d12e4d27f Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:54 +0100 Subject: [PATCH 30/49] scripts/kvm/kvm_stat: Cleanup of Event class Added additional newlines for readability. Factored out attribute and event setup code into own methods. Exchanged file() with preferred open(). 
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-25-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 67 ++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 91054e5efa..bf948b9cc0 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -339,15 +339,11 @@ PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' class Group(object): - def __init__(self, cpu): + def __init__(self): self.events = [] - self.cpu = cpu - def add_event(self, name, event_set, tracepoint, tracefilter=None): - self.events.append(Event(group=self, - name=name, event_set=event_set, - tracepoint=tracepoint, - tracefilter=tracefilter)) + def add_event(self, event): + self.events.append(event) def read(self): length = 8 * (1 + len(self.events)) @@ -357,32 +353,52 @@ class Group(object): os.read(self.events[0].fd, length)))) class Event(object): - def __init__(self, group, name, event_set, tracepoint, tracefilter=None): + def __init__(self, name, group, trace_cpu, trace_point, trace_filter, + trace_set='kvm'): self.name = name - attr = perf_event_attr() - attr.type = PERF_TYPE_TRACEPOINT - attr.size = ctypes.sizeof(attr) - id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', event_set, - tracepoint, 'id') - id = int(file(id_path).read()) - attr.config = id - attr.sample_period = 1 - attr.read_format = PERF_FORMAT_GROUP + self.fd = None + self.setup_event(group, trace_cpu, trace_point, trace_filter, + trace_set) + + def setup_event_attribute(self, trace_set, trace_point): + id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, + trace_point, 'id') + + event_attr = perf_event_attr() + event_attr.type = PERF_TYPE_TRACEPOINT + event_attr.size = ctypes.sizeof(event_attr) + event_attr.config = int(open(id_path).read()) + event_attr.sample_period = 1 + event_attr.read_format = 
PERF_FORMAT_GROUP + return event_attr + + def setup_event(self, group, trace_cpu, trace_point, trace_filter, + trace_set): + event_attr = self.setup_event_attribute(trace_set, trace_point) + group_leader = -1 if group.events: group_leader = group.events[0].fd - fd = perf_event_open(attr, -1, group.cpu, group_leader, 0) + + fd = perf_event_open(event_attr, -1, trace_cpu, + group_leader, 0) if fd == -1: err = ctypes.get_errno() raise OSError(err, os.strerror(err), 'while calling sys_perf_event_open().') - if tracefilter: - fcntl.ioctl(fd, IOCTL_NUMBERS['SET_FILTER'], tracefilter) + + if trace_filter: + fcntl.ioctl(fd, IOCTL_NUMBERS['SET_FILTER'], + trace_filter) + self.fd = fd + def enable(self): fcntl.ioctl(self.fd, IOCTL_NUMBERS['ENABLE'], 0) + def disable(self): fcntl.ioctl(self.fd, IOCTL_NUMBERS['DISABLE'], 0) + def reset(self): fcntl.ioctl(self.fd, IOCTL_NUMBERS['RESET'], 0) @@ -418,7 +434,7 @@ class TracepointProvider(object): sys.exit("NOFILE rlimit could not be raised to {0}".format(rlimit)) for cpu in cpus: - group = Group(cpu) + group = Group() for name in self._fields: tracepoint = name tracefilter = None @@ -428,9 +444,12 @@ class TracepointProvider(object): tracefilter = ('%s==%d\0' % (self.filters[tracepoint][0], self.filters[tracepoint][1][sub])) - group.add_event(name, event_set='kvm', - tracepoint=tracepoint, - tracefilter=tracefilter) + + group.add_event(Event(name=name, + group=group, + trace_cpu=cpu, + trace_point=tracepoint, + trace_filter=tracefilter)) self.group_leaders.append(group) @property From 068294a1cac4798e4554d9c9848c7c1b31d28713 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:55 +0100 Subject: [PATCH 31/49] scripts/kvm/kvm_stat: Group arch specific data Using global variables and multiple initialization functions for arch specific data makes the code hard to read. By grouping them in the Arch classes we encapsulate and initialize them in one place. 
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-26-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 117 +++++++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 54 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index bf948b9cc0..42d35f5d95 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -213,62 +213,72 @@ X86_EXIT_REASONS = { 'svm': SVM_EXIT_REASONS, } -SC_PERF_EVT_OPEN = None -EXIT_REASONS = None - IOCTL_NUMBERS = { - 'SET_FILTER' : 0x40082406, - 'ENABLE' : 0x00002400, - 'DISABLE' : 0x00002401, - 'RESET' : 0x00002403, + 'SET_FILTER': 0x40082406, + 'ENABLE': 0x00002400, + 'DISABLE': 0x00002401, + 'RESET': 0x00002403, } -def x86_init(flag): - global SC_PERF_EVT_OPEN - global EXIT_REASONS +class Arch(object): + """Class that encapsulates global architecture specific data like + syscall and ioctl numbers. - SC_PERF_EVT_OPEN = 298 - EXIT_REASONS = X86_EXIT_REASONS[flag] + """ + @staticmethod + def get_arch(): + machine = os.uname()[4] -def s390_init(): - global SC_PERF_EVT_OPEN + if machine.startswith('ppc'): + return ArchPPC() + elif machine.startswith('aarch64'): + return ArchA64() + elif machine.startswith('s390'): + return ArchS390() + else: + # X86_64 + for line in open('/proc/cpuinfo'): + if not line.startswith('flags'): + continue - SC_PERF_EVT_OPEN = 331 + flags = line.split() + if 'vmx' in flags: + return ArchX86(VMX_EXIT_REASONS) + if 'svm' in flags: + return ArchX86(SVM_EXIT_REASONS) + return -def ppc_init(): - global SC_PERF_EVT_OPEN - global IOCTL_NUMBERS +class ArchX86(Arch): + def __init__(self, exit_reasons): + self.sc_perf_evt_open = 298 + self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reasons = exit_reasons - SC_PERF_EVT_OPEN = 319 +class ArchPPC(Arch): + def __init__(self): + self.sc_perf_evt_open = 319 + self.ioctl_numbers = IOCTL_NUMBERS + self.ioctl_numbers['ENABLE'] = 0x20002400 + self.ioctl_numbers['DISABLE'] = 0x20002401 - 
IOCTL_NUMBERS['ENABLE'] = 0x20002400 - IOCTL_NUMBERS['DISABLE'] = 0x20002401 - IOCTL_NUMBERS['SET_FILTER'] = 0x80002406 | (ctypes.sizeof(ctypes.c_char_p) - << 16) + # PPC comes in 32 and 64 bit and some generated ioctl + # numbers depend on the wordsize. + char_ptr_size = ctypes.sizeof(ctypes.c_char_p) + self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 -def aarch64_init(): - global SC_PERF_EVT_OPEN - global EXIT_REASONS +class ArchA64(Arch): + def __init__(self): + self.sc_perf_evt_open = 241 + self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reasons = AARCH64_EXIT_REASONS - SC_PERF_EVT_OPEN = 241 - EXIT_REASONS = AARCH64_EXIT_REASONS +class ArchS390(Arch): + def __init__(self): + self.sc_perf_evt_open = 331 + self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reasons = None -def detect_platform(): - machine = os.uname()[4] - - if machine.startswith('ppc'): - ppc_init() - elif machine.startswith('aarch64'): - aarch64_init() - elif machine.startswith('s390'): - s390_init() - else: - for line in file('/proc/cpuinfo').readlines(): - if line.startswith('flags'): - for flag in line.split(): - if flag in X86_EXIT_REASONS: - x86_init(flag) - return +ARCH = Arch.get_arch() def walkdir(path): @@ -304,11 +314,10 @@ def get_online_cpus(): def get_filters(): - detect_platform() filters = {} filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) - if EXIT_REASONS: - filters['kvm_exit'] = ('exit_reason', EXIT_REASONS) + if ARCH.exit_reasons: + filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) return filters libc = ctypes.CDLL('libc.so.6', use_errno=True) @@ -328,9 +337,9 @@ class perf_event_attr(ctypes.Structure): ('bp_len', ctypes.c_uint64), ] def perf_event_open(attr, pid, cpu, group_fd, flags): - return syscall(SC_PERF_EVT_OPEN, ctypes.pointer(attr), ctypes.c_int(pid), - ctypes.c_int(cpu), ctypes.c_int(group_fd), - ctypes.c_long(flags)) + return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr), + ctypes.c_int(pid), ctypes.c_int(cpu), + 
ctypes.c_int(group_fd), ctypes.c_long(flags)) PERF_TYPE_TRACEPOINT = 2 PERF_FORMAT_GROUP = 1 << 3 @@ -388,19 +397,19 @@ class Event(object): 'while calling sys_perf_event_open().') if trace_filter: - fcntl.ioctl(fd, IOCTL_NUMBERS['SET_FILTER'], + fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'], trace_filter) self.fd = fd def enable(self): - fcntl.ioctl(self.fd, IOCTL_NUMBERS['ENABLE'], 0) + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0) def disable(self): - fcntl.ioctl(self.fd, IOCTL_NUMBERS['DISABLE'], 0) + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0) def reset(self): - fcntl.ioctl(self.fd, IOCTL_NUMBERS['RESET'], 0) + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) class TracepointProvider(object): def __init__(self): From 9c0ab054edf4b197501e5fa00ce5bf16ed7e3111 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:56 +0100 Subject: [PATCH 32/49] scripts/kvm/kvm_stat: Remove unneeded X86_EXIT_REASONS The architecture detection method directly accesses vmx and svm exit reason constants. Therefore we don't need it anymore. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-27-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 5 ----- 1 file changed, 5 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 42d35f5d95..8efe3b886d 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -208,11 +208,6 @@ USERSPACE_EXIT_REASONS = { 'SYSTEM_EVENT': 24, } -X86_EXIT_REASONS = { - 'vmx': VMX_EXIT_REASONS, - 'svm': SVM_EXIT_REASONS, -} - IOCTL_NUMBERS = { 'SET_FILTER': 0x40082406, 'ENABLE': 0x00002400, From 8a2a33316cb40779bc7975df9114fd07da25de7c Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:57 +0100 Subject: [PATCH 33/49] scripts/kvm/kvm_stat: Make tui function a class The tui function itself had a few sub-functions and therefore basically already was class-like.
Making it an actual one with proper methods improved readability. The curses wrapper was dropped in favour of __enter__/__exit__ methods that implement the same behaviour. Also renamed single character variable name, so the name reflects the content. Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-28-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 123 +++++++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 44 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 8efe3b886d..63a657be64 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -539,63 +539,97 @@ class Stats(object): LABEL_WIDTH = 40 NUMBER_WIDTH = 10 -def tui(screen, stats): - curses.use_default_colors() - curses.noecho() - drilldown = False - fields_filter = stats.fields_filter - def update_drilldown(): - if not fields_filter: - if drilldown: - stats.fields_filter = None +class Tui(object): + def __init__(self, stats): + self.stats = stats + self.screen = None + self.drilldown = False + self.fields_filter = self.stats.fields_filter + self.update_drilldown() + + def __enter__(self): + """Initialises curses for later use. Based on curses.wrapper + implementation from the Python standard library.""" + self.screen = curses.initscr() + curses.noecho() + curses.cbreak() + + # The try/catch works around a minor bit of + # over-conscientiousness in the curses module, the error + # return from C start_color() is ignorable. + try: + curses.start_color() + except: + pass + + curses.use_default_colors() + return self + + def __exit__(self, *exception): + """Resets the terminal to its normal state.
Based on curses.wrappre + implementation from the Python standard library.""" + if self.screen: + self.screen.keypad(0) + curses.echo() + curses.nocbreak() + curses.endwin() + + def update_drilldown(self): + if not self.fields_filter: + if self.drilldown: + self.stats.fields_filter = None else: - stats.fields_filter = r'^[^\(]*$' - update_drilldown() - def refresh(sleeptime): - screen.erase() - screen.addstr(0, 0, 'kvm statistics') - screen.addstr(2, 1, 'Event') - screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - len('Total'), 'Total') - screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 - len('Current'), 'Current') + self.stats.fields_filter = r'^[^\(]*$' + + def refresh(self, sleeptime): + self.screen.erase() + self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) + self.screen.addstr(2, 1, 'Event') + self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - + len('Total'), 'Total') + self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 - + len('Current'), 'Current') row = 3 - s = stats.get() + stats = self.stats.get() def sortkey(x): - if s[x][1]: - return (-s[x][1], -s[x][0]) + if stats[x][1]: + return (-stats[x][1], -stats[x][0]) else: - return (0, -s[x][0]) - for key in sorted(s.keys(), key=sortkey): - if row >= screen.getmaxyx()[0]: + return (0, -stats[x][0]) + for key in sorted(stats.keys(), key=sortkey): + + if row >= self.screen.getmaxyx()[0]: break - values = s[key] + values = stats[key] if not values[0] and not values[1]: break col = 1 - screen.addstr(row, col, key) + self.screen.addstr(row, col, key) col += LABEL_WIDTH - screen.addstr(row, col, '%10d' % (values[0],)) + self.screen.addstr(row, col, '%10d' % (values[0],)) col += NUMBER_WIDTH if values[1] is not None: - screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) + self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) row += 1 - screen.refresh() + self.screen.refresh() - sleeptime = 0.25 - while True: - refresh(sleeptime) - curses.halfdelay(int(sleeptime * 10)) - 
sleeptime = 3 - try: - c = screen.getkey() - if c == 'x': - drilldown = not drilldown - update_drilldown() - if c == 'q': + def show_stats(self): + sleeptime = 0.25 + while True: + self.refresh(sleeptime) + curses.halfdelay(int(sleeptime * 10)) + sleeptime = 3 + try: + char = self.screen.getkey() + if char == 'x': + self.drilldown = not self.drilldown + self.update_drilldown() + if char == 'q': + break + except KeyboardInterrupt: break - except KeyboardInterrupt: - break - except curses.error: - continue + except curses.error: + continue def batch(stats): s = stats.get() @@ -698,7 +732,8 @@ def main(): if options.log: log(stats) elif not options.once: - curses.wrapper(tui, stats) + with Tui(stats) as tui: + tui.show_stats() else: batch(stats) From c887d9a25e877afe135e94fe582d5c58a2fd4ee2 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:58 +0100 Subject: [PATCH 34/49] scripts/kvm/kvm_stat: Fix output formatting The key names in log mode were capped to 10 characters which is not enough for distinguishing between keys. Capping was therefore removed. In batch mode the spacing between keys and values was too narrow and therefore had to be extended to 42. 
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-29-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 63a657be64..8c7d097e6c 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -637,13 +637,13 @@ def batch(stats): s = stats.get() for key in sorted(s.keys()): values = s[key] - print '%-22s%10d%10d' % (key, values[0], values[1]) + print '%-42s%10d%10d' % (key, values[0], values[1]) def log(stats): keys = sorted(stats.get().iterkeys()) def banner(): for k in keys: - print '%10s' % k[0:9], + print '%s' % k, print def statline(): s = stats.get() From 00842aaca5265a57b0a1f0248bde2f77f986e352 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:17:59 +0100 Subject: [PATCH 35/49] scripts/kvm/kvm_stat: Cleanup and pre-init perf_event_attr All initializations of the ctypes struct that don't need additional information were moved to its init method. The unneeded initializations for sample_type and sample_period were removed as they do not affect the counters that are read. This improves readability of the setup_event_attribute by halving its LOC.
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-30-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 8c7d097e6c..ce970465d6 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -331,6 +331,13 @@ class perf_event_attr(ctypes.Structure): ('bp_addr', ctypes.c_uint64), ('bp_len', ctypes.c_uint64), ] + + def __init__(self): + super(self.__class__, self).__init__() + self.type = PERF_TYPE_TRACEPOINT + self.size = ctypes.sizeof(self) + self.read_format = PERF_FORMAT_GROUP + def perf_event_open(attr, pid, cpu, group_fd, flags): return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr), ctypes.c_int(pid), ctypes.c_int(cpu), @@ -369,11 +376,7 @@ class Event(object): trace_point, 'id') event_attr = perf_event_attr() - event_attr.type = PERF_TYPE_TRACEPOINT - event_attr.size = ctypes.sizeof(event_attr) event_attr.config = int(open(id_path).read()) - event_attr.sample_period = 1 - event_attr.read_format = PERF_FORMAT_GROUP return event_attr def setup_event(self, group, trace_cpu, trace_point, trace_filter, From d8e44802f8ae320a454644fb010ef06f3ac8fb06 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:18:00 +0100 Subject: [PATCH 36/49] scripts/kvm/kvm_stat: Read event values as u64 The struct read_format, which denotes the returned values on a read states that the values are u64 and not long long which is used for struct unpacking. Therefore the 'q' long long formatter was exchanged with 'Q' which is the format for u64 data. 
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-31-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index ce970465d6..611f82ad2a 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -358,7 +358,7 @@ class Group(object): def read(self): length = 8 * (1 + len(self.events)) - read_format = 'xxxxxxxx' + 'q' * len(self.events) + read_format = 'xxxxxxxx' + 'Q' * len(self.events) return dict(zip([event.name for event in self.events], struct.unpack(read_format, os.read(self.events[0].fd, length)))) From 1cd55f9dc7debdad0d54f4fad8617527433b4c4b Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:18:01 +0100 Subject: [PATCH 37/49] scripts/kvm/kvm_stat: Fix rlimit for unprivileged users Setting the hard limit as an unprivileged user either returns an error when it is higher than the current one or irreversibly sets it lower. Therefore we leave the hard limit untouched as long as we don't need to raise it as this needs CAP_SYS_RESOURCE. This gives admins the possibility to run the script as an unprivileged user to increase security.
- rlimit = len(cpus) * len(self._fields) + 50 + newlim = len(cpus) * len(self._fields) + 50 try: - resource.setrlimit(resource.RLIMIT_NOFILE, (rlimit, rlimit)) + softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE) + + if hardlim < newlim: + # Now we need CAP_SYS_RESOURCE, to increase the hard limit. + resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim)) + else: + # Raising the soft limit is sufficient. + resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim)) + except ValueError: - sys.exit("NOFILE rlimit could not be raised to {0}".format(rlimit)) + sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim)) for cpu in cpus: group = Group() From 126b33e6191cd21ba7e05673eb1428b73bf2d34e Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:18:02 +0100 Subject: [PATCH 38/49] scripts/kvm/kvm_stat: Fixup filtering When filtering, the group leader event should not be disabled, as all other events under it will also be disabled. Also we should make sure that values from disabled fields will not be displayed. This also filters the fields from the log and batch output for better readability. Also the drilldown update now directly checks for the stats' field filter and (un)sets drilldown accordingly. 
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-33-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 2a1842e33e..eb97a650f0 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -467,6 +467,9 @@ class TracepointProvider(object): trace_filter=tracefilter)) self.group_leaders.append(group) + def available_fields(self): + return self.get_available_fields() + @property def fields(self): return self._fields @@ -475,23 +478,30 @@ class TracepointProvider(object): def fields(self, fields): self._fields = fields for group in self.group_leaders: - for event in group.events: + for index, event in enumerate(group.events): if event.name in fields: event.reset() event.enable() else: - event.disable() + # Do not disable the group leader. + # It would disable all of its events. + if index != 0: + event.disable() def read(self): ret = defaultdict(int) for group in self.group_leaders: for name, val in group.read().iteritems(): - ret[name] += val + if name in self._fields: + ret[name] += val return ret class DebugfsProvider(object): def __init__(self): - self._fields = walkdir(PATH_DEBUGFS_KVM)[2] + self._fields = self.get_available_fields() + + def get_available_fields(self): + return walkdir(PATH_DEBUGFS_KVM)[2] @property def fields(self): @@ -523,7 +533,8 @@ class Stats(object): # also clear the cache of old values. 
self.values = {} for provider in self.providers: - provider_fields = [key for key in provider.fields if wanted(key)] + provider_fields = [key for key in provider.get_available_fields() + if wanted(key)] provider.fields = provider_fields @property @@ -555,7 +566,6 @@ class Tui(object): self.stats = stats self.screen = None self.drilldown = False - self.fields_filter = self.stats.fields_filter self.update_drilldown() def __enter__(self): @@ -586,11 +596,11 @@ class Tui(object): curses.endwin() def update_drilldown(self): - if not self.fields_filter: - if self.drilldown: - self.stats.fields_filter = None - else: - self.stats.fields_filter = r'^[^\(]*$' + if not self.stats.fields_filter: + self.stats.fields_filter = r'^[^\(]*$' + + elif self.stats.fields_filter == r'^[^\(]*$': + self.stats.fields_filter = None def refresh(self, sleeptime): self.screen.erase() From 7f786a9a06cc100b1261b120c7698083a802d46c Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:18:03 +0100 Subject: [PATCH 39/49] scripts/kvm/kvm_stat: Add interactive filtering Interactively changing the filter is much more useful than the drilldown, because it is more versatile. With this patch, the filter can be changed by pressing 'f' in the text ui and entering a new filter regex. 
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-34-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index eb97a650f0..25631a49f7 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -634,6 +634,28 @@ class Tui(object): row += 1 self.screen.refresh() + def show_filter_selection(self): + while True: + self.screen.erase() + self.screen.addstr(0, 0, + "Show statistics for events matching a regex.", + curses.A_BOLD) + self.screen.addstr(2, 0, + "Current regex: {0}" + .format(self.stats.fields_filter)) + self.screen.addstr(3, 0, "New regex: ") + curses.echo() + regex = self.screen.getstr() + curses.noecho() + if len(regex) == 0: + return + try: + re.compile(regex) + self.stats.fields_filter = regex + return + except re.error: + continue + def show_stats(self): sleeptime = 0.25 while True: @@ -647,6 +669,8 @@ class Tui(object): self.update_drilldown() if char == 'q': break + if char == 'f': + self.show_filter_selection() except KeyboardInterrupt: break except curses.error: From a013bd2f7b88c831241b3ad6d5635e9d13a7e3fc Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 11 Jan 2016 16:18:04 +0100 Subject: [PATCH 40/49] scripts/kvm/kvm_stat: Add optparse description Added a description text that explains what the script does and which requirements have to be met to let it run. The help formatter class is needed as the default optparse formatter makes the text unreadable. 
Signed-off-by: Janosch Frank Message-Id: <1452525484-32309-35-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 25631a49f7..d43e8f3e85 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -705,7 +705,34 @@ def log(stats): line += 1 def get_options(): - optparser = optparse.OptionParser() + description_text = """ +This script displays various statistics about VMs running under KVM. +The statistics are gathered from the KVM debugfs entries and / or the +currently available perf traces. + +The monitoring takes additional cpu cycles and might affect the VM's +performance. + +Requirements: +- Access to: + /sys/kernel/debug/kvm + /sys/kernel/debug/trace/events/* + /proc/pid/task +- /proc/sys/kernel/perf_event_paranoid < 1 if user has no + CAP_SYS_ADMIN and perf events are used. +- CAP_SYS_RESOURCE if the hard limit is not high enough to allow + the large number of files that are possibly opened. +""" + + class PlainHelpFormatter(optparse.IndentedHelpFormatter): + def format_description(self, description): + if description: + return description + "\n" + else: + return "" + + optparser = optparse.OptionParser(description=description_text, + formatter=PlainHelpFormatter()) optparser.add_option('-1', '--once', '--batch', action='store_true', default=False, From 96bce6831bd19b61e965384427741d805c7234c3 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 25 Jan 2016 10:08:18 +0000 Subject: [PATCH 41/49] cpus: use broadcast on qemu_pause_cond Jiri saw a hang on pause_all_vcpus called from postcopy_start, where the cpus are all apparently stopped ('stopped' flag set) but pause_all_vcpus is still stuck on a cond_wait on qemu_paused_cond. 
We suspect this is happening if a qmp_stop is called at about the same time as the postcopy code calls that pause_all_vcpus; although they both should have the main lock held, Paolo spotted the cond_wait unlocks the global lock so perhaps they both could end up waiting at the same time? Signed-off-by: Dr. David Alan Gilbert Reported-by: Jiri Denemark Message-Id: <1453716498-27238-1-git-send-email-dgilbert@redhat.com> Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- cpus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpus.c b/cpus.c index ea29584397..51f5ea4466 100644 --- a/cpus.c +++ b/cpus.c @@ -986,7 +986,7 @@ static void qemu_wait_io_event_common(CPUState *cpu) if (cpu->stop) { cpu->stop = false; cpu->stopped = true; - qemu_cond_signal(&qemu_pause_cond); + qemu_cond_broadcast(&qemu_pause_cond); } flush_queued_work(cpu); cpu->thread_kicked = false; @@ -1387,7 +1387,7 @@ void cpu_stop_current(void) current_cpu->stop = false; current_cpu->stopped = true; cpu_exit(current_cpu); - qemu_cond_signal(&qemu_pause_cond); + qemu_cond_broadcast(&qemu_pause_cond); } } From fae947b096020e5004ec7ae106a95a69a5e4d929 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Fri, 22 Jan 2016 09:15:01 -0500 Subject: [PATCH 42/49] memory: exit when hugepage allocation fails if mem-prealloc When -mem-prealloc is passed on the command-line, the expected behavior is to exit if the hugepage allocation fails. However, this behavior is broken since commit cc57501dee which made hugepage allocation fall back to regular ram in case of failure. This commit restores the expected behavior for -mem-prealloc.
Signed-off-by: Luiz Capitulino Message-Id: <20160122091501.75bbd42a@redhat.com> Signed-off-by: Paolo Bonzini --- numa.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/numa.c b/numa.c index 425ef8dc21..23a5d83024 100644 --- a/numa.c +++ b/numa.c @@ -418,12 +418,15 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, Error *err = NULL; memory_region_init_ram_from_file(mr, owner, name, ram_size, false, mem_path, &err); - - /* Legacy behavior: if allocation failed, fall back to - * regular RAM allocation. - */ if (err) { error_report_err(err); + if (mem_prealloc) { + exit(1); + } + + /* Legacy behavior: if allocation failed, fall back to + * regular RAM allocation. + */ memory_region_init_ram(mr, owner, name, ram_size, &error_fatal); } #else From e5f3e12e8489f5325800c6a86fe5f476adfcbdae Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 20 Jan 2016 10:12:20 +0300 Subject: [PATCH 43/49] nbd: add missed aio_context_acquire in nbd_export_new blk_invalidate_cache() can call qcow2_invalidate_cache which performs IO inside. Signed-off-by: Denis V. Lunev CC: Kevin Wolf CC: Paolo Bonzini Message-Id: <1453273940-15382-3-git-send-email-den@openvz.org> Signed-off-by: Paolo Bonzini --- nbd/server.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nbd/server.c b/nbd/server.c index eead339a2c..3596e68191 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -671,7 +671,9 @@ NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, * that BDRV_O_INCOMING is cleared and the image is ready for write * access since the export could be available before migration handover. 
*/ + aio_context_acquire(exp->ctx); blk_invalidate_cache(blk, NULL); + aio_context_release(exp->ctx); return exp; fail: From ca81ce72b4d12494424d1813c6437035c1f89a8c Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:35 +0100 Subject: [PATCH 44/49] scripts/dump-guest-memory.py: Move constants to the top The constants bloated the class definition and were therefore moved to the top. Reviewed-by: Laszlo Ersek Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-2-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 126 +++++++++++++++++------------------ 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 08796fff8c..e49c835185 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -17,6 +17,55 @@ import struct +TARGET_PAGE_SIZE = 0x1000 +TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000 + +# Various ELF constants +EM_X86_64 = 62 # AMD x86-64 target machine +ELFDATA2LSB = 1 # little endian +ELFCLASS64 = 2 +ELFMAG = "\x7FELF" +EV_CURRENT = 1 +ET_CORE = 4 +PT_LOAD = 1 +PT_NOTE = 4 + +# Special value for e_phnum. This indicates that the real number of +# program headers is too large to fit into e_phnum. Instead the real +# value is in the field sh_info of section 0. +PN_XNUM = 0xFFFF + +# Format strings for packing and header size calculation. 
+ELF64_EHDR = ("4s" # e_ident/magic + "B" # e_ident/class + "B" # e_ident/data + "B" # e_ident/version + "B" # e_ident/osabi + "8s" # e_ident/pad + "H" # e_type + "H" # e_machine + "I" # e_version + "Q" # e_entry + "Q" # e_phoff + "Q" # e_shoff + "I" # e_flags + "H" # e_ehsize + "H" # e_phentsize + "H" # e_phnum + "H" # e_shentsize + "H" # e_shnum + "H" # e_shstrndx + ) +ELF64_PHDR = ("I" # p_type + "I" # p_flags + "Q" # p_offset + "Q" # p_vaddr + "Q" # p_paddr + "Q" # p_filesz + "Q" # p_memsz + "Q" # p_align + ) + class DumpGuestMemory(gdb.Command): """Extract guest vmcore from qemu process coredump. @@ -47,62 +96,13 @@ deliberately called abort(), or it was dumped in response to a signal at a halfway fortunate point, then its coredump should be in reasonable shape and this command should mostly work.""" - TARGET_PAGE_SIZE = 0x1000 - TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000 - - # Various ELF constants - EM_X86_64 = 62 # AMD x86-64 target machine - ELFDATA2LSB = 1 # little endian - ELFCLASS64 = 2 - ELFMAG = "\x7FELF" - EV_CURRENT = 1 - ET_CORE = 4 - PT_LOAD = 1 - PT_NOTE = 4 - - # Special value for e_phnum. This indicates that the real number of - # program headers is too large to fit into e_phnum. Instead the real - # value is in the field sh_info of section 0. - PN_XNUM = 0xFFFF - - # Format strings for packing and header size calculation. 
- ELF64_EHDR = ("4s" # e_ident/magic - "B" # e_ident/class - "B" # e_ident/data - "B" # e_ident/version - "B" # e_ident/osabi - "8s" # e_ident/pad - "H" # e_type - "H" # e_machine - "I" # e_version - "Q" # e_entry - "Q" # e_phoff - "Q" # e_shoff - "I" # e_flags - "H" # e_ehsize - "H" # e_phentsize - "H" # e_phnum - "H" # e_shentsize - "H" # e_shnum - "H" # e_shstrndx - ) - ELF64_PHDR = ("I" # p_type - "I" # p_flags - "Q" # p_offset - "Q" # p_vaddr - "Q" # p_paddr - "Q" # p_filesz - "Q" # p_memsz - "Q" # p_align - ) - def __init__(self): super(DumpGuestMemory, self).__init__("dump-guest-memory", gdb.COMMAND_DATA, gdb.COMPLETE_FILENAME) self.uintptr_t = gdb.lookup_type("uintptr_t") - self.elf64_ehdr_le = struct.Struct("<%s" % self.ELF64_EHDR) - self.elf64_phdr_le = struct.Struct("<%s" % self.ELF64_PHDR) + self.elf64_ehdr_le = struct.Struct("<%s" % ELF64_EHDR) + self.elf64_phdr_le = struct.Struct("<%s" % ELF64_PHDR) def int128_get64(self, val): assert (val["hi"] == 0) @@ -130,7 +130,7 @@ shape and this command should mostly work.""" if (mr["alias"] != 0): return (self.memory_region_get_ram_ptr(mr["alias"].dereference()) + mr["alias_offset"]) - return self.qemu_get_ram_ptr(mr["ram_addr"] & self.TARGET_PAGE_MASK) + return self.qemu_get_ram_ptr(mr["ram_addr"] & TARGET_PAGE_MASK) def guest_phys_blocks_init(self): self.guest_phys_blocks = [] @@ -198,21 +198,21 @@ shape and this command should mostly work.""" # most common values. This also means that instruction pointer # etc. will be bogus in the dump, but at least the RAM contents # should be valid. 
- self.dump_info = {"d_machine": self.EM_X86_64, - "d_endian" : self.ELFDATA2LSB, - "d_class" : self.ELFCLASS64} + self.dump_info = {"d_machine": EM_X86_64, + "d_endian" : ELFDATA2LSB, + "d_class" : ELFCLASS64} def encode_elf64_ehdr_le(self): return self.elf64_ehdr_le.pack( - self.ELFMAG, # e_ident/magic + ELFMAG, # e_ident/magic self.dump_info["d_class"], # e_ident/class self.dump_info["d_endian"], # e_ident/data - self.EV_CURRENT, # e_ident/version + EV_CURRENT, # e_ident/version 0, # e_ident/osabi "", # e_ident/pad - self.ET_CORE, # e_type + ET_CORE, # e_type self.dump_info["d_machine"], # e_machine - self.EV_CURRENT, # e_version + EV_CURRENT, # e_version 0, # e_entry self.elf64_ehdr_le.size, # e_phoff 0, # e_shoff @@ -226,7 +226,7 @@ shape and this command should mostly work.""" ) def encode_elf64_note_le(self): - return self.elf64_phdr_le.pack(self.PT_NOTE, # p_type + return self.elf64_phdr_le.pack(PT_NOTE, # p_type 0, # p_flags (self.memory_offset - len(self.note)), # p_offset @@ -238,7 +238,7 @@ shape and this command should mostly work.""" ) def encode_elf64_load_le(self, offset, start_hwaddr, range_size): - return self.elf64_phdr_le.pack(self.PT_LOAD, # p_type + return self.elf64_phdr_le.pack(PT_LOAD, # p_type 0, # p_flags offset, # p_offset 0, # p_vaddr @@ -276,7 +276,7 @@ shape and this command should mostly work.""" # We should never reach PN_XNUM for paging=false dumps: there's # just a handful of discontiguous ranges after merging. 
self.phdr_num += len(self.guest_phys_blocks) - assert (self.phdr_num < self.PN_XNUM) + assert (self.phdr_num < PN_XNUM) # Calculate the ELF file offset where the memory dump commences: # @@ -312,7 +312,7 @@ shape and this command should mostly work.""" print ("dumping range at %016x for length %016x" % (cur.cast(self.uintptr_t), left)) while (left > 0): - chunk_size = min(self.TARGET_PAGE_SIZE, left) + chunk_size = min(TARGET_PAGE_SIZE, left) chunk = qemu_core.read_memory(cur, chunk_size) vmcore.write(chunk) cur += chunk_size From 47890203842de8b29716bdffb406ca851e70829d Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:36 +0100 Subject: [PATCH 45/49] scripts/dump-guest-memory.py: Make methods functions The functions dealing with qemu components rarely used parts of the class, so they were moved out of the class. As the uintptr_t variable is needed both within and outside the class, it was made a constant and moved to the top. Reviewed-by: Laszlo Ersek Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-3-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 184 ++++++++++++++++++----------------- 1 file changed, 93 insertions(+), 91 deletions(-) diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index e49c835185..d0b927a2bc 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -17,6 +17,8 @@ import struct +UINTPTR_T = gdb.lookup_type("uintptr_t") + TARGET_PAGE_SIZE = 0x1000 TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000 @@ -66,6 +68,94 @@ ELF64_PHDR = ("I" # p_type "Q" # p_align ) +def int128_get64(val): + assert (val["hi"] == 0) + return val["lo"] + +def qlist_foreach(head, field_str): + var_p = head["lh_first"] + while (var_p != 0): + var = var_p.dereference() + yield var + var_p = var[field_str]["le_next"] + +def qemu_get_ram_block(ram_addr): + ram_blocks = gdb.parse_and_eval("ram_list.blocks") + for block in qlist_foreach(ram_blocks, 
"next"): + if (ram_addr - block["offset"] < block["used_length"]): + return block + raise gdb.GdbError("Bad ram offset %x" % ram_addr) + +def qemu_get_ram_ptr(ram_addr): + block = qemu_get_ram_block(ram_addr) + return block["host"] + (ram_addr - block["offset"]) + +def memory_region_get_ram_ptr(mr): + if (mr["alias"] != 0): + return (memory_region_get_ram_ptr(mr["alias"].dereference()) + + mr["alias_offset"]) + return qemu_get_ram_ptr(mr["ram_addr"] & TARGET_PAGE_MASK) + +def get_guest_phys_blocks(): + guest_phys_blocks = [] + print "guest RAM blocks:" + print ("target_start target_end host_addr message " + "count") + print ("---------------- ---------------- ---------------- ------- " + "-----") + + current_map_p = gdb.parse_and_eval("address_space_memory.current_map") + current_map = current_map_p.dereference() + for cur in range(current_map["nr"]): + flat_range = (current_map["ranges"] + cur).dereference() + mr = flat_range["mr"].dereference() + + # we only care about RAM + if (not mr["ram"]): + continue + + section_size = int128_get64(flat_range["addr"]["size"]) + target_start = int128_get64(flat_range["addr"]["start"]) + target_end = target_start + section_size + host_addr = (memory_region_get_ram_ptr(mr) + + flat_range["offset_in_region"]) + predecessor = None + + # find continuity in guest physical address space + if (len(guest_phys_blocks) > 0): + predecessor = guest_phys_blocks[-1] + predecessor_size = (predecessor["target_end"] - + predecessor["target_start"]) + + # the memory API guarantees monotonically increasing + # traversal + assert (predecessor["target_end"] <= target_start) + + # we want continuity in both guest-physical and + # host-virtual memory + if (predecessor["target_end"] < target_start or + predecessor["host_addr"] + predecessor_size != host_addr): + predecessor = None + + if (predecessor is None): + # isolated mapping, add it to the list + guest_phys_blocks.append({"target_start": target_start, + "target_end" : target_end, + "host_addr" 
: host_addr}) + message = "added" + else: + # expand predecessor until @target_end; predecessor's + # start doesn't change + predecessor["target_end"] = target_end + message = "joined" + + print ("%016x %016x %016x %-7s %5u" % + (target_start, target_end, host_addr.cast(UINTPTR_T), + message, len(guest_phys_blocks))) + + return guest_phys_blocks + + class DumpGuestMemory(gdb.Command): """Extract guest vmcore from qemu process coredump. @@ -100,96 +190,9 @@ shape and this command should mostly work.""" super(DumpGuestMemory, self).__init__("dump-guest-memory", gdb.COMMAND_DATA, gdb.COMPLETE_FILENAME) - self.uintptr_t = gdb.lookup_type("uintptr_t") self.elf64_ehdr_le = struct.Struct("<%s" % ELF64_EHDR) self.elf64_phdr_le = struct.Struct("<%s" % ELF64_PHDR) - - def int128_get64(self, val): - assert (val["hi"] == 0) - return val["lo"] - - def qlist_foreach(self, head, field_str): - var_p = head["lh_first"] - while (var_p != 0): - var = var_p.dereference() - yield var - var_p = var[field_str]["le_next"] - - def qemu_get_ram_block(self, ram_addr): - ram_blocks = gdb.parse_and_eval("ram_list.blocks") - for block in self.qlist_foreach(ram_blocks, "next"): - if (ram_addr - block["offset"] < block["used_length"]): - return block - raise gdb.GdbError("Bad ram offset %x" % ram_addr) - - def qemu_get_ram_ptr(self, ram_addr): - block = self.qemu_get_ram_block(ram_addr) - return block["host"] + (ram_addr - block["offset"]) - - def memory_region_get_ram_ptr(self, mr): - if (mr["alias"] != 0): - return (self.memory_region_get_ram_ptr(mr["alias"].dereference()) + - mr["alias_offset"]) - return self.qemu_get_ram_ptr(mr["ram_addr"] & TARGET_PAGE_MASK) - - def guest_phys_blocks_init(self): - self.guest_phys_blocks = [] - - def guest_phys_blocks_append(self): - print "guest RAM blocks:" - print ("target_start target_end host_addr message " - "count") - print ("---------------- ---------------- ---------------- ------- " - "-----") - - current_map_p = 
gdb.parse_and_eval("address_space_memory.current_map") - current_map = current_map_p.dereference() - for cur in range(current_map["nr"]): - flat_range = (current_map["ranges"] + cur).dereference() - mr = flat_range["mr"].dereference() - - # we only care about RAM - if (not mr["ram"]): - continue - - section_size = self.int128_get64(flat_range["addr"]["size"]) - target_start = self.int128_get64(flat_range["addr"]["start"]) - target_end = target_start + section_size - host_addr = (self.memory_region_get_ram_ptr(mr) + - flat_range["offset_in_region"]) - predecessor = None - - # find continuity in guest physical address space - if (len(self.guest_phys_blocks) > 0): - predecessor = self.guest_phys_blocks[-1] - predecessor_size = (predecessor["target_end"] - - predecessor["target_start"]) - - # the memory API guarantees monotonically increasing - # traversal - assert (predecessor["target_end"] <= target_start) - - # we want continuity in both guest-physical and - # host-virtual memory - if (predecessor["target_end"] < target_start or - predecessor["host_addr"] + predecessor_size != host_addr): - predecessor = None - - if (predecessor is None): - # isolated mapping, add it to the list - self.guest_phys_blocks.append({"target_start": target_start, - "target_end" : target_end, - "host_addr" : host_addr}) - message = "added" - else: - # expand predecessor until @target_end; predecessor's - # start doesn't change - predecessor["target_end"] = target_end - message = "joined" - - print ("%016x %016x %016x %-7s %5u" % - (target_start, target_end, host_addr.cast(self.uintptr_t), - message, len(self.guest_phys_blocks))) + self.guest_phys_blocks = None def cpu_get_dump_info(self): # We can't synchronize the registers with KVM post-mortem, and @@ -263,8 +266,7 @@ shape and this command should mostly work.""" len(name) + 1, len(desc), type, name, desc) def dump_init(self): - self.guest_phys_blocks_init() - self.guest_phys_blocks_append() + self.guest_phys_blocks = 
get_guest_phys_blocks() self.cpu_get_dump_info() # we have no way to retrieve the VCPU status from KVM # post-mortem @@ -310,7 +312,7 @@ shape and this command should mostly work.""" cur = block["host_addr"] left = block["target_end"] - block["target_start"] print ("dumping range at %016x for length %016x" % - (cur.cast(self.uintptr_t), left)) + (cur.cast(UINTPTR_T), left)) while (left > 0): chunk_size = min(TARGET_PAGE_SIZE, left) chunk = qemu_core.read_memory(cur, chunk_size) From 7cb1089d5fbd7b2d9497f111ce948edef41df32d Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:37 +0100 Subject: [PATCH 46/49] scripts/dump-guest-memory.py: Improve python 3 compatibility This commit does not make the script python 3 compatible, it is a preparation that fixes the easy and common incompatibilities. Print is a function in python 3 and therefore needs braces around its arguments. Range does not cast a gdb.Value object to int in python 3, we have to do it ourselves. Reviewed-by: Laszlo Ersek Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-4-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index d0b927a2bc..bb4ca8e3d4 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -98,15 +98,19 @@ def memory_region_get_ram_ptr(mr): def get_guest_phys_blocks(): guest_phys_blocks = [] - print "guest RAM blocks:" - print ("target_start target_end host_addr message " - "count") - print ("---------------- ---------------- ---------------- ------- " - "-----") + print("guest RAM blocks:") + print("target_start target_end host_addr message " + "count") + print("---------------- ---------------- ---------------- ------- " + "-----") current_map_p = gdb.parse_and_eval("address_space_memory.current_map") current_map = 
current_map_p.dereference() - for cur in range(current_map["nr"]): + + # Conversion to int is needed for python 3 + # compatibility. Otherwise range doesn't cast the value itself and + # breaks. + for cur in range(int(current_map["nr"])): flat_range = (current_map["ranges"] + cur).dereference() mr = flat_range["mr"].dereference() @@ -149,9 +153,9 @@ def get_guest_phys_blocks(): predecessor["target_end"] = target_end message = "joined" - print ("%016x %016x %016x %-7s %5u" % - (target_start, target_end, host_addr.cast(UINTPTR_T), - message, len(guest_phys_blocks))) + print("%016x %016x %016x %-7s %5u" % + (target_start, target_end, host_addr.cast(UINTPTR_T), + message, len(guest_phys_blocks))) return guest_phys_blocks @@ -311,8 +315,8 @@ shape and this command should mostly work.""" for block in self.guest_phys_blocks: cur = block["host_addr"] left = block["target_end"] - block["target_start"] - print ("dumping range at %016x for length %016x" % - (cur.cast(UINTPTR_T), left)) + print("dumping range at %016x for length %016x" % + (cur.cast(UINTPTR_T), left)) while (left > 0): chunk_size = min(TARGET_PAGE_SIZE, left) chunk = qemu_core.read_memory(cur, chunk_size) From 6782c0e785a0ba48cd96d99f2402cb87af027d26 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:38 +0100 Subject: [PATCH 47/49] scripts/dump-guest-memory.py: Cleanup functions Increase readability by adding newlines and comments, as well as removing wrong whitespaces and C style braces around conditionals and loops. 
Reviewed-by: Laszlo Ersek Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-5-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 75 ++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index bb4ca8e3d4..2cf73659a1 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -69,35 +69,60 @@ ELF64_PHDR = ("I" # p_type ) def int128_get64(val): - assert (val["hi"] == 0) + """Returns low 64bit part of Int128 struct.""" + + assert val["hi"] == 0 return val["lo"] + def qlist_foreach(head, field_str): + """Generator for qlists.""" + var_p = head["lh_first"] - while (var_p != 0): + while var_p != 0: var = var_p.dereference() - yield var var_p = var[field_str]["le_next"] + yield var + def qemu_get_ram_block(ram_addr): + """Returns the RAMBlock struct to which the given address belongs.""" + ram_blocks = gdb.parse_and_eval("ram_list.blocks") + for block in qlist_foreach(ram_blocks, "next"): - if (ram_addr - block["offset"] < block["used_length"]): + if (ram_addr - block["offset"]) < block["used_length"]: return block + raise gdb.GdbError("Bad ram offset %x" % ram_addr) + def qemu_get_ram_ptr(ram_addr): + """Returns qemu vaddr for given guest physical address.""" + block = qemu_get_ram_block(ram_addr) return block["host"] + (ram_addr - block["offset"]) -def memory_region_get_ram_ptr(mr): - if (mr["alias"] != 0): - return (memory_region_get_ram_ptr(mr["alias"].dereference()) + - mr["alias_offset"]) - return qemu_get_ram_ptr(mr["ram_addr"] & TARGET_PAGE_MASK) + +def memory_region_get_ram_ptr(memory_region): + if memory_region["alias"] != 0: + return (memory_region_get_ram_ptr(memory_region["alias"].dereference()) + + memory_region["alias_offset"]) + + return qemu_get_ram_ptr(memory_region["ram_addr"] & TARGET_PAGE_MASK) + def get_guest_phys_blocks(): + """Returns a list of ram blocks. 
+ + Each block entry contains: + 'target_start': guest block phys start address + 'target_end': guest block phys end address + 'host_addr': qemu vaddr of the block's start + """ + guest_phys_blocks = [] + print("guest RAM blocks:") print("target_start target_end host_addr message " "count") @@ -111,29 +136,29 @@ def get_guest_phys_blocks(): # compatibility. Otherwise range doesn't cast the value itself and # breaks. for cur in range(int(current_map["nr"])): - flat_range = (current_map["ranges"] + cur).dereference() - mr = flat_range["mr"].dereference() + flat_range = (current_map["ranges"] + cur).dereference() + memory_region = flat_range["mr"].dereference() # we only care about RAM - if (not mr["ram"]): + if not memory_region["ram"]: continue section_size = int128_get64(flat_range["addr"]["size"]) target_start = int128_get64(flat_range["addr"]["start"]) - target_end = target_start + section_size - host_addr = (memory_region_get_ram_ptr(mr) + - flat_range["offset_in_region"]) + target_end = target_start + section_size + host_addr = (memory_region_get_ram_ptr(memory_region) + + flat_range["offset_in_region"]) predecessor = None # find continuity in guest physical address space - if (len(guest_phys_blocks) > 0): + if len(guest_phys_blocks) > 0: predecessor = guest_phys_blocks[-1] predecessor_size = (predecessor["target_end"] - predecessor["target_start"]) # the memory API guarantees monotonically increasing # traversal - assert (predecessor["target_end"] <= target_start) + assert predecessor["target_end"] <= target_start # we want continuity in both guest-physical and # host-virtual memory @@ -141,11 +166,11 @@ def get_guest_phys_blocks(): predecessor["host_addr"] + predecessor_size != host_addr): predecessor = None - if (predecessor is None): + if predecessor is None: # isolated mapping, add it to the list guest_phys_blocks.append({"target_start": target_start, - "target_end" : target_end, - "host_addr" : host_addr}) + "target_end": target_end, + "host_addr": 
host_addr}) message = "added" else: # expand predecessor until @target_end; predecessor's @@ -282,7 +307,7 @@ shape and this command should mostly work.""" # We should never reach PN_XNUM for paging=false dumps: there's # just a handful of discontiguous ranges after merging. self.phdr_num += len(self.guest_phys_blocks) - assert (self.phdr_num < PN_XNUM) + assert self.phdr_num < PN_XNUM # Calculate the ELF file offset where the memory dump commences: # @@ -313,15 +338,15 @@ shape and this command should mostly work.""" def dump_iterate(self, vmcore): qemu_core = gdb.inferiors()[0] for block in self.guest_phys_blocks: - cur = block["host_addr"] + cur = block["host_addr"] left = block["target_end"] - block["target_start"] print("dumping range at %016x for length %016x" % (cur.cast(UINTPTR_T), left)) - while (left > 0): + while left > 0: chunk_size = min(TARGET_PAGE_SIZE, left) chunk = qemu_core.read_memory(cur, chunk_size) vmcore.write(chunk) - cur += chunk_size + cur += chunk_size left -= chunk_size def create_vmcore(self, filename): @@ -336,7 +361,7 @@ shape and this command should mostly work.""" self.dont_repeat() argv = gdb.string_to_argv(args) - if (len(argv) != 1): + if len(argv) != 1: raise gdb.GdbError("usage: dump-guest-memory FILE") self.dump_init() From 368e3adc8928b2786939a25a336527f83f18e926 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:39 +0100 Subject: [PATCH 48/49] scripts/dump-guest-memory.py: Introduce multi-arch support By modelling the ELF with ctypes we not only gain full python 3 support but can also create dumps for different architectures more easily. 
Tested-by: Andrew Jones Acked-by: Laszlo Ersek Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-6-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 486 +++++++++++++++++++++++------------ 1 file changed, 322 insertions(+), 164 deletions(-) diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 2cf73659a1..7acce654bc 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -6,6 +6,7 @@ # # Authors: # Laszlo Ersek +# Janosch Frank # # This work is licensed under the terms of the GNU GPL, version 2 or later. See # the COPYING file in the top-level directory. @@ -15,58 +16,303 @@ # "help data" summary), and it should match how other help texts look in # gdb. -import struct +import ctypes UINTPTR_T = gdb.lookup_type("uintptr_t") TARGET_PAGE_SIZE = 0x1000 TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000 -# Various ELF constants -EM_X86_64 = 62 # AMD x86-64 target machine -ELFDATA2LSB = 1 # little endian -ELFCLASS64 = 2 -ELFMAG = "\x7FELF" -EV_CURRENT = 1 -ET_CORE = 4 -PT_LOAD = 1 -PT_NOTE = 4 - # Special value for e_phnum. This indicates that the real number of # program headers is too large to fit into e_phnum. Instead the real # value is in the field sh_info of section 0. PN_XNUM = 0xFFFF -# Format strings for packing and header size calculation. 
-ELF64_EHDR = ("4s" # e_ident/magic - "B" # e_ident/class - "B" # e_ident/data - "B" # e_ident/version - "B" # e_ident/osabi - "8s" # e_ident/pad - "H" # e_type - "H" # e_machine - "I" # e_version - "Q" # e_entry - "Q" # e_phoff - "Q" # e_shoff - "I" # e_flags - "H" # e_ehsize - "H" # e_phentsize - "H" # e_phnum - "H" # e_shentsize - "H" # e_shnum - "H" # e_shstrndx - ) -ELF64_PHDR = ("I" # p_type - "I" # p_flags - "Q" # p_offset - "Q" # p_vaddr - "Q" # p_paddr - "Q" # p_filesz - "Q" # p_memsz - "Q" # p_align - ) +EV_CURRENT = 1 + +ELFCLASS32 = 1 +ELFCLASS64 = 2 + +ELFDATA2LSB = 1 +ELFDATA2MSB = 2 + +ET_CORE = 4 + +PT_LOAD = 1 +PT_NOTE = 4 + +EM_386 = 3 +EM_PPC = 20 +EM_PPC64 = 21 +EM_S390 = 22 +EM_AARCH = 183 +EM_X86_64 = 62 + +class ELF(object): + """Representation of a ELF file.""" + + def __init__(self, arch): + self.ehdr = None + self.notes = [] + self.segments = [] + self.notes_size = 0 + self.endianess = None + self.elfclass = ELFCLASS64 + + if arch == 'aarch64-le': + self.endianess = ELFDATA2LSB + self.elfclass = ELFCLASS64 + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_AARCH + + elif arch == 'aarch64-be': + self.endianess = ELFDATA2MSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_AARCH + + elif arch == 'X86_64': + self.endianess = ELFDATA2LSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_X86_64 + + elif arch == '386': + self.endianess = ELFDATA2LSB + self.elfclass = ELFCLASS32 + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_386 + + elif arch == 's390': + self.endianess = ELFDATA2MSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_S390 + + elif arch == 'ppc64-le': + self.endianess = ELFDATA2LSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_PPC64 + + elif arch == 'ppc64-be': + self.endianess = ELFDATA2MSB + self.ehdr = 
get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_PPC64 + + else: + raise gdb.GdbError("No valid arch type specified.\n" + "Currently supported types:\n" + "aarch64-be, aarch64-le, X86_64, 386, s390, " + "ppc64-be, ppc64-le") + + self.add_segment(PT_NOTE, 0, 0) + + def add_note(self, n_name, n_desc, n_type): + """Adds a note to the ELF.""" + + note = get_arch_note(self.endianess, len(n_name), len(n_desc)) + note.n_namesz = len(n_name) + 1 + note.n_descsz = len(n_desc) + note.n_name = n_name.encode() + note.n_type = n_type + + # Desc needs to be 4 byte aligned (although the 64bit spec + # specifies 8 byte). When defining n_desc as uint32 it will be + # automatically aligned but we need the memmove to copy the + # string into it. + ctypes.memmove(note.n_desc, n_desc.encode(), len(n_desc)) + + self.notes.append(note) + self.segments[0].p_filesz += ctypes.sizeof(note) + self.segments[0].p_memsz += ctypes.sizeof(note) + + def add_segment(self, p_type, p_paddr, p_size): + """Adds a segment to the elf.""" + + phdr = get_arch_phdr(self.endianess, self.elfclass) + phdr.p_type = p_type + phdr.p_paddr = p_paddr + phdr.p_filesz = p_size + phdr.p_memsz = p_size + self.segments.append(phdr) + self.ehdr.e_phnum += 1 + + def to_file(self, elf_file): + """Writes all ELF structures to the passed file. 
+ + Structure: + Ehdr + Segment 0:PT_NOTE + Segment 1:PT_LOAD + Segment N:PT_LOAD + Note 0..N + Dump contents + """ + elf_file.write(self.ehdr) + off = ctypes.sizeof(self.ehdr) + \ + len(self.segments) * ctypes.sizeof(self.segments[0]) + + for phdr in self.segments: + phdr.p_offset = off + elf_file.write(phdr) + off += phdr.p_filesz + + for note in self.notes: + elf_file.write(note) + + +def get_arch_note(endianess, len_name, len_desc): + """Returns a Note class with the specified endianess.""" + + if endianess == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + len_name = len_name + 1 + + class Note(superclass): + """Represents an ELF note, includes the content.""" + + _fields_ = [("n_namesz", ctypes.c_uint32), + ("n_descsz", ctypes.c_uint32), + ("n_type", ctypes.c_uint32), + ("n_name", ctypes.c_char * len_name), + ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))] + return Note() + + +class Ident(ctypes.Structure): + """Represents the ELF ident array in the ehdr structure.""" + + _fields_ = [('ei_mag0', ctypes.c_ubyte), + ('ei_mag1', ctypes.c_ubyte), + ('ei_mag2', ctypes.c_ubyte), + ('ei_mag3', ctypes.c_ubyte), + ('ei_class', ctypes.c_ubyte), + ('ei_data', ctypes.c_ubyte), + ('ei_version', ctypes.c_ubyte), + ('ei_osabi', ctypes.c_ubyte), + ('ei_abiversion', ctypes.c_ubyte), + ('ei_pad', ctypes.c_ubyte * 7)] + + def __init__(self, endianess, elfclass): + self.ei_mag0 = 0x7F + self.ei_mag1 = ord('E') + self.ei_mag2 = ord('L') + self.ei_mag3 = ord('F') + self.ei_class = elfclass + self.ei_data = endianess + self.ei_version = EV_CURRENT + + +def get_arch_ehdr(endianess, elfclass): + """Returns a EHDR64 class with the specified endianess.""" + + if endianess == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + class EHDR64(superclass): + """Represents the 64 bit ELF header struct.""" + + _fields_ = [('e_ident', Ident), + ('e_type', 
ctypes.c_uint16), + ('e_machine', ctypes.c_uint16), + ('e_version', ctypes.c_uint32), + ('e_entry', ctypes.c_uint64), + ('e_phoff', ctypes.c_uint64), + ('e_shoff', ctypes.c_uint64), + ('e_flags', ctypes.c_uint32), + ('e_ehsize', ctypes.c_uint16), + ('e_phentsize', ctypes.c_uint16), + ('e_phnum', ctypes.c_uint16), + ('e_shentsize', ctypes.c_uint16), + ('e_shnum', ctypes.c_uint16), + ('e_shstrndx', ctypes.c_uint16)] + + def __init__(self): + super(superclass, self).__init__() + self.e_ident = Ident(endianess, elfclass) + self.e_type = ET_CORE + self.e_version = EV_CURRENT + self.e_ehsize = ctypes.sizeof(self) + self.e_phoff = ctypes.sizeof(self) + self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass)) + self.e_phnum = 0 + + + class EHDR32(superclass): + """Represents the 32 bit ELF header struct.""" + + _fields_ = [('e_ident', Ident), + ('e_type', ctypes.c_uint16), + ('e_machine', ctypes.c_uint16), + ('e_version', ctypes.c_uint32), + ('e_entry', ctypes.c_uint32), + ('e_phoff', ctypes.c_uint32), + ('e_shoff', ctypes.c_uint32), + ('e_flags', ctypes.c_uint32), + ('e_ehsize', ctypes.c_uint16), + ('e_phentsize', ctypes.c_uint16), + ('e_phnum', ctypes.c_uint16), + ('e_shentsize', ctypes.c_uint16), + ('e_shnum', ctypes.c_uint16), + ('e_shstrndx', ctypes.c_uint16)] + + def __init__(self): + super(superclass, self).__init__() + self.e_ident = Ident(endianess, elfclass) + self.e_type = ET_CORE + self.e_version = EV_CURRENT + self.e_ehsize = ctypes.sizeof(self) + self.e_phoff = ctypes.sizeof(self) + self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass)) + self.e_phnum = 0 + + # End get_arch_ehdr + if elfclass == ELFCLASS64: + return EHDR64() + else: + return EHDR32() + + +def get_arch_phdr(endianess, elfclass): + """Returns a 32 or 64 bit PHDR class with the specified endianess.""" + + if endianess == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + class PHDR64(superclass): + """Represents 
the 64 bit ELF program header struct.""" + + _fields_ = [('p_type', ctypes.c_uint32), + ('p_flags', ctypes.c_uint32), + ('p_offset', ctypes.c_uint64), + ('p_vaddr', ctypes.c_uint64), + ('p_paddr', ctypes.c_uint64), + ('p_filesz', ctypes.c_uint64), + ('p_memsz', ctypes.c_uint64), + ('p_align', ctypes.c_uint64)] + + class PHDR32(superclass): + """Represents the 32 bit ELF program header struct.""" + + _fields_ = [('p_type', ctypes.c_uint32), + ('p_offset', ctypes.c_uint32), + ('p_vaddr', ctypes.c_uint32), + ('p_paddr', ctypes.c_uint32), + ('p_filesz', ctypes.c_uint32), + ('p_memsz', ctypes.c_uint32), + ('p_flags', ctypes.c_uint32), + ('p_align', ctypes.c_uint32)] + + # End get_arch_phdr + if elfclass == ELFCLASS64: + return PHDR64() + else: + return PHDR32() + def int128_get64(val): """Returns low 64bit part of Int128 struct.""" @@ -188,20 +434,22 @@ def get_guest_phys_blocks(): class DumpGuestMemory(gdb.Command): """Extract guest vmcore from qemu process coredump. -The sole argument is FILE, identifying the target file to write the -guest vmcore to. +The two required arguments are FILE and ARCH: +FILE identifies the target file to write the guest vmcore to. +ARCH specifies the architecture for which the core will be generated. This GDB command reimplements the dump-guest-memory QMP command in python, using the representation of guest memory as captured in the qemu coredump. The qemu process that has been dumped must have had the -command line option "-machine dump-guest-core=on". +command line option "-machine dump-guest-core=on" which is the default. For simplicity, the "paging", "begin" and "end" parameters of the QMP command are not supported -- no attempt is made to get the guest's internal paging structures (ie. paging=false is hard-wired), and guest memory is always fully dumped. -Only x86_64 guests are supported. +Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be, +ppc64-le guests are supported. 
The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are not written to the vmcore. Preparing these would require context that is @@ -219,129 +467,39 @@ shape and this command should mostly work.""" super(DumpGuestMemory, self).__init__("dump-guest-memory", gdb.COMMAND_DATA, gdb.COMPLETE_FILENAME) - self.elf64_ehdr_le = struct.Struct("<%s" % ELF64_EHDR) - self.elf64_phdr_le = struct.Struct("<%s" % ELF64_PHDR) + self.elf = None self.guest_phys_blocks = None - def cpu_get_dump_info(self): - # We can't synchronize the registers with KVM post-mortem, and - # the bits in (first_x86_cpu->env.hflags) seem to be stale; they - # may not reflect long mode for example. Hence just assume the - # most common values. This also means that instruction pointer - # etc. will be bogus in the dump, but at least the RAM contents - # should be valid. - self.dump_info = {"d_machine": EM_X86_64, - "d_endian" : ELFDATA2LSB, - "d_class" : ELFCLASS64} + def dump_init(self, vmcore): + """Prepares and writes ELF structures to core file.""" - def encode_elf64_ehdr_le(self): - return self.elf64_ehdr_le.pack( - ELFMAG, # e_ident/magic - self.dump_info["d_class"], # e_ident/class - self.dump_info["d_endian"], # e_ident/data - EV_CURRENT, # e_ident/version - 0, # e_ident/osabi - "", # e_ident/pad - ET_CORE, # e_type - self.dump_info["d_machine"], # e_machine - EV_CURRENT, # e_version - 0, # e_entry - self.elf64_ehdr_le.size, # e_phoff - 0, # e_shoff - 0, # e_flags - self.elf64_ehdr_le.size, # e_ehsize - self.elf64_phdr_le.size, # e_phentsize - self.phdr_num, # e_phnum - 0, # e_shentsize - 0, # e_shnum - 0 # e_shstrndx - ) + # Needed to make crash happy, data for more useful notes is + # not available in a qemu core. 
+ self.elf.add_note("NONE", "EMPTY", 0) - def encode_elf64_note_le(self): - return self.elf64_phdr_le.pack(PT_NOTE, # p_type - 0, # p_flags - (self.memory_offset - - len(self.note)), # p_offset - 0, # p_vaddr - 0, # p_paddr - len(self.note), # p_filesz - len(self.note), # p_memsz - 0 # p_align - ) + # We should never reach PN_XNUM for paging=false dumps, + # there's just a handful of discontiguous ranges after + # merging. + # The constant is needed to account for the PT_NOTE segment. + phdr_num = len(self.guest_phys_blocks) + 1 + assert phdr_num < PN_XNUM - def encode_elf64_load_le(self, offset, start_hwaddr, range_size): - return self.elf64_phdr_le.pack(PT_LOAD, # p_type - 0, # p_flags - offset, # p_offset - 0, # p_vaddr - start_hwaddr, # p_paddr - range_size, # p_filesz - range_size, # p_memsz - 0 # p_align - ) - - def note_init(self, name, desc, type): - # name must include a trailing NUL - namesz = (len(name) + 1 + 3) / 4 * 4 - descsz = (len(desc) + 3) / 4 * 4 - fmt = ("<" # little endian - "I" # n_namesz - "I" # n_descsz - "I" # n_type - "%us" # name - "%us" # desc - % (namesz, descsz)) - self.note = struct.pack(fmt, - len(name) + 1, len(desc), type, name, desc) - - def dump_init(self): - self.guest_phys_blocks = get_guest_phys_blocks() - self.cpu_get_dump_info() - # we have no way to retrieve the VCPU status from KVM - # post-mortem - self.note_init("NONE", "EMPTY", 0) - - # Account for PT_NOTE. - self.phdr_num = 1 - - # We should never reach PN_XNUM for paging=false dumps: there's - # just a handful of discontiguous ranges after merging. - self.phdr_num += len(self.guest_phys_blocks) - assert self.phdr_num < PN_XNUM - - # Calculate the ELF file offset where the memory dump commences: - # - # ELF header - # PT_NOTE - # PT_LOAD: 1 - # PT_LOAD: 2 - # ... 
- # PT_LOAD: len(self.guest_phys_blocks) - # ELF note - # memory dump - self.memory_offset = (self.elf64_ehdr_le.size + - self.elf64_phdr_le.size * self.phdr_num + - len(self.note)) - - def dump_begin(self, vmcore): - vmcore.write(self.encode_elf64_ehdr_le()) - vmcore.write(self.encode_elf64_note_le()) - running = self.memory_offset for block in self.guest_phys_blocks: - range_size = block["target_end"] - block["target_start"] - vmcore.write(self.encode_elf64_load_le(running, - block["target_start"], - range_size)) - running += range_size - vmcore.write(self.note) + block_size = block["target_end"] - block["target_start"] + self.elf.add_segment(PT_LOAD, block["target_start"], block_size) + + self.elf.to_file(vmcore) def dump_iterate(self, vmcore): + """Writes guest core to file.""" + qemu_core = gdb.inferiors()[0] for block in self.guest_phys_blocks: cur = block["host_addr"] left = block["target_end"] - block["target_start"] print("dumping range at %016x for length %016x" % (cur.cast(UINTPTR_T), left)) + while left > 0: chunk_size = min(TARGET_PAGE_SIZE, left) chunk = qemu_core.read_memory(cur, chunk_size) @@ -349,22 +507,22 @@ shape and this command should mostly work.""" cur += chunk_size left -= chunk_size - def create_vmcore(self, filename): - vmcore = open(filename, "wb") - self.dump_begin(vmcore) - self.dump_iterate(vmcore) - vmcore.close() - def invoke(self, args, from_tty): + """Handles command invocation from gdb.""" + # Unwittingly pressing the Enter key after the command should # not dump the same multi-gig coredump to the same file. 
self.dont_repeat() argv = gdb.string_to_argv(args) - if len(argv) != 1: - raise gdb.GdbError("usage: dump-guest-memory FILE") + if len(argv) != 2: + raise gdb.GdbError("usage: dump-guest-memory FILE ARCH") - self.dump_init() - self.create_vmcore(argv[0]) + self.elf = ELF(argv[1]) + self.guest_phys_blocks = get_guest_phys_blocks() + + with open(argv[0], "wb") as vmcore: + self.dump_init(vmcore) + self.dump_iterate(vmcore) DumpGuestMemory() From 28fbf8f67b078f738e790f3c3a56aeab2c0ea5d6 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:40 +0100 Subject: [PATCH 49/49] scripts/dump-guest-memory.py: Fix module docstring The module docstring is changed into a multi-line comment to comply with pep 257. The comment about the docstring that gets used by gdb to print the help is moved to the location of the docstring. Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-7-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 7acce654bc..f274bf80fa 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -1,20 +1,17 @@ -# This python script adds a new gdb command, "dump-guest-memory". It -# should be loaded with "source dump-guest-memory.py" at the (gdb) -# prompt. -# -# Copyright (C) 2013, Red Hat, Inc. -# -# Authors: -# Laszlo Ersek -# Janosch Frank -# -# This work is licensed under the terms of the GNU GPL, version 2 or later. See -# the COPYING file in the top-level directory. -# -# The leading docstring doesn't have idiomatic Python formatting. It is -# printed by gdb's "help" command (the first line is printed in the -# "help data" summary), and it should match how other help texts look in -# gdb. +""" +This python script adds a new gdb command, "dump-guest-memory". 
It +should be loaded with "source dump-guest-memory.py" at the (gdb) +prompt. + +Copyright (C) 2013, Red Hat, Inc. + +Authors: + Laszlo Ersek + Janosch Frank + +This work is licensed under the terms of the GNU GPL, version 2 or later. See +the COPYING file in the top-level directory. +""" import ctypes @@ -431,6 +428,10 @@ def get_guest_phys_blocks(): return guest_phys_blocks +# The leading docstring doesn't have idiomatic Python formatting. It is +# printed by gdb's "help" command (the first line is printed in the +# "help data" summary), and it should match how other help texts look in +# gdb. class DumpGuestMemory(gdb.Command): """Extract guest vmcore from qemu process coredump.