Merge remote-tracking branch 'bonzini/migration-writev' into staging

# By Paolo Bonzini
# Via Paolo Bonzini
* bonzini/migration-writev:
  qemu-file: do not use stdio for qemu_fdopen
  iov: handle partial writes from sendmsg and recvmsg
  iov: reorganize iov_send_recv, part 3
  iov: reorganize iov_send_recv, part 2
  iov: reorganize iov_send_recv, part 1
  qemu-file: drop socket_put_buffer

Message-id: 1366192012-14872-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
Anthony Liguori 2013-04-17 10:14:07 -05:00
commit 20781f9c00
2 changed files with 152 additions and 72 deletions

114
savevm.c
View File

@ -219,18 +219,6 @@ static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
return len;
}
/*
 * Send `size' bytes from `buf' on the socket held by the QEMUFileSocket
 * passed as `opaque', using qemu_send_full().  `pos' is not used.
 *
 * Returns the value reported by qemu_send_full() when it covers the whole
 * request, otherwise the negated socket_error() value.
 */
static int socket_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
{
    QEMUFileSocket *sock = opaque;
    ssize_t sent = qemu_send_full(sock->fd, buf, size, 0);

    if (sent >= size) {
        return sent;
    }

    /* Fewer bytes than requested went out: report the socket error. */
    sent = -socket_error();
    return sent;
}
static int socket_close(void *opaque)
{
QEMUFileSocket *s = opaque;
@ -368,9 +356,94 @@ static const QEMUFileOps stdio_file_write_ops = {
.close = stdio_fclose
};
/*
 * Write every byte described by iov[0..iovcnt-1] to the file descriptor
 * held by the QEMUFileSocket passed as `opaque', calling writev() again
 * after a short write until the whole vector has been written.
 *
 * `pos' is not used.
 *
 * Around each writev() call the first remaining element is adjusted in
 * place to skip the bytes that were already written.  The adjustment is
 * reverted before the result of writev() is examined, so the elements of
 * the caller's iovec are restored on the error path as well as on success.
 *
 * Returns the total number of bytes written, or -errno when writev()
 * fails with an error other than EINTR (EINTR is retried).
 */
static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
                                  int64_t pos)
{
    QEMUFileSocket *s = opaque;
    ssize_t len, offset;
    ssize_t size = iov_size(iov, iovcnt);
    ssize_t total = 0;

    assert(iovcnt > 0);
    offset = 0;
    while (size > 0) {
        /* Find the next start position; skip all full-sized vector elements */
        while (offset >= iov[0].iov_len) {
            offset -= iov[0].iov_len;
            iov++, iovcnt--;
        }

        /* skip `offset' bytes from the (now) first element, undone below */
        assert(iovcnt > 0);
        iov[0].iov_base += offset;
        iov[0].iov_len -= offset;

        do {
            len = writev(s->fd, iov, iovcnt);
        } while (len == -1 && errno == EINTR);

        /*
         * Undo the changes above before checking for errors, so a failed
         * writev() does not leave the element modified.  This is plain
         * arithmetic and does not touch errno.
         */
        iov[0].iov_base -= offset;
        iov[0].iov_len += offset;

        if (len == -1) {
            return -errno;
        }

        /* Prepare for the next iteration */
        offset += len;
        total += len;
        size -= len;
    }

    return total;
}
/*
 * Read up to `size' bytes into `buf' from the file descriptor held by the
 * QEMUFileSocket passed as `opaque'.  `pos' is not used.
 *
 * A read() interrupted by a signal (EINTR) is simply retried; on EAGAIN
 * the function calls yield_until_fd_readable() and then retries.
 *
 * Returns the value returned by read(), or -errno for any other failure.
 */
static int unix_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
{
    QEMUFileSocket *sock = opaque;
    ssize_t nread;

    while ((nread = read(sock->fd, buf, size)) == -1) {
        if (errno == EINTR) {
            /* Interrupted: just try again. */
            continue;
        }
        if (errno != EAGAIN) {
            /* Hard error: hand it back as a negative errno value. */
            return -errno;
        }
        /* Nothing available yet: wait for the descriptor, then retry. */
        yield_until_fd_readable(sock->fd);
    }

    return nread;
}
/*
 * Close the file descriptor held by the QEMUFileSocket passed as `opaque'
 * and free the structure itself.
 *
 * Always returns 0; the result of close() is not inspected.
 */
static int unix_close(void *opaque)
{
    QEMUFileSocket *sock = opaque;
    int fd = sock->fd;

    close(fd);
    g_free(sock);

    return 0;
}
/*
 * Callback table for the read side of a file-descriptor backed QEMUFile:
 * data is fetched with unix_get_buffer() and the descriptor is released
 * with unix_close().
 */
static const QEMUFileOps unix_read_ops = {
    .get_buffer = unix_get_buffer,
    .get_fd     = socket_get_fd,
    .close      = unix_close,
};
/*
 * Callback table for the write side of a file-descriptor backed QEMUFile:
 * data is written with unix_writev_buffer() and the descriptor is released
 * with unix_close().
 */
static const QEMUFileOps unix_write_ops = {
    .writev_buffer = unix_writev_buffer,
    .get_fd        = socket_get_fd,
    .close         = unix_close,
};
QEMUFile *qemu_fdopen(int fd, const char *mode)
{
QEMUFileStdio *s;
QEMUFileSocket *s;
if (mode == NULL ||
(mode[0] != 'r' && mode[0] != 'w') ||
@ -379,21 +452,15 @@ QEMUFile *qemu_fdopen(int fd, const char *mode)
return NULL;
}
s = g_malloc0(sizeof(QEMUFileStdio));
s->stdio_file = fdopen(fd, mode);
if (!s->stdio_file)
goto fail;
s = g_malloc0(sizeof(QEMUFileSocket));
s->fd = fd;
if(mode[0] == 'r') {
s->file = qemu_fopen_ops(s, &stdio_file_read_ops);
s->file = qemu_fopen_ops(s, &unix_read_ops);
} else {
s->file = qemu_fopen_ops(s, &stdio_file_write_ops);
s->file = qemu_fopen_ops(s, &unix_write_ops);
}
return s->file;
fail:
g_free(s);
return NULL;
}
static const QEMUFileOps socket_read_ops = {
@ -404,7 +471,6 @@ static const QEMUFileOps socket_read_ops = {
/*
 * Callback table wiring the socket_* helpers into the write side of a
 * QEMUFile: both a flat-buffer and a vectored write callback are provided.
 */
static const QEMUFileOps socket_write_ops = {
    .put_buffer    = socket_put_buffer,
    .writev_buffer = socket_writev_buffer,
    .get_fd        = socket_get_fd,
    .close         = socket_close,
};

View File

@ -144,57 +144,71 @@ ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
size_t offset, size_t bytes,
bool do_send)
{
ssize_t total = 0;
ssize_t ret;
unsigned si, ei; /* start and end indexes */
if (bytes == 0) {
/* Catch the do-nothing case early, as otherwise we will pass an
* empty iovec to sendmsg/recvmsg(), and not all implementations
* accept this.
*/
return 0;
size_t orig_len, tail;
unsigned niov;
while (bytes > 0) {
/* Find the start position, skipping `offset' bytes:
* first, skip all full-sized vector elements, */
for (niov = 0; niov < iov_cnt && offset >= iov[niov].iov_len; ++niov) {
offset -= iov[niov].iov_len;
}
/* niov == iov_cnt would only be valid if bytes == 0, which
* we already ruled out in the loop condition. */
assert(niov < iov_cnt);
iov += niov;
iov_cnt -= niov;
if (offset) {
/* second, skip `offset' bytes from the (now) first element,
* undo it on exit */
iov[0].iov_base += offset;
iov[0].iov_len -= offset;
}
/* Find the end position skipping `bytes' bytes: */
/* first, skip all full-sized elements */
tail = bytes;
for (niov = 0; niov < iov_cnt && iov[niov].iov_len <= tail; ++niov) {
tail -= iov[niov].iov_len;
}
if (tail) {
/* second, fixup the last element, and remember the original
* length */
assert(niov < iov_cnt);
assert(iov[niov].iov_len > tail);
orig_len = iov[niov].iov_len;
iov[niov++].iov_len = tail;
}
ret = do_send_recv(sockfd, iov, niov, do_send);
/* Undo the changes above before checking for errors */
if (tail) {
iov[niov-1].iov_len = orig_len;
}
if (offset) {
iov[0].iov_base -= offset;
iov[0].iov_len += offset;
}
if (ret < 0) {
assert(errno != EINTR);
if (errno == EAGAIN && total > 0) {
return total;
}
return -1;
}
/* Prepare for the next iteration */
offset += ret;
total += ret;
bytes -= ret;
}
/* Find the start position, skipping `offset' bytes:
* first, skip all full-sized vector elements, */
for (si = 0; si < iov_cnt && offset >= iov[si].iov_len; ++si) {
offset -= iov[si].iov_len;
}
if (offset) {
assert(si < iov_cnt);
/* second, skip `offset' bytes from the (now) first element,
* undo it on exit */
iov[si].iov_base += offset;
iov[si].iov_len -= offset;
}
/* Find the end position skipping `bytes' bytes: */
/* first, skip all full-sized elements */
for (ei = si; ei < iov_cnt && iov[ei].iov_len <= bytes; ++ei) {
bytes -= iov[ei].iov_len;
}
if (bytes) {
/* second, fixup the last element, and remember
* the length we've cut from the end of it in `bytes' */
size_t tail;
assert(ei < iov_cnt);
assert(iov[ei].iov_len > bytes);
tail = iov[ei].iov_len - bytes;
iov[ei].iov_len = bytes;
bytes = tail; /* bytes is now equal to the tail size */
++ei;
}
ret = do_send_recv(sockfd, iov + si, ei - si, do_send);
/* Undo the changes above */
if (offset) {
iov[si].iov_base -= offset;
iov[si].iov_len += offset;
}
if (bytes) {
iov[ei-1].iov_len += bytes;
}
return ret;
return total;
}