-----BEGIN PGP SIGNATURE-----

Version: GnuPG v1
 
 iQEcBAABAgAGBQJVJkx/AAoJEJykq7OBq3PIGtQIAJ871JaHxAxNjApMKE1TrAoI
 rYbioek/QNtyJbVEOH4F1huR0b4kQ4K/+79gaYBJnUqyXkO6/BnQS2ZuxaAIrzvM
 xjgKW9gkRRvKSCggE8n2wGkkl+0tJQm19jhOr9UX+qlLdeLXU5JcJ1uKeGlKDxi+
 JpxPPcXLSNCeBH8nyED12kpOu7h7YjsH4QJn4FO385ZwODbVREn5XeFW0+Ngj/cb
 FpsJVXNtb9A4wvx9lJQUL1/eOCt54hFJw+P4CHI1fQiDVOdX9gAHVagWiC8MCK9r
 KnGb7ho32/VY9UmHbqogPbJerJjzj0InWfenhjYtbYMEB1rLEcdigcjkbBpLS2Y=
 =bvIr
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging

# gpg: Signature made Thu Apr  9 10:55:11 2015 BST using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/block-pull-request:
  block/iscsi: handle zero events from iscsi_which_events
  aio: strengthen memory barriers for bottom half scheduling
  virtio-blk: correctly dirty guest memory
  qcow2: Fix header update with overridden backing file

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2015-04-09 12:05:00 +01:00
commit a6f2cb037a
10 changed files with 221 additions and 33 deletions

28
async.c
View File

@ -72,12 +72,13 @@ int aio_bh_poll(AioContext *ctx)
/* Make sure that fetching bh happens before accessing its members */
smp_read_barrier_depends();
next = bh->next;
if (!bh->deleted && bh->scheduled) {
bh->scheduled = 0;
/* Paired with write barrier in bh schedule to ensure reading for
* idle & callbacks coming after bh's scheduling.
*/
smp_rmb();
/* The atomic_xchg is paired with the one in qemu_bh_schedule. The
* implicit memory barrier ensures that the callback sees all writes
* done by the scheduling thread. It also ensures that the scheduling
* thread sees the zero before bh->cb has run, and thus will call
* aio_notify again if necessary.
*/
if (!bh->deleted && atomic_xchg(&bh->scheduled, 0)) {
if (!bh->idle)
ret = 1;
bh->idle = 0;
@ -108,33 +109,28 @@ int aio_bh_poll(AioContext *ctx)
void qemu_bh_schedule_idle(QEMUBH *bh)
{
if (bh->scheduled)
return;
bh->idle = 1;
/* Make sure that idle & any writes needed by the callback are done
* before the locations are read in the aio_bh_poll.
*/
smp_wmb();
bh->scheduled = 1;
atomic_mb_set(&bh->scheduled, 1);
}
void qemu_bh_schedule(QEMUBH *bh)
{
AioContext *ctx;
if (bh->scheduled)
return;
ctx = bh->ctx;
bh->idle = 0;
/* Make sure that:
/* The memory barrier implicit in atomic_xchg makes sure that:
* 1. idle & any writes needed by the callback are done before the
* locations are read in the aio_bh_poll.
* 2. ctx is loaded before scheduled is set and the callback has a chance
* to execute.
*/
smp_mb();
bh->scheduled = 1;
aio_notify(ctx);
if (atomic_xchg(&bh->scheduled, 1) == 0) {
aio_notify(ctx);
}
}

View File

@ -56,6 +56,7 @@ typedef struct IscsiLun {
uint64_t num_blocks;
int events;
QEMUTimer *nop_timer;
QEMUTimer *event_timer;
uint8_t lbpme;
uint8_t lbprz;
uint8_t has_write_same;
@ -95,6 +96,7 @@ typedef struct IscsiAIOCB {
#endif
} IscsiAIOCB;
#define EVENT_INTERVAL 250
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
@ -256,21 +258,30 @@ static void
iscsi_set_events(IscsiLun *iscsilun)
{
struct iscsi_context *iscsi = iscsilun->iscsi;
int ev;
int ev = iscsi_which_events(iscsi);
/* We always register a read handler. */
ev = POLLIN;
ev |= iscsi_which_events(iscsi);
if (ev != iscsilun->events) {
aio_set_fd_handler(iscsilun->aio_context,
iscsi_get_fd(iscsi),
iscsi_process_read,
(ev & POLLIN) ? iscsi_process_read : NULL,
(ev & POLLOUT) ? iscsi_process_write : NULL,
iscsilun);
iscsilun->events = ev;
}
iscsilun->events = ev;
/* Newer versions of libiscsi may return zero events. In this
* case, start a timer to ensure we are able to return to service
* once this situation changes. */
if (!ev) {
timer_mod(iscsilun->event_timer,
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
}
}
static void iscsi_timed_set_events(void *opaque)
{
IscsiLun *iscsilun = opaque;
iscsi_set_events(iscsilun);
}
static void
@ -1214,6 +1225,11 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
timer_free(iscsilun->nop_timer);
iscsilun->nop_timer = NULL;
}
if (iscsilun->event_timer) {
timer_del(iscsilun->event_timer);
timer_free(iscsilun->event_timer);
iscsilun->event_timer = NULL;
}
}
static void iscsi_attach_aio_context(BlockDriverState *bs,
@ -1230,6 +1246,11 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
iscsi_nop_timed_event, iscsilun);
timer_mod(iscsilun->nop_timer,
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
/* Prepare a timer for a delayed call to iscsi_set_events */
iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
QEMU_CLOCK_REALTIME, SCALE_MS,
iscsi_timed_set_events, iscsilun);
}
static bool iscsi_is_write_protected(IscsiLun *iscsilun)

View File

@ -140,6 +140,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
return 3;
}
bs->backing_format[ext.len] = '\0';
s->image_backing_format = g_strdup(bs->backing_format);
#ifdef DEBUG_EXT
printf("Qcow2: Got format extension %s\n", bs->backing_format);
#endif
@ -884,6 +885,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
bs->backing_file[len] = '\0';
s->image_backing_file = g_strdup(bs->backing_file);
}
/* Internal snapshots */
@ -1457,6 +1459,9 @@ static void qcow2_close(BlockDriverState *bs)
g_free(s->unknown_header_fields);
cleanup_unknown_header_ext(bs);
g_free(s->image_backing_file);
g_free(s->image_backing_format);
g_free(s->cluster_cache);
qemu_vfree(s->cluster_data);
qcow2_refcount_close(bs);
@ -1622,9 +1627,10 @@ int qcow2_update_header(BlockDriverState *bs)
}
/* Backing file format header extension */
if (*bs->backing_format) {
if (s->image_backing_format) {
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
bs->backing_format, strlen(bs->backing_format),
s->image_backing_format,
strlen(s->image_backing_format),
buflen);
if (ret < 0) {
goto fail;
@ -1682,8 +1688,8 @@ int qcow2_update_header(BlockDriverState *bs)
buflen -= ret;
/* Backing file name */
if (*bs->backing_file) {
size_t backing_file_len = strlen(bs->backing_file);
if (s->image_backing_file) {
size_t backing_file_len = strlen(s->image_backing_file);
if (buflen < backing_file_len) {
ret = -ENOSPC;
@ -1691,7 +1697,7 @@ int qcow2_update_header(BlockDriverState *bs)
}
/* Using strncpy is ok here, since buf is not NUL-terminated. */
strncpy(buf, bs->backing_file, buflen);
strncpy(buf, s->image_backing_file, buflen);
header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
header->backing_file_size = cpu_to_be32(backing_file_len);
@ -1712,9 +1718,17 @@ fail:
/*
 * Change the backing file path and format of the image and rewrite the
 * header.
 *
 * bs->backing_file and bs->backing_format are overwritten with the given
 * strings (an empty string when the argument is NULL).  The copies kept in
 * s->image_backing_file and s->image_backing_format are replaced as well;
 * each becomes NULL when the corresponding argument is NULL.
 *
 * Returns the result of qcow2_update_header().
 */
static int qcow2_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BDRVQcowState *s = bs->opaque;
    char *stored_file = NULL;
    char *stored_fmt = NULL;

    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
    pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");

    /* Duplicate from the bs buffers, not from the arguments: a caller may
     * pass s->image_backing_file / s->image_backing_format themselves
     * (qcow2_amend_options does), and those are released below. */
    if (backing_file) {
        stored_file = g_strdup(bs->backing_file);
    }
    if (backing_fmt) {
        stored_fmt = g_strdup(bs->backing_format);
    }

    g_free(s->image_backing_file);
    g_free(s->image_backing_format);
    s->image_backing_file = stored_file;
    s->image_backing_format = stored_fmt;

    return qcow2_update_header(bs);
}
@ -2751,8 +2765,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
}
if (backing_file || backing_format) {
ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file,
backing_format ?: bs->backing_format);
ret = qcow2_change_backing_file(bs,
backing_file ?: s->image_backing_file,
backing_format ?: s->image_backing_format);
if (ret < 0) {
return ret;
}

View File

@ -283,6 +283,12 @@ typedef struct BDRVQcowState {
QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
bool cache_discards;
/* Backing file path and format as stored in the image (this is not the
* effective path/format, which may be the result of a runtime option
* override) */
char *image_backing_file;
char *image_backing_format;
} BDRVQcowState;
struct QCowAIOCB;

View File

@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
VirtIOBlockDataPlane *s = req->dev->dataplane;
stb_p(&req->in->status, status);
vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
req->qiov.size + sizeof(*req->in));
vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len);
/* Suppress notification to guest by BH and its scheduled
* flag because requests are completed as a batch after io

View File

@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
req->dev = s;
req->qiov.size = 0;
req->in_len = 0;
req->next = NULL;
req->mr_next = NULL;
return req;
@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
trace_virtio_blk_req_complete(req, status);
stb_p(&req->in->status, status);
virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
virtqueue_push(s->vq, &req->elem, req->in_len);
virtio_notify(vdev, s->vq);
}
@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
if (ret) {
int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
bool is_read = !(p & VIRTIO_BLK_T_OUT);
/* Note that memory may be dirtied on read failure. If the
* virtio request is not completed here, as is the case for
* BLOCK_ERROR_ACTION_STOP, the memory may not be copied
* correctly during live migration. While this is ugly,
* it is acceptable because the device is free to write to
* the memory until the request is completed (which will
* happen on the other side of the migration).
*/
if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
continue;
}
@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
exit(1);
}
/* We always touch the last byte, so just see how big in_iov is. */
req->in_len = iov_size(in_iov, in_num);
req->in = (void *)in_iov[in_num - 1].iov_base
+ in_iov[in_num - 1].iov_len
- sizeof(struct virtio_blk_inhdr);

View File

@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
struct virtio_blk_inhdr *in;
struct virtio_blk_outhdr out;
QEMUIOVector qiov;
size_t in_len;
struct VirtIOBlockReq *next;
struct VirtIOBlockReq *mr_next;
BlockAcctCookie acct;

95
tests/qemu-iotests/130 Executable file
View File

@ -0,0 +1,95 @@
#!/bin/bash
#
# Test that temporary backing file overrides (on the command line or in
# blockdev-add) don't replace the original path stored in the image during
# header updates.
#
# Copyright (C) 2015 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# creator
owner=kwolf@redhat.com
# Test number, derived from this script's file name; printed in the banner
# and used for the $seq.full file removed on success.
seq="$(basename $0)"
echo "QA output created by $seq"
here="$PWD"
tmp=/tmp/$$
status=1 # failure is the default!
# Cleanup hook invoked from the trap below; delegates to _cleanup_test_img
# (not defined in this script; presumably provided by the common files
# sourced further down).
_cleanup()
{
_cleanup_test_img
}
# Run _cleanup and exit with $status on shell exit (0) and on signals
# 1, 2, 3 and 15.  $status is escaped so it is expanded when the trap fires,
# not when it is installed.
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
. ./common.qemu
# Declare the format/protocol/OS this test is meant for.
# NOTE(review): presumably these helpers skip the test when the requirement
# is not met -- confirm against common.rc.
_supported_fmt qcow2
_supported_proto generic
_supported_os Linux
# NOTE(review): presumably selects the HMP monitor as the channel used by
# _launch_qemu/_send_qemu_cmd (the commands below wait for the "(qemu)"
# prompt) -- confirm against common.qemu.
qemu_comm_method="monitor"
# Create three 64M images: ".orig" (used below as the backing file stored in
# the image), ".base" (used below as the runtime backing file override) and
# the test image itself, created here without a backing file.
TEST_IMG="$TEST_IMG.orig" _make_test_img 64M
TEST_IMG="$TEST_IMG.base" _make_test_img 64M
_make_test_img 64M
_img_info | _filter_img_info
echo
echo "=== HMP commit ==="
echo
# bdrv_make_empty() involves a header update for qcow2
# Test that a backing file isn't written
_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base"
_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)"
# Send an empty command and wait for the prompt again.
# NOTE(review): presumably this ensures the commit has completed before
# QEMU is shut down -- confirm.
_send_qemu_cmd $QEMU_HANDLE '' '(qemu)'
_cleanup_qemu
# The image info printed here must still show no backing file.
_img_info | _filter_img_info
# Make sure that if there was a backing file that was just overridden on the
# command line, that backing file is retained, with the right format
_make_test_img -F raw -b "$TEST_IMG.orig" 64M
_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base",backing.driver=$IMGFMT
_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)"
_send_qemu_cmd $QEMU_HANDLE '' '(qemu)'
_cleanup_qemu
# Expected: backing file is still the ".orig" image with format raw.
_img_info | _filter_img_info
echo
echo "=== Marking image dirty (lazy refcounts) ==="
echo
# Test that a backing file isn't written
_make_test_img 64M
# Open with a runtime backing file override and lazy-refcounts=on, then
# write 4k at offset 0 through qemu-io.
$QEMU_IO -c "open -o backing.file.filename=$TEST_IMG.base,lazy-refcounts=on $TEST_IMG" -c "write 0 4k" | _filter_qemu_io
_img_info | _filter_img_info
# Make sure that if there was a backing file that was just overridden on the
# command line, that backing file is retained, with the right format
_make_test_img -F raw -b "$TEST_IMG.orig" 64M
$QEMU_IO -c "open -o backing.file.filename=$TEST_IMG.base,backing.driver=$IMGFMT,lazy-refcounts=on $TEST_IMG" -c "write 0 4k" | _filter_qemu_io
_img_info | _filter_img_info
# success, all done
echo '*** done'
rm -f $seq.full
# Only reached when every step above ran; the exit trap then reports success.
status=0

View File

@ -0,0 +1,43 @@
QA output created by 130
Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=67108864
Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
=== HMP commit ===
QEMU X.Y.Z monitor - type 'help' for more information
(qemu) ccocomcommcommicommitcommit commit icommit idcommit idecommit ide0commit ide0-commit ide0-hcommit ide0-hdcommit ide0-hd0
(qemu)
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.orig' backing_fmt='raw'
QEMU X.Y.Z monitor - type 'help' for more information
(qemu) ccocomcommcommicommitcommit commit icommit idcommit idecommit ide0commit ide0-commit ide0-hcommit ide0-hdcommit ide0-hd0
(qemu)
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
backing file: TEST_DIR/t.IMGFMT.orig
backing file format: raw
=== Marking image dirty (lazy refcounts) ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
wrote 4096/4096 bytes at offset 0
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.orig' backing_fmt='raw'
wrote 4096/4096 bytes at offset 0
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
backing file: TEST_DIR/t.IMGFMT.orig
backing file format: raw
*** done

View File

@ -124,3 +124,4 @@
121 rw auto
123 rw auto quick
128 rw auto quick
130 rw auto quick