Merge remote-tracking branch 'kwolf/for-anthony' into staging

Anthony Liguori 2011-12-05 09:39:25 -06:00
commit eb5d5beaeb
40 changed files with 1333 additions and 347 deletions

block-migration.c

@ -387,7 +387,7 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
if (bmds_aio_inflight(bmds, sector)) {
qemu_aio_flush();
bdrv_drain_all();
}
if (bdrv_get_dirty(bmds->bs, sector)) {

block.c

@ -30,6 +30,7 @@
#include "qjson.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"
#ifdef CONFIG_BSD
#include <sys/types.h>
@ -73,6 +74,13 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
bool is_write, int64_t *wait);
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
QTAILQ_HEAD_INITIALIZER(bdrv_states);
@ -105,6 +113,79 @@ int is_windows_drive(const char *filename)
}
#endif
/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
bs->io_limits_enabled = false;
while (qemu_co_queue_next(&bs->throttled_reqs));
if (bs->block_timer) {
qemu_del_timer(bs->block_timer);
qemu_free_timer(bs->block_timer);
bs->block_timer = NULL;
}
bs->slice_start = 0;
bs->slice_end = 0;
bs->slice_time = 0;
memset(&bs->io_base, 0, sizeof(bs->io_base));
}
static void bdrv_block_timer(void *opaque)
{
BlockDriverState *bs = opaque;
qemu_co_queue_next(&bs->throttled_reqs);
}
void bdrv_io_limits_enable(BlockDriverState *bs)
{
qemu_co_queue_init(&bs->throttled_reqs);
bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
bs->slice_start = qemu_get_clock_ns(vm_clock);
bs->slice_end = bs->slice_start + bs->slice_time;
memset(&bs->io_base, 0, sizeof(bs->io_base));
bs->io_limits_enabled = true;
}
bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
BlockIOLimit *io_limits = &bs->io_limits;
return io_limits->bps[BLOCK_IO_LIMIT_READ]
|| io_limits->bps[BLOCK_IO_LIMIT_WRITE]
|| io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
|| io_limits->iops[BLOCK_IO_LIMIT_READ]
|| io_limits->iops[BLOCK_IO_LIMIT_WRITE]
|| io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}
static void bdrv_io_limits_intercept(BlockDriverState *bs,
bool is_write, int nb_sectors)
{
int64_t wait_time = -1;
if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
qemu_co_queue_wait(&bs->throttled_reqs);
}
/* Requests must keep their FIFO order. The next throttled request is not
 * dequeued until the current request has been allowed to be serviced, so
 * if the current request still exceeds the limits it is re-inserted at
 * the head of the queue; every request behind it remains in the
 * throttled_reqs queue.
*/
while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
qemu_mod_timer(bs->block_timer,
wait_time + qemu_get_clock_ns(vm_clock));
qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
}
qemu_co_queue_next(&bs->throttled_reqs);
}
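To make the FIFO invariant above concrete, here is a minimal standalone sketch that models the coroutine queue as a plain array; the types and helper names are illustrative, not QEMU API:

#include <stdio.h>

#define MAX 8

typedef struct { int q[MAX]; int len; } Queue;   /* toy stand-in for CoQueue */

static int dequeue(Queue *q)
{
    int v = q->q[0];
    for (int i = 1; i < q->len; i++) {
        q->q[i - 1] = q->q[i];
    }
    q->len--;
    return v;
}

static void insert_head(Queue *q, int v)  /* no overflow guard: toy code */
{
    for (int i = q->len; i > 0; i--) {
        q->q[i] = q->q[i - 1];
    }
    q->q[0] = v;
    q->len++;
}

int main(void)
{
    Queue q = { { 1, 2, 3 }, 3 };  /* three throttled requests, FIFO order */

    int cur = dequeue(&q);         /* request 1 is woken by the timer */
    insert_head(&q, cur);          /* still over the limit: back to the head */

    while (q.len) {                /* prints 1 2 3: order is preserved */
        printf("%d ", dequeue(&q));
    }
    printf("\n");
    return 0;
}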
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
@ -457,6 +538,22 @@ int bdrv_parse_cache_flags(const char *mode, int *flags)
return 0;
}
/**
* The copy-on-read flag is actually a reference count so multiple users may
* use the feature without worrying about clobbering its previous state.
* Copy-on-read stays enabled until all users have called to disable it.
*/
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
bs->copy_on_read++;
}
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
assert(bs->copy_on_read > 0);
bs->copy_on_read--;
}
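A minimal standalone sketch of the reference-count semantics just described, with BlockDriverState reduced to the one field that matters here (an assumed toy setup, not QEMU code):

#include <assert.h>
#include <stdio.h>

typedef struct { int copy_on_read; } BlockDriverState;

static void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

static void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

int main(void)
{
    BlockDriverState bs = { 0 };

    bdrv_enable_copy_on_read(&bs);   /* user A, e.g. -drive copy-on-read=on */
    bdrv_enable_copy_on_read(&bs);   /* user B, e.g. a background job */
    bdrv_disable_copy_on_read(&bs);  /* user A done; feature stays enabled */
    printf("copy_on_read=%d\n", bs.copy_on_read);  /* 1 */
    bdrv_disable_copy_on_read(&bs);  /* last user done; feature off */
    printf("copy_on_read=%d\n", bs.copy_on_read);  /* 0 */
    return 0;
}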
/*
* Common part for opening disk images and files
*/
@ -478,6 +575,11 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
bs->growable = 0;
bs->buffer_alignment = 512;
assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
bdrv_enable_copy_on_read(bs);
}
pstrcpy(bs->filename, sizeof(bs->filename), filename);
bs->backing_file[0] = '\0';
@ -687,6 +789,11 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
bdrv_dev_change_media_cb(bs, true);
}
/* throttling disk I/O limits */
if (bs->io_limits_enabled) {
bdrv_io_limits_enable(bs);
}
return 0;
unlink_and_fail:
@ -715,6 +822,7 @@ void bdrv_close(BlockDriverState *bs)
#endif
bs->opaque = NULL;
bs->drv = NULL;
bs->copy_on_read = 0;
if (bs->file != NULL) {
bdrv_close(bs->file);
@ -722,6 +830,11 @@ void bdrv_close(BlockDriverState *bs)
bdrv_dev_change_media_cb(bs, false);
}
/*throttling disk I/O limits*/
if (bs->io_limits_enabled) {
bdrv_io_limits_disable(bs);
}
}
void bdrv_close_all(void)
@ -733,6 +846,25 @@ void bdrv_close_all(void)
}
}
/*
* Wait for pending requests to complete across all BlockDriverStates
*
* This function does not flush data to disk; use bdrv_flush_all() for that
* after calling this function.
*/
void bdrv_drain_all(void)
{
BlockDriverState *bs;
qemu_aio_flush();
/* If requests are still pending there is a bug somewhere */
QTAILQ_FOREACH(bs, &bdrv_states, list) {
assert(QLIST_EMPTY(&bs->tracked_requests));
assert(qemu_co_queue_empty(&bs->throttled_reqs));
}
}
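The comment above implies a two-step quiesce-then-flush protocol; a hedged sketch of the intended call order (a fragment against the QEMU-internal APIs declared in block.h, not a standalone program):

#include "block.h"

/* Quiesce every block device, then push data to stable storage. */
static void quiesce_and_flush_all(void)
{
    bdrv_drain_all();  /* wait for all in-flight requests; nothing flushed yet */
    bdrv_flush_all();  /* now flush completed writes to disk */
}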
/* make a BlockDriverState anonymous by removing from bdrv_state list.
Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
@ -922,7 +1054,7 @@ int bdrv_commit(BlockDriverState *bs)
buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
for (sector = 0; sector < total_sectors; sector += n) {
if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
if (bdrv_read(bs, sector, buf, n) != 0) {
ret = -EIO;
@ -980,6 +1112,118 @@ void bdrv_commit_all(void)
}
}
struct BdrvTrackedRequest {
BlockDriverState *bs;
int64_t sector_num;
int nb_sectors;
bool is_write;
QLIST_ENTRY(BdrvTrackedRequest) list;
Coroutine *co; /* owner, used for deadlock detection */
CoQueue wait_queue; /* coroutines blocked on this request */
};
/**
* Remove an active request from the tracked requests list
*
* This function should be called when a tracked request is completing.
*/
static void tracked_request_end(BdrvTrackedRequest *req)
{
QLIST_REMOVE(req, list);
qemu_co_queue_restart_all(&req->wait_queue);
}
/**
* Add an active request to the tracked requests list
*/
static void tracked_request_begin(BdrvTrackedRequest *req,
BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, bool is_write)
{
*req = (BdrvTrackedRequest){
.bs = bs,
.sector_num = sector_num,
.nb_sectors = nb_sectors,
.is_write = is_write,
.co = qemu_coroutine_self(),
};
qemu_co_queue_init(&req->wait_queue);
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}
/**
* Round a region to cluster boundaries
*/
static void round_to_clusters(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
int64_t *cluster_sector_num,
int *cluster_nb_sectors)
{
BlockDriverInfo bdi;
if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
*cluster_sector_num = sector_num;
*cluster_nb_sectors = nb_sectors;
} else {
int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
*cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
*cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
nb_sectors, c);
}
}
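A standalone worked example of the rounding math, assuming 64 KiB clusters and 512-byte sectors (the alignment macros are defined locally with their usual semantics):

#include <stdio.h>
#include <stdint.h>

#define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))
#define QEMU_ALIGN_UP(n, m)   QEMU_ALIGN_DOWN((n) + (m) - 1, (m))

int main(void)
{
    int64_t c = 65536 / 512;   /* 128 sectors per cluster */
    int64_t sector_num = 100;
    int nb_sectors = 50;       /* request covers sectors 100..149 */

    int64_t cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
    int cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - cluster_sector_num +
                                           nb_sectors, c);

    /* Prints "0 256": the request straddles clusters 0 and 1, so the
     * rounded region covers both clusters in full. */
    printf("%lld %d\n", (long long)cluster_sector_num, cluster_nb_sectors);
    return 0;
}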
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
int64_t sector_num, int nb_sectors) {
/* aaaa bbbb */
if (sector_num >= req->sector_num + req->nb_sectors) {
return false;
}
/* bbbb aaaa */
if (req->sector_num >= sector_num + nb_sectors) {
return false;
}
return true;
}
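The predicate treats ranges as half-open intervals; a standalone spot-check (illustrative argument order: first the tracked request's range, then the new request's):

#include <assert.h>
#include <stdint.h>

static int overlaps(int64_t a, int na, int64_t b, int nb)
{
    /* [a, a + na) and [b, b + nb) overlap unless one ends before the
     * other begins, mirroring tracked_request_overlaps() above. */
    return !(b >= a + na || a >= b + nb);
}

int main(void)
{
    assert(!overlaps(0, 8, 8, 8));  /* adjacent ranges do not overlap */
    assert(overlaps(0, 8, 7, 8));   /* sharing sector 7: overlap */
    assert(overlaps(4, 2, 0, 16));  /* fully contained: overlap */
    return 0;
}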
static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
BdrvTrackedRequest *req;
int64_t cluster_sector_num;
int cluster_nb_sectors;
bool retry;
/* If we touch the same cluster it counts as an overlap. This guarantees
* that allocating writes will be serialized and not race with each other
* for the same cluster. For example, in copy-on-read it ensures that the
* CoR read and write operations are atomic and guest writes cannot
* interleave between them.
*/
round_to_clusters(bs, sector_num, nb_sectors,
&cluster_sector_num, &cluster_nb_sectors);
do {
retry = false;
QLIST_FOREACH(req, &bs->tracked_requests, list) {
if (tracked_request_overlaps(req, cluster_sector_num,
cluster_nb_sectors)) {
/* Hitting this means there was a reentrant request, for
* example, a block driver issuing nested requests. This must
* never happen since it means deadlock.
*/
assert(qemu_coroutine_self() != req->co);
qemu_co_queue_wait(&req->wait_queue);
retry = true;
break;
}
}
} while (retry);
}
/*
* Return values:
* 0 - success
@ -1252,6 +1496,61 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
return 0;
}
static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
/* Perform I/O through a temporary buffer so that users who scribble over
* their read buffer while the operation is in progress do not end up
* modifying the image file. This is critical for zero-copy guest I/O
* where anything might happen inside guest memory.
*/
void *bounce_buffer;
struct iovec iov;
QEMUIOVector bounce_qiov;
int64_t cluster_sector_num;
int cluster_nb_sectors;
size_t skip_bytes;
int ret;
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
round_to_clusters(bs, sector_num, nb_sectors,
&cluster_sector_num, &cluster_nb_sectors);
trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
cluster_sector_num, cluster_nb_sectors);
iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
&bounce_qiov);
if (ret < 0) {
goto err;
}
ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
&bounce_qiov);
if (ret < 0) {
/* It might be okay to ignore write errors for guest requests. If this
* is a deliberate copy-on-read then we don't want to ignore the error.
* Simply report it in all cases.
*/
goto err;
}
skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
nb_sectors * BDRV_SECTOR_SIZE);
err:
qemu_vfree(bounce_buffer);
return ret;
}
/*
* Handle a read request in coroutine context
*/
@ -1259,6 +1558,8 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
int ret;
if (!drv) {
return -ENOMEDIUM;
@ -1267,7 +1568,36 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
return -EIO;
}
return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
/* throttling disk read I/O */
if (bs->io_limits_enabled) {
bdrv_io_limits_intercept(bs, false, nb_sectors);
}
if (bs->copy_on_read) {
wait_for_overlapping_requests(bs, sector_num, nb_sectors);
}
tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
if (bs->copy_on_read) {
int pnum;
ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
if (ret < 0) {
goto out;
}
if (!ret || pnum != nb_sectors) {
ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
goto out;
}
}
ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
out:
tracked_request_end(&req);
return ret;
}
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
@ -1285,6 +1615,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
int ret;
if (!bs->drv) {
@ -1297,6 +1628,17 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
return -EIO;
}
/* throttling disk write I/O */
if (bs->io_limits_enabled) {
bdrv_io_limits_intercept(bs, true, nb_sectors);
}
if (bs->copy_on_read) {
wait_for_overlapping_requests(bs, sector_num, nb_sectors);
}
tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
if (bs->dirty_bitmap) {
@ -1307,6 +1649,8 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
bs->wr_highest_sector = sector_num + nb_sectors - 1;
}
tracked_request_end(&req);
return ret;
}
@ -1526,6 +1870,14 @@ void bdrv_get_geometry_hint(BlockDriverState *bs,
*psecs = bs->secs;
}
/* throttling disk io limits */
void bdrv_set_io_limits(BlockDriverState *bs,
BlockIOLimit *io_limits)
{
bs->io_limits = *io_limits;
bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}
/* Recognize floppy formats */
typedef struct FDFormat {
FDriveType drive;
@ -1778,31 +2130,87 @@ int bdrv_has_zero_init(BlockDriverState *bs)
return 1;
}
typedef struct BdrvCoIsAllocatedData {
BlockDriverState *bs;
int64_t sector_num;
int nb_sectors;
int *pnum;
int ret;
bool done;
} BdrvCoIsAllocatedData;
/*
* Returns true iff the specified sector is present in the disk image. Drivers
* not implementing the functionality are assumed to not support backing files,
* hence all their sectors are reported as allocated.
*
* If 'sector_num' is beyond the end of the disk image the return value is 0
* and 'pnum' is set to 0.
*
* 'pnum' is set to the number of sectors (including and immediately following
* the specified sector) that are known to be in the same
* allocated/unallocated state.
*
* 'nb_sectors' is the max value 'pnum' should be set to.
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped.
*/
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
int *pnum)
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
{
int64_t n;
if (!bs->drv->bdrv_is_allocated) {
if (sector_num >= bs->total_sectors) {
*pnum = 0;
return 0;
}
n = bs->total_sectors - sector_num;
*pnum = (n < nb_sectors) ? (n) : (nb_sectors);
if (sector_num >= bs->total_sectors) {
*pnum = 0;
return 0;
}
n = bs->total_sectors - sector_num;
if (n < nb_sectors) {
nb_sectors = n;
}
if (!bs->drv->bdrv_co_is_allocated) {
*pnum = nb_sectors;
return 1;
}
return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
}
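A standalone worked example of the bounds handling above, using an assumed 1000-sector image:

#include <stdio.h>
#include <stdint.h>

static int is_allocated_bounds(int64_t total, int64_t sector, int nb, int *pnum)
{
    if (sector >= total) {  /* query starts past end of image */
        *pnum = 0;
        return 0;
    }
    int64_t n = total - sector;
    if (n < nb) {           /* clamp the request to the image end */
        nb = n;
    }
    *pnum = nb;             /* driver-less case: everything allocated */
    return 1;
}

int main(void)
{
    int pnum;
    printf("%d %d\n", is_allocated_bounds(1000, 990, 50, &pnum), pnum); /* 1 10 */
    printf("%d %d\n", is_allocated_bounds(1000, 1000, 1, &pnum), pnum); /* 0 0 */
    return 0;
}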
/* Coroutine wrapper for bdrv_is_allocated() */
static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
{
BdrvCoIsAllocatedData *data = opaque;
BlockDriverState *bs = data->bs;
data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
data->pnum);
data->done = true;
}
/*
* Synchronous wrapper around bdrv_co_is_allocated().
*
* See bdrv_co_is_allocated() for details.
*/
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
int *pnum)
{
Coroutine *co;
BdrvCoIsAllocatedData data = {
.bs = bs,
.sector_num = sector_num,
.nb_sectors = nb_sectors,
.pnum = pnum,
.done = false,
};
co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
qemu_coroutine_enter(co, &data);
while (!data.done) {
qemu_aio_wait();
}
return data.ret;
}
void bdrv_mon_event(const BlockDriverState *bdrv,
@ -1869,6 +2277,21 @@ BlockInfoList *qmp_query_block(Error **errp)
info->value->inserted->has_backing_file = true;
info->value->inserted->backing_file = g_strdup(bs->backing_file);
}
if (bs->io_limits_enabled) {
info->value->inserted->bps =
bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
info->value->inserted->bps_rd =
bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
info->value->inserted->bps_wr =
bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
info->value->inserted->iops =
bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
info->value->inserted->iops_rd =
bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
info->value->inserted->iops_wr =
bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
}
}
/* XXX: waiting for the qapi to support GSList */
@ -2480,6 +2903,170 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
acb->pool->cancel(acb);
}
/* block I/O throttling */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
bool is_write, double elapsed_time, uint64_t *wait)
{
uint64_t bps_limit = 0;
double bytes_limit, bytes_base, bytes_res;
double slice_time, wait_time;
if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
} else if (bs->io_limits.bps[is_write]) {
bps_limit = bs->io_limits.bps[is_write];
} else {
if (wait) {
*wait = 0;
}
return false;
}
slice_time = bs->slice_end - bs->slice_start;
slice_time /= (NANOSECONDS_PER_SECOND);
bytes_limit = bps_limit * slice_time;
bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
}
/* bytes_base: bytes already read/written, taken from the accumulated
 * statistics for the current slice.
 * bytes_res: the remaining bytes this request still needs to read/write.
 * (bytes_base + bytes_res) / bps_limit: the total time needed to complete
 * reading/writing all of the data.
*/
bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
if (bytes_base + bytes_res <= bytes_limit) {
if (wait) {
*wait = 0;
}
return false;
}
/* Calc approx time to dispatch */
wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
/* When the runtime I/O rate exceeds the limits, bs->slice_end needs to be
 * extended so that the current statistics are kept until the timer fires;
 * the size of the extension was tuned experimentally.
*/
bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
if (wait) {
*wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
}
return true;
}
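A standalone arithmetic check of the wait-time formula, with assumed numbers: a 1 MiB/s limit, a 0.5 s slice, 600 KiB already transferred, a 128-sector request, and 0.3 s elapsed:

#include <stdio.h>

#define BLOCK_IO_SLICE_TIME 100000000   /* 0.1 s in ns, as in block_int.h */

int main(void)
{
    double bps_limit  = 1048576;    /* 1 MiB/s */
    double slice_time = 0.5;        /* seconds */
    double bytes_base = 614400;     /* 600 KiB already read/written */
    double bytes_res  = 128 * 512;  /* this request: 64 KiB */
    double elapsed    = 0.3;        /* seconds into the slice */

    double bytes_limit = bps_limit * slice_time;  /* 524288 */
    if (bytes_base + bytes_res > bytes_limit) {
        double wait_time = (bytes_base + bytes_res) / bps_limit - elapsed;
        /* BLOCK_IO_SLICE_TIME * 10 == 1e9, so this converts s -> ns */
        printf("wait %.0f ns (~%.0f ms)\n",
               wait_time * BLOCK_IO_SLICE_TIME * 10.0,
               wait_time * 1000.0);  /* ~348437500 ns, ~348 ms */
    }
    return 0;
}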
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
double elapsed_time, uint64_t *wait)
{
uint64_t iops_limit = 0;
double ios_limit, ios_base;
double slice_time, wait_time;
if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
} else if (bs->io_limits.iops[is_write]) {
iops_limit = bs->io_limits.iops[is_write];
} else {
if (wait) {
*wait = 0;
}
return false;
}
slice_time = bs->slice_end - bs->slice_start;
slice_time /= (NANOSECONDS_PER_SECOND);
ios_limit = iops_limit * slice_time;
ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
}
if (ios_base + 1 <= ios_limit) {
if (wait) {
*wait = 0;
}
return false;
}
/* Calc approx time to dispatch */
wait_time = (ios_base + 1) / iops_limit;
if (wait_time > elapsed_time) {
wait_time = wait_time - elapsed_time;
} else {
wait_time = 0;
}
bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
if (wait) {
*wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
}
return true;
}
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
bool is_write, int64_t *wait)
{
int64_t now, max_wait;
uint64_t bps_wait = 0, iops_wait = 0;
double elapsed_time;
int bps_ret, iops_ret;
now = qemu_get_clock_ns(vm_clock);
if ((bs->slice_start < now)
&& (bs->slice_end > now)) {
bs->slice_end = now + bs->slice_time;
} else {
bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
bs->slice_start = now;
bs->slice_end = now + bs->slice_time;
bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
bs->io_base.ios[is_write] = bs->nr_ops[is_write];
bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
}
elapsed_time = now - bs->slice_start;
elapsed_time /= (NANOSECONDS_PER_SECOND);
bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
is_write, elapsed_time, &bps_wait);
iops_ret = bdrv_exceed_iops_limits(bs, is_write,
elapsed_time, &iops_wait);
if (bps_ret || iops_ret) {
max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
if (wait) {
*wait = max_wait;
}
now = qemu_get_clock_ns(vm_clock);
if (bs->slice_end < now + max_wait) {
bs->slice_end = now + max_wait;
}
return true;
}
if (wait) {
*wait = 0;
}
return false;
}
/**************************************************************/
/* async block device emulation */

block.h

@ -70,6 +70,7 @@ typedef struct BlockDevOps {
#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
@ -98,6 +99,11 @@ void bdrv_info(Monitor *mon, QObject **ret_data);
void bdrv_stats_print(Monitor *mon, const QObject *data);
void bdrv_info_stats(Monitor *mon, QObject **ret_data);
/* disk I/O throttling */
void bdrv_io_limits_enable(BlockDriverState *bs);
void bdrv_io_limits_disable(BlockDriverState *bs);
bool bdrv_io_limits_enabled(BlockDriverState *bs);
void bdrv_init(void);
void bdrv_init_with_whitelist(void);
BlockDriver *bdrv_find_protocol(const char *filename);
@ -138,6 +144,8 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum);
int bdrv_truncate(BlockDriverState *bs, int64_t offset);
int64_t bdrv_getlength(BlockDriverState *bs);
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
@ -206,6 +214,7 @@ int bdrv_flush(BlockDriverState *bs);
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
void bdrv_flush_all(void);
void bdrv_close_all(void);
void bdrv_drain_all(void);
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
@ -305,6 +314,9 @@ void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
int nr_sectors);
int64_t bdrv_get_dirty_count(BlockDriverState *bs);
void bdrv_enable_copy_on_read(BlockDriverState *bs);
void bdrv_disable_copy_on_read(BlockDriverState *bs);
void bdrv_set_in_use(BlockDriverState *bs, int in_use);
int bdrv_in_use(BlockDriverState *bs);

block/cow.c

@ -132,8 +132,8 @@ static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
/* Return true if first block has been changed (ie. current version is
* in COW file). Set the number of continuous blocks for which that
* is true. */
static int cow_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *num_same)
static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *num_same)
{
int changed;
@ -171,14 +171,14 @@ static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
return error;
}
static int cow_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
BDRVCowState *s = bs->opaque;
int ret, n;
while (nb_sectors > 0) {
if (cow_is_allocated(bs, sector_num, nb_sectors, &n)) {
if (bdrv_co_is_allocated(bs, sector_num, nb_sectors, &n)) {
ret = bdrv_pread(bs->file,
s->cow_sectors_offset + sector_num * 512,
buf, n * 512);
@ -243,12 +243,12 @@ static void cow_close(BlockDriverState *bs)
static int cow_create(const char *filename, QEMUOptionParameter *options)
{
int fd, cow_fd;
struct cow_header_v2 cow_header;
struct stat st;
int64_t image_sectors = 0;
const char *image_filename = NULL;
int ret;
BlockDriverState *cow_bs;
/* Read out options */
while (options && options->name) {
@ -260,10 +260,16 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
options++;
}
cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
0644);
if (cow_fd < 0)
return -errno;
ret = bdrv_create_file(filename, options);
if (ret < 0) {
return ret;
}
ret = bdrv_file_open(&cow_bs, filename, BDRV_O_RDWR);
if (ret < 0) {
return ret;
}
memset(&cow_header, 0, sizeof(cow_header));
cow_header.magic = cpu_to_be32(COW_MAGIC);
cow_header.version = cpu_to_be32(COW_VERSION);
@ -271,16 +277,9 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
/* Note: if no file, we put a dummy mtime */
cow_header.mtime = cpu_to_be32(0);
fd = open(image_filename, O_RDONLY | O_BINARY);
if (fd < 0) {
close(cow_fd);
if (stat(image_filename, &st) != 0) {
goto mtime_fail;
}
if (fstat(fd, &st) != 0) {
close(fd);
goto mtime_fail;
}
close(fd);
cow_header.mtime = cpu_to_be32(st.st_mtime);
mtime_fail:
pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file),
@ -288,21 +287,20 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
}
cow_header.sectorsize = cpu_to_be32(512);
cow_header.size = cpu_to_be64(image_sectors * 512);
ret = qemu_write_full(cow_fd, &cow_header, sizeof(cow_header));
ret = bdrv_pwrite(cow_bs, 0, &cow_header, sizeof(cow_header));
if (ret != sizeof(cow_header)) {
ret = -errno;
goto exit;
}
/* resize to include at least all the bitmap */
ret = ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3));
ret = bdrv_truncate(cow_bs,
sizeof(cow_header) + ((image_sectors + 7) >> 3));
if (ret) {
ret = -errno;
goto exit;
}
exit:
close(cow_fd);
bdrv_delete(cow_bs);
return ret;
}
@ -337,7 +335,7 @@ static BlockDriver bdrv_cow = {
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,
.bdrv_co_flush_to_disk = cow_co_flush,
.bdrv_is_allocated = cow_is_allocated,
.bdrv_co_is_allocated = cow_co_is_allocated,
.create_options = cow_create_options,
};

block/qcow.c

@ -368,14 +368,16 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
return cluster_offset;
}
static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVQcowState *s = bs->opaque;
int index_in_cluster, n;
uint64_t cluster_offset;
qemu_co_mutex_lock(&s->lock);
cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
qemu_co_mutex_unlock(&s->lock);
index_in_cluster = sector_num & (s->cluster_sectors - 1);
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
@ -433,7 +435,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
return 0;
}
static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BDRVQcowState *s = bs->opaque;
@ -531,7 +533,7 @@ fail:
goto done;
}
static int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BDRVQcowState *s = bs->opaque;
@ -844,7 +846,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
.bdrv_co_flush_to_disk = qcow_co_flush,
.bdrv_is_allocated = qcow_is_allocated,
.bdrv_co_is_allocated = qcow_co_is_allocated,
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,

block/qcow2-cluster.c

@ -289,89 +289,62 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
}
}
static int qcow2_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
static int coroutine_fn copy_sectors(BlockDriverState *bs,
uint64_t start_sect,
uint64_t cluster_offset,
int n_start, int n_end)
{
BDRVQcowState *s = bs->opaque;
int ret, index_in_cluster, n, n1;
uint64_t cluster_offset;
struct iovec iov;
QEMUIOVector qiov;
while (nb_sectors > 0) {
n = nb_sectors;
ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n,
&cluster_offset);
if (ret < 0) {
return ret;
}
index_in_cluster = sector_num & (s->cluster_sectors - 1);
if (!cluster_offset) {
if (bs->backing_hd) {
/* read from the base image */
iov.iov_base = buf;
iov.iov_len = n * 512;
qemu_iovec_init_external(&qiov, &iov, 1);
n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
if (n1 > 0) {
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
if (ret < 0)
return -1;
}
} else {
memset(buf, 0, 512 * n);
}
} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
if (qcow2_decompress_cluster(bs, cluster_offset) < 0)
return -1;
memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
} else {
BLKDBG_EVENT(bs->file, BLKDBG_READ);
ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
if (ret != n * 512)
return -1;
if (s->crypt_method) {
qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0,
&s->aes_decrypt_key);
}
}
nb_sectors -= n;
sector_num += n;
buf += n * 512;
}
return 0;
}
static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
uint64_t cluster_offset, int n_start, int n_end)
{
BDRVQcowState *s = bs->opaque;
struct iovec iov;
int n, ret;
/*
* If this is the last cluster and it is only partially used, we must only
* copy until the end of the image, or bdrv_check_request will fail for the
* bdrv_read/write calls below.
*/
if (start_sect + n_end > bs->total_sectors) {
n_end = bs->total_sectors - start_sect;
}
n = n_end - n_start;
if (n <= 0)
if (n <= 0) {
return 0;
}
iov.iov_len = n * BDRV_SECTOR_SIZE;
iov.iov_base = qemu_blockalign(bs, iov.iov_len);
qemu_iovec_init_external(&qiov, &iov, 1);
BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
ret = qcow2_read(bs, start_sect + n_start, s->cluster_data, n);
if (ret < 0)
return ret;
/* Call .bdrv_co_readv() directly instead of using the public block-layer
* interface. This avoids double I/O throttling and request tracking,
* which can lead to deadlock when block layer copy-on-read is enabled.
*/
ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
if (ret < 0) {
goto out;
}
if (s->crypt_method) {
qcow2_encrypt_sectors(s, start_sect + n_start,
s->cluster_data,
s->cluster_data, n, 1,
iov.iov_base, iov.iov_base, n, 1,
&s->aes_encrypt_key);
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start,
s->cluster_data, n);
if (ret < 0)
return ret;
return 0;
ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
if (ret < 0) {
goto out;
}
ret = 0;
out:
qemu_vfree(iov.iov_base);
return ret;
}
@ -620,7 +593,9 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
if (m->n_start) {
cow = true;
qemu_co_mutex_unlock(&s->lock);
ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
qemu_co_mutex_lock(&s->lock);
if (ret < 0)
goto err;
}
@ -628,8 +603,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
if (m->nb_available & (s->cluster_sectors - 1)) {
uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
cow = true;
qemu_co_mutex_unlock(&s->lock);
ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
m->nb_available - end, s->cluster_sectors);
qemu_co_mutex_lock(&s->lock);
if (ret < 0)
goto err;
}

block/qcow2-refcount.c

@ -700,6 +700,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
l2_table = NULL;
l1_table = NULL;
l1_size2 = l1_size * sizeof(uint64_t);
/* WARNING: qcow2_snapshot_goto relies on this function not using the
* l1_table_offset when it is the current s->l1_table_offset! Be careful
* when changing this! */
if (l1_table_offset != s->l1_table_offset) {
if (l1_size2 != 0) {
l1_table = g_malloc0(align_offset(l1_size2, 512));
@ -819,7 +823,8 @@ fail:
qcow2_cache_set_writethrough(bs, s->refcount_block_cache,
old_refcount_writethrough);
if (l1_modified) {
/* Update L1 only if it isn't deleted anyway (addend = -1) */
if (addend >= 0 && l1_modified) {
for(i = 0; i < l1_size; i++)
cpu_to_be64s(&l1_table[i]);
if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table,

block/qcow2-snapshot.c

@ -68,6 +68,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
int i, id_str_size, name_size;
int64_t offset;
uint32_t extra_data_size;
int ret;
if (!s->nb_snapshots) {
s->snapshots = NULL;
@ -77,10 +78,15 @@ int qcow2_read_snapshots(BlockDriverState *bs)
offset = s->snapshots_offset;
s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
for(i = 0; i < s->nb_snapshots; i++) {
/* Read statically sized part of the snapshot header */
offset = align_offset(offset, 8);
if (bdrv_pread(bs->file, offset, &h, sizeof(h)) != sizeof(h))
ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
offset += sizeof(h);
sn = s->snapshots + i;
sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
@ -94,25 +100,34 @@ int qcow2_read_snapshots(BlockDriverState *bs)
id_str_size = be16_to_cpu(h.id_str_size);
name_size = be16_to_cpu(h.name_size);
/* Skip extra data */
offset += extra_data_size;
/* Read snapshot ID */
sn->id_str = g_malloc(id_str_size + 1);
if (bdrv_pread(bs->file, offset, sn->id_str, id_str_size) != id_str_size)
ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
offset += id_str_size;
sn->id_str[id_str_size] = '\0';
/* Read snapshot name */
sn->name = g_malloc(name_size + 1);
if (bdrv_pread(bs->file, offset, sn->name, name_size) != name_size)
ret = bdrv_pread(bs->file, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
offset += name_size;
sn->name[name_size] = '\0';
}
s->snapshots_size = offset - s->snapshots_offset;
return 0;
fail:
fail:
qcow2_free_snapshots(bs);
return -1;
return ret;
}
/* add at the end of the file a new list of snapshots */
@ -122,9 +137,12 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
QCowSnapshot *sn;
QCowSnapshotHeader h;
int i, name_size, id_str_size, snapshots_size;
uint64_t data64;
uint32_t data32;
struct {
uint32_t nb_snapshots;
uint64_t snapshots_offset;
} QEMU_PACKED header_data;
int64_t offset, snapshots_offset;
int ret;
/* compute the size of the snapshots */
offset = 0;
@ -137,6 +155,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
}
snapshots_size = offset;
/* Allocate space for the new snapshot list */
snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
bdrv_flush(bs->file);
offset = snapshots_offset;
@ -144,6 +163,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
return offset;
}
/* Write all snapshots to the new list */
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
memset(&h, 0, sizeof(h));
@ -159,34 +179,55 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
h.id_str_size = cpu_to_be16(id_str_size);
h.name_size = cpu_to_be16(name_size);
offset = align_offset(offset, 8);
if (bdrv_pwrite_sync(bs->file, offset, &h, sizeof(h)) < 0)
ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
offset += sizeof(h);
if (bdrv_pwrite_sync(bs->file, offset, sn->id_str, id_str_size) < 0)
ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
offset += id_str_size;
if (bdrv_pwrite_sync(bs->file, offset, sn->name, name_size) < 0)
ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
offset += name_size;
}
/* update the various header fields */
data64 = cpu_to_be64(snapshots_offset);
if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, snapshots_offset),
&data64, sizeof(data64)) < 0)
/*
* Update the header to point to the new snapshot table. This requires the
* new table and its refcounts to be stable on disk.
*/
ret = bdrv_flush(bs);
if (ret < 0) {
goto fail;
data32 = cpu_to_be32(s->nb_snapshots);
if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
&data32, sizeof(data32)) < 0)
}
QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
&header_data, sizeof(header_data));
if (ret < 0) {
goto fail;
}
/* free the old snapshot table */
qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size);
s->snapshots_offset = snapshots_offset;
s->snapshots_size = snapshots_size;
return 0;
fail:
return -1;
fail:
return ret;
}
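The QEMU_BUILD_BUG_ON above pins down why a single bdrv_pwrite_sync can update both header fields; a standalone sketch of the same adjacency trick, with a simplified stand-in header layout and C11 _Static_assert in place of QEMU_BUILD_BUG_ON:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t nb_snapshots;      /* assumed simplified layout */
    uint64_t snapshots_offset;
} __attribute__((packed)) FakeQCowHeader;

_Static_assert(offsetof(FakeQCowHeader, snapshots_offset) ==
               offsetof(FakeQCowHeader, nb_snapshots) + sizeof(uint32_t),
               "fields must be adjacent for a single write");

int main(void)
{
    struct {
        uint32_t nb_snapshots;
        uint64_t snapshots_offset;
    } __attribute__((packed)) header_data = { 3, 0x10000 };

    /* One 12-byte write starting at nb_snapshots updates both fields
     * together, instead of two separate writes that could be
     * interrupted in between. */
    printf("write %zu bytes at offset %zu\n", sizeof(header_data),
           offsetof(FakeQCowHeader, nb_snapshots));
    return 0;
}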
static void find_new_snapshot_id(BlockDriverState *bs,
@ -236,72 +277,92 @@ static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
BDRVQcowState *s = bs->opaque;
QCowSnapshot *snapshots1, sn1, *sn = &sn1;
QCowSnapshot *new_snapshot_list = NULL;
QCowSnapshot *old_snapshot_list = NULL;
QCowSnapshot sn1, *sn = &sn1;
int i, ret;
uint64_t *l1_table = NULL;
int64_t l1_table_offset;
memset(sn, 0, sizeof(*sn));
/* Generate an ID if it wasn't passed */
if (sn_info->id_str[0] == '\0') {
/* compute a new id */
find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
}
/* check that the ID is unique */
if (find_snapshot_by_id(bs, sn_info->id_str) >= 0)
/* Check that the ID is unique */
if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
return -ENOENT;
}
/* Populate sn with passed data */
sn->id_str = g_strdup(sn_info->id_str);
if (!sn->id_str)
goto fail;
sn->name = g_strdup(sn_info->name);
if (!sn->name)
goto fail;
sn->vm_state_size = sn_info->vm_state_size;
sn->date_sec = sn_info->date_sec;
sn->date_nsec = sn_info->date_nsec;
sn->vm_clock_nsec = sn_info->vm_clock_nsec;
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
if (ret < 0)
goto fail;
/* create the L1 table of the snapshot */
/* Allocate the L1 table of the snapshot and copy the current one there. */
l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
if (l1_table_offset < 0) {
ret = l1_table_offset;
goto fail;
}
bdrv_flush(bs->file);
sn->l1_table_offset = l1_table_offset;
sn->l1_size = s->l1_size;
if (s->l1_size != 0) {
l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
} else {
l1_table = NULL;
}
l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
for(i = 0; i < s->l1_size; i++) {
l1_table[i] = cpu_to_be64(s->l1_table[i]);
}
if (bdrv_pwrite_sync(bs->file, sn->l1_table_offset,
l1_table, s->l1_size * sizeof(uint64_t)) < 0)
ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
g_free(l1_table);
l1_table = NULL;
snapshots1 = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
if (s->snapshots) {
memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot));
g_free(s->snapshots);
/*
* Increase the refcounts of all clusters and make sure everything is
* stable on disk before updating the snapshot table to contain a pointer
* to the new L1 table.
*/
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
if (ret < 0) {
goto fail;
}
s->snapshots = snapshots1;
ret = bdrv_flush(bs);
if (ret < 0) {
goto fail;
}
/* Append the new snapshot to the snapshot list */
new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
if (s->snapshots) {
memcpy(new_snapshot_list, s->snapshots,
s->nb_snapshots * sizeof(QCowSnapshot));
old_snapshot_list = s->snapshots;
}
s->snapshots = new_snapshot_list;
s->snapshots[s->nb_snapshots++] = *sn;
if (qcow2_write_snapshots(bs) < 0)
ret = qcow2_write_snapshots(bs);
if (ret < 0) {
g_free(s->snapshots);
s->snapshots = old_snapshot_list;
goto fail;
}
g_free(old_snapshot_list);
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
@ -309,10 +370,13 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
#endif
return 0;
fail:
fail:
g_free(sn->id_str);
g_free(sn->name);
g_free(l1_table);
return -1;
return ret;
}
/* copy the snapshot 'snapshot_name' into the current disk image */
@ -322,38 +386,92 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
QCowSnapshot *sn;
int i, snapshot_index;
int cur_l1_bytes, sn_l1_bytes;
int ret;
uint64_t *sn_l1_table = NULL;
/* Search the snapshot */
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
if (snapshot_index < 0)
if (snapshot_index < 0) {
return -ENOENT;
}
sn = &s->snapshots[snapshot_index];
if (qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0)
goto fail;
if (qcow2_grow_l1_table(bs, sn->l1_size, true) < 0)
/*
* Make sure that the current L1 table is big enough to contain the whole
* L1 table of the snapshot. If the snapshot L1 table is smaller, the
* current one must be padded with zeros.
*/
ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
if (ret < 0) {
goto fail;
}
cur_l1_bytes = s->l1_size * sizeof(uint64_t);
sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
if (cur_l1_bytes > sn_l1_bytes) {
memset(s->l1_table + sn->l1_size, 0, cur_l1_bytes - sn_l1_bytes);
/*
* Copy the snapshot L1 table to the current L1 table.
*
* Before overwriting the old current L1 table on disk, make sure to
* increase all refcounts for the clusters referenced by the new one.
* Decrease the refcount referenced by the old one only when the L1
* table is overwritten.
*/
sn_l1_table = g_malloc0(cur_l1_bytes);
ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
if (ret < 0) {
goto fail;
}
/* copy the snapshot l1 table to the current l1 table */
if (bdrv_pread(bs->file, sn->l1_table_offset,
s->l1_table, sn_l1_bytes) < 0)
ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
sn->l1_size, 1);
if (ret < 0) {
goto fail;
if (bdrv_pwrite_sync(bs->file, s->l1_table_offset,
s->l1_table, cur_l1_bytes) < 0)
}
ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
cur_l1_bytes);
if (ret < 0) {
goto fail;
}
/*
* Decrease refcount of clusters of current L1 table.
*
* At this point, the in-memory s->l1_table points to the old L1 table,
* whereas on disk we already have the new one.
*
* qcow2_update_snapshot_refcount special cases the current L1 table to use
* the in-memory data instead of really using the offset to load a new one,
* which is why this works.
*/
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
s->l1_size, -1);
/*
* Now update the in-memory L1 table to be in sync with the on-disk one. We
* need to do this even if updating refcounts failed.
*/
for(i = 0;i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
}
if (qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1) < 0)
if (ret < 0) {
goto fail;
}
g_free(sn_l1_table);
sn_l1_table = NULL;
/*
* Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
* when we decreased the refcount of the old snapshot).
*/
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
if (ret < 0) {
goto fail;
}
#ifdef DEBUG_ALLOC
{
@ -362,39 +480,59 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
}
#endif
return 0;
fail:
return -EIO;
fail:
g_free(sn_l1_table);
return ret;
}
int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
BDRVQcowState *s = bs->opaque;
QCowSnapshot *sn;
QCowSnapshot sn;
int snapshot_index, ret;
/* Search the snapshot */
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
if (snapshot_index < 0)
if (snapshot_index < 0) {
return -ENOENT;
sn = &s->snapshots[snapshot_index];
}
sn = s->snapshots[snapshot_index];
ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset, sn->l1_size, -1);
if (ret < 0)
return ret;
/* must update the copied flag on the current cluster offsets */
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
if (ret < 0)
return ret;
qcow2_free_clusters(bs, sn->l1_table_offset, sn->l1_size * sizeof(uint64_t));
g_free(sn->id_str);
g_free(sn->name);
memmove(sn, sn + 1, (s->nb_snapshots - snapshot_index - 1) * sizeof(*sn));
/* Remove it from the snapshot list */
memmove(s->snapshots + snapshot_index,
s->snapshots + snapshot_index + 1,
(s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
s->nb_snapshots--;
ret = qcow2_write_snapshots(bs);
if (ret < 0) {
/* XXX: restore snapshot if error ? */
return ret;
}
/*
* The snapshot is now unused, clean up. If we fail after this point, we
* won't recover but just leak clusters.
*/
g_free(sn.id_str);
g_free(sn.name);
/*
* Now decrease the refcounts of clusters referenced by the snapshot and
* free the L1 table.
*/
ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
sn.l1_size, -1);
if (ret < 0) {
return ret;
}
qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t));
/* must update the copied flag on the current cluster offsets */
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
if (ret < 0) {
return ret;
}
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
@ -435,32 +573,42 @@ int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
{
int i, snapshot_index, l1_size2;
int i, snapshot_index;
BDRVQcowState *s = bs->opaque;
QCowSnapshot *sn;
uint64_t *new_l1_table;
int new_l1_bytes;
int ret;
assert(bs->read_only);
/* Search the snapshot */
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
if (snapshot_index < 0) {
return -ENOENT;
}
sn = &s->snapshots[snapshot_index];
/* Allocate and read in the snapshot's L1 table */
new_l1_bytes = s->l1_size * sizeof(uint64_t);
new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
if (ret < 0) {
g_free(new_l1_table);
return ret;
}
/* Switch the L1 table */
g_free(s->l1_table);
s->l1_size = sn->l1_size;
l1_size2 = s->l1_size * sizeof(uint64_t);
if (s->l1_table != NULL) {
g_free(s->l1_table);
}
s->l1_table_offset = sn->l1_table_offset;
s->l1_table = g_malloc0(align_offset(l1_size2, 512));
if (bdrv_pread(bs->file, sn->l1_table_offset,
s->l1_table, l1_size2) != l1_size2) {
return -1;
}
s->l1_table = new_l1_table;
for(i = 0;i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
}
return 0;
}

block/qcow2.c

@ -273,8 +273,9 @@ static int qcow2_open(BlockDriverState *bs, int flags)
}
bs->backing_file[len] = '\0';
}
if (qcow2_read_snapshots(bs) < 0) {
ret = -EINVAL;
ret = qcow2_read_snapshots(bs);
if (ret < 0) {
goto fail;
}
@ -343,16 +344,19 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key)
return 0;
}
static int qcow2_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVQcowState *s = bs->opaque;
uint64_t cluster_offset;
int ret;
*pnum = nb_sectors;
/* FIXME We can get errors here, but the bdrv_is_allocated interface can't
* pass them on today */
/* FIXME We can get errors here, but the bdrv_co_is_allocated interface
* can't pass them on today */
qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
*pnum = 0;
}
@ -377,7 +381,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
return n1;
}
static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
int remaining_sectors, QEMUIOVector *qiov)
{
BDRVQcowState *s = bs->opaque;
@ -512,12 +516,12 @@ static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m)
/* Restart all dependent requests */
if (!qemu_co_queue_empty(&m->dependent_requests)) {
qemu_co_mutex_unlock(&s->lock);
while(qemu_co_queue_next(&m->dependent_requests));
qemu_co_queue_restart_all(&m->dependent_requests);
qemu_co_mutex_lock(&s->lock);
}
}
static int qcow2_co_writev(BlockDriverState *bs,
static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
int64_t sector_num,
int remaining_sectors,
QEMUIOVector *qiov)
@ -1137,7 +1141,7 @@ fail:
return ret;
}
static int qcow2_co_flush_to_os(BlockDriverState *bs)
static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
int ret;
@ -1159,7 +1163,7 @@ static int qcow2_co_flush_to_os(BlockDriverState *bs)
return 0;
}
static int qcow2_co_flush_to_disk(BlockDriverState *bs)
static coroutine_fn int qcow2_co_flush_to_disk(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
@ -1276,7 +1280,7 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_open = qcow2_open,
.bdrv_close = qcow2_close,
.bdrv_create = qcow2_create,
.bdrv_is_allocated = qcow2_is_allocated,
.bdrv_co_is_allocated = qcow2_co_is_allocated,
.bdrv_set_key = qcow2_set_key,
.bdrv_make_empty = qcow2_make_empty,

block/qed-table.c

@ -29,7 +29,7 @@ static void qed_read_table_cb(void *opaque, int ret)
{
QEDReadTableCB *read_table_cb = opaque;
QEDTable *table = read_table_cb->table;
int noffsets = read_table_cb->iov.iov_len / sizeof(uint64_t);
int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
int i;
/* Handle I/O error */
@ -65,7 +65,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
aiocb = bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
read_table_cb->iov.iov_len / BDRV_SECTOR_SIZE,
qiov->size / BDRV_SECTOR_SIZE,
qed_read_table_cb, read_table_cb);
if (!aiocb) {
qed_read_table_cb(read_table_cb, -EIO);
@ -160,7 +160,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
aiocb = bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
&write_table_cb->qiov,
write_table_cb->iov.iov_len / BDRV_SECTOR_SIZE,
write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
qed_write_table_cb, write_table_cb);
if (!aiocb) {
qed_write_table_cb(write_table_cb, -EIO);

block/qed.c

@ -661,6 +661,7 @@ static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
}
typedef struct {
Coroutine *co;
int is_allocated;
int *pnum;
} QEDIsAllocatedCB;
@ -670,10 +671,14 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
QEDIsAllocatedCB *cb = opaque;
*cb->pnum = len / BDRV_SECTOR_SIZE;
cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
if (cb->co) {
qemu_coroutine_enter(cb->co, NULL);
}
}
static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
BDRVQEDState *s = bs->opaque;
uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
@ -686,8 +691,10 @@ static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
/* Now sleep if the callback wasn't invoked immediately */
while (cb.is_allocated == -1) {
qemu_aio_wait();
cb.co = qemu_coroutine_self();
qemu_coroutine_yield();
}
qed_unref_l2_cache_entry(request.l2_table);
@ -1485,7 +1492,7 @@ static BlockDriver bdrv_qed = {
.bdrv_open = bdrv_qed_open,
.bdrv_close = bdrv_qed_close,
.bdrv_create = bdrv_qed_create,
.bdrv_is_allocated = bdrv_qed_is_allocated,
.bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
.bdrv_make_empty = bdrv_qed_make_empty,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,

block/sheepdog.c

@ -1715,7 +1715,7 @@ out:
return 1;
}
static int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
SheepdogAIOCB *acb;
@ -1744,7 +1744,7 @@ static int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
return acb->ret;
}
static int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
SheepdogAIOCB *acb;

block/vdi.c

@ -472,8 +472,8 @@ static int vdi_open(BlockDriverState *bs, int flags)
return -1;
}
static int vdi_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
/* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
@ -996,7 +996,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_close = vdi_close,
.bdrv_create = vdi_create,
.bdrv_co_flush_to_disk = vdi_co_flush,
.bdrv_is_allocated = vdi_is_allocated,
.bdrv_co_is_allocated = vdi_co_is_allocated,
.bdrv_make_empty = vdi_make_empty,
.bdrv_aio_readv = vdi_aio_readv,

block/vmdk.c

@ -861,8 +861,8 @@ static VmdkExtent *find_extent(BDRVVmdkState *s,
return NULL;
}
static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVVmdkState *s = bs->opaque;
int64_t index_in_cluster, n, ret;
@ -873,8 +873,10 @@ static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
if (!extent) {
return 0;
}
qemu_co_mutex_lock(&s->lock);
ret = get_cluster_offset(bs, extent, NULL,
sector_num * 512, 0, &offset);
qemu_co_mutex_unlock(&s->lock);
/* get_cluster_offset returning 0 means success */
ret = !ret;
@ -1596,7 +1598,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,
.bdrv_co_flush_to_disk = vmdk_co_flush,
.bdrv_is_allocated = vmdk_is_allocated,
.bdrv_co_is_allocated = vmdk_co_is_allocated,
.bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
.create_options = vmdk_create_options,

block/vvfat.c

@ -2758,7 +2758,7 @@ static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
return ret;
}
static int vvfat_is_allocated(BlockDriverState *bs,
static int coroutine_fn vvfat_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int* n)
{
BDRVVVFATState* s = bs->opaque;
@ -2855,7 +2855,7 @@ static BlockDriver bdrv_vvfat = {
.bdrv_read = vvfat_co_read,
.bdrv_write = vvfat_co_write,
.bdrv_close = vvfat_close,
.bdrv_is_allocated = vvfat_is_allocated,
.bdrv_co_is_allocated = vvfat_co_is_allocated,
.protocol_name = "fat",
};

block_int.h

@ -34,6 +34,13 @@
#define BLOCK_FLAG_ENCRYPT 1
#define BLOCK_FLAG_COMPAT6 4
#define BLOCK_IO_LIMIT_READ 0
#define BLOCK_IO_LIMIT_WRITE 1
#define BLOCK_IO_LIMIT_TOTAL 2
#define BLOCK_IO_SLICE_TIME 100000000
#define NANOSECONDS_PER_SECOND 1000000000.0
#define BLOCK_OPT_SIZE "size"
#define BLOCK_OPT_ENCRYPT "encryption"
#define BLOCK_OPT_COMPAT6 "compat6"
@ -44,12 +51,24 @@
#define BLOCK_OPT_PREALLOC "preallocation"
#define BLOCK_OPT_SUBFMT "subformat"
typedef struct BdrvTrackedRequest BdrvTrackedRequest;
typedef struct AIOPool {
void (*cancel)(BlockDriverAIOCB *acb);
int aiocb_size;
BlockDriverAIOCB *free_aiocb;
} AIOPool;
typedef struct BlockIOLimit {
int64_t bps[3];
int64_t iops[3];
} BlockIOLimit;
typedef struct BlockIOBaseValue {
uint64_t bytes[2];
uint64_t ios[2];
} BlockIOBaseValue;
struct BlockDriver {
const char *format_name;
int instance_size;
@ -63,8 +82,6 @@ struct BlockDriver {
const uint8_t *buf, int nb_sectors);
void (*bdrv_close)(BlockDriverState *bs);
int (*bdrv_create)(const char *filename, QEMUOptionParameter *options);
int (*bdrv_is_allocated)(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum);
int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
int (*bdrv_make_empty)(BlockDriverState *bs);
/* aio */
@ -86,6 +103,8 @@ struct BlockDriver {
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors);
int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum);
/*
* Invalidate any cached meta-data.
@ -179,6 +198,8 @@ struct BlockDriverState {
int encrypted; /* if true, the media is encrypted */
int valid_key; /* if true, a valid encryption key has been set */
int sg; /* if true, the device is a /dev/sg* */
int copy_on_read; /* if true, copy read backing sectors into image
note this is a reference count */
BlockDriver *drv; /* NULL means no media */
void *opaque;
@ -201,6 +222,16 @@ struct BlockDriverState {
void *sync_aiocb;
/* the time for latest disk I/O */
int64_t slice_time;
int64_t slice_start;
int64_t slice_end;
BlockIOLimit io_limits;
BlockIOBaseValue io_base;
CoQueue throttled_reqs;
QEMUTimer *block_timer;
bool io_limits_enabled;
/* I/O stats (display with "info blockstats"). */
uint64_t nr_bytes[BDRV_MAX_IOTYPE];
uint64_t nr_ops[BDRV_MAX_IOTYPE];
@ -228,6 +259,8 @@ struct BlockDriverState {
int in_use; /* users other than guest access, eg. block migration */
QTAILQ_ENTRY(BlockDriverState) list;
void *private;
QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
};
struct BlockDriverAIOCB {
@ -244,6 +277,9 @@ void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque);
void qemu_aio_release(void *p);
void bdrv_set_io_limits(BlockDriverState *bs,
BlockIOLimit *io_limits);
#ifdef _WIN32
int is_windows_drive(const char *filename);
#endif

View File

@ -216,6 +216,26 @@ static int parse_block_error_action(const char *buf, int is_read)
}
}
static bool do_check_io_limits(BlockIOLimit *io_limits)
{
bool bps_flag;
bool iops_flag;
assert(io_limits);
bps_flag = (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] != 0)
&& ((io_limits->bps[BLOCK_IO_LIMIT_READ] != 0)
|| (io_limits->bps[BLOCK_IO_LIMIT_WRITE] != 0));
iops_flag = (io_limits->iops[BLOCK_IO_LIMIT_TOTAL] != 0)
&& ((io_limits->iops[BLOCK_IO_LIMIT_READ] != 0)
|| (io_limits->iops[BLOCK_IO_LIMIT_WRITE] != 0));
if (bps_flag || iops_flag) {
return false;
}
return true;
}
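To make the rule above concrete, here is a minimal sketch (hypothetical values, not part of the patch) of one accepted and one rejected combination:
/* Accepted: a total bandwidth cap plus a read-only IOPS cap; neither
 * array mixes a TOTAL limit with a READ/WRITE limit. */
BlockIOLimit ok = {
    .bps  = { [BLOCK_IO_LIMIT_TOTAL] = 1000000 },
    .iops = { [BLOCK_IO_LIMIT_READ]  = 100 },
};
/* Rejected: bps (total) combined with bps_rd trips bps_flag, so
 * do_check_io_limits() returns false and drive_init() errors out. */
BlockIOLimit bad = {
    .bps = { [BLOCK_IO_LIMIT_TOTAL] = 1000000,
             [BLOCK_IO_LIMIT_READ]  = 500000 },
};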
DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
{
const char *buf;
@ -235,7 +255,9 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
int on_read_error, on_write_error;
const char *devaddr;
DriveInfo *dinfo;
BlockIOLimit io_limits;
int snapshot = 0;
bool copy_on_read;
int ret;
translation = BIOS_ATA_TRANSLATION_AUTO;
@ -252,6 +274,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
ro = qemu_opt_get_bool(opts, "readonly", 0);
copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
file = qemu_opt_get(opts, "file");
serial = qemu_opt_get(opts, "serial");
@ -353,6 +376,26 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
}
}
/* disk I/O throttling */
io_limits.bps[BLOCK_IO_LIMIT_TOTAL] =
qemu_opt_get_number(opts, "bps", 0);
io_limits.bps[BLOCK_IO_LIMIT_READ] =
qemu_opt_get_number(opts, "bps_rd", 0);
io_limits.bps[BLOCK_IO_LIMIT_WRITE] =
qemu_opt_get_number(opts, "bps_wr", 0);
io_limits.iops[BLOCK_IO_LIMIT_TOTAL] =
qemu_opt_get_number(opts, "iops", 0);
io_limits.iops[BLOCK_IO_LIMIT_READ] =
qemu_opt_get_number(opts, "iops_rd", 0);
io_limits.iops[BLOCK_IO_LIMIT_WRITE] =
qemu_opt_get_number(opts, "iops_wr", 0);
if (!do_check_io_limits(&io_limits)) {
error_report("bps(iops) and bps_rd/bps_wr(iops_rd/iops_wr) "
"cannot be used at the same time");
return NULL;
}
on_write_error = BLOCK_ERR_STOP_ENOSPC;
if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
@ -460,6 +503,9 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
/* disk I/O throttling */
bdrv_set_io_limits(dinfo->bdrv, &io_limits);
switch(type) {
case IF_IDE:
case IF_SCSI:
@ -502,6 +548,10 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
}
if (copy_on_read) {
bdrv_flags |= BDRV_O_COPY_ON_READ;
}
if (media == MEDIA_CDROM) {
/* CDROM is fine for any interface, don't check. */
ro = 1;
@ -603,7 +653,7 @@ int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, QObject **ret_data)
goto out;
}
qemu_aio_flush();
bdrv_drain_all();
bdrv_flush(bs);
bdrv_close(bs);
@ -715,6 +765,65 @@ int do_change_block(Monitor *mon, const char *device,
return monitor_read_bdrv_key_start(mon, bs, NULL, NULL);
}
/* throttling disk I/O limits */
int do_block_set_io_throttle(Monitor *mon,
const QDict *qdict, QObject **ret_data)
{
BlockIOLimit io_limits;
const char *devname = qdict_get_str(qdict, "device");
BlockDriverState *bs;
io_limits.bps[BLOCK_IO_LIMIT_TOTAL]
= qdict_get_try_int(qdict, "bps", -1);
io_limits.bps[BLOCK_IO_LIMIT_READ]
= qdict_get_try_int(qdict, "bps_rd", -1);
io_limits.bps[BLOCK_IO_LIMIT_WRITE]
= qdict_get_try_int(qdict, "bps_wr", -1);
io_limits.iops[BLOCK_IO_LIMIT_TOTAL]
= qdict_get_try_int(qdict, "iops", -1);
io_limits.iops[BLOCK_IO_LIMIT_READ]
= qdict_get_try_int(qdict, "iops_rd", -1);
io_limits.iops[BLOCK_IO_LIMIT_WRITE]
= qdict_get_try_int(qdict, "iops_wr", -1);
bs = bdrv_find(devname);
if (!bs) {
qerror_report(QERR_DEVICE_NOT_FOUND, devname);
return -1;
}
if ((io_limits.bps[BLOCK_IO_LIMIT_TOTAL] == -1)
|| (io_limits.bps[BLOCK_IO_LIMIT_READ] == -1)
|| (io_limits.bps[BLOCK_IO_LIMIT_WRITE] == -1)
|| (io_limits.iops[BLOCK_IO_LIMIT_TOTAL] == -1)
|| (io_limits.iops[BLOCK_IO_LIMIT_READ] == -1)
|| (io_limits.iops[BLOCK_IO_LIMIT_WRITE] == -1)) {
qerror_report(QERR_MISSING_PARAMETER,
"bps/bps_rd/bps_wr/iops/iops_rd/iops_wr");
return -1;
}
if (!do_check_io_limits(&io_limits)) {
qerror_report(QERR_INVALID_PARAMETER_COMBINATION);
return -1;
}
bs->io_limits = io_limits;
bs->slice_time = BLOCK_IO_SLICE_TIME;
if (!bs->io_limits_enabled && bdrv_io_limits_enabled(bs)) {
bdrv_io_limits_enable(bs);
} else if (bs->io_limits_enabled && !bdrv_io_limits_enabled(bs)) {
bdrv_io_limits_disable(bs);
} else {
if (bs->block_timer) {
qemu_mod_timer(bs->block_timer, qemu_get_clock_ns(vm_clock));
}
}
return 0;
}
int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
{
const char *id = qdict_get_str(qdict, "id");
@ -731,7 +840,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
}
/* quiesce block driver; prevent further io */
qemu_aio_flush();
bdrv_drain_all();
bdrv_flush(bs);
bdrv_close(bs);

View File

@ -63,6 +63,8 @@ int do_block_set_passwd(Monitor *mon, const QDict *qdict, QObject **ret_data);
int do_change_block(Monitor *mon, const char *device,
const char *filename, const char *fmt);
int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data);
int do_block_set_io_throttle(Monitor *mon,
const QDict *qdict, QObject **ret_data);
int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, QObject **ret_data);
int do_block_resize(Monitor *mon, const QDict *qdict, QObject **ret_data);

cpus.c
View File

@ -396,7 +396,7 @@ static void do_vm_stop(RunState state)
pause_all_vcpus();
runstate_set(state);
vm_state_notify(0, state);
qemu_aio_flush();
bdrv_drain_all();
bdrv_flush_all();
monitor_protocol_event(QEVENT_STOP, NULL);
}

View File

@ -9,6 +9,7 @@
#include "dma.h"
#include "block_int.h"
#include "trace.h"
void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
{
@ -83,6 +84,8 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
static void dma_complete(DMAAIOCB *dbs, int ret)
{
trace_dma_complete(dbs, ret, dbs->common.cb);
dma_bdrv_unmap(dbs);
if (dbs->common.cb) {
dbs->common.cb(dbs->common.opaque, ret);
@ -106,6 +109,8 @@ static void dma_bdrv_cb(void *opaque, int ret)
target_phys_addr_t cur_addr, cur_len;
void *mem;
trace_dma_bdrv_cb(dbs, ret);
dbs->acb = NULL;
dbs->sector_num += dbs->iov.size / 512;
dma_bdrv_unmap(dbs);
@ -130,6 +135,7 @@ static void dma_bdrv_cb(void *opaque, int ret)
}
if (dbs->iov.size == 0) {
trace_dma_map_wait(dbs);
cpu_register_map_client(dbs, continue_after_map_failure);
return;
}
@ -145,6 +151,8 @@ static void dma_aio_cancel(BlockDriverAIOCB *acb)
{
DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
trace_dma_aio_cancel(dbs);
if (dbs->acb) {
BlockDriverAIOCB *acb = dbs->acb;
dbs->acb = NULL;
@ -168,6 +176,8 @@ BlockDriverAIOCB *dma_bdrv_io(
{
DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
trace_dma_bdrv_io(dbs, bs, sector_num, to_dev);
dbs->acb = NULL;
dbs->bs = bs;
dbs->sg = sg;

View File

@ -860,9 +860,10 @@ ETEXI
.args_type = "pci_addr:s,opts:s",
.params = "[[<domain>:]<bus>:]<slot>\n"
"[file=file][,if=type][,bus=n]\n"
"[,unit=m][,media=d][index=i]\n"
"[,unit=m][,media=d][,index=i]\n"
"[,cyls=c,heads=h,secs=s[,trans=t]]\n"
"[snapshot=on|off][,cache=on|off]",
"[,snapshot=on|off][,cache=on|off]\n"
"[,readonly=on|off][,copy-on-read=on|off]",
.help = "add drive to PCI storage controller",
.mhandler.cmd = drive_hot_add,
},
@ -1206,6 +1207,21 @@ ETEXI
.mhandler.cmd_new = do_block_set_passwd,
},
STEXI
@item block_set_io_throttle @var{device} @var{bps} @var{bps_rd} @var{bps_wr} @var{iops} @var{iops_rd} @var{iops_wr}
@findex block_set_io_throttle
Change I/O throttle limits for a block drive to @var{bps} @var{bps_rd} @var{bps_wr} @var{iops} @var{iops_rd} @var{iops_wr}.
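For example, to cap a hypothetical drive @code{virtio0} at 1 MB/s of total
bandwidth while leaving every other limit unset:
@example
(qemu) block_set_io_throttle virtio0 1000000 0 0 0 0 0
@end example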
ETEXI
{
.name = "block_set_io_throttle",
.args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l",
.params = "device bps bps_rd bps_wr iops iops_rd iops_wr",
.help = "change I/O throttle limits for a block drive",
.user_print = monitor_user_noop,
.mhandler.cmd_new = do_block_set_io_throttle,
},
STEXI
@item block_passwd @var{device} @var{password}
@findex block_passwd

hmp.c
View File

@ -216,6 +216,16 @@ void hmp_info_block(Monitor *mon)
info->value->inserted->ro,
info->value->inserted->drv,
info->value->inserted->encrypted);
monitor_printf(mon, " bps=%" PRId64 " bps_rd=%" PRId64
" bps_wr=%" PRId64 " iops=%" PRId64
" iops_rd=%" PRId64 " iops_wr=%" PRId64,
info->value->inserted->bps,
info->value->inserted->bps_rd,
info->value->inserted->bps_wr,
info->value->inserted->iops,
info->value->inserted->iops_rd,
info->value->inserted->iops_wr);
} else {
monitor_printf(mon, " [not inserted]");
}

View File

@ -200,8 +200,9 @@ static void pmac_ide_flush(DBDMA_io *io)
{
MACIOIDEState *m = io->opaque;
if (m->aiocb)
qemu_aio_flush();
if (m->aiocb) {
bdrv_drain_all();
}
}
/* PowerMac IDE memory IO */

View File

@ -309,7 +309,7 @@ void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val)
* aio operation with preadv/pwritev.
*/
if (bm->bus->dma->aiocb) {
qemu_aio_flush();
bdrv_drain_all();
assert(bm->bus->dma->aiocb == NULL);
assert((bm->status & BM_STATUS_DMAING) == 0);
}

View File

@ -474,7 +474,7 @@ static void virtio_blk_reset(VirtIODevice *vdev)
* This should cancel pending requests, but can't do nicely until there
* are per-device request lists.
*/
qemu_aio_flush();
bdrv_drain_all();
}
/* coalesce internal state, copy to pci i/o region 0

View File

@ -49,7 +49,6 @@ static int syncwrite = 0;
static int batch_maps = 0;
static int max_requests = 32;
static int use_aio = 1;
/* ------------------------------------------------------------- */
@ -314,76 +313,6 @@ static int ioreq_map(struct ioreq *ioreq)
return 0;
}
static int ioreq_runio_qemu_sync(struct ioreq *ioreq)
{
struct XenBlkDev *blkdev = ioreq->blkdev;
int i, rc;
off_t pos;
if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
goto err_no_map;
}
if (ioreq->presync) {
bdrv_flush(blkdev->bs);
}
switch (ioreq->req.operation) {
case BLKIF_OP_READ:
pos = ioreq->start;
for (i = 0; i < ioreq->v.niov; i++) {
rc = bdrv_read(blkdev->bs, pos / BLOCK_SIZE,
ioreq->v.iov[i].iov_base,
ioreq->v.iov[i].iov_len / BLOCK_SIZE);
if (rc != 0) {
xen_be_printf(&blkdev->xendev, 0, "rd I/O error (%p, len %zd)\n",
ioreq->v.iov[i].iov_base,
ioreq->v.iov[i].iov_len);
goto err;
}
pos += ioreq->v.iov[i].iov_len;
}
break;
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
if (!ioreq->req.nr_segments) {
break;
}
pos = ioreq->start;
for (i = 0; i < ioreq->v.niov; i++) {
rc = bdrv_write(blkdev->bs, pos / BLOCK_SIZE,
ioreq->v.iov[i].iov_base,
ioreq->v.iov[i].iov_len / BLOCK_SIZE);
if (rc != 0) {
xen_be_printf(&blkdev->xendev, 0, "wr I/O error (%p, len %zd)\n",
ioreq->v.iov[i].iov_base,
ioreq->v.iov[i].iov_len);
goto err;
}
pos += ioreq->v.iov[i].iov_len;
}
break;
default:
/* unknown operation (shouldn't happen -- parse catches this) */
goto err;
}
if (ioreq->postsync) {
bdrv_flush(blkdev->bs);
}
ioreq->status = BLKIF_RSP_OKAY;
ioreq_unmap(ioreq);
ioreq_finish(ioreq);
return 0;
err:
ioreq_unmap(ioreq);
err_no_map:
ioreq_finish(ioreq);
ioreq->status = BLKIF_RSP_ERROR;
return -1;
}
static void qemu_aio_complete(void *opaque, int ret)
{
struct ioreq *ioreq = opaque;
@ -554,9 +483,7 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
rp = blkdev->rings.common.sring->req_prod;
xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
if (use_aio) {
blk_send_response_all(blkdev);
}
blk_send_response_all(blkdev);
while (rc != rp) {
/* pull request from ring */
if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
@ -579,16 +506,7 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
continue;
}
if (use_aio) {
/* run i/o in aio mode */
ioreq_runio_qemu_aio(ioreq);
} else {
/* run i/o in sync mode */
ioreq_runio_qemu_sync(ioreq);
}
}
if (!use_aio) {
blk_send_response_all(blkdev);
ioreq_runio_qemu_aio(ioreq);
}
if (blkdev->more_work && blkdev->requests_inflight < max_requests) {

View File

@ -120,7 +120,7 @@ static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t v
devices, and bit 2 the non-primary-master IDE devices. */
if (val & UNPLUG_ALL_IDE_DISKS) {
DPRINTF("unplug disks\n");
qemu_aio_flush();
bdrv_drain_all();
bdrv_flush_all();
pci_unplug_disks(s->pci_dev.bus);
}

View File

@ -370,13 +370,27 @@
#
# @encrypted: true if the backing device is encrypted
#
# @bps: total throughput limit in bytes per second
#
# @bps_rd: read throughput limit in bytes per second
#
# @bps_wr: write throughput limit in bytes per second
#
# @iops: total I/O operations per second
#
# @iops_rd: read I/O operations per second
#
# @iops_wr: write I/O operations per second
#
# Since: 0.14.0
#
# Notes: This interface is only found in @BlockInfo.
##
{ 'type': 'BlockDeviceInfo',
'data': { 'file': 'str', 'ro': 'bool', 'drv': 'str',
'*backing_file': 'str', 'encrypted': 'bool' } }
'*backing_file': 'str', 'encrypted': 'bool',
'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int'} }
##
# @BlockDeviceIoStatus:

View File

@ -341,6 +341,12 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
return res.ll;
}
/* Round number down to multiple */
#define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))
/* Round number up to multiple */
#define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m))
#include "module.h"
#endif
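A quick worked example of the two alignment macros (values are illustrative):
/* QEMU_ALIGN_DOWN(1234, 512) -> 1024  (1234 / 512 = 2; 2 * 512 = 1024)
 * QEMU_ALIGN_UP(1234, 512)   -> 1536  ((1234 + 511) rounded down = 1536)
 * QEMU_ALIGN_UP(1024, 512)   -> 1024  (already aligned, unchanged)     */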

View File

@ -85,6 +85,34 @@ static QemuOptsList qemu_drive_opts = {
.name = "readonly",
.type = QEMU_OPT_BOOL,
.help = "open drive file as read-only",
},{
.name = "iops",
.type = QEMU_OPT_NUMBER,
.help = "limit total I/O operations per second",
},{
.name = "iops_rd",
.type = QEMU_OPT_NUMBER,
.help = "limit read operations per second",
},{
.name = "iops_wr",
.type = QEMU_OPT_NUMBER,
.help = "limit write operations per second",
},{
.name = "bps",
.type = QEMU_OPT_NUMBER,
.help = "limit total bytes per second",
},{
.name = "bps_rd",
.type = QEMU_OPT_NUMBER,
.help = "limit read bytes per second",
},{
.name = "bps_wr",
.type = QEMU_OPT_NUMBER,
.help = "limit write bytes per second",
},{
.name = "copy-on-read",
.type = QEMU_OPT_BOOL,
.help = "copy read data from backing file into image file",
},
{ /* end of list */ }
},
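Taken together, a command line exercising these options might look like the
following (a hypothetical invocation; the file name and values are
placeholders, and the bps_rd/bps_wr pair is legal because no total bps limit
is given):
qemu -drive file=disk.img,if=virtio,bps_rd=1048576,bps_wr=524288,iops=200,copy-on-read=on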

View File

@ -61,6 +61,14 @@ void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
assert(qemu_in_coroutine());
}
void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue)
{
Coroutine *self = qemu_coroutine_self();
QTAILQ_INSERT_HEAD(&queue->entries, self, co_queue_next);
qemu_coroutine_yield();
assert(qemu_in_coroutine());
}
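The difference from qemu_co_queue_wait() is only the insertion point; a
minimal sketch (hypothetical helper, not part of the patch) of when each is
appropriate:
/* A re-throttled request keeps its place at the front of the line by
 * re-queueing at the head; a fresh request queues at the tail (FIFO). */
static void coroutine_fn queue_self(CoQueue *queue, bool keep_front)
{
    if (keep_front) {
        qemu_co_queue_wait_insert_head(queue); /* resume before older waiters */
    } else {
        qemu_co_queue_wait(queue);             /* resume in arrival order */
    }
}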
bool qemu_co_queue_next(CoQueue *queue)
{
Coroutine *next;
@ -76,6 +84,13 @@ bool qemu_co_queue_next(CoQueue *queue)
return (next != NULL);
}
void qemu_co_queue_restart_all(CoQueue *queue)
{
while (qemu_co_queue_next(queue)) {
/* Do nothing */
}
}
bool qemu_co_queue_empty(CoQueue *queue)
{
return (QTAILQ_FIRST(&queue->entries) == NULL);
@ -136,13 +151,7 @@ void qemu_co_rwlock_unlock(CoRwlock *lock)
assert(qemu_in_coroutine());
if (lock->writer) {
lock->writer = false;
while (!qemu_co_queue_empty(&lock->queue)) {
/*
* Wakeup every body. This will include some
* writers too.
*/
qemu_co_queue_next(&lock->queue);
}
qemu_co_queue_restart_all(&lock->queue);
} else {
lock->reader--;
assert(lock->reader >= 0);

View File

@ -117,6 +117,12 @@ void qemu_co_queue_init(CoQueue *queue);
*/
void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
/**
* Adds the current coroutine to the head of the CoQueue and transfers control to the
* caller of the coroutine.
*/
void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue);
/**
* Restarts the next coroutine in the CoQueue and removes it from the queue.
*
@ -124,6 +130,11 @@ void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
*/
bool qemu_co_queue_next(CoQueue *queue);
/**
* Restarts all coroutines in the CoQueue and leaves the queue empty.
*/
void qemu_co_queue_restart_all(CoQueue *queue);
/**
* Checks if the CoQueue is empty.
*/

View File

@ -1853,9 +1853,9 @@ int main(int argc, char **argv)
command_loop();
/*
* Make sure all outstanding requests get flushed the program exits.
* Make sure all outstanding requests complete before the program exits.
*/
qemu_aio_flush();
bdrv_drain_all();
if (bs) {
bdrv_delete(bs);

View File

@ -135,7 +135,8 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
" [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
" [,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
" [,serial=s][,addr=A][,id=name][,aio=threads|native]\n"
" [,readonly=on|off]\n"
" [,readonly=on|off][,copy-on-read=on|off]\n"
" [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]][[,iops=i]|[[,iops_rd=r][,iops_wr=w]]\n"
" use 'file' as a drive image\n", QEMU_ARCH_ALL)
STEXI
@item -drive @var{option}[,@var{option}[,@var{option}[,...]]]
@ -186,6 +187,9 @@ host disk is full; report the error to the guest otherwise).
The default setting is @option{werror=enospc} and @option{rerror=report}.
@item readonly
Open drive @option{file} as read-only. Guest write attempts will fail.
@item copy-on-read=@var{copy-on-read}
@var{copy-on-read} is "on" or "off" and controls whether to copy read backing
file sectors into the image file.
@end table
By default, writethrough caching is used for all block devices. This means that
@ -217,6 +221,10 @@ like your host losing power, the disk storage getting disconnected accidentally,
etc., your image will most probably be rendered unusable. When using
the @option{-snapshot} option, unsafe caching is always used.
Copy-on-read avoids accessing the same backing file sectors repeatedly and is
useful when the backing file is over a slow network. By default copy-on-read
is off.
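For instance, a hypothetical invocation enabling it on an overlay image (the
file name is a placeholder):
@example
qemu -drive file=overlay.qcow2,if=virtio,copy-on-read=on
@end example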
Instead of @option{-cdrom} you can use:
@example
qemu -drive file=file,index=2,media=cdrom

View File

@ -251,6 +251,10 @@ static const QErrorStringTable qerror_table[] = {
.error_fmt = QERR_QGA_COMMAND_FAILED,
.desc = "Guest agent command failed, error was '%(message)'",
},
{
.error_fmt = QERR_INVALID_PARAMETER_COMBINATION,
.desc = "Invalid paramter combination",
},
{}
};

View File

@ -207,4 +207,7 @@ QError *qobject_to_qerror(const QObject *obj);
#define QERR_QGA_COMMAND_FAILED \
"{ 'class': 'QgaCommandFailed', 'data': { 'message': %s } }"
#define QERR_INVALID_PARAMETER_COMBINATION \
"{ 'class': 'InvalidParameterCombination', 'data': {} }"
#endif /* QERROR_H */

View File

@ -848,6 +848,44 @@ Example:
"password": "12345" } }
<- { "return": {} }
EQMP
{
.name = "block_set_io_throttle",
.args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l",
.params = "device bps bps_rd bps_wr iops iops_rd iops_wr",
.help = "change I/O throttle limits for a block drive",
.user_print = monitor_user_noop,
.mhandler.cmd_new = do_block_set_io_throttle,
},
SQMP
block_set_io_throttle
---------------------
Change I/O throttle limits for a block drive.
Arguments:
- "device": device name (json-string)
- "bps": total throughput limit in bytes per second(json-int)
- "bps_rd": read throughput limit in bytes per second(json-int)
- "bps_wr": read throughput limit in bytes per second(json-int)
- "iops": total I/O operations per second(json-int)
- "iops_rd": read I/O operations per second(json-int)
- "iops_wr": write I/O operations per second(json-int)
Example:
-> { "execute": "block_set_io_throttle", "arguments": { "device": "virtio0",
"bps": "1000000",
"bps_rd": "0",
"bps_wr": "0",
"iops": "0",
"iops_rd": "0",
"iops_wr": "0" } }
<- { "return": {} }
EQMP
{
@ -1152,6 +1190,13 @@ Each json-object contains the following:
"tftp", "vdi", "vmdk", "vpc", "vvfat"
- "backing_file": backing file name (json-string, optional)
- "encrypted": true if encrypted, false otherwise (json-bool)
- "bps": limit total bytes per second (json-int)
- "bps_rd": limit read bytes per second (json-int)
- "bps_wr": limit write bytes per second (json-int)
- "iops": limit total I/O operations per second (json-int)
- "iops_rd": limit read operations per second (json-int)
- "iops_wr": limit write operations per second (json-int)
- "io-status": I/O operation status, only present if the device supports it
and the VM is configured to stop on errors. It's always reset
to "ok" when the "cont" command is issued (json_string, optional)
@ -1171,7 +1216,13 @@ Example:
"ro":false,
"drv":"qcow2",
"encrypted":false,
"file":"disks/test.img"
"file":"disks/test.img",
"bps":1000000,
"bps_rd":0,
"bps_wr":0,
"iops":1000000,
"iops_rd":0,
"iops_wr":0,
},
"type":"unknown"
},

View File

@ -2104,7 +2104,7 @@ int load_vmstate(const char *name)
}
/* Flush all IO requests so they don't interfere with the new state. */
qemu_aio_flush();
bdrv_drain_all();
bs = NULL;
while ((bs = bdrv_next(bs))) {

View File

@ -69,6 +69,7 @@ bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_io_em(void *bs, int64_t sector_num, int nb_sectors, int is_write, void *acb) "bs %p sector_num %"PRId64" nb_sectors %d is_write %d acb %p"
bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"
# hw/virtio-blk.c
virtio_blk_req_complete(void *req, int status) "req %p status %d"
@ -631,3 +632,10 @@ win_helper_no_switch_pstate(uint32_t new_pstate_regs) "change_pstate: regs new=%
win_helper_wrpil(uint32_t psrpil, uint32_t new_pil) "old=%x new=%x"
win_helper_done(uint32_t tl) "tl=%d"
win_helper_retry(uint32_t tl) "tl=%d"
# dma-helpers.c
dma_bdrv_io(void *dbs, void *bs, int64_t sector_num, bool to_dev) "dbs=%p bs=%p sector_num=%" PRId64 " to_dev=%d"
dma_aio_cancel(void *dbs) "dbs=%p"
dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p"
dma_bdrv_cb(void *dbs, int ret) "dbs=%p ret=%d"
dma_map_wait(void *dbs) "dbs=%p"

View File

@ -351,7 +351,7 @@ void xen_invalidate_map_cache(void)
MapCacheRev *reventry;
/* Flush pending AIO before destroying the mapcache */
qemu_aio_flush();
bdrv_drain_all();
QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
DPRINTF("There should be no locked mappings at this time, "