diff --git a/block.c b/block.c index 3621d11de0..c9fa5c17ff 100644 --- a/block.c +++ b/block.c @@ -50,6 +50,7 @@ typedef enum { BDRV_REQ_COPY_ON_READ = 0x1, + BDRV_REQ_ZERO_WRITE = 0x2, } BdrvRequestFlags; static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load); @@ -69,7 +70,8 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, BdrvRequestFlags flags); static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + BdrvRequestFlags flags); static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, @@ -1300,7 +1302,7 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque) rwco->nb_sectors, rwco->qiov, 0); } else { rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num, - rwco->nb_sectors, rwco->qiov); + rwco->nb_sectors, rwco->qiov, 0); } } @@ -1639,11 +1641,37 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, BDRV_REQ_COPY_ON_READ); } +static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) +{ + BlockDriver *drv = bs->drv; + QEMUIOVector qiov; + struct iovec iov; + int ret; + + /* First try the efficient write zeroes operation */ + if (drv->bdrv_co_write_zeroes) { + return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors); + } + + /* Fall back to bounce buffer if write zeroes is unsupported */ + iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; + iov.iov_base = qemu_blockalign(bs, iov.iov_len); + memset(iov.iov_base, 0, iov.iov_len); + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov); + + qemu_vfree(iov.iov_base); + return ret; +} + /* * Handle a write request in coroutine context */ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BlockDriver *drv = bs->drv; BdrvTrackedRequest req; @@ -1670,7 +1698,11 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, tracked_request_begin(&req, bs, sector_num, nb_sectors, true); - ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); + if (flags & BDRV_REQ_ZERO_WRITE) { + ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors); + } else { + ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); + } if (bs->dirty_bitmap) { set_dirty_bitmap(bs, sector_num, nb_sectors, 1); @@ -1690,7 +1722,16 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, { trace_bdrv_co_writev(bs, sector_num, nb_sectors); - return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov); + return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0); +} + +int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) +{ + trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors); + + return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, + BDRV_REQ_ZERO_WRITE); } /** @@ -3192,7 +3233,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque) acb->req.nb_sectors, acb->req.qiov, 0); } else { acb->req.error = bdrv_co_do_writev(bs, acb->req.sector, - acb->req.nb_sectors, acb->req.qiov); + acb->req.nb_sectors, acb->req.qiov, 0); } acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); diff --git a/block.h b/block.h index cae289b2fb..60ea7308c6 100644 --- a/block.h +++ b/block.h @@ -146,6 +146,14 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); +/* + * Efficiently zero a region of the disk image. Note that this is a regular + * I/O request like read or write and should have a reasonable size. This + * function is not suitable for zeroing the entire image in a single request + * because it may allocate memory for the entire region. + */ +int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors); int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, diff --git a/block_int.h b/block_int.h index 7be2988ca7..7946cf66a1 100644 --- a/block_int.h +++ b/block_int.h @@ -131,6 +131,14 @@ struct BlockDriver { int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + /* + * Efficiently zero a region of the disk image. Typically an image format + * would use a compact metadata representation to implement this. This + * function pointer may be NULL and .bdrv_co_writev() will be called + * instead. + */ + int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors); int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs, diff --git a/trace-events b/trace-events index 5d05749643..3372087ed9 100644 --- a/trace-events +++ b/trace-events @@ -67,6 +67,7 @@ bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d" bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" +bdrv_co_write_zeroes(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_io_em(void *bs, int64_t sector_num, int nb_sectors, int is_write, void *acb) "bs %p sector_num %"PRId64" nb_sectors %d is_write %d acb %p" bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"