diff --git a/block/io.c b/block/io.c index aa532a5c1f..4f005623f7 100644 --- a/block/io.c +++ b/block/io.c @@ -1214,6 +1214,8 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX); int alignment = MAX(bs->bl.pwrite_zeroes_alignment, bs->bl.request_alignment); + int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, + MAX_WRITE_ZEROES_BOUNCE_BUFFER); assert(alignment % bs->bl.request_alignment == 0); head = offset % alignment; @@ -1229,9 +1231,12 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, * boundaries. */ if (head) { - /* Make a small request up to the first aligned sector. */ - num = MIN(count, alignment - head); - head = 0; + /* Make a small request up to the first aligned sector. For + * convenience, limit this request to max_transfer even if + * we don't need to fall back to writes. */ + num = MIN(MIN(count, max_transfer), alignment - head); + head = (head + num) % alignment; + assert(num < max_write_zeroes); } else if (tail && num > alignment) { /* Shorten the request to the last aligned sector. */ num -= tail; @@ -1257,8 +1262,6 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, if (ret == -ENOTSUP) { /* Fall back to bounce buffer if write zeroes is unsupported */ - int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, - MAX_WRITE_ZEROES_BOUNCE_BUFFER); BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE; if ((flags & BDRV_REQ_FUA) && @@ -2421,7 +2424,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, { BdrvTrackedRequest req; int max_pdiscard, ret; - int head, align; + int head, tail, align; if (!bs->drv) { return -ENOMEDIUM; @@ -2444,19 +2447,15 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, return 0; } - /* Discard is advisory, so ignore any unaligned head or tail */ + /* Discard is advisory, but some devices track and coalesce + * unaligned requests, so we must pass everything down rather than + * round here. Still, most devices will just silently ignore + * unaligned requests (by returning -ENOTSUP), so we must fragment + * the request accordingly. */ align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment); assert(align % bs->bl.request_alignment == 0); head = offset % align; - if (head) { - head = MIN(count, align - head); - count -= head; - offset += head; - } - count = QEMU_ALIGN_DOWN(count, align); - if (!count) { - return 0; - } + tail = (offset + count) % align; bdrv_inc_in_flight(bs); tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD); @@ -2468,11 +2467,34 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX), align); - assert(max_pdiscard); + assert(max_pdiscard >= bs->bl.request_alignment); while (count > 0) { int ret; - int num = MIN(count, max_pdiscard); + int num = count; + + if (head) { + /* Make small requests to get to alignment boundaries. */ + num = MIN(count, align - head); + if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) { + num %= bs->bl.request_alignment; + } + head = (head + num) % align; + assert(num < max_pdiscard); + } else if (tail) { + if (num > align) { + /* Shorten the request to the last aligned cluster. */ + num -= tail; + } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) && + tail > bs->bl.request_alignment) { + tail %= bs->bl.request_alignment; + num -= tail; + } + } + /* limit request size */ + if (num > max_pdiscard) { + num = max_pdiscard; + } if (bs->drv->bdrv_co_pdiscard) { ret = bs->drv->bdrv_co_pdiscard(bs, offset, num); diff --git a/block/iscsi.c b/block/iscsi.c index 71bd523df5..0960929d57 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1083,7 +1083,9 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) struct IscsiTask iTask; struct unmap_list list; - assert(is_byte_request_lun_aligned(offset, count, iscsilun)); + if (!is_byte_request_lun_aligned(offset, count, iscsilun)) { + return -ENOTSUP; + } if (!iscsilun->lbp.lbpu) { /* UNMAP is not supported by the target */ diff --git a/block/qcow2.c b/block/qcow2.c index 6d5689a23c..7cfcd8412c 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1206,6 +1206,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.request_alignment = BDRV_SECTOR_SIZE; } bs->bl.pwrite_zeroes_alignment = s->cluster_size; + bs->bl.pdiscard_alignment = s->cluster_size; } static int qcow2_set_key(BlockDriverState *bs, const char *key) @@ -2490,6 +2491,11 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, int ret; BDRVQcow2State *s = bs->opaque; + if (!QEMU_IS_ALIGNED(offset | count, s->cluster_size)) { + assert(count < s->cluster_size); + return -ENOTSUP; + } + qemu_co_mutex_lock(&s->lock); ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS, QCOW2_DISCARD_REQUEST, false); diff --git a/block/sheepdog.c b/block/sheepdog.c index 1fb917343a..4c9af89180 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2829,8 +2829,9 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, iov.iov_len = sizeof(zero); discard_iov.iov = &iov; discard_iov.niov = 1; - assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((count & (BDRV_SECTOR_SIZE - 1)) == 0); + if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) { + return -ENOTSUP; + } acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS, count >> BDRV_SECTOR_BITS); acb->aiocb_type = AIOCB_DISCARD_OBJ;