qcow2: Implement .bdrv_co_preadv()

Reading from qcow2 images now works at byte granularity.

Most of the affected code in qcow2 actually gets simpler with this
change. The only exception is encryption, which operates on fixed
512-byte blocks; in order to keep this working, bs->request_alignment
is set to BDRV_SECTOR_SIZE for encrypted images.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
This commit is contained in:
Kevin Wolf 2016-05-31 16:13:07 +02:00
parent b2f65d6b02
commit ecfe186380
3 changed files with 73 additions and 66 deletions

View File

@ -424,7 +424,8 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
* interface. This avoids double I/O throttling and request tracking,
* which can lead to deadlock when block layer copy-on-read is enabled.
*/
ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
ret = bs->drv->bdrv_co_preadv(bs, (start_sect + n_start) * BDRV_SECTOR_SIZE,
n * BDRV_SECTOR_SIZE, &qiov, 0);
if (ret < 0) {
goto out;
}
@ -464,19 +465,21 @@ out:
/*
* get_cluster_offset
*
* For a given offset of the disk image, find the cluster offset in
* qcow2 file. The offset is stored in *cluster_offset.
* For a given offset of the virtual disk, find the cluster type and offset in
* the qcow2 file. The offset is stored in *cluster_offset.
*
* on entry, *num is the number of contiguous sectors we'd like to
* access following offset.
* On entry, *bytes is the maximum number of contiguous bytes starting at
* offset that we are interested in.
*
* on exit, *num is the number of contiguous sectors we can read.
* On exit, *bytes is the number of bytes starting at offset that have the same
* cluster type and (if applicable) are stored contiguously in the image file.
* Compressed clusters are always returned one by one.
*
* Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
* cases.
*/
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *cluster_offset)
unsigned int *bytes, uint64_t *cluster_offset)
{
BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
@ -485,13 +488,9 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
unsigned int offset_in_cluster, nb_clusters;
uint64_t bytes_available, bytes_needed;
int ret;
unsigned int bytes;
assert(*num <= BDRV_REQUEST_MAX_SECTORS);
bytes = *num * BDRV_SECTOR_SIZE;
offset_in_cluster = offset_into_cluster(s, offset);
bytes_needed = (uint64_t) bytes + offset_in_cluster;
bytes_needed = (uint64_t) *bytes + offset_in_cluster;
l1_bits = s->l2_bits + s->cluster_bits;
@ -595,9 +594,7 @@ out:
bytes_available = bytes_needed;
}
bytes = bytes_available - offset_in_cluster;
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
*num = bytes >> BDRV_SECTOR_BITS;
*bytes = bytes_available - offset_in_cluster;
return ret;

View File

@ -975,6 +975,9 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
}
bs->encrypted = 1;
/* Encryption works on a sector granularity */
bs->request_alignment = BDRV_SECTOR_SIZE;
}
s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
@ -1331,16 +1334,20 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
BDRVQcow2State *s = bs->opaque;
uint64_t cluster_offset;
int index_in_cluster, ret;
unsigned int bytes;
int64_t status = 0;
*pnum = nb_sectors;
bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
ret = qcow2_get_cluster_offset(bs, sector_num << 9, &bytes,
&cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
return ret;
}
*pnum = bytes >> BDRV_SECTOR_BITS;
if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
!s->cipher) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
@ -1358,28 +1365,34 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
/* handle reading after the end of the backing file */
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors)
int64_t offset, int bytes)
{
uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
int n1;
if ((sector_num + nb_sectors) <= bs->total_sectors)
return nb_sectors;
if (sector_num >= bs->total_sectors)
n1 = 0;
else
n1 = bs->total_sectors - sector_num;
qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
if ((offset + bytes) <= bs_size) {
return bytes;
}
if (offset >= bs_size) {
n1 = 0;
} else {
n1 = bs_size - offset;
}
qemu_iovec_memset(qiov, n1, 0, bytes - n1);
return n1;
}
static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
int remaining_sectors, QEMUIOVector *qiov)
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
BDRVQcow2State *s = bs->opaque;
int index_in_cluster, n1;
int offset_in_cluster, n1;
int ret;
int cur_nr_sectors; /* number of sectors in current iteration */
unsigned int cur_bytes; /* number of bytes in current iteration */
uint64_t cluster_offset = 0;
uint64_t bytes_done = 0;
QEMUIOVector hd_qiov;
@ -1389,26 +1402,24 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
qemu_co_mutex_lock(&s->lock);
while (remaining_sectors != 0) {
while (bytes != 0) {
/* prepare next request */
cur_nr_sectors = remaining_sectors;
cur_bytes = MIN(bytes, INT_MAX);
if (s->cipher) {
cur_nr_sectors = MIN(cur_nr_sectors,
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
cur_bytes = MIN(cur_bytes,
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
}
ret = qcow2_get_cluster_offset(bs, sector_num << 9,
&cur_nr_sectors, &cluster_offset);
ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
if (ret < 0) {
goto fail;
}
index_in_cluster = sector_num & (s->cluster_sectors - 1);
offset_in_cluster = offset_into_cluster(s, offset);
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
cur_nr_sectors * 512);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
switch (ret) {
case QCOW2_CLUSTER_UNALLOCATED:
@ -1416,18 +1427,17 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
if (bs->backing) {
/* read from the base image */
n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
sector_num, cur_nr_sectors);
offset, cur_bytes);
if (n1 > 0) {
QEMUIOVector local_qiov;
qemu_iovec_init(&local_qiov, hd_qiov.niov);
qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
n1 * BDRV_SECTOR_SIZE);
qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_readv(bs->backing->bs, sector_num,
n1, &local_qiov);
ret = bdrv_co_preadv(bs->backing->bs, offset, n1,
&local_qiov, 0);
qemu_co_mutex_lock(&s->lock);
qemu_iovec_destroy(&local_qiov);
@ -1438,12 +1448,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
}
} else {
/* Note: in this case, no need to wait */
qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
}
break;
case QCOW2_CLUSTER_ZERO:
qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
break;
case QCOW2_CLUSTER_COMPRESSED:
@ -1454,8 +1464,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
}
qemu_iovec_from_buf(&hd_qiov, 0,
s->cluster_cache + index_in_cluster * 512,
512 * cur_nr_sectors);
s->cluster_cache + offset_in_cluster,
cur_bytes);
break;
case QCOW2_CLUSTER_NORMAL:
@ -1482,34 +1492,34 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
}
}
assert(cur_nr_sectors <=
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
qemu_iovec_reset(&hd_qiov);
qemu_iovec_add(&hd_qiov, cluster_data,
512 * cur_nr_sectors);
qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_readv(bs->file->bs,
(cluster_offset >> 9) + index_in_cluster,
cur_nr_sectors, &hd_qiov);
ret = bdrv_co_preadv(bs->file->bs,
cluster_offset + offset_in_cluster,
cur_bytes, &hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
}
if (bs->encrypted) {
assert(s->cipher);
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Error *err = NULL;
if (qcow2_encrypt_sectors(s, sector_num, cluster_data,
cluster_data, cur_nr_sectors, false,
&err) < 0) {
if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
cluster_data, cluster_data,
cur_bytes >> BDRV_SECTOR_BITS,
false, &err) < 0) {
error_free(err);
ret = -EIO;
goto fail;
}
qemu_iovec_from_buf(qiov, bytes_done,
cluster_data, 512 * cur_nr_sectors);
qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
}
break;
@ -1519,9 +1529,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
goto fail;
}
remaining_sectors -= cur_nr_sectors;
sector_num += cur_nr_sectors;
bytes_done += cur_nr_sectors * 512;
bytes -= cur_bytes;
offset += cur_bytes;
bytes_done += cur_bytes;
}
ret = 0;
@ -2435,7 +2445,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
if (head || tail) {
int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
uint64_t off;
int nr;
unsigned int nr;
assert(head + count <= s->cluster_size);
@ -2452,7 +2462,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
/* We can have new write after previous check */
offset = cl_start << BDRV_SECTOR_BITS;
count = s->cluster_size;
nr = s->cluster_sectors;
nr = s->cluster_size;
ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
qemu_co_mutex_unlock(&s->lock);
@ -3368,7 +3378,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_co_get_block_status = qcow2_co_get_block_status,
.bdrv_set_key = qcow2_set_key,
.bdrv_co_readv = qcow2_co_readv,
.bdrv_co_preadv = qcow2_co_preadv,
.bdrv_co_writev = qcow2_co_writev,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,

View File

@ -544,7 +544,7 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
int nb_sectors, bool enc, Error **errp);
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *cluster_offset);
unsigned int *bytes, uint64_t *cluster_offset);
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *host_offset, QCowL2Meta **m);
uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,