qcow2: Batch discards

This optimises the discard operation for freed clusters by batching
discard requests (both snapshot deletion and bdrv_discard end up
updating the refcounts cluster by cluster).

Note that we don't discard asynchronously, but keep s->lock held. This
is to avoid that a freed cluster is reallocated and written to while the
discard is still in flight.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Kevin Wolf 2013-06-19 13:44:20 +02:00 committed by Stefan Hajnoczi
parent 67af674e47
commit 0b919fae31
4 changed files with 109 additions and 7 deletions

View File

@ -1377,18 +1377,25 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
nb_clusters = size_to_clusters(s, end_offset - offset); nb_clusters = size_to_clusters(s, end_offset - offset);
s->cache_discards = true;
/* Each L2 table is handled by its own loop iteration */ /* Each L2 table is handled by its own loop iteration */
while (nb_clusters > 0) { while (nb_clusters > 0) {
ret = discard_single_l2(bs, offset, nb_clusters); ret = discard_single_l2(bs, offset, nb_clusters);
if (ret < 0) { if (ret < 0) {
return ret; goto fail;
} }
nb_clusters -= ret; nb_clusters -= ret;
offset += (ret * s->cluster_size); offset += (ret * s->cluster_size);
} }
return 0; ret = 0;
fail:
s->cache_discards = false;
qcow2_process_discards(bs, ret);
return ret;
} }
/* /*
@ -1450,15 +1457,22 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
/* Each L2 table is handled by its own loop iteration */ /* Each L2 table is handled by its own loop iteration */
nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS); nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
s->cache_discards = true;
while (nb_clusters > 0) { while (nb_clusters > 0) {
ret = zero_single_l2(bs, offset, nb_clusters); ret = zero_single_l2(bs, offset, nb_clusters);
if (ret < 0) { if (ret < 0) {
return ret; goto fail;
} }
nb_clusters -= ret; nb_clusters -= ret;
offset += (ret * s->cluster_size); offset += (ret * s->cluster_size);
} }
return 0; ret = 0;
fail:
s->cache_discards = false;
qcow2_process_discards(bs, ret);
return ret;
} }

View File

@ -420,6 +420,74 @@ fail_block:
return ret; return ret;
} }
void qcow2_process_discards(BlockDriverState *bs, int ret)
{
BDRVQcowState *s = bs->opaque;
Qcow2DiscardRegion *d, *next;
QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
QTAILQ_REMOVE(&s->discards, d, next);
/* Discard is optional, ignore the return value */
if (ret >= 0) {
bdrv_discard(bs->file,
d->offset >> BDRV_SECTOR_BITS,
d->bytes >> BDRV_SECTOR_BITS);
}
g_free(d);
}
}
static void update_refcount_discard(BlockDriverState *bs,
uint64_t offset, uint64_t length)
{
BDRVQcowState *s = bs->opaque;
Qcow2DiscardRegion *d, *p, *next;
QTAILQ_FOREACH(d, &s->discards, next) {
uint64_t new_start = MIN(offset, d->offset);
uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
if (new_end - new_start <= length + d->bytes) {
/* There can't be any overlap, areas ending up here have no
* references any more and therefore shouldn't get freed another
* time. */
assert(d->bytes + length == new_end - new_start);
d->offset = new_start;
d->bytes = new_end - new_start;
goto found;
}
}
d = g_malloc(sizeof(*d));
*d = (Qcow2DiscardRegion) {
.bs = bs,
.offset = offset,
.bytes = length,
};
QTAILQ_INSERT_TAIL(&s->discards, d, next);
found:
/* Merge discard requests if they are adjacent now */
QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
if (p == d
|| p->offset > d->offset + d->bytes
|| d->offset > p->offset + p->bytes)
{
continue;
}
/* Still no overlap possible */
assert(p->offset == d->offset + d->bytes
|| d->offset == p->offset + p->bytes);
QTAILQ_REMOVE(&s->discards, p, next);
d->offset = MIN(d->offset, p->offset);
d->bytes += p->bytes;
}
}
/* XXX: cache several refcount block clusters ? */ /* XXX: cache several refcount block clusters ? */
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int64_t offset, int64_t length, int addend, enum qcow2_discard_type type) int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
@ -488,15 +556,18 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
s->free_cluster_index = cluster_index; s->free_cluster_index = cluster_index;
} }
refcount_block[block_index] = cpu_to_be16(refcount); refcount_block[block_index] = cpu_to_be16(refcount);
if (refcount == 0 && s->discard_passthrough[type]) { if (refcount == 0 && s->discard_passthrough[type]) {
/* Try discarding, ignore errors */ update_refcount_discard(bs, cluster_offset, s->cluster_size);
/* FIXME Doing this cluster by cluster will be painfully slow */
bdrv_discard(bs->file, cluster_offset, 1);
} }
} }
ret = 0; ret = 0;
fail: fail:
if (!s->cache_discards) {
qcow2_process_discards(bs, ret);
}
/* Write last changed block to disk */ /* Write last changed block to disk */
if (refcount_block) { if (refcount_block) {
int wret; int wret;
@ -755,6 +826,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
l1_table = NULL; l1_table = NULL;
l1_size2 = l1_size * sizeof(uint64_t); l1_size2 = l1_size * sizeof(uint64_t);
s->cache_discards = true;
/* WARNING: qcow2_snapshot_goto relies on this function not using the /* WARNING: qcow2_snapshot_goto relies on this function not using the
* l1_table_offset when it is the current s->l1_table_offset! Be careful * l1_table_offset when it is the current s->l1_table_offset! Be careful
* when changing this! */ * when changing this! */
@ -867,6 +940,9 @@ fail:
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
} }
s->cache_discards = false;
qcow2_process_discards(bs, ret);
/* Update L1 only if it isn't deleted anyway (addend = -1) */ /* Update L1 only if it isn't deleted anyway (addend = -1) */
if (ret == 0 && addend >= 0 && l1_modified) { if (ret == 0 && addend >= 0 && l1_modified) {
for (i = 0; i < l1_size; i++) { for (i = 0; i < l1_size; i++) {

View File

@ -486,6 +486,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
} }
QLIST_INIT(&s->cluster_allocs); QLIST_INIT(&s->cluster_allocs);
QTAILQ_INIT(&s->discards);
/* read qcow2 extensions */ /* read qcow2 extensions */
if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) { if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {

View File

@ -147,6 +147,13 @@ typedef struct Qcow2Feature {
char name[46]; char name[46];
} QEMU_PACKED Qcow2Feature; } QEMU_PACKED Qcow2Feature;
typedef struct Qcow2DiscardRegion {
BlockDriverState *bs;
uint64_t offset;
uint64_t bytes;
QTAILQ_ENTRY(Qcow2DiscardRegion) next;
} Qcow2DiscardRegion;
typedef struct BDRVQcowState { typedef struct BDRVQcowState {
int cluster_bits; int cluster_bits;
int cluster_size; int cluster_size;
@ -199,6 +206,8 @@ typedef struct BDRVQcowState {
size_t unknown_header_fields_size; size_t unknown_header_fields_size;
void* unknown_header_fields; void* unknown_header_fields;
QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext; QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
bool cache_discards;
} BDRVQcowState; } BDRVQcowState;
/* XXX: use std qcow open function ? */ /* XXX: use std qcow open function ? */
@ -374,6 +383,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix); BdrvCheckMode fix);
void qcow2_process_discards(BlockDriverState *bs, int ret);
/* qcow2-cluster.c functions */ /* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size); bool exact_size);