qcow2: implement lazy refcounts
Lazy refcounts is a performance optimization for qcow2 that postpones refcount metadata updates and instead marks the image dirty. In the case of crash or power failure the image will be left in a dirty state and repaired next time it is opened. Reducing metadata I/O is important for cache=writethrough and cache=directsync because these modes guarantee that data is on disk after each write (hence we cannot take advantage of caching updates in RAM). Refcount metadata is not needed for guest->file block address translation and therefore does not need to be on-disk at the time of write completion - this is the motivation behind the lazy refcount optimization. The lazy refcount optimization must be enabled at image creation time: qemu-img create -f qcow2 -o compat=1.1,lazy_refcounts=on a.qcow2 10G qemu-system-x86_64 -drive if=virtio,file=a.qcow2,cache=writethrough Update qemu-iotests 031 and 036 since the extension header size changes when we add feature bit table entries. Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
parent
91cf8a35e7
commit
bfe8043e92
@ -662,7 +662,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
|
||||
qcow2_cache_depends_on_flush(s->l2_table_cache);
|
||||
}
|
||||
|
||||
qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
|
||||
if (qcow2_need_accurate_refcounts(s)) {
|
||||
qcow2_cache_set_dependency(bs, s->l2_table_cache,
|
||||
s->refcount_block_cache);
|
||||
}
|
||||
ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
|
||||
if (ret < 0) {
|
||||
goto err;
|
||||
|
@ -214,6 +214,41 @@ static void report_unsupported_feature(BlockDriverState *bs,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Sets the dirty bit and flushes afterwards if necessary.
|
||||
*
|
||||
* The incompatible_features bit is only set if the image file header was
|
||||
* updated successfully. Therefore it is not required to check the return
|
||||
* value of this function.
|
||||
*/
|
||||
static int qcow2_mark_dirty(BlockDriverState *bs)
|
||||
{
|
||||
BDRVQcowState *s = bs->opaque;
|
||||
uint64_t val;
|
||||
int ret;
|
||||
|
||||
assert(s->qcow_version >= 3);
|
||||
|
||||
if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
|
||||
return 0; /* already dirty */
|
||||
}
|
||||
|
||||
val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
|
||||
ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
|
||||
&val, sizeof(val));
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
ret = bdrv_flush(bs->file);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Only treat image as dirty if the header was updated successfully */
|
||||
s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clears the dirty bit and flushes before if necessary. Only call this
|
||||
* function when there are no pending requests, it does not guard against
|
||||
@ -752,6 +787,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (l2meta.nb_clusters > 0 &&
|
||||
(s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) {
|
||||
qcow2_mark_dirty(bs);
|
||||
}
|
||||
|
||||
cluster_offset = l2meta.cluster_offset;
|
||||
assert((cluster_offset & 511) == 0);
|
||||
|
||||
@ -994,6 +1034,11 @@ int qcow2_update_header(BlockDriverState *bs)
|
||||
.bit = QCOW2_INCOMPAT_DIRTY_BITNR,
|
||||
.name = "dirty bit",
|
||||
},
|
||||
{
|
||||
.type = QCOW2_FEAT_TYPE_COMPATIBLE,
|
||||
.bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
|
||||
.name = "lazy refcounts",
|
||||
},
|
||||
};
|
||||
|
||||
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
|
||||
@ -1176,6 +1221,11 @@ static int qcow2_create2(const char *filename, int64_t total_size,
|
||||
header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
|
||||
}
|
||||
|
||||
if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
|
||||
header.compatible_features |=
|
||||
cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
|
||||
}
|
||||
|
||||
ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
|
||||
if (ret < 0) {
|
||||
goto out;
|
||||
@ -1289,6 +1339,8 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options)
|
||||
options->value.s);
|
||||
return -EINVAL;
|
||||
}
|
||||
} else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
|
||||
flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
|
||||
}
|
||||
options++;
|
||||
}
|
||||
@ -1299,6 +1351,12 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
|
||||
fprintf(stderr, "Lazy refcounts only supported with compatibility "
|
||||
"level 1.1 and above (use compat=1.1 or greater)\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
|
||||
cluster_size, prealloc, options, version);
|
||||
}
|
||||
@ -1485,10 +1543,12 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
|
||||
if (ret < 0) {
|
||||
qemu_co_mutex_unlock(&s->lock);
|
||||
return ret;
|
||||
if (qcow2_need_accurate_refcounts(s)) {
|
||||
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
|
||||
if (ret < 0) {
|
||||
qemu_co_mutex_unlock(&s->lock);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
qemu_co_mutex_unlock(&s->lock);
|
||||
|
||||
@ -1603,6 +1663,11 @@ static QEMUOptionParameter qcow2_create_options[] = {
|
||||
.type = OPT_STRING,
|
||||
.help = "Preallocation mode (allowed values: off, metadata)"
|
||||
},
|
||||
{
|
||||
.name = BLOCK_OPT_LAZY_REFCOUNTS,
|
||||
.type = OPT_FLAG,
|
||||
.help = "Postpone refcount updates",
|
||||
},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
|
@ -118,6 +118,14 @@ enum {
|
||||
QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY,
|
||||
};
|
||||
|
||||
/* Compatible feature bits */
|
||||
enum {
|
||||
QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
|
||||
QCOW2_COMPAT_LAZY_REFCOUNTS = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
|
||||
|
||||
QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS,
|
||||
};
|
||||
|
||||
typedef struct Qcow2Feature {
|
||||
uint8_t type;
|
||||
uint8_t bit;
|
||||
@ -245,6 +253,11 @@ static inline int qcow2_get_cluster_type(uint64_t l2_entry)
|
||||
}
|
||||
}
|
||||
|
||||
/* Check whether refcounts are eager or lazy */
|
||||
static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
|
||||
{
|
||||
return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
|
||||
}
|
||||
|
||||
// FIXME Need qcow2_ prefix to global functions
|
||||
|
||||
|
26
block_int.h
26
block_int.h
@ -31,8 +31,9 @@
|
||||
#include "qemu-timer.h"
|
||||
#include "qapi-types.h"
|
||||
|
||||
#define BLOCK_FLAG_ENCRYPT 1
|
||||
#define BLOCK_FLAG_COMPAT6 4
|
||||
#define BLOCK_FLAG_ENCRYPT 1
|
||||
#define BLOCK_FLAG_COMPAT6 4
|
||||
#define BLOCK_FLAG_LAZY_REFCOUNTS 8
|
||||
|
||||
#define BLOCK_IO_LIMIT_READ 0
|
||||
#define BLOCK_IO_LIMIT_WRITE 1
|
||||
@ -41,16 +42,17 @@
|
||||
#define BLOCK_IO_SLICE_TIME 100000000
|
||||
#define NANOSECONDS_PER_SECOND 1000000000.0
|
||||
|
||||
#define BLOCK_OPT_SIZE "size"
|
||||
#define BLOCK_OPT_ENCRYPT "encryption"
|
||||
#define BLOCK_OPT_COMPAT6 "compat6"
|
||||
#define BLOCK_OPT_BACKING_FILE "backing_file"
|
||||
#define BLOCK_OPT_BACKING_FMT "backing_fmt"
|
||||
#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
|
||||
#define BLOCK_OPT_TABLE_SIZE "table_size"
|
||||
#define BLOCK_OPT_PREALLOC "preallocation"
|
||||
#define BLOCK_OPT_SUBFMT "subformat"
|
||||
#define BLOCK_OPT_COMPAT_LEVEL "compat"
|
||||
#define BLOCK_OPT_SIZE "size"
|
||||
#define BLOCK_OPT_ENCRYPT "encryption"
|
||||
#define BLOCK_OPT_COMPAT6 "compat6"
|
||||
#define BLOCK_OPT_BACKING_FILE "backing_file"
|
||||
#define BLOCK_OPT_BACKING_FMT "backing_fmt"
|
||||
#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
|
||||
#define BLOCK_OPT_TABLE_SIZE "table_size"
|
||||
#define BLOCK_OPT_PREALLOC "preallocation"
|
||||
#define BLOCK_OPT_SUBFMT "subformat"
|
||||
#define BLOCK_OPT_COMPAT_LEVEL "compat"
|
||||
#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
|
||||
|
||||
typedef struct BdrvTrackedRequest BdrvTrackedRequest;
|
||||
|
||||
|
@ -54,7 +54,7 @@ header_length 72
|
||||
|
||||
Header extension:
|
||||
magic 0x6803f857
|
||||
length 48
|
||||
length 96
|
||||
data <binary>
|
||||
|
||||
Header extension:
|
||||
@ -68,7 +68,7 @@ No errors were found on the image.
|
||||
|
||||
magic 0x514649fb
|
||||
version 2
|
||||
backing_file_offset 0xc8
|
||||
backing_file_offset 0xf8
|
||||
backing_file_size 0x17
|
||||
cluster_bits 16
|
||||
size 67108864
|
||||
@ -92,7 +92,7 @@ data 'host_device'
|
||||
|
||||
Header extension:
|
||||
magic 0x6803f857
|
||||
length 48
|
||||
length 96
|
||||
data <binary>
|
||||
|
||||
Header extension:
|
||||
@ -155,7 +155,7 @@ header_length 104
|
||||
|
||||
Header extension:
|
||||
magic 0x6803f857
|
||||
length 48
|
||||
length 96
|
||||
data <binary>
|
||||
|
||||
Header extension:
|
||||
@ -169,7 +169,7 @@ No errors were found on the image.
|
||||
|
||||
magic 0x514649fb
|
||||
version 3
|
||||
backing_file_offset 0xe8
|
||||
backing_file_offset 0x118
|
||||
backing_file_size 0x17
|
||||
cluster_bits 16
|
||||
size 67108864
|
||||
@ -193,7 +193,7 @@ data 'host_device'
|
||||
|
||||
Header extension:
|
||||
magic 0x6803f857
|
||||
length 48
|
||||
length 96
|
||||
data <binary>
|
||||
|
||||
Header extension:
|
||||
|
@ -46,7 +46,7 @@ header_length 104
|
||||
|
||||
Header extension:
|
||||
magic 0x6803f857
|
||||
length 48
|
||||
length 96
|
||||
data <binary>
|
||||
|
||||
*** done
|
||||
|
Loading…
Reference in New Issue
Block a user