qcow2: Use QcowCache

Use the new functions of qcow2-cache.c for everything that works on refcount
block and L2 tables.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
Kevin Wolf 2011-01-10 17:17:28 +01:00
parent 493810940b
commit 29c1a7301a
5 changed files with 243 additions and 301 deletions

View File

@ -104,6 +104,12 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
} }
} }
if (c == s->refcount_block_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
} else if (c == s->l2_table_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
}
ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table, ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
s->cluster_size); s->cluster_size);
if (ret < 0) { if (ret < 0) {
@ -218,6 +224,10 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
c->entries[i].offset = 0; c->entries[i].offset = 0;
if (read_from_disk) { if (read_from_disk) {
if (c == s->l2_table_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
}
ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size); ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
if (ret < 0) { if (ret < 0) {
return ret; return ret;

View File

@ -67,7 +67,11 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
qemu_free(new_l1_table); qemu_free(new_l1_table);
return new_l1_table_offset; return new_l1_table_offset;
} }
bdrv_flush(bs->file);
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
return ret;
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++) for(i = 0; i < s->l1_size; i++)
@ -98,63 +102,6 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
return ret; return ret;
} }
void qcow2_l2_cache_reset(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
}
static inline int l2_cache_new_entry(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
uint32_t min_count;
int min_index, i;
/* find a new entry in the least used one */
min_index = 0;
min_count = 0xffffffff;
for(i = 0; i < L2_CACHE_SIZE; i++) {
if (s->l2_cache_counts[i] < min_count) {
min_count = s->l2_cache_counts[i];
min_index = i;
}
}
return min_index;
}
/*
* seek_l2_table
*
* seek l2_offset in the l2_cache table
* if not found, return NULL,
* if found,
* increments the l2 cache hit count of the entry,
* if counter overflow, divide by two all counters
* return the pointer to the l2 cache entry
*
*/
static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset)
{
int i, j;
for(i = 0; i < L2_CACHE_SIZE; i++) {
if (l2_offset == s->l2_cache_offsets[i]) {
/* increment the hit count */
if (++s->l2_cache_counts[i] == 0xffffffff) {
for(j = 0; j < L2_CACHE_SIZE; j++) {
s->l2_cache_counts[j] >>= 1;
}
}
return s->l2_cache + (i << s->l2_bits);
}
}
return NULL;
}
/* /*
* l2_load * l2_load
* *
@ -169,35 +116,13 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
uint64_t **l2_table) uint64_t **l2_table)
{ {
BDRVQcowState *s = bs->opaque; BDRVQcowState *s = bs->opaque;
int min_index;
int ret; int ret;
/* seek if the table for the given offset is in the cache */ ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
*l2_table = seek_l2_table(s, l2_offset);
if (*l2_table != NULL) {
return 0;
}
/* not found: load a new entry in the least used one */
min_index = l2_cache_new_entry(bs);
*l2_table = s->l2_cache + (min_index << s->l2_bits);
BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
ret = bdrv_pread(bs->file, l2_offset, *l2_table,
s->l2_size * sizeof(uint64_t));
if (ret < 0) {
qcow2_l2_cache_reset(bs);
return ret; return ret;
} }
s->l2_cache_offsets[min_index] = l2_offset;
s->l2_cache_counts[min_index] = 1;
return 0;
}
/* /*
* Writes one sector of the L1 table to the disk (can't update single entries * Writes one sector of the L1 table to the disk (can't update single entries
* and we really don't want bdrv_pread to perform a read-modify-write) * and we really don't want bdrv_pread to perform a read-modify-write)
@ -238,7 +163,6 @@ static int write_l1_entry(BlockDriverState *bs, int l1_index)
static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
{ {
BDRVQcowState *s = bs->opaque; BDRVQcowState *s = bs->opaque;
int min_index;
uint64_t old_l2_offset; uint64_t old_l2_offset;
uint64_t *l2_table; uint64_t *l2_table;
int64_t l2_offset; int64_t l2_offset;
@ -252,29 +176,48 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
if (l2_offset < 0) { if (l2_offset < 0) {
return l2_offset; return l2_offset;
} }
bdrv_flush(bs->file);
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
goto fail;
}
/* allocate a new entry in the l2 cache */ /* allocate a new entry in the l2 cache */
min_index = l2_cache_new_entry(bs); ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
l2_table = s->l2_cache + (min_index << s->l2_bits); if (ret < 0) {
return ret;
}
l2_table = *table;
if (old_l2_offset == 0) { if (old_l2_offset == 0) {
/* if there was no old l2 table, clear the new table */ /* if there was no old l2 table, clear the new table */
memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
} else { } else {
uint64_t* old_table;
/* if there was an old l2 table, read it from the disk */ /* if there was an old l2 table, read it from the disk */
BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
ret = bdrv_pread(bs->file, old_l2_offset, l2_table, ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_offset,
s->l2_size * sizeof(uint64_t)); (void**) &old_table);
if (ret < 0) {
goto fail;
}
memcpy(l2_table, old_table, s->cluster_size);
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
if (ret < 0) { if (ret < 0) {
goto fail; goto fail;
} }
} }
/* write the l2 table to the file */ /* write the l2 table to the file */
BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t)); qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret < 0) { if (ret < 0) {
goto fail; goto fail;
} }
@ -286,17 +229,12 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
goto fail; goto fail;
} }
/* update the l2 cache entry */
s->l2_cache_offsets[min_index] = l2_offset;
s->l2_cache_counts[min_index] = 1;
*table = l2_table; *table = l2_table;
return 0; return 0;
fail: fail:
qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
s->l1_table[l1_index] = old_l2_offset; s->l1_table[l1_index] = old_l2_offset;
qcow2_l2_cache_reset(bs);
return ret; return ret;
} }
@ -521,6 +459,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
&l2_table[l2_index], 0, QCOW_OFLAG_COPIED); &l2_table[l2_index], 0, QCOW_OFLAG_COPIED);
} }
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
nb_available = (c * s->cluster_sectors); nb_available = (c * s->cluster_sectors);
out: out:
if (nb_available > nb_needed) if (nb_available > nb_needed)
@ -575,6 +515,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
return ret; return ret;
} }
} else { } else {
/* FIXME Order */
if (l2_offset) if (l2_offset)
qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t)); qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
ret = l2_allocate(bs, l1_index, &l2_table); ret = l2_allocate(bs, l1_index, &l2_table);
@ -632,6 +573,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
cluster_offset = qcow2_alloc_bytes(bs, compressed_size); cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
if (cluster_offset < 0) { if (cluster_offset < 0) {
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
return 0; return 0;
} }
@ -646,46 +588,23 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
/* compressed clusters never have the copied flag */ /* compressed clusters never have the copied flag */
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
l2_table[l2_index] = cpu_to_be64(cluster_offset); l2_table[l2_index] = cpu_to_be64(cluster_offset);
if (bdrv_pwrite_sync(bs->file, ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
l2_offset + l2_index * sizeof(uint64_t), if (ret < 0) {
l2_table + l2_index,
sizeof(uint64_t)) < 0)
return 0; return 0;
}
return cluster_offset; return cluster_offset;
} }
/*
* Write L2 table updates to disk, writing whole sectors to avoid a
* read-modify-write in bdrv_pwrite
*/
#define L2_ENTRIES_PER_SECTOR (512 / 8)
static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table,
uint64_t l2_offset, int l2_index, int num)
{
int l2_start_index = l2_index & ~(L1_ENTRIES_PER_SECTOR - 1);
int start_offset = (8 * l2_index) & ~511;
int end_offset = (8 * (l2_index + num) + 511) & ~511;
size_t len = end_offset - start_offset;
int ret;
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
ret = bdrv_pwrite(bs->file, l2_offset + start_offset,
&l2_table[l2_start_index], len);
if (ret < 0) {
return ret;
}
return 0;
}
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{ {
BDRVQcowState *s = bs->opaque; BDRVQcowState *s = bs->opaque;
int i, j = 0, l2_index, ret; int i, j = 0, l2_index, ret;
uint64_t *old_cluster, start_sect, l2_offset, *l2_table; uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
uint64_t cluster_offset = m->cluster_offset; uint64_t cluster_offset = m->cluster_offset;
bool cow = false;
if (m->nb_clusters == 0) if (m->nb_clusters == 0)
return 0; return 0;
@ -695,6 +614,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
/* copy content of unmodified sectors */ /* copy content of unmodified sectors */
start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
if (m->n_start) { if (m->n_start) {
cow = true;
ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
if (ret < 0) if (ret < 0)
goto err; goto err;
@ -702,17 +622,30 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
if (m->nb_available & (s->cluster_sectors - 1)) { if (m->nb_available & (s->cluster_sectors - 1)) {
uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1); uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
cow = true;
ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9), ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
m->nb_available - end, s->cluster_sectors); m->nb_available - end, s->cluster_sectors);
if (ret < 0) if (ret < 0)
goto err; goto err;
} }
/* update L2 table */ /*
* Update L2 table.
*
* Before we update the L2 table to actually point to the new cluster, we
* need to be sure that the refcounts have been increased and COW was
* handled.
*/
if (cow) {
bdrv_flush(bs->file);
}
qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index); ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index);
if (ret < 0) { if (ret < 0) {
goto err; goto err;
} }
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
for (i = 0; i < m->nb_clusters; i++) { for (i = 0; i < m->nb_clusters; i++) {
/* if two concurrent writes happen to the same unallocated cluster /* if two concurrent writes happen to the same unallocated cluster
@ -728,16 +661,9 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
(i << s->cluster_bits)) | QCOW_OFLAG_COPIED); (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
} }
/*
* Before we update the L2 table to actually point to the new cluster, we
* need to be sure that the refcounts have been increased and COW was
* handled.
*/
bdrv_flush(bs->file);
ret = write_l2_entries(bs, l2_table, l2_offset, l2_index, m->nb_clusters); ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
if (ret < 0) { if (ret < 0) {
qcow2_l2_cache_reset(bs);
goto err; goto err;
} }
@ -746,7 +672,6 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
* Also flush bs->file to get the right order for L2 and refcount update. * Also flush bs->file to get the right order for L2 and refcount update.
*/ */
if (j != 0) { if (j != 0) {
bdrv_flush(bs->file);
for (i = 0; i < j; i++) { for (i = 0; i < j; i++) {
qcow2_free_any_clusters(bs, qcow2_free_any_clusters(bs,
be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1); be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1);
@ -868,7 +793,8 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
m->depends_on = old_alloc; m->depends_on = old_alloc;
m->nb_clusters = 0; m->nb_clusters = 0;
*num = 0; *num = 0;
return 0; ret = 0;
goto fail;
} }
} }
} }
@ -884,7 +810,8 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size); cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
if (cluster_offset < 0) { if (cluster_offset < 0) {
QLIST_REMOVE(m, next_in_flight); QLIST_REMOVE(m, next_in_flight);
return cluster_offset; ret = cluster_offset;
goto fail;
} }
/* save info needed for meta data update */ /* save info needed for meta data update */
@ -893,12 +820,21 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
m->nb_clusters = nb_clusters; m->nb_clusters = nb_clusters;
out: out:
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
if (ret < 0) {
return ret;
}
m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
m->cluster_offset = cluster_offset; m->cluster_offset = cluster_offset;
*num = m->nb_available - n_start; *num = m->nb_available - n_start;
return 0; return 0;
fail:
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
return ret;
} }
static int decompress_buffer(uint8_t *out_buf, int out_buf_size, static int decompress_buffer(uint8_t *out_buf, int out_buf_size,

View File

@ -32,27 +32,6 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int addend); int addend);
static int cache_refcount_updates = 0;
static int write_refcount_block(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
size_t size = s->cluster_size;
if (s->refcount_block_cache_offset == 0) {
return 0;
}
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE);
if (bdrv_pwrite_sync(bs->file, s->refcount_block_cache_offset,
s->refcount_block_cache, size) < 0)
{
return -EIO;
}
return 0;
}
/*********************************************************/ /*********************************************************/
/* refcount handling */ /* refcount handling */
@ -61,7 +40,6 @@ int qcow2_refcount_init(BlockDriverState *bs)
BDRVQcowState *s = bs->opaque; BDRVQcowState *s = bs->opaque;
int ret, refcount_table_size2, i; int ret, refcount_table_size2, i;
s->refcount_block_cache = qemu_malloc(s->cluster_size);
refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t); refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
s->refcount_table = qemu_malloc(refcount_table_size2); s->refcount_table = qemu_malloc(refcount_table_size2);
if (s->refcount_table_size > 0) { if (s->refcount_table_size > 0) {
@ -81,34 +59,22 @@ int qcow2_refcount_init(BlockDriverState *bs)
void qcow2_refcount_close(BlockDriverState *bs) void qcow2_refcount_close(BlockDriverState *bs)
{ {
BDRVQcowState *s = bs->opaque; BDRVQcowState *s = bs->opaque;
qemu_free(s->refcount_block_cache);
qemu_free(s->refcount_table); qemu_free(s->refcount_table);
} }
static int load_refcount_block(BlockDriverState *bs, static int load_refcount_block(BlockDriverState *bs,
int64_t refcount_block_offset) int64_t refcount_block_offset,
void **refcount_block)
{ {
BDRVQcowState *s = bs->opaque; BDRVQcowState *s = bs->opaque;
int ret; int ret;
if (cache_refcount_updates) {
ret = write_refcount_block(bs);
if (ret < 0) {
return ret;
}
}
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD); BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
ret = bdrv_pread(bs->file, refcount_block_offset, s->refcount_block_cache, ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
s->cluster_size); refcount_block);
if (ret < 0) {
s->refcount_block_cache_offset = 0;
return ret;
}
s->refcount_block_cache_offset = refcount_block_offset; return ret;
return 0;
} }
/* /*
@ -122,6 +88,8 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
int refcount_table_index, block_index; int refcount_table_index, block_index;
int64_t refcount_block_offset; int64_t refcount_block_offset;
int ret; int ret;
uint16_t *refcount_block;
uint16_t refcount;
refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
if (refcount_table_index >= s->refcount_table_size) if (refcount_table_index >= s->refcount_table_size)
@ -129,16 +97,24 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
refcount_block_offset = s->refcount_table[refcount_table_index]; refcount_block_offset = s->refcount_table[refcount_table_index];
if (!refcount_block_offset) if (!refcount_block_offset)
return 0; return 0;
if (refcount_block_offset != s->refcount_block_cache_offset) {
/* better than nothing: return allocated if read error */ ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
ret = load_refcount_block(bs, refcount_block_offset); (void**) &refcount_block);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }
}
block_index = cluster_index & block_index = cluster_index &
((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
return be16_to_cpu(s->refcount_block_cache[block_index]); refcount = be16_to_cpu(refcount_block[block_index]);
ret = qcow2_cache_put(bs, s->refcount_block_cache,
(void**) &refcount_block);
if (ret < 0) {
return ret;
}
return refcount;
} }
/* /*
@ -174,9 +150,10 @@ static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
* Loads a refcount block. If it doesn't exist yet, it is allocated first * Loads a refcount block. If it doesn't exist yet, it is allocated first
* (including growing the refcount table if needed). * (including growing the refcount table if needed).
* *
* Returns the offset of the refcount block on success or -errno in error case * Returns 0 on success or -errno in error case
*/ */
static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) static int alloc_refcount_block(BlockDriverState *bs,
int64_t cluster_index, uint16_t **refcount_block)
{ {
BDRVQcowState *s = bs->opaque; BDRVQcowState *s = bs->opaque;
unsigned int refcount_table_index; unsigned int refcount_table_index;
@ -194,13 +171,8 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
/* If it's already there, we're done */ /* If it's already there, we're done */
if (refcount_block_offset) { if (refcount_block_offset) {
if (refcount_block_offset != s->refcount_block_cache_offset) { return load_refcount_block(bs, refcount_block_offset,
ret = load_refcount_block(bs, refcount_block_offset); (void**) refcount_block);
if (ret < 0) {
return ret;
}
}
return refcount_block_offset;
} }
} }
@ -226,12 +198,10 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
* refcount block into the cache * refcount block into the cache
*/ */
if (cache_refcount_updates) { *refcount_block = NULL;
ret = write_refcount_block(bs);
if (ret < 0) { /* We write to the refcount table, so we might depend on L2 tables */
return ret; qcow2_cache_flush(bs, s->l2_table_cache);
}
}
/* Allocate the refcount block itself and mark it as used */ /* Allocate the refcount block itself and mark it as used */
int64_t new_block = alloc_clusters_noref(bs, s->cluster_size); int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
@ -247,13 +217,18 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) { if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
/* Zero the new refcount block before updating it */ /* Zero the new refcount block before updating it */
memset(s->refcount_block_cache, 0, s->cluster_size); ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
s->refcount_block_cache_offset = new_block; (void**) refcount_block);
if (ret < 0) {
goto fail_block;
}
memset(*refcount_block, 0, s->cluster_size);
/* The block describes itself, need to update the cache */ /* The block describes itself, need to update the cache */
int block_index = (new_block >> s->cluster_bits) & int block_index = (new_block >> s->cluster_bits) &
((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
s->refcount_block_cache[block_index] = cpu_to_be16(1); (*refcount_block)[block_index] = cpu_to_be16(1);
} else { } else {
/* Described somewhere else. This can recurse at most twice before we /* Described somewhere else. This can recurse at most twice before we
* arrive at a block that describes itself. */ * arrive at a block that describes itself. */
@ -266,14 +241,19 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
/* Initialize the new refcount block only after updating its refcount, /* Initialize the new refcount block only after updating its refcount,
* update_refcount uses the refcount cache itself */ * update_refcount uses the refcount cache itself */
memset(s->refcount_block_cache, 0, s->cluster_size); ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
s->refcount_block_cache_offset = new_block; (void**) refcount_block);
if (ret < 0) {
goto fail_block;
}
memset(*refcount_block, 0, s->cluster_size);
} }
/* Now the new refcount block needs to be written to disk */ /* Now the new refcount block needs to be written to disk */
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE); BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
ret = bdrv_pwrite_sync(bs->file, new_block, s->refcount_block_cache, qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
s->cluster_size); ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) { if (ret < 0) {
goto fail_block; goto fail_block;
} }
@ -290,7 +270,12 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
} }
s->refcount_table[refcount_table_index] = new_block; s->refcount_table[refcount_table_index] = new_block;
return new_block; return 0;
}
ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
if (ret < 0) {
goto fail_block;
} }
/* /*
@ -410,9 +395,9 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t)); qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
s->free_cluster_index = old_free_cluster_index; s->free_cluster_index = old_free_cluster_index;
ret = load_refcount_block(bs, new_block); ret = load_refcount_block(bs, new_block, (void**) refcount_block);
if (ret < 0) { if (ret < 0) {
goto fail_block; return ret;
} }
return new_block; return new_block;
@ -420,52 +405,20 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
fail_table: fail_table:
qemu_free(new_table); qemu_free(new_table);
fail_block: fail_block:
s->refcount_block_cache_offset = 0; if (*refcount_block != NULL) {
qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
}
return ret; return ret;
} }
#define REFCOUNTS_PER_SECTOR (512 >> REFCOUNT_SHIFT)
static int write_refcount_block_entries(BlockDriverState *bs,
int64_t refcount_block_offset, int first_index, int last_index)
{
BDRVQcowState *s = bs->opaque;
size_t size;
int ret;
if (cache_refcount_updates) {
return 0;
}
if (first_index < 0) {
return 0;
}
first_index &= ~(REFCOUNTS_PER_SECTOR - 1);
last_index = (last_index + REFCOUNTS_PER_SECTOR)
& ~(REFCOUNTS_PER_SECTOR - 1);
size = (last_index - first_index) << REFCOUNT_SHIFT;
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
ret = bdrv_pwrite(bs->file,
refcount_block_offset + (first_index << REFCOUNT_SHIFT),
&s->refcount_block_cache[first_index], size);
if (ret < 0) {
return ret;
}
return 0;
}
/* XXX: cache several refcount block clusters ? */ /* XXX: cache several refcount block clusters ? */
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int64_t offset, int64_t length, int addend) int64_t offset, int64_t length, int addend)
{ {
BDRVQcowState *s = bs->opaque; BDRVQcowState *s = bs->opaque;
int64_t start, last, cluster_offset; int64_t start, last, cluster_offset;
int64_t refcount_block_offset = 0; uint16_t *refcount_block = NULL;
int64_t table_index = -1, old_table_index; int64_t old_table_index = -1;
int first_index = -1, last_index = -1;
int ret; int ret;
#ifdef DEBUG_ALLOC2 #ifdef DEBUG_ALLOC2
@ -478,6 +431,11 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
return 0; return 0;
} }
if (addend < 0) {
qcow2_cache_set_dependency(bs, s->refcount_block_cache,
s->l2_table_cache);
}
start = offset & ~(s->cluster_size - 1); start = offset & ~(s->cluster_size - 1);
last = (offset + length - 1) & ~(s->cluster_size - 1); last = (offset + length - 1) & ~(s->cluster_size - 1);
for(cluster_offset = start; cluster_offset <= last; for(cluster_offset = start; cluster_offset <= last;
@ -485,42 +443,33 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
{ {
int block_index, refcount; int block_index, refcount;
int64_t cluster_index = cluster_offset >> s->cluster_bits; int64_t cluster_index = cluster_offset >> s->cluster_bits;
int64_t new_block; int64_t table_index =
cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
/* Only write refcount block to disk when we are done with it */
old_table_index = table_index;
table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
if ((old_table_index >= 0) && (table_index != old_table_index)) {
ret = write_refcount_block_entries(bs, refcount_block_offset,
first_index, last_index);
if (ret < 0) {
return ret;
}
first_index = -1;
last_index = -1;
}
/* Load the refcount block and allocate it if needed */ /* Load the refcount block and allocate it if needed */
new_block = alloc_refcount_block(bs, cluster_index); if (table_index != old_table_index) {
if (new_block < 0) { if (refcount_block) {
ret = new_block; ret = qcow2_cache_put(bs, s->refcount_block_cache,
(void**) &refcount_block);
if (ret < 0) {
goto fail; goto fail;
} }
refcount_block_offset = new_block; }
ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
if (ret < 0) {
goto fail;
}
}
old_table_index = table_index;
qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
/* we can update the count and save it */ /* we can update the count and save it */
block_index = cluster_index & block_index = cluster_index &
((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
if (first_index == -1 || block_index < first_index) {
first_index = block_index;
}
if (block_index > last_index) {
last_index = block_index;
}
refcount = be16_to_cpu(s->refcount_block_cache[block_index]); refcount = be16_to_cpu(refcount_block[block_index]);
refcount += addend; refcount += addend;
if (refcount < 0 || refcount > 0xffff) { if (refcount < 0 || refcount > 0xffff) {
ret = -EINVAL; ret = -EINVAL;
@ -529,17 +478,16 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
if (refcount == 0 && cluster_index < s->free_cluster_index) { if (refcount == 0 && cluster_index < s->free_cluster_index) {
s->free_cluster_index = cluster_index; s->free_cluster_index = cluster_index;
} }
s->refcount_block_cache[block_index] = cpu_to_be16(refcount); refcount_block[block_index] = cpu_to_be16(refcount);
} }
ret = 0; ret = 0;
fail: fail:
/* Write last changed block to disk */ /* Write last changed block to disk */
if (refcount_block_offset != 0) { if (refcount_block) {
int wret; int wret;
wret = write_refcount_block_entries(bs, refcount_block_offset, wret = qcow2_cache_put(bs, s->refcount_block_cache,
first_index, last_index); (void**) &refcount_block);
if (wret < 0) { if (wret < 0) {
return ret < 0 ? ret : wret; return ret < 0 ? ret : wret;
} }
@ -758,9 +706,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated; uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
int64_t old_offset, old_l2_offset; int64_t old_offset, old_l2_offset;
int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount; int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
int ret;
qcow2_l2_cache_reset(bs);
cache_refcount_updates = 1;
l2_table = NULL; l2_table = NULL;
l1_table = NULL; l1_table = NULL;
@ -784,7 +730,6 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
} }
l2_size = s->l2_size * sizeof(uint64_t); l2_size = s->l2_size * sizeof(uint64_t);
l2_table = qemu_malloc(l2_size);
l1_modified = 0; l1_modified = 0;
for(i = 0; i < l1_size; i++) { for(i = 0; i < l1_size; i++) {
l2_offset = l1_table[i]; l2_offset = l1_table[i];
@ -792,8 +737,13 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
old_l2_offset = l2_offset; old_l2_offset = l2_offset;
l2_offset &= ~QCOW_OFLAG_COPIED; l2_offset &= ~QCOW_OFLAG_COPIED;
l2_modified = 0; l2_modified = 0;
if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
(void**) &l2_table);
if (ret < 0) {
goto fail; goto fail;
}
for(j = 0; j < s->l2_size; j++) { for(j = 0; j < s->l2_size; j++) {
offset = be64_to_cpu(l2_table[j]); offset = be64_to_cpu(l2_table[j]);
if (offset != 0) { if (offset != 0) {
@ -833,17 +783,23 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
offset |= QCOW_OFLAG_COPIED; offset |= QCOW_OFLAG_COPIED;
} }
if (offset != old_offset) { if (offset != old_offset) {
if (addend > 0) {
qcow2_cache_set_dependency(bs, s->l2_table_cache,
s->refcount_block_cache);
}
l2_table[j] = cpu_to_be64(offset); l2_table[j] = cpu_to_be64(offset);
l2_modified = 1; l2_modified = 1;
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
} }
} }
} }
if (l2_modified) {
if (bdrv_pwrite_sync(bs->file, ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
l2_offset, l2_table, l2_size) < 0) if (ret < 0) {
goto fail; goto fail;
} }
if (addend != 0) { if (addend != 0) {
refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend); refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
} else { } else {
@ -871,16 +827,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
} }
if (l1_allocated) if (l1_allocated)
qemu_free(l1_table); qemu_free(l1_table);
qemu_free(l2_table);
cache_refcount_updates = 0;
write_refcount_block(bs);
return 0; return 0;
fail: fail:
if (l2_table) {
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
}
if (l1_allocated) if (l1_allocated)
qemu_free(l1_table); qemu_free(l1_table);
qemu_free(l2_table);
cache_refcount_updates = 0;
write_refcount_block(bs);
return -EIO; return -EIO;
} }

View File

@ -143,6 +143,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
int len, i, ret = 0; int len, i, ret = 0;
QCowHeader header; QCowHeader header;
uint64_t ext_end; uint64_t ext_end;
bool writethrough;
ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) { if (ret < 0) {
@ -217,8 +218,13 @@ static int qcow2_open(BlockDriverState *bs, int flags)
be64_to_cpus(&s->l1_table[i]); be64_to_cpus(&s->l1_table[i]);
} }
} }
/* alloc L2 cache */
s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); /* alloc L2 table/refcount block cache */
writethrough = ((flags & BDRV_O_CACHE_MASK) == 0);
s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE, writethrough);
s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE,
writethrough);
s->cluster_cache = qemu_malloc(s->cluster_size); s->cluster_cache = qemu_malloc(s->cluster_size);
/* one more sector for decompressed data alignment */ /* one more sector for decompressed data alignment */
s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
@ -270,7 +276,9 @@ static int qcow2_open(BlockDriverState *bs, int flags)
qcow2_free_snapshots(bs); qcow2_free_snapshots(bs);
qcow2_refcount_close(bs); qcow2_refcount_close(bs);
qemu_free(s->l1_table); qemu_free(s->l1_table);
qemu_free(s->l2_cache); if (s->l2_table_cache) {
qcow2_cache_destroy(bs, s->l2_table_cache);
}
qemu_free(s->cluster_cache); qemu_free(s->cluster_cache);
qemu_free(s->cluster_data); qemu_free(s->cluster_data);
return ret; return ret;
@ -719,7 +727,13 @@ static void qcow2_close(BlockDriverState *bs)
{ {
BDRVQcowState *s = bs->opaque; BDRVQcowState *s = bs->opaque;
qemu_free(s->l1_table); qemu_free(s->l1_table);
qemu_free(s->l2_cache);
qcow2_cache_flush(bs, s->l2_table_cache);
qcow2_cache_flush(bs, s->refcount_block_cache);
qcow2_cache_destroy(bs, s->l2_table_cache);
qcow2_cache_destroy(bs, s->refcount_block_cache);
qemu_free(s->cluster_cache); qemu_free(s->cluster_cache);
qemu_free(s->cluster_data); qemu_free(s->cluster_data);
qcow2_refcount_close(bs); qcow2_refcount_close(bs);
@ -1179,6 +1193,19 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
static int qcow2_flush(BlockDriverState *bs) static int qcow2_flush(BlockDriverState *bs)
{ {
BDRVQcowState *s = bs->opaque;
int ret;
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret < 0) {
return ret;
}
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
return ret;
}
return bdrv_flush(bs->file); return bdrv_flush(bs->file);
} }
@ -1186,6 +1213,19 @@ static BlockDriverAIOCB *qcow2_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, BlockDriverCompletionFunc *cb,
void *opaque) void *opaque)
{ {
BDRVQcowState *s = bs->opaque;
int ret;
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret < 0) {
return NULL;
}
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
return NULL;
}
return bdrv_aio_flush(bs->file, cb, opaque); return bdrv_aio_flush(bs->file, cb, opaque);
} }

View File

@ -51,6 +51,9 @@
#define L2_CACHE_SIZE 16 #define L2_CACHE_SIZE 16
/* Must be at least 4 to cover all cases of refcount table growth */
#define REFCOUNT_CACHE_SIZE 4
typedef struct QCowHeader { typedef struct QCowHeader {
uint32_t magic; uint32_t magic;
uint32_t version; uint32_t version;
@ -94,9 +97,10 @@ typedef struct BDRVQcowState {
uint64_t cluster_offset_mask; uint64_t cluster_offset_mask;
uint64_t l1_table_offset; uint64_t l1_table_offset;
uint64_t *l1_table; uint64_t *l1_table;
uint64_t *l2_cache;
uint64_t l2_cache_offsets[L2_CACHE_SIZE]; Qcow2Cache* l2_table_cache;
uint32_t l2_cache_counts[L2_CACHE_SIZE]; Qcow2Cache* refcount_block_cache;
uint8_t *cluster_cache; uint8_t *cluster_cache;
uint8_t *cluster_data; uint8_t *cluster_data;
uint64_t cluster_cache_offset; uint64_t cluster_cache_offset;
@ -105,8 +109,6 @@ typedef struct BDRVQcowState {
uint64_t *refcount_table; uint64_t *refcount_table;
uint64_t refcount_table_offset; uint64_t refcount_table_offset;
uint32_t refcount_table_size; uint32_t refcount_table_size;
uint64_t refcount_block_cache_offset;
uint16_t *refcount_block_cache;
int64_t free_cluster_index; int64_t free_cluster_index;
int64_t free_byte_offset; int64_t free_byte_offset;