block/parallels: optimize linear image expansion

Plain image expansion spends a lot of time updating the image file size.
This seriously affects performance. The following simple test
  qemu-img create -f parallels -o cluster_size=64k ./1.hds 64G
  qemu-io -n -c "write -P 0x11 0 1024M" ./1.hds
could be improved if the format driver pre-allocated space in the
image file in reasonably sized chunks.

This patch preallocates 128 MB using bdrv_write_zeroes, which should
normally use the fallocate() call internally. The older truncate()
approach can still be selected as a fallback via image open options,
thanks to the previous patch.

The benefit is around 15%.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Roman Kagan <rkagan@parallels.com>
Signed-off-by: Roman Kagan <rkagan@parallels.com>
Message-id: 1430207220-24458-27-git-send-email-den@openvz.org
CC: Kevin Wolf <kwolf@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Denis V. Lunev 2015-04-28 10:46:59 +03:00 committed by Stefan Hajnoczi
parent d61790112f
commit 19f5dc1591

View File

@ -88,6 +88,7 @@ typedef struct BDRVParallelsState {
uint32_t *bat_bitmap;
unsigned int bat_size;
int64_t data_end;
uint64_t prealloc_size;
ParallelsPreallocMode prealloc_mode;
@ -187,7 +188,6 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num)
BDRVParallelsState *s = bs->opaque;
uint32_t idx, offset;
int64_t pos;
int ret;
idx = sector_num / s->tracks;
offset = sector_num % s->tracks;
@ -200,14 +200,21 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num)
}
pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS;
if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) {
ret = bdrv_truncate(bs->file, (pos + s->tracks) << BDRV_SECTOR_BITS);
if (s->data_end + s->tracks > pos) {
int ret;
if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
ret = bdrv_write_zeroes(bs->file, s->data_end,
s->prealloc_size, 0);
} else {
ret = bdrv_write_zeroes(bs->file, pos, s->tracks, 0);
ret = bdrv_truncate(bs->file,
(s->data_end + s->prealloc_size) << BDRV_SECTOR_BITS);
}
if (ret < 0) {
return ret;
}
}
pos = s->data_end;
s->data_end += s->tracks;
s->bat_bitmap[idx] = cpu_to_le32(pos / s->off_multiplier);
@ -549,7 +556,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
{
BDRVParallelsState *s = bs->opaque;
ParallelsHeader ph;
int ret, size;
int ret, size, i;
QemuOpts *opts = NULL;
Error *local_err = NULL;
char *buf;
@ -599,7 +606,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
ret = -ENOMEM;
goto fail;
}
if (le32_to_cpu(ph.data_off) < s->header_size) {
s->data_end = le32_to_cpu(ph.data_off);
if (s->data_end == 0) {
s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
}
if (s->data_end < s->header_size) {
/* there is not enough unused space to fit to block align between BAT
and actual data. We can't avoid read-modify-write... */
s->header_size = size;
@ -611,6 +622,13 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
}
s->bat_bitmap = (uint32_t *)(s->header + 1);
for (i = 0; i < s->bat_size; i++) {
int64_t off = bat2sect(s, i);
if (off >= s->data_end) {
s->data_end = off + s->tracks;
}
}
if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
/* Image was not closed correctly. The check is mandatory */
s->header_unclean = true;
@ -685,6 +703,10 @@ static void parallels_close(BlockDriverState *bs)
parallels_update_header(bs);
}
if (bs->open_flags & BDRV_O_RDWR) {
bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
}
g_free(s->bat_dirty_bmap);
qemu_vfree(s->header);
}