qemu_img: is_not_zero() optimization
I ran qemu-img under a profiler and realized that most of the CPU time is consumed by the is_not_zero() function. I made a couple of optimizations to it and got the following output for `time qemu-img convert -O qcow2 volume.qcow2 snapshot.qcow2`:

Original qemu-img: real 0m56.159s user 0m34.670s sys 0m12.079s
Patched qemu-img: real 0m34.805s user 0m18.445s sys 0m12.552s

Signed-off-by: Dmitry Konishchev <konishchev@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
parent
6f321e93ab
commit
f6a00aa150
29
qemu-img.c
29
qemu-img.c
@@ -496,14 +496,37 @@ static int img_commit(int argc, char **argv)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Checks whether the sector is not a zero sector.
|
||||||
|
*
|
||||||
|
* Attention! The len must be a multiple of 4 * sizeof(long) due to
|
||||||
|
* restriction of optimizations in this function.
|
||||||
|
*/
|
||||||
static int is_not_zero(const uint8_t *sector, int len)
|
static int is_not_zero(const uint8_t *sector, int len)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Use long as the biggest available internal data type that fits into the
|
||||||
|
* CPU register and unroll the loop to smooth out the effect of memory
|
||||||
|
* latency.
|
||||||
|
*/
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
len >>= 2;
|
long d0, d1, d2, d3;
|
||||||
for(i = 0;i < len; i++) {
|
const long * const data = (const long *) sector;
|
||||||
if (((uint32_t *)sector)[i] != 0)
|
|
||||||
|
len /= sizeof(long);
|
||||||
|
|
||||||
|
for(i = 0; i < len; i += 4) {
|
||||||
|
d0 = data[i + 0];
|
||||||
|
d1 = data[i + 1];
|
||||||
|
d2 = data[i + 2];
|
||||||
|
d3 = data[i + 3];
|
||||||
|
|
||||||
|
if (d0 || d1 || d2 || d3) {
|
||||||
return 1;
|
return 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user