Fix a race condition and non-leaf images growing in VMDK chains, by Igor
Lvovsky. git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2987 c046a42c-6fe2-441c-8c8c-71466251a162
This commit is contained in:
parent
e56869531a
commit
630530a652
151
block-vmdk.c
151
block-vmdk.c
@ -75,8 +75,25 @@ typedef struct BDRVVmdkState {
|
|||||||
|
|
||||||
unsigned int cluster_sectors;
|
unsigned int cluster_sectors;
|
||||||
uint32_t parent_cid;
|
uint32_t parent_cid;
|
||||||
|
int is_parent;
|
||||||
} BDRVVmdkState;
|
} BDRVVmdkState;
|
||||||
|
|
||||||
|
typedef struct VmdkMetaData {
|
||||||
|
uint32_t offset;
|
||||||
|
unsigned int l1_index;
|
||||||
|
unsigned int l2_index;
|
||||||
|
unsigned int l2_offset;
|
||||||
|
int valid;
|
||||||
|
} VmdkMetaData;
|
||||||
|
|
||||||
|
typedef struct ActiveBDRVState{
|
||||||
|
BlockDriverState *hd; // active image handler
|
||||||
|
uint64_t cluster_offset; // current write offset
|
||||||
|
}ActiveBDRVState;
|
||||||
|
|
||||||
|
static ActiveBDRVState activeBDRV;
|
||||||
|
|
||||||
|
|
||||||
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
|
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
|
||||||
{
|
{
|
||||||
uint32_t magic;
|
uint32_t magic;
|
||||||
@ -305,7 +322,7 @@ static void vmdk_parent_close(BlockDriverState *bs)
|
|||||||
bdrv_close(bs->backing_hd);
|
bdrv_close(bs->backing_hd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int parent_open = 0;
|
||||||
static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
|
static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
|
||||||
{
|
{
|
||||||
BDRVVmdkState *s = bs->opaque;
|
BDRVVmdkState *s = bs->opaque;
|
||||||
@ -339,8 +356,10 @@ static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
|
|||||||
bdrv_close(s->hd);
|
bdrv_close(s->hd);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (bdrv_open(s->hd->backing_hd, parent_img_name, 0) < 0)
|
parent_open = 1;
|
||||||
|
if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0)
|
||||||
goto failure;
|
goto failure;
|
||||||
|
parent_open = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -352,6 +371,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
|
|||||||
uint32_t magic;
|
uint32_t magic;
|
||||||
int l1_size, i, ret;
|
int l1_size, i, ret;
|
||||||
|
|
||||||
|
if (parent_open)
|
||||||
|
// Parent must be opened as RO.
|
||||||
|
flags = BDRV_O_RDONLY;
|
||||||
|
fprintf(stderr, "(VMDK) image open: flags=0x%x filename=%s\n", flags, bs->filename);
|
||||||
|
|
||||||
ret = bdrv_file_open(&s->hd, filename, flags);
|
ret = bdrv_file_open(&s->hd, filename, flags);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
@ -387,6 +411,11 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
|
|||||||
s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
|
s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
|
||||||
s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
|
s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
|
||||||
|
|
||||||
|
if (parent_open)
|
||||||
|
s->is_parent = 1;
|
||||||
|
else
|
||||||
|
s->is_parent = 0;
|
||||||
|
|
||||||
// try to open parent images, if exist
|
// try to open parent images, if exist
|
||||||
if (vmdk_parent_open(bs, filename) != 0)
|
if (vmdk_parent_open(bs, filename) != 0)
|
||||||
goto fail;
|
goto fail;
|
||||||
@ -430,7 +459,8 @@ static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate);
|
static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
|
||||||
|
uint64_t offset, int allocate);
|
||||||
|
|
||||||
static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
|
static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
|
||||||
uint64_t offset, int allocate)
|
uint64_t offset, int allocate)
|
||||||
@ -446,27 +476,55 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
|
|||||||
|
|
||||||
if (!vmdk_is_cid_valid(bs))
|
if (!vmdk_is_cid_valid(bs))
|
||||||
return -1;
|
return -1;
|
||||||
parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, offset, allocate);
|
|
||||||
if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) !=
|
|
||||||
ps->cluster_sectors*512)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
if (bdrv_pwrite(s->hd, cluster_offset << 9, whole_grain, sizeof(whole_grain)) !=
|
parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate);
|
||||||
sizeof(whole_grain))
|
|
||||||
return -1;
|
if (parent_cluster_offset) {
|
||||||
|
BDRVVmdkState *act_s = activeBDRV.hd->opaque;
|
||||||
|
|
||||||
|
if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
//Write grain only into the active image
|
||||||
|
if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain))
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t get_cluster_offset(BlockDriverState *bs,
|
static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
|
||||||
|
{
|
||||||
|
BDRVVmdkState *s = bs->opaque;
|
||||||
|
|
||||||
|
/* update L2 table */
|
||||||
|
if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
|
||||||
|
&(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
|
||||||
|
return -1;
|
||||||
|
/* update backup L2 table */
|
||||||
|
if (s->l1_backup_table_offset != 0) {
|
||||||
|
m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
|
||||||
|
if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
|
||||||
|
&(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
|
||||||
uint64_t offset, int allocate)
|
uint64_t offset, int allocate)
|
||||||
{
|
{
|
||||||
BDRVVmdkState *s = bs->opaque;
|
BDRVVmdkState *s = bs->opaque;
|
||||||
unsigned int l1_index, l2_offset, l2_index;
|
unsigned int l1_index, l2_offset, l2_index;
|
||||||
int min_index, i, j;
|
int min_index, i, j;
|
||||||
uint32_t min_count, *l2_table, tmp;
|
uint32_t min_count, *l2_table, tmp = 0;
|
||||||
uint64_t cluster_offset;
|
uint64_t cluster_offset;
|
||||||
|
int status;
|
||||||
|
|
||||||
|
if (m_data)
|
||||||
|
m_data->valid = 0;
|
||||||
|
|
||||||
l1_index = (offset >> 9) / s->l1_entry_sectors;
|
l1_index = (offset >> 9) / s->l1_entry_sectors;
|
||||||
if (l1_index >= s->l1_size)
|
if (l1_index >= s->l1_size)
|
||||||
return 0;
|
return 0;
|
||||||
@ -504,32 +562,45 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
|
|||||||
found:
|
found:
|
||||||
l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
|
l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
|
||||||
cluster_offset = le32_to_cpu(l2_table[l2_index]);
|
cluster_offset = le32_to_cpu(l2_table[l2_index]);
|
||||||
|
|
||||||
if (!cluster_offset) {
|
if (!cluster_offset) {
|
||||||
struct stat file_buf;
|
struct stat file_buf;
|
||||||
|
|
||||||
if (!allocate)
|
if (!allocate)
|
||||||
return 0;
|
return 0;
|
||||||
stat(s->hd->filename, &file_buf);
|
// Avoid the L2 tables update for the images that have snapshots.
|
||||||
cluster_offset = file_buf.st_size;
|
if (!s->is_parent) {
|
||||||
bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
|
status = stat(s->hd->filename, &file_buf);
|
||||||
|
if (status == -1) {
|
||||||
cluster_offset >>= 9;
|
fprintf(stderr, "(VMDK) Fail file stat: filename =%s size=0x%lx errno=%s\n",
|
||||||
/* update L2 table */
|
s->hd->filename, (uint64_t)file_buf.st_size, strerror(errno));
|
||||||
tmp = cpu_to_le32(cluster_offset);
|
|
||||||
l2_table[l2_index] = tmp;
|
|
||||||
if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)),
|
|
||||||
&tmp, sizeof(tmp)) != sizeof(tmp))
|
|
||||||
return 0;
|
|
||||||
/* update backup L2 table */
|
|
||||||
if (s->l1_backup_table_offset != 0) {
|
|
||||||
l2_offset = s->l1_backup_table[l1_index];
|
|
||||||
if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)),
|
|
||||||
&tmp, sizeof(tmp)) != sizeof(tmp))
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
cluster_offset = file_buf.st_size;
|
||||||
|
bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
|
||||||
|
|
||||||
|
cluster_offset >>= 9;
|
||||||
|
tmp = cpu_to_le32(cluster_offset);
|
||||||
|
l2_table[l2_index] = tmp;
|
||||||
|
// Save the active image state
|
||||||
|
activeBDRV.cluster_offset = cluster_offset;
|
||||||
|
activeBDRV.hd = bs;
|
||||||
|
}
|
||||||
|
/* First of all we write grain itself, to avoid race condition
|
||||||
|
* that may corrupt the image.
|
||||||
|
* This problem may occur because of insufficient space on host disk
|
||||||
|
* or inappropriate VM shutdown.
|
||||||
|
*/
|
||||||
if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
|
if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
if (m_data) {
|
||||||
|
m_data->offset = tmp;
|
||||||
|
m_data->l1_index = l1_index;
|
||||||
|
m_data->l2_index = l2_index;
|
||||||
|
m_data->l2_offset = l2_offset;
|
||||||
|
m_data->valid = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
cluster_offset <<= 9;
|
cluster_offset <<= 9;
|
||||||
return cluster_offset;
|
return cluster_offset;
|
||||||
@ -542,7 +613,7 @@ static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
|
|||||||
int index_in_cluster, n;
|
int index_in_cluster, n;
|
||||||
uint64_t cluster_offset;
|
uint64_t cluster_offset;
|
||||||
|
|
||||||
cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
|
cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
|
||||||
index_in_cluster = sector_num % s->cluster_sectors;
|
index_in_cluster = sector_num % s->cluster_sectors;
|
||||||
n = s->cluster_sectors - index_in_cluster;
|
n = s->cluster_sectors - index_in_cluster;
|
||||||
if (n > nb_sectors)
|
if (n > nb_sectors)
|
||||||
@ -559,7 +630,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
|
|||||||
uint64_t cluster_offset;
|
uint64_t cluster_offset;
|
||||||
|
|
||||||
while (nb_sectors > 0) {
|
while (nb_sectors > 0) {
|
||||||
cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
|
cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
|
||||||
index_in_cluster = sector_num % s->cluster_sectors;
|
index_in_cluster = sector_num % s->cluster_sectors;
|
||||||
n = s->cluster_sectors - index_in_cluster;
|
n = s->cluster_sectors - index_in_cluster;
|
||||||
if (n > nb_sectors)
|
if (n > nb_sectors)
|
||||||
@ -590,20 +661,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
|
|||||||
const uint8_t *buf, int nb_sectors)
|
const uint8_t *buf, int nb_sectors)
|
||||||
{
|
{
|
||||||
BDRVVmdkState *s = bs->opaque;
|
BDRVVmdkState *s = bs->opaque;
|
||||||
|
VmdkMetaData m_data;
|
||||||
int index_in_cluster, n;
|
int index_in_cluster, n;
|
||||||
uint64_t cluster_offset;
|
uint64_t cluster_offset;
|
||||||
static int cid_update = 0;
|
static int cid_update = 0;
|
||||||
|
|
||||||
|
if (sector_num > bs->total_sectors) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"(VMDK) Wrong offset: sector_num=0x%lx total_sectors=0x%lx\n",
|
||||||
|
sector_num, bs->total_sectors);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
while (nb_sectors > 0) {
|
while (nb_sectors > 0) {
|
||||||
index_in_cluster = sector_num & (s->cluster_sectors - 1);
|
index_in_cluster = sector_num & (s->cluster_sectors - 1);
|
||||||
n = s->cluster_sectors - index_in_cluster;
|
n = s->cluster_sectors - index_in_cluster;
|
||||||
if (n > nb_sectors)
|
if (n > nb_sectors)
|
||||||
n = nb_sectors;
|
n = nb_sectors;
|
||||||
cluster_offset = get_cluster_offset(bs, sector_num << 9, 1);
|
cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
|
||||||
if (!cluster_offset)
|
if (!cluster_offset)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
|
if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
|
||||||
return -1;
|
return -1;
|
||||||
|
if (m_data.valid) {
|
||||||
|
/* update L2 tables */
|
||||||
|
if (vmdk_L2update(bs, &m_data) == -1)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
nb_sectors -= n;
|
nb_sectors -= n;
|
||||||
sector_num += n;
|
sector_num += n;
|
||||||
buf += n * 512;
|
buf += n * 512;
|
||||||
|
Loading…
Reference in New Issue
Block a user