added user mode Linux Copy On Write disk image support - added -snapshot support (initial patch by Rusty Russell)

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@309 c046a42c-6fe2-441c-8c8c-71466251a162
This commit is contained in:
bellard 2003-07-06 17:15:21 +00:00
parent cd4c3e888a
commit 33e3963e1b
5 changed files with 442 additions and 44 deletions

View File

@ -28,7 +28,7 @@ else
LDFLAGS+=-Wl,-shared
endif
ifeq ($(TARGET_ARCH), i386)
PROGS+=vl
PROGS+=vl vlmkcow
endif
endif
@ -141,7 +141,10 @@ endif
# must use static linking to avoid leaving stuff in virtual address space
vl: vl.o block.o libqemu.a
$(CC) -static -Wl,-T,i386-vl.ld -o $@ $^ $(LIBS)
$(CC) -pg -static -Wl,-T,i386-vl.ld -o $@ $^ $(LIBS)
vlmkcow: vlmkcow.o
$(CC) -o $@ $^ $(LIBS)
depend: $(SRCS)
$(CC) -MM $(CFLAGS) $^ 1>.depend

260
block.c
View File

@ -38,76 +38,286 @@
#include <sys/poll.h>
#include <errno.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include "vl.h"
#define NO_THUNK_TYPE_SIZE
#include "thunk.h"
struct BlockDriverState {
int fd;
int fd; /* if -1, only COW mappings */
int64_t total_sectors;
int read_only;
uint8_t *cow_bitmap; /* if non NULL, COW mappings are used first */
uint8_t *cow_bitmap_addr; /* mmap address of cow_bitmap */
int cow_bitmap_size;
int cow_fd;
int64_t cow_sectors_offset;
char filename[1024];
};
BlockDriverState *bdrv_open(const char *filename)
BlockDriverState *bdrv_open(const char *filename, int snapshot)
{
BlockDriverState *bs;
int fd;
int fd, cow_fd;
int64_t size;
char template[] = "/tmp/vl.XXXXXX";
struct cow_header_v2 cow_header;
struct stat st;
bs = malloc(sizeof(BlockDriverState));
if(!bs)
return NULL;
bs->read_only = 0;
fd = open(filename, O_RDWR);
bs->fd = -1;
bs->cow_fd = -1;
bs->cow_bitmap = NULL;
strcpy(bs->filename, filename);
/* open standard HD image */
fd = open(filename, O_RDWR | O_LARGEFILE);
if (fd < 0) {
fd = open(filename, O_RDONLY);
/* read only image on disk */
fd = open(filename, O_RDONLY | O_LARGEFILE);
if (fd < 0) {
close(fd);
free(bs);
return NULL;
perror(filename);
goto fail;
}
bs->read_only = 1;
if (!snapshot)
bs->read_only = 1;
}
size = lseek64(fd, 0, SEEK_END);
bs->total_sectors = size / 512;
bs->fd = fd;
/* see if it is a cow image */
if (read(fd, &cow_header, sizeof(cow_header)) != sizeof(cow_header)) {
fprintf(stderr, "%s: could not read header\n", filename);
goto fail;
}
if (cow_header.magic == htonl(COW_MAGIC) &&
cow_header.version == htonl(COW_VERSION)) {
/* cow image found */
size = cow_header.size;
#ifndef WORDS_BIGENDIAN
size = bswap64(size);
#endif
bs->total_sectors = size / 512;
bs->cow_fd = fd;
bs->fd = -1;
if (cow_header.backing_file[0] != '\0') {
if (stat(cow_header.backing_file, &st) != 0) {
fprintf(stderr, "%s: could not find original disk image '%s'\n", filename, cow_header.backing_file);
goto fail;
}
if (st.st_mtime != htonl(cow_header.mtime)) {
fprintf(stderr, "%s: original raw disk image '%s' does not match saved timestamp\n", filename, cow_header.backing_file);
goto fail;
}
fd = open(cow_header.backing_file, O_RDONLY | O_LARGEFILE);
if (fd < 0)
goto fail;
bs->fd = fd;
}
/* mmap the bitmap */
bs->cow_bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
bs->cow_bitmap_addr = mmap(get_mmap_addr(bs->cow_bitmap_size),
bs->cow_bitmap_size,
PROT_READ | PROT_WRITE,
MAP_SHARED, bs->cow_fd, 0);
if (bs->cow_bitmap_addr == MAP_FAILED)
goto fail;
bs->cow_bitmap = bs->cow_bitmap_addr + sizeof(cow_header);
bs->cow_sectors_offset = (bs->cow_bitmap_size + 511) & ~511;
snapshot = 0;
} else {
/* standard raw image */
size = lseek64(fd, 0, SEEK_END);
bs->total_sectors = size / 512;
bs->fd = fd;
}
if (snapshot) {
/* create a temporary COW file */
cow_fd = mkstemp(template);
if (cow_fd < 0)
goto fail;
bs->cow_fd = cow_fd;
unlink(template);
/* just need to allocate bitmap */
bs->cow_bitmap_size = (bs->total_sectors + 7) >> 3;
bs->cow_bitmap_addr = mmap(get_mmap_addr(bs->cow_bitmap_size),
bs->cow_bitmap_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (bs->cow_bitmap_addr == MAP_FAILED)
goto fail;
bs->cow_bitmap = bs->cow_bitmap_addr;
bs->cow_sectors_offset = 0;
}
return bs;
fail:
bdrv_close(bs);
return NULL;
}
void bdrv_close(BlockDriverState *bs)
{
close(bs->fd);
/* we unmap the mapping so that it is written to the COW file */
if (bs->cow_bitmap_addr)
munmap(bs->cow_bitmap_addr, bs->cow_bitmap_size);
if (bs->cow_fd >= 0)
close(bs->cow_fd);
if (bs->fd >= 0)
close(bs->fd);
free(bs);
}
static inline void set_bit(uint8_t *bitmap, int64_t bitnum)
{
bitmap[bitnum / 8] |= (1 << (bitnum%8));
}
static inline int is_bit_set(const uint8_t *bitmap, int64_t bitnum)
{
return !!(bitmap[bitnum / 8] & (1 << (bitnum%8)));
}
/* Return true if first block has been changed (ie. current version is
* in COW file). Set the number of continuous blocks for which that
* is true. */
static int is_changed(uint8_t *bitmap,
int64_t sector_num, int nb_sectors,
int *num_same)
{
int changed;
if (!bitmap || nb_sectors == 0) {
*num_same = nb_sectors;
return 0;
}
changed = is_bit_set(bitmap, sector_num);
for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
if (is_bit_set(bitmap, sector_num + *num_same) != changed)
break;
}
return changed;
}
/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
int64_t i;
uint8_t *cow_bitmap;
if (!bs->cow_bitmap) {
fprintf(stderr, "Already committed to %s\n", bs->filename);
return 0;
}
if (bs->read_only) {
fprintf(stderr, "Can't commit to %s: read-only\n", bs->filename);
return -1;
}
cow_bitmap = bs->cow_bitmap;
for (i = 0; i < bs->total_sectors; i++) {
if (is_bit_set(cow_bitmap, i)) {
unsigned char sector[512];
if (bdrv_read(bs, i, sector, 1) != 0) {
fprintf(stderr, "Error reading sector %lli: aborting commit\n",
(long long)i);
return -1;
}
/* Make bdrv_write write to real file for a moment. */
bs->cow_bitmap = NULL;
if (bdrv_write(bs, i, sector, 1) != 0) {
fprintf(stderr, "Error writing sector %lli: aborting commit\n",
(long long)i);
bs->cow_bitmap = cow_bitmap;
return -1;
}
bs->cow_bitmap = cow_bitmap;
}
}
fprintf(stderr, "Committed snapshot to %s\n", bs->filename);
return 0;
}
/* return -1 if error */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
int ret;
int ret, n, fd;
int64_t offset;
while (nb_sectors > 0) {
if (is_changed(bs->cow_bitmap, sector_num, nb_sectors, &n)) {
fd = bs->cow_fd;
offset = bs->cow_sectors_offset;
} else {
fd = bs->fd;
offset = 0;
}
lseek64(bs->fd, sector_num * 512, SEEK_SET);
ret = read(bs->fd, buf, nb_sectors * 512);
if (ret != nb_sectors * 512)
return -1;
else
return 0;
if (fd < 0) {
/* no file, just return empty sectors */
memset(buf, 0, n * 512);
} else {
offset += sector_num * 512;
lseek64(fd, offset, SEEK_SET);
ret = read(fd, buf, n * 512);
if (ret != n * 512) {
return -1;
}
}
nb_sectors -= n;
sector_num += n;
buf += n * 512;
}
return 0;
}
/* return -1 if error */
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
int ret;
int ret, fd, i;
int64_t offset, retl;
if (bs->read_only)
return -1;
lseek64(bs->fd, sector_num * 512, SEEK_SET);
ret = write(bs->fd, buf, nb_sectors * 512);
if (ret != nb_sectors * 512)
if (bs->cow_bitmap) {
fd = bs->cow_fd;
offset = bs->cow_sectors_offset;
} else {
fd = bs->fd;
offset = 0;
}
offset += sector_num * 512;
retl = lseek64(fd, offset, SEEK_SET);
if (retl == -1) {
return -1;
else
return 0;
}
ret = write(fd, buf, nb_sectors * 512);
if (ret != nb_sectors * 512) {
return -1;
}
if (bs->cow_bitmap) {
for (i = 0; i < nb_sectors; i++)
set_bit(bs->cow_bitmap, sector_num + i);
}
return 0;
}
void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr)

59
vl.c
View File

@ -63,6 +63,8 @@
#define INITRD_LOAD_ADDR 0x00400000
#define KERNEL_PARAMS_ADDR 0x00090000
#define MAX_DISKS 2
/* from plex86 (BSD license) */
struct __attribute__ ((packed)) linux_params {
// For 0x00..0x3f, see 'struct screen_info' in linux/include/linux/tty.h.
@ -190,6 +192,7 @@ FILE *logfile = NULL;
int loglevel;
IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS];
IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS];
BlockDriverState *bs_table[MAX_DISKS];
/***********************************************************/
/* x86 io ports */
@ -1265,6 +1268,7 @@ void term_print_help(void)
printf("\n"
"C-a h print this help\n"
"C-a x exit emulatior\n"
"C-a s save disk data back to file (if -snapshot)\n"
"C-a b send break (magic sysrq)\n"
"C-a C-a send C-a\n"
);
@ -1282,6 +1286,15 @@ void serial_received_byte(SerialState *s, int ch)
case 'x':
exit(0);
break;
case 's':
{
int i;
for (i = 0; i < MAX_DISKS; i++) {
if (bs_table[i])
bdrv_commit(bs_table[i]);
}
}
break;
case 'b':
/* send break */
s->rbr = 0;
@ -1976,8 +1989,6 @@ void ne2000_init(void)
/* set to 1 set disable mult support */
#define MAX_MULT_SECTORS 8
#define MAX_DISKS 2
struct IDEState;
typedef void EndTransferFunc(struct IDEState *);
@ -2009,7 +2020,6 @@ typedef struct IDEState {
uint8_t io_buffer[MAX_MULT_SECTORS*512 + 4];
} IDEState;
BlockDriverState *bs_table[MAX_DISKS];
IDEState ide_state[MAX_DISKS];
static void padstr(char *str, const char *src, int len)
@ -2513,6 +2523,16 @@ static void host_alarm_handler(int host_signum, siginfo_t *info,
}
}
unsigned long mmap_addr = PHYS_RAM_BASE;
void *get_mmap_addr(unsigned long size)
{
unsigned long addr;
addr = mmap_addr;
mmap_addr += ((size + 4095) & ~4095) + 4096;
return (void *)addr;
}
/* main execution loop */
CPUState *cpu_gdbstub_get_env(void *opaque)
@ -2612,6 +2632,7 @@ void help(void)
"-initrd file use 'file' as initial ram disk\n"
"-hda file use 'file' as hard disk 0 image\n"
"-hdb file use 'file' as hard disk 1 image\n"
"-snapshot write to temporary files instead of disk image files\n"
"-m megs set virtual RAM size to megs MB\n"
"-n script set network init script [default=%s]\n"
"\n"
@ -2630,12 +2651,14 @@ struct option long_options[] = {
{ "initrd", 1, NULL, 0, },
{ "hda", 1, NULL, 0, },
{ "hdb", 1, NULL, 0, },
{ "snapshot", 0, NULL, 0, },
{ NULL, 0, NULL, 0 },
};
int main(int argc, char **argv)
{
int c, ret, initrd_size, i, use_gdbstub, gdbstub_port, long_index;
int snapshot;
struct linux_params *params;
struct sigaction act;
struct itimerval itv;
@ -2652,6 +2675,7 @@ int main(int argc, char **argv)
pstrcpy(network_script, sizeof(network_script), DEFAULT_NETWORK_SCRIPT);
use_gdbstub = 0;
gdbstub_port = DEFAULT_GDBSTUB_PORT;
snapshot = 0;
for(;;) {
c = getopt_long_only(argc, argv, "hm:dn:sp:", long_options, &long_index);
if (c == -1)
@ -2668,6 +2692,9 @@ int main(int argc, char **argv)
case 2:
hd_filename[1] = optarg;
break;
case 3:
snapshot = 1;
break;
}
break;
case 'h':
@ -2711,18 +2738,6 @@ int main(int argc, char **argv)
setvbuf(logfile, NULL, _IOLBF, 0);
}
/* open the virtual block devices */
for(i = 0; i < MAX_DISKS; i++) {
if (hd_filename[i]) {
bs_table[i] = bdrv_open(hd_filename[i]);
if (!bs_table[i]) {
fprintf(stderr, "vl: could not open hard disk image '%s\n",
hd_filename[i]);
exit(1);
}
}
}
/* init network tun interface */
net_init();
@ -2744,7 +2759,7 @@ int main(int argc, char **argv)
}
ftruncate(phys_ram_fd, phys_ram_size);
unlink(phys_ram_file);
phys_ram_base = mmap((void *)PHYS_RAM_BASE, phys_ram_size,
phys_ram_base = mmap(get_mmap_addr(phys_ram_size), phys_ram_size,
PROT_WRITE | PROT_READ, MAP_SHARED | MAP_FIXED,
phys_ram_fd, 0);
if (phys_ram_base == MAP_FAILED) {
@ -2752,6 +2767,18 @@ int main(int argc, char **argv)
exit(1);
}
/* open the virtual block devices */
for(i = 0; i < MAX_DISKS; i++) {
if (hd_filename[i]) {
bs_table[i] = bdrv_open(hd_filename[i], snapshot);
if (!bs_table[i]) {
fprintf(stderr, "vl: could not open hard disk image '%s\n",
hd_filename[i]);
exit(1);
}
}
}
/* now we can load the kernel */
ret = load_kernel(argv[optind], phys_ram_base + KERNEL_LOAD_ADDR);
if (ret < 0) {

18
vl.h
View File

@ -24,16 +24,32 @@
#ifndef VL_H
#define VL_H
/* vl.c */
void *get_mmap_addr(unsigned long size);
/* block.c */
typedef struct BlockDriverState BlockDriverState;
BlockDriverState *bdrv_open(const char *filename);
BlockDriverState *bdrv_open(const char *filename, int snapshot);
void bdrv_close(BlockDriverState *bs);
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors);
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr);
int bdrv_commit(BlockDriverState *bs);
/* user mode linux compatible COW file */
#define COW_MAGIC 0x4f4f4f4d /* MOOO */
#define COW_VERSION 2
struct cow_header_v2 {
uint32_t magic;
uint32_t long version;
char backing_file[1024];
int32_t mtime;
uint64_t size;
uint32_t sectorsize;
};
#endif /* VL_H */

142
vlmkcow.c Normal file
View File

@ -0,0 +1,142 @@
/*
* create a COW disk image
*
* Copyright (c) 2003 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <getopt.h>
#include <inttypes.h>
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <signal.h>
#include <time.h>
#include <sys/time.h>
#include <malloc.h>
#include <termios.h>
#include <sys/poll.h>
#include <errno.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include "vl.h"
#define NO_THUNK_TYPE_SIZE
#include "thunk.h"
int cow_create(int cow_fd, const char *image_filename,
int64_t image_sectors)
{
struct cow_header_v2 cow_header;
int fd;
struct stat st;
memset(&cow_header, 0, sizeof(cow_header));
cow_header.magic = htonl(COW_MAGIC);
cow_header.version = htonl(COW_VERSION);
if (image_filename) {
fd = open(image_filename, O_RDONLY);
if (fd < 0) {
perror(image_filename);
exit(1);
}
image_sectors = lseek64(fd, 0, SEEK_END);
if (fstat(fd, &st) != 0) {
close(fd);
return -1;
}
close(fd);
image_sectors /= 512;
cow_header.mtime = htonl(st.st_mtime);
realpath(image_filename, cow_header.backing_file);
}
cow_header.sectorsize = htonl(512);
cow_header.size = image_sectors * 512;
#ifndef WORDS_BIGENDIAN
cow_header.size = bswap64(cow_header.size);
#endif
write(cow_fd, &cow_header, sizeof(cow_header));
/* resize to include at least all the bitmap */
ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3));
lseek(cow_fd, 0, SEEK_SET);
return 0;
}
void help(void)
{
printf("usage vlmkcow [-h] [-f disk_image] cow_image [cow_size]\n"
"Create a Copy On Write disk image from an optional raw disk image\n"
"\n"
"-f disk_image set the raw disk image name\n"
"cow_image the created cow_image\n"
"cow_size the create cow_image size in MB if no raw disk image is used\n"
"\n"
"Once the cow_image is created from a raw disk image, you must not modify the original raw disk image\n"
);
exit(1);
}
int main(int argc, char **argv)
{
const char *image_filename, *cow_filename;
int cow_fd, c, nb_args;
int64_t image_size;
image_filename = NULL;
image_size = 0;
for(;;) {
c = getopt(argc, argv, "hf:");
if (c == -1)
break;
switch(c) {
case 'h':
help();
break;
case 'f':
image_filename = optarg;
break;
}
}
if (!image_filename)
nb_args = 2;
else
nb_args = 1;
if (optind + nb_args != argc)
help();
cow_filename = argv[optind];
if (nb_args == 2) {
image_size = (int64_t)atoi(argv[optind + 1]) * 2 * 1024;
}
cow_fd = open(cow_filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
if (!cow_fd < 0)
return -1;
if (cow_create(cow_fd, image_filename, image_size) < 0) {
fprintf(stderr, "%s: error while formating\n", cow_filename);
exit(1);
}
close(cow_fd);
return 0;
}