You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
8242 lines
242 KiB
8242 lines
242 KiB
/*
|
|
* QEMU System Emulator block driver
|
|
*
|
|
* Copyright (c) 2003 Fabrice Bellard
|
|
* Copyright (c) 2020 Virtuozzo International GmbH.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "block/trace.h"
|
|
#include "block/block_int.h"
|
|
#include "block/blockjob.h"
|
|
#include "block/fuse.h"
|
|
#include "block/nbd.h"
|
|
#include "block/qdict.h"
|
|
#include "qemu/error-report.h"
|
|
#include "block/module_block.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "qemu/module.h"
|
|
#include "qapi/error.h"
|
|
#include "qapi/qmp/qdict.h"
|
|
#include "qapi/qmp/qjson.h"
|
|
#include "qapi/qmp/qnull.h"
|
|
#include "qapi/qmp/qstring.h"
|
|
#include "qapi/qobject-output-visitor.h"
|
|
#include "qapi/qapi-visit-block-core.h"
|
|
#include "sysemu/block-backend.h"
|
|
#include "qemu/notify.h"
|
|
#include "qemu/option.h"
|
|
#include "qemu/coroutine.h"
|
|
#include "block/qapi.h"
|
|
#include "qemu/timer.h"
|
|
#include "qemu/cutils.h"
|
|
#include "qemu/id.h"
|
|
#include "qemu/range.h"
|
|
#include "qemu/rcu.h"
|
|
#include "block/coroutines.h"
|
|
|
|
#ifdef CONFIG_BSD
|
|
#include <sys/ioctl.h>
|
|
#include <sys/queue.h>
|
|
#if defined(HAVE_SYS_DISK_H)
|
|
#include <sys/disk.h>
|
|
#endif
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
#include <windows.h>
|
|
#endif
|
|
|
|
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
|
|
|
|
/* Protected by BQL */
|
|
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
|
|
QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
|
|
|
|
/* Protected by BQL */
|
|
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
|
|
QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
|
|
|
|
/* Protected by BQL */
|
|
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
|
|
QLIST_HEAD_INITIALIZER(bdrv_drivers);
|
|
|
|
static BlockDriverState *bdrv_open_inherit(const char *filename,
|
|
const char *reference,
|
|
QDict *options, int flags,
|
|
BlockDriverState *parent,
|
|
const BdrvChildClass *child_class,
|
|
BdrvChildRole child_role,
|
|
Error **errp);
|
|
|
|
static bool bdrv_recurse_has_child(BlockDriverState *bs,
|
|
BlockDriverState *child);
|
|
|
|
static void bdrv_child_free(BdrvChild *child);
|
|
static void bdrv_replace_child_noperm(BdrvChild **child,
|
|
BlockDriverState *new_bs,
|
|
bool free_empty_child);
|
|
static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
|
|
BdrvChild *child,
|
|
Transaction *tran);
|
|
static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
|
|
Transaction *tran);
|
|
|
|
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
|
|
BlockReopenQueue *queue,
|
|
Transaction *change_child_tran, Error **errp);
|
|
static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
|
|
static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
|
|
|
|
static bool bdrv_backing_overridden(BlockDriverState *bs);
|
|
|
|
/* If non-zero, use only whitelisted block drivers */
|
|
static int use_bdrv_whitelist;
|
|
|
|
#ifdef _WIN32
|
|
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed
 * by ':' (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];

    if (!((letter >= 'a' && letter <= 'z') ||
          (letter >= 'A' && letter <= 'Z'))) {
        return 0;
    }

    return filename[1] == ':';
}
|
|
|
|
/*
 * Return 1 if @filename is exactly a drive specification such as "d:",
 * or starts with the prefix "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* A drive letter, a colon, and nothing else */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
|
|
#endif
|
|
|
|
/*
 * Return the optimal memory alignment recorded in @bs's block limits.
 *
 * When @bs is NULL or has no driver, fall back to the larger of 4096 and
 * the real host page size.
 */
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (bs && bs->drv) {
        IO_CODE();

        return bs->bl.opt_mem_alignment;
    }

    /* page size or 4k (hdd sector size) should be on the safe side */
    return MAX(4096, qemu_real_host_page_size());
}
|
|
|
|
/*
 * Return the minimum memory alignment recorded in @bs's block limits.
 *
 * When @bs is NULL or has no driver, fall back to the larger of 4096 and
 * the real host page size.
 */
size_t bdrv_min_mem_align(BlockDriverState *bs)
{
    if (bs && bs->drv) {
        IO_CODE();

        return bs->bl.min_mem_alignment;
    }

    /* page size or 4k (hdd sector size) should be on the safe side */
    return MAX(4096, qemu_real_host_page_size());
}
|
|
|
|
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' occurs
 * before the first directory separator.
 *
 * On Windows, drive specifications are never treated as protocols and
 * '\\' counts as a separator as well.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
    size_t prefix_len;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    prefix_len = strcspn(path, ":/\\");
#else
    prefix_len = strcspn(path, ":/");
#endif

    /* The scan stopped at ':', at a separator, or at the terminating NUL */
    return path[prefix_len] == ':';
}
|
|
|
|
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it begins with '/'.  On Windows, a leading '\\'
 * and anything recognised by is_windows_drive() or
 * is_windows_drive_prefix() count as absolute too.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
|
|
|
|
/*
 * Combine @filename with the directory part of @base_path.
 *
 * If @filename is absolute, a duplicate of it is returned.  Otherwise the
 * result consists of the leading part of @base_path followed by @filename.
 * The leading part ends just after the last directory separator or, if
 * @base_path has a "<protocol>:" prefix whose ':' comes later than any
 * separator, just after that ':'.  URLs are supported.
 *
 * The result is allocated with g_strdup()/g_malloc(); the caller is
 * expected to release it with g_free().
 */
char *path_combine(const char *base_path, const char *filename)
{
    const char *protocol_stripped = NULL;
    const char *p, *p1;
    char *result;
    size_t len;

    if (path_is_absolute(filename)) {
        return g_strdup(filename);
    }

    if (path_has_protocol(base_path)) {
        protocol_stripped = strchr(base_path, ':');
        if (protocol_stripped) {
            protocol_stripped++;
        }
    }
    p = protocol_stripped ?: base_path;

    p1 = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *p2 = strrchr(base_path, '\\');

        /*
         * Prefer whichever separator comes last.  Only compare the two
         * pointers when p2 is valid: a relational comparison involving a
         * null pointer is undefined behaviour.
         */
        if (p2 && (!p1 || p2 > p1)) {
            p1 = p2;
        }
    }
#endif
    if (p1) {
        /* Keep the separator itself in the prefix */
        p1++;
    } else {
        p1 = base_path;
    }
    if (p1 > p) {
        p = p1;
    }

    /* p never points before base_path, so the difference is non-negative */
    len = p - base_path;

    result = g_malloc(len + strlen(filename) + 1);
    memcpy(result, base_path, len);
    strcpy(result + len, filename);

    return result;
}
|
|
|
|
/*
|
|
* Helper function for bdrv_parse_filename() implementations to remove optional
|
|
* protocol prefixes (especially "file:") from a filename and for putting the
|
|
* stripped filename into the options QDict if there is such a prefix.
|
|
*/
|
|
void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
|
|
QDict *options)
|
|
{
|
|
if (strstart(filename, prefix, &filename)) {
|
|
/* Stripping the explicit protocol prefix may result in a protocol
|
|
* prefix being (wrongly) detected (if the filename contains a colon) */
|
|
if (path_has_protocol(filename)) {
|
|
GString *fat_filename;
|
|
|
|
/* This means there is some colon before the first slash; therefore,
|
|
* this cannot be an absolute path */
|
|
assert(!path_is_absolute(filename));
|
|
|
|
/* And we can thus fix the protocol detection issue by prefixing it
|
|
* by "./" */
|
|
fat_filename = g_string_new("./");
|
|
g_string_append(fat_filename, filename);
|
|
|
|
assert(!path_has_protocol(fat_filename->str));
|
|
|
|
qdict_put(options, "filename",
|
|
qstring_from_gstring(fat_filename));
|
|
} else {
|
|
/* If no protocol prefix was detected, we can use the shortened
|
|
* filename as-is */
|
|
qdict_put_str(options, "filename", filename);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* Returns whether the image file is opened as read-only. Note that this can
|
|
* return false and writing to the image file is still not possible because the
|
|
* image is inactivated. */
|
|
bool bdrv_is_read_only(BlockDriverState *bs)
|
|
{
|
|
IO_CODE();
|
|
return !(bs->open_flags & BDRV_O_RDWR);
|
|
}
|
|
|
|
/*
 * Check whether @bs may be switched to the read-only state @read_only.
 *
 * Returns 0 if the change is permitted.
 * Returns -EINVAL (and sets @errp) when read-only is requested while
 * copy-on-read is enabled.
 * Returns -EPERM (and sets @errp) when read-write is requested,
 * BDRV_O_ALLOW_RDWR is not set and @ignore_allow_rdw is false.
 */
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
                           bool ignore_allow_rdw, Error **errp)
{
    IO_CODE();

    if (read_only) {
        /* Do not set read_only if copy_on_read is enabled */
        if (bs->copy_on_read) {
            error_setg(errp,
                       "Can't set node '%s' to r/o with copy-on-read enabled",
                       bdrv_get_device_or_node_name(bs));
            return -EINVAL;
        }
        return 0;
    }

    /* Do not clear read_only if it is prohibited */
    if (!ignore_allow_rdw && !(bs->open_flags & BDRV_O_ALLOW_RDWR)) {
        error_setg(errp, "Node '%s' is read only",
                   bdrv_get_device_or_node_name(bs));
        return -EPERM;
    }

    return 0;
}
|
|
|
|
/*
|
|
* Called by a driver that can only provide a read-only image.
|
|
*
|
|
* Returns 0 if the node is already read-only or it could switch the node to
|
|
* read-only because BDRV_O_AUTO_RDONLY is set.
|
|
*
|
|
* Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set
|
|
* or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg
|
|
* is not NULL, it is used as the error message for the Error object.
|
|
*/
|
|
int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
|
|
Error **errp)
|
|
{
|
|
int ret = 0;
|
|
IO_CODE();
|
|
|
|
if (!(bs->open_flags & BDRV_O_RDWR)) {
|
|
return 0;
|
|
}
|
|
if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) {
|
|
goto fail;
|
|
}
|
|
|
|
ret = bdrv_can_set_read_only(bs, true, false, NULL);
|
|
if (ret < 0) {
|
|
goto fail;
|
|
}
|
|
|
|
bs->open_flags &= ~BDRV_O_RDWR;
|
|
|
|
return 0;
|
|
|
|
fail:
|
|
error_setg(errp, "%s", errmsg ?: "Image is read-only");
|
|
return -EACCES;
|
|
}
|
|
|
|
/*
|
|
* If @backing is empty, this function returns NULL without setting
|
|
* @errp. In all other cases, NULL will only be returned with @errp
|
|
* set.
|
|
*
|
|
* Therefore, a return value of NULL without @errp set means that
|
|
* there is no backing file; if @errp is set, there is one but its
|
|
* absolute filename cannot be generated.
|
|
*/
|
|
char *bdrv_get_full_backing_filename_from_filename(const char *backed,
|
|
const char *backing,
|
|
Error **errp)
|
|
{
|
|
if (backing[0] == '\0') {
|
|
return NULL;
|
|
} else if (path_has_protocol(backing) || path_is_absolute(backing)) {
|
|
return g_strdup(backing);
|
|
} else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
|
|
error_setg(errp, "Cannot use relative backing file names for '%s'",
|
|
backed);
|
|
return NULL;
|
|
} else {
|
|
return path_combine(backed, backing);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If @filename is empty or NULL, this function returns NULL without
|
|
* setting @errp. In all other cases, NULL will only be returned with
|
|
* @errp set.
|
|
*/
|
|
static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
|
|
const char *filename, Error **errp)
|
|
{
|
|
char *dir, *full_name;
|
|
|
|
if (!filename || filename[0] == '\0') {
|
|
return NULL;
|
|
} else if (path_has_protocol(filename) || path_is_absolute(filename)) {
|
|
return g_strdup(filename);
|
|
}
|
|
|
|
dir = bdrv_dirname(relative_to, errp);
|
|
if (!dir) {
|
|
return NULL;
|
|
}
|
|
|
|
full_name = g_strconcat(dir, filename, NULL);
|
|
g_free(dir);
|
|
return full_name;
|
|
}
|
|
|
|
/*
 * Return the result of bdrv_make_absolute_filename() for @bs's
 * backing_file, resolved relative to @bs itself.
 */
char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
{
    const char *backing_name;

    GLOBAL_STATE_CODE();

    backing_name = bs->backing_file;
    return bdrv_make_absolute_filename(bs, backing_name, errp);
}
|
|
|
|
/*
 * Add @bdrv to the head of the list of known block drivers.
 * The driver must have a format name.
 */
void bdrv_register(BlockDriver *bdrv)
{
    assert(bdrv->format_name != NULL);
    GLOBAL_STATE_CODE();

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
|
|
|
|
BlockDriverState *bdrv_new(void)
|
|
{
|
|
BlockDriverState *bs;
|
|
int i;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bs = g_new0(BlockDriverState, 1);
|
|
QLIST_INIT(&bs->dirty_bitmaps);
|
|
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
|
|
QLIST_INIT(&bs->op_blockers[i]);
|
|
}
|
|
qemu_co_mutex_init(&bs->reqs_lock);
|
|
qemu_mutex_init(&bs->dirty_bitmap_mutex);
|
|
bs->refcnt = 1;
|
|
bs->aio_context = qemu_get_aio_context();
|
|
|
|
qemu_co_queue_init(&bs->flush_queue);
|
|
|
|
qemu_co_mutex_init(&bs->bsc_modify_lock);
|
|
bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
|
|
|
|
for (i = 0; i < bdrv_drain_all_count; i++) {
|
|
bdrv_drained_begin(bs);
|
|
}
|
|
|
|
QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
|
|
|
|
return bs;
|
|
}
|
|
|
|
static BlockDriver *bdrv_do_find_format(const char *format_name)
|
|
{
|
|
BlockDriver *drv1;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
QLIST_FOREACH(drv1, &bdrv_drivers, list) {
|
|
if (!strcmp(drv1->format_name, format_name)) {
|
|
return drv1;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
BlockDriver *bdrv_find_format(const char *format_name)
|
|
{
|
|
BlockDriver *drv1;
|
|
int i;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
drv1 = bdrv_do_find_format(format_name);
|
|
if (drv1) {
|
|
return drv1;
|
|
}
|
|
|
|
/* The driver isn't registered, maybe we need to load a module */
|
|
for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
|
|
if (!strcmp(block_driver_modules[i].format_name, format_name)) {
|
|
block_module_load_one(block_driver_modules[i].library_name);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return bdrv_do_find_format(format_name);
|
|
}
|
|
|
|
static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
|
|
{
|
|
static const char *whitelist_rw[] = {
|
|
CONFIG_BDRV_RW_WHITELIST
|
|
NULL
|
|
};
|
|
static const char *whitelist_ro[] = {
|
|
CONFIG_BDRV_RO_WHITELIST
|
|
NULL
|
|
};
|
|
const char **p;
|
|
|
|
if (!whitelist_rw[0] && !whitelist_ro[0]) {
|
|
return 1; /* no whitelist, anything goes */
|
|
}
|
|
|
|
for (p = whitelist_rw; *p; p++) {
|
|
if (!strcmp(format_name, *p)) {
|
|
return 1;
|
|
}
|
|
}
|
|
if (read_only) {
|
|
for (p = whitelist_ro; *p; p++) {
|
|
if (!strcmp(format_name, *p)) {
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Return the result of bdrv_format_is_whitelisted() for @drv's format
 * name and the given @read_only mode.
 */
int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    const char *name;

    GLOBAL_STATE_CODE();

    name = drv->format_name;
    return bdrv_format_is_whitelisted(name, read_only);
}
|
|
|
|
bool bdrv_uses_whitelist(void)
|
|
{
|
|
return use_bdrv_whitelist;
|
|
}
|
|
|
|
typedef struct CreateCo {
|
|
BlockDriver *drv;
|
|
char *filename;
|
|
QemuOpts *opts;
|
|
int ret;
|
|
Error *err;
|
|
} CreateCo;
|
|
|
|
/*
 * Coroutine entry point for bdrv_create(): call the driver's
 * bdrv_co_create_opts() with the arguments stored in the CreateCo passed
 * as @opaque, then store the error and the return value back into it.
 */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *request = opaque;
    Error *create_err = NULL;
    int result;

    assert(request->drv);
    GLOBAL_STATE_CODE();

    result = request->drv->bdrv_co_create_opts(request->drv,
                                               request->filename,
                                               request->opts,
                                               &create_err);
    error_propagate(&request->err, create_err);

    /* Written last: bdrv_create() polls until ret leaves NOT_DONE */
    request->ret = result;
}
|
|
|
|
int bdrv_create(BlockDriver *drv, const char* filename,
|
|
QemuOpts *opts, Error **errp)
|
|
{
|
|
int ret;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
Coroutine *co;
|
|
CreateCo cco = {
|
|
.drv = drv,
|
|
.filename = g_strdup(filename),
|
|
.opts = opts,
|
|
.ret = NOT_DONE,
|
|
.err = NULL,
|
|
};
|
|
|
|
if (!drv->bdrv_co_create_opts) {
|
|
error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
|
|
ret = -ENOTSUP;
|
|
goto out;
|
|
}
|
|
|
|
if (qemu_in_coroutine()) {
|
|
/* Fast-path if already in coroutine context */
|
|
bdrv_create_co_entry(&cco);
|
|
} else {
|
|
co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
|
|
qemu_coroutine_enter(co);
|
|
while (cco.ret == NOT_DONE) {
|
|
aio_poll(qemu_get_aio_context(), true);
|
|
}
|
|
}
|
|
|
|
ret = cco.ret;
|
|
if (ret < 0) {
|
|
if (cco.err) {
|
|
error_propagate(errp, cco.err);
|
|
} else {
|
|
error_setg_errno(errp, -ret, "Could not create image");
|
|
}
|
|
}
|
|
|
|
out:
|
|
g_free(cco.filename);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Helper function for bdrv_create_file_fallback(): Resize @blk to at
|
|
* least the given @minimum_size.
|
|
*
|
|
* On success, return @blk's actual length.
|
|
* Otherwise, return -errno.
|
|
*/
|
|
static int64_t create_file_fallback_truncate(BlockBackend *blk,
|
|
int64_t minimum_size, Error **errp)
|
|
{
|
|
Error *local_err = NULL;
|
|
int64_t size;
|
|
int ret;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
|
|
&local_err);
|
|
if (ret < 0 && ret != -ENOTSUP) {
|
|
error_propagate(errp, local_err);
|
|
return ret;
|
|
}
|
|
|
|
size = blk_getlength(blk);
|
|
if (size < 0) {
|
|
error_free(local_err);
|
|
error_setg_errno(errp, -size,
|
|
"Failed to inquire the new image file's length");
|
|
return size;
|
|
}
|
|
|
|
if (size < minimum_size) {
|
|
/* Need to grow the image, but we failed to do that */
|
|
error_propagate(errp, local_err);
|
|
return -ENOTSUP;
|
|
}
|
|
|
|
error_free(local_err);
|
|
local_err = NULL;
|
|
|
|
return size;
|
|
}
|
|
|
|
/**
|
|
* Helper function for bdrv_create_file_fallback(): Zero the first
|
|
* sector to remove any potentially pre-existing image header.
|
|
*/
|
|
static int create_file_fallback_zero_first_sector(BlockBackend *blk,
|
|
int64_t current_size,
|
|
Error **errp)
|
|
{
|
|
int64_t bytes_to_clear;
|
|
int ret;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
|
|
if (bytes_to_clear) {
|
|
ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
|
|
if (ret < 0) {
|
|
error_setg_errno(errp, -ret,
|
|
"Failed to clear the new image's first sector");
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Simple implementation of bdrv_co_create_opts for protocol drivers
|
|
* which only support creation via opening a file
|
|
* (usually existing raw storage device)
|
|
*/
|
|
int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
|
|
const char *filename,
|
|
QemuOpts *opts,
|
|
Error **errp)
|
|
{
|
|
BlockBackend *blk;
|
|
QDict *options;
|
|
int64_t size = 0;
|
|
char *buf = NULL;
|
|
PreallocMode prealloc;
|
|
Error *local_err = NULL;
|
|
int ret;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
|
|
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
|
|
prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
|
|
PREALLOC_MODE_OFF, &local_err);
|
|
g_free(buf);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (prealloc != PREALLOC_MODE_OFF) {
|
|
error_setg(errp, "Unsupported preallocation mode '%s'",
|
|
PreallocMode_str(prealloc));
|
|
return -ENOTSUP;
|
|
}
|
|
|
|
options = qdict_new();
|
|
qdict_put_str(options, "driver", drv->format_name);
|
|
|
|
blk = blk_new_open(filename, NULL, options,
|
|
BDRV_O_RDWR | BDRV_O_RESIZE, errp);
|
|
if (!blk) {
|
|
error_prepend(errp, "Protocol driver '%s' does not support image "
|
|
"creation, and opening the image failed: ",
|
|
drv->format_name);
|
|
return -EINVAL;
|
|
}
|
|
|
|
size = create_file_fallback_truncate(blk, size, errp);
|
|
if (size < 0) {
|
|
ret = size;
|
|
goto out;
|
|
}
|
|
|
|
ret = create_file_fallback_zero_first_sector(blk, size, errp);
|
|
if (ret < 0) {
|
|
goto out;
|
|
}
|
|
|
|
ret = 0;
|
|
out:
|
|
blk_unref(blk);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Create the file @filename with the protocol driver found for it by
 * bdrv_find_protocol().
 *
 * Returns -ENOENT if no protocol driver is found, -ENOTSUP if the driver
 * has no create_opts, -EINVAL if the options cannot be converted, and
 * the result of bdrv_create() otherwise.
 */
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *proto_drv;
    QemuOpts *protocol_opts;
    QDict *set_options;
    int ret;

    GLOBAL_STATE_CODE();

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return -ENOENT;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Driver '%s' does not support image creation",
                   proto_drv->format_name);
        return -ENOTSUP;
    }

    /*
     * 'opts' contains a QemuOptsList with a combination of format and
     * protocol default values.
     *
     * The format properly removes its options, but the default values
     * remain in 'opts->list'.  So if the protocol has options with the
     * same name (e.g. rbd has 'cluster_size' as qcow2), it will see the
     * default values of the format, since for overlapping options, the
     * format wins.
     *
     * To avoid this issue, let's convert QemuOpts to QDict; in this way we
     * take only the set options, and then convert it back to QemuOpts,
     * using the create_opts of the protocol.  So the new QemuOpts will
     * contain only the protocol defaults.
     */
    set_options = qemu_opts_to_qdict(opts, NULL);
    protocol_opts = qemu_opts_from_qdict(proto_drv->create_opts, set_options,
                                         errp);
    if (protocol_opts) {
        ret = bdrv_create(proto_drv, filename, protocol_opts, errp);
    } else {
        ret = -EINVAL;
    }

    qemu_opts_del(protocol_opts);
    qobject_unref(set_options);
    return ret;
}
|
|
|
|
/*
 * Delete the image file behind @bs through the driver's
 * bdrv_co_delete_file() callback.
 *
 * Returns -ENOMEDIUM if @bs has no driver, -ENOTSUP if the driver does
 * not implement deletion (both with @errp set), and the callback's return
 * value otherwise.  A negative callback result propagates its error to
 * @errp.
 */
int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
{
    Error *delete_err = NULL;
    BlockDriver *driver;
    int rc;

    IO_CODE();
    assert(bs != NULL);

    driver = bs->drv;
    if (driver == NULL) {
        error_setg(errp, "Block node '%s' is not opened", bs->filename);
        return -ENOMEDIUM;
    }

    if (driver->bdrv_co_delete_file == NULL) {
        error_setg(errp, "Driver '%s' does not support image deletion",
                   driver->format_name);
        return -ENOTSUP;
    }

    rc = driver->bdrv_co_delete_file(bs, &delete_err);
    if (rc < 0) {
        error_propagate(errp, delete_err);
    }

    return rc;
}
|
|
|
|
/*
 * Like bdrv_co_delete_file(), but without an error return: does nothing
 * for a NULL @bs, silently ignores -ENOTSUP and reports any other
 * failure with error_report_err().
 */
void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs)
{
    Error *delete_err = NULL;
    int rc;

    IO_CODE();

    if (bs == NULL) {
        return;
    }

    rc = bdrv_co_delete_file(bs, &delete_err);
    if (rc >= 0) {
        return;
    }

    if (rc == -ENOTSUP) {
        /*
         * ENOTSUP will happen if the block driver doesn't support
         * the 'bdrv_co_delete_file' interface. This is a predictable
         * scenario and shouldn't be reported back to the user.
         */
        error_free(delete_err);
    } else {
        error_report_err(delete_err);
    }
}
|
|
|
|
/**
|
|
* Try to get @bs's logical and physical block size.
|
|
* On success, store them in @bsz struct and return 0.
|
|
* On failure return -errno.
|
|
* @bs must not be empty.
|
|
*/
|
|
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
|
|
{
|
|
BlockDriver *drv = bs->drv;
|
|
BlockDriverState *filtered = bdrv_filter_bs(bs);
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (drv && drv->bdrv_probe_blocksizes) {
|
|
return drv->bdrv_probe_blocksizes(bs, bsz);
|
|
} else if (filtered) {
|
|
return bdrv_probe_blocksizes(filtered, bsz);
|
|
}
|
|
|
|
return -ENOTSUP;
|
|
}
|
|
|
|
/**
|
|
* Try to get @bs's geometry (cyls, heads, sectors).
|
|
* On success, store them in @geo struct and return 0.
|
|
* On failure return -errno.
|
|
* @bs must not be empty.
|
|
*/
|
|
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
|
|
{
|
|
BlockDriver *drv = bs->drv;
|
|
BlockDriverState *filtered = bdrv_filter_bs(bs);
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (drv && drv->bdrv_probe_geometry) {
|
|
return drv->bdrv_probe_geometry(bs, geo);
|
|
} else if (filtered) {
|
|
return bdrv_probe_geometry(filtered, geo);
|
|
}
|
|
|
|
return -ENOTSUP;
|
|
}
|
|
|
|
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer receiving the name of the new file
 * @size: capacity of @filename in bytes
 *
 * On Windows the name is produced by GetTempPath()/GetTempFileName() and
 * @size must be at least MAX_PATH; a failure is reported as the negated
 * GetLastError() code.  Elsewhere the file is created by mkstemp() in the
 * directory named by $TMPDIR, or in /var/tmp if TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Remember why close() failed: unlink() may overwrite errno, and
         * the caller must see the original error.
         */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
|
|
|
|
/*
|
|
* Detect host devices. By convention, /dev/cdrom[N] is always
|
|
* recognized as a host CDROM.
|
|
*/
|
|
static BlockDriver *find_hdev_driver(const char *filename)
|
|
{
|
|
int score_max = 0, score;
|
|
BlockDriver *drv = NULL, *d;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
QLIST_FOREACH(d, &bdrv_drivers, list) {
|
|
if (d->bdrv_probe_device) {
|
|
score = d->bdrv_probe_device(filename);
|
|
if (score > score_max) {
|
|
score_max = score;
|
|
drv = d;
|
|
}
|
|
}
|
|
}
|
|
|
|
return drv;
|
|
}
|
|
|
|
static BlockDriver *bdrv_do_find_protocol(const char *protocol)
|
|
{
|
|
BlockDriver *drv1;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
QLIST_FOREACH(drv1, &bdrv_drivers, list) {
|
|
if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
|
|
return drv1;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
BlockDriver *bdrv_find_protocol(const char *filename,
|
|
bool allow_protocol_prefix,
|
|
Error **errp)
|
|
{
|
|
BlockDriver *drv1;
|
|
char protocol[128];
|
|
int len;
|
|
const char *p;
|
|
int i;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
/* TODO Drivers without bdrv_file_open must be specified explicitly */
|
|
|
|
/*
|
|
* XXX(hch): we really should not let host device detection
|
|
* override an explicit protocol specification, but moving this
|
|
* later breaks access to device names with colons in them.
|
|
* Thanks to the brain-dead persistent naming schemes on udev-
|
|
* based Linux systems those actually are quite common.
|
|
*/
|
|
drv1 = find_hdev_driver(filename);
|
|
if (drv1) {
|
|
return drv1;
|
|
}
|
|
|
|
if (!path_has_protocol(filename) || !allow_protocol_prefix) {
|
|
return &bdrv_file;
|
|
}
|
|
|
|
p = strchr(filename, ':');
|
|
assert(p != NULL);
|
|
len = p - filename;
|
|
if (len > sizeof(protocol) - 1)
|
|
len = sizeof(protocol) - 1;
|
|
memcpy(protocol, filename, len);
|
|
protocol[len] = '\0';
|
|
|
|
drv1 = bdrv_do_find_protocol(protocol);
|
|
if (drv1) {
|
|
return drv1;
|
|
}
|
|
|
|
for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
|
|
if (block_driver_modules[i].protocol_name &&
|
|
!strcmp(block_driver_modules[i].protocol_name, protocol)) {
|
|
block_module_load_one(block_driver_modules[i].library_name);
|
|
break;
|
|
}
|
|
}
|
|
|
|
drv1 = bdrv_do_find_protocol(protocol);
|
|
if (!drv1) {
|
|
error_setg(errp, "Unknown protocol '%s'", protocol);
|
|
}
|
|
return drv1;
|
|
}
|
|
|
|
/*
|
|
* Guess image format by probing its contents.
|
|
* This is not a good idea when your image is raw (CVE-2008-2004), but
|
|
* we do it anyway for backward compatibility.
|
|
*
|
|
* @buf contains the image's first @buf_size bytes.
|
|
* @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
|
|
* but can be smaller if the image file is smaller)
|
|
* @filename is its filename.
|
|
*
|
|
* For all block drivers, call the bdrv_probe() method to get its
|
|
* probing score.
|
|
* Return the first block driver with the highest probing score.
|
|
*/
|
|
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
|
|
const char *filename)
|
|
{
|
|
int score_max = 0, score;
|
|
BlockDriver *drv = NULL, *d;
|
|
IO_CODE();
|
|
|
|
QLIST_FOREACH(d, &bdrv_drivers, list) {
|
|
if (d->bdrv_probe) {
|
|
score = d->bdrv_probe(buf, buf_size, filename);
|
|
if (score > score_max) {
|
|
score_max = score;
|
|
drv = d;
|
|
}
|
|
}
|
|
}
|
|
|
|
return drv;
|
|
}
|
|
|
|
static int find_image_format(BlockBackend *file, const char *filename,
|
|
BlockDriver **pdrv, Error **errp)
|
|
{
|
|
BlockDriver *drv;
|
|
uint8_t buf[BLOCK_PROBE_BUF_SIZE];
|
|
int ret = 0;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
|
|
if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
|
|
*pdrv = &bdrv_raw;
|
|
return ret;
|
|
}
|
|
|
|
ret = blk_pread(file, 0, buf, sizeof(buf));
|
|
if (ret < 0) {
|
|
error_setg_errno(errp, -ret, "Could not read image for determining its "
|
|
"format");
|
|
*pdrv = NULL;
|
|
return ret;
|
|
}
|
|
|
|
drv = bdrv_probe_all(buf, ret, filename);
|
|
if (!drv) {
|
|
error_setg(errp, "Could not determine image format: No compatible "
|
|
"driver found");
|
|
ret = -ENOENT;
|
|
}
|
|
*pdrv = drv;
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Set the current 'total_sectors' value
|
|
* Return 0 on success, -errno on error.
|
|
*/
|
|
int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
|
|
{
|
|
BlockDriver *drv = bs->drv;
|
|
IO_CODE();
|
|
|
|
if (!drv) {
|
|
return -ENOMEDIUM;
|
|
}
|
|
|
|
/* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
|
|
if (bdrv_is_sg(bs))
|
|
return 0;
|
|
|
|
/* query actual device if possible, otherwise just trust the hint */
|
|
if (drv->bdrv_getlength) {
|
|
int64_t length = drv->bdrv_getlength(bs);
|
|
if (length < 0) {
|
|
return length;
|
|
}
|
|
hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
|
|
}
|
|
|
|
bs->total_sectors = hint;
|
|
|
|
if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) {
|
|
return -EFBIG;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Combines a QDict of new block driver @options with any missing options taken
|
|
* from @old_options, so that leaving out an option defaults to its old value.
|
|
*/
|
|
static void bdrv_join_options(BlockDriverState *bs, QDict *options,
|
|
QDict *old_options)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
if (bs->drv && bs->drv->bdrv_join_options) {
|
|
bs->drv->bdrv_join_options(options, old_options);
|
|
} else {
|
|
qdict_join(options, old_options, false);
|
|
}
|
|
}
|
|
|
|
static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
|
|
int open_flags,
|
|
Error **errp)
|
|
{
|
|
Error *local_err = NULL;
|
|
char *value = qemu_opt_get_del(opts, "detect-zeroes");
|
|
BlockdevDetectZeroesOptions detect_zeroes =
|
|
qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
|
|
BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
|
|
GLOBAL_STATE_CODE();
|
|
g_free(value);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
return detect_zeroes;
|
|
}
|
|
|
|
if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
|
|
!(open_flags & BDRV_O_UNMAP))
|
|
{
|
|
error_setg(errp, "setting detect-zeroes to unmap is not allowed "
|
|
"without setting discard operation to unmap");
|
|
}
|
|
|
|
return detect_zeroes;
|
|
}
|
|
|
|
/**
|
|
* Set open flags for aio engine
|
|
*
|
|
* Return 0 on success, -1 if the engine specified is invalid
|
|
*/
|
|
int bdrv_parse_aio(const char *mode, int *flags)
|
|
{
|
|
if (!strcmp(mode, "threads")) {
|
|
/* do nothing, default */
|
|
} else if (!strcmp(mode, "native")) {
|
|
*flags |= BDRV_O_NATIVE_AIO;
|
|
#ifdef CONFIG_LINUX_IO_URING
|
|
} else if (!strcmp(mode, "io_uring")) {
|
|
*flags |= BDRV_O_IO_URING;
|
|
#endif
|
|
} else {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Set open flags for a given discard mode
|
|
*
|
|
* Return 0 on success, -1 if the discard mode was invalid.
|
|
*/
|
|
int bdrv_parse_discard_flags(const char *mode, int *flags)
|
|
{
|
|
*flags &= ~BDRV_O_UNMAP;
|
|
|
|
if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
|
|
/* do nothing */
|
|
} else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
|
|
*flags |= BDRV_O_UNMAP;
|
|
} else {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Set open flags for a given cache mode
|
|
*
|
|
* Return 0 on success, -1 if the cache mode was invalid.
|
|
*/
|
|
int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
|
|
{
|
|
*flags &= ~BDRV_O_CACHE_MASK;
|
|
|
|
if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
|
|
*writethrough = false;
|
|
*flags |= BDRV_O_NOCACHE;
|
|
} else if (!strcmp(mode, "directsync")) {
|
|
*writethrough = true;
|
|
*flags |= BDRV_O_NOCACHE;
|
|
} else if (!strcmp(mode, "writeback")) {
|
|
*writethrough = false;
|
|
} else if (!strcmp(mode, "unsafe")) {
|
|
*writethrough = false;
|
|
*flags |= BDRV_O_NO_FLUSH;
|
|
} else if (!strcmp(mode, "writethrough")) {
|
|
*writethrough = true;
|
|
} else {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Build a newly allocated "node '<name>'" description of the node stored in
 * c->opaque.  The string comes from g_strdup_printf().
 */
static char *bdrv_child_get_parent_desc(BdrvChild *c)
{
    BlockDriverState *parent_bs = c->opaque;
    const char *name = bdrv_get_node_name(parent_bs);

    return g_strdup_printf("node '%s'", name);
}
|
|
|
|
/*
 * .drained_begin callback: forward to bdrv_do_drained_begin_quiesce() on the
 * node stored in child->opaque.
 */
static void bdrv_child_cb_drained_begin(BdrvChild *child)
{
    BlockDriverState *parent_bs = child->opaque;

    bdrv_do_drained_begin_quiesce(parent_bs, NULL, false);
}
|
|
|
|
/*
 * .drained_poll callback: return the result of bdrv_drain_poll() for the
 * node stored in child->opaque.
 */
static bool bdrv_child_cb_drained_poll(BdrvChild *child)
{
    BlockDriverState *parent_bs = child->opaque;
    bool busy = bdrv_drain_poll(parent_bs, false, NULL, false);

    return busy;
}
|
|
|
|
/*
 * .drained_end callback: forward to bdrv_drained_end_no_poll() on the node
 * stored in child->opaque, passing @drained_end_counter through unchanged.
 */
static void bdrv_child_cb_drained_end(BdrvChild *child,
                                      int *drained_end_counter)
{
    BlockDriverState *parent_bs = child->opaque;

    bdrv_drained_end_no_poll(parent_bs, drained_end_counter);
}
|
|
|
|
/*
 * .inactivate callback: does no work of its own.  It only asserts that the
 * node stored in child->opaque already has BDRV_O_INACTIVE set, and always
 * returns 0.
 */
static int bdrv_child_cb_inactivate(BdrvChild *child)
{
    BlockDriverState *parent_bs = child->opaque;

    GLOBAL_STATE_CODE();

    assert((parent_bs->open_flags & BDRV_O_INACTIVE) != 0);

    return 0;
}
|
|
|
|
static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
|
|
GSList **ignore, Error **errp)
|
|
{
|
|
BlockDriverState *bs = child->opaque;
|
|
return bdrv_can_set_aio_context(bs, ctx, ignore, errp);
|
|
}
|
|
|
|
static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx,
|
|
GSList **ignore)
|
|
{
|
|
BlockDriverState *bs = child->opaque;
|
|
return bdrv_set_aio_context_ignore(bs, ctx, ignore);
|
|
}
|
|
|
|
/*
|
|
* Returns the options and flags that a temporary snapshot should get, based on
|
|
* the originally requested flags (the originally requested image will have
|
|
* flags like a backing file)
|
|
*/
|
|
static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
|
|
int parent_flags, QDict *parent_options)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
*child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
|
|
|
|
/* For temporary files, unconditional cache=unsafe is fine */
|
|
qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
|
|
qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
|
|
|
|
/* Copy the read-only and discard options from the parent */
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD);
|
|
|
|
/* aio=native doesn't work for cache.direct=off, so disable it for the
|
|
* temporary snapshot */
|
|
*child_flags &= ~BDRV_O_NATIVE_AIO;
|
|
}
|
|
|
|
/*
 * Set up the op blockers for a newly attached backing child.
 *
 * A blocker error object is created in the parent (c->opaque), every
 * operation on the backing node (c->bs) is blocked with it, and then the
 * operations that must stay possible on a backing file are unblocked again.
 * BDRV_O_NO_BACKING is also cleared from the parent's open flags.
 */
static void bdrv_backing_attach(BdrvChild *c)
{
    BlockDriverState *overlay = c->opaque;
    BlockDriverState *backing_bs = c->bs;
    Error *blocker;

    GLOBAL_STATE_CODE();

    assert(!overlay->backing_blocker);
    error_setg(&overlay->backing_blocker,
               "node is used as backing hd of '%s'",
               bdrv_get_device_or_node_name(overlay));
    blocker = overlay->backing_blocker;

    bdrv_refresh_filename(backing_bs);

    overlay->open_flags &= ~BDRV_O_NO_BACKING;

    bdrv_op_block_all(backing_bs, blocker);

    /* Commit and stream must remain possible */
    bdrv_op_unblock(backing_bs, BLOCK_OP_TYPE_COMMIT_TARGET, blocker);
    bdrv_op_unblock(backing_bs, BLOCK_OP_TYPE_STREAM, blocker);

    /*
     * Backup comes in three flavours:
     * 1. drive backup
     *    The target BDS is newly opened and the source is a top BDS.
     * 2. blockdev backup
     *    Both the source and the target are top BDSes.
     * 3. internal backup (used for block replication)
     *    Both the source and the target are backing files.
     *
     * In cases 1 and 2, neither the source nor the target is a backing file.
     * In case 3, the top BDS is blocked, so there is only one block job for
     * the top BDS and its backing chain.
     */
    bdrv_op_unblock(backing_bs, BLOCK_OP_TYPE_BACKUP_SOURCE, blocker);
    bdrv_op_unblock(backing_bs, BLOCK_OP_TYPE_BACKUP_TARGET, blocker);
}
|
|
|
|
/*
 * Undo the blocker setup of a backing child: unblock all operations on the
 * backing node (c->bs) that were blocked with the parent's backing_blocker,
 * then free the blocker and reset the pointer.
 */
static void bdrv_backing_detach(BdrvChild *c)
{
    BlockDriverState *overlay = c->opaque;

    GLOBAL_STATE_CODE();

    assert(overlay->backing_blocker != NULL);

    bdrv_op_unblock_all(c->bs, overlay->backing_blocker);

    error_free(overlay->backing_blocker);
    overlay->backing_blocker = NULL;
}
|
|
|
|
/*
 * Record @filename as the new backing file of the parent node (c->opaque),
 * using @base's driver format name (or "" if @base has no driver).
 *
 * If the parent is read-only it is reopened read-write for the update and
 * reopened read-only afterwards.  A failure of that final reopen is ignored.
 *
 * Returns the result of bdrv_change_backing_file(), or the negative result
 * of the initial reopen when that fails.
 */
static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
                                        const char *filename, Error **errp)
{
    BlockDriverState *overlay = c->opaque;
    bool was_read_only = bdrv_is_read_only(overlay);
    const char *base_format;
    int rc;

    GLOBAL_STATE_CODE();

    if (was_read_only) {
        rc = bdrv_reopen_set_read_only(overlay, false, errp);
        if (rc < 0) {
            return rc;
        }
    }

    base_format = base->drv ? base->drv->format_name : "";
    rc = bdrv_change_backing_file(overlay, filename, base_format, false);
    if (rc < 0) {
        error_setg_errno(errp, -rc, "Could not update backing file link");
    }

    /* Restore the original mode whether or not the update worked */
    if (was_read_only) {
        bdrv_reopen_set_read_only(overlay, true, NULL);
    }

    return rc;
}
|
|
|
|
/*
|
|
* Returns the options and flags that a generic child of a BDS should
|
|
* get, based on the given options and flags for the parent BDS.
|
|
*/
|
|
static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
|
|
int *child_flags, QDict *child_options,
|
|
int parent_flags, QDict *parent_options)
|
|
{
|
|
int flags = parent_flags;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/*
|
|
* First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
|
|
* Generally, the question to answer is: Should this child be
|
|
* format-probed by default?
|
|
*/
|
|
|
|
/*
|
|
* Pure and non-filtered data children of non-format nodes should
|
|
* be probed by default (even when the node itself has BDRV_O_PROTOCOL
|
|
* set). This only affects a very limited set of drivers (namely
|
|
* quorum and blkverify when this comment was written).
|
|
* Force-clear BDRV_O_PROTOCOL then.
|
|
*/
|
|
if (!parent_is_format &&
|
|
(role & BDRV_CHILD_DATA) &&
|
|
!(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED)))
|
|
{
|
|
flags &= ~BDRV_O_PROTOCOL;
|
|
}
|
|
|
|
/*
|
|
* All children of format nodes (except for COW children) and all
|
|
* metadata children in general should never be format-probed.
|
|
* Force-set BDRV_O_PROTOCOL then.
|
|
*/
|
|
if ((parent_is_format && !(role & BDRV_CHILD_COW)) ||
|
|
(role & BDRV_CHILD_METADATA))
|
|
{
|
|
flags |= BDRV_O_PROTOCOL;
|
|
}
|
|
|
|
/*
|
|
* If the cache mode isn't explicitly set, inherit direct and no-flush from
|
|
* the parent.
|
|
*/
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
|
|
|
|
if (role & BDRV_CHILD_COW) {
|
|
/* backing files are opened read-only by default */
|
|
qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
|
|
qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off");
|
|
} else {
|
|
/* Inherit the read-only option from the parent if it's not set */
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
|
|
qdict_copy_default(child_options, parent_options,
|
|
BDRV_OPT_AUTO_READ_ONLY);
|
|
}
|
|
|
|
/*
|
|
* bdrv_co_pdiscard() respects unmap policy for the parent, so we
|
|
* can default to enable it on lower layers regardless of the
|
|
* parent option.
|
|
*/
|
|
qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
|
|
|
|
/* Clear flags that only apply to the top layer */
|
|
flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
|
|
|
|
if (role & BDRV_CHILD_METADATA) {
|
|
flags &= ~BDRV_O_NO_IO;
|
|
}
|
|
if (role & BDRV_CHILD_COW) {
|
|
flags &= ~BDRV_O_TEMPORARY;
|
|
}
|
|
|
|
*child_flags = flags;
|
|
}
|
|
|
|
/*
 * .attach callback: insert @child at the head of the parent's children
 * list, run the backing-file setup for COW children, and finally call
 * bdrv_apply_subtree_drain() for the child.
 */
static void bdrv_child_cb_attach(BdrvChild *child)
{
    BlockDriverState *parent_bs = child->opaque;

    assert_bdrv_graph_writable(parent_bs);
    QLIST_INSERT_HEAD(&parent_bs->children, child, next);

    if ((child->role & BDRV_CHILD_COW) != 0) {
        bdrv_backing_attach(child);
    }

    bdrv_apply_subtree_drain(child, parent_bs);
}
|
|
|
|
/*
 * .detach callback, mirroring the attach callback in reverse order: tear
 * down the backing-file setup for COW children, call
 * bdrv_unapply_subtree_drain(), then remove @child from the parent's
 * children list.
 */
static void bdrv_child_cb_detach(BdrvChild *child)
{
    BlockDriverState *parent_bs = child->opaque;

    if ((child->role & BDRV_CHILD_COW) != 0) {
        bdrv_backing_detach(child);
    }

    bdrv_unapply_subtree_drain(child, parent_bs);

    assert_bdrv_graph_writable(parent_bs);
    QLIST_REMOVE(child, next);
}
|
|
|
|
/*
 * .update_filename callback: only COW children have a backing file link to
 * update; for every other role this is a no-op that returns 0.
 */
static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
                                         const char *filename, Error **errp)
{
    if (!(c->role & BDRV_CHILD_COW)) {
        return 0;
    }

    return bdrv_backing_update_filename(c, base, filename, errp);
}
|
|
|
|
/*
 * .get_parent_aio_context callback: return the AioContext of the node
 * stored in c->opaque.
 */
AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
{
    BlockDriverState *parent_bs = c->opaque;
    AioContext *ctx;

    IO_CODE();

    ctx = bdrv_get_aio_context(parent_bs);
    return ctx;
}
|
|
|
|
const BdrvChildClass child_of_bds = {
|
|
.parent_is_bds = true,
|
|
.get_parent_desc = bdrv_child_get_parent_desc,
|
|
.inherit_options = bdrv_inherited_options,
|
|
.drained_begin = bdrv_child_cb_drained_begin,
|
|
.drained_poll = bdrv_child_cb_drained_poll,
|
|
.drained_end = bdrv_child_cb_drained_end,
|
|
.attach = bdrv_child_cb_attach,
|
|
.detach = bdrv_child_cb_detach,
|
|
.inactivate = bdrv_child_cb_inactivate,
|
|
.can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx,
|
|
.set_aio_ctx = bdrv_child_cb_set_aio_ctx,
|
|
.update_filename = bdrv_child_cb_update_filename,
|
|
.get_parent_aio_context = child_of_bds_get_parent_aio_context,
|
|
};
|
|
|
|
/*
 * Ask the child's class (c->klass) for the AioContext of the child's parent.
 */
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
{
    AioContext *ctx;

    GLOBAL_STATE_CODE();

    ctx = c->klass->get_parent_aio_context(c);
    return ctx;
}
|
|
|
|
/*
 * Return @flags with the bits that are internal to the block layer
 * (BDRV_O_SNAPSHOT, BDRV_O_NO_BACKING, BDRV_O_PROTOCOL) cleared, ready for
 * opening the image.  @bs is not consulted.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    const int internal_flags =
        BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL;

    GLOBAL_STATE_CODE();

    return flags & ~internal_flags;
}
|
|
|
|
/*
 * Recompute the cache and read-only related bits of *@flags from the
 * corresponding options in @opts.
 *
 * The cache mask, BDRV_O_RDWR and BDRV_O_AUTO_RDONLY bits are cleared and
 * then set again from cache.no-flush, cache.direct, read-only (inverted)
 * and auto-read-only.  Each option is fetched with qemu_opt_get_bool_del()
 * and a default of false.
 */
static void update_flags_from_options(int *flags, QemuOpts *opts)
{
    int result;

    GLOBAL_STATE_CODE();

    result = *flags & ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);

    if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
        result |= BDRV_O_NO_FLUSH;
    }
    if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) {
        result |= BDRV_O_NOCACHE;
    }
    /* Read-write unless read-only was requested */
    if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) {
        result |= BDRV_O_RDWR;
    }
    if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) {
        result |= BDRV_O_AUTO_RDONLY;
    }

    *flags = result;
}
|
|
|
|
/*
 * Inverse of update_flags_from_options(): for each of cache.direct,
 * cache.no-flush, read-only and auto-read-only that is not yet a key of
 * @options, add a boolean entry derived from @flags.  Existing entries are
 * never overwritten.
 */
static void update_options_from_flags(QDict *options, int flags)
{
    const bool direct = flags & BDRV_O_NOCACHE;
    const bool no_flush = flags & BDRV_O_NO_FLUSH;
    const bool read_only = !(flags & BDRV_O_RDWR);
    const bool auto_read_only = flags & BDRV_O_AUTO_RDONLY;

    GLOBAL_STATE_CODE();

    if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
        qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, direct);
    }
    if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
        qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH, no_flush);
    }
    if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
        qdict_put_bool(options, BDRV_OPT_READ_ONLY, read_only);
    }
    if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) {
        qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY, auto_read_only);
    }
}
|
|
|
|
static void bdrv_assign_node_name(BlockDriverState *bs,
|
|
const char *node_name,
|
|
Error **errp)
|
|
{
|
|
char *gen_node_name = NULL;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (!node_name) {
|
|
node_name = gen_node_name = id_generate(ID_BLOCK);
|
|
} else if (!id_wellformed(node_name)) {
|
|
/*
|
|
* Check for empty string or invalid characters, but not if it is
|
|
* generated (generated names use characters not available to the user)
|
|
*/
|
|
error_setg(errp, "Invalid node-name: '%s'", node_name);
|
|
return;
|
|
}
|
|
|
|
/* takes care of avoiding namespaces collisions */
|
|
if (blk_by_name(node_name)) {
|
|
error_setg(errp, "node-name=%s is conflicting with a device id",
|
|
node_name);
|
|
goto out;
|
|
}
|
|
|
|
/* takes care of avoiding duplicates node names */
|
|
if (bdrv_find_node(node_name)) {
|
|
error_setg(errp, "Duplicate nodes with node-name='%s'", node_name);
|
|
goto out;
|
|
}
|
|
|
|
/* Make sure that the node name isn't truncated */
|
|
if (strlen(node_name) >= sizeof(bs->node_name)) {
|
|
error_setg(errp, "Node name too long");
|
|
goto out;
|
|
}
|
|
|
|
/* copy node name into the bs and insert it into the graph list */
|
|
pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
|
|
QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
|
|
out:
|
|
g_free(gen_node_name);
|
|
}
|
|
|
|
/*
 * Name @bs, bind it to @drv and invoke the driver's open callback.
 *
 * The driver state (bs->opaque) is allocated zeroed with
 * drv->instance_size.  drv->bdrv_file_open is preferred over
 * drv->bdrv_open; a driver with neither counts as opened successfully.
 *
 * If the open callback fails, bs->drv, bs->file and bs->opaque are reset
 * before returning.  Failures after a successful open (sector count or
 * limits refresh) return with the driver still attached.
 *
 * Returns 0 on success, a negative errno value on failure (with @errp set).
 */
static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
                            const char *node_name, QDict *options,
                            int open_flags, Error **errp)
{
    Error *err = NULL;
    int n, rc;

    GLOBAL_STATE_CODE();

    bdrv_assign_node_name(bs, node_name, &err);
    if (err) {
        error_propagate(errp, err);
        return -EINVAL;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    rc = 0;
    if (drv->bdrv_file_open) {
        assert(!drv->bdrv_needs_filename || bs->filename[0]);
        rc = drv->bdrv_file_open(bs, options, open_flags, &err);
    } else if (drv->bdrv_open) {
        rc = drv->bdrv_open(bs, options, open_flags, &err);
    }

    if (rc < 0) {
        /* Prefer the driver's own error; otherwise build a generic one */
        if (err) {
            error_propagate(errp, err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -rc, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -rc, "Could not open image");
        }
        goto open_failed;
    }

    rc = refresh_total_sectors(bs, bs->total_sectors);
    if (rc < 0) {
        error_setg_errno(errp, -rc, "Could not refresh total sector count");
        return rc;
    }

    bdrv_refresh_limits(bs, NULL, &err);
    if (err) {
        error_propagate(errp, err);
        return -EINVAL;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert(is_power_of_2(bs->bl.request_alignment));

    /* Call the driver's drain_begin once per outstanding quiesce request */
    if (drv->bdrv_co_drain_begin) {
        for (n = 0; n < bs->quiesce_counter; n++) {
            drv->bdrv_co_drain_begin(bs);
        }
    }

    return 0;

open_failed:
    bs->drv = NULL;
    if (bs->file != NULL) {
        bdrv_unref_child(bs, bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    return rc;
}
|
|
|
|
/*
|
|
* Create and open a block node.
|
|
*
|
|
* @options is a QDict of options to pass to the block drivers, or NULL for an
|
|
* empty set of options. The reference to the QDict belongs to the block layer
|
|
* after the call (even on failure), so if the caller intends to reuse the
|
|
* dictionary, it needs to use qobject_ref() before calling bdrv_open.
|
|
*/
|
|
BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
|
|
const char *node_name,
|
|
QDict *options, int flags,
|
|
Error **errp)
|
|
{
|
|
BlockDriverState *bs;
|
|
int ret;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bs = bdrv_new();
|
|
bs->open_flags = flags;
|
|
bs->options = options ?: qdict_new();
|
|
bs->explicit_options = qdict_clone_shallow(bs->options);
|
|
bs->opaque = NULL;
|
|
|
|
update_options_from_flags(bs->options, flags);
|
|
|
|
ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
|
|
if (ret < 0) {
|
|
qobject_unref(bs->explicit_options);
|
|
bs->explicit_options = NULL;
|
|
qobject_unref(bs->options);
|
|
bs->options = NULL;
|
|
bdrv_unref(bs);
|
|
return NULL;
|
|
}
|
|
|
|
return bs;
|
|
}
|
|
|
|
/* Create and open a block node. */
|
|
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
|
|
int flags, Error **errp)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
|
|
}
|
|
|
|
QemuOptsList bdrv_runtime_opts = {
|
|
.name = "bdrv_common",
|
|
.head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
|
|
.desc = {
|
|
{
|
|
.name = "node-name",
|
|
.type = QEMU_OPT_STRING,
|
|
.help = "Node name of the block device node",
|
|
},
|
|
{
|
|
.name = "driver",
|
|
.type = QEMU_OPT_STRING,
|
|
.help = "Block driver to use for the node",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_CACHE_DIRECT,
|
|
.type = QEMU_OPT_BOOL,
|
|
.help = "Bypass software writeback cache on the host",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_CACHE_NO_FLUSH,
|
|
.type = QEMU_OPT_BOOL,
|
|
.help = "Ignore flush requests",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_READ_ONLY,
|
|
.type = QEMU_OPT_BOOL,
|
|
.help = "Node is opened in read-only mode",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_AUTO_READ_ONLY,
|
|
.type = QEMU_OPT_BOOL,
|
|
.help = "Node can become read-only if opening read-write fails",
|
|
},
|
|
{
|
|
.name = "detect-zeroes",
|
|
.type = QEMU_OPT_STRING,
|
|
.help = "try to optimize zero writes (off, on, unmap)",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_DISCARD,
|
|
.type = QEMU_OPT_STRING,
|
|
.help = "discard operation (ignore/off, unmap/on)",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_FORCE_SHARE,
|
|
.type = QEMU_OPT_BOOL,
|
|
.help = "always accept other writers (default: off)",
|
|
},
|
|
{ /* end of list */ }
|
|
},
|
|
};
|
|
|
|
QemuOptsList bdrv_create_opts_simple = {
|
|
.name = "simple-create-opts",
|
|
.head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head),
|
|
.desc = {
|
|
{
|
|
.name = BLOCK_OPT_SIZE,
|
|
.type = QEMU_OPT_SIZE,
|
|
.help = "Virtual disk size"
|
|
},
|
|
{
|
|
.name = BLOCK_OPT_PREALLOC,
|
|
.type = QEMU_OPT_STRING,
|
|
.help = "Preallocation mode (allowed values: off)"
|
|
},
|
|
{ /* end of list */ }
|
|
}
|
|
};
|
|
|
|
/*
|
|
* Common part for opening disk images and files
|
|
*
|
|
* Removes all processed options from *options.
|
|
*/
|
|
static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
|
|
QDict *options, Error **errp)
|
|
{
|
|
int ret, open_flags;
|
|
const char *filename;
|
|
const char *driver_name = NULL;
|
|
const char *node_name = NULL;
|
|
const char *discard;
|
|
QemuOpts *opts;
|
|
BlockDriver *drv;
|
|
Error *local_err = NULL;
|
|
bool ro;
|
|
|
|
assert(bs->file == NULL);
|
|
assert(options != NULL && bs->options != options);
|
|
GLOBAL_STATE_CODE();
|
|
|
|
opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
|
|
if (!qemu_opts_absorb_qdict(opts, options, errp)) {
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
|
|
update_flags_from_options(&bs->open_flags, opts);
|
|
|
|
driver_name = qemu_opt_get(opts, "driver");
|
|
drv = bdrv_find_format(driver_name);
|
|
assert(drv != NULL);
|
|
|
|
bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
|
|
|
|
if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
|
|
error_setg(errp,
|
|
BDRV_OPT_FORCE_SHARE
|
|
"=on can only be used with read-only images");
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
|
|
if (file != NULL) {
|
|
bdrv_refresh_filename(blk_bs(file));
|
|
filename = blk_bs(file)->filename;
|
|
} else {
|
|
/*
|
|
* Caution: while qdict_get_try_str() is fine, getting
|
|
* non-string types would require more care. When @options
|
|
* come from -blockdev or blockdev_add, its members are typed
|
|
* according to the QAPI schema, but when they come from
|
|
* -drive, they're all QString.
|
|
*/
|
|
filename = qdict_get_try_str(options, "filename");
|
|
}
|
|
|
|
if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
|
|
error_setg(errp, "The '%s' block driver requires a file name",
|
|
drv->format_name);
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
|
|
trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
|
|
drv->format_name);
|
|
|
|
ro = bdrv_is_read_only(bs);
|
|
|
|
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
|
|
if (!ro && bdrv_is_whitelisted(drv, true)) {
|
|
ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
|
|
} else {
|
|
ret = -ENOTSUP;
|
|
}
|
|
if (ret < 0) {
|
|
error_setg(errp,
|
|
!ro && bdrv_is_whitelisted(drv, true)
|
|
? "Driver '%s' can only be used for read-only devices"
|
|
: "Driver '%s' is not whitelisted",
|
|
drv->format_name);
|
|
goto fail_opts;
|
|
}
|
|
}
|
|
|
|
/* bdrv_new() and bdrv_close() make it so */
|
|
assert(qatomic_read(&bs->copy_on_read) == 0);
|
|
|
|
if (bs->open_flags & BDRV_O_COPY_ON_READ) {
|
|
if (!ro) {
|
|
bdrv_enable_copy_on_read(bs);
|
|
} else {
|
|
error_setg(errp, "Can't use copy-on-read on read-only device");
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
}
|
|
|
|
discard = qemu_opt_get(opts, BDRV_OPT_DISCARD);
|
|
if (discard != NULL) {
|
|
if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
|
|
error_setg(errp, "Invalid discard option");
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
}
|
|
|
|
bs->detect_zeroes =
|
|
bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
|
|
if (filename != NULL) {
|
|
pstrcpy(bs->filename, sizeof(bs->filename), filename);
|
|
} else {
|
|
bs->filename[0] = '\0';
|
|
}
|
|
pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
|
|
|
|
/* Open the image, either directly or using a protocol */
|
|
open_flags = bdrv_open_flags(bs, bs->open_flags);
|
|
node_name = qemu_opt_get(opts, "node-name");
|
|
|
|
assert(!drv->bdrv_file_open || file == NULL);
|
|
ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
|
|
if (ret < 0) {
|
|
goto fail_opts;
|
|
}
|
|
|
|
qemu_opts_del(opts);
|
|
return 0;
|
|
|
|
fail_opts:
|
|
qemu_opts_del(opts);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Parse a "json:{...}" pseudo-filename into a flattened options QDict.
 *
 * @filename must start with "json:" (asserted).  Returns the new QDict, or
 * NULL with @errp set if the remainder is not valid JSON or is not a JSON
 * object.
 */
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *parsed;
    QDict *dict;
    int has_prefix;

    GLOBAL_STATE_CODE();

    /* Skip the prefix; @filename then points at the JSON text */
    has_prefix = strstart(filename, "json:", &filename);
    assert(has_prefix);

    parsed = qobject_from_json(filename, errp);
    if (parsed == NULL) {
        error_prepend(errp, "Could not parse the JSON options: ");
        return NULL;
    }

    dict = qobject_to(QDict, parsed);
    if (dict == NULL) {
        qobject_unref(parsed);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    qdict_flatten(dict);

    return dict;
}
|
|
|
|
static void parse_json_protocol(QDict *options, const char **pfilename,
|
|
Error **errp)
|
|
{
|
|
QDict *json_options;
|
|
Error *local_err = NULL;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/* Parse json: pseudo-protocol */
|
|
if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
|
|
return;
|
|
}
|
|
|
|
json_options = parse_json_filename(*pfilename, &local_err);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
return;
|
|
}
|
|
|
|
/* Options given in the filename have lower priority than options
|
|
* specified directly */
|
|
qdict_join(options, json_options, false);
|
|
qobject_unref(json_options);
|
|
*pfilename = NULL;
|
|
}
|
|
|
|
/*
|
|
* Fills in default options for opening images and converts the legacy
|
|
* filename/flags pair to option QDict entries.
|
|
* The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
|
|
* block driver has been specified explicitly.
|
|
*/
|
|
static int bdrv_fill_options(QDict **options, const char *filename,
|
|
int *flags, Error **errp)
|
|
{
|
|
const char *drvname;
|
|
bool protocol = *flags & BDRV_O_PROTOCOL;
|
|
bool parse_filename = false;
|
|
BlockDriver *drv = NULL;
|
|
Error *local_err = NULL;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/*
|
|
* Caution: while qdict_get_try_str() is fine, getting non-string
|
|
* types would require more care. When @options come from
|
|
* -blockdev or blockdev_add, its members are typed according to
|
|
* the QAPI schema, but when they come from -drive, they're all
|
|
* QString.
|
|
*/
|
|
drvname = qdict_get_try_str(*options, "driver");
|
|
if (drvname) {
|
|
drv = bdrv_find_format(drvname);
|
|
if (!drv) {
|
|
error_setg(errp, "Unknown driver '%s'", drvname);
|
|
return -ENOENT;
|
|
}
|
|
/* If the user has explicitly specified the driver, this choice should
|
|
* override the BDRV_O_PROTOCOL flag */
|
|
protocol = drv->bdrv_file_open;
|
|
}
|
|
|
|
if (protocol) {
|
|
*flags |= BDRV_O_PROTOCOL;
|
|
} else {
|
|
*flags &= ~BDRV_O_PROTOCOL;
|
|
}
|
|
|
|
/* Translate cache options from flags into options */
|
|
update_options_from_flags(*options, *flags);
|
|
|
|
/* Fetch the file name from the options QDict if necessary */
|
|
if (protocol && filename) {
|
|
if (!qdict_haskey(*options, "filename")) {
|
|
qdict_put_str(*options, "filename", filename);
|
|
parse_filename = true;
|
|
} else {
|
|
error_setg(errp, "Can't specify 'file' and 'filename' options at "
|
|
"the same time");
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
/* Find the right block driver */
|
|
/* See cautionary note on accessing @options above */
|
|
filename = qdict_get_try_str(*options, "filename");
|
|
|
|
if (!drvname && protocol) {
|
|
if (filename) {
|
|
drv = bdrv_find_protocol(filename, parse_filename, errp);
|
|
if (!drv) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
drvname = drv->format_name;
|
|
qdict_put_str(*options, "driver", drvname);
|
|
} else {
|
|
error_setg(errp, "Must specify either driver or file");
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
assert(drv || !protocol);
|
|
|
|
/* Driver-specific filename parsing */
|
|
if (drv && drv->bdrv_parse_filename && parse_filename) {
|
|
drv->bdrv_parse_filename(filename, *options, &local_err);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!drv->bdrv_needs_filename) {
|
|
qdict_del(*options, "filename");
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
typedef struct BlockReopenQueueEntry {
|
|
bool prepared;
|
|
bool perms_checked;
|
|
BDRVReopenState state;
|
|
QTAILQ_ENTRY(BlockReopenQueueEntry) entry;
|
|
} BlockReopenQueueEntry;
|
|
|
|
/*
|
|
* Return the flags that @bs will have after the reopens in @q have
|
|
* successfully completed. If @q is NULL (or @bs is not contained in @q),
|
|
* return the current flags.
|
|
*/
|
|
static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
|
|
{
|
|
BlockReopenQueueEntry *entry;
|
|
|
|
if (q != NULL) {
|
|
QTAILQ_FOREACH(entry, q, entry) {
|
|
if (entry->state.bs == bs) {
|
|
return entry->state.flags;
|
|
}
|
|
}
|
|
}
|
|
|
|
return bs->open_flags;
|
|
}
|
|
|
|
/* Returns whether the image file can be written to after the reopen queue @q
|
|
* has been successfully applied, or right now if @q is NULL. */
|
|
static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
|
|
BlockReopenQueue *q)
|
|
{
|
|
int flags = bdrv_reopen_get_flags(q, bs);
|
|
|
|
return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
|
|
}
|
|
|
|
/*
|
|
* Return whether the BDS can be written to. This is not necessarily
|
|
* the same as !bdrv_is_read_only(bs), as inactivated images may not
|
|
* be written to but do not count as read-only images.
|
|
*/
|
|
bool bdrv_is_writable(BlockDriverState *bs)
|
|
{
|
|
IO_CODE();
|
|
return bdrv_is_writable_after_reopen(bs, NULL);
|
|
}
|
|
|
|
/*
 * Return the description of @c's parent as produced by the child class'
 * get_parent_desc callback.  Callers in this file free the result with
 * g_autofree.
 */
static char *bdrv_child_user_desc(BdrvChild *c)
{
    char *desc;

    GLOBAL_STATE_CODE();

    desc = c->klass->get_parent_desc(c);
    return desc;
}
|
|
|
|
/*
|
|
* Check that @a allows everything that @b needs. @a and @b must reference same
|
|
* child node.
|
|
*/
|
|
static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
|
|
{
|
|
const char *child_bs_name;
|
|
g_autofree char *a_user = NULL;
|
|
g_autofree char *b_user = NULL;
|
|
g_autofree char *perms = NULL;
|
|
|
|
assert(a->bs);
|
|
assert(a->bs == b->bs);
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if ((b->perm & a->shared_perm) == b->perm) {
|
|
return true;
|
|
}
|
|
|
|
child_bs_name = bdrv_get_node_name(b->bs);
|
|
a_user = bdrv_child_user_desc(a);
|
|
b_user = bdrv_child_user_desc(b);
|
|
perms = bdrv_perm_names(b->perm & ~a->shared_perm);
|
|
|
|
error_setg(errp, "Permission conflict on node '%s': permissions '%s' are "
|
|
"both required by %s (uses node '%s' as '%s' child) and "
|
|
"unshared by %s (uses node '%s' as '%s' child).",
|
|
child_bs_name, perms,
|
|
b_user, child_bs_name, b->name,
|
|
a_user, child_bs_name, a->name);
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
 * Return true (with @errp set) if any two parents of @bs have conflicting
 * permission requirements, false if all of them are compatible.
 */
static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
{
    BdrvChild *holder, *user;

    GLOBAL_STATE_CODE();

    /*
     * Every pair is visited twice, once in each order.  That is intended:
     * bdrv_a_allow_b() is asymmetric, so both directions need checking.
     */
    QLIST_FOREACH(holder, &bs->parents, next_parent) {
        QLIST_FOREACH(user, &bs->parents, next_parent) {
            if (holder != user && !bdrv_a_allow_b(holder, user, errp)) {
                return true;
            }
        }
    }

    return false;
}
|
|
|
|
static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
|
|
BdrvChild *c, BdrvChildRole role,
|
|
BlockReopenQueue *reopen_queue,
|
|
uint64_t parent_perm, uint64_t parent_shared,
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
{
|
|
assert(bs->drv && bs->drv->bdrv_child_perm);
|
|
GLOBAL_STATE_CODE();
|
|
bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
|
|
parent_perm, parent_shared,
|
|
nperm, nshared);
|
|
/* TODO Take force_share from reopen_queue */
|
|
if (child_bs && child_bs->force_share) {
|
|
*nshared = BLK_PERM_ALL;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Adds the whole subtree of @bs (including @bs itself) to the @list (except for
|
|
* nodes that are already in the @list, of course) so that final list is
|
|
* topologically sorted. Return the result (GSList @list object is updated, so
|
|
* don't use old reference after function call).
|
|
*
|
|
* On function start @list must be already topologically sorted and for any node
|
|
* in the @list the whole subtree of the node must be in the @list as well. The
|
|
* simplest way to satisfy this criteria: use only result of
|
|
* bdrv_topological_dfs() or NULL as @list parameter.
|
|
*/
|
|
static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found,
|
|
BlockDriverState *bs)
|
|
{
|
|
BdrvChild *child;
|
|
g_autoptr(GHashTable) local_found = NULL;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (!found) {
|
|
assert(!list);
|
|
found = local_found = g_hash_table_new(NULL, NULL);
|
|
}
|
|
|
|
if (g_hash_table_contains(found, bs)) {
|
|
return list;
|
|
}
|
|
g_hash_table_add(found, bs);
|
|
|
|
QLIST_FOREACH(child, &bs->children, next) {
|
|
list = bdrv_topological_dfs(list, found, child->bs);
|
|
}
|
|
|
|
return g_slist_prepend(list, bs);
|
|
}
|
|
|
|
typedef struct BdrvChildSetPermState {
|
|
BdrvChild *child;
|
|
uint64_t old_perm;
|
|
uint64_t old_shared_perm;
|
|
} BdrvChildSetPermState;
|
|
|
|
static void bdrv_child_set_perm_abort(void *opaque)
|
|
{
|
|
BdrvChildSetPermState *s = opaque;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
s->child->perm = s->old_perm;
|
|
s->child->shared_perm = s->old_shared_perm;
|
|
}
|
|
|
|
static TransactionActionDrv bdrv_child_set_pem_drv = {
|
|
.abort = bdrv_child_set_perm_abort,
|
|
.clean = g_free,
|
|
};
|
|
|
|
/*
 * Set @c's permissions to @perm/@shared and register an action with @tran
 * that restores the previous values if the transaction is aborted.
 */
static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
                                uint64_t shared, Transaction *tran)
{
    BdrvChildSetPermState *saved = g_new(BdrvChildSetPermState, 1);

    GLOBAL_STATE_CODE();

    /* Remember the current values for a possible rollback */
    saved->child = c;
    saved->old_perm = c->perm;
    saved->old_shared_perm = c->shared_perm;

    c->perm = perm;
    c->shared_perm = shared;

    tran_add(tran, &bdrv_child_set_pem_drv, saved);
}
|
|
|
|
static void bdrv_drv_set_perm_commit(void *opaque)
|
|
{
|
|
BlockDriverState *bs = opaque;
|
|
uint64_t cumulative_perms, cumulative_shared_perms;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (bs->drv->bdrv_set_perm) {
|
|
bdrv_get_cumulative_perm(bs, &cumulative_perms,
|
|
&cumulative_shared_perms);
|
|
bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
|
|
}
|
|
}
|
|
|
|
static void bdrv_drv_set_perm_abort(void *opaque)
|
|
{
|
|
BlockDriverState *bs = opaque;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (bs->drv->bdrv_abort_perm_update) {
|
|
bs->drv->bdrv_abort_perm_update(bs);
|
|
}
|
|
}
|
|
|
|
TransactionActionDrv bdrv_drv_set_perm_drv = {
|
|
.abort = bdrv_drv_set_perm_abort,
|
|
.commit = bdrv_drv_set_perm_commit,
|
|
};
|
|
|
|
static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm,
|
|
uint64_t shared_perm, Transaction *tran,
|
|
Error **errp)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
if (!bs->drv) {
|
|
return 0;
|
|
}
|
|
|
|
if (bs->drv->bdrv_check_perm) {
|
|
int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
if (tran) {
|
|
tran_add(tran, &bdrv_drv_set_perm_drv, bs);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
typedef struct BdrvReplaceChildState {
|
|
BdrvChild *child;
|
|
BdrvChild **childp;
|
|
BlockDriverState *old_bs;
|
|
bool free_empty_child;
|
|
} BdrvReplaceChildState;
|
|
|
|
static void bdrv_replace_child_commit(void *opaque)
|
|
{
|
|
BdrvReplaceChildState *s = opaque;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (s->free_empty_child && !s->child->bs) {
|
|
bdrv_child_free(s->child);
|
|
}
|
|
bdrv_unref(s->old_bs);
|
|
}
|
|
|
|
static void bdrv_replace_child_abort(void *opaque)
|
|
{
|
|
BdrvReplaceChildState *s = opaque;
|
|
BlockDriverState *new_bs = s->child->bs;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
/*
|
|
* old_bs reference is transparently moved from @s to s->child.
|
|
*
|
|
* Pass &s->child here instead of s->childp, because:
|
|
* (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not
|
|
* modify the BdrvChild * pointer we indirectly pass to it, i.e. it
|
|
* will not modify s->child. From that perspective, it does not matter
|
|
* whether we pass s->childp or &s->child.
|
|
* (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use
|
|
* it here.
|
|
* (3) If new_bs is NULL, *s->childp will have been NULLed by
|
|
* bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we
|
|
* must not pass a NULL *s->childp here.
|
|
*
|
|
* So whether new_bs was NULL or not, we cannot pass s->childp here; and in
|
|
* any case, there is no reason to pass it anyway.
|
|
*/
|
|
bdrv_replace_child_noperm(&s->child, s->old_bs, true);
|
|
/*
|
|
* The child was pre-existing, so s->old_bs must be non-NULL, and
|
|
* s->child thus must not have been freed
|
|
*/
|
|
assert(s->child != NULL);
|
|
if (!new_bs) {
|
|
/* As described above, *s->childp was cleared, so restore it */
|
|
assert(s->childp != NULL);
|
|
*s->childp = s->child;
|
|
}
|
|
bdrv_unref(new_bs);
|
|
}
|
|
|
|
static TransactionActionDrv bdrv_replace_child_drv = {
|
|
.commit = bdrv_replace_child_commit,
|
|
.abort = bdrv_replace_child_abort,
|
|
.clean = g_free,
|
|
};
|
|
|
|
/*
|
|
* bdrv_replace_child_tran
|
|
*
|
|
* Note: real unref of old_bs is done only on commit.
|
|
*
|
|
* The function doesn't update permissions, caller is responsible for this.
|
|
*
|
|
* (*childp)->bs must not be NULL.
|
|
*
|
|
* Note that if new_bs == NULL, @childp is stored in a state object attached
|
|
* to @tran, so that the old child can be reinstated in the abort handler.
|
|
* Therefore, if @new_bs can be NULL, @childp must stay valid until the
|
|
* transaction is committed or aborted.
|
|
*
|
|
* If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
|
|
* freed (on commit). @free_empty_child should only be false if the
|
|
* caller will free the BDrvChild themselves (which may be important
|
|
* if this is in turn called in another transactional context).
|
|
*/
|
|
static void bdrv_replace_child_tran(BdrvChild **childp,
|
|
BlockDriverState *new_bs,
|
|
Transaction *tran,
|
|
bool free_empty_child)
|
|
{
|
|
BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
|
|
*s = (BdrvReplaceChildState) {
|
|
.child = *childp,
|
|
.childp = new_bs == NULL ? childp : NULL,
|
|
.old_bs = (*childp)->bs,
|
|
.free_empty_child = free_empty_child,
|
|
};
|
|
tran_add(tran, &bdrv_replace_child_drv, s);
|
|
|
|
/* The abort handler relies on this */
|
|
assert(s->old_bs != NULL);
|
|
|
|
if (new_bs) {
|
|
bdrv_ref(new_bs);
|
|
}
|
|
/*
|
|
* Pass free_empty_child=false, we will free the child (if
|
|
* necessary) in bdrv_replace_child_commit() (if our
|
|
* @free_empty_child parameter was true).
|
|
*/
|
|
bdrv_replace_child_noperm(childp, new_bs, false);
|
|
/* old_bs reference is transparently moved from *childp to @s */
|
|
}
|
|
|
|
/*
|
|
* Refresh permissions in @bs subtree. The function is intended to be called
|
|
* after some graph modification that was done without permission update.
|
|
*/
|
|
static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
|
|
Transaction *tran, Error **errp)
|
|
{
|
|
BlockDriver *drv = bs->drv;
|
|
BdrvChild *c;
|
|
int ret;
|
|
uint64_t cumulative_perms, cumulative_shared_perms;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
|
|
|
|
/* Write permissions never work with read-only images */
|
|
if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
|
|
!bdrv_is_writable_after_reopen(bs, q))
|
|
{
|
|
if (!bdrv_is_writable_after_reopen(bs, NULL)) {
|
|
error_setg(errp, "Block node is read-only");
|
|
} else {
|
|
error_setg(errp, "Read-only block node '%s' cannot support "
|
|
"read-write users", bdrv_get_node_name(bs));
|
|
}
|
|
|
|
return -EPERM;
|
|
}
|
|
|
|
/*
|
|
* Unaligned requests will automatically be aligned to bl.request_alignment
|
|
* and without RESIZE we can't extend requests to write to space beyond the
|
|
* end of the image, so it's required that the image size is aligned.
|
|
*/
|
|
if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
|
|
!(cumulative_perms & BLK_PERM_RESIZE))
|
|
{
|
|
if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
|
|
error_setg(errp, "Cannot get 'write' permission without 'resize': "
|
|
"Image size is not a multiple of request "
|
|
"alignment");
|
|
return -EPERM;
|
|
}
|
|
}
|
|
|
|
/* Check this node */
|
|
if (!drv) {
|
|
return 0;
|
|
}
|
|
|
|
ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran,
|
|
errp);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
|
|
/* Drivers that never have children can omit .bdrv_child_perm() */
|
|
if (!drv->bdrv_child_perm) {
|
|
assert(QLIST_EMPTY(&bs->children));
|
|
return 0;
|
|
}
|
|
|
|
/* Check all children */
|
|
QLIST_FOREACH(c, &bs->children, next) {
|
|
uint64_t cur_perm, cur_shared;
|
|
|
|
bdrv_child_perm(bs, c->bs, c, c->role, q,
|
|
cumulative_perms, cumulative_shared_perms,
|
|
&cur_perm, &cur_shared);
|
|
bdrv_child_set_perm(c, cur_perm, cur_shared, tran);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
|
|
Transaction *tran, Error **errp)
|
|
{
|
|
int ret;
|
|
BlockDriverState *bs;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
for ( ; list; list = list->next) {
|
|
bs = list->data;
|
|
|
|
if (bdrv_parent_perms_conflict(bs, errp)) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
ret = bdrv_node_refresh_perm(bs, q, tran, errp);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Combine the permissions of all parent edges of @bs.
 *
 * *@perm receives the OR of every parent's perm (0 if there are no parents).
 * *@shared_perm receives the AND of every parent's shared_perm
 * (BLK_PERM_ALL if there are no parents).
 */
void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
                              uint64_t *shared_perm)
{
    BdrvChild *parent;
    uint64_t required = 0;
    uint64_t tolerated = BLK_PERM_ALL;

    GLOBAL_STATE_CODE();

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        required |= parent->perm;
        tolerated &= parent->shared_perm;
    }

    *perm = required;
    *shared_perm = tolerated;
}
|
|
|
|
char *bdrv_perm_names(uint64_t perm)
|
|
{
|
|
struct perm_name {
|
|
uint64_t perm;
|
|
const char *name;
|
|
} permissions[] = {
|
|
{ BLK_PERM_CONSISTENT_READ, "consistent read" },
|
|
{ BLK_PERM_WRITE, "write" },
|
|
{ BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
|
|
{ BLK_PERM_RESIZE, "resize" },
|
|
{ 0, NULL }
|
|
};
|
|
|
|
GString *result = g_string_sized_new(30);
|
|
struct perm_name *p;
|
|
|
|
for (p = permissions; p->name; p++) {
|
|
if (perm & p->perm) {
|
|
if (result->len > 0) {
|
|
g_string_append(result, ", ");
|
|
}
|
|
g_string_append(result, p->name);
|
|
}
|
|
}
|
|
|
|
return g_string_free(result, FALSE);
|
|
}
|
|
|
|
|
|
/*
 * Refresh the permissions of @bs and of every node reachable from it,
 * inside a transaction of its own that is finalized with the result.
 *
 * Returns the value of bdrv_list_refresh_perms().
 */
static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
{
    Transaction *txn = tran_new();
    /* Topologically sorted list of nodes starting at @bs */
    g_autoptr(GSList) nodes = bdrv_topological_dfs(NULL, NULL, bs);
    int status;
    GLOBAL_STATE_CODE();

    status = bdrv_list_refresh_perms(nodes, NULL, txn, errp);
    tran_finalize(txn, status);

    return status;
}
|
|
|
|
int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
|
|
Error **errp)
|
|
{
|
|
Error *local_err = NULL;
|
|
Transaction *tran = tran_new();
|
|
int ret;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bdrv_child_set_perm(c, perm, shared, tran);
|
|
|
|
ret = bdrv_refresh_perms(c->bs, &local_err);
|
|
|
|
tran_finalize(tran, ret);
|
|
|
|
if (ret < 0) {
|
|
if ((perm & ~c->perm) || (c->shared_perm & ~shared)) {
|
|
/* tighten permissions */
|
|
error_propagate(errp, local_err);
|
|
} else {
|
|
/*
|
|
* Our caller may intend to only loosen restrictions and
|
|
* does not expect this function to fail. Errors are not
|
|
* fatal in such a case, so we can just hide them from our
|
|
* caller.
|
|
*/
|
|
error_free(local_err);
|
|
ret = 0;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Recompute the permissions that @bs needs on its child @c from the
 * cumulative permissions of @bs, and try to apply them.
 *
 * Returns the result of bdrv_child_try_set_perm().
 */
int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
{
    uint64_t cumulative_perm, cumulative_shared;
    uint64_t child_perm, child_shared;

    GLOBAL_STATE_CODE();

    /* What the parents of @bs require from and share on @bs */
    bdrv_get_cumulative_perm(bs, &cumulative_perm, &cumulative_shared);

    /* What @bs in turn needs on @c; no reopen queue is involved */
    bdrv_child_perm(bs, c->bs, c, c->role, NULL,
                    cumulative_perm, cumulative_shared,
                    &child_perm, &child_shared);

    return bdrv_child_try_set_perm(c, child_perm, child_shared, errp);
}
|
|
|
|
/*
|
|
* Default implementation for .bdrv_child_perm() for block filters:
|
|
* Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the
|
|
* filtered child.
|
|
*/
|
|
static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
|
|
BdrvChildRole role,
|
|
BlockReopenQueue *reopen_queue,
|
|
uint64_t perm, uint64_t shared,
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
*nperm = perm & DEFAULT_PERM_PASSTHROUGH;
|
|
*nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
|
|
}
|
|
|
|
static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
|
|
BdrvChildRole role,
|
|
BlockReopenQueue *reopen_queue,
|
|
uint64_t perm, uint64_t shared,
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
{
|
|
assert(role & BDRV_CHILD_COW);
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/*
|
|
* We want consistent read from backing files if the parent needs it.
|
|
* No other operations are performed on backing files.
|
|
*/
|
|
perm &= BLK_PERM_CONSISTENT_READ;
|
|
|
|
/*
|
|
* If the parent can deal with changing data, we're okay with a
|
|
* writable and resizable backing file.
|
|
* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too?
|
|
*/
|
|
if (shared & BLK_PERM_WRITE) {
|
|
shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
|
|
} else {
|
|
shared = 0;
|
|
}
|
|
|
|
shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
|
|
|
|
if (bs->open_flags & BDRV_O_INACTIVE) {
|
|
shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
|
|
}
|
|
|
|
*nperm = perm;
|
|
*nshared = shared;
|
|
}
|
|
|
|
static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
|
|
BdrvChildRole role,
|
|
BlockReopenQueue *reopen_queue,
|
|
uint64_t perm, uint64_t shared,
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
{
|
|
int flags;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
|
|
|
|
flags = bdrv_reopen_get_flags(reopen_queue, bs);
|
|
|
|
/*
|
|
* Apart from the modifications below, the same permissions are
|
|
* forwarded and left alone as for filters
|
|
*/
|
|
bdrv_filter_default_perms(bs, c, role, reopen_queue,
|
|
perm, shared, &perm, &shared);
|
|
|
|
if (role & BDRV_CHILD_METADATA) {
|
|
/* Format drivers may touch metadata even if the guest doesn't write */
|
|
if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
|
|
perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
|
|
}
|
|
|
|
/*
|
|
* bs->file always needs to be consistent because of the
|
|
* metadata. We can never allow other users to resize or write
|
|
* to it.
|
|
*/
|
|
if (!(flags & BDRV_O_NO_IO)) {
|
|
perm |= BLK_PERM_CONSISTENT_READ;
|
|
}
|
|
shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
|
|
}
|
|
|
|
if (role & BDRV_CHILD_DATA) {
|
|
/*
|
|
* Technically, everything in this block is a subset of the
|
|
* BDRV_CHILD_METADATA path taken above, and so this could
|
|
* be an "else if" branch. However, that is not obvious, and
|
|
* this function is not performance critical, therefore we let
|
|
* this be an independent "if".
|
|
*/
|
|
|
|
/*
|
|
* We cannot allow other users to resize the file because the
|
|
* format driver might have some assumptions about the size
|
|
* (e.g. because it is stored in metadata, or because the file
|
|
* is split into fixed-size data files).
|
|
*/
|
|
shared &= ~BLK_PERM_RESIZE;
|
|
|
|
/*
|
|
* WRITE_UNCHANGED often cannot be performed as such on the
|
|
* data file. For example, the qcow2 driver may still need to
|
|
* write copied clusters on copy-on-read.
|
|
*/
|
|
if (perm & BLK_PERM_WRITE_UNCHANGED) {
|
|
perm |= BLK_PERM_WRITE;
|
|
}
|
|
|
|
/*
|
|
* If the data file is written to, the format driver may
|
|
* expect to be able to resize it by writing beyond the EOF.
|
|
*/
|
|
if (perm & BLK_PERM_WRITE) {
|
|
perm |= BLK_PERM_RESIZE;
|
|
}
|
|
}
|
|
|
|
if (bs->open_flags & BDRV_O_INACTIVE) {
|
|
shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
|
|
}
|
|
|
|
*nperm = perm;
|
|
*nshared = shared;
|
|
}
|
|
|
|
void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
|
|
BdrvChildRole role, BlockReopenQueue *reopen_queue,
|
|
uint64_t perm, uint64_t shared,
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
if (role & BDRV_CHILD_FILTERED) {
|
|
assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
|
|
BDRV_CHILD_COW)));
|
|
bdrv_filter_default_perms(bs, c, role, reopen_queue,
|
|
perm, shared, nperm, nshared);
|
|
} else if (role & BDRV_CHILD_COW) {
|
|
assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA)));
|
|
bdrv_default_perms_for_cow(bs, c, role, reopen_queue,
|
|
perm, shared, nperm, nshared);
|
|
} else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) {
|
|
bdrv_default_perms_for_storage(bs, c, role, reopen_queue,
|
|
perm, shared, nperm, nshared);
|
|
} else {
|
|
g_assert_not_reached();
|
|
}
|
|
}
|
|
|
|
/*
 * Translate a QAPI BlockPermission value into the corresponding BLK_PERM_*
 * bit. @qapi_perm must be below BLOCK_PERMISSION__MAX.
 */
uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
{
    static const uint64_t permissions[] = {
        [BLOCK_PERMISSION_RESIZE]           = BLK_PERM_RESIZE,
        [BLOCK_PERMISSION_WRITE_UNCHANGED]  = BLK_PERM_WRITE_UNCHANGED,
        [BLOCK_PERMISSION_WRITE]            = BLK_PERM_WRITE,
        [BLOCK_PERMISSION_CONSISTENT_READ]  = BLK_PERM_CONSISTENT_READ,
    };

    /* The table must have one slot per QAPI permission ... */
    QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
    /* ... and BLK_PERM_ALL must be exactly that many low bits set */
    QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1);

    assert(qapi_perm < BLOCK_PERMISSION__MAX);

    return permissions[qapi_perm];
}
|
|
|
|
/**
|
|
* Replace (*childp)->bs by @new_bs.
|
|
*
|
|
* If @new_bs is NULL, *childp will be set to NULL, too: BDS parents
|
|
* generally cannot handle a BdrvChild with .bs == NULL, so clearing
|
|
* BdrvChild.bs should generally immediately be followed by the
|
|
* BdrvChild pointer being cleared as well.
|
|
*
|
|
* If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
|
|
* freed. @free_empty_child should only be false if the caller will
|
|
* free the BdrvChild themselves (this may be important in a
|
|
* transactional context, where it may only be freed on commit).
|
|
*/
|
|
static void bdrv_replace_child_noperm(BdrvChild **childp,
|
|
BlockDriverState *new_bs,
|
|
bool free_empty_child)
|
|
{
|
|
BdrvChild *child = *childp;
|
|
BlockDriverState *old_bs = child->bs;
|
|
int new_bs_quiesce_counter;
|
|
int drain_saldo;
|
|
|
|
assert(!child->frozen);
|
|
assert(old_bs != new_bs);
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (old_bs && new_bs) {
|
|
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
|
|
}
|
|
|
|
new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
|
|
drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
|
|
|
|
/*
|
|
* If the new child node is drained but the old one was not, flush
|
|
* all outstanding requests to the old child node.
|
|
*/
|
|
while (drain_saldo > 0 && child->klass->drained_begin) {
|
|
bdrv_parent_drained_begin_single(child, true);
|
|
drain_saldo--;
|
|
}
|
|
|
|
if (old_bs) {
|
|
/* Detach first so that the recursive drain sections coming from @child
|
|
* are already gone and we only end the drain sections that came from
|
|