mirror of
https://gitlab.com/qemu-project/qemu.git
synced 2024-09-13 20:26:46 +03:00
bcd63b55b0
Recent commit "qapi: Smarter camel_to_upper() to reduce need for 'prefix'" added a temporary 'prefix' to delay changing the generated code. Revert it. This improves XDbgBlockGraphNodeType's generated enumeration constant prefix from X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND to XDBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND. Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-ID: <20240904111836.3273842-4-armbru@redhat.com>
8433 lines
247 KiB
C
8433 lines
247 KiB
C
/*
|
|
* QEMU System Emulator block driver
|
|
*
|
|
* Copyright (c) 2003 Fabrice Bellard
|
|
* Copyright (c) 2020 Virtuozzo International GmbH.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "block/trace.h"
|
|
#include "block/block_int.h"
|
|
#include "block/blockjob.h"
|
|
#include "block/dirty-bitmap.h"
|
|
#include "block/fuse.h"
|
|
#include "block/nbd.h"
|
|
#include "block/qdict.h"
|
|
#include "qemu/error-report.h"
|
|
#include "block/module_block.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "qemu/module.h"
|
|
#include "qapi/error.h"
|
|
#include "qapi/qmp/qdict.h"
|
|
#include "qapi/qmp/qjson.h"
|
|
#include "qapi/qmp/qnull.h"
|
|
#include "qapi/qmp/qstring.h"
|
|
#include "qapi/qobject-output-visitor.h"
|
|
#include "qapi/qapi-visit-block-core.h"
|
|
#include "sysemu/block-backend.h"
|
|
#include "qemu/notify.h"
|
|
#include "qemu/option.h"
|
|
#include "qemu/coroutine.h"
|
|
#include "block/qapi.h"
|
|
#include "qemu/timer.h"
|
|
#include "qemu/cutils.h"
|
|
#include "qemu/id.h"
|
|
#include "qemu/range.h"
|
|
#include "qemu/rcu.h"
|
|
#include "block/coroutines.h"
|
|
|
|
#ifdef CONFIG_BSD
|
|
#include <sys/ioctl.h>
|
|
#include <sys/queue.h>
|
|
#if defined(HAVE_SYS_DISK_H)
|
|
#include <sys/disk.h>
|
|
#endif
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
#include <windows.h>
|
|
#endif
|
|
|
|
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
|
|
|
|
/* Protected by BQL */
|
|
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
|
|
QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
|
|
|
|
/* Protected by BQL */
|
|
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
|
|
QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
|
|
|
|
/* Protected by BQL */
|
|
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
|
|
QLIST_HEAD_INITIALIZER(bdrv_drivers);
|
|
|
|
static BlockDriverState *bdrv_open_inherit(const char *filename,
|
|
const char *reference,
|
|
QDict *options, int flags,
|
|
BlockDriverState *parent,
|
|
const BdrvChildClass *child_class,
|
|
BdrvChildRole child_role,
|
|
bool parse_filename,
|
|
Error **errp);
|
|
|
|
static bool bdrv_recurse_has_child(BlockDriverState *bs,
|
|
BlockDriverState *child);
|
|
|
|
static void GRAPH_WRLOCK
|
|
bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs);
|
|
|
|
static void GRAPH_WRLOCK
|
|
bdrv_remove_child(BdrvChild *child, Transaction *tran);
|
|
|
|
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
|
|
BlockReopenQueue *queue,
|
|
Transaction *change_child_tran, Error **errp);
|
|
static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
|
|
static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
|
|
|
|
static bool bdrv_backing_overridden(BlockDriverState *bs);
|
|
|
|
static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
|
GHashTable *visited, Transaction *tran,
|
|
Error **errp);
|
|
|
|
/* If non-zero, use only whitelisted block drivers */
|
|
static int use_bdrv_whitelist;
|
|
|
|
#ifdef _WIN32
|
|
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed
 * by ':' (for example "c:"), and 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    int is_lower = (first >= 'a' && first <= 'z');
    int is_upper = (first >= 'A' && first <= 'Z');

    if (!is_lower && !is_upper) {
        /* Second character is deliberately not inspected in this case */
        return 0;
    }

    return filename[1] == ':';
}
|
|
|
|
int is_windows_drive(const char *filename)
|
|
{
|
|
if (is_windows_drive_prefix(filename) &&
|
|
filename[2] == '\0')
|
|
return 1;
|
|
if (strstart(filename, "\\\\.\\", NULL) ||
|
|
strstart(filename, "//./", NULL))
|
|
return 1;
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
/*
 * Return bs->bl.opt_mem_alignment, or a conservative default when @bs is
 * NULL or has no driver.
 */
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (bs && bs->drv) {
        IO_CODE();

        return bs->bl.opt_mem_alignment;
    }

    /* page size or 4k (hdd sector size) should be on the safe side */
    return MAX(4096, qemu_real_host_page_size());
}
|
|
|
|
/*
 * Return bs->bl.min_mem_alignment, or a conservative default when @bs is
 * NULL or has no driver.
 */
size_t bdrv_min_mem_align(BlockDriverState *bs)
{
    if (bs && bs->drv) {
        IO_CODE();

        return bs->bl.min_mem_alignment;
    }

    /* page size or 4k (hdd sector size) should be on the safe side */
    return MAX(4096, qemu_real_host_page_size());
}
|
|
|
|
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' occurs
 * before the first path separator.  On Windows, drive specifications are
 * never treated as protocols.
 */
int path_has_protocol(const char *path)
{
    size_t stop;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    stop = strcspn(path, ":/\\");
#else
    stop = strcspn(path, ":/");
#endif

    /* path[stop] is ':', a separator, or the terminating NUL */
    return path[stop] == ':';
}
|
|
|
|
/*
 * Return non-zero if @path is absolute: it begins with '/' (or, on Windows,
 * with '\\' or a drive/device specification).
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (first == '/' || first == '\\');
#else
    return (first == '/');
#endif
}
|
|
|
|
/*
 * If @filename is absolute, just return its duplicate.  Otherwise, build a
 * path to it by considering it relative to @base_path.  URLs are supported.
 *
 * The directory part of @base_path is everything up to and including the
 * last path separator, but never less than the "<protocol>:" prefix when
 * @base_path has one.
 *
 * Returns a newly allocated string; the caller releases it with g_free().
 */
char *path_combine(const char *base_path, const char *filename)
{
    const char *dir_end = base_path;    /* one past the directory part */
    const char *last_sep;
    size_t dir_len, name_len;
    char *result;

    if (path_is_absolute(filename)) {
        return g_strdup(filename);
    }

    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');

        if (colon) {
            /* Keep the protocol prefix, including its ':' */
            dir_end = colon + 1;
        }
    }

    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_bslash = strrchr(base_path, '\\');

        /* Only order the two pointers when both really point into the path */
        if (last_bslash && (!last_sep || last_bslash > last_sep)) {
            last_sep = last_bslash;
        }
    }
#endif
    if (last_sep && last_sep + 1 > dir_end) {
        dir_end = last_sep + 1;
    }

    /* dir_end never lies before base_path, so the difference is >= 0 */
    dir_len = dir_end - base_path;
    name_len = strlen(filename);

    result = g_malloc(dir_len + name_len + 1);
    memcpy(result, base_path, dir_len);
    /* Copies the terminating NUL of @filename as well */
    memcpy(result + dir_len, filename, name_len + 1);

    return result;
}
|
|
|
|
/*
|
|
* Helper function for bdrv_parse_filename() implementations to remove optional
|
|
* protocol prefixes (especially "file:") from a filename and for putting the
|
|
* stripped filename into the options QDict if there is such a prefix.
|
|
*/
|
|
void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
|
|
QDict *options)
|
|
{
|
|
if (strstart(filename, prefix, &filename)) {
|
|
/* Stripping the explicit protocol prefix may result in a protocol
|
|
* prefix being (wrongly) detected (if the filename contains a colon) */
|
|
if (path_has_protocol(filename)) {
|
|
GString *fat_filename;
|
|
|
|
/* This means there is some colon before the first slash; therefore,
|
|
* this cannot be an absolute path */
|
|
assert(!path_is_absolute(filename));
|
|
|
|
/* And we can thus fix the protocol detection issue by prefixing it
|
|
* by "./" */
|
|
fat_filename = g_string_new("./");
|
|
g_string_append(fat_filename, filename);
|
|
|
|
assert(!path_has_protocol(fat_filename->str));
|
|
|
|
qdict_put(options, "filename",
|
|
qstring_from_gstring(fat_filename));
|
|
} else {
|
|
/* If no protocol prefix was detected, we can use the shortened
|
|
* filename as-is */
|
|
qdict_put_str(options, "filename", filename);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* Returns whether the image file is opened as read-only. Note that this can
|
|
* return false and writing to the image file is still not possible because the
|
|
* image is inactivated. */
|
|
bool bdrv_is_read_only(BlockDriverState *bs)
|
|
{
|
|
IO_CODE();
|
|
return !(bs->open_flags & BDRV_O_RDWR);
|
|
}
|
|
|
|
static int GRAPH_RDLOCK
|
|
bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
|
|
bool ignore_allow_rdw, Error **errp)
|
|
{
|
|
IO_CODE();
|
|
|
|
/* Do not set read_only if copy_on_read is enabled */
|
|
if (bs->copy_on_read && read_only) {
|
|
error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
|
|
bdrv_get_device_or_node_name(bs));
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Do not clear read_only if it is prohibited */
|
|
if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) &&
|
|
!ignore_allow_rdw)
|
|
{
|
|
error_setg(errp, "Node '%s' is read only",
|
|
bdrv_get_device_or_node_name(bs));
|
|
return -EPERM;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Called by a driver that can only provide a read-only image.
|
|
*
|
|
* Returns 0 if the node is already read-only or it could switch the node to
|
|
* read-only because BDRV_O_AUTO_RDONLY is set.
|
|
*
|
|
* Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set
|
|
* or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg
|
|
* is not NULL, it is used as the error message for the Error object.
|
|
*/
|
|
int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
|
|
Error **errp)
|
|
{
|
|
int ret = 0;
|
|
IO_CODE();
|
|
|
|
if (!(bs->open_flags & BDRV_O_RDWR)) {
|
|
return 0;
|
|
}
|
|
if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) {
|
|
goto fail;
|
|
}
|
|
|
|
ret = bdrv_can_set_read_only(bs, true, false, NULL);
|
|
if (ret < 0) {
|
|
goto fail;
|
|
}
|
|
|
|
bs->open_flags &= ~BDRV_O_RDWR;
|
|
|
|
return 0;
|
|
|
|
fail:
|
|
error_setg(errp, "%s", errmsg ?: "Image is read-only");
|
|
return -EACCES;
|
|
}
|
|
|
|
/*
|
|
* If @backing is empty, this function returns NULL without setting
|
|
* @errp. In all other cases, NULL will only be returned with @errp
|
|
* set.
|
|
*
|
|
* Therefore, a return value of NULL without @errp set means that
|
|
* there is no backing file; if @errp is set, there is one but its
|
|
* absolute filename cannot be generated.
|
|
*/
|
|
char *bdrv_get_full_backing_filename_from_filename(const char *backed,
|
|
const char *backing,
|
|
Error **errp)
|
|
{
|
|
if (backing[0] == '\0') {
|
|
return NULL;
|
|
} else if (path_has_protocol(backing) || path_is_absolute(backing)) {
|
|
return g_strdup(backing);
|
|
} else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
|
|
error_setg(errp, "Cannot use relative backing file names for '%s'",
|
|
backed);
|
|
return NULL;
|
|
} else {
|
|
return path_combine(backed, backing);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If @filename is empty or NULL, this function returns NULL without
|
|
* setting @errp. In all other cases, NULL will only be returned with
|
|
* @errp set.
|
|
*/
|
|
static char * GRAPH_RDLOCK
|
|
bdrv_make_absolute_filename(BlockDriverState *relative_to,
|
|
const char *filename, Error **errp)
|
|
{
|
|
char *dir, *full_name;
|
|
|
|
if (!filename || filename[0] == '\0') {
|
|
return NULL;
|
|
} else if (path_has_protocol(filename) || path_is_absolute(filename)) {
|
|
return g_strdup(filename);
|
|
}
|
|
|
|
dir = bdrv_dirname(relative_to, errp);
|
|
if (!dir) {
|
|
return NULL;
|
|
}
|
|
|
|
full_name = g_strconcat(dir, filename, NULL);
|
|
g_free(dir);
|
|
return full_name;
|
|
}
|
|
|
|
/*
 * Return the result of bdrv_make_absolute_filename() for the backing file
 * name stored in @bs, taken relative to @bs itself.
 */
char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
{
    char *full_name;

    GLOBAL_STATE_CODE();

    full_name = bdrv_make_absolute_filename(bs, bs->backing_file, errp);
    return full_name;
}
|
|
|
|
/*
 * Add @bdrv at the head of the list of known block drivers.  The driver
 * must have a format name.
 */
void bdrv_register(BlockDriver *bdrv)
{
    assert(bdrv->format_name != NULL);
    GLOBAL_STATE_CODE();

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
|
|
|
|
BlockDriverState *bdrv_new(void)
|
|
{
|
|
BlockDriverState *bs;
|
|
int i;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bs = g_new0(BlockDriverState, 1);
|
|
QLIST_INIT(&bs->dirty_bitmaps);
|
|
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
|
|
QLIST_INIT(&bs->op_blockers[i]);
|
|
}
|
|
qemu_mutex_init(&bs->reqs_lock);
|
|
qemu_mutex_init(&bs->dirty_bitmap_mutex);
|
|
bs->refcnt = 1;
|
|
bs->aio_context = qemu_get_aio_context();
|
|
|
|
qemu_co_queue_init(&bs->flush_queue);
|
|
|
|
qemu_co_mutex_init(&bs->bsc_modify_lock);
|
|
bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
|
|
|
|
for (i = 0; i < bdrv_drain_all_count; i++) {
|
|
bdrv_drained_begin(bs);
|
|
}
|
|
|
|
QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
|
|
|
|
return bs;
|
|
}
|
|
|
|
static BlockDriver *bdrv_do_find_format(const char *format_name)
|
|
{
|
|
BlockDriver *drv1;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
QLIST_FOREACH(drv1, &bdrv_drivers, list) {
|
|
if (!strcmp(drv1->format_name, format_name)) {
|
|
return drv1;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
BlockDriver *bdrv_find_format(const char *format_name)
|
|
{
|
|
BlockDriver *drv1;
|
|
int i;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
drv1 = bdrv_do_find_format(format_name);
|
|
if (drv1) {
|
|
return drv1;
|
|
}
|
|
|
|
/* The driver isn't registered, maybe we need to load a module */
|
|
for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
|
|
if (!strcmp(block_driver_modules[i].format_name, format_name)) {
|
|
Error *local_err = NULL;
|
|
int rv = block_module_load(block_driver_modules[i].library_name,
|
|
&local_err);
|
|
if (rv > 0) {
|
|
return bdrv_do_find_format(format_name);
|
|
} else if (rv < 0) {
|
|
error_report_err(local_err);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
|
|
{
|
|
static const char *whitelist_rw[] = {
|
|
CONFIG_BDRV_RW_WHITELIST
|
|
NULL
|
|
};
|
|
static const char *whitelist_ro[] = {
|
|
CONFIG_BDRV_RO_WHITELIST
|
|
NULL
|
|
};
|
|
const char **p;
|
|
|
|
if (!whitelist_rw[0] && !whitelist_ro[0]) {
|
|
return 1; /* no whitelist, anything goes */
|
|
}
|
|
|
|
for (p = whitelist_rw; *p; p++) {
|
|
if (!strcmp(format_name, *p)) {
|
|
return 1;
|
|
}
|
|
}
|
|
if (read_only) {
|
|
for (p = whitelist_ro; *p; p++) {
|
|
if (!strcmp(format_name, *p)) {
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Whitelist check for @drv, based on its format name */
int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    const char *name;

    GLOBAL_STATE_CODE();

    name = drv->format_name;
    return bdrv_format_is_whitelisted(name, read_only);
}
|
|
|
|
bool bdrv_uses_whitelist(void)
|
|
{
|
|
return use_bdrv_whitelist;
|
|
}
|
|
|
|
typedef struct CreateCo {
|
|
BlockDriver *drv;
|
|
char *filename;
|
|
QemuOpts *opts;
|
|
int ret;
|
|
Error *err;
|
|
} CreateCo;
|
|
|
|
/*
 * Create an image through @drv's bdrv_co_create_opts callback.
 *
 * Returns -ENOTSUP with @errp set if the driver has no such callback.
 * Otherwise returns the callback's result; if that is negative and the
 * callback did not set an error, a generic one is set here.
 */
int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
                                QemuOpts *opts, Error **errp)
{
    ERRP_GUARD();
    int ret;

    GLOBAL_STATE_CODE();

    if (drv->bdrv_co_create_opts == NULL) {
        error_setg(errp, "Driver '%s' does not support image creation",
                   drv->format_name);
        return -ENOTSUP;
    }

    ret = drv->bdrv_co_create_opts(drv, filename, opts, errp);
    if (ret >= 0 || *errp) {
        return ret;
    }

    /* The driver failed without saying why */
    error_setg_errno(errp, -ret, "Could not create image");
    return ret;
}
|
|
|
|
/**
|
|
* Helper function for bdrv_create_file_fallback(): Resize @blk to at
|
|
* least the given @minimum_size.
|
|
*
|
|
* On success, return @blk's actual length.
|
|
* Otherwise, return -errno.
|
|
*/
|
|
static int64_t coroutine_fn GRAPH_UNLOCKED
|
|
create_file_fallback_truncate(BlockBackend *blk, int64_t minimum_size,
|
|
Error **errp)
|
|
{
|
|
Error *local_err = NULL;
|
|
int64_t size;
|
|
int ret;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
ret = blk_co_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
|
|
&local_err);
|
|
if (ret < 0 && ret != -ENOTSUP) {
|
|
error_propagate(errp, local_err);
|
|
return ret;
|
|
}
|
|
|
|
size = blk_co_getlength(blk);
|
|
if (size < 0) {
|
|
error_free(local_err);
|
|
error_setg_errno(errp, -size,
|
|
"Failed to inquire the new image file's length");
|
|
return size;
|
|
}
|
|
|
|
if (size < minimum_size) {
|
|
/* Need to grow the image, but we failed to do that */
|
|
error_propagate(errp, local_err);
|
|
return -ENOTSUP;
|
|
}
|
|
|
|
error_free(local_err);
|
|
local_err = NULL;
|
|
|
|
return size;
|
|
}
|
|
|
|
/**
|
|
* Helper function for bdrv_create_file_fallback(): Zero the first
|
|
* sector to remove any potentially pre-existing image header.
|
|
*/
|
|
static int coroutine_fn
|
|
create_file_fallback_zero_first_sector(BlockBackend *blk,
|
|
int64_t current_size,
|
|
Error **errp)
|
|
{
|
|
int64_t bytes_to_clear;
|
|
int ret;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
|
|
if (bytes_to_clear) {
|
|
ret = blk_co_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
|
|
if (ret < 0) {
|
|
error_setg_errno(errp, -ret,
|
|
"Failed to clear the new image's first sector");
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Simple implementation of bdrv_co_create_opts for protocol drivers
|
|
* which only support creation via opening a file
|
|
* (usually existing raw storage device)
|
|
*/
|
|
int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
|
|
const char *filename,
|
|
QemuOpts *opts,
|
|
Error **errp)
|
|
{
|
|
ERRP_GUARD();
|
|
BlockBackend *blk;
|
|
QDict *options;
|
|
int64_t size = 0;
|
|
char *buf = NULL;
|
|
PreallocMode prealloc;
|
|
Error *local_err = NULL;
|
|
int ret;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
|
|
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
|
|
prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
|
|
PREALLOC_MODE_OFF, &local_err);
|
|
g_free(buf);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (prealloc != PREALLOC_MODE_OFF) {
|
|
error_setg(errp, "Unsupported preallocation mode '%s'",
|
|
PreallocMode_str(prealloc));
|
|
return -ENOTSUP;
|
|
}
|
|
|
|
options = qdict_new();
|
|
qdict_put_str(options, "driver", drv->format_name);
|
|
|
|
blk = blk_co_new_open(filename, NULL, options,
|
|
BDRV_O_RDWR | BDRV_O_RESIZE, errp);
|
|
if (!blk) {
|
|
error_prepend(errp, "Protocol driver '%s' does not support creating "
|
|
"new images, so an existing image must be selected as "
|
|
"the target; however, opening the given target as an "
|
|
"existing image failed: ",
|
|
drv->format_name);
|
|
return -EINVAL;
|
|
}
|
|
|
|
size = create_file_fallback_truncate(blk, size, errp);
|
|
if (size < 0) {
|
|
ret = size;
|
|
goto out;
|
|
}
|
|
|
|
ret = create_file_fallback_zero_first_sector(blk, size, errp);
|
|
if (ret < 0) {
|
|
goto out;
|
|
}
|
|
|
|
ret = 0;
|
|
out:
|
|
blk_co_unref(blk);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Create the file @filename with the protocol driver found for it.
 *
 * Returns -ENOENT if no protocol driver is found, -ENOTSUP if the driver
 * has no create_opts, -EINVAL if the options cannot be converted, and
 * otherwise the result of bdrv_co_create().
 */
int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
                                     Error **errp)
{
    QemuOpts *protocol_opts;
    BlockDriver *drv;
    QDict *set_options;
    int ret;

    GLOBAL_STATE_CODE();

    drv = bdrv_find_protocol(filename, true, errp);
    if (!drv) {
        return -ENOENT;
    }

    if (!drv->create_opts) {
        error_setg(errp, "Driver '%s' does not support image creation",
                   drv->format_name);
        return -ENOTSUP;
    }

    /*
     * 'opts' contains a QemuOptsList with a combination of format and
     * protocol default values.
     *
     * The format properly removes its options, but the default values
     * remain in 'opts->list'.  So if the protocol has options with the same
     * name (e.g. rbd has 'cluster_size' as qcow2), it will see the default
     * values of the format, since for overlapping options, the format wins.
     *
     * To avoid this, convert QemuOpts to a QDict, which takes only the
     * options that were set, and convert it back to QemuOpts using the
     * create_opts of the protocol.  The new QemuOpts then contains only the
     * protocol defaults.
     */
    set_options = qemu_opts_to_qdict(opts, NULL);
    protocol_opts = qemu_opts_from_qdict(drv->create_opts, set_options, errp);

    ret = protocol_opts
        ? bdrv_co_create(drv, filename, protocol_opts, errp)
        : -EINVAL;

    qemu_opts_del(protocol_opts);
    qobject_unref(set_options);
    return ret;
}
|
|
|
|
/*
 * Delete the image file behind @bs through the driver's
 * bdrv_co_delete_file callback.
 *
 * Returns -ENOMEDIUM if @bs has no driver, -ENOTSUP if the driver has no
 * such callback (both with @errp set), and otherwise the callback's result.
 */
int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
{
    Error *local_err = NULL;
    BlockDriver *drv;
    int ret;

    IO_CODE();
    assert(bs != NULL);
    assert_bdrv_graph_readable();

    drv = bs->drv;
    if (!drv) {
        error_setg(errp, "Block node '%s' is not opened", bs->filename);
        return -ENOMEDIUM;
    }

    if (!drv->bdrv_co_delete_file) {
        error_setg(errp, "Driver '%s' does not support image deletion",
                   drv->format_name);
        return -ENOTSUP;
    }

    ret = drv->bdrv_co_delete_file(bs, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
    }

    return ret;
}
|
|
|
|
/*
 * Variant of bdrv_co_delete_file() that does not return errors to the
 * caller: a NULL @bs is ignored, -ENOTSUP is dropped silently, and any
 * other failure is reported with error_report_err().
 */
void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs)
{
    Error *local_err = NULL;
    int ret;

    IO_CODE();

    if (bs == NULL) {
        return;
    }

    ret = bdrv_co_delete_file(bs, &local_err);
    if (ret >= 0) {
        return;
    }

    if (ret == -ENOTSUP) {
        /*
         * ENOTSUP will happen if the block driver doesn't support
         * the 'bdrv_co_delete_file' interface. This is a predictable
         * scenario and shouldn't be reported back to the user.
         */
        error_free(local_err);
    } else {
        error_report_err(local_err);
    }
}
|
|
|
|
/**
|
|
* Try to get @bs's logical and physical block size.
|
|
* On success, store them in @bsz struct and return 0.
|
|
* On failure return -errno.
|
|
* @bs must not be empty.
|
|
*/
|
|
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
|
|
{
|
|
BlockDriver *drv = bs->drv;
|
|
BlockDriverState *filtered = bdrv_filter_bs(bs);
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (drv && drv->bdrv_probe_blocksizes) {
|
|
return drv->bdrv_probe_blocksizes(bs, bsz);
|
|
} else if (filtered) {
|
|
return bdrv_probe_blocksizes(filtered, bsz);
|
|
}
|
|
|
|
return -ENOTSUP;
|
|
}
|
|
|
|
/**
|
|
* Try to get @bs's geometry (cyls, heads, sectors).
|
|
* On success, store them in @geo struct and return 0.
|
|
* On failure return -errno.
|
|
* @bs must not be empty.
|
|
*/
|
|
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
|
|
{
|
|
BlockDriver *drv = bs->drv;
|
|
BlockDriverState *filtered;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
GRAPH_RDLOCK_GUARD_MAINLOOP();
|
|
|
|
if (drv && drv->bdrv_probe_geometry) {
|
|
return drv->bdrv_probe_geometry(bs, geo);
|
|
}
|
|
|
|
filtered = bdrv_filter_bs(bs);
|
|
if (filtered) {
|
|
return bdrv_probe_geometry(filtered, geo);
|
|
}
|
|
|
|
return -ENOTSUP;
|
|
}
|
|
|
|
/*
|
|
* Create a uniquely-named empty temporary file.
|
|
* Return the actual file name used upon success, otherwise NULL.
|
|
* This string should be freed with g_free() when not needed any longer.
|
|
*
|
|
* Note: creating a temporary file for the caller to (re)open is
|
|
* inherently racy. Use g_file_open_tmp() instead whenever practical.
|
|
*/
|
|
char *create_tmp_file(Error **errp)
|
|
{
|
|
int fd;
|
|
const char *tmpdir;
|
|
g_autofree char *filename = NULL;
|
|
|
|
tmpdir = g_get_tmp_dir();
|
|
#ifndef _WIN32
|
|
/*
|
|
* See commit 69bef79 ("block: use /var/tmp instead of /tmp for -snapshot")
|
|
*
|
|
* This function is used to create temporary disk images (like -snapshot),
|
|
* so the files can become very large. /tmp is often a tmpfs where as
|
|
* /var/tmp is usually on a disk, so more appropriate for disk images.
|
|
*/
|
|
if (!g_strcmp0(tmpdir, "/tmp")) {
|
|
tmpdir = "/var/tmp";
|
|
}
|
|
#endif
|
|
|
|
filename = g_strdup_printf("%s/vl.XXXXXX", tmpdir);
|
|
fd = g_mkstemp(filename);
|
|
if (fd < 0) {
|
|
error_setg_errno(errp, errno, "Could not open temporary file '%s'",
|
|
filename);
|
|
return NULL;
|
|
}
|
|
close(fd);
|
|
|
|
return g_steal_pointer(&filename);
|
|
}
|
|
|
|
/*
|
|
* Detect host devices. By convention, /dev/cdrom[N] is always
|
|
* recognized as a host CDROM.
|
|
*/
|
|
static BlockDriver *find_hdev_driver(const char *filename)
|
|
{
|
|
int score_max = 0, score;
|
|
BlockDriver *drv = NULL, *d;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
QLIST_FOREACH(d, &bdrv_drivers, list) {
|
|
if (d->bdrv_probe_device) {
|
|
score = d->bdrv_probe_device(filename);
|
|
if (score > score_max) {
|
|
score_max = score;
|
|
drv = d;
|
|
}
|
|
}
|
|
}
|
|
|
|
return drv;
|
|
}
|
|
|
|
static BlockDriver *bdrv_do_find_protocol(const char *protocol)
|
|
{
|
|
BlockDriver *drv1;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
QLIST_FOREACH(drv1, &bdrv_drivers, list) {
|
|
if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
|
|
return drv1;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
BlockDriver *bdrv_find_protocol(const char *filename,
|
|
bool allow_protocol_prefix,
|
|
Error **errp)
|
|
{
|
|
BlockDriver *drv1;
|
|
char protocol[128];
|
|
int len;
|
|
const char *p;
|
|
int i;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/*
|
|
* XXX(hch): we really should not let host device detection
|
|
* override an explicit protocol specification, but moving this
|
|
* later breaks access to device names with colons in them.
|
|
* Thanks to the brain-dead persistent naming schemes on udev-
|
|
* based Linux systems those actually are quite common.
|
|
*/
|
|
drv1 = find_hdev_driver(filename);
|
|
if (drv1) {
|
|
return drv1;
|
|
}
|
|
|
|
if (!path_has_protocol(filename) || !allow_protocol_prefix) {
|
|
return &bdrv_file;
|
|
}
|
|
|
|
p = strchr(filename, ':');
|
|
assert(p != NULL);
|
|
len = p - filename;
|
|
if (len > sizeof(protocol) - 1)
|
|
len = sizeof(protocol) - 1;
|
|
memcpy(protocol, filename, len);
|
|
protocol[len] = '\0';
|
|
|
|
drv1 = bdrv_do_find_protocol(protocol);
|
|
if (drv1) {
|
|
return drv1;
|
|
}
|
|
|
|
for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
|
|
if (block_driver_modules[i].protocol_name &&
|
|
!strcmp(block_driver_modules[i].protocol_name, protocol)) {
|
|
int rv = block_module_load(block_driver_modules[i].library_name, errp);
|
|
if (rv > 0) {
|
|
drv1 = bdrv_do_find_protocol(protocol);
|
|
} else if (rv < 0) {
|
|
return NULL;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!drv1) {
|
|
error_setg(errp, "Unknown protocol '%s'", protocol);
|
|
}
|
|
return drv1;
|
|
}
|
|
|
|
/*
|
|
* Guess image format by probing its contents.
|
|
* This is not a good idea when your image is raw (CVE-2008-2004), but
|
|
* we do it anyway for backward compatibility.
|
|
*
|
|
* @buf contains the image's first @buf_size bytes.
|
|
* @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
|
|
* but can be smaller if the image file is smaller)
|
|
* @filename is its filename.
|
|
*
|
|
* For all block drivers, call the bdrv_probe() method to get its
|
|
* probing score.
|
|
* Return the first block driver with the highest probing score.
|
|
*/
|
|
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
|
|
const char *filename)
|
|
{
|
|
int score_max = 0, score;
|
|
BlockDriver *drv = NULL, *d;
|
|
IO_CODE();
|
|
|
|
QLIST_FOREACH(d, &bdrv_drivers, list) {
|
|
if (d->bdrv_probe) {
|
|
score = d->bdrv_probe(buf, buf_size, filename);
|
|
if (score > score_max) {
|
|
score_max = score;
|
|
drv = d;
|
|
}
|
|
}
|
|
}
|
|
|
|
return drv;
|
|
}
|
|
|
|
static int find_image_format(BlockBackend *file, const char *filename,
|
|
BlockDriver **pdrv, Error **errp)
|
|
{
|
|
BlockDriver *drv;
|
|
uint8_t buf[BLOCK_PROBE_BUF_SIZE];
|
|
int ret = 0;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
|
|
if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
|
|
*pdrv = &bdrv_raw;
|
|
return ret;
|
|
}
|
|
|
|
ret = blk_pread(file, 0, sizeof(buf), buf, 0);
|
|
if (ret < 0) {
|
|
error_setg_errno(errp, -ret, "Could not read image for determining its "
|
|
"format");
|
|
*pdrv = NULL;
|
|
return ret;
|
|
}
|
|
|
|
drv = bdrv_probe_all(buf, sizeof(buf), filename);
|
|
if (!drv) {
|
|
error_setg(errp, "Could not determine image format: No compatible "
|
|
"driver found");
|
|
*pdrv = NULL;
|
|
return -ENOENT;
|
|
}
|
|
|
|
*pdrv = drv;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Set the current 'total_sectors' value
|
|
* Return 0 on success, -errno on error.
|
|
*/
|
|
int coroutine_fn bdrv_co_refresh_total_sectors(BlockDriverState *bs,
|
|
int64_t hint)
|
|
{
|
|
BlockDriver *drv = bs->drv;
|
|
IO_CODE();
|
|
assert_bdrv_graph_readable();
|
|
|
|
if (!drv) {
|
|
return -ENOMEDIUM;
|
|
}
|
|
|
|
/* Do not attempt drv->bdrv_co_getlength() on scsi-generic devices */
|
|
if (bdrv_is_sg(bs))
|
|
return 0;
|
|
|
|
/* query actual device if possible, otherwise just trust the hint */
|
|
if (drv->bdrv_co_getlength) {
|
|
int64_t length = drv->bdrv_co_getlength(bs);
|
|
if (length < 0) {
|
|
return length;
|
|
}
|
|
hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
|
|
}
|
|
|
|
bs->total_sectors = hint;
|
|
|
|
if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) {
|
|
return -EFBIG;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Combines a QDict of new block driver @options with any missing options taken
|
|
* from @old_options, so that leaving out an option defaults to its old value.
|
|
*/
|
|
static void bdrv_join_options(BlockDriverState *bs, QDict *options,
|
|
QDict *old_options)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
if (bs->drv && bs->drv->bdrv_join_options) {
|
|
bs->drv->bdrv_join_options(options, old_options);
|
|
} else {
|
|
qdict_join(options, old_options, false);
|
|
}
|
|
}
|
|
|
|
static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
|
|
int open_flags,
|
|
Error **errp)
|
|
{
|
|
Error *local_err = NULL;
|
|
char *value = qemu_opt_get_del(opts, "detect-zeroes");
|
|
BlockdevDetectZeroesOptions detect_zeroes =
|
|
qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
|
|
BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
|
|
GLOBAL_STATE_CODE();
|
|
g_free(value);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
return detect_zeroes;
|
|
}
|
|
|
|
if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
|
|
!(open_flags & BDRV_O_UNMAP))
|
|
{
|
|
error_setg(errp, "setting detect-zeroes to unmap is not allowed "
|
|
"without setting discard operation to unmap");
|
|
}
|
|
|
|
return detect_zeroes;
|
|
}
|
|
|
|
/**
|
|
* Set open flags for aio engine
|
|
*
|
|
* Return 0 on success, -1 if the engine specified is invalid
|
|
*/
|
|
int bdrv_parse_aio(const char *mode, int *flags)
|
|
{
|
|
if (!strcmp(mode, "threads")) {
|
|
/* do nothing, default */
|
|
} else if (!strcmp(mode, "native")) {
|
|
*flags |= BDRV_O_NATIVE_AIO;
|
|
#ifdef CONFIG_LINUX_IO_URING
|
|
} else if (!strcmp(mode, "io_uring")) {
|
|
*flags |= BDRV_O_IO_URING;
|
|
#endif
|
|
} else {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Set open flags for a given discard mode
|
|
*
|
|
* Return 0 on success, -1 if the discard mode was invalid.
|
|
*/
|
|
int bdrv_parse_discard_flags(const char *mode, int *flags)
|
|
{
|
|
*flags &= ~BDRV_O_UNMAP;
|
|
|
|
if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
|
|
/* do nothing */
|
|
} else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
|
|
*flags |= BDRV_O_UNMAP;
|
|
} else {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Set open flags for a given cache mode
|
|
*
|
|
* Return 0 on success, -1 if the cache mode was invalid.
|
|
*/
|
|
int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
|
|
{
|
|
*flags &= ~BDRV_O_CACHE_MASK;
|
|
|
|
if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
|
|
*writethrough = false;
|
|
*flags |= BDRV_O_NOCACHE;
|
|
} else if (!strcmp(mode, "directsync")) {
|
|
*writethrough = true;
|
|
*flags |= BDRV_O_NOCACHE;
|
|
} else if (!strcmp(mode, "writeback")) {
|
|
*writethrough = false;
|
|
} else if (!strcmp(mode, "unsafe")) {
|
|
*writethrough = false;
|
|
*flags |= BDRV_O_NO_FLUSH;
|
|
} else if (!strcmp(mode, "writethrough")) {
|
|
*writethrough = true;
|
|
} else {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Build a description of the parent of @c, which is the BlockDriverState
 * stored in c->opaque.  The result has the form "node '<node name>'" and is
 * a newly allocated string (g_strdup_printf()).
 */
static char *bdrv_child_get_parent_desc(BdrvChild *c)
{
    BlockDriverState *parent_bs = c->opaque;
    const char *parent_name = bdrv_get_node_name(parent_bs);

    return g_strdup_printf("node '%s'", parent_name);
}
|
|
|
|
/*
 * .drained_begin callback of child_of_bds: starts quiescing the parent node
 * of @child (child->opaque) through bdrv_do_drained_begin_quiesce().
 */
static void GRAPH_RDLOCK bdrv_child_cb_drained_begin(BdrvChild *child)
{
    BlockDriverState *parent_bs = child->opaque;

    bdrv_do_drained_begin_quiesce(parent_bs, NULL);
}
|
|
|
|
/*
 * .drained_poll callback of child_of_bds: forwards to bdrv_drain_poll() for
 * the parent node of @child (child->opaque) and returns its result.
 */
static bool GRAPH_RDLOCK bdrv_child_cb_drained_poll(BdrvChild *child)
{
    BlockDriverState *parent_bs = child->opaque;
    bool busy = bdrv_drain_poll(parent_bs, NULL, false);

    return busy;
}
|
|
|
|
/*
 * .drained_end callback of child_of_bds: ends the drained section of the
 * parent node of @child (child->opaque) with bdrv_drained_end().
 */
static void GRAPH_RDLOCK bdrv_child_cb_drained_end(BdrvChild *child)
{
    BlockDriverState *parent_bs = child->opaque;

    bdrv_drained_end(parent_bs);
}
|
|
|
|
/*
 * .inactivate callback of child_of_bds.
 *
 * Does no work of its own: it only asserts that the parent node of @child
 * (child->opaque) already has BDRV_O_INACTIVE set, and returns 0.
 */
static int bdrv_child_cb_inactivate(BdrvChild *child)
{
    BlockDriverState *parent_bs = child->opaque;

    GLOBAL_STATE_CODE();
    assert((parent_bs->open_flags & BDRV_O_INACTIVE) != 0);

    return 0;
}
|
|
|
|
static bool bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx,
|
|
GHashTable *visited, Transaction *tran,
|
|
Error **errp)
|
|
{
|
|
BlockDriverState *bs = child->opaque;
|
|
return bdrv_change_aio_context(bs, ctx, visited, tran, errp);
|
|
}
|
|
|
|
/*
|
|
* Returns the options and flags that a temporary snapshot should get, based on
|
|
* the originally requested flags (the originally requested image will have
|
|
* flags like a backing file)
|
|
*/
|
|
static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
|
|
int parent_flags, QDict *parent_options)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
*child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
|
|
|
|
/* For temporary files, unconditional cache=unsafe is fine */
|
|
qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
|
|
qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
|
|
|
|
/* Copy the read-only and discard options from the parent */
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD);
|
|
|
|
/* aio=native doesn't work for cache.direct=off, so disable it for the
|
|
* temporary snapshot */
|
|
*child_flags &= ~BDRV_O_NATIVE_AIO;
|
|
}
|
|
|
|
/*
 * Bookkeeping for attaching a backing child @c to its parent (c->opaque).
 *
 * Creates parent->backing_blocker (which must not exist yet), refreshes the
 * filename of the backing node, clears BDRV_O_NO_BACKING on the parent and
 * blocks all operations on the backing node except commit target, stream,
 * backup source and backup target.
 */
static void GRAPH_WRLOCK bdrv_backing_attach(BdrvChild *c)
{
    BlockDriverState *overlay = c->opaque;
    BlockDriverState *backing_bs = c->bs;

    GLOBAL_STATE_CODE();

    assert(!overlay->backing_blocker);
    error_setg(&overlay->backing_blocker,
               "node is used as backing hd of '%s'",
               bdrv_get_device_or_node_name(overlay));

    bdrv_refresh_filename(backing_bs);

    overlay->open_flags &= ~BDRV_O_NO_BACKING;

    /* Block everything first, then open up the exceptions one by one */
    bdrv_op_block_all(backing_bs, overlay->backing_blocker);

    /* Commit and stream could not be started without these two */
    bdrv_op_unblock(backing_bs, BLOCK_OP_TYPE_COMMIT_TARGET,
                    overlay->backing_blocker);
    bdrv_op_unblock(backing_bs, BLOCK_OP_TYPE_STREAM,
                    overlay->backing_blocker);

    /*
     * Backup comes in three flavours:
     * 1. drive backup
     *    The target BDS is newly opened and the source is a top BDS.
     * 2. blockdev backup
     *    Source and target are both top BDSes.
     * 3. internal backup (used for block replication)
     *    Source and target are both backing files.
     *
     * In cases 1 and 2 neither the source nor the target is a backing file.
     * In case 3 the top BDS gets blocked, so that there is only a single
     * block job for the top BDS and its backing chain.
     */
    bdrv_op_unblock(backing_bs, BLOCK_OP_TYPE_BACKUP_SOURCE,
                    overlay->backing_blocker);
    bdrv_op_unblock(backing_bs, BLOCK_OP_TYPE_BACKUP_TARGET,
                    overlay->backing_blocker);
}
|
|
|
|
/*
 * Bookkeeping for detaching a backing child @c from its parent (c->opaque).
 *
 * Lifts every operation block that was taken on c->bs with
 * parent->backing_blocker, then frees the blocker and resets the pointer.
 * The blocker must exist when this is called.
 */
static void bdrv_backing_detach(BdrvChild *c)
{
    BlockDriverState *overlay = c->opaque;

    GLOBAL_STATE_CODE();

    assert(overlay->backing_blocker);

    bdrv_op_unblock_all(c->bs, overlay->backing_blocker);

    error_free(overlay->backing_blocker);
    overlay->backing_blocker = NULL;
}
|
|
|
|
static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
|
|
const char *filename,
|
|
bool backing_mask_protocol,
|
|
Error **errp)
|
|
{
|
|
BlockDriverState *parent = c->opaque;
|
|
bool read_only = bdrv_is_read_only(parent);
|
|
int ret;
|
|
const char *format_name;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (read_only) {
|
|
ret = bdrv_reopen_set_read_only(parent, false, errp);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
if (base->drv) {
|
|
/*
|
|
* If the new base image doesn't have a format driver layer, which we
|
|
* detect by the fact that @base is a protocol driver, we record
|
|
* 'raw' as the format instead of putting the protocol name as the
|
|
* backing format
|
|
*/
|
|
if (backing_mask_protocol && base->drv->protocol_name) {
|
|
format_name = "raw";
|
|
} else {
|
|
format_name = base->drv->format_name;
|
|
}
|
|
} else {
|
|
format_name = "";
|
|
}
|
|
|
|
ret = bdrv_change_backing_file(parent, filename, format_name, false);
|
|
if (ret < 0) {
|
|
error_setg_errno(errp, -ret, "Could not update backing file link");
|
|
}
|
|
|
|
if (read_only) {
|
|
bdrv_reopen_set_read_only(parent, true, NULL);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Returns the options and flags that a generic child of a BDS should
|
|
* get, based on the given options and flags for the parent BDS.
|
|
*/
|
|
static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
|
|
int *child_flags, QDict *child_options,
|
|
int parent_flags, QDict *parent_options)
|
|
{
|
|
int flags = parent_flags;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/*
|
|
* First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
|
|
* Generally, the question to answer is: Should this child be
|
|
* format-probed by default?
|
|
*/
|
|
|
|
/*
|
|
* Pure and non-filtered data children of non-format nodes should
|
|
* be probed by default (even when the node itself has BDRV_O_PROTOCOL
|
|
* set). This only affects a very limited set of drivers (namely
|
|
* quorum and blkverify when this comment was written).
|
|
* Force-clear BDRV_O_PROTOCOL then.
|
|
*/
|
|
if (!parent_is_format &&
|
|
(role & BDRV_CHILD_DATA) &&
|
|
!(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED)))
|
|
{
|
|
flags &= ~BDRV_O_PROTOCOL;
|
|
}
|
|
|
|
/*
|
|
* All children of format nodes (except for COW children) and all
|
|
* metadata children in general should never be format-probed.
|
|
* Force-set BDRV_O_PROTOCOL then.
|
|
*/
|
|
if ((parent_is_format && !(role & BDRV_CHILD_COW)) ||
|
|
(role & BDRV_CHILD_METADATA))
|
|
{
|
|
flags |= BDRV_O_PROTOCOL;
|
|
}
|
|
|
|
/*
|
|
* If the cache mode isn't explicitly set, inherit direct and no-flush from
|
|
* the parent.
|
|
*/
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
|
|
|
|
if (role & BDRV_CHILD_COW) {
|
|
/* backing files are opened read-only by default */
|
|
qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
|
|
qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off");
|
|
} else {
|
|
/* Inherit the read-only option from the parent if it's not set */
|
|
qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
|
|
qdict_copy_default(child_options, parent_options,
|
|
BDRV_OPT_AUTO_READ_ONLY);
|
|
}
|
|
|
|
/*
|
|
* bdrv_co_pdiscard() respects unmap policy for the parent, so we
|
|
* can default to enable it on lower layers regardless of the
|
|
* parent option.
|
|
*/
|
|
qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
|
|
|
|
/* Clear flags that only apply to the top layer */
|
|
flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
|
|
|
|
if (role & BDRV_CHILD_METADATA) {
|
|
flags &= ~BDRV_O_NO_IO;
|
|
}
|
|
if (role & BDRV_CHILD_COW) {
|
|
flags &= ~BDRV_O_TEMPORARY;
|
|
}
|
|
|
|
*child_flags = flags;
|
|
}
|
|
|
|
/*
 * .attach callback of child_of_bds.
 *
 * Inserts @child at the head of the children list of its parent
 * (child->opaque) and, depending on the role of the child and on the parent
 * driver, makes it the parent's bs->file or bs->backing.  A COW child
 * additionally goes through bdrv_backing_attach().
 */
static void GRAPH_WRLOCK bdrv_child_cb_attach(BdrvChild *child)
{
    BlockDriverState *bs = child->opaque;
    const bool is_cow = child->role & BDRV_CHILD_COW;
    const bool is_primary = child->role & BDRV_CHILD_PRIMARY;
    const bool is_filtered = child->role & BDRV_CHILD_FILTERED;

    assert_bdrv_graph_writable();
    QLIST_INSERT_HEAD(&bs->children, child, next);

    if (bs->drv->is_filter || is_filtered) {
        /*
         * Filters, and block/raw-format.c when it behaves like a filter.
         * Such nodes generally have one PRIMARY child that is the FILTERED
         * child at the same time, and possibly further children that are
         * neither PRIMARY nor FILTERED.  A COW child never occurs here.
         * bs->file therefore becomes the PRIMARY child, unless the driver
         * exceptionally wants it in bs->backing; nothing else ever ends up
         * in bs->backing.
         */
        assert(!is_cow);

        if (!is_primary) {
            assert(!is_filtered);
            return;
        }

        assert(is_filtered);
        assert(!bs->backing);
        assert(!bs->file);

        if (bs->drv->filtered_child_is_backing) {
            bs->backing = child;
        } else {
            bs->file = child;
        }
        return;
    }

    if (is_cow) {
        assert(bs->drv->supports_backing);
        assert(!is_primary);
        assert(!bs->backing);
        bs->backing = child;
        bdrv_backing_attach(child);
        return;
    }

    if (is_primary) {
        assert(!bs->file);
        bs->file = child;
    }
}
|
|
|
|
/*
 * .detach callback of child_of_bds.
 *
 * Runs bdrv_backing_detach() for a COW child, removes @child from the
 * children list of its parent (child->opaque) and resets the parent's
 * bs->backing or bs->file pointer if it referred to @child.
 */
static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child)
{
    BlockDriverState *parent_bs = child->opaque;

    if ((child->role & BDRV_CHILD_COW) != 0) {
        bdrv_backing_detach(child);
    }

    assert_bdrv_graph_writable();
    QLIST_REMOVE(child, next);

    if (parent_bs->backing == child) {
        /* The same child must not be both backing and file */
        assert(parent_bs->file != child);
        parent_bs->backing = NULL;
    } else if (parent_bs->file == child) {
        parent_bs->file = NULL;
    }
}
|
|
|
|
static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
|
|
const char *filename,
|
|
bool backing_mask_protocol,
|
|
Error **errp)
|
|
{
|
|
if (c->role & BDRV_CHILD_COW) {
|
|
return bdrv_backing_update_filename(c, base, filename,
|
|
backing_mask_protocol,
|
|
errp);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * .get_parent_aio_context callback of child_of_bds: returns the AioContext
 * of the parent node of @c (c->opaque) as reported by
 * bdrv_get_aio_context().
 */
AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
{
    BlockDriverState *parent_bs = c->opaque;
    AioContext *ctx;

    IO_CODE();

    ctx = bdrv_get_aio_context(parent_bs);
    return ctx;
}
|
|
|
|
const BdrvChildClass child_of_bds = {
|
|
.parent_is_bds = true,
|
|
.get_parent_desc = bdrv_child_get_parent_desc,
|
|
.inherit_options = bdrv_inherited_options,
|
|
.drained_begin = bdrv_child_cb_drained_begin,
|
|
.drained_poll = bdrv_child_cb_drained_poll,
|
|
.drained_end = bdrv_child_cb_drained_end,
|
|
.attach = bdrv_child_cb_attach,
|
|
.detach = bdrv_child_cb_detach,
|
|
.inactivate = bdrv_child_cb_inactivate,
|
|
.change_aio_ctx = bdrv_child_cb_change_aio_ctx,
|
|
.update_filename = bdrv_child_cb_update_filename,
|
|
.get_parent_aio_context = child_of_bds_get_parent_aio_context,
|
|
};
|
|
|
|
/*
 * Return the AioContext of the parent of @c, as reported by the
 * get_parent_aio_context callback of the child's class (c->klass).
 */
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
{
    const BdrvChildClass *klass;

    IO_CODE();

    klass = c->klass;
    return klass->get_parent_aio_context(c);
}
|
|
|
|
/*
 * Return @flags without the flags that are internal to the block layer
 * (BDRV_O_SNAPSHOT, BDRV_O_NO_BACKING and BDRV_O_PROTOCOL), i.e. the flags
 * to use for actually opening the image.  @bs is not used.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    const int internal_flags =
        BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL;

    GLOBAL_STATE_CODE();

    return flags & ~internal_flags;
}
|
|
|
|
/*
 * Recompute the cache and read-only related bits of *@flags from @opts.
 *
 * The bits in BDRV_O_CACHE_MASK as well as BDRV_O_RDWR and
 * BDRV_O_AUTO_RDONLY are cleared and then set again according to the
 * cache.no-flush, cache.direct, read-only and auto-read-only options, each
 * of which defaults to false.  The four options are removed from @opts.
 */
static void update_flags_from_options(int *flags, QemuOpts *opts)
{
    int new_flags;

    GLOBAL_STATE_CODE();

    new_flags = *flags;
    new_flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);

    if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
        new_flags |= BDRV_O_NO_FLUSH;
    }

    if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) {
        new_flags |= BDRV_O_NOCACHE;
    }

    /* Note the inversion: the option is read-only, the flag is read-write */
    if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) {
        new_flags |= BDRV_O_RDWR;
    }

    if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) {
        new_flags |= BDRV_O_AUTO_RDONLY;
    }

    *flags = new_flags;
}
|
|
|
|
/*
 * Add the cache.direct, cache.no-flush, read-only and auto-read-only
 * entries to @options, with values derived from @flags.  A key that already
 * exists in @options is left alone.
 */
static void update_options_from_flags(QDict *options, int flags)
{
    const struct {
        const char *key;
        bool value;
    } defaults[] = {
        { BDRV_OPT_CACHE_DIRECT,   (flags & BDRV_O_NOCACHE) != 0     },
        { BDRV_OPT_CACHE_NO_FLUSH, (flags & BDRV_O_NO_FLUSH) != 0    },
        /* The option is read-only, the flag means read-write */
        { BDRV_OPT_READ_ONLY,      (flags & BDRV_O_RDWR) == 0        },
        { BDRV_OPT_AUTO_READ_ONLY, (flags & BDRV_O_AUTO_RDONLY) != 0 },
    };
    size_t idx;

    GLOBAL_STATE_CODE();

    for (idx = 0; idx < sizeof(defaults) / sizeof(defaults[0]); idx++) {
        if (!qdict_haskey(options, defaults[idx].key)) {
            qdict_put_bool(options, defaults[idx].key, defaults[idx].value);
        }
    }
}
|
|
|
|
static void bdrv_assign_node_name(BlockDriverState *bs,
|
|
const char *node_name,
|
|
Error **errp)
|
|
{
|
|
char *gen_node_name = NULL;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (!node_name) {
|
|
node_name = gen_node_name = id_generate(ID_BLOCK);
|
|
} else if (!id_wellformed(node_name)) {
|
|
/*
|
|
* Check for empty string or invalid characters, but not if it is
|
|
* generated (generated names use characters not available to the user)
|
|
*/
|
|
error_setg(errp, "Invalid node-name: '%s'", node_name);
|
|
return;
|
|
}
|
|
|
|
/* takes care of avoiding namespaces collisions */
|
|
if (blk_by_name(node_name)) {
|
|
error_setg(errp, "node-name=%s is conflicting with a device id",
|
|
node_name);
|
|
goto out;
|
|
}
|
|
|
|
/* takes care of avoiding duplicates node names */
|
|
if (bdrv_find_node(node_name)) {
|
|
error_setg(errp, "Duplicate nodes with node-name='%s'", node_name);
|
|
goto out;
|
|
}
|
|
|
|
/* Make sure that the node name isn't truncated */
|
|
if (strlen(node_name) >= sizeof(bs->node_name)) {
|
|
error_setg(errp, "Node name too long");
|
|
goto out;
|
|
}
|
|
|
|
/* copy node name into the bs and insert it into the graph list */
|
|
pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
|
|
QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
|
|
out:
|
|
g_free(gen_node_name);
|
|
}
|
|
|
|
static int no_coroutine_fn GRAPH_UNLOCKED
|
|
bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
|
|
QDict *options, int open_flags, Error **errp)
|
|
{
|
|
Error *local_err = NULL;
|
|
int i, ret;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bdrv_assign_node_name(bs, node_name, &local_err);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
return -EINVAL;
|
|
}
|
|
|
|
bs->drv = drv;
|
|
bs->opaque = g_malloc0(drv->instance_size);
|
|
|
|
assert(!drv->bdrv_needs_filename || bs->filename[0]);
|
|
if (drv->bdrv_open) {
|
|
ret = drv->bdrv_open(bs, options, open_flags, &local_err);
|
|
} else {
|
|
ret = 0;
|
|
}
|
|
|
|
if (ret < 0) {
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
} else if (bs->filename[0]) {
|
|
error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
|
|
} else {
|
|
error_setg_errno(errp, -ret, "Could not open image");
|
|
}
|
|
goto open_failed;
|
|
}
|
|
|
|
assert(!(bs->supported_read_flags & ~BDRV_REQ_MASK));
|
|
assert(!(bs->supported_write_flags & ~BDRV_REQ_MASK));
|
|
|
|
/*
|
|
* Always allow the BDRV_REQ_REGISTERED_BUF optimization hint. This saves
|
|
* drivers that pass read/write requests through to a child the trouble of
|
|
* declaring support explicitly.
|
|
*
|
|
* Drivers must not propagate this flag accidentally when they initiate I/O
|
|
* to a bounce buffer. That case should be rare though.
|
|
*/
|
|
bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
|
|
bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;
|
|
|
|
ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
|
|
if (ret < 0) {
|
|
error_setg_errno(errp, -ret, "Could not refresh total sector count");
|
|
return ret;
|
|
}
|
|
|
|
bdrv_graph_rdlock_main_loop();
|
|
bdrv_refresh_limits(bs, NULL, &local_err);
|
|
bdrv_graph_rdunlock_main_loop();
|
|
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
return -EINVAL;
|
|
}
|
|
|
|
assert(bdrv_opt_mem_align(bs) != 0);
|
|
assert(bdrv_min_mem_align(bs) != 0);
|
|
assert(is_power_of_2(bs->bl.request_alignment));
|
|
|
|
for (i = 0; i < bs->quiesce_counter; i++) {
|
|
if (drv->bdrv_drain_begin) {
|
|
drv->bdrv_drain_begin(bs);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
open_failed:
|
|
bs->drv = NULL;
|
|
|
|
bdrv_graph_wrlock();
|
|
if (bs->file != NULL) {
|
|
bdrv_unref_child(bs, bs->file);
|
|
assert(!bs->file);
|
|
}
|
|
bdrv_graph_wrunlock();
|
|
|
|
g_free(bs->opaque);
|
|
bs->opaque = NULL;
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Create and open a block node.
|
|
*
|
|
* @options is a QDict of options to pass to the block drivers, or NULL for an
|
|
* empty set of options. The reference to the QDict belongs to the block layer
|
|
* after the call (even on failure), so if the caller intends to reuse the
|
|
* dictionary, it needs to use qobject_ref() before calling bdrv_open.
|
|
*/
|
|
BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
|
|
const char *node_name,
|
|
QDict *options, int flags,
|
|
Error **errp)
|
|
{
|
|
BlockDriverState *bs;
|
|
int ret;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bs = bdrv_new();
|
|
bs->open_flags = flags;
|
|
bs->options = options ?: qdict_new();
|
|
bs->explicit_options = qdict_clone_shallow(bs->options);
|
|
bs->opaque = NULL;
|
|
|
|
update_options_from_flags(bs->options, flags);
|
|
|
|
ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
|
|
if (ret < 0) {
|
|
qobject_unref(bs->explicit_options);
|
|
bs->explicit_options = NULL;
|
|
qobject_unref(bs->options);
|
|
bs->options = NULL;
|
|
bdrv_unref(bs);
|
|
return NULL;
|
|
}
|
|
|
|
return bs;
|
|
}
|
|
|
|
/* Create and open a block node. */
|
|
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
|
|
int flags, Error **errp)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
|
|
}
|
|
|
|
QemuOptsList bdrv_runtime_opts = {
|
|
.name = "bdrv_common",
|
|
.head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
|
|
.desc = {
|
|
{
|
|
.name = "node-name",
|
|
.type = QEMU_OPT_STRING,
|
|
.help = "Node name of the block device node",
|
|
},
|
|
{
|
|
.name = "driver",
|
|
.type = QEMU_OPT_STRING,
|
|
.help = "Block driver to use for the node",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_CACHE_DIRECT,
|
|
.type = QEMU_OPT_BOOL,
|
|
.help = "Bypass software writeback cache on the host",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_CACHE_NO_FLUSH,
|
|
.type = QEMU_OPT_BOOL,
|
|
.help = "Ignore flush requests",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_READ_ONLY,
|
|
.type = QEMU_OPT_BOOL,
|
|
.help = "Node is opened in read-only mode",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_AUTO_READ_ONLY,
|
|
.type = QEMU_OPT_BOOL,
|
|
.help = "Node can become read-only if opening read-write fails",
|
|
},
|
|
{
|
|
.name = "detect-zeroes",
|
|
.type = QEMU_OPT_STRING,
|
|
.help = "try to optimize zero writes (off, on, unmap)",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_DISCARD,
|
|
.type = QEMU_OPT_STRING,
|
|
.help = "discard operation (ignore/off, unmap/on)",
|
|
},
|
|
{
|
|
.name = BDRV_OPT_FORCE_SHARE,
|
|
.type = QEMU_OPT_BOOL,
|
|
.help = "always accept other writers (default: off)",
|
|
},
|
|
{ /* end of list */ }
|
|
},
|
|
};
|
|
|
|
QemuOptsList bdrv_create_opts_simple = {
|
|
.name = "simple-create-opts",
|
|
.head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head),
|
|
.desc = {
|
|
{
|
|
.name = BLOCK_OPT_SIZE,
|
|
.type = QEMU_OPT_SIZE,
|
|
.help = "Virtual disk size"
|
|
},
|
|
{
|
|
.name = BLOCK_OPT_PREALLOC,
|
|
.type = QEMU_OPT_STRING,
|
|
.help = "Preallocation mode (allowed values: off)"
|
|
},
|
|
{ /* end of list */ }
|
|
}
|
|
};
|
|
|
|
/*
|
|
* Common part for opening disk images and files
|
|
*
|
|
* Removes all processed options from *options.
|
|
*/
|
|
static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
|
|
QDict *options, Error **errp)
|
|
{
|
|
int ret, open_flags;
|
|
const char *filename;
|
|
const char *driver_name = NULL;
|
|
const char *node_name = NULL;
|
|
const char *discard;
|
|
QemuOpts *opts;
|
|
BlockDriver *drv;
|
|
Error *local_err = NULL;
|
|
bool ro;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bdrv_graph_rdlock_main_loop();
|
|
assert(bs->file == NULL);
|
|
assert(options != NULL && bs->options != options);
|
|
bdrv_graph_rdunlock_main_loop();
|
|
|
|
opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
|
|
if (!qemu_opts_absorb_qdict(opts, options, errp)) {
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
|
|
update_flags_from_options(&bs->open_flags, opts);
|
|
|
|
driver_name = qemu_opt_get(opts, "driver");
|
|
drv = bdrv_find_format(driver_name);
|
|
assert(drv != NULL);
|
|
|
|
bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
|
|
|
|
if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
|
|
error_setg(errp,
|
|
BDRV_OPT_FORCE_SHARE
|
|
"=on can only be used with read-only images");
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
|
|
if (file != NULL) {
|
|
bdrv_graph_rdlock_main_loop();
|
|
bdrv_refresh_filename(blk_bs(file));
|
|
bdrv_graph_rdunlock_main_loop();
|
|
|
|
filename = blk_bs(file)->filename;
|
|
} else {
|
|
/*
|
|
* Caution: while qdict_get_try_str() is fine, getting
|
|
* non-string types would require more care. When @options
|
|
* come from -blockdev or blockdev_add, its members are typed
|
|
* according to the QAPI schema, but when they come from
|
|
* -drive, they're all QString.
|
|
*/
|
|
filename = qdict_get_try_str(options, "filename");
|
|
}
|
|
|
|
if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
|
|
error_setg(errp, "The '%s' block driver requires a file name",
|
|
drv->format_name);
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
|
|
trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
|
|
drv->format_name);
|
|
|
|
ro = bdrv_is_read_only(bs);
|
|
|
|
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
|
|
if (!ro && bdrv_is_whitelisted(drv, true)) {
|
|
bdrv_graph_rdlock_main_loop();
|
|
ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
|
|
bdrv_graph_rdunlock_main_loop();
|
|
} else {
|
|
ret = -ENOTSUP;
|
|
}
|
|
if (ret < 0) {
|
|
error_setg(errp,
|
|
!ro && bdrv_is_whitelisted(drv, true)
|
|
? "Driver '%s' can only be used for read-only devices"
|
|
: "Driver '%s' is not whitelisted",
|
|
drv->format_name);
|
|
goto fail_opts;
|
|
}
|
|
}
|
|
|
|
/* bdrv_new() and bdrv_close() make it so */
|
|
assert(qatomic_read(&bs->copy_on_read) == 0);
|
|
|
|
if (bs->open_flags & BDRV_O_COPY_ON_READ) {
|
|
if (!ro) {
|
|
bdrv_enable_copy_on_read(bs);
|
|
} else {
|
|
error_setg(errp, "Can't use copy-on-read on read-only device");
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
}
|
|
|
|
discard = qemu_opt_get(opts, BDRV_OPT_DISCARD);
|
|
if (discard != NULL) {
|
|
if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
|
|
error_setg(errp, "Invalid discard option");
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
}
|
|
|
|
bs->detect_zeroes =
|
|
bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
ret = -EINVAL;
|
|
goto fail_opts;
|
|
}
|
|
|
|
if (filename != NULL) {
|
|
pstrcpy(bs->filename, sizeof(bs->filename), filename);
|
|
} else {
|
|
bs->filename[0] = '\0';
|
|
}
|
|
pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
|
|
|
|
/* Open the image, either directly or using a protocol */
|
|
open_flags = bdrv_open_flags(bs, bs->open_flags);
|
|
node_name = qemu_opt_get(opts, "node-name");
|
|
|
|
assert(!drv->protocol_name || file == NULL);
|
|
ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
|
|
if (ret < 0) {
|
|
goto fail_opts;
|
|
}
|
|
|
|
qemu_opts_del(opts);
|
|
return 0;
|
|
|
|
fail_opts:
|
|
qemu_opts_del(opts);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Parse a "json:" pseudo-filename into a flattened options QDict.
 *
 * @filename must start with "json:" (this is asserted); the remainder is
 * parsed as JSON and has to be a JSON object.
 *
 * Returns the flattened QDict, or NULL (with @errp set) if the JSON text
 * could not be parsed or is not an object.
 */
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    ERRP_GUARD();
    QObject *parsed;
    QDict *dict;
    int has_prefix;

    GLOBAL_STATE_CODE();

    /* Skip the prefix; @filename points at the JSON text afterwards */
    has_prefix = strstart(filename, "json:", &filename);
    assert(has_prefix);

    parsed = qobject_from_json(filename, errp);
    if (!parsed) {
        error_prepend(errp, "Could not parse the JSON options: ");
        return NULL;
    }

    dict = qobject_to(QDict, parsed);
    if (!dict) {
        /* Valid JSON, but something other than an object */
        qobject_unref(parsed);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    qdict_flatten(dict);

    return dict;
}
|
|
|
|
static void parse_json_protocol(QDict *options, const char **pfilename,
|
|
Error **errp)
|
|
{
|
|
QDict *json_options;
|
|
Error *local_err = NULL;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/* Parse json: pseudo-protocol */
|
|
if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
|
|
return;
|
|
}
|
|
|
|
json_options = parse_json_filename(*pfilename, &local_err);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
return;
|
|
}
|
|
|
|
/* Options given in the filename have lower priority than options
|
|
* specified directly */
|
|
qdict_join(options, json_options, false);
|
|
qobject_unref(json_options);
|
|
*pfilename = NULL;
|
|
}
|
|
|
|
/*
|
|
* Fills in default options for opening images and converts the legacy
|
|
* filename/flags pair to option QDict entries.
|
|
* The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
|
|
* block driver has been specified explicitly.
|
|
*/
|
|
static int bdrv_fill_options(QDict **options, const char *filename,
|
|
int *flags, bool allow_parse_filename,
|
|
Error **errp)
|
|
{
|
|
const char *drvname;
|
|
bool protocol = *flags & BDRV_O_PROTOCOL;
|
|
bool parse_filename = false;
|
|
BlockDriver *drv = NULL;
|
|
Error *local_err = NULL;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/*
|
|
* Caution: while qdict_get_try_str() is fine, getting non-string
|
|
* types would require more care. When @options come from
|
|
* -blockdev or blockdev_add, its members are typed according to
|
|
* the QAPI schema, but when they come from -drive, they're all
|
|
* QString.
|
|
*/
|
|
drvname = qdict_get_try_str(*options, "driver");
|
|
if (drvname) {
|
|
drv = bdrv_find_format(drvname);
|
|
if (!drv) {
|
|
error_setg(errp, "Unknown driver '%s'", drvname);
|
|
return -ENOENT;
|
|
}
|
|
/* If the user has explicitly specified the driver, this choice should
|
|
* override the BDRV_O_PROTOCOL flag */
|
|
protocol = drv->protocol_name;
|
|
}
|
|
|
|
if (protocol) {
|
|
*flags |= BDRV_O_PROTOCOL;
|
|
} else {
|
|
*flags &= ~BDRV_O_PROTOCOL;
|
|
}
|
|
|
|
/* Translate cache options from flags into options */
|
|
update_options_from_flags(*options, *flags);
|
|
|
|
/* Fetch the file name from the options QDict if necessary */
|
|
if (protocol && filename) {
|
|
if (!qdict_haskey(*options, "filename")) {
|
|
qdict_put_str(*options, "filename", filename);
|
|
parse_filename = allow_parse_filename;
|
|
} else {
|
|
error_setg(errp, "Can't specify 'file' and 'filename' options at "
|
|
"the same time");
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
/* Find the right block driver */
|
|
/* See cautionary note on accessing @options above */
|
|
filename = qdict_get_try_str(*options, "filename");
|
|
|
|
if (!drvname && protocol) {
|
|
if (filename) {
|
|
drv = bdrv_find_protocol(filename, parse_filename, errp);
|
|
if (!drv) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
drvname = drv->format_name;
|
|
qdict_put_str(*options, "driver", drvname);
|
|
} else {
|
|
error_setg(errp, "Must specify either driver or file");
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
assert(drv || !protocol);
|
|
|
|
/* Driver-specific filename parsing */
|
|
if (drv && drv->bdrv_parse_filename && parse_filename) {
|
|
drv->bdrv_parse_filename(filename, *options, &local_err);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!drv->bdrv_needs_filename) {
|
|
qdict_del(*options, "filename");
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
typedef struct BlockReopenQueueEntry {
|
|
bool prepared;
|
|
BDRVReopenState state;
|
|
QTAILQ_ENTRY(BlockReopenQueueEntry) entry;
|
|
} BlockReopenQueueEntry;
|
|
|
|
/*
|
|
* Return the flags that @bs will have after the reopens in @q have
|
|
* successfully completed. If @q is NULL (or @bs is not contained in @q),
|
|
* return the current flags.
|
|
*/
|
|
static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
|
|
{
|
|
BlockReopenQueueEntry *entry;
|
|
|
|
if (q != NULL) {
|
|
QTAILQ_FOREACH(entry, q, entry) {
|
|
if (entry->state.bs == bs) {
|
|
return entry->state.flags;
|
|
}
|
|
}
|
|
}
|
|
|
|
return bs->open_flags;
|
|
}
|
|
|
|
/* Returns whether the image file can be written to after the reopen queue @q
|
|
* has been successfully applied, or right now if @q is NULL. */
|
|
static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
|
|
BlockReopenQueue *q)
|
|
{
|
|
int flags = bdrv_reopen_get_flags(q, bs);
|
|
|
|
return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
|
|
}
|
|
|
|
/*
|
|
* Return whether the BDS can be written to. This is not necessarily
|
|
* the same as !bdrv_is_read_only(bs), as inactivated images may not
|
|
* be written to but do not count as read-only images.
|
|
*/
|
|
bool bdrv_is_writable(BlockDriverState *bs)
|
|
{
|
|
IO_CODE();
|
|
return bdrv_is_writable_after_reopen(bs, NULL);
|
|
}
|
|
|
|
/*
 * Return the description of the parent of @c, as produced by the
 * get_parent_desc callback of the child's class (c->klass).
 */
static char *bdrv_child_user_desc(BdrvChild *c)
{
    char *desc;

    GLOBAL_STATE_CODE();

    desc = c->klass->get_parent_desc(c);
    return desc;
}
|
|
|
|
/*
|
|
* Check that @a allows everything that @b needs. @a and @b must reference same
|
|
* child node.
|
|
*/
|
|
static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
|
|
{
|
|
const char *child_bs_name;
|
|
g_autofree char *a_user = NULL;
|
|
g_autofree char *b_user = NULL;
|
|
g_autofree char *perms = NULL;
|
|
|
|
assert(a->bs);
|
|
assert(a->bs == b->bs);
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if ((b->perm & a->shared_perm) == b->perm) {
|
|
return true;
|
|
}
|
|
|
|
child_bs_name = bdrv_get_node_name(b->bs);
|
|
a_user = bdrv_child_user_desc(a);
|
|
b_user = bdrv_child_user_desc(b);
|
|
perms = bdrv_perm_names(b->perm & ~a->shared_perm);
|
|
|
|
error_setg(errp, "Permission conflict on node '%s': permissions '%s' are "
|
|
"both required by %s (uses node '%s' as '%s' child) and "
|
|
"unshared by %s (uses node '%s' as '%s' child).",
|
|
child_bs_name, perms,
|
|
b_user, child_bs_name, b->name,
|
|
a_user, child_bs_name, a->name);
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool GRAPH_RDLOCK
|
|
bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
|
|
{
|
|
BdrvChild *a, *b;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/*
|
|
* During the loop we'll look at each pair twice. That's correct because
|
|
* bdrv_a_allow_b() is asymmetric and we should check each pair in both
|
|
* directions.
|
|
*/
|
|
QLIST_FOREACH(a, &bs->parents, next_parent) {
|
|
QLIST_FOREACH(b, &bs->parents, next_parent) {
|
|
if (a == b) {
|
|
continue;
|
|
}
|
|
|
|
if (!bdrv_a_allow_b(a, b, errp)) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void GRAPH_RDLOCK
|
|
bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
|
|
BdrvChild *c, BdrvChildRole role,
|
|
BlockReopenQueue *reopen_queue,
|
|
uint64_t parent_perm, uint64_t parent_shared,
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
{
|
|
assert(bs->drv && bs->drv->bdrv_child_perm);
|
|
GLOBAL_STATE_CODE();
|
|
bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
|
|
parent_perm, parent_shared,
|
|
nperm, nshared);
|
|
/* TODO Take force_share from reopen_queue */
|
|
if (child_bs && child_bs->force_share) {
|
|
*nshared = BLK_PERM_ALL;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Adds the whole subtree of @bs (including @bs itself) to the @list (except for
|
|
* nodes that are already in the @list, of course) so that final list is
|
|
* topologically sorted. Return the result (GSList @list object is updated, so
|
|
* don't use old reference after function call).
|
|
*
|
|
* On function start @list must be already topologically sorted and for any node
|
|
* in the @list the whole subtree of the node must be in the @list as well. The
|
|
* simplest way to satisfy this criteria: use only result of
|
|
* bdrv_topological_dfs() or NULL as @list parameter.
|
|
*/
|
|
static GSList * GRAPH_RDLOCK
|
|
bdrv_topological_dfs(GSList *list, GHashTable *found, BlockDriverState *bs)
|
|
{
|
|
BdrvChild *child;
|
|
g_autoptr(GHashTable) local_found = NULL;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (!found) {
|
|
assert(!list);
|
|
found = local_found = g_hash_table_new(NULL, NULL);
|
|
}
|
|
|
|
if (g_hash_table_contains(found, bs)) {
|
|
return list;
|
|
}
|
|
g_hash_table_add(found, bs);
|
|
|
|
QLIST_FOREACH(child, &bs->children, next) {
|
|
list = bdrv_topological_dfs(list, found, child->bs);
|
|
}
|
|
|
|
return g_slist_prepend(list, bs);
|
|
}
|
|
|
|
typedef struct BdrvChildSetPermState {
|
|
BdrvChild *child;
|
|
uint64_t old_perm;
|
|
uint64_t old_shared_perm;
|
|
} BdrvChildSetPermState;
|
|
|
|
static void bdrv_child_set_perm_abort(void *opaque)
|
|
{
|
|
BdrvChildSetPermState *s = opaque;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
s->child->perm = s->old_perm;
|
|
s->child->shared_perm = s->old_shared_perm;
|
|
}
|
|
|
|
static TransactionActionDrv bdrv_child_set_pem_drv = {
|
|
.abort = bdrv_child_set_perm_abort,
|
|
.clean = g_free,
|
|
};
|
|
|
|
/*
 * Set the permissions of @c to @perm/@shared right away and record the
 * previous values in @tran, so that the abort action can restore them.
 */
static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
                                uint64_t shared, Transaction *tran)
{
    BdrvChildSetPermState *undo = g_new(BdrvChildSetPermState, 1);
    GLOBAL_STATE_CODE();

    /* Save the current values before they get overwritten below */
    undo->child = c;
    undo->old_perm = c->perm;
    undo->old_shared_perm = c->shared_perm;

    c->perm = perm;
    c->shared_perm = shared;

    tran_add(tran, &bdrv_child_set_pem_drv, undo);
}
|
|
|
|
/*
 * Commit action of bdrv_drv_set_perm_drv: if the driver of the node passed
 * as @opaque implements .bdrv_set_perm(), call it with the cumulative
 * permissions the node has at this point.
 */
static void GRAPH_RDLOCK bdrv_drv_set_perm_commit(void *opaque)
{
    BlockDriverState *bs = opaque;
    uint64_t perm, shared;
    GLOBAL_STATE_CODE();

    if (!bs->drv->bdrv_set_perm) {
        return;
    }

    bdrv_get_cumulative_perm(bs, &perm, &shared);
    bs->drv->bdrv_set_perm(bs, perm, shared);
}
|
|
|
|
/*
 * Abort action of bdrv_drv_set_perm_drv: give the driver of the node passed
 * as @opaque the chance to undo its part through the optional
 * .bdrv_abort_perm_update() callback.
 */
static void GRAPH_RDLOCK bdrv_drv_set_perm_abort(void *opaque)
{
    BlockDriverState *bs = opaque;
    GLOBAL_STATE_CODE();

    if (!bs->drv->bdrv_abort_perm_update) {
        return;
    }

    bs->drv->bdrv_abort_perm_update(bs);
}
|
|
|
|
TransactionActionDrv bdrv_drv_set_perm_drv = {
|
|
.abort = bdrv_drv_set_perm_abort,
|
|
.commit = bdrv_drv_set_perm_commit,
|
|
};
|
|
|
|
/*
|
|
* After calling this function, the transaction @tran may only be completed
|
|
* while holding a reader lock for the graph.
|
|
*/
|
|
static int GRAPH_RDLOCK
|
|
bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm,
|
|
Transaction *tran, Error **errp)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
if (!bs->drv) {
|
|
return 0;
|
|
}
|
|
|
|
if (bs->drv->bdrv_check_perm) {
|
|
int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
if (tran) {
|
|
tran_add(tran, &bdrv_drv_set_perm_drv, bs);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
typedef struct BdrvReplaceChildState {
|
|
BdrvChild *child;
|
|
BlockDriverState *old_bs;
|
|
} BdrvReplaceChildState;
|
|
|
|
/*
 * Commit action of bdrv_replace_child_tran(): schedule the unref of the
 * node that the child pointed to before the replacement.
 */
static void GRAPH_WRLOCK bdrv_replace_child_commit(void *opaque)
{
    BdrvReplaceChildState *state = opaque;
    BlockDriverState *old_bs = state->old_bs;

    GLOBAL_STATE_CODE();

    bdrv_schedule_unref(old_bs);
}
|
|
|
|
/*
 * Abort action of bdrv_replace_child_tran(): make the child point to its
 * previous node again and drop the reference on the node it pointed to in
 * the meantime.
 */
static void GRAPH_WRLOCK bdrv_replace_child_abort(void *opaque)
{
    BdrvReplaceChildState *state = opaque;
    BdrvChild *child = state->child;
    BlockDriverState *new_bs = child->bs;

    GLOBAL_STATE_CODE();
    assert_bdrv_graph_writable();

    /* old_bs reference is transparently moved from @state to @child */
    if (child->bs == NULL) {
        /*
         * The parents were undrained when removing old_bs from the child. New
         * requests can't have been made, though, because the child was empty.
         *
         * TODO Make bdrv_replace_child_noperm() transactionable to avoid
         * undraining the parent in the first place. Once this is done, having
         * new_bs drained when calling bdrv_replace_child_tran() is not a
         * requirement any more.
         */
        bdrv_parent_drained_begin_single(child);
        assert(!bdrv_parent_drained_poll_single(child));
    }
    assert(child->quiesced_parent);
    bdrv_replace_child_noperm(child, state->old_bs);

    bdrv_unref(new_bs);
}
|
|
|
|
static TransactionActionDrv bdrv_replace_child_drv = {
|
|
.commit = bdrv_replace_child_commit,
|
|
.abort = bdrv_replace_child_abort,
|
|
.clean = g_free,
|
|
};
|
|
|
|
/*
|
|
* bdrv_replace_child_tran
|
|
*
|
|
* Note: real unref of old_bs is done only on commit.
|
|
*
|
|
* Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
|
|
* kept drained until the transaction is completed.
|
|
*
|
|
* After calling this function, the transaction @tran may only be completed
|
|
* while holding a writer lock for the graph.
|
|
*
|
|
* The function doesn't update permissions, caller is responsible for this.
|
|
*/
|
|
static void GRAPH_WRLOCK
|
|
bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
|
|
Transaction *tran)
|
|
{
|
|
BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
|
|
|
|
assert(child->quiesced_parent);
|
|
assert(!new_bs || new_bs->quiesce_counter);
|
|
|
|
*s = (BdrvReplaceChildState) {
|
|
.child = child,
|
|
.old_bs = child->bs,
|
|
};
|
|
tran_add(tran, &bdrv_replace_child_drv, s);
|
|
|
|
if (new_bs) {
|
|
bdrv_ref(new_bs);
|
|
}
|
|
|
|
bdrv_replace_child_noperm(child, new_bs);
|
|
/* old_bs reference is transparently moved from @child to @s */
|
|
}
|
|
|
|
/*
|
|
* Refresh permissions in @bs subtree. The function is intended to be called
|
|
* after some graph modification that was done without permission update.
|
|
*
|
|
* After calling this function, the transaction @tran may only be completed
|
|
* while holding a reader lock for the graph.
|
|
*/
|
|
static int GRAPH_RDLOCK
|
|
bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
|
|
Transaction *tran, Error **errp)
|
|
{
|
|
BlockDriver *drv = bs->drv;
|
|
BdrvChild *c;
|
|
int ret;
|
|
uint64_t cumulative_perms, cumulative_shared_perms;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
|
|
|
|
/* Write permissions never work with read-only images */
|
|
if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
|
|
!bdrv_is_writable_after_reopen(bs, q))
|
|
{
|
|
if (!bdrv_is_writable_after_reopen(bs, NULL)) {
|
|
error_setg(errp, "Block node is read-only");
|
|
} else {
|
|
error_setg(errp, "Read-only block node '%s' cannot support "
|
|
"read-write users", bdrv_get_node_name(bs));
|
|
}
|
|
|
|
return -EPERM;
|
|
}
|
|
|
|
/*
|
|
* Unaligned requests will automatically be aligned to bl.request_alignment
|
|
* and without RESIZE we can't extend requests to write to space beyond the
|
|
* end of the image, so it's required that the image size is aligned.
|
|
*/
|
|
if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
|
|
!(cumulative_perms & BLK_PERM_RESIZE))
|
|
{
|
|
if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
|
|
error_setg(errp, "Cannot get 'write' permission without 'resize': "
|
|
"Image size is not a multiple of request "
|
|
"alignment");
|
|
return -EPERM;
|
|
}
|
|
}
|
|
|
|
/* Check this node */
|
|
if (!drv) {
|
|
return 0;
|
|
}
|
|
|
|
ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran,
|
|
errp);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
|
|
/* Drivers that never have children can omit .bdrv_child_perm() */
|
|
if (!drv->bdrv_child_perm) {
|
|
assert(QLIST_EMPTY(&bs->children));
|
|
return 0;
|
|
}
|
|
|
|
/* Check all children */
|
|
QLIST_FOREACH(c, &bs->children, next) {
|
|
uint64_t cur_perm, cur_shared;
|
|
|
|
bdrv_child_perm(bs, c->bs, c, c->role, q,
|
|
cumulative_perms, cumulative_shared_perms,
|
|
&cur_perm, &cur_shared);
|
|
bdrv_child_set_perm(c, cur_perm, cur_shared, tran);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* @list is a product of bdrv_topological_dfs() (may be called several times) -
|
|
* a topologically sorted subgraph.
|
|
*
|
|
* After calling this function, the transaction @tran may only be completed
|
|
* while holding a reader lock for the graph.
|
|
*/
|
|
static int GRAPH_RDLOCK
|
|
bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran,
|
|
Error **errp)
|
|
{
|
|
int ret;
|
|
BlockDriverState *bs;
|
|
GLOBAL_STATE_CODE();
|
|
|
|
for ( ; list; list = list->next) {
|
|
bs = list->data;
|
|
|
|
if (bdrv_parent_perms_conflict(bs, errp)) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
ret = bdrv_node_refresh_perm(bs, q, tran, errp);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* @list is any list of nodes. List is completed by all subtrees and
|
|
* topologically sorted. It's not a problem if some node occurs in the @list
|
|
* several times.
|
|
*
|
|
* After calling this function, the transaction @tran may only be completed
|
|
* while holding a reader lock for the graph.
|
|
*/
|
|
static int GRAPH_RDLOCK
|
|
bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran,
|
|
Error **errp)
|
|
{
|
|
g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL);
|
|
g_autoptr(GSList) refresh_list = NULL;
|
|
|
|
for ( ; list; list = list->next) {
|
|
refresh_list = bdrv_topological_dfs(refresh_list, found, list->data);
|
|
}
|
|
|
|
return bdrv_do_refresh_perms(refresh_list, q, tran, errp);
|
|
}
|
|
|
|
/*
 * Combine the permissions of all parents of @bs: @perm receives the union of
 * the permissions the parents use, @shared_perm the intersection of the
 * permissions they share. Without any parent the results are 0 and
 * BLK_PERM_ALL, respectively.
 */
void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
                              uint64_t *shared_perm)
{
    uint64_t used = 0;
    uint64_t shared = BLK_PERM_ALL;
    BdrvChild *parent;

    GLOBAL_STATE_CODE();

    QLIST_FOREACH(parent, &bs->parents, next_parent) {
        used |= parent->perm;
        shared &= parent->shared_perm;
    }

    *perm = used;
    *shared_perm = shared;
}
|
|
|
|
char *bdrv_perm_names(uint64_t perm)
|
|
{
|
|
struct perm_name {
|
|
uint64_t perm;
|
|
const char *name;
|
|
} permissions[] = {
|
|
{ BLK_PERM_CONSISTENT_READ, "consistent read" },
|
|
{ BLK_PERM_WRITE, "write" },
|
|
{ BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
|
|
{ BLK_PERM_RESIZE, "resize" },
|
|
{ 0, NULL }
|
|
};
|
|
|
|
GString *result = g_string_sized_new(30);
|
|
struct perm_name *p;
|
|
|
|
for (p = permissions; p->name; p++) {
|
|
if (perm & p->perm) {
|
|
if (result->len > 0) {
|
|
g_string_append(result, ", ");
|
|
}
|
|
g_string_append(result, p->name);
|
|
}
|
|
}
|
|
|
|
return g_string_free(result, FALSE);
|
|
}
|
|
|
|
|
|
/*
|
|
* @tran is allowed to be NULL. In this case no rollback is possible.
|
|
*
|
|
* After calling this function, the transaction @tran may only be completed
|
|
* while holding a reader lock for the graph.
|
|
*/
|
|
static int GRAPH_RDLOCK
|
|
bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, Error **errp)
|
|
{
|
|
int ret;
|
|
Transaction *local_tran = NULL;
|
|
g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
|
|
GLOBAL_STATE_CODE();
|
|
|
|
if (!tran) {
|
|
tran = local_tran = tran_new();
|
|
}
|
|
|
|
ret = bdrv_do_refresh_perms(list, NULL, tran, errp);
|
|
|
|
if (local_tran) {
|
|
tran_finalize(local_tran, ret);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
|
|
Error **errp)
|
|
{
|
|
Error *local_err = NULL;
|
|
Transaction *tran = tran_new();
|
|
int ret;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
|
|
bdrv_child_set_perm(c, perm, shared, tran);
|
|
|
|
ret = bdrv_refresh_perms(c->bs, tran, &local_err);
|
|
|
|
tran_finalize(tran, ret);
|
|
|
|
if (ret < 0) {
|
|
if ((perm & ~c->perm) || (c->shared_perm & ~shared)) {
|
|
/* tighten permissions */
|
|
error_propagate(errp, local_err);
|
|
} else {
|
|
/*
|
|
* Our caller may intend to only loosen restrictions and
|
|
* does not expect this function to fail. Errors are not
|
|
* fatal in such a case, so we can just hide them from our
|
|
* caller.
|
|
*/
|
|
error_free(local_err);
|
|
ret = 0;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Recompute the permissions that @bs needs on its child @c from the current
 * cumulative permissions of @bs, and try to apply them with
 * bdrv_child_try_set_perm(), whose result is returned.
 */
int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
{
    uint64_t cumulative_perm, cumulative_shared;
    uint64_t child_perm, child_shared;

    GLOBAL_STATE_CODE();

    bdrv_get_cumulative_perm(bs, &cumulative_perm, &cumulative_shared);
    bdrv_child_perm(bs, c->bs, c, c->role, NULL,
                    cumulative_perm, cumulative_shared,
                    &child_perm, &child_shared);

    return bdrv_child_try_set_perm(c, child_perm, child_shared, errp);
}
|
|
|
|
/*
|
|
* Default implementation for .bdrv_child_perm() for block filters:
|
|
* Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the
|
|
* filtered child.
|
|
*/
|
|
static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
|
|
BdrvChildRole role,
|
|
BlockReopenQueue *reopen_queue,
|
|
uint64_t perm, uint64_t shared,
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
*nperm = perm & DEFAULT_PERM_PASSTHROUGH;
|
|
*nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
|
|
}
|
|
|
|
static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
|
|
BdrvChildRole role,
|
|
BlockReopenQueue *reopen_queue,
|
|
uint64_t perm, uint64_t shared,
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
{
|
|
assert(role & BDRV_CHILD_COW);
|
|
GLOBAL_STATE_CODE();
|
|
|
|
/*
|
|
* We want consistent read from backing files if the parent needs it.
|
|
* No other operations are performed on backing files.
|
|
*/
|
|
perm &= BLK_PERM_CONSISTENT_READ;
|
|
|
|
/*
|
|
* If the parent can deal with changing data, we're okay with a
|
|
* writable and resizable backing file.
|
|
* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too?
|
|
*/
|
|
if (shared & BLK_PERM_WRITE) {
|
|
shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
|
|
} else {
|
|
shared = 0;
|
|
}
|
|
|
|
shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
|
|
|
|
if (bs->open_flags & BDRV_O_INACTIVE) {
|
|
shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
|
|
}
|
|
|
|
*nperm = perm;
|
|
*nshared = shared;
|
|
}
|
|
|
|
static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
|
|
BdrvChildRole role,
|
|
BlockReopenQueue *reopen_queue,
|
|
uint64_t perm, uint64_t shared,
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
{
|
|
int flags;
|
|
|
|
GLOBAL_STATE_CODE();
|
|
assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
|
|
|
|
flags = bdrv_reopen_get_flags(reopen_queue, bs);
|
|
|
|
/*
|
|
* Apart from the modifications below, the same permissions are
|
|
* forwarded and left alone as for filters
|
|
*/
|
|
bdrv_filter_default_perms(bs, c, role, reopen_queue,
|
|
perm, shared, &perm, &shared);
|
|
|
|
if (role & BDRV_CHILD_METADATA) {
|
|
/* Format drivers may touch metadata even if the guest doesn't write */
|
|
if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
|
|
perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
|
|
}
|
|
|
|
/*
|
|
* bs->file always needs to be consistent because of the
|
|
* metadata. We can never allow other users to resize or write
|
|
* to it.
|
|
*/
|
|
if (!(flags & BDRV_O_NO_IO)) {
|
|
perm |= BLK_PERM_CONSISTENT_READ;
|
|
}
|
|
shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
|
|
}
|
|
|
|
if (role & BDRV_CHILD_DATA) {
|
|
/*
|
|
* Technically, everything in this block is a subset of the
|
|
* BDRV_CHILD_METADATA path taken above, and so this could
|
|
* be an "else if" branch. However, that is not obvious, and
|
|
* this function is not performance critical, therefore we let
|
|
* this be an independent "if".
|
|
*/
|
|
|
|
/*
|
|
* We cannot allow other users to resize the file because the
|
|
* format driver might have some assumptions about the size
|
|
* (e.g. because it is stored in metadata, or because the file
|
|
* is split into fixed-size data files).
|
|
*/
|
|
shared &= ~BLK_PERM_RESIZE;
|
|
|
|
/*
|
|
* WRITE_UNCHANGED often cannot be performed as such on the
|
|
* data file. For example, the qcow2 driver may still need to
|
|
* write copied clusters on copy-on-read.
|
|
*/
|
|
if (perm & BLK_PERM_WRITE_UNCHANGED) {
|
|
perm |= BLK_PERM_WRITE;
|
|
}
|
|
|
|
/*
|
|
* If the data file is written to, the format driver may
|
|
* expect to be able to resize it by writing beyond the EOF.
|
|
*/
|
|
if (perm & BLK_PERM_WRITE) {
|
|
perm |= BLK_PERM_RESIZE;
|
|
}
|
|
}
|
|
|
|
if (bs->open_flags & BDRV_O_INACTIVE) {
|
|
shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
|
|
}
|
|
|
|
*nperm = perm;
|
|
*nshared = shared;
|
|
}
|
|
|
|
void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
|
|
BdrvChildRole role, BlockReopenQueue *reopen_queue,
|
|
uint64_t perm, uint64_t shared,
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
{
|
|
GLOBAL_STATE_CODE();
|
|
if (role & BDRV_CHILD_FILTERED) {
|
|
assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
|
|
BDRV_CHILD_COW)));
|
|
bdrv_filter_default_perms(bs, c, role, reopen_queue,
|
|
perm, shared, nperm, nshared);
|
|
} else if (role & BDRV_CHILD_COW) {
|
|
assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA)));
|
|
bdrv_default_perms_for_cow(bs, c, role, reopen_queue,
|
|
perm, shared, nperm, nshared);
|
|
} else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) {
|
|
bdrv_default_perms_for_storage(bs, c, role, reopen_queue,
|
|
perm, shared, nperm, nshared);
|
|
} else |