Block layer patches:

- Drain fixes
 - node-name parameters for block-commit
 - Refactor block jobs to use transactional callbacks for exiting
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJbqlBsAAoJEPQH2wBh1c9ABCQIAJ4adAAlr65kmcktHrOeQc6A
 7VwSSCUa9B8BJS+/H3V8XF3eX1fa016cRQCHfH/ua3Wqavw00qcuS8Bz/ggc6qls
 S1kNSSGhemvtf6ebTCN++HCxEg8g1RjsdnxaTiNWrYLKadX5kjLGofU1eAM2J/re
 k5YsyB01X4RHS2L0eAUbYGgNFs+UJEU4p0aKGBPHsOj1LIYEzFhDTVNJ8OjNUG9R
 mcMXFiYmQpJTV0hlIqL+pOtRvlR9YOKgkO8dmFkRe+z82f+GA+EZhLfpACxc7ilQ
 HS4V2NMVucZ7G8gGudg9mqvd3u/AV5BiUtIGd0iIQ9pU9fUuPVPl977i5WHewoM=
 =3TQt
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/xanclic/tags/pull-block-2018-09-25' into staging

Block layer patches:
- Drain fixes
- node-name parameters for block-commit
- Refactor block jobs to use transactional callbacks for exiting

# gpg: Signature made Tue 25 Sep 2018 16:12:44 BST
# gpg:                using RSA key F407DB0061D5CF40
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* remotes/xanclic/tags/pull-block-2018-09-25: (42 commits)
  test-bdrv-drain: Test draining job source child and parent
  block: Use a single global AioWait
  test-bdrv-drain: Fix outdated comments
  test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort
  job: Avoid deadlocks in job_completed_txn_abort()
  test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level()
  block: Remove aio_poll() in bdrv_drain_poll variants
  blockjob: Lie better in child_job_drained_poll()
  block-backend: Decrease in_flight only after callback
  block-backend: Fix potential double blk_delete()
  block-backend: Add .drained_poll callback
  block: Add missing locking in bdrv_co_drain_bh_cb()
  test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback
  job: Use AIO_WAIT_WHILE() in job_finish_sync()
  test-blockjob: Acquire AioContext around job_cancel_sync()
  test-bdrv-drain: Drain with block jobs in an I/O thread
  aio-wait: Increase num_waiters even in home thread
  blockjob: Wake up BDS when job becomes idle
  job: Fix missing locking due to mismerge
  job: Fix nested aio_poll() hanging in job_txn_apply
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2018-09-25 16:47:35 +01:00
commit c5e4e49258
29 changed files with 856 additions and 317 deletions

View File

@ -2792,6 +2792,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
bdrv_parent_cb_change_media(bs, true);
qobject_unref(options);
options = NULL;
/* For snapshot=on, create a temporary qcow2 overlay. bs points to the
* temporary snapshot afterwards. */
@ -4885,11 +4886,6 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
return bs ? bs->aio_context : qemu_get_aio_context();
}
AioWait *bdrv_get_aio_wait(BlockDriverState *bs)
{
return bs ? &bs->wait : NULL;
}
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
{
aio_co_enter(bdrv_get_aio_context(bs), co);

View File

@ -88,7 +88,6 @@ struct BlockBackend {
* Accessed with atomic ops.
*/
unsigned int in_flight;
AioWait wait;
};
typedef struct BlockBackendAIOCB {
@ -121,6 +120,7 @@ static void blk_root_inherit_options(int *child_flags, QDict *child_options,
abort();
}
static void blk_root_drained_begin(BdrvChild *child);
static bool blk_root_drained_poll(BdrvChild *child);
static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
@ -294,6 +294,7 @@ static const BdrvChildRole child_root = {
.get_parent_desc = blk_root_get_parent_desc,
.drained_begin = blk_root_drained_begin,
.drained_poll = blk_root_drained_poll,
.drained_end = blk_root_drained_end,
.activate = blk_root_activate,
@ -433,6 +434,7 @@ int blk_get_refcnt(BlockBackend *blk)
*/
void blk_ref(BlockBackend *blk)
{
assert(blk->refcnt > 0);
blk->refcnt++;
}
@ -445,7 +447,13 @@ void blk_unref(BlockBackend *blk)
{
if (blk) {
assert(blk->refcnt > 0);
if (!--blk->refcnt) {
if (blk->refcnt > 1) {
blk->refcnt--;
} else {
blk_drain(blk);
/* blk_drain() cannot resurrect blk, nobody held a reference */
assert(blk->refcnt == 1);
blk->refcnt = 0;
blk_delete(blk);
}
}
@ -1289,7 +1297,7 @@ static void blk_inc_in_flight(BlockBackend *blk)
static void blk_dec_in_flight(BlockBackend *blk)
{
atomic_dec(&blk->in_flight);
aio_wait_kick(&blk->wait);
aio_wait_kick();
}
static void error_callback_bh(void *opaque)
@ -1330,8 +1338,8 @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
if (acb->has_returned) {
blk_dec_in_flight(acb->rwco.blk);
acb->common.cb(acb->common.opaque, acb->rwco.ret);
blk_dec_in_flight(acb->rwco.blk);
qemu_aio_unref(acb);
}
}
@ -1590,9 +1598,8 @@ void blk_drain(BlockBackend *blk)
}
/* We may have -ENOMEDIUM completions in flight */
AIO_WAIT_WHILE(&blk->wait,
blk_get_aio_context(blk),
atomic_mb_read(&blk->in_flight) > 0);
AIO_WAIT_WHILE(blk_get_aio_context(blk),
atomic_mb_read(&blk->in_flight) > 0);
if (bs) {
bdrv_drained_end(bs);
@ -1611,8 +1618,7 @@ void blk_drain_all(void)
aio_context_acquire(ctx);
/* We may have -ENOMEDIUM completions in flight */
AIO_WAIT_WHILE(&blk->wait, ctx,
atomic_mb_read(&blk->in_flight) > 0);
AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > 0);
aio_context_release(ctx);
}
@ -2189,6 +2195,13 @@ static void blk_root_drained_begin(BdrvChild *child)
}
}
static bool blk_root_drained_poll(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
assert(blk->quiesce_counter);
return !!blk->in_flight;
}
static void blk_root_drained_end(BdrvChild *child)
{
BlockBackend *blk = child->opaque;

View File

@ -36,6 +36,7 @@ typedef struct CommitBlockJob {
BlockDriverState *commit_top_bs;
BlockBackend *top;
BlockBackend *base;
BlockDriverState *base_bs;
BlockdevOnError on_error;
int base_flags;
char *backing_file_str;
@ -68,61 +69,67 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
return 0;
}
static void commit_exit(Job *job)
static int commit_prepare(Job *job)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
BlockJob *bjob = &s->common;
BlockDriverState *top = blk_bs(s->top);
BlockDriverState *base = blk_bs(s->base);
BlockDriverState *commit_top_bs = s->commit_top_bs;
bool remove_commit_top_bs = false;
/* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
bdrv_ref(top);
bdrv_ref(commit_top_bs);
/* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
* the normal backing chain can be restored. */
blk_unref(s->base);
s->base = NULL;
if (!job_is_cancelled(job) && job->ret == 0) {
/* success */
job->ret = bdrv_drop_intermediate(s->commit_top_bs, base,
s->backing_file_str);
} else {
/* XXX Can (or should) we somehow keep 'consistent read' blocked even
* after the failed/cancelled commit job is gone? If we already wrote
* something to base, the intermediate images aren't valid any more. */
remove_commit_top_bs = true;
/* FIXME: bdrv_drop_intermediate treats total failures and partial failures
* identically. Further work is needed to disambiguate these cases. */
return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs,
s->backing_file_str);
}
static void commit_abort(Job *job)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
BlockDriverState *top_bs = blk_bs(s->top);
/* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
bdrv_ref(top_bs);
bdrv_ref(s->commit_top_bs);
if (s->base) {
blk_unref(s->base);
}
/* free the blockers on the intermediate nodes so that bdrv_replace_nodes
* can succeed */
block_job_remove_all_bdrv(&s->common);
/* If bdrv_drop_intermediate() failed (or was not invoked), remove the
* commit filter driver from the backing chain now. Do this as the final
* step so that the 'consistent read' permission can be granted.
*
* XXX Can (or should) we somehow keep 'consistent read' blocked even
* after the failed/cancelled commit job is gone? If we already wrote
* something to base, the intermediate images aren't valid any more. */
bdrv_child_try_set_perm(s->commit_top_bs->backing, 0, BLK_PERM_ALL,
&error_abort);
bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs),
&error_abort);
bdrv_unref(s->commit_top_bs);
bdrv_unref(top_bs);
}
static void commit_clean(Job *job)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
/* restore base open flags here if appropriate (e.g., change the base back
* to r/o). These reopens do not need to be atomic, since we won't abort
* even on failure here */
if (s->base_flags != bdrv_get_flags(base)) {
bdrv_reopen(base, s->base_flags, NULL);
if (s->base_flags != bdrv_get_flags(s->base_bs)) {
bdrv_reopen(s->base_bs, s->base_flags, NULL);
}
g_free(s->backing_file_str);
blk_unref(s->top);
/* If there is more than one reference to the job (e.g. if called from
* job_finish_sync()), job_completed() won't free it and therefore the
* blockers on the intermediate nodes remain. This would cause
* bdrv_set_backing_hd() to fail. */
block_job_remove_all_bdrv(bjob);
/* If bdrv_drop_intermediate() didn't already do that, remove the commit
* filter driver from the backing chain. Do this as the final step so that
* the 'consistent read' permission can be granted. */
if (remove_commit_top_bs) {
bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL,
&error_abort);
bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs),
&error_abort);
}
bdrv_unref(commit_top_bs);
bdrv_unref(top);
}
static int coroutine_fn commit_run(Job *job, Error **errp)
@ -211,7 +218,9 @@ static const BlockJobDriver commit_job_driver = {
.user_resume = block_job_user_resume,
.drain = block_job_drain,
.run = commit_run,
.exit = commit_exit,
.prepare = commit_prepare,
.abort = commit_abort,
.clean = commit_clean
},
};
@ -249,7 +258,8 @@ static BlockDriver bdrv_commit_top = {
};
void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockDriverState *base, BlockDriverState *top,
int creation_flags, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
const char *filter_node_name, Error **errp)
{
@ -267,7 +277,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
}
s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
speed, JOB_DEFAULT, NULL, NULL, errp);
speed, creation_flags, NULL, NULL, errp);
if (!s) {
return;
}
@ -344,6 +354,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
if (ret < 0) {
goto fail;
}
s->base_bs = base;
/* Required permissions are already taken with block_job_add_bdrv() */
s->top = blk_new(0, BLK_PERM_ALL);

View File

@ -38,8 +38,6 @@
/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
static AioWait drain_all_aio_wait;
static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags);
@ -268,10 +266,6 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
BdrvChild *ignore_parent)
{
/* Execute pending BHs first and check everything else only after the BHs
* have executed. */
while (aio_poll(bs->aio_context, false));
return bdrv_drain_poll(bs, recursive, ignore_parent, false);
}
@ -288,6 +282,18 @@ static void bdrv_co_drain_bh_cb(void *opaque)
BlockDriverState *bs = data->bs;
if (bs) {
AioContext *ctx = bdrv_get_aio_context(bs);
AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
/*
* When the coroutine yielded, the lock for its home context was
* released, so we need to re-acquire it here. If it explicitly
* acquired a different context, the lock is still held and we don't
* want to lock it a second time (or AIO_WAIT_WHILE() would hang).
*/
if (ctx == co_ctx) {
aio_context_acquire(ctx);
}
bdrv_dec_in_flight(bs);
if (data->begin) {
bdrv_do_drained_begin(bs, data->recursive, data->parent,
@ -296,6 +302,9 @@ static void bdrv_co_drain_bh_cb(void *opaque)
bdrv_do_drained_end(bs, data->recursive, data->parent,
data->ignore_bds_parents);
}
if (ctx == co_ctx) {
aio_context_release(ctx);
}
} else {
assert(data->begin);
bdrv_drain_all_begin();
@ -496,10 +505,6 @@ static bool bdrv_drain_all_poll(void)
BlockDriverState *bs = NULL;
bool result = false;
/* Execute pending BHs first (may modify the graph) and check everything
* else only after the BHs have executed. */
while (aio_poll(qemu_get_aio_context(), false));
/* bdrv_drain_poll() can't make changes to the graph and we are holding the
* main AioContext lock, so iterating bdrv_next_all_states() is safe. */
while ((bs = bdrv_next_all_states(bs))) {
@ -550,7 +555,7 @@ void bdrv_drain_all_begin(void)
}
/* Now poll the in-flight requests */
AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
while ((bs = bdrv_next_all_states(bs))) {
bdrv_drain_assert_idle(bs);
@ -706,8 +711,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
void bdrv_wakeup(BlockDriverState *bs)
{
aio_wait_kick(bdrv_get_aio_wait(bs));
aio_wait_kick(&drain_all_aio_wait);
aio_wait_kick();
}
void bdrv_dec_in_flight(BlockDriverState *bs)

View File

@ -234,9 +234,9 @@ static void qemu_laio_process_completions(LinuxAioState *s)
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
{
aio_context_acquire(s->aio_context);
qemu_laio_process_completions(s);
aio_context_acquire(s->aio_context);
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}

View File

@ -79,6 +79,7 @@ typedef struct MirrorBlockJob {
int max_iov;
bool initial_zeroing_ongoing;
int in_active_write_counter;
bool prepared;
} MirrorBlockJob;
typedef struct MirrorBDSOpaque {
@ -607,7 +608,12 @@ static void mirror_wait_for_all_io(MirrorBlockJob *s)
}
}
static void mirror_exit(Job *job)
/**
* mirror_exit_common: handle both abort() and prepare() cases.
* for .prepare, returns 0 on success and -errno on failure.
* for .abort cases, denoted by abort = true, MUST return 0.
*/
static int mirror_exit_common(Job *job)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
BlockJob *bjob = &s->common;
@ -617,7 +623,13 @@ static void mirror_exit(Job *job)
BlockDriverState *target_bs = blk_bs(s->target);
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
Error *local_err = NULL;
int ret = job->ret;
bool abort = job->ret < 0;
int ret = 0;
if (s->prepared) {
return 0;
}
s->prepared = true;
bdrv_release_dirty_bitmap(src, s->dirty_bitmap);
@ -642,7 +654,7 @@ static void mirror_exit(Job *job)
* required before it could become a backing file of target_bs. */
bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
&error_abort);
if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
BlockDriverState *backing = s->is_none_mode ? src : s->base;
if (backing_bs(target_bs) != backing) {
bdrv_set_backing_hd(target_bs, backing, &local_err);
@ -658,11 +670,8 @@ static void mirror_exit(Job *job)
aio_context_acquire(replace_aio_context);
}
if (s->should_complete && ret == 0) {
BlockDriverState *to_replace = src;
if (s->to_replace) {
to_replace = s->to_replace;
}
if (s->should_complete && !abort) {
BlockDriverState *to_replace = s->to_replace ?: src;
if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) {
bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL);
@ -711,7 +720,18 @@ static void mirror_exit(Job *job)
bdrv_unref(mirror_top_bs);
bdrv_unref(src);
job->ret = ret;
return ret;
}
static int mirror_prepare(Job *job)
{
return mirror_exit_common(job);
}
static void mirror_abort(Job *job)
{
int ret = mirror_exit_common(job);
assert(ret == 0);
}
static void mirror_throttle(MirrorBlockJob *s)
@ -1132,7 +1152,8 @@ static const BlockJobDriver mirror_job_driver = {
.user_resume = block_job_user_resume,
.drain = block_job_drain,
.run = mirror_run,
.exit = mirror_exit,
.prepare = mirror_prepare,
.abort = mirror_abort,
.pause = mirror_pause,
.complete = mirror_complete,
},
@ -1149,7 +1170,8 @@ static const BlockJobDriver commit_active_job_driver = {
.user_resume = block_job_user_resume,
.drain = block_job_drain,
.run = mirror_run,
.exit = mirror_exit,
.prepare = mirror_prepare,
.abort = mirror_abort,
.pause = mirror_pause,
.complete = mirror_complete,
},
@ -1639,7 +1661,8 @@ fail:
void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int64_t speed, uint32_t granularity, int64_t buf_size,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
@ -1655,7 +1678,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
}
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
mirror_start_job(job_id, bs, JOB_DEFAULT, target, replaces,
mirror_start_job(job_id, bs, creation_flags, target, replaces,
speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, NULL, NULL,
&mirror_job_driver, is_none_mode, base, false,

View File

@ -54,16 +54,16 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
}
static void stream_exit(Job *job)
static int stream_prepare(Job *job)
{
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
BlockJob *bjob = &s->common;
BlockDriverState *bs = blk_bs(bjob->blk);
BlockDriverState *base = s->base;
Error *local_err = NULL;
int ret = job->ret;
int ret = 0;
if (!job_is_cancelled(job) && bs->backing && ret == 0) {
if (bs->backing) {
const char *base_id = NULL, *base_fmt = NULL;
if (base) {
base_id = s->backing_file_str;
@ -75,12 +75,19 @@ static void stream_exit(Job *job)
bdrv_set_backing_hd(bs, base, &local_err);
if (local_err) {
error_report_err(local_err);
ret = -EPERM;
goto out;
return -EPERM;
}
}
out:
return ret;
}
static void stream_clean(Job *job)
{
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
BlockJob *bjob = &s->common;
BlockDriverState *bs = blk_bs(bjob->blk);
/* Reopen the image back in read-only mode if necessary */
if (s->bs_flags != bdrv_get_flags(bs)) {
/* Give up write permissions before making it read-only */
@ -89,7 +96,6 @@ out:
}
g_free(s->backing_file_str);
job->ret = ret;
}
static int coroutine_fn stream_run(Job *job, Error **errp)
@ -206,7 +212,8 @@ static const BlockJobDriver stream_job_driver = {
.job_type = JOB_TYPE_STREAM,
.free = block_job_free,
.run = stream_run,
.exit = stream_exit,
.prepare = stream_prepare,
.clean = stream_clean,
.user_resume = block_job_user_resume,
.drain = block_job_drain,
},
@ -214,7 +221,8 @@ static const BlockJobDriver stream_job_driver = {
void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, const char *backing_file_str,
int64_t speed, BlockdevOnError on_error, Error **errp)
int creation_flags, int64_t speed,
BlockdevOnError on_error, Error **errp)
{
StreamBlockJob *s;
BlockDriverState *iter;
@ -236,7 +244,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
BLK_PERM_GRAPH_MOD,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_WRITE,
speed, JOB_DEFAULT, NULL, NULL, errp);
speed, creation_flags, NULL, NULL, errp);
if (!s) {
goto fail;
}

View File

@ -2182,7 +2182,13 @@ static const BlkActionOps actions[] = {
.instance_size = sizeof(BlockDirtyBitmapState),
.prepare = block_dirty_bitmap_disable_prepare,
.abort = block_dirty_bitmap_disable_abort,
}
},
/* Where are transactions for MIRROR, COMMIT and STREAM?
* Although these blockjobs use transaction callbacks like the backup job,
* these jobs do not necessarily adhere to transaction semantics.
* These jobs may not fully undo all of their actions on abort, nor do they
* necessarily work in transactions with more than one job in them.
*/
};
/**
@ -3116,6 +3122,8 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
bool has_backing_file, const char *backing_file,
bool has_speed, int64_t speed,
bool has_on_error, BlockdevOnError on_error,
bool has_auto_finalize, bool auto_finalize,
bool has_auto_dismiss, bool auto_dismiss,
Error **errp)
{
BlockDriverState *bs, *iter;
@ -3123,6 +3131,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
AioContext *aio_context;
Error *local_err = NULL;
const char *base_name = NULL;
int job_flags = JOB_DEFAULT;
if (!has_on_error) {
on_error = BLOCKDEV_ON_ERROR_REPORT;
@ -3184,8 +3193,15 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
/* backing_file string overrides base bs filename */
base_name = has_backing_file ? backing_file : base_name;
if (has_auto_finalize && !auto_finalize) {
job_flags |= JOB_MANUAL_FINALIZE;
}
if (has_auto_dismiss && !auto_dismiss) {
job_flags |= JOB_MANUAL_DISMISS;
}
stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
has_speed ? speed : 0, on_error, &local_err);
job_flags, has_speed ? speed : 0, on_error, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto out;
@ -3198,11 +3214,15 @@ out:
}
void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
bool has_base_node, const char *base_node,
bool has_base, const char *base,
bool has_top_node, const char *top_node,
bool has_top, const char *top,
bool has_backing_file, const char *backing_file,
bool has_speed, int64_t speed,
bool has_filter_node_name, const char *filter_node_name,
bool has_auto_finalize, bool auto_finalize,
bool has_auto_dismiss, bool auto_dismiss,
Error **errp)
{
BlockDriverState *bs;
@ -3214,6 +3234,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
* BlockdevOnError change for blkmirror makes it in
*/
BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;
int job_flags = JOB_DEFAULT;
if (!has_speed) {
speed = 0;
@ -3221,6 +3242,12 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
if (!has_filter_node_name) {
filter_node_name = NULL;
}
if (has_auto_finalize && !auto_finalize) {
job_flags |= JOB_MANUAL_FINALIZE;
}
if (has_auto_dismiss && !auto_dismiss) {
job_flags |= JOB_MANUAL_DISMISS;
}
/* Important Note:
* libvirt relies on the DeviceNotFound error class in order to probe for
@ -3250,7 +3277,20 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
/* default top_bs is the active layer */
top_bs = bs;
if (has_top && top) {
if (has_top_node && has_top) {
error_setg(errp, "'top-node' and 'top' are mutually exclusive");
goto out;
} else if (has_top_node) {
top_bs = bdrv_lookup_bs(NULL, top_node, errp);
if (top_bs == NULL) {
goto out;
}
if (!bdrv_chain_contains(bs, top_bs)) {
error_setg(errp, "'%s' is not in this backing file chain",
top_node);
goto out;
}
} else if (has_top && top) {
if (strcmp(bs->filename, top) != 0) {
top_bs = bdrv_find_backing_image(bs, top);
}
@ -3263,7 +3303,20 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
assert(bdrv_get_aio_context(top_bs) == aio_context);
if (has_base && base) {
if (has_base_node && has_base) {
error_setg(errp, "'base-node' and 'base' are mutually exclusive");
goto out;
} else if (has_base_node) {
base_bs = bdrv_lookup_bs(NULL, base_node, errp);
if (base_bs == NULL) {
goto out;
}
if (!bdrv_chain_contains(top_bs, base_bs)) {
error_setg(errp, "'%s' is not in this backing file chain",
base_node);
goto out;
}
} else if (has_base && base) {
base_bs = bdrv_find_backing_image(top_bs, base);
} else {
base_bs = bdrv_find_base(top_bs);
@ -3295,15 +3348,15 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
goto out;
}
commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
JOB_DEFAULT, speed, on_error,
job_flags, speed, on_error,
filter_node_name, NULL, NULL, false, &local_err);
} else {
BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
goto out;
}
commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
on_error, has_backing_file ? backing_file : NULL,
commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, job_flags,
speed, on_error, has_backing_file ? backing_file : NULL,
filter_node_name, &local_err);
}
if (local_err != NULL) {
@ -3587,8 +3640,11 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
bool has_filter_node_name,
const char *filter_node_name,
bool has_copy_mode, MirrorCopyMode copy_mode,
bool has_auto_finalize, bool auto_finalize,
bool has_auto_dismiss, bool auto_dismiss,
Error **errp)
{
int job_flags = JOB_DEFAULT;
if (!has_speed) {
speed = 0;
@ -3614,6 +3670,12 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_copy_mode) {
copy_mode = MIRROR_COPY_MODE_BACKGROUND;
}
if (has_auto_finalize && !auto_finalize) {
job_flags |= JOB_MANUAL_FINALIZE;
}
if (has_auto_dismiss && !auto_dismiss) {
job_flags |= JOB_MANUAL_DISMISS;
}
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@ -3641,7 +3703,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
* and will allow to check whether the node still exist at mirror completion
*/
mirror_start(job_id, bs, target,
has_replaces ? replaces : NULL,
has_replaces ? replaces : NULL, job_flags,
speed, granularity, buf_size, sync, backing_mode,
on_source_error, on_target_error, unmap, filter_node_name,
copy_mode, errp);
@ -3791,6 +3853,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
arg->has_unmap, arg->unmap,
false, NULL,
arg->has_copy_mode, arg->copy_mode,
arg->has_auto_finalize, arg->auto_finalize,
arg->has_auto_dismiss, arg->auto_dismiss,
&local_err);
bdrv_unref(target_bs);
error_propagate(errp, local_err);
@ -3812,6 +3876,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
bool has_filter_node_name,
const char *filter_node_name,
bool has_copy_mode, MirrorCopyMode copy_mode,
bool has_auto_finalize, bool auto_finalize,
bool has_auto_dismiss, bool auto_dismiss,
Error **errp)
{
BlockDriverState *bs;
@ -3845,6 +3911,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
true, true,
has_filter_node_name, filter_node_name,
has_copy_mode, copy_mode,
has_auto_finalize, auto_finalize,
has_auto_dismiss, auto_dismiss,
&local_err);
error_propagate(errp, local_err);

View File

@ -164,7 +164,7 @@ static bool child_job_drained_poll(BdrvChild *c)
/* An inactive or completed job doesn't have any pending requests. Jobs
* with !job->busy are either already paused or have a pause point after
* being reentered, so no job driver code will run before they pause. */
if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) {
if (!job->busy || job_is_completed(job)) {
return false;
}
@ -221,6 +221,11 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
return 0;
}
static void block_job_on_idle(Notifier *n, void *opaque)
{
aio_wait_kick();
}
bool block_job_is_internal(BlockJob *job)
{
return (job->job.id == NULL);
@ -416,6 +421,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
job->finalize_completed_notifier.notify = block_job_event_completed;
job->pending_notifier.notify = block_job_event_pending;
job->ready_notifier.notify = block_job_event_ready;
job->idle_notifier.notify = block_job_on_idle;
notifier_list_add(&job->job.on_finalize_cancelled,
&job->finalize_cancelled_notifier);
@ -423,6 +429,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
&job->finalize_completed_notifier);
notifier_list_add(&job->job.on_pending, &job->pending_notifier);
notifier_list_add(&job->job.on_ready, &job->ready_notifier);
notifier_list_add(&job->job.on_idle, &job->idle_notifier);
error_setg(&job->blocker, "block device is in use by block job: %s",
job_type_str(&job->job));

5
hmp.c
View File

@ -1907,8 +1907,9 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
int64_t speed = qdict_get_try_int(qdict, "speed", 0);
qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
false, NULL, qdict_haskey(qdict, "speed"), speed,
true, BLOCKDEV_ON_ERROR_REPORT, &error);
false, NULL, qdict_haskey(qdict, "speed"), speed, true,
BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
&error);
hmp_handle_error(mon, &error);
}

View File

@ -30,14 +30,15 @@
/**
* AioWait:
*
* An object that facilitates synchronous waiting on a condition. The main
* loop can wait on an operation running in an IOThread as follows:
* An object that facilitates synchronous waiting on a condition. A single
* global AioWait object (global_aio_wait) is used internally.
*
* The main loop can wait on an operation running in an IOThread as follows:
*
* AioWait *wait = ...;
* AioContext *ctx = ...;
* MyWork work = { .done = false };
* schedule_my_work_in_iothread(ctx, &work);
* AIO_WAIT_WHILE(wait, ctx, !work.done);
* AIO_WAIT_WHILE(ctx, !work.done);
*
* The IOThread must call aio_wait_kick() to notify the main loop when
* work.done changes:
@ -46,7 +47,7 @@
* {
* ...
* work.done = true;
* aio_wait_kick(wait);
* aio_wait_kick();
* }
*/
typedef struct {
@ -54,9 +55,10 @@ typedef struct {
unsigned num_waiters;
} AioWait;
extern AioWait global_aio_wait;
/**
* AIO_WAIT_WHILE:
* @wait: the aio wait object
* @ctx: the aio context, or NULL if multiple aio contexts (for which the
* caller does not hold a lock) are involved in the polling condition.
* @cond: wait while this conditional expression is true
@ -72,10 +74,12 @@ typedef struct {
* wait on conditions between two IOThreads since that could lead to deadlock,
* go via the main loop instead.
*/
#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \
#define AIO_WAIT_WHILE(ctx, cond) ({ \
bool waited_ = false; \
AioWait *wait_ = (wait); \
AioWait *wait_ = &global_aio_wait; \
AioContext *ctx_ = (ctx); \
/* Increment wait_->num_waiters before evaluating cond. */ \
atomic_inc(&wait_->num_waiters); \
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
while ((cond)) { \
aio_poll(ctx_, true); \
@ -84,8 +88,6 @@ typedef struct {
} else { \
assert(qemu_get_current_aio_context() == \
qemu_get_aio_context()); \
/* Increment wait_->num_waiters before evaluating cond. */ \
atomic_inc(&wait_->num_waiters); \
while ((cond)) { \
if (ctx_) { \
aio_context_release(ctx_); \
@ -96,20 +98,18 @@ typedef struct {
} \
waited_ = true; \
} \
atomic_dec(&wait_->num_waiters); \
} \
atomic_dec(&wait_->num_waiters); \
waited_; })
/**
* aio_wait_kick:
* @wait: the aio wait object that should re-evaluate its condition
*
* Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During
* synchronous operations performed in an IOThread, the main thread lets the
* IOThread's event loop run, waiting for the operation to complete. A
* aio_wait_kick() call will wake up the main thread.
*/
void aio_wait_kick(AioWait *wait);
void aio_wait_kick(void);
/**
* aio_wait_bh_oneshot:

View File

@ -410,13 +410,9 @@ void bdrv_drain_all_begin(void);
void bdrv_drain_all_end(void);
void bdrv_drain_all(void);
/* Returns NULL when bs == NULL */
AioWait *bdrv_get_aio_wait(BlockDriverState *bs);
#define BDRV_POLL_WHILE(bs, cond) ({ \
BlockDriverState *bs_ = (bs); \
AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \
bdrv_get_aio_context(bs_), \
AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
cond); })
int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes);

View File

@ -794,9 +794,6 @@ struct BlockDriverState {
unsigned int in_flight;
unsigned int serialising_in_flight;
/* Kicked to signal main loop when a request completes. */
AioWait wait;
/* counter for nested bdrv_io_plug.
* Accessed with atomic ops.
*/
@ -958,6 +955,8 @@ int is_windows_drive(const char *filename);
* flatten the whole backing file chain onto @bs.
* @backing_file_str: The file name that will be written to @bs as the
* the new backing file if the job completes. Ignored if @base is %NULL.
* @creation_flags: Flags that control the behavior of the Job lifetime.
* See @BlockJobCreateFlags
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @on_error: The action to take upon error.
* @errp: Error object.
@ -971,7 +970,8 @@ int is_windows_drive(const char *filename);
*/
void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, const char *backing_file_str,
int64_t speed, BlockdevOnError on_error, Error **errp);
int creation_flags, int64_t speed,
BlockdevOnError on_error, Error **errp);
/**
* commit_start:
@ -980,6 +980,8 @@ void stream_start(const char *job_id, BlockDriverState *bs,
* @bs: Active block device.
* @top: Top block device to be committed.
* @base: Block device that will be written into, and become the new top.
* @creation_flags: Flags that control the behavior of the Job lifetime.
* See @BlockJobCreateFlags
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @on_error: The action to take upon error.
* @backing_file_str: String to use as the backing file in @top's overlay
@ -990,7 +992,8 @@ void stream_start(const char *job_id, BlockDriverState *bs,
*
*/
void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockDriverState *base, BlockDriverState *top,
int creation_flags, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
const char *filter_node_name, Error **errp);
/**
@ -1026,6 +1029,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
* @target: Block device to write to.
* @replaces: Block graph node name to replace once the mirror is done. Can
* only be used when full mirroring is selected.
* @creation_flags: Flags that control the behavior of the Job lifetime.
* See @BlockJobCreateFlags
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @granularity: The chosen granularity for the dirty bitmap.
* @buf_size: The amount of data that can be in flight at one time.
@ -1047,7 +1052,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
*/
void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int64_t speed, uint32_t granularity, int64_t buf_size,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,

View File

@ -70,6 +70,9 @@ typedef struct BlockJob {
/** Called when the job transitions to READY */
Notifier ready_notifier;
/** Called when the job coroutine yields or terminates */
Notifier idle_notifier;
/** BlockDriverStates that are involved in this block job */
GSList *nodes;
} BlockJob;

View File

@ -89,6 +89,11 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);
*/
void coroutine_fn qemu_coroutine_yield(void);
/**
* Get the AioContext of the given coroutine
*/
AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co);
/**
* Get the currently executing coroutine
*/

View File

@ -76,6 +76,9 @@ typedef struct Job {
* Set to false by the job while the coroutine has yielded and may be
* re-entered by job_enter(). There may still be I/O or event loop activity
* pending. Accessed under block_job_mutex (in blockjob.c).
*
* When the job is deferred to the main loop, busy is true as long as the
* bottom half is still pending.
*/
bool busy;
@ -156,6 +159,9 @@ typedef struct Job {
/** Notifiers called when the job transitions to READY */
NotifierList on_ready;
/** Notifiers called when the job coroutine yields or terminates */
NotifierList on_idle;
/** Element of the list of jobs */
QLIST_ENTRY(Job) job_list;
@ -221,17 +227,6 @@ struct JobDriver {
*/
void (*drain)(Job *job);
/**
* If the callback is not NULL, exit will be invoked from the main thread
* when the job's coroutine has finished, but before transactional
* convergence; before @prepare or @abort.
*
* FIXME TODO: This callback is only temporary to transition remaining jobs
* to prepare/commit/abort/clean callbacks and will be removed before 3.1.
* is released.
*/
void (*exit)(Job *job);
/**
* If the callback is not NULL, prepare will be invoked when all the jobs
* belonging to the same transaction complete; or upon this job's completion
@ -532,6 +527,8 @@ void job_user_cancel(Job *job, bool force, Error **errp);
*
* Returns the return value from the job if the job actually completed
* during the call, or -ECANCELED if it was canceled.
*
* Callers must hold the AioContext lock of job->aio_context.
*/
int job_cancel_sync(Job *job);
@ -549,6 +546,8 @@ void job_cancel_sync_all(void);
* function).
*
* Returns the return value from the job.
*
* Callers must hold the AioContext lock of job->aio_context.
*/
int job_complete_sync(Job *job, Error **errp);
@ -574,6 +573,8 @@ void job_dismiss(Job **job, Error **errp);
*
* Returns 0 if the job is successfully completed, -ECANCELED if the job was
* cancelled before completing, and -errno in other error cases.
*
* Callers must hold the AioContext lock of job->aio_context.
*/
int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp);

144
job.c
View File

@ -29,6 +29,7 @@
#include "qemu/job.h"
#include "qemu/id.h"
#include "qemu/main-loop.h"
#include "block/aio-wait.h"
#include "trace-root.h"
#include "qapi/qapi-events-job.h"
@ -136,21 +137,13 @@ static void job_txn_del_job(Job *job)
}
}
static int job_txn_apply(JobTxn *txn, int fn(Job *), bool lock)
static int job_txn_apply(JobTxn *txn, int fn(Job *))
{
AioContext *ctx;
Job *job, *next;
int rc = 0;
QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
if (lock) {
ctx = job->aio_context;
aio_context_acquire(ctx);
}
rc = fn(job);
if (lock) {
aio_context_release(ctx);
}
if (rc) {
break;
}
@ -410,6 +403,11 @@ static void job_event_ready(Job *job)
notifier_list_notify(&job->on_ready, job);
}
static void job_event_idle(Job *job)
{
notifier_list_notify(&job->on_idle, job);
}
void job_enter_cond(Job *job, bool(*fn)(Job *job))
{
if (!job_started(job)) {
@ -455,6 +453,7 @@ static void coroutine_fn job_do_yield(Job *job, uint64_t ns)
timer_mod(&job->sleep_timer, ns);
}
job->busy = false;
job_event_idle(job);
job_unlock();
qemu_coroutine_yield();
@ -535,49 +534,6 @@ void job_drain(Job *job)
}
}
static void job_completed(Job *job);
static void job_exit(void *opaque)
{
Job *job = (Job *)opaque;
AioContext *aio_context = job->aio_context;
if (job->driver->exit) {
aio_context_acquire(aio_context);
job->driver->exit(job);
aio_context_release(aio_context);
}
job_completed(job);
}
/**
* All jobs must allow a pause point before entering their job proper. This
* ensures that jobs can be paused prior to being started, then resumed later.
*/
static void coroutine_fn job_co_entry(void *opaque)
{
Job *job = opaque;
assert(job && job->driver && job->driver->run);
job_pause_point(job);
job->ret = job->driver->run(job, &job->err);
job->deferred_to_main_loop = true;
aio_bh_schedule_oneshot(qemu_get_aio_context(