diff --git a/src/qemu/MIGRATION.txt b/src/qemu/MIGRATION.txt new file mode 100644 index 0000000000..a26a0f3c48 --- /dev/null +++ b/src/qemu/MIGRATION.txt @@ -0,0 +1,100 @@ + QEMU Migration Phases + ===================== + +QEMU supports only migration protocols 2 and 3 (1 was lacking too many +steps). Repeating the protocol sequences from libvirt.c: + +Sequence v2: + + Src: DumpXML + - Generate XML to pass to dst + + Dst: Prepare + - Get ready to accept incoming VM + - Generate optional cookie to pass to src + + Src: Perform + - Start migration and wait for send completion + - Kill off VM if successful, resume if failed + + Dst: Finish + - Wait for recv completion and check status + - Kill off VM if unsuccessful + +Sequence v3: + + Src: Begin + - Generate XML to pass to dst + - Generate optional cookie to pass to dst + + Dst: Prepare + - Get ready to accept incoming VM + - Generate optional cookie to pass to src + + Src: Perform + - Start migration and wait for send completion + - Generate optional cookie to pass to dst + + Dst: Finish + - Wait for recv completion and check status + - Kill off VM if failed, resume if success + - Generate optional cookie to pass to src + + Src: Confirm + - Kill off VM if success, resume if failed + + QEMU Migration Locking Rules + ============================ + +Migration is a complicated beast which may span across several APIs on both +source and destination side and we need to keep the domain we are migrating in +a consistent state during the whole process. + +To prevent anyone from changing the domain in the middle of migration we need to +keep MIGRATION_OUT job active during migration from Begin to Confirm on the +source side and MIGRATION_IN job has to be active from Prepare to Finish on +the destination side. 
+ +For this purpose we introduce several helper methods to deal with locking +primitives (described in THREADS.txt) in the right way: + +* qemuMigrationJobStart + +* qemuMigrationJobContinue + +* qemuMigrationJobStartPhase + +* qemuMigrationJobSetPhase + +* qemuMigrationJobFinish + +The sequence of calling qemuMigrationJob* helper methods is as follows: + +- The first API of a migration protocol (Prepare or Perform/Begin depending on + migration type and version) has to start the migration job and keep it active: + + qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_{IN,OUT}); + qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_*); + ...do work... + qemuMigrationJobContinue(vm); + +- All subsequent phases except for the last one have to keep the job active: + + if (!qemuMigrationJobIsActive(vm, QEMU_ASYNC_JOB_MIGRATION_{IN,OUT})) + return; + qemuMigrationJobStartPhase(driver, vm, QEMU_MIGRATION_PHASE_*); + ...do work... + qemuMigrationJobContinue(vm); + +- The last migration phase finally finishes the migration job: + + if (!qemuMigrationJobIsActive(vm, QEMU_ASYNC_JOB_MIGRATION_{IN,OUT})) + return; + qemuMigrationJobStartPhase(driver, vm, QEMU_MIGRATION_PHASE_*); + ...do work... 
+ qemuMigrationJobFinish(driver, vm); + +While migration job is running (i.e., after qemuMigrationJobStart* but before +qemuMigrationJob{Continue,Finish}), migration phase can be advanced using + + qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_*); diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 1a17f9955b..51b74e90c9 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -26,6 +26,7 @@ #include "qemu_domain.h" #include "qemu_command.h" #include "qemu_capabilities.h" +#include "qemu_migration.h" #include "memory.h" #include "logging.h" #include "virterror_internal.h" @@ -72,6 +73,8 @@ qemuDomainAsyncJobPhaseToString(enum qemuDomainAsyncJob job, switch (job) { case QEMU_ASYNC_JOB_MIGRATION_OUT: case QEMU_ASYNC_JOB_MIGRATION_IN: + return qemuMigrationJobPhaseTypeToString(phase); + case QEMU_ASYNC_JOB_SAVE: case QEMU_ASYNC_JOB_DUMP: case QEMU_ASYNC_JOB_NONE: @@ -92,6 +95,8 @@ qemuDomainAsyncJobPhaseFromString(enum qemuDomainAsyncJob job, switch (job) { case QEMU_ASYNC_JOB_MIGRATION_OUT: case QEMU_ASYNC_JOB_MIGRATION_IN: + return qemuMigrationJobPhaseTypeFromString(phase); + case QEMU_ASYNC_JOB_SAVE: case QEMU_ASYNC_JOB_DUMP: case QEMU_ASYNC_JOB_NONE: diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 9f6b372a74..fba88c91b8 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -46,6 +46,19 @@ #define VIR_FROM_THIS VIR_FROM_QEMU +VIR_ENUM_IMPL(qemuMigrationJobPhase, QEMU_MIGRATION_PHASE_LAST, /* names are listed in the same order as enum qemuMigrationJobPhase */ + "none", + "perform2", + "begin3", + "perform3", + "perform3_done", + "confirm3_cancelled", + "confirm3", + "prepare", + "finish2", + "finish3", +); + enum qemuMigrationCookieFlags { QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS, QEMU_MIGRATION_COOKIE_FLAG_LOCKSTATE, @@ -2776,3 +2789,81 @@ cleanup: } return ret; } +/* Begin async job JOB on VM: returns -1 if qemuDomainObjBeginAsyncJobWithDriver fails; otherwise sets the async job mask (QEMU_JOB_NONE for incoming migration, DEFAULT_JOB_MASK for any other job), marks the job info type as VIR_DOMAIN_JOB_UNBOUNDED and returns 0. */ +int +qemuMigrationJobStart(struct qemud_driver *driver, + virDomainObjPtr vm, + enum qemuDomainAsyncJob job) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + if 
(qemuDomainObjBeginAsyncJobWithDriver(driver, vm, job) < 0) + return -1; + + if (job == QEMU_ASYNC_JOB_MIGRATION_IN) + qemuDomainObjSetAsyncJobMask(vm, QEMU_JOB_NONE); + else + qemuDomainObjSetAsyncJobMask(vm, DEFAULT_JOB_MASK); + + priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; + + return 0; +} +/* Move the migration job of VM to PHASE; a phase lower than the current priv->job.phase is logged with VIR_ERROR and otherwise ignored. */ +void +qemuMigrationJobSetPhase(struct qemud_driver *driver, + virDomainObjPtr vm, + enum qemuMigrationJobPhase phase) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + if (phase < priv->job.phase) { + VIR_ERROR(_("migration protocol going backwards %s => %s"), + qemuMigrationJobPhaseTypeToString(priv->job.phase), + qemuMigrationJobPhaseTypeToString(phase)); + return; + } + + qemuDomainObjSetJobPhase(driver, vm, phase); +} +/* Take a reference on VM with virDomainObjRef, then set the phase via qemuMigrationJobSetPhase. */ +void +qemuMigrationJobStartPhase(struct qemud_driver *driver, + virDomainObjPtr vm, + enum qemuMigrationJobPhase phase) +{ + virDomainObjRef(vm); + qemuMigrationJobSetPhase(driver, vm, phase); +} +/* Drop a reference on VM and return the result of virDomainObjUnref; the job itself is not ended here. */ +int +qemuMigrationJobContinue(virDomainObjPtr vm) +{ + return virDomainObjUnref(vm); +} +/* Return true if the current async job of VM equals JOB; otherwise report VIR_ERR_OPERATION_INVALID and return false. */ +bool +qemuMigrationJobIsActive(virDomainObjPtr vm, + enum qemuDomainAsyncJob job) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + if (priv->job.asyncJob != job) { + const char *msg; + + if (job == QEMU_ASYNC_JOB_MIGRATION_IN) + msg = _("domain '%s' is not processing incoming migration"); + else + msg = _("domain '%s' is not being migrated"); + + qemuReportError(VIR_ERR_OPERATION_INVALID, msg, vm->def->name); + return false; + } + return true; +} +/* End the async job of VM and return the result of qemuDomainObjEndAsyncJob. */ +int +qemuMigrationJobFinish(struct qemud_driver *driver, virDomainObjPtr vm) +{ + return qemuDomainObjEndAsyncJob(driver, vm); +} diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index 3a9b94ee81..005e415b31 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -23,6 +23,7 @@ # define __QEMU_MIGRATION_H__ # include "qemu_conf.h" +# include "qemu_domain.h" /* All supported qemu migration flags. 
*/ # define QEMU_MIGRATION_FLAGS \ @@ -35,6 +36,42 @@ VIR_MIGRATE_NON_SHARED_DISK | \ VIR_MIGRATE_NON_SHARED_INC) +enum qemuMigrationJobPhase { /* qemuMigrationJobSetPhase refuses to move to a numerically lower phase, so the order of these values matters */ + QEMU_MIGRATION_PHASE_NONE = 0, + QEMU_MIGRATION_PHASE_PERFORM2, + QEMU_MIGRATION_PHASE_BEGIN3, + QEMU_MIGRATION_PHASE_PERFORM3, + QEMU_MIGRATION_PHASE_PERFORM3_DONE, + QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED, + QEMU_MIGRATION_PHASE_CONFIRM3, + QEMU_MIGRATION_PHASE_PREPARE, + QEMU_MIGRATION_PHASE_FINISH2, + QEMU_MIGRATION_PHASE_FINISH3, + + QEMU_MIGRATION_PHASE_LAST +}; +VIR_ENUM_DECL(qemuMigrationJobPhase) +/* Helpers for the migration job; the expected calling sequence is described in MIGRATION.txt */ +int qemuMigrationJobStart(struct qemud_driver *driver, + virDomainObjPtr vm, + enum qemuDomainAsyncJob job) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; +void qemuMigrationJobSetPhase(struct qemud_driver *driver, + virDomainObjPtr vm, + enum qemuMigrationJobPhase phase) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); +void qemuMigrationJobStartPhase(struct qemud_driver *driver, + virDomainObjPtr vm, + enum qemuMigrationJobPhase phase) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); +int qemuMigrationJobContinue(virDomainObjPtr obj) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; +bool qemuMigrationJobIsActive(virDomainObjPtr vm, + enum qemuDomainAsyncJob job) + ATTRIBUTE_NONNULL(1); +int qemuMigrationJobFinish(struct qemud_driver *driver, virDomainObjPtr obj) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; + bool qemuMigrationIsAllowed(virDomainDefPtr def) ATTRIBUTE_NONNULL(1); int qemuMigrationSetOffline(struct qemud_driver *driver,