Migration pull request

- Avihai's fix to allow vmstate iterators to not starve for VFIO
 - Maksim's fix on additional check on precopy load error
 - Fabiano's fix on fdatasync() hang in mapped-ram
 - Jonathan's fix on vring cached access over MMIO regions
 - Cedric's cleanup patches 1-4 out of his error report series
 - Yu's fix for RDMA migration (which used to be broken even for 8.2)
 - Anthony's small cleanup/fix on err message
 - Steve's patches on privatize migration.h
 - Xiang's patchset to enable zero page detections in multifd threads
 -----BEGIN PGP SIGNATURE-----
 
 iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCZe9+uBIccGV0ZXJ4QHJl
 ZGhhdC5jb20ACgkQO1/MzfOr1wamaQD/SvmpMEcuRndT9LPSxzXowAGDZTBpYUfv
 5XAbx80dS9IBAO8PJJgQJIBHBeacyLBjHP9CsdVtgw5/VW+wCsbfV4AB
 =xavb
 -----END PGP SIGNATURE-----

Merge tag 'migration-20240311-pull-request' of https://gitlab.com/peterx/qemu into staging

Migration pull request

- Avihai's fix to allow vmstate iterators to not starve for VFIO
- Maksim's fix on additional check on precopy load error
- Fabiano's fix on fdatasync() hang in mapped-ram
- Jonathan's fix on vring cached access over MMIO regions
- Cedric's cleanup patches 1-4 out of his error report series
- Yu's fix for RDMA migration (which used to be broken even for 8.2)
- Anthony's small cleanup/fix on err message
- Steve's patches on privatize migration.h
- Xiang's patchset to enable zero page detections in multifd threads

# -----BEGIN PGP SIGNATURE-----
#
# iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCZe9+uBIccGV0ZXJ4QHJl
# ZGhhdC5jb20ACgkQO1/MzfOr1wamaQD/SvmpMEcuRndT9LPSxzXowAGDZTBpYUfv
# 5XAbx80dS9IBAO8PJJgQJIBHBeacyLBjHP9CsdVtgw5/VW+wCsbfV4AB
# =xavb
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 11 Mar 2024 21:59:20 GMT
# gpg:                using EDDSA key B9184DC20CC457DACF7DD1A93B5FCCCDF3ABD706
# gpg:                issuer "peterx@redhat.com"
# gpg: Good signature from "Peter Xu <xzpeter@gmail.com>" [marginal]
# gpg:                 aka "Peter Xu <peterx@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: B918 4DC2 0CC4 57DA CF7D  D1A9 3B5F CCCD F3AB D706

* tag 'migration-20240311-pull-request' of https://gitlab.com/peterx/qemu: (34 commits)
  migration/multifd: Add new migration test cases for legacy zero page checking.
  migration/multifd: Enable multifd zero page checking by default.
  migration/multifd: Implement ram_save_target_page_multifd to handle multifd version of MigrationOps::ram_save_target_page.
  migration/multifd: Implement zero page transmission on the multifd thread.
  migration/multifd: Add new migration option zero-page-detection.
  migration/multifd: Allow clearing of the file_bmap from multifd
  migration/multifd: Allow zero pages in file migration
  migration: purge MigrationState from public interface
  migration: delete unused accessors
  migration: privatize colo interfaces
  migration: migration_file_set_error
  migration: migration_is_device
  migration: migration_thread_is_self
  migration: export vcpu_dirty_limit_period
  migration: export migration_is_running
  migration: export migration_is_active
  migration: export migration_is_setup_or_active
  migration: remove migration.h references
  migration: export fewer options
  migration: Fix format in error message
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2024-03-12 11:35:41 +00:00
commit 8f3f329f5e
44 changed files with 979 additions and 295 deletions

View File

@ -44,7 +44,8 @@ over any transport.
- file migration: do the migration using a file that is passed to QEMU
by path. A file offset option is supported to allow a management
application to add its own metadata to the start of the file without
QEMU interference.
QEMU interference. Note that QEMU does not flush cached file
data/metadata at the end of migration.
In addition, support is included for migration using RDMA, which
transports the page data using ``RDMA``, where the hardware takes care of

View File

@ -32,7 +32,9 @@
#include "hw/virtio/virtio-net.h"
#include "audio/audio.h"
GlobalProperty hw_compat_8_2[] = {};
GlobalProperty hw_compat_8_2[] = {
{ "migration", "zero-page-detection", "legacy"},
};
const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
GlobalProperty hw_compat_8_1[] = {

View File

@ -693,6 +693,16 @@ const PropertyInfo qdev_prop_granule_mode = {
.set_default_value = qdev_propinfo_set_default_value_enum,
};
/*
 * qdev property type for the ZeroPageDetection enum.
 *
 * Values are resolved through ZeroPageDetection_lookup and read/written with
 * the generic enum property helpers; the description advertises the
 * accepted spellings "none", "legacy" and "multifd".
 */
const PropertyInfo qdev_prop_zero_page_detection = {
.name = "ZeroPageDetection",
.description = "zero_page_detection values, "
"none,legacy,multifd",
.enum_table = &ZeroPageDetection_lookup,
.get = qdev_propinfo_get_enum,
.set = qdev_propinfo_set_enum,
.set_default_value = qdev_propinfo_set_default_value_enum,
};
/* --- Reserved Region --- */
/*

View File

@ -39,7 +39,6 @@
#include "sysemu/runstate.h"
#include "trace.h"
#include "qapi/error.h"
#include "migration/migration.h"
#include "migration/misc.h"
#include "migration/blocker.h"
#include "migration/qemu-file.h"
@ -150,14 +149,8 @@ bool vfio_viommu_preset(VFIODevice *vbasedev)
static void vfio_set_migration_error(int err)
{
MigrationState *ms = migrate_get_current();
if (migration_is_setup_or_active(ms->state)) {
WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
if (ms->to_dst_file) {
qemu_file_set_error(ms->to_dst_file, err);
}
}
if (migration_is_setup_or_active()) {
migration_file_set_error(err);
}
}
@ -180,10 +173,8 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev)
static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer)
{
VFIODevice *vbasedev;
MigrationState *ms = migrate_get_current();
if (ms->state != MIGRATION_STATUS_ACTIVE &&
ms->state != MIGRATION_STATUS_DEVICE) {
if (!migration_is_active() && !migration_is_device()) {
return false;
}
@ -225,7 +216,7 @@ vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer)
{
VFIODevice *vbasedev;
if (!migration_is_active(migrate_get_current())) {
if (!migration_is_active()) {
return false;
}

View File

@ -32,7 +32,6 @@
#include "sysemu/reset.h"
#include "trace.h"
#include "qapi/error.h"
#include "migration/migration.h"
#include "pci.h"
VFIOGroupList vfio_group_list =

View File

@ -17,14 +17,12 @@
#include "sysemu/runstate.h"
#include "hw/vfio/vfio-common.h"
#include "migration/migration.h"
#include "migration/options.h"
#include "migration/misc.h"
#include "migration/savevm.h"
#include "migration/vmstate.h"
#include "migration/qemu-file.h"
#include "migration/register.h"
#include "migration/blocker.h"
#include "migration/misc.h"
#include "qapi/error.h"
#include "exec/ramlist.h"
#include "exec/ram_addr.h"
@ -505,6 +503,12 @@ static bool vfio_is_active_iterate(void *opaque)
return vfio_device_state_is_precopy(vbasedev);
}
/*
* Note about migration rate limiting: VFIO migration buffer size is currently
* limited to 1MB, so there is no need to check if migration rate exceeded (as
* in the worst case it will exceed by 1MB). However, if the buffer size is
* later changed to a bigger value, migration rate should be enforced here.
*/
static int vfio_save_iterate(QEMUFile *f, void *opaque)
{
VFIODevice *vbasedev = opaque;
@ -529,11 +533,7 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size,
migration->precopy_dirty_size);
/*
* A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero.
* Return 1 so following handlers will not be potentially blocked.
*/
return 1;
return !migration->precopy_init_size && !migration->precopy_dirty_size;
}
static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
@ -713,9 +713,7 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running,
* Migration should be aborted in this case, but vm_state_notify()
* currently does not support reporting failures.
*/
if (migrate_get_current()->to_dst_file) {
qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
}
migration_file_set_error(ret);
}
trace_vfio_vmstate_change_prepare(vbasedev->name, running,
@ -745,9 +743,7 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state)
* Migration should be aborted in this case, but vm_state_notify()
* currently does not support reporting failures.
*/
if (migrate_get_current()->to_dst_file) {
qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
}
migration_file_set_error(ret);
}
trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),

View File

@ -26,7 +26,6 @@
#include "qemu/sockets.h"
#include "sysemu/runstate.h"
#include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"
#include "exec/ramblock.h"

View File

@ -31,8 +31,6 @@
#include "trace.h"
#include "qemu/error-report.h"
#include "migration/misc.h"
#include "migration/migration.h"
#include "migration/options.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"

View File

@ -9,6 +9,7 @@ extern const PropertyInfo qdev_prop_reserved_region;
extern const PropertyInfo qdev_prop_multifd_compression;
extern const PropertyInfo qdev_prop_mig_mode;
extern const PropertyInfo qdev_prop_granule_mode;
extern const PropertyInfo qdev_prop_zero_page_detection;
extern const PropertyInfo qdev_prop_losttickpolicy;
extern const PropertyInfo qdev_prop_blockdev_on_error;
extern const PropertyInfo qdev_prop_bios_chs_trans;
@ -50,6 +51,9 @@ extern const PropertyInfo qdev_prop_iothread_vq_mapping_list;
MigMode)
#define DEFINE_PROP_GRANULE_MODE(_n, _s, _f, _d) \
DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_granule_mode, GranuleMode)
#define DEFINE_PROP_ZERO_PAGE_DETECTION(_n, _s, _f, _d) \
DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_zero_page_detection, \
ZeroPageDetection)
#define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
LostTickPolicy)

View File

@ -0,0 +1,25 @@
/*
* QEMU public migration capabilities
*
* Copyright (c) 2012-2023 Red Hat Inc
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
/*
 * Accessors for the migration capabilities and parameters declared below.
 *
 * NOTE(review): this header includes nothing itself, so bool, uint64_t and
 * MigMode must already be declared by whoever includes it.
 */
#ifndef QEMU_MIGRATION_CLIENT_OPTIONS_H
#define QEMU_MIGRATION_CLIENT_OPTIONS_H
/* capabilities */
bool migrate_background_snapshot(void);
bool migrate_dirty_limit(void);
bool migrate_postcopy_ram(void);
bool migrate_switchover_ack(void);
/* parameters */
MigMode migrate_mode(void);
uint64_t migrate_vcpu_dirty_limit_period(void);
#endif /* QEMU_MIGRATION_CLIENT_OPTIONS_H */

View File

@ -17,6 +17,7 @@
#include "qemu/notify.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-types-net.h"
#include "migration/client-options.h"
/* migration/ram.c */
@ -59,8 +60,10 @@ void dump_vmstate_json_to_file(FILE *out_fp);
void migration_object_init(void);
void migration_shutdown(void);
bool migration_is_idle(void);
bool migration_is_active(MigrationState *);
bool migrate_mode_is_cpr(MigrationState *);
bool migration_is_active(void);
bool migration_is_device(void);
bool migration_thread_is_self(void);
bool migration_is_setup_or_active(void);
typedef enum MigrationEventType {
MIG_EVENT_PRECOPY_SETUP,
@ -99,16 +102,15 @@ void migration_add_notifier_mode(NotifierWithReturn *notify,
MigrationNotifyFunc func, MigMode mode);
void migration_remove_notifier(NotifierWithReturn *notify);
int migration_call_notifiers(MigrationState *s, MigrationEventType type,
Error **errp);
bool migration_in_setup(MigrationState *);
bool migration_has_finished(MigrationState *);
bool migration_has_failed(MigrationState *);
/* ...and after the device transmission */
bool migration_is_running(void);
void migration_file_set_error(int err);
/* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */
bool migration_in_incoming_postcopy(void);
/* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */
bool migration_incoming_postcopy_advised(void);
/* True if background snapshot is active */
bool migration_in_bg_snapshot(void);

View File

@ -16,30 +16,130 @@
#include "hw/vmstate-if.h"
/**
* struct SaveVMHandlers: handler structure to finely control
* migration of complex subsystems and devices, such as RAM, block and
* VFIO.
*/
typedef struct SaveVMHandlers {
/* This runs inside the BQL. */
SaveStateHandler *save_state;
/*
* save_prepare is called early, even before migration starts, and can be
* used to perform early checks.
/* The following handlers run inside the BQL. */
/**
* @save_state
*
* Saves state section on the source using the latest state format
* version.
*
* Legacy method. Should be deprecated when all users are ported
* to VMStateDescription.
*
* @f: QEMUFile where to send the data
* @opaque: data pointer passed to register_savevm_live()
*/
void (*save_state)(QEMUFile *f, void *opaque);
/**
* @save_prepare
*
* Called early, even before migration starts, and can be used to
* perform early checks.
*
* @opaque: data pointer passed to register_savevm_live()
* @errp: pointer to Error*, to store an error if it happens.
*
* Returns zero to indicate success and negative for error
*/
int (*save_prepare)(void *opaque, Error **errp);
/**
* @save_setup
*
* Initializes the data structures on the source and transmits
* first section containing information on the device
*
* @f: QEMUFile where to send the data
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*save_setup)(QEMUFile *f, void *opaque);
/**
* @save_cleanup
*
* Uninitializes the data structures on the source
*
* @opaque: data pointer passed to register_savevm_live()
*/
void (*save_cleanup)(void *opaque);
/**
* @save_live_complete_postcopy
*
* Called at the end of postcopy for all postcopyable devices.
*
* @f: QEMUFile where to send the data
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
/**
* @save_live_complete_precopy
*
* Transmits the last section for the device containing any
* remaining data at the end of a precopy phase. When postcopy is
* enabled, devices that support postcopy will skip this step,
* where the final data will be flushed at the end of postcopy via
* @save_live_complete_postcopy instead.
*
* @f: QEMUFile where to send the data
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
/* This runs both outside and inside the BQL. */
/**
* @is_active
*
* Will skip a state section if not active
*
* @opaque: data pointer passed to register_savevm_live()
*
* Returns true if state section is active else false
*/
bool (*is_active)(void *opaque);
/**
* @has_postcopy
*
* Checks if a device supports postcopy
*
* @opaque: data pointer passed to register_savevm_live()
*
* Returns true for postcopy support else false
*/
bool (*has_postcopy)(void *opaque);
/* is_active_iterate
* If it is not NULL then qemu_savevm_state_iterate will skip iteration if
* it returns false. For example, it is needed for only-postcopy-states,
* which needs to be handled by qemu_savevm_state_setup and
* qemu_savevm_state_pending, but do not need iterations until not in
* postcopy stage.
/**
* @is_active_iterate
*
* As #SaveVMHandlers.is_active(), will skip an inactive state
* section in qemu_savevm_state_iterate.
*
* For example, it is needed for only-postcopy-states, which needs
* to be handled by qemu_savevm_state_setup() and
* qemu_savevm_state_pending(), but do not need iterations until
* not in postcopy stage.
*
* @opaque: data pointer passed to register_savevm_live()
*
* Returns true if state section is active else false
*/
bool (*is_active_iterate)(void *opaque);
@ -48,44 +148,155 @@ typedef struct SaveVMHandlers {
* use data that is local to the migration thread or protected
* by other locks.
*/
/**
* @save_live_iterate
*
* Should send a chunk of data until the point that stream
* bandwidth limits tell it to stop. Each call generates one
* section.
*
* @f: QEMUFile where to send the data
* @opaque: data pointer passed to register_savevm_live()
*
* Returns 0 to indicate that there is still more data to send,
* 1 that there is no more data to send and
* negative to indicate an error.
*/
int (*save_live_iterate)(QEMUFile *f, void *opaque);
/* This runs outside the BQL! */
/* Note for save_live_pending:
* must_precopy:
* - must be migrated in precopy or in stopped state
* - i.e. must be migrated before target start
/**
* @state_pending_estimate
*
* can_postcopy:
* - can migrate in postcopy or in stopped state
* - i.e. can migrate after target start
* - some can also be migrated during precopy (RAM)
* - some must be migrated after source stops (block-dirty-bitmap)
* This estimates the remaining data to transfer
*
* Sum of can_postcopy and must_postcopy is the whole amount of
* Sum of @can_postcopy and @must_precopy is the whole amount of
* pending data.
*
* @opaque: data pointer passed to register_savevm_live()
* @must_precopy: amount of data that must be migrated in precopy
* or in stopped state, i.e. that must be migrated
* before target start.
* @can_postcopy: amount of data that can be migrated in postcopy
* or in stopped state, i.e. after target start.
* Some can also be migrated during precopy (RAM).
* Some must be migrated after source stops
* (block-dirty-bitmap)
*/
/* This estimates the remaining data to transfer */
void (*state_pending_estimate)(void *opaque, uint64_t *must_precopy,
uint64_t *can_postcopy);
/* This calculate the exact remaining data to transfer */
/**
* @state_pending_exact
*
* This calculates the exact remaining data to transfer
*
* Sum of @can_postcopy and @must_precopy is the whole amount of
* pending data.
*
* @opaque: data pointer passed to register_savevm_live()
* @must_precopy: amount of data that must be migrated in precopy
* or in stopped state, i.e. that must be migrated
* before target start.
* @can_postcopy: amount of data that can be migrated in postcopy
* or in stopped state, i.e. after target start.
* Some can also be migrated during precopy (RAM).
* Some must be migrated after source stops
* (block-dirty-bitmap)
*/
void (*state_pending_exact)(void *opaque, uint64_t *must_precopy,
uint64_t *can_postcopy);
LoadStateHandler *load_state;
/**
* @load_state
*
* Load sections generated by any of the save functions that
* generate sections.
*
* Legacy method. Should be deprecated when all users are ported
* to VMStateDescription.
*
* @f: QEMUFile where to receive the data
* @opaque: data pointer passed to register_savevm_live()
* @version_id: the maximum version_id supported
*
* Returns zero to indicate success and negative for error
*/
int (*load_state)(QEMUFile *f, void *opaque, int version_id);
/**
* @load_setup
*
* Initializes the data structures on the destination.
*
* @f: QEMUFile where to receive the data
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*load_setup)(QEMUFile *f, void *opaque);
/**
* @load_cleanup
*
* Uninitializes the data structures on the destination.
*
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*load_cleanup)(void *opaque);
/* Called when postcopy migration wants to resume from failure */
/**
* @resume_prepare
*
* Called when postcopy migration wants to resume from failure
*
* @s: Current migration state
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*resume_prepare)(MigrationState *s, void *opaque);
/* Checks if switchover ack should be used. Called only in dest */
/**
* @switchover_ack_needed
*
* Checks if switchover ack should be used. Called only on
* destination.
*
* @opaque: data pointer passed to register_savevm_live()
*
* Returns true if switchover ack should be used and false
* otherwise
*/
bool (*switchover_ack_needed)(void *opaque);
} SaveVMHandlers;
/**
* register_savevm_live: Register a set of custom migration handlers
*
* @idstr: state section identifier
* @instance_id: instance id
* @version_id: version id supported
* @ops: SaveVMHandlers structure
* @opaque: data pointer passed to SaveVMHandlers handlers
*/
int register_savevm_live(const char *idstr,
uint32_t instance_id,
int version_id,
const SaveVMHandlers *ops,
void *opaque);
/**
* unregister_savevm: Unregister custom migration handlers
*
* @obj: object associated with state section
* @idstr: state section identifier
* @opaque: data pointer passed to register_savevm_live()
*/
void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque);
#endif

View File

@ -151,8 +151,6 @@ typedef struct IRQState *qemu_irq;
/*
* Function types
*/
typedef void SaveStateHandler(QEMUFile *f, void *opaque);
typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
typedef void (*qemu_irq_handler)(void *opaque, int n, int level);
#endif /* QEMU_TYPEDEFS_H */

View File

@ -242,11 +242,6 @@ static int qio_channel_file_close(QIOChannel *ioc,
{
QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
if (qemu_fdatasync(fioc->fd) < 0) {
error_setg_errno(errp, errno,
"Unable to synchronize file data with storage device");
return -1;
}
if (qemu_close(fioc->fd) < 0) {
error_setg_errno(errp, errno,
"Unable to close file");

View File

@ -63,9 +63,9 @@ static bool colo_runstate_is_stopped(void)
return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
}
static void colo_checkpoint_notify(void *opaque)
static void colo_checkpoint_notify(void)
{
MigrationState *s = opaque;
MigrationState *s = migrate_get_current();
int64_t next_notify_time;
qemu_event_set(&s->colo_checkpoint_event);
@ -74,10 +74,15 @@ static void colo_checkpoint_notify(void *opaque)
timer_mod(s->colo_delay_timer, next_notify_time);
}
/*
 * Callback-shaped wrapper around colo_checkpoint_notify().
 *
 * Takes the void *opaque argument a timer callback receives, ignores it,
 * and forwards to colo_checkpoint_notify(), which takes no arguments.
 */
static void colo_checkpoint_notify_timer(void *opaque)
{
colo_checkpoint_notify();
}
void colo_checkpoint_delay_set(void)
{
if (migration_in_colo_state()) {
colo_checkpoint_notify(migrate_get_current());
colo_checkpoint_notify();
}
}
@ -162,7 +167,7 @@ static void primary_vm_do_failover(void)
* kick COLO thread which might wait at
* qemu_sem_wait(&s->colo_checkpoint_sem).
*/
colo_checkpoint_notify(s);
colo_checkpoint_notify();
/*
* Wake up COLO thread which may blocked in recv() or send(),
@ -518,7 +523,7 @@ out:
static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
{
colo_checkpoint_notify(data);
colo_checkpoint_notify();
}
static void colo_process_checkpoint(MigrationState *s)
@ -642,7 +647,7 @@ void migrate_start_colo_process(MigrationState *s)
bql_unlock();
qemu_event_init(&s->colo_checkpoint_event, false);
s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
colo_checkpoint_notify, s);
colo_checkpoint_notify_timer, NULL);
qemu_sem_init(&s->colo_exit_sem, 0);
colo_process_checkpoint(s);

View File

@ -159,7 +159,7 @@ void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp)
int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov,
int niov, RAMBlock *block, Error **errp)
{
ssize_t ret = -1;
ssize_t ret = 0;
int i, slice_idx, slice_num;
uintptr_t base, next, offset;
size_t len;
@ -191,7 +191,7 @@ int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov,
*/
offset = (uintptr_t) iov[slice_idx].iov_base - (uintptr_t) block->host;
if (offset >= block->used_length) {
error_setg(errp, "offset " RAM_ADDR_FMT
error_setg(errp, "offset %" PRIxPTR
"outside of ramblock %s range", offset, block->idstr);
ret = -1;
break;

View File

@ -22,6 +22,7 @@ system_ss.add(files(
'migration.c',
'multifd.c',
'multifd-zlib.c',
'multifd-zero-page.c',
'ram-compress.c',
'options.c',
'postcopy-ram.c',

View File

@ -344,6 +344,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "%s: %s\n",
MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
MultiFDCompression_str(params->multifd_compression));
assert(params->has_zero_page_detection);
monitor_printf(mon, "%s: %s\n",
MigrationParameter_str(MIGRATION_PARAMETER_ZERO_PAGE_DETECTION),
qapi_enum_lookup(&ZeroPageDetection_lookup,
params->zero_page_detection));
monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
params->xbzrle_cache_size);
@ -634,6 +639,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p->has_multifd_zstd_level = true;
visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
break;
case MIGRATION_PARAMETER_ZERO_PAGE_DETECTION:
p->has_zero_page_detection = true;
visit_type_ZeroPageDetection(v, param, &p->zero_page_detection, &err);
break;
case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
p->has_xbzrle_cache_size = true;
if (!visit_type_size(v, param, &cache_size, &err)) {

View File

@ -1081,9 +1081,11 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
* Return true if we're already in the middle of a migration
* (i.e. any of the active or setup states)
*/
bool migration_is_setup_or_active(int state)
bool migration_is_setup_or_active(void)
{
switch (state) {
MigrationState *s = current_migration;
switch (s->state) {
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
@ -1101,9 +1103,11 @@ bool migration_is_setup_or_active(int state)
}
}
bool migration_is_running(int state)
bool migration_is_running(void)
{
switch (state) {
MigrationState *s = current_migration;
switch (s->state) {
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED:
@ -1404,7 +1408,7 @@ static void migrate_fd_cleanup(MigrationState *s)
qemu_fclose(tmp);
}
assert(!migration_is_active(s));
assert(!migration_is_active());
if (s->state == MIGRATION_STATUS_CANCELLING) {
migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
@ -1475,7 +1479,7 @@ static void migrate_fd_cancel(MigrationState *s)
do {
old_state = s->state;
if (!migration_is_running(old_state)) {
if (!migration_is_running()) {
break;
}
/* If the migration is paused, kick it out of the pause */
@ -1544,16 +1548,6 @@ int migration_call_notifiers(MigrationState *s, MigrationEventType type,
return ret;
}
bool migration_in_setup(MigrationState *s)
{
return s->state == MIGRATION_STATUS_SETUP;
}
bool migration_has_finished(MigrationState *s)
{
return s->state == MIGRATION_STATUS_COMPLETED;
}
bool migration_has_failed(MigrationState *s)
{
return (s->state == MIGRATION_STATUS_CANCELLED ||
@ -1601,10 +1595,8 @@ bool migration_incoming_postcopy_advised(void)
bool migration_in_bg_snapshot(void)
{
MigrationState *s = migrate_get_current();
return migrate_background_snapshot() &&
migration_is_setup_or_active(s->state);
migration_is_setup_or_active();
}
bool migration_is_idle(void)
@ -1637,12 +1629,28 @@ bool migration_is_idle(void)
return false;
}
bool migration_is_active(MigrationState *s)
bool migration_is_active(void)
{
MigrationState *s = current_migration;
return (s->state == MIGRATION_STATUS_ACTIVE ||
s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}
/*
 * Report whether the current outgoing migration is in the
 * MIGRATION_STATUS_DEVICE state.
 *
 * Returns true only when the state equals MIGRATION_STATUS_DEVICE.
 */
bool migration_is_device(void)
{
    return current_migration->state == MIGRATION_STATUS_DEVICE;
}
/*
 * Report whether the calling thread is the thread recorded in the current
 * migration state.
 *
 * Returns the result of qemu_thread_is_self() on that thread handle.
 */
bool migration_thread_is_self(void)
{
    return qemu_thread_is_self(&current_migration->thread);
}
bool migrate_mode_is_cpr(MigrationState *s)
{
return s->parameters.mode == MIG_MODE_CPR_REBOOT;
@ -1960,7 +1968,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
return true;
}
if (migration_is_running(s->state)) {
if (migration_is_running()) {
error_setg(errp, QERR_MIGRATION_ACTIVE);
return false;
}
@ -2297,7 +2305,7 @@ static void *source_return_path_thread(void *opaque)
trace_source_return_path_thread_entry();
rcu_register_thread();
while (migration_is_setup_or_active(ms->state)) {
while (migration_is_setup_or_active()) {
trace_source_return_path_thread_loop_top();
header_type = qemu_get_be16(rp);
@ -3020,6 +3028,17 @@ static MigThrError postcopy_pause(MigrationState *s)
}
}
/*
 * Record @err on the outgoing migration stream, if there is one.
 *
 * The to_dst_file pointer is read and used under qemu_file_lock; when no
 * stream is set the call does nothing.
 *
 * @err: error value handed to qemu_file_set_error()
 */
void migration_file_set_error(int err)
{
    MigrationState *ms = current_migration;

    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
        QEMUFile *out = ms->to_dst_file;

        if (out) {
            qemu_file_set_error(out, err);
        }
    }
}
static MigThrError migration_detect_error(MigrationState *s)
{
int ret;
@ -3461,7 +3480,7 @@ static void *migration_thread(void *opaque)
trace_migration_thread_setup_complete();
while (migration_is_active(s)) {
while (migration_is_active()) {
if (urgent || !migration_rate_exceeded(s->to_dst_file)) {
MigIterateState iter_state = migration_iteration_run(s);
if (iter_state == MIG_ITERATE_SKIP) {
@ -3607,7 +3626,7 @@ static void *bg_migration_thread(void *opaque)
migration_bh_schedule(bg_migration_vm_start_bh, s);
bql_unlock();
while (migration_is_active(s)) {
while (migration_is_active()) {
MigIterateState iter_state = bg_migration_iteration_run(s);
if (iter_state == MIG_ITERATE_SKIP) {
continue;

View File

@ -26,6 +26,7 @@
#include "qom/object.h"
#include "postcopy-ram.h"
#include "sysemu/runstate.h"
#include "migration/misc.h"
struct PostcopyBlocktimeContext;
@ -479,8 +480,8 @@ bool migrate_has_error(MigrationState *s);
void migrate_fd_connect(MigrationState *s, Error *error_in);
bool migration_is_setup_or_active(int state);
bool migration_is_running(int state);
int migration_call_notifiers(MigrationState *s, MigrationEventType type,
Error **errp);
int migrate_init(MigrationState *s, Error **errp);
bool migration_is_blocked(Error **errp);
@ -488,6 +489,8 @@ bool migration_is_blocked(Error **errp);
bool migration_in_postcopy(void);
bool migration_postcopy_is_alive(int state);
MigrationState *migrate_get_current(void);
bool migration_has_failed(MigrationState *);
bool migrate_mode_is_cpr(MigrationState *);
uint64_t ram_get_total_transferred_pages(void);

View File

@ -0,0 +1,87 @@
/*
* Multifd zero page detection implementation.
*
* Copyright (c) 2024 Bytedance Inc
*
* Authors:
* Hao Xiang <hao.xiang@bytedance.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "exec/ramblock.h"
#include "migration.h"
#include "multifd.h"
#include "options.h"
#include "ram.h"
/*
 * True when migrate_zero_page_detection() reports
 * ZERO_PAGE_DETECTION_MULTIFD, i.e. zero page detection is done by multifd.
 */
static bool multifd_zero_page_enabled(void)
{
return migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD;
}
/*
 * Exchange the entries at indices @a and @b of @pages_offset.
 * Nothing is touched when both indices are the same.
 */
static void swap_page_offset(ram_addr_t *pages_offset, int a, int b)
{
    if (a != b) {
        ram_addr_t saved = pages_offset[a];

        pages_offset[a] = pages_offset[b];
        pages_offset[b] = saved;
    }
}
/**
 * multifd_send_zero_page_detect: Perform zero page detection on all pages.
 *
 * Partitions p->pages->offset in place so that offsets of normal pages come
 * first and offsets of zero pages come last, then stores the number of
 * normal pages in p->pages->normal_num. When multifd zero page detection is
 * not enabled, every page is counted as normal and the array is untouched.
 *
 * @p: pointer to the send params
 */
void multifd_send_zero_page_detect(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = p->pages;
    RAMBlock *rb = pages->block;
    int head = 0;
    int tail = pages->num - 1;

    if (!multifd_zero_page_enabled()) {
        pages->normal_num = pages->num;
        return;
    }

    /*
     * Two-index partition: entries before 'head' are known normal pages,
     * entries after 'tail' are known zero pages. A zero page found at
     * 'head' is swapped to 'tail'; the entry swapped in is examined on
     * the next pass because 'head' does not advance.
     */
    while (head <= tail) {
        uint64_t offset = pages->offset[head];

        if (buffer_is_zero(rb->host + offset, p->page_size)) {
            swap_page_offset(pages->offset, head, tail);
            ram_release_page(rb->idstr, offset);
            tail--;
        } else {
            head++;
        }
    }

    pages->normal_num = head;
}
/*
 * Zero every page listed in p->zero[] (p->zero_num entries, each an offset
 * from p->host). Pages that already read as all zeroes are left unwritten.
 *
 * @p: pointer to the recv params
 */
void multifd_recv_zero_page_process(MultiFDRecvParams *p)
{
    for (int idx = 0; idx < p->zero_num; idx++) {
        void *dst = p->host + p->zero[idx];

        if (buffer_is_zero(dst, p->page_size)) {
            continue;
        }
        memset(dst, 0, p->page_size);
    }
}

View File

@ -123,13 +123,15 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
int ret;
uint32_t i;
multifd_send_prepare_header(p);
if (!multifd_send_prepare_common(p)) {
goto out;
}
for (i = 0; i < pages->num; i++) {
for (i = 0; i < pages->normal_num; i++) {
uint32_t available = z->zbuff_len - out_size;
int flush = Z_NO_FLUSH;
if (i == pages->num - 1) {
if (i == pages->normal_num - 1) {
flush = Z_SYNC_FLUSH;
}
@ -172,10 +174,10 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
p->iov[p->iovs_num].iov_len = out_size;
p->iovs_num++;
p->next_packet_size = out_size;
out:
p->flags |= MULTIFD_FLAG_ZLIB;
multifd_send_fill_packet(p);
return 0;
}
@ -261,6 +263,14 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp)
p->id, flags, MULTIFD_FLAG_ZLIB);
return -1;
}
multifd_recv_zero_page_process(p);
if (!p->normal_num) {
assert(in_size == 0);
return 0;
}
ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp);
if (ret != 0) {
@ -310,6 +320,7 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp)
p->id, out_size, expected_size);
return -1;
}
return 0;
}

View File

@ -118,16 +118,18 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
int ret;
uint32_t i;
multifd_send_prepare_header(p);
if (!multifd_send_prepare_common(p)) {
goto out;
}
z->out.dst = z->zbuff;
z->out.size = z->zbuff_len;
z->out.pos = 0;
for (i = 0; i < pages->num; i++) {
for (i = 0; i < pages->normal_num; i++) {
ZSTD_EndDirective flush = ZSTD_e_continue;
if (i == pages->num - 1) {
if (i == pages->normal_num - 1) {
flush = ZSTD_e_flush;
}
z->in.src = p->pages->block->host + pages->offset[i];
@ -161,10 +163,10 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
p->iov[p->iovs_num].iov_len = z->out.pos;
p->iovs_num++;
p->next_packet_size = z->out.pos;
out:
p->flags |= MULTIFD_FLAG_ZSTD;
multifd_send_fill_packet(p);
return 0;
}
@ -257,6 +259,14 @@ static int zstd_recv(MultiFDRecvParams *p, Error **errp)
p->id, flags, MULTIFD_FLAG_ZSTD);
return -1;
}
multifd_recv_zero_page_process(p);
if (!p->normal_num) {
assert(in_size == 0);
return 0;
}
ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp);
if (ret != 0) {

View File

@ -11,6 +11,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/rcu.h"
#include "exec/target_page.h"
#include "sysemu/sysemu.h"
@ -111,11 +112,16 @@ void multifd_send_channel_created(void)
/*
 * Update the RAMBlock file bitmap for every page queued in p->pages.
 *
 * p->pages->offset holds the normal pages first (indices
 * [0, normal_num)) followed by the zero pages (indices
 * [normal_num, num)).  Normal pages are passed to
 * ramblock_set_file_bmap_atomic() with 'true' and zero pages with
 * 'false' (presumably set/clear of the page's bit; confirm against the
 * helper's definition).
 */
static void multifd_set_file_bitmap(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = p->pages;

    assert(pages->block);

    for (int i = 0; i < pages->normal_num; i++) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true);
    }

    /*
     * The zero pages occupy the tail of the array, so iterate from
     * normal_num up to num.  Starting at num and stopping at the zero
     * page count would never execute, since that count cannot exceed num.
     */
    for (int i = pages->normal_num; i < pages->num; i++) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false);
    }
}
@ -153,13 +159,13 @@ static void multifd_send_prepare_iovs(MultiFDSendParams *p)
{
MultiFDPages_t *pages = p->pages;
for (int i = 0; i < pages->num; i++) {
for (int i = 0; i < pages->normal_num; i++) {
p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
p->iov[p->iovs_num].iov_len = p->page_size;
p->iovs_num++;
}
p->next_packet_size = pages->num * p->page_size;
p->next_packet_size = pages->normal_num * p->page_size;
}
/**
@ -178,6 +184,8 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
bool use_zero_copy_send = migrate_zero_copy_send();
int ret;
multifd_send_zero_page_detect(p);
if (!multifd_use_packets()) {
multifd_send_prepare_iovs(p);
multifd_set_file_bitmap(p);
@ -261,6 +269,13 @@ static int nocomp_recv(MultiFDRecvParams *p, Error **errp)
p->id, flags, MULTIFD_FLAG_NOCOMP);
return -1;
}
multifd_recv_zero_page_process(p);
if (!p->normal_num) {
return 0;
}
for (int i = 0; i < p->normal_num; i++) {
p->iov[i].iov_base = p->host + p->normal[i];
p->iov[i].iov_len = p->page_size;
@ -295,6 +310,7 @@ static void multifd_pages_reset(MultiFDPages_t *pages)
* overwritten later when reused.
*/
pages->num = 0;
pages->normal_num = 0;
pages->block = NULL;
}
@ -386,11 +402,13 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
MultiFDPacket_t *packet = p->packet;
MultiFDPages_t *pages = p->pages;
uint64_t packet_num;
uint32_t zero_num = pages->num - pages->normal_num;
int i;
packet->flags = cpu_to_be32(p->flags);
packet->pages_alloc = cpu_to_be32(p->pages->allocated);
packet->normal_pages = cpu_to_be32(pages->num);
packet->normal_pages = cpu_to_be32(pages->normal_num);
packet->zero_pages = cpu_to_be32(zero_num);
packet->next_packet_size = cpu_to_be32(p->next_packet_size);
packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num);
@ -408,10 +426,11 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
}
p->packets_sent++;
p->total_normal_pages += pages->num;
p->total_normal_pages += pages->normal_num;
p->total_zero_pages += zero_num;
trace_multifd_send(p->id, packet_num, pages->num, p->flags,
p->next_packet_size);
trace_multifd_send(p->id, packet_num, pages->normal_num, zero_num,
p->flags, p->next_packet_size);
}
static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
@ -452,20 +471,29 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
p->normal_num = be32_to_cpu(packet->normal_pages);
if (p->normal_num > packet->pages_alloc) {
error_setg(errp, "multifd: received packet "
"with %u pages and expected maximum pages are %u",
"with %u normal pages and expected maximum pages are %u",
p->normal_num, packet->pages_alloc) ;
return -1;
}
p->zero_num = be32_to_cpu(packet->zero_pages);
if (p->zero_num > packet->pages_alloc - p->normal_num) {
error_setg(errp, "multifd: received packet "
"with %u zero pages and expected maximum zero pages are %u",
p->zero_num, packet->pages_alloc - p->normal_num) ;
return -1;
}