From 9b6efe436d177877145a5a6e8e548e2d54ad1a38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?=
Date: Tue, 17 Mar 2020 08:55:27 +0100
Subject: [PATCH] migrate: add live-migration of replicated disks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

with incremental drive-mirror and dirty-bitmap tracking.

1.) get replicated disks that are currently referenced by running VM
2.) add a block-dirty-bitmap to each of them
3.) replicate ALL replicated disks
4.) pass bitmaps from 2) to drive-mirror for disks from 1)
5.) skip replicated disks when cleaning up volumes on either source or
target

added error handling is just removing the bitmaps if an error occurs at
any point after 2, except when the handover to the target node has
already happened, since the bitmaps are cleaned up together with the
source VM in that case.

Signed-off-by: Fabian Grünbichler
Signed-off-by: Thomas Lamprecht
Tested-by: Stefan Reiter
---
Notes:
    cleanup_bitmaps() iterates over `keys %{$self->{target_drive}}`.
    Iterating the bare hash would also yield the per-drive hash refs;
    using such a ref as a key stringifies it to "HASH(0x...)" and the
    lookup autovivifies a bogus entry in $self->{target_drive}.

    Leading whitespace follows the usual tab/4-space mix of the target
    files; verify against the tree before applying.

 PVE/QemuMigrate.pm | 52 ++++++++++++++++++++++++++++++++++++++++++++--
 PVE/QemuServer.pm  |  7 +++++++
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
index 5eb9dfee..6f300de8 100644
--- a/PVE/QemuMigrate.pm
+++ b/PVE/QemuMigrate.pm
@@ -447,8 +447,30 @@ sub sync_disks {
 
 	my $rep_cfg = PVE::ReplicationConfig->new();
 	if (my $jobcfg = $rep_cfg->find_local_replication_job($vmid, $self->{node})) {
-	    die "can't live migrate VM with replicated volumes\n" if $self->{running};
+	    if ($self->{running}) {
+		my $live_replicatable_volumes = {};
+		PVE::QemuServer::foreach_drive($conf, sub {
+		    my ($ds, $drive) = @_;
+
+		    my $volid = $drive->{file};
+		    $live_replicatable_volumes->{$ds} = $volid
+			if defined($replicatable_volumes->{$volid});
+		});
+		foreach my $drive (keys %$live_replicatable_volumes) {
+		    my $volid = $live_replicatable_volumes->{$drive};
+
+		    my $bitmap = "repl_$drive";
+
+		    # start tracking before replication to get full delta + a few duplicates
+		    $self->log('info', "$drive: start tracking writes using block-dirty-bitmap '$bitmap'");
+		    mon_cmd($vmid, 'block-dirty-bitmap-add', node => "drive-$drive", name => $bitmap);
+
+		    # other info comes from target node in phase 2
+		    $self->{target_drive}->{$drive}->{bitmap} = $bitmap;
+		}
+	    }
 	    $self->log('info', "replicating disk images");
+
 	    my $start_time = time();
 	    my $logfunc = sub { $self->log('info', shift) };
 	    $self->{replicated_volumes} = PVE::Replication::run_replication(
@@ -503,6 +525,8 @@ sub cleanup_remotedisks {
     my ($self) = @_;
 
     foreach my $target_drive (keys %{$self->{target_drive}}) {
+	# don't clean up replicated disks!
+	next if defined($self->{target_drive}->{$target_drive}->{bitmap});
 
 	my $drive = PVE::QemuServer::parse_drive($target_drive, $self->{target_drive}->{$target_drive}->{drivestr});
 	my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
@@ -517,6 +541,16 @@ sub cleanup_remotedisks {
     }
 }
 
+sub cleanup_bitmaps {
+    my ($self) = @_;
+    foreach my $drive (keys %{$self->{target_drive}}) {
+	my $bitmap = $self->{target_drive}->{$drive}->{bitmap};
+	next if !$bitmap;
+	$self->log('info', "$drive: removing block-dirty-bitmap '$bitmap'");
+	mon_cmd($self->{vmid}, 'block-dirty-bitmap-remove', node => "drive-$drive", name => $bitmap);
+    }
+}
+
 sub phase1 {
     my ($self, $vmid) = @_;
 
@@ -553,6 +587,12 @@ sub phase1_cleanup {
 	    # fixme: try to remove ?
 	}
     }
+
+    eval { $self->cleanup_bitmaps() };
+    if (my $err = $@) {
+	$self->log('err', $err);
+    }
+
 }
 
 sub phase2 {
@@ -737,9 +777,10 @@ sub phase2 {
 	    my $target_sid = PVE::Storage::Plugin::parse_volume_id($target_drive->{file});
 
 	    my $bwlimit = PVE::Storage::get_bandwidth_limit('migration', [$source_sid, $target_sid], $opt_bwlimit);
+	    my $bitmap = $target->{bitmap};
 
 	    $self->log('info', "$drive: start migration to $nbd_uri");
-	    PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 'skip', undef, $bwlimit);
+	    PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 'skip', undef, $bwlimit, $bitmap);
 	}
     }
 
@@ -957,6 +998,10 @@ sub phase2_cleanup {
 	if (my $err = $@) {
 	    $self->log('err', $err);
 	}
+	eval { $self->cleanup_bitmaps() };
+	if (my $err = $@) {
+	    $self->log('err', $err);
+	}
     }
 
     my $nodename = PVE::INotify::nodename();
@@ -1118,6 +1163,9 @@ sub phase3_cleanup {
 	my $volids = $self->{online_local_volumes};
 
 	foreach my $volid (@$volids) {
+	    # keep replicated volumes!
+	    next if $self->{replicated_volumes}->{$volid};
+
 	    eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
 	    if (my $err = $@) {
 		$self->log('err', "removing local copy of '$volid' failed - $err");
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 8b746d98..4bc6ab42 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -4755,11 +4755,18 @@ sub vm_start {
 		$local_volumes->{$ds} = [$volid, $storeid, $volname];
 	    });
 
+	    my $replicatable_volumes = PVE::QemuConfig->get_replicatable_volumes($storecfg, $vmid, $conf);
+
 	    my $format = undef;
 
 	    foreach my $opt (sort keys %$local_volumes) {
 
 		my ($volid, $storeid, $volname) = @{$local_volumes->{$opt}};
+		if ($replicatable_volumes->{$volid}) {
+		    # re-use existing, replicated volume with bitmap on source side
+		    $local_volumes->{$opt} = $conf->{$opt};
+		    next;
+		}
 		my $drive = parse_drive($opt, $conf->{$opt});
 
 		# If a remote storage is specified and the format of the original