5
0
mirror of git://git.proxmox.com/git/qemu-server.git synced 2025-01-24 02:04:10 +03:00

add qemu_drive_mirror_monitor completion modes

With QEMU 4.2 we encountered a problem with Unix sockets and SSH socket
forwarding for drive-mirror. It seems the socket gets reopened again and
again after it closes for some reason. This can be worked around by
specifying 'block-job-cancel' instead of 'block-job-complete' when we're
not interested in swapping the disks again from NBD to their original
protocol. This is always the case when we use drive-mirror for live
migrating a VM.

qemu_drive_mirror is used for migration and for clone_disk. All in all
we have 3 cases to handle: the 'skip' case, which skips the completion
of the job; the 'wait' case, which was the default before and still is
when $completion is undefined; and the new 'wait_noswap' case, which is
used for the live migration.
If 'wait_noswap' is specified, we issue a 'block-job-cancel' once the block
job is in 'ready' state. This completes the block job without swapping the
disks.

clone_disk always uses 'block-job-cancel' via the qemu_blockjobs_cancel
sub.

Signed-off-by: Mira Limbeck <m.limbeck@proxmox.com>
This commit is contained in:
Mira Limbeck 2020-03-17 20:56:09 +01:00 committed by Fabian Grünbichler
parent 758a08eb39
commit e02fb12620
3 changed files with 19 additions and 8 deletions

View File

@ -2938,6 +2938,7 @@ __PACKAGE__->register_method({
foreach my $opt (keys %$drives) {
my $drive = $drives->{$opt};
my $skipcomplete = ($total_jobs != $i); # finish after last drive
my $completion = $skipcomplete ? 'skip' : 'wait';
my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
my $storage_list = [ $src_sid ];
@ -2946,7 +2947,7 @@ __PACKAGE__->register_method({
my $newdrive = PVE::QemuServer::clone_disk($storecfg, $vmid, $running, $opt, $drive, $snapname,
$newid, $storage, $format, $fullclone->{$opt}, $newvollist,
$jobs, $skipcomplete, $oldconf->{agent}, $clonelimit);
$jobs, $completion, $oldconf->{agent}, $clonelimit);
$newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);

View File

@ -703,7 +703,7 @@ sub phase2 {
my $bwlimit = PVE::Storage::get_bandwidth_limit('migration', [$source_sid, $target_sid], $opt_bwlimit);
$self->log('info', "$drive: start migration to $nbd_uri");
PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 1, undef, $bwlimit);
PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 'skip', undef, $bwlimit);
}
}
@ -968,7 +968,7 @@ sub phase3_cleanup {
if ($self->{storage_migration}) {
# finish block-job
eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $self->{storage_migration_jobs}); };
eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $self->{storage_migration_jobs}, 'wait_noswap'); };
if (my $err = $@) {
eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $self->{storage_migration_jobs}) };

View File

@ -6521,7 +6521,7 @@ sub qemu_img_format {
}
sub qemu_drive_mirror {
my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $skipcomplete, $qga, $bwlimit) = @_;
my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit) = @_;
$jobs = {} if !$jobs;
@ -6563,11 +6563,13 @@ sub qemu_drive_mirror {
die "mirroring error: $err\n";
}
qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $skipcomplete, $qga);
qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga);
}
sub qemu_drive_mirror_monitor {
my ($vmid, $vmiddst, $jobs, $skipcomplete, $qga) = @_;
my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_;
$completion //= 'wait'; # same semantic as with 'skipcomplete' before
eval {
my $err_complete = 0;
@ -6612,7 +6614,7 @@ sub qemu_drive_mirror_monitor {
if ($readycounter == scalar(keys %$jobs)) {
print "all mirroring jobs are ready \n";
last if $skipcomplete; #do the complete later
last if $completion eq 'skip'; #do the complete later
if ($vmiddst && $vmiddst != $vmid) {
my $agent_running = $qga && qga_check_running($vmid);
@ -6642,7 +6644,15 @@ sub qemu_drive_mirror_monitor {
# try to switch the disk if source and destination are on the same guest
print "$job: Completing block job...\n";
eval { mon_cmd($vmid, "block-job-complete", device => $job) };
my $op;
if ($completion eq 'wait') {
$op = 'block-job-complete';
} elsif ($completion eq 'wait_noswap') {
$op = 'block-job-cancel';
} else {
die "invalid completion value: $completion\n";
}
eval { mon_cmd($vmid, $op, device => $job) };
if ($@ =~ m/cannot be completed/) {
print "$job: Block job cannot be completed, try again.\n";
$err_complete++;