From 8b0470b4e17aafee95a817cf93e005c7aeb55aaf Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Tue, 20 Jan 2015 09:26:31 +0100 Subject: [PATCH] cleanup vzdump -stop implementation * do not unlink $pidfile inside stop_running_backups to avoid race conditions * use pve UPID to save pid (see PVE::Tools::upid_decode) * allow to pass -stop parameter to nomal backup job * simple return 'OK' instead of calling exit() inside API call. * rename stop_all_backups to stop_running_backups --- PVE/API2/VZDump.pm | 21 ++++---- PVE/VZDump.pm | 111 ++++++++++++++++++---------------------- bin/init.d/pve-manager | 2 +- bin/vzdump | 1 + debian/changelog.Debian | 4 ++ 5 files changed, 67 insertions(+), 72 deletions(-) diff --git a/PVE/API2/VZDump.pm b/PVE/API2/VZDump.pm index 3ae98de71..64a113930 100644 --- a/PVE/API2/VZDump.pm +++ b/PVE/API2/VZDump.pm @@ -62,17 +62,18 @@ __PACKAGE__->register_method ({ PVE::VZDump::verify_vzdump_parameters($param, 1); # silent exit if we run on wrong node - exit(0) if $param->{node} && $param->{node} ne $nodename; + return 'OK' if $param->{node} && $param->{node} ne $nodename; - if($param->{stop}){ - PVE::VZDump::stop_all_backups; - } - my $cmdline = PVE::VZDump::command_line($param); # convert string lists to arrays my @vmids = PVE::Tools::split_list(extract_param($param, 'vmid')); + if($param->{stop}){ + PVE::VZDump::stop_running_backups(); + return 'OK' if !scalar(@vmids); + } + my $skiplist = []; if (!$param->{all}) { if (!$param->{node}) { @@ -88,7 +89,7 @@ __PACKAGE__->register_method ({ } @vmids = @localvmids; # silent exit if specified VMs run on other nodes - exit(0) if !scalar(@vmids); + return "OK" if !scalar(@vmids); } $param->{vmids} = PVE::VZDump::check_vmids(@vmids) @@ -122,15 +123,17 @@ __PACKAGE__->register_method ({ my $vzdump = PVE::VZDump->new($cmdline, $param, $skiplist); my $worker = sub { + my $upid = shift; + $SIG{INT} = $SIG{TERM} = $SIG{QUIT} = $SIG{HUP} = $SIG{PIPE} = sub { die "interrupted by signal\n"; }; eval { - $vzdump->getlock(); # only one process allowed + $vzdump->getlock($upid); # only one process allowed }; - if ($@) { - $vzdump->sendmail([], 0, $@); + if (my $err = $@) { + $vzdump->sendmail([], 0, $err); exit(-1); } diff --git a/PVE/VZDump.pm b/PVE/VZDump.pm index 4144790ab..f5aac84b8 100644 --- a/PVE/VZDump.pm +++ b/PVE/VZDump.pm @@ -598,58 +598,54 @@ sub get_lvm_device { } sub getlock { - my ($self) = @_; + my ($self, $upid) = @_; my $fh; - open($fh, "> $pidfile") - or die "cannot open $pidfile: $!"; - - print $fh $$, "\n",PVE::ProcFSTools::read_proc_starttime($$), "\n"; - - close $fh || warn "close $pidfile failed: $!"; - my $maxwait = $self->{opts}->{lockwait} || $self->{lockwait}; + die "missimg UPID" if !$upid; # should not happen + if (!open (SERVER_FLCK, ">>$lockfile")) { debugmsg ('err', "can't open lock on file '$lockfile' - $!", undef, 1); die "can't open lock on file '$lockfile' - $!"; } - if (flock (SERVER_FLCK, LOCK_EX|LOCK_NB)) { - return; - } + if (!flock (SERVER_FLCK, LOCK_EX|LOCK_NB)) { - if (!$maxwait) { - debugmsg ('err', "can't aquire lock '$lockfile' (wait = 0)", undef, 1); - die "can't aquire lock '$lockfile' (wait = 0)"; - } - - debugmsg('info', "trying to get global lock - waiting...", undef, 1); - - eval { - alarm ($maxwait * 60); - - local $SIG{ALRM} = sub { alarm (0); die "got timeout\n"; }; - - if (!flock (SERVER_FLCK, LOCK_EX)) { - my $err = $!; - close (SERVER_FLCK); - alarm (0); - die "$err\n"; + if (!$maxwait) { + debugmsg ('err', "can't aquire lock '$lockfile' (wait = 0)", undef, 1); + die "can't aquire lock '$lockfile' (wait = 0)"; } - alarm (0); - }; - alarm (0); - - my $err = $@; - if ($err) { - debugmsg ('err', "can't aquire lock '$lockfile' - $err", undef, 1); - die "can't aquire lock '$lockfile' - $err"; + debugmsg('info', "trying to get global lock - waiting...", undef, 1); + + eval { + alarm ($maxwait * 60); + + local $SIG{ALRM} = sub { alarm (0); die "got timeout\n"; }; + + if (!flock (SERVER_FLCK, LOCK_EX)) { + my $err = $!; + close (SERVER_FLCK); + alarm (0); + die "$err\n"; + } + alarm (0); + }; + alarm (0); + + my $err = $@; + + if ($err) { + debugmsg ('err', "can't aquire lock '$lockfile' - $err", undef, 1); + die "can't aquire lock '$lockfile' - $err"; + } + + debugmsg('info', "got global lock", undef, 1); } - debugmsg('info', "got global lock", undef, 1); + PVE::Tools::file_set_contents($pidfile, $upid); } sub run_hook_script { @@ -1026,11 +1022,6 @@ sub exec_backup { my $opts = $self->{opts}; - if ($opts->{stop}) { - stop_all_backups($self); - return; - } - debugmsg ('info', "starting new backup job: $self->{cmdline}", undef, 1); debugmsg ('info', "skip external VMs: " . join(', ', @{$self->{skiplist}})) if scalar(@{$self->{skiplist}}); @@ -1185,7 +1176,7 @@ my $confdesc = { }), stop => { type => 'boolean', - description => "Stop all current runnig backup jobs on this host.", + description => "Stop runnig backup jobs on this host.", optional => 1, default => 0, }, @@ -1254,7 +1245,7 @@ sub verify_vzdump_parameters { my ($param, $check_missing) = @_; raise_param_exc({ all => "option conflicts with option 'vmid'"}) - if ($param->{all} || $param->{stop}) && $param->{vmid}; + if $param->{all} && $param->{vmid}; raise_param_exc({ exclude => "option conflicts with option 'vmid'"}) if $param->{exclude} && $param->{vmid}; @@ -1268,29 +1259,25 @@ sub verify_vzdump_parameters { } -sub stop_all_backups { +sub stop_running_backups { my($self) = @_; - return if ! -e $pidfile; + my $upid = PVE::Tools::file_read_firstline($pidfile); + return if !$upid; - my @param = split(/\n/,PVE::Tools::file_get_contents($pidfile)); - my $pid; - my $stime; + my $task = PVE::Tools::upid_decode($upid); - if ($param[0] =~ /^([-\@\w.]+)$/){ - $pid = $1; + if (PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart}) && + PVE::ProcFSTools::read_proc_starttime($task->{pid}) == $task->{pstart}) { + kill(15, $task->{pid}); + # wait max 15 seconds to shut down (else, do nothing for now) + my $i; + for ($i = 15; $i > 0; $i--) { + last if !PVE::ProcFSTools::check_process_running(($task->{pid}, $task->{pstart})); + sleep (1); + } + die "stoping backup process $task->{pid} failed\n" if $i == 0; } - if ($param[1] =~/^([-\@\w.]+)$/){ - $stime = $1; - } - - if(PVE::ProcFSTools::check_process_running($pid, $stime) && - PVE::ProcFSTools::read_proc_starttime($pid) == $stime){ - print "toll"; - kill(15,$pid); - } - - unlink $pidfile; } sub command_line { diff --git a/bin/init.d/pve-manager b/bin/init.d/pve-manager index 6540425a9..a8b0e6478 100755 --- a/bin/init.d/pve-manager +++ b/bin/init.d/pve-manager @@ -31,7 +31,7 @@ case "$1" in pvesh --nooutput create /nodes/localhost/startall ;; stop) - echo "Stopping all running Backups" + echo "Stopping running Backup" vzdump -stop echo "Stopping VMs and Containers" pvesh --nooutput create /nodes/localhost/stopall diff --git a/bin/vzdump b/bin/vzdump index 2472f272f..8e4c346c7 100755 --- a/bin/vzdump +++ b/bin/vzdump @@ -29,6 +29,7 @@ $rpcenv->set_user('root@pam'); my $cmddef = [ 'PVE::API2::VZDump', 'vzdump', 'vmid', undef, sub { my $upid = shift; + exit(0) if $upid eq 'OK'; my $status = PVE::Tools::upid_read_status($upid); exit($status eq 'OK' ? 0 : -1); }]; diff --git a/debian/changelog.Debian b/debian/changelog.Debian index b15e04cd0..7f3ef60ae 100644 --- a/debian/changelog.Debian +++ b/debian/changelog.Debian @@ -1,5 +1,9 @@ pve-manager (3.3-11) unstable; urgency=low + * Fix backup failure at shutdown (stop backup on host shutdown) + + * vzdump: new option --stop to abort running backup job + * Support additional e1000 variants for VM machines -- Proxmox Support Team Tue, 20 Jan 2015 07:17:34 +0100