From ddcbc8fbcf3165db560cb93ba59e8621315d8067 Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Thu, 11 May 2017 12:40:02 +0200 Subject: [PATCH] Tools: make file-locking aware of external exception sources Previously an external exception (eg. caused by a SIGARLM in a code which is already inside a run_with_timeout() call) could happen in various places where we did not properly this situation. For instance after calling $lock_func() but before reaching the cleanup code. In this case a lock was leaked. Additionally the code was broken in that it used perl's automatic hash creation side effect ($a->{x}->{y} implicitly initializing $a->{x} with an empty hash when it did not exist). The effect was that if our own time out was triggered after the initial check for an existing file handle inside $lock_func() happened (extremely rare since perl would have to be running insanely slow), the cleanup did: if (my $fh = $lock_handles->{$$}->{$filename}->{fh}) { This recreated $lock_handles->{$$}->{$filename} as an empty hash. A subsequent call to lock_file_full() will think a file descriptor already exists because the check simply used: if (!$lock_handles->{$$}->{$filename}) { While this could have been a one-line fix for this one particular case, we'd still not be taking external timeouts into account causing the first issue described above. --- src/PVE/Tools.pm | 95 +++++++++++++++------------ test/Makefile | 1 + test/lock_file.pl | 162 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 218 insertions(+), 40 deletions(-) create mode 100755 test/lock_file.pl diff --git a/src/PVE/Tools.pm b/src/PVE/Tools.pm index fe35ef2..04f7469 100644 --- a/src/PVE/Tools.pm +++ b/src/PVE/Tools.pm @@ -25,6 +25,7 @@ use Time::HiRes qw(usleep gettimeofday tv_interval alarm); use Net::DBus qw(dbus_uint32 dbus_uint64); use Net::DBus::Callback; use Net::DBus::Reactor; +use Scalar::Util 'weaken'; # avoid warning when parsing long hex values with hex() no warnings 'portable'; # Support for 64-bit ints required @@ -122,7 +123,12 @@ sub run_with_timeout { } # flock: we use one file handle per process, so lock file -# can be called multiple times and succeeds for the same process. +# can be nested multiple times and succeeds for the same process. +# +# Since this is the only way we lock now and we don't have the old +# 'lock(); code(); unlock();' pattern anymore we do not actually need to +# count how deep we're nesting. Therefore this hash now stores a weak reference +# to a boolean telling us whether we already have a lock. my $lock_handles = {}; @@ -133,58 +139,67 @@ sub lock_file_full { my $mode = $shared ? LOCK_SH : LOCK_EX; - my $lock_func = sub { - if (!$lock_handles->{$$}->{$filename}) { - my $fh = new IO::File(">>$filename") || - die "can't open file - $!\n"; - $lock_handles->{$$}->{$filename} = { fh => $fh, refcount => 0}; - } + my $lockhash = ($lock_handles->{$$} //= {}); - if (!flock($lock_handles->{$$}->{$filename}->{fh}, $mode|LOCK_NB)) { - print STDERR "trying to acquire lock..."; + # Returns a locked file handle. + my $get_locked_file = sub { + my $fh = IO::File->new(">>$filename") + or die "can't open file - $!\n"; + + if (!flock($fh, $mode|LOCK_NB)) { + print STDERR "trying to acquire lock..."; my $success; while(1) { - $success = flock($lock_handles->{$$}->{$filename}->{fh}, $mode); + $success = flock($fh, $mode); # try again on EINTR (see bug #273) if ($success || ($! != EINTR)) { last; } } - if (!$success) { - print STDERR " failed\n"; - die "can't acquire lock '$filename' - $!\n"; - } - print STDERR " OK\n"; - } - $lock_handles->{$$}->{$filename}->{refcount}++; + if (!$success) { + print STDERR " failed\n"; + die "can't acquire lock '$filename' - $!\n"; + } + print STDERR " OK\n"; + } + + return $fh; }; my $res; - - eval { run_with_timeout($timeout, $lock_func); }; - my $err = $@; - if ($err) { - $err = "can't lock file '$filename' - $err"; - } else { - eval { $res = &$code(@param) }; - $err = $@; - } - - if (my $fh = $lock_handles->{$$}->{$filename}->{fh}) { - my $refcount = --$lock_handles->{$$}->{$filename}->{refcount}; - if ($refcount <= 0) { - $lock_handles->{$$}->{$filename} = undef; - close ($fh); + my $checkptr = $lockhash->{$filename}; + my $check = 0; # This must not go out of scope before running the code. + my $local_fh; # This must stay local + if (!$checkptr || !$$checkptr) { + # We cannot create a weak reference in a single atomic step, so we first + # create a false-value, then create a reference to it, then weaken it, + # and after successfully locking the file we change the boolean value. + # + # The reason for this is that if an outer SIGALRM throws an exception + # between creating the reference and weakening it, a subsequent call to + # lock_file_full() will see a leftover full reference to a valid + # variable. This variable must be 0 in order for said call to attempt to + # lock the file anew. + # + # An externally triggered exception elsewhere in the code will cause the + # weak reference to become 'undef', and since the file handle is only + # stored in the local scope in $local_fh, the file will be closed by + # perl's cleanup routines as well. + # + # This still assumes that an IO::File handle can properly deal with such + # exceptions thrown during its own destruction, but that's up to perls + # guts now. + $lockhash->{$filename} = \$check; + weaken $lockhash->{$filename}; + $local_fh = eval { run_with_timeout($timeout, $get_locked_file) }; + if ($@) { + $@ = "can't lock file '$filename' - $@"; + return undef; } + $check = 1; } - - if ($err) { - $@ = $err; - return undef; - } - - $@ = undef; - + $res = eval { &$code(@param); }; + return undef if $@; return $res; } diff --git a/test/Makefile b/test/Makefile index 82851cb..0a5bb38 100644 --- a/test/Makefile +++ b/test/Makefile @@ -6,6 +6,7 @@ all: check: for d in $(SUBDIRS); do $(MAKE) -C $$d check; done + ./lock_file.pl install: check distclean: clean diff --git a/test/lock_file.pl b/test/lock_file.pl new file mode 100755 index 0000000..d3a60d6 --- /dev/null +++ b/test/lock_file.pl @@ -0,0 +1,162 @@ +#!/usr/bin/perl + +use lib '../src'; +use strict; +use warnings; + +use Socket; +use POSIX (); # don't import assert() + +use PVE::Tools 'lock_file_full'; + +my $name = "test.lockfile.$$-"; + +END { + system("rm $name*"); +}; + +# Utilities: + +sub forked($$) { + my ($code1, $code2) = @_; + + pipe(my $except_r, my $except_w) or die "pipe: $!\n"; + + my $pid = fork(); + die "fork failed: $!\n" if !defined($pid); + + if ($pid == 0) { + close($except_r); + eval { $code1->() }; + if ($@) { + print {$except_w} $@; + $except_w->flush(); + POSIX::_exit(1); + } + POSIX::_exit(0); + } + close($except_w); + + eval { $code2->() }; + my $err = $@; + if ($err) { + kill(15, $pid); + } else { + my $err = do { local $/ = undef; <$except_r> }; + } + die "interrupted\n" if waitpid($pid, 0) != $pid; + die $err if $err; + + # Check exit code: + my $status = POSIX::WEXITSTATUS($?); + if ($? == -1) { + die "failed to execute\n"; + } elsif (POSIX::WIFSIGNALED($?)) { + my $sig = POSIX::WTERMSIG($?); + die "got signal $sig\n"; + } elsif ($status != 0) { + die "exit code $status\n"; + } +} + +# Book-keeping: + +my %_ran; +sub new { + %_ran = (); +} +sub ran { + my ($what) = @_; + $_ran{$what} = 1; +} +sub assert { + my ($what) = @_; + die "code didn't run: $what\n" if !$_ran{$what}; +} +sub assert_not { + my ($what) = @_; + die "code shouldn't have run: $what\n" if $_ran{$what}; +} + +# Regular lock: +new(); +lock_file_full($name, 10, 0, sub { ran('single lock') }); +assert('single lock'); + +# Lock multiple times in a row: +new(); +lock_file_full($name, 10, 0, sub { ran('lock A') }); +assert('lock A'); +lock_file_full($name, 10, 0, sub { ran('lock B') }); +assert('lock B'); + +# Nested lock: +new(); +lock_file_full($name, 10, 0, sub { + ran('lock A'); + lock_file_full($name, 10, 0, sub { ran('lock B') }); + assert('lock B'); + ran('lock C'); +}); +assert('lock A'); +assert('lock B'); +assert('lock C'); + +# Independent locks: +new(); +lock_file_full($name, 10, 0, sub { + ran('lock A'); + # locks file "${name}2" + lock_file_full($name.2, 10, 0, sub { ran('lock B') }); + assert('lock B'); + ran('lock C'); +}); +assert('lock A'); +assert('lock B'); +assert('lock C'); + +# Does it actually lock? (shared=0) +# Can we get two simultaneous shared locks? (shared=1) +sub forktest1($) { + my ($shared) = @_; + new(); + # socket pair for synchronization + socketpair(my $fmain, my $fother, AF_UNIX, SOCK_STREAM, PF_UNSPEC) + or die "socketpair(): $!\n"; + forked sub { + # other side + close($fmain); + my $line; + lock_file_full($name, 60, $shared, sub { + ran('other side'); + # tell parent we've acquired the lock + print {$fother} "1\n"; + $fother->flush(); + # wait for parent to be done trying to lock + $line = <$fother>; + }); + die $@ if $@; + die "parent failed\n" if !$line || $line ne "2\n"; + assert('other side'); + }, sub { + # main process + # Wait for our child to lock: + close($fother); + my $line = <$fmain>; + die "child failed to acquire a lock\n" if !$line || $line ne "1\n"; + lock_file_full($name, 1, $shared, sub { + ran('local side'); + }); + if ($shared) { + assert('local side'); + } else { + assert_not('local side'); + } + print {$fmain} "2\n"; + $fmain->flush(); + }; + close($fmain); +} +forktest1(0); +forktest1(1); +print "Ok\n"; # Line-terminate the 'trying to acquire lock' message(s)