lvmdbusd: Add lock to prevent concurrent lvm shell access

There is a window of time where the following can occur: 1. An API request to the lvm shell is in progress; we have written some command to the lvm shell and that thread is blocked waiting for a response. 2. A signal arrives at the daemon which causes us to exit. The signal handling code path goes directly to the lvm shell and writes "exit\n". This causes the lvm shell to simply exit. 3. The thread that was waiting for a response gets an EIO because the child process has exited. This bubbles up as a failure. This is addressed by placing a lock in the lvm shell proxy to prevent concurrent access to the shell. We also gather additional debug data when we get an error in the lvm shell read path. This should help if the lvm shell exits or crashes on its own.
2024-12-21 13:34:40 +03:00 · 2023-02-27 08:57:24 -06:00 · 2023-02-27 08:57:24 -06:00 · b0e75bd356
commit b0e75bd356
parent e3b7395af4
1 changed files with 43 additions and 31 deletions
--- a/daemons/lvmdbusd/lvm_shell_proxy.py.in
+++ b/daemons/lvmdbusd/lvm_shell_proxy.py.in
@ -18,6 +18,7 @@ import pty
 import sys
 import tempfile
 import time
 import threading
 import select
 try:
@ -107,11 +108,14 @@ class LVMShellProxy(object):
 								else:
 									raise LvmBug(
 										"lvm returned no JSON output!")
-
+			except Exception as e:
-			except IOError as ioe:
+				log_error("While reading from lvm shell we encountered an error %s" % str(e))
-				log_debug(str(ioe))
+				log_error("stdout= %s\nstderr= %s\n" % (stdout, stderr))
-				self.exit_shell()
+				if self.lvm_shell.poll() is not None:
-				raise ioe
+					log_error("Underlying lvm shell process unexpectedly exited: %d" % self.lvm_shell.returncode)
 				else:
 					log_error("Underlying lvm shell process is still present!")
 				raise e
 		if keep_reading and cfg.run.value == 0:
 			# We didn't complete as we are shutting down
@ -131,6 +135,10 @@ class LVMShellProxy(object):
 		tmp_dir = tempfile.mkdtemp(prefix="lvmdbus_")
 		tmp_file = "%s/lvmdbus_report" % (tmp_dir)
 		# Create a lock so that we don't step on each other when we are waiting for a command
 		# to finish and some other request comes in concurrently, like to exit the shell.
 		self.shell_lock = threading.RLock()
 		# Create a fifo for the report output
 		os.mkfifo(tmp_file, 0o600)
@ -188,7 +196,8 @@ class LVMShellProxy(object):
 			os.unlink(tmp_file)
 			os.rmdir(tmp_dir)
-	def get_last_log(self):
+	def _get_last_log(self):
 		# Precondition, lock is held
 		self._write_cmd('lastlog\n')
 		report_json = self._read_response()[1]
 		return get_error_msg(report_json)
@ -209,28 +218,29 @@ class LVMShellProxy(object):
 		cmd += "\n"
 		# run the command by writing it to the shell's STDIN
-		self._write_cmd(cmd)
+		with self.shell_lock:
 			self._write_cmd(cmd)
-		# read everything from the STDOUT to the next prompt
+			# read everything from the STDOUT to the next prompt
-		stdout, report_json, stderr = self._read_response()
+			stdout, report_json, stderr = self._read_response()
-		# Parse the report to see what happened
+			# Parse the report to see what happened
-		if 'log' in report_json:
+			if 'log' in report_json:
-			ret_code = int(report_json['log'][-1:][0]['log_ret_code'])
+				ret_code = int(report_json['log'][-1:][0]['log_ret_code'])
-			# If we have an exported vg we get a log_ret_code == 5 when
+				# If we have an exported vg we get a log_ret_code == 5 when
-			# we do a 'fullreport'
+				# we do a 'fullreport'
-			# Note: 0 == error
+				# Note: 0 == error
-			if (ret_code == 1) or (ret_code == 5 and argv[0] == 'fullreport'):
+				if (ret_code == 1) or (ret_code == 5 and argv[0] == 'fullreport'):
-				rc = 0
+					rc = 0
-			else:
+				else:
-				# Depending on where lvm fails the command, it may not have anything
+					# Depending on where lvm fails the command, it may not have anything
-				# to report for "lastlog", so we need to check for a message in the
+					# to report for "lastlog", so we need to check for a message in the
-				# report json too.
+					# report json too.
-				error_msg = self.get_last_log()
+					error_msg = self._get_last_log()
 				if error_msg is None:
 					error_msg = get_error_msg(report_json)
 					if error_msg is None:
-						error_msg = 'No error reason provided! (missing "log" section)'
+						error_msg = get_error_msg(report_json)
 						if error_msg is None:
 							error_msg = 'No error reason provided! (missing "log" section)'
 		if debug or rc != 0:
 			log_error(("CMD= %s" % cmd))
@ -240,12 +250,14 @@ class LVMShellProxy(object):
 		return rc, report_json, error_msg
 	def exit_shell(self):
-		try:
+		with self.shell_lock:
-			self._write_cmd('exit\n')
+			try:
-			self.lvm_shell.wait(1)
+				if self.lvm_shell is not None:
-			self.lvm_shell = None
+					self._write_cmd('exit\n')
-		except Exception as _e:
+					self.lvm_shell.wait(1)
-			log_error(str(_e))
+					self.lvm_shell = None
 			except Exception as _e:
 				log_error("exit_shell: %s" % (str(_e)))
 	def __del__(self):
 		# Note: When we are shutting down the daemon and the main process has already exited