From b0e75bd356a070423754676872b7b6948913be2e Mon Sep 17 00:00:00 2001
From: Tony Asleson <tasleson@redhat.com>
Date: Mon, 27 Feb 2023 08:57:24 -0600
Subject: [PATCH] lvmdbusd: Add lock to prevent concurrent lvm shell access

There is a window of time where the following can occur.

1. An API request to the lvm shell is in progress: we have written a
   command to the lvm shell and that thread is blocked waiting for the
   response.
2. A signal arrives at the daemon, which causes us to exit.  The signal
   handling code path goes directly to the lvm shell and writes
   "exit\n".  This causes the lvm shell to simply exit.
3. The thread that was waiting for a response gets an EIO as the child
   process has exited.  This bubbles up a failure.

This is addressed by placing a lock in the lvm shell to prevent
concurrent access to the shell.  We also gather additional debug data
when we get an error in the lvm shell read path.  This should help if
the lvm shell exits/crashes on its own.
---
 daemons/lvmdbusd/lvm_shell_proxy.py.in | 74 +++++++++++++++-----------
 1 file changed, 43 insertions(+), 31 deletions(-)

diff --git a/daemons/lvmdbusd/lvm_shell_proxy.py.in b/daemons/lvmdbusd/lvm_shell_proxy.py.in
index 37d73218b..b8c8fa565 100755
--- a/daemons/lvmdbusd/lvm_shell_proxy.py.in
+++ b/daemons/lvmdbusd/lvm_shell_proxy.py.in
@@ -18,6 +18,7 @@ import pty
 import sys
 import tempfile
 import time
+import threading
 import select
 
 try:
@@ -107,11 +108,14 @@ class LVMShellProxy(object):
 								else:
 									raise LvmBug(
 										"lvm returned no JSON output!")
-
-			except IOError as ioe:
-				log_debug(str(ioe))
-				self.exit_shell()
-				raise ioe
+			except Exception as e:
+				log_error("While reading from lvm shell we encountered an error %s" % str(e))
+				log_error("stdout= %s\nstderr= %s\n" % (stdout, stderr))
+				if self.lvm_shell.poll() is not None:
+					log_error("Underlying lvm shell process unexpectedly exited: %d" % self.lvm_shell.returncode)
+				else:
+					log_error("Underlying lvm shell process is still present!")
+				raise e
 
 		if keep_reading and cfg.run.value == 0:
 			# We didn't complete as we are shutting down
@@ -131,6 +135,10 @@ class LVMShellProxy(object):
 		tmp_dir = tempfile.mkdtemp(prefix="lvmdbus_")
 		tmp_file = "%s/lvmdbus_report" % (tmp_dir)
 
+		# Create a lock so that we don't step on each other when we are waiting for a command
+		# to finish and some other request comes in concurrently, like to exit the shell.
+		self.shell_lock = threading.RLock()
+
 		# Create a fifo for the report output
 		os.mkfifo(tmp_file, 0o600)
 
@@ -188,7 +196,8 @@ class LVMShellProxy(object):
 			os.unlink(tmp_file)
 			os.rmdir(tmp_dir)
 
-	def get_last_log(self):
+	def _get_last_log(self):
+		# Precondition, lock is held
 		self._write_cmd('lastlog\n')
 		report_json = self._read_response()[1]
 		return get_error_msg(report_json)
@@ -209,28 +218,29 @@ class LVMShellProxy(object):
 		cmd += "\n"
 
 		# run the command by writing it to the shell's STDIN
-		self._write_cmd(cmd)
+		with self.shell_lock:
+			self._write_cmd(cmd)
 
-		# read everything from the STDOUT to the next prompt
-		stdout, report_json, stderr = self._read_response()
+			# read everything from the STDOUT to the next prompt
+			stdout, report_json, stderr = self._read_response()
 
-		# Parse the report to see what happened
-		if 'log' in report_json:
-			ret_code = int(report_json['log'][-1:][0]['log_ret_code'])
-			# If we have an exported vg we get a log_ret_code == 5 when
-			# we do a 'fullreport'
-			# Note: 0 == error
-			if (ret_code == 1) or (ret_code == 5 and argv[0] == 'fullreport'):
-				rc = 0
-			else:
-				# Depending on where lvm fails the command, it may not have anything
-				# to report for "lastlog", so we need to check for a message in the
-				# report json too.
-				error_msg = self.get_last_log()
-				if error_msg is None:
-					error_msg = get_error_msg(report_json)
+			# Parse the report to see what happened
+			if 'log' in report_json:
+				ret_code = int(report_json['log'][-1:][0]['log_ret_code'])
+				# If we have an exported vg we get a log_ret_code == 5 when
+				# we do a 'fullreport'
+				# Note: 0 == error
+				if (ret_code == 1) or (ret_code == 5 and argv[0] == 'fullreport'):
+					rc = 0
+				else:
+					# Depending on where lvm fails the command, it may not have anything
+					# to report for "lastlog", so we need to check for a message in the
+					# report json too.
+					error_msg = self._get_last_log()
 					if error_msg is None:
-						error_msg = 'No error reason provided! (missing "log" section)'
+						error_msg = get_error_msg(report_json)
+						if error_msg is None:
+							error_msg = 'No error reason provided! (missing "log" section)'
 
 		if debug or rc != 0:
 			log_error(("CMD= %s" % cmd))
@@ -240,12 +250,14 @@ class LVMShellProxy(object):
 		return rc, report_json, error_msg
 
 	def exit_shell(self):
-		try:
-			self._write_cmd('exit\n')
-			self.lvm_shell.wait(1)
-			self.lvm_shell = None
-		except Exception as _e:
-			log_error(str(_e))
+		with self.shell_lock:
+			try:
+				if self.lvm_shell is not None:
+					self._write_cmd('exit\n')
+					self.lvm_shell.wait(1)
+					self.lvm_shell = None
+			except Exception as _e:
+				log_error("exit_shell: %s" % (str(_e)))
 
 	def __del__(self):
 		# Note: When we are shutting down the daemon and the main process has already exited