geo-rep: Update geo-rep status, if monitor process is killed
Problem: When the monitor process itself is killed, the geo-rep session still shows as active. The status command simply reads the content of the status file to produce its output. The monitor process is the one that updates the status file. When the monitor process itself gets killed, there is no way to update the status file. So the geo-rep session status command ends up showing the last status written to the status file. Solution: While getting the status output, check whether the monitor process is running. If it is NOT running, update the status to STOPPED. Change-Id: I86a7ac1746dd8f27eef93658e992ef16f6068d9d BUG: 1251980 Signed-off-by: Saravanakumar Arumugam <sarumuga@redhat.com> Reviewed-on: http://review.gluster.org/11873 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Milind Changire <mchangir@redhat.com> Reviewed-by: Kotresh HR <khiremat@redhat.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
This commit is contained in:
parent
d713488e47
commit
4d4c7d5dc5
@ -615,7 +615,8 @@ def main_i():
|
||||
status_get = rconf.get('status_get')
|
||||
if status_get:
|
||||
for brick in gconf.path:
|
||||
brick_status = GeorepStatus(gconf.state_file, brick)
|
||||
brick_status = GeorepStatus(gconf.state_file, brick,
|
||||
getattr(gconf, "pid_file", None))
|
||||
checkpoint_time = int(getattr(gconf, "checkpoint", "0"))
|
||||
brick_status.print_status(checkpoint_time=checkpoint_time)
|
||||
return
|
||||
|
@ -16,6 +16,7 @@ import urllib
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime
|
||||
from errno import EACCES, EAGAIN
|
||||
|
||||
DEFAULT_STATUS = "N/A"
|
||||
MONITOR_STATUS = ("Created", "Started", "Paused", "Stopped")
|
||||
@ -113,7 +114,7 @@ def set_monitor_status(status_file, status):
|
||||
|
||||
|
||||
class GeorepStatus(object):
|
||||
def __init__(self, monitor_status_file, brick):
|
||||
def __init__(self, monitor_status_file, brick, monitor_pid_file=None):
|
||||
self.work_dir = os.path.dirname(monitor_status_file)
|
||||
self.monitor_status_file = monitor_status_file
|
||||
self.filename = os.path.join(self.work_dir,
|
||||
@ -126,6 +127,7 @@ class GeorepStatus(object):
|
||||
os.close(fd)
|
||||
self.brick = brick
|
||||
self.default_values = get_default_values()
|
||||
self.monitor_pid_file = monitor_pid_file
|
||||
|
||||
def _update(self, mergerfunc):
|
||||
with LockedOpen(self.filename, 'r+') as f:
|
||||
@ -254,6 +256,19 @@ class GeorepStatus(object):
|
||||
pass
|
||||
monitor_status = self.get_monitor_status()
|
||||
|
||||
# Verifying whether monitor process running and adjusting status
|
||||
if monitor_status in ["Started", "Paused"]:
|
||||
try:
|
||||
with open(self.monitor_pid_file, "r+") as f:
|
||||
fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
monitor_status = "Stopped"
|
||||
except (IOError, OSError) as e:
|
||||
if e.errno in (EACCES, EAGAIN):
|
||||
# cannot grab. so, monitor process still running..move on
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
|
||||
if monitor_status in ["Created", "Paused", "Stopped"]:
|
||||
data["worker_status"] = monitor_status
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user