geo-rep: Update geo-rep status, if monitor process is killed

Problem:
When the monitor process itself is killed, the geo-rep session is still
shown as active.

The status command simply reads the status file to produce its output.
The monitor process is the one that updates the status file.

When the monitor process itself is killed, nothing is left to update
the status file, so the geo-rep status command ends up showing the
last status that was written to the status file.

Solution:
While producing the status output, check whether the monitor process is
running. If it is not running, set the status to Stopped.

Change-Id: I86a7ac1746dd8f27eef93658e992ef16f6068d9d
BUG: 1251980
Signed-off-by: Saravanakumar Arumugam <sarumuga@redhat.com>
Reviewed-on: http://review.gluster.org/11873
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Milind Changire <mchangir@redhat.com>
Reviewed-by: Kotresh HR <khiremat@redhat.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
This commit is contained in:
Saravanakumar Arumugam 2015-08-10 18:42:05 +05:30 committed by Jeff Darcy
parent d713488e47
commit 4d4c7d5dc5
2 changed files with 18 additions and 2 deletions

View File

@ -615,7 +615,8 @@ def main_i():
status_get = rconf.get('status_get')
if status_get:
for brick in gconf.path:
brick_status = GeorepStatus(gconf.state_file, brick)
brick_status = GeorepStatus(gconf.state_file, brick,
getattr(gconf, "pid_file", None))
checkpoint_time = int(getattr(gconf, "checkpoint", "0"))
brick_status.print_status(checkpoint_time=checkpoint_time)
return

View File

@ -16,6 +16,7 @@ import urllib
import json
import time
from datetime import datetime
from errno import EACCES, EAGAIN
DEFAULT_STATUS = "N/A"
MONITOR_STATUS = ("Created", "Started", "Paused", "Stopped")
@ -113,7 +114,7 @@ def set_monitor_status(status_file, status):
class GeorepStatus(object):
def __init__(self, monitor_status_file, brick):
def __init__(self, monitor_status_file, brick, monitor_pid_file=None):
self.work_dir = os.path.dirname(monitor_status_file)
self.monitor_status_file = monitor_status_file
self.filename = os.path.join(self.work_dir,
@ -126,6 +127,7 @@ class GeorepStatus(object):
os.close(fd)
self.brick = brick
self.default_values = get_default_values()
self.monitor_pid_file = monitor_pid_file
def _update(self, mergerfunc):
with LockedOpen(self.filename, 'r+') as f:
@ -254,6 +256,19 @@ class GeorepStatus(object):
pass
monitor_status = self.get_monitor_status()
# Verifying whether monitor process running and adjusting status
if monitor_status in ["Started", "Paused"]:
try:
with open(self.monitor_pid_file, "r+") as f:
fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
monitor_status = "Stopped"
except (IOError, OSError) as e:
if e.errno in (EACCES, EAGAIN):
# cannot grab. so, monitor process still running..move on
pass
else:
raise
if monitor_status in ["Created", "Paused", "Stopped"]:
data["worker_status"] = monitor_status