common-ha: delete-node implementation

omnibus patch consisting of:
+ completed implementation of delete-node        (BZ 1212823)
+ teardown leaves /var/lib/nfs symlink           (BZ 1210712)
+ setup copy config, teardown clean /etc/cluster (BZ 1212823)

setup for copy config, teardown clean /etc/cluster:
1. on one (primary) node in the cluster, run:
  `ssh-keygen -f /var/lib/glusterd/nfs/secret.pem`

  Press Enter twice to avoid passphrase.

2. deploy the pubkey to ~root/.ssh/authorized_keys on _all_ nodes, run:
  `ssh-copy-id -i /var/lib/glusterd/nfs/secret.pem.pub root@$node`

3. copy the keys to _all_ nodes in the cluster, run:
  `scp /var/lib/glusterd/nfs/secret.*  $node:/var/lib/glusterd/nfs/`
  N.B. this allows setup, teardown, etc., to be run on any node

Change-Id: I9fcd3a57073ead24cd2d0ef0ee7a67c524f3d4b0
BUG: 1213933
Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
Reviewed-on: http://review.gluster.org/10234
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Reviewed-by: soumya k <skoduri@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
This commit is contained in:
Kaleb S. KEITHLEY 2015-04-14 08:17:10 -04:00 committed by Kaleb KEITHLEY
parent 5b5048670c
commit d28a99d6f2
4 changed files with 220 additions and 117 deletions

View File

@ -135,6 +135,9 @@ ganesha_grace_stop()
# OCF "monitor" action for the nfs-grace resource agent.
# Snapshots any dead_ip-1 lines from `pcs status` into
# /var/run/ganesha/pcs_status (presumably consumed by the grace/failover
# logic elsewhere in this agent -- confirm against the start/stop actions).
ganesha_grace_monitor()
{
    # logger "ganesha_grace_monitor()"

    # make sure the scratch directory for the snapshot exists
    [ -d /var/run/ganesha ] || mkdir -p /var/run/ganesha

    pcs status | grep dead_ip-1 | sort > /var/run/ganesha/pcs_status

    return $OCF_SUCCESS
}

View File

@ -51,23 +51,11 @@ resource agent for nfs-ganesha.
<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
<parameters>
<parameter name="ha_vol_name">
<longdesc lang="en">HA State Volume Name</longdesc>
<shortdesc lang="en">HA_State Volume Name</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ha_vol_mnt">
<longdesc lang="en">HA State Volume Mount Point</longdesc>
<shortdesc lang="en">HA_State Volume Mount Point</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ha_vol_server">
<longdesc lang="en">HA State Volume Server</longdesc>
<shortdesc lang="en">HA_State Volume Server</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
@ -106,34 +94,16 @@ ganesha_nfsd_start()
ganesha_nfsd_stop()
{
local mounted=""
local mntptinuse=""
local instance_host=""
local short_host=""
local resource_prefix=${OCF_RESOURCE_INSTANCE:0:9}
local short_host=$(hostname -s)
local long_host=""
if [ "X${resource_prefix}X" = "Xnfs_startX" ]; then
if [ "X${OCF_RESOURCE_INSTANCE:0:9}X" = "Xnfs_startX" ]; then
mounted=$(mount | grep $OCF_RESKEY_ha_vol_name)
mntptinuse=$(mount | grep -o $OCF_RESKEY_ha_vol_mnt)
short_host=$(hostname -s)
# if this is any nfs_start, go ahead. worst case we
# find the link already exists and do nothing
long_host=$(hostname)
if [[ ! ${mounted} ]]; then
if [ -d $OCF_RESKEY_ha_vol_mnt ]; then
if [[ ${mntptinuse} ]]; then
return $OCF_ERR_GENERIC
fi
else
mkdir ${mntpt}
fi
mount -t glusterfs $OCF_RESKEY_ha_vol_server:$OCF_RESKEY_ha_vol_name $OCF_RESKEY_ha_vol_mnt
if [ $? -ne 0 ]; then
logger "warning: mount -t glusterfs $OCF_RESKEY_ha_vol_server:$OCF_RESKEY_ha_vol_name $OCF_RESKEY_ha_vol_mnt failed"
fi
if [ -d /var/lib/nfs ]; then
mv /var/lib/nfs /var/lib/nfs.backup
ln -s $OCF_RESKEY_ha_vol_mnt/${long_host}/nfs /var/lib/nfs
if [ $? -ne 0 ]; then
@ -141,20 +111,17 @@ ganesha_nfsd_stop()
fi
fi
service nfs-ganesha start
if [ $? -ne 0 ]; then
logger "warning: service nfs-ganesha start failed"
fi
else
umount $OCF_RESKEY_ha_vol_mnt
if [ $? -ne 0 ]; then
logger "warning: umount $OCF_RESKEY_ha_vol_mnt failed"
fi
service nfs-ganesha stop
if [ $? -ne 0 ]; then
logger "warning: service nfs-ganesha stop failed"
# if this is a clone resource or is specific to this node
# remove the symlink and restore /var/lib/nfs
if [ "X${OCF_RESOURCE_INSTANCE}X" = "Xnfs_stopX" ] ||
[ "X${OCF_RESOURCE_INSTANCE}X" = "Xnfs_stop-${short_host}X" ]; then
if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then
rm -f /var/lib/nfs
mv /var/lib/nfs.backup /var/lib/nfs
fi
fi
fi

View File

@ -1,8 +1,10 @@
#!/bin/bash
# Copyright 2015 Red Hat Inc. All Rights Reserved
#
# Pacemaker+Corosync High Availability for NFS-Ganesha
#
# setup, teardown, add-node, delete-node, refresh-config, and status
# setup, teardown, add, delete, refresh-config, and status
#
# Each participating node in the cluster is assigned a virtual IP (VIP)
# which fails over to another node when its associated ganesha.nfsd dies
@ -21,8 +23,9 @@
HA_NUM_SERVERS=0
HA_SERVERS=""
HA_CONFDIR=""
HA_SHARED_VOLUME="gluster_shared_storage"
HA_VOL_NAME="gluster_shared_storage"
HA_VOL_MNT="/var/run/gluster/shared_storage"
CONF=$(cat /etc/sysconfig/ganesha | grep "CONFFILE" | cut -f 2 -d "=")
RHEL6_PCS_CNAME_OPTION="--name"
@ -40,6 +43,7 @@ check_cluster_exists()
fi
}
determine_servers()
{
local cmd=${1}
@ -47,15 +51,7 @@ determine_servers()
local tmp_ifs=${IFS}
local ha_servers=""
if [[ "X${cmd}X" != "XteardownX" ]]; then
IFS=$','
for server in ${HA_CLUSTER_NODES} ; do
num_servers=$(expr ${num_servers} + 1)
done
IFS=${tmp_ifs}
HA_NUM_SERVERS=${num_servers}
HA_SERVERS="${HA_CLUSTER_NODES//,/ }"
else
if [[ "X${cmd}X" != "XsetupX" ]]; then
ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//')
IFS=$' '
for server in ${ha_servers} ; do
@ -64,9 +60,18 @@ determine_servers()
IFS=${tmp_ifs}
HA_NUM_SERVERS=${num_servers}
HA_SERVERS="${ha_servers}"
else
IFS=$','
for server in ${HA_CLUSTER_NODES} ; do
num_servers=$(expr ${num_servers} + 1)
done
IFS=${tmp_ifs}
HA_NUM_SERVERS=${num_servers}
HA_SERVERS="${HA_CLUSTER_NODES//,/ }"
fi
}
setup_cluster()
{
local name=${1}
@ -110,6 +115,7 @@ setup_cluster()
fi
}
setup_finalize()
{
local cibfile=${1}
@ -125,19 +131,38 @@ setup_finalize()
}
# Copy /etc/ganesha/ganesha-ha.conf to every other node in the cluster.
# Arguments: the list of cluster node names.
# Uses the passwordless ssh key the admin created during setup
# (/var/lib/glusterd/nfs/secret.pem); skips this node itself.
# Failures are logged via logger(1) but do not abort.
setup_copy_config()
{
    local short_host=$(hostname -s)

    if [ -e /var/lib/glusterd/nfs/secret.pem ]; then
        while [[ ${1} ]]; do
            if [ "${short_host}" != "${1}" ]; then
                scp -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/nfs/secret.pem /etc/ganesha/ganesha-ha.conf ${1}:/etc/ganesha/
                if [ $? -ne 0 ]; then
                    logger "warning: scp ganesha-ha.conf to ${1} failed"
                fi
            fi
            shift
        done
    else
        # The key is missing, so no copy was even attempted; log the real
        # reason instead of a bogus "scp ... failed" message.
        logger "warning: /var/lib/glusterd/nfs/secret.pem missing, cannot copy ganesha-ha.conf to other nodes"
    fi
}
teardown_cluster()
{
local name=${1}
logger "tearing down cluster $name"
for server in ${HA_SERVERS} ; do
if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then
logger "info: ${server} is not in config, removing"
pcs cluster stop ${server}
if [ $? -ne 0 ]; then
logger "pcs cluster stop ${server}"
logger "warning: pcs cluster stop ${server} failed"
fi
pcs cluster node remove ${server}
@ -167,6 +192,36 @@ teardown_cluster()
fi
}
# Wipe this node's ganesha HA configuration:
#  - per-export config fragments under ${HA_CONFDIR}/exports
#  - the .export_added bookkeeping marker
#  - any leftover corosync /etc/cluster/cluster.conf* copies
#  - truncate the main ganesha config file ($CONF)
# Globals read: HA_CONFDIR, CONF.
cleanup_ganesha_config ()
{
    # HA_CONFDIR is quoted but the glob stays outside the quotes so it
    # still expands; rm -f keeps an unmatched glob from erroring out.
    rm -rf -- "${HA_CONFDIR}"/exports/*.conf
    rm -rf -- "${HA_CONFDIR}/.export_added"
    rm -rf /etc/cluster/cluster.conf*
    # quote $CONF: an unquoted empty value makes the redirect fail with
    # "ambiguous redirect", and paths with spaces would word-split
    > "$CONF"
}
# Remove pacemaker/corosync remnants (/etc/cluster/cluster.*) on every
# other node in the cluster.
# Arguments: the list of cluster node names.
# Uses the passwordless ssh key created during setup
# (/var/lib/glusterd/nfs/secret.pem); skips this node itself.
# Failures are logged via logger(1) but do not abort.
teardown_clean_etccluster()
{
    local short_host=$(hostname -s)

    if [ -e /var/lib/glusterd/nfs/secret.pem ]; then
        while [[ ${1} ]]; do
            if [ "${short_host}" != "${1}" ]; then
                # cluster.* is intentionally unquoted: the glob must be
                # expanded by the remote shell, not locally
                ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/nfs/secret.pem ${1} rm -f /etc/cluster/cluster.*
                if [ $? -ne 0 ]; then
                    logger "warning: ssh ${1} rm -f /etc/cluster/cluster.* failed"
                fi
            fi
            shift
        done
    else
        # The key is missing, so no ssh was even attempted; log the real
        # reason instead of a bogus "ssh ... failed" message.
        logger "warning: /var/lib/glusterd/nfs/secret.pem missing, cannot clean /etc/cluster on other nodes"
    fi
}
do_create_virt_ip_constraints()
{
local cibfile=${1}; shift
@ -201,6 +256,7 @@ do_create_virt_ip_constraints()
fi
}
wrap_create_virt_ip_constraints()
{
local cibfile=${1}; shift
@ -226,9 +282,11 @@ wrap_create_virt_ip_constraints()
do_create_virt_ip_constraints ${cibfile} ${primary} ${tail} ${head}
}
create_virt_ip_constraints()
{
local cibfile=${1}; shift
while [[ ${1} ]]; do
wrap_create_virt_ip_constraints ${cibfile} ${1} ${HA_SERVERS}
shift
@ -241,9 +299,9 @@ setup_create_resources()
local cibfile=$(mktemp -u)
# mount the HA-state volume and start ganesha.nfsd on all nodes
pcs resource create nfs_start ganesha_nfsd ha_vol_name=${HA_VOL_NAME} ha_vol_mnt=${HA_VOL_MNT} ha_vol_server=${HA_VOL_SERVER} --clone
pcs resource create nfs_start ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone
if [ $? -ne 0 ]; then
logger "warning: pcs resource create nfs_start ganesha_nfsd --clone failed"
logger "warning: pcs resource create nfs_start ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed"
fi
sleep 1
# cloned resources seem to never have their start() invoked when they
@ -310,14 +368,28 @@ setup_create_resources()
rm -f ${cibfile}
}
teardown_resources()
{
# local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2)
# unmount the HA-state volume and terminate ganesha.nfsd on all nodes
pcs resource create nfs_stop ganesha_nfsd ha_vol_name=dummy ha_vol_mnt=${HA_VOL_MNT} ha_vol_server=dummy --clone
# delete -clone resource agents
# in particular delete the ganesha monitor so we don't try to
# trigger anything when we shut down ganesha next.
pcs resource delete nfs-mon-clone
if [ $? -ne 0 ]; then
logger "warning: pcs resource create nfs_stop ganesha_nfsd --clone failed"
logger "warning: pcs resource delete nfs-mon-clone failed"
fi
pcs resource delete nfs-grace-clone
if [ $? -ne 0 ]; then
logger "warning: pcs resource delete nfs-grace-clone failed"
fi
# unmount the HA-state volume and terminate ganesha.nfsd on all nodes
pcs resource create nfs_stop ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone
if [ $? -ne 0 ]; then
logger "warning: pcs resource create nfs_stop ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed"
fi
sleep 1
# cloned resources seem to never have their start() invoked when they
@ -343,29 +415,14 @@ teardown_resources()
shift
done
# delete -clone resource agents
pcs resource delete nfs-mon-clone
if [ $? -ne 0 ]; then
logger "warning: pcs resource delete nfs-mon-clone failed"
fi
pcs resource delete nfs-grace-clone
if [ $? -ne 0 ]; then
logger "warning: pcs resource delete nfs-grace-clone failed"
fi
}
recreate_resources()
{
local cibfile=${1}; shift
local add_node=${1}; shift
local add_vip=${1}; shift
while [[ ${1} ]]; do
# ipaddr=$(grep ^${1} ${HA_CONFIG_FILE} | cut -d = -f 2)
ipaddrx="VIP_${1//-/_}"
ipaddr=${!ipaddrx}
@ -397,6 +454,16 @@ recreate_resources()
shift
done
}
addnode_recreate_resources()
{
local cibfile=${1}; shift
local add_node=${1}; shift
local add_vip=${1}; shift
recreate_resources ${cibfile} ${HA_SERVERS}
pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${add_vip} cidr_netmask=32 op monitor interval=15s
if [ $? -ne 0 ]; then
@ -422,18 +489,14 @@ recreate_resources()
if [ $? -ne 0 ]; then
logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed"
fi
}
clear_and_recreate_resources()
clear_resources()
{
local cibfile=${1}; shift
local add_node=${1}; shift
local add_vip=${1}; shift
while [[ ${1} ]]; do
pcs -f ${cibfile} resource delete ${1}-cluster_ip-1
if [ $? -ne 0 ]; then
logger "warning: pcs -f ${cibfile} resource delete ${1}-cluster_ip-1"
@ -464,9 +527,9 @@ addnode_create_resources()
logger "warning: pcs cluster cib ${cibfile} failed"
fi
pcs -f ${cibfile} resource create nfs_start-${add_node} ganesha_nfsd ha_vol_name=${HA_VOL_NAME} ha_vol_mnt=${HA_VOL_MNT} ha_vol_server=${HA_VOL_SERVER}
pcs -f ${cibfile} resource create nfs_start-${add_node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT}
if [ $? -ne 0 ]; then
logger "warning: pcs -f ${cibfile} resource create nfs_start-${add_node} ganesha_nfsd ha_vol_name=${HA_VOL_NAME} ha_vol_mnt=${HA_VOL_MNT} ha_vol_server=${HA_VOL_SERVER} failed"
logger "warning: pcs -f ${cibfile} resource create nfs_start-${add_node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} failed"
fi
pcs -f ${cibfile} constraint location nfs_start-${add_node} prefers ${newnode}=INFINITY
@ -483,7 +546,6 @@ addnode_create_resources()
if [ $? -ne 0 ]; then
logger "warning: pcs cluster cib-push ${cibfile} failed"
fi
rm -f ${cibfile}
# start HA on the new node
@ -505,10 +567,38 @@ addnode_create_resources()
# delete all the -cluster_ip-1 and -trigger_ip-1 resources,
# clearing their constraints, then create them again so we can
# rejigger their constraints
clear_and_recreate_resources ${cibfile} ${add_node} ${add_vip} ${HA_SERVERS}
# recompute their constraints
clear_resources ${cibfile} ${HA_SERVERS}
addnode_recreate_resources ${cibfile} ${add_node} ${add_vip}
HA_SERVERS="${HA_SERVERS} ${add_node}"
create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
pcs cluster cib-push ${cibfile}
if [ $? -ne 0 ]; then
logger "warning: pcs cluster cib-push ${cibfile} failed"
fi
rm -f ${cibfile}
}
deletenode_delete_resources()
{
local node=${1}; shift
local ha_servers=$(echo "${HA_SERVERS}" | sed s/${node}//)
local cibfile=$(mktemp -u)
pcs cluster cib ${cibfile}
if [ $? -ne 0 ]; then
logger "warning: pcs cluster cib ${cibfile} failed"
fi
# delete all the -cluster_ip-1 and -trigger_ip-1 resources,
# clearing their constraints, then create them again so we can
# recompute their constraints
clear_resources ${cibfile} ${HA_SERVERS}
recreate_resources ${cibfile} ${ha_servers}
HA_SERVERS="${ha_servers}"
create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
@ -516,47 +606,75 @@ addnode_create_resources()
if [ $? -ne 0 ]; then
logger "warning: pcs cluster cib-push ${cibfile} failed"
fi
}
deletenode_delete_resources()
{
local node=${1}; shift
rm -f ${cibfile}
pcs cluster cib ${cibfile}
if [ $? -ne 0 ]; then
logger "warning: pcs cluster cib ${cibfile} failed"
fi
pcs -f ${cibfile} resource create nfs_stop-${node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT}
if [ $? -ne 0 ]; then
logger "warning: pcs -f ${cibfile} resource create nfs_stop-${node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} failed"
fi
pcs -f ${cibfile} constraint location nfs_stop-${node} prefers ${node}=INFINITY
if [ $? -ne 0 ]; then
logger "warning: pcs -f ${cibfile} constraint location nfs_stop-${node} prefers ${node}=INFINITY failed"
fi
pcs cluster cib-push ${cibfile}
if [ $? -ne 0 ]; then
logger "warning: pcs cluster cib-push ${cibfile} failed"
fi
rm -f ${cibfile}
pcs resource delete nfs_stop-${node}
if [ $? -ne 0 ]; then
logger "warning: pcs resource delete nfs_stop-${node} failed"
fi
}
setup_state_volume()
{
local mnt=$(mktemp -d)
local mnt=${HA_VOL_MNT}
local longname=""
local shortname=""
local dname=""
mount -t glusterfs ${HA_VOL_SERVER}:/${HA_VOL_NAME} ${mnt}
longname=$(hostname)
dname=${longname#$(hostname -s)}
while [[ ${1} ]]; do
if [ ! -d ${mnt}/nfs-ganesha/${1}${dname} ]; then
mkdir ${mnt}/nfs-ganesha/${1}${dname}
fi
if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs ]; then
mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs
fi
if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha ]; then
mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha
fi
if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd ]; then
mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd
fi
touch ${mnt}/nfs-ganesha/${1}${dname}/nfs/state
if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha/v4recov ]; then
mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha/v4recov
fi
if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha/v4old ]; then
mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha/v4old
fi
if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/sm ]; then
mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/sm
fi
if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/sm.bak ]; then
mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/sm.bak
fi
if [ ! -d ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/state ]; then
mkdir ${mnt}/nfs-ganesha/${1}${dname}/nfs/statd/state
fi
for server in ${HA_SERVERS} ; do
if [ ${server} != ${1}${dname} ]; then
ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${1}${dname}/nfs/ganesha/${server}
@ -566,10 +684,9 @@ setup_state_volume()
shift
done
umount ${mnt}
rmdir ${mnt}
}
main()
{
local cmd=${1}; shift
@ -594,12 +711,14 @@ main()
if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then
setup_state_volume ${HA_SERVERS}
setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}"
setup_create_resources ${HA_SERVERS}
setup_state_volume ${HA_SERVERS}
setup_copy_config ${HA_SERVERS}
setup_finalize
else
@ -615,6 +734,12 @@ main()
teardown_resources ${HA_SERVERS}
teardown_cluster ${HA_NAME}
teardown_clean_etccluster ${HA_SERVERS}
;;
cleanup | --cleanup)
cleanup_ganesha_config ${HA_CONFDIR}
;;
add | --add)
@ -632,6 +757,9 @@ main()
addnode_create_resources ${node} ${vip}
setup_state_volume ${node}
setup_copy_config ${node}
;;
delete | --delete)
@ -648,6 +776,9 @@ main()
logger "warning: pcs cluster node remove ${node} failed"
fi
# TODO: delete node's directory in shared state
teardown_clean_etccluster ${node}
;;
status | --status)
@ -658,6 +789,8 @@ main()
;;
*)
# setup and teardown are not intended to be used by a
# casual user
logger "Usage: ganesha-ha.sh setup|teardown|add|delete|status"
;;

View File

@ -956,8 +956,8 @@ fi
%files ganesha
%{_sysconfdir}/ganesha/*
%{_libexecdir}/ganesha/*
%{_prefix}/lib/ocf/resource.d/heartbeat/*
%attr(0755,-,-) %{_libexecdir}/ganesha/*
%attr(0755,-,-) %{_prefix}/lib/ocf/resource.d/heartbeat/*
%if ( 0%{!?_without_georeplication:1} )
%files geo-replication