mirror of
https://github.com/samba-team/samba.git
synced 2024-12-22 13:34:15 +03:00
IFACE handling. Assume links are always good on startup (they almost always
Simplify the handling of setting the links in the 10.interface eventscript and remove the optimization to only call setifacelink on state change to make the code simpler to read. If a take ip event fails, flag the node as unhealthy. Add a check to the interface script to check if the interface exists or if it has been deleted, so that we can capture and become UNHEALTHY if someone deletes an interface we are using to host public addresses. (This used to be ctdb commit 4ab63d2a7262aff30d5eced184c294c9c9dd4974)
This commit is contained in:
parent
3b826a6720
commit
c04505724a
@ -38,13 +38,12 @@ monitor_interfaces()
|
||||
INTERFACES=`for IFACE in $INTERFACES ; do echo $IFACE ; done | sort | uniq`
|
||||
|
||||
local fail=0
|
||||
local force_fail=0
|
||||
local ok=0
|
||||
for IFACE in $INTERFACES ; do
|
||||
|
||||
local OLDLINK=`echo -n "$IFACES" | grep "^:$IFACE:" | cut -d ':' -f3 | xargs`
|
||||
test -z "$OLDLINK" && {
|
||||
force_fail=1
|
||||
ip addr show $IFACE 2>/dev/null >/dev/null || {
|
||||
echo Interface $IFACE does not exist but it is used by public addresses.
|
||||
exit 1
|
||||
}
|
||||
|
||||
# These interfaces are sometimes bond devices
|
||||
@ -55,44 +54,34 @@ monitor_interfaces()
|
||||
grep -q 'Currently Active Slave: None' /proc/net/bonding/$REALIFACE && {
|
||||
echo "ERROR: No active slaves for bond device $REALIFACE"
|
||||
fail=1
|
||||
test -n "$OLDLINK" && {
|
||||
ctdb setifacelink $IFACE down
|
||||
}
|
||||
ctdb setifacelink $IFACE down
|
||||
continue;
|
||||
}
|
||||
grep -q '^MII Status: up' /proc/net/bonding/$REALIFACE || {
|
||||
echo "ERROR: public network interface $REALIFACE is down"
|
||||
fail=1
|
||||
test -n "$OLDLINK" && {
|
||||
ctdb setifacelink $IFACE down
|
||||
}
|
||||
ctdb setifacelink $IFACE down
|
||||
continue;
|
||||
}
|
||||
test -n "$OLDLINK" && {
|
||||
ok=1 # we only set ok for interfaces known to ctdbd
|
||||
ctdb setifacelink $IFACE up
|
||||
}
|
||||
ok=1 # we only set ok for interfaces known to ctdbd
|
||||
ctdb setifacelink $IFACE up
|
||||
continue;
|
||||
}
|
||||
|
||||
case $IFACE in
|
||||
lo*)
|
||||
# loopback is always working
|
||||
test -n "$OLDLINK" && {
|
||||
ok=1 # we only set ok for interfaces known to ctdbd
|
||||
ctdb setifacelink $IFACE up
|
||||
}
|
||||
ok=1 # we only set ok for interfaces known to ctdbd
|
||||
ctdb setifacelink $IFACE up
|
||||
;;
|
||||
ib*)
|
||||
# we dont know how to test ib links
|
||||
test -n "$OLDLINK" && {
|
||||
ok=1 # we only set ok for interfaces known to ctdbd
|
||||
ctdb setifacelink $IFACE up
|
||||
}
|
||||
ok=1 # we only set ok for interfaces known to ctdbd
|
||||
ctdb setifacelink $IFACE up
|
||||
;;
|
||||
*)
|
||||
[ -z "$IFACE" ] || {
|
||||
[ "$(basename $(readlink /sys/class/net/$IFACE/device/driver))" = virtio_net ] ||
|
||||
[ "$(basename $(readlink /sys/class/net/$IFACE/device/driver) 2>/dev/null)" = virtio_net ] ||
|
||||
ethtool $IFACE | grep -q 'Link detected: yes' || {
|
||||
# On some systems, this is not successful when a
|
||||
# cable is plugged but the interface has not been
|
||||
@ -102,16 +91,12 @@ monitor_interfaces()
|
||||
ethtool $IFACE | grep -q 'Link detected: yes' || {
|
||||
echo "ERROR: No link on the public network interface $IFACE"
|
||||
fail=1
|
||||
test -n "$OLDLINK" && {
|
||||
ctdb setifacelink $IFACE down
|
||||
}
|
||||
ctdb setifacelink $IFACE down
|
||||
continue
|
||||
}
|
||||
}
|
||||
test -n "$OLDLINK" && {
|
||||
ok=1 # we only set ok for interfaces known to ctdbd
|
||||
ctdb setifacelink $IFACE up
|
||||
}
|
||||
ok=1 # we only set ok for interfaces known to ctdbd
|
||||
ctdb setifacelink $IFACE up
|
||||
}
|
||||
;;
|
||||
esac
|
||||
@ -122,10 +107,6 @@ monitor_interfaces()
|
||||
return 0;
|
||||
}
|
||||
|
||||
test x"$force_fail" != x"0" && {
|
||||
return 1;
|
||||
}
|
||||
|
||||
test x"$ok" = x"1" && {
|
||||
return 2;
|
||||
}
|
||||
@ -148,6 +129,13 @@ case "$1" in
|
||||
# called after ctdbd has done its initial recovery
|
||||
# and we start the services to become healthy
|
||||
startup)
|
||||
# Assume all links are good initially
|
||||
INTERFACES=`for IFACE in $INTERFACES ; do echo $IFACE ; done | sort | uniq`
|
||||
|
||||
for IFACE in $INTERFACES ; do
|
||||
ctdb setifacelink $IFACE down
|
||||
done
|
||||
|
||||
monitor_interfaces
|
||||
|
||||
;;
|
||||
|
@ -334,6 +334,8 @@ static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
|
||||
TDB_DATA data;
|
||||
|
||||
if (status != 0) {
|
||||
struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
|
||||
|
||||
if (status == -ETIME) {
|
||||
ctdb_ban_self(ctdb);
|
||||
}
|
||||
@ -341,6 +343,8 @@ static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
|
||||
ctdb_addr_to_str(&state->vnn->public_address),
|
||||
ctdb_vnn_iface_string(state->vnn)));
|
||||
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
|
||||
|
||||
node->flags |= NODE_FLAGS_UNHEALTHY;
|
||||
talloc_free(state);
|
||||
return;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user