mirror of
https://github.com/OpenNebula/one.git
synced 2025-02-02 09:47:00 +03:00
F #3859: Monitord HA
This commit is contained in:
parent
7dbed19430
commit
7705d669a7
@ -20,6 +20,7 @@
|
||||
#include "DriverManager.h"
|
||||
#include "ActionManager.h"
|
||||
#include "OpenNebulaMessages.h"
|
||||
#include "RaftManager.h"
|
||||
|
||||
class HostPool;
|
||||
class Host;
|
||||
@ -94,6 +95,11 @@ public:
|
||||
*/
|
||||
void delete_host(int hid);
|
||||
|
||||
/**
|
||||
* Set raft status, send info to monitor daemon
|
||||
*/
|
||||
void raft_status(RaftManager::State raft);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Received undefined message -> print error
|
||||
|
@ -134,6 +134,41 @@ public:
|
||||
*/
|
||||
void follower(unsigned int term);
|
||||
|
||||
static std::string state_to_str(State _state)
|
||||
{
|
||||
string st;
|
||||
|
||||
switch (_state)
|
||||
{
|
||||
case SOLO:
|
||||
st = "SOLO";
|
||||
break;
|
||||
case CANDIDATE:
|
||||
st = "CANDIDATE";
|
||||
break;
|
||||
case FOLLOWER:
|
||||
st = "FOLLOWER";
|
||||
break;
|
||||
case LEADER:
|
||||
st = "LEADER";
|
||||
break;
|
||||
}
|
||||
return st;
|
||||
}
|
||||
|
||||
/**
 *  Reads the current state while holding the manager mutex.
 *    @return copy of the state member taken under the lock
 */
State get_state()
{
    pthread_mutex_lock(&mutex);

    const State current = state;

    pthread_mutex_unlock(&mutex);

    return current;
}
|
||||
|
||||
unsigned int get_term()
|
||||
{
|
||||
unsigned int _term;
|
||||
|
@ -185,13 +185,13 @@ RAFT = [
|
||||
# Executed when a server transitions from follower->leader
|
||||
# RAFT_LEADER_HOOK = [
|
||||
# COMMAND = "raft/vip.sh",
|
||||
# ARGUMENTS = "leader <interface> <ip_cidr>"
|
||||
# ARGUMENTS = "leader interface ip_cidr [interface ip_cidr ...]"
|
||||
# ]
|
||||
|
||||
# Executed when a server transitions from leader->follower
|
||||
# RAFT_FOLLOWER_HOOK = [
|
||||
# COMMAND = "raft/vip.sh",
|
||||
# ARGUMENTS = "follower <interface> <ip_cidr>"
|
||||
# ARGUMENTS = "follower interface ip_cidr [interface ip_cidr ...]"
|
||||
# ]
|
||||
|
||||
#*******************************************************************************
|
||||
|
@ -1,9 +1,11 @@
|
||||
#!/bin/bash -e
|
||||
#!/bin/bash
|
||||
|
||||
ACTION="$1"
|
||||
INTERFACE="$2"
|
||||
IFADDR="$3"
|
||||
IP="${IFADDR%%/*}"
|
||||
# Setup virtual IP
|
||||
# usage:
|
||||
# vip.sh action interface ip [interface ip ...]
|
||||
# Where action is one of:
|
||||
# leader - New raft leader, set virtual IPs
|
||||
# follower - unset virtual IPs
|
||||
|
||||
#
|
||||
# functions
|
||||
@ -37,21 +39,65 @@ is_systemd_unit_startable()
|
||||
return 1
|
||||
}
|
||||
|
||||
# (Un)set the virtual IP on one interface.
#   $1 - network interface name
#   $2 - address, with or without a prefix length (ip or ip/cidr)
# Uses the global ACTION:
#   leader   - add the address to the interface and announce it with arping
#   follower - remove the address if the interface currently holds it
# Exits the script with status 1 when an argument is missing or ACTION is
# not one of the above. INTERFACE, IFADDR and IP are assigned as globals.
function virtualip() {
    INTERFACE="$1"
    IFADDR="$2"
    IP="${IFADDR%%/*}"    # address without the /prefix part

    if [ -z "$INTERFACE" ]; then
        echo "Missing interface." >&2
        exit 1
    fi

    if [ -z "$IFADDR" ]; then
        echo "Missing IP." >&2
        exit 1
    fi

    ###

    # All expansions are quoted so values are never word-split or globbed
    case "$ACTION" in
    leader)
        sudo -n ip address add "$IFADDR" dev "$INTERFACE"

        # Five rounds, each sending one -U and one -A arping packet
        for i in $(seq 5); do
            sudo -n arping -c 1 -U -I "$INTERFACE" "${IP}"
            sleep 1
            sudo -n arping -c 1 -A -I "$INTERFACE" "${IP}"
            sleep 1
        done
        ;;

    follower)
        # -F matches the address literally; without it every '.' in the
        # address would act as a regex wildcard
        if ip address show dev "$INTERFACE" | grep -qiF " ${IP}/"; then
            sudo -n ip address del "$IFADDR" dev "$INTERFACE"
        fi
        ;;

    *)
        echo "Unknown action '$ACTION'" >&2
        exit 1
        ;;
    esac
}
|
||||
|
||||
#
|
||||
# main
|
||||
#
|
||||
|
||||
if [ -z "$INTERFACE" ]; then
|
||||
echo "Missing interface." >&2
|
||||
exit 1
|
||||
fi
|
||||
ACTION="$1"
|
||||
shift
|
||||
|
||||
if [ -z "$IFADDR" ]; then
|
||||
echo "Missing IP." >&2
|
||||
exit 1
|
||||
fi
|
||||
# Process all remaining parameters as consecutive "interface ip" pairs.
# Arguments are quoted so an empty or space-containing value stays in its
# own position instead of shifting the following argument into its place.
while [[ $# -gt 0 ]]
do
    virtualip "$1" "$2"
    shift
    shift
done
|
||||
|
||||
###
|
||||
# Start or stop OpenNebula services
|
||||
|
||||
if which systemctl &>/dev/null && [ -d /etc/systemd ]; then
|
||||
IS_SYSTEMD=yes
|
||||
@ -61,15 +107,6 @@ fi
|
||||
|
||||
case $ACTION in
|
||||
leader)
|
||||
sudo -n ip address add $IFADDR dev $INTERFACE
|
||||
|
||||
for i in $(seq 5); do
|
||||
sudo -n arping -c 1 -U -I $INTERFACE ${IP}
|
||||
sleep 1
|
||||
sudo -n arping -c 1 -A -I $INTERFACE ${IP}
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if [ "${IS_SYSTEMD}" = 'yes' ]; then
|
||||
if systemctl is-enabled opennebula-flow >/dev/null 2>&1; then
|
||||
sudo -n systemctl start opennebula-flow
|
||||
@ -107,10 +144,6 @@ leader)
|
||||
;;
|
||||
|
||||
follower)
|
||||
if ip address show dev $INTERFACE | grep -qi " ${IP}/"; then
|
||||
sudo -n ip address del $IFADDR dev $INTERFACE
|
||||
fi
|
||||
|
||||
if [ "${IS_SYSTEMD}" = 'yes' ]; then
|
||||
if systemctl is-enabled opennebula-flow >/dev/null 2>&1 ||
|
||||
systemctl is-active opennebula-flow >/dev/null 2>&1;
|
||||
|
@ -36,6 +36,8 @@ class Replicator
|
||||
:service => 'opennebula' },
|
||||
{ :name => 'ec2_driver.default',
|
||||
:service => 'opennebula' },
|
||||
{ :name => 'monitord.conf',
|
||||
:service => 'opennebula' },
|
||||
{ :name => 'econe.conf',
|
||||
:service => 'opennebula-econe' },
|
||||
{ :name => 'oneflow-server.conf',
|
||||
|
@ -60,29 +60,10 @@ int InformationManager::start()
|
||||
NebulaLog::info("InM", "Information Manager stopped.");
|
||||
});
|
||||
|
||||
// Send the list of hosts to the driver
|
||||
auto rftm = Nebula::instance().get_raftm();
|
||||
raft_status(rftm->get_state());
|
||||
|
||||
auto * imd = get_driver("monitord");
|
||||
|
||||
if (!imd)
|
||||
{
|
||||
NebulaLog::error("InM", "Could not find information driver 'monitor'");
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
string xml_hosts;
|
||||
|
||||
hpool->dump(xml_hosts, "", 0, -1, false);
|
||||
|
||||
Message<OpenNebulaMessages> msg;
|
||||
|
||||
msg.type(OpenNebulaMessages::HOST_LIST);
|
||||
msg.payload(xml_hosts);
|
||||
|
||||
imd->write(msg);
|
||||
|
||||
return rc;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
@ -94,7 +75,7 @@ void InformationManager::stop_monitor(int hid, const string& name, const string&
|
||||
|
||||
if (!imd)
|
||||
{
|
||||
NebulaLog::error("InM", "Could not find information driver 'monitor'");
|
||||
NebulaLog::error("InM", "Could not find information driver 'monitord'");
|
||||
|
||||
return;
|
||||
}
|
||||
@ -127,7 +108,7 @@ int InformationManager::start_monitor(Host * host, bool update_remotes)
|
||||
|
||||
if (!imd)
|
||||
{
|
||||
host->error("Cannot find driver: 'monitor'");
|
||||
host->error("Cannot find driver: 'monitord'");
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -187,6 +168,43 @@ void InformationManager::delete_host(int hid)
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/**
 *  Forwards a Raft state change to the 'monitord' driver.
 *
 *  For SOLO and LEADER a HOST_LIST message carrying the host pool dump is
 *  written first. A RAFT_STATUS message with the state name is then written
 *  for every state. If the driver cannot be found an error is logged and
 *  nothing is sent.
 *    @param state new Raft state
 */
void InformationManager::raft_status(RaftManager::State state)
{
    auto monitord = get_driver("monitord");

    if (!monitord)
    {
        NebulaLog::error("InM", "Could not find information driver 'monitord'");

        return;
    }

    switch (state)
    {
        case RaftManager::SOLO:
        case RaftManager::LEADER:
        {
            // Send host pool to Monitor Daemon before the status update
            string pool_xml;

            hpool->dump(pool_xml, "", 0, -1, false);

            Message<OpenNebulaMessages> host_list;

            host_list.type(OpenNebulaMessages::HOST_LIST);
            host_list.payload(pool_xml);

            monitord->write(host_list);

            break;
        }

        default:
            break;
    }

    // The status message is sent regardless of the state
    Message<OpenNebulaMessages> status;

    status.type(OpenNebulaMessages::RAFT_STATUS);
    status.payload(RaftManager::state_to_str(state));

    monitord->write(status);
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
void InformationManager::_undefined(unique_ptr<Message<OpenNebulaMessages>> msg)
|
||||
{
|
||||
NebulaLog::warn("InM", "Received undefined message: " + msg->payload() +
|
||||
@ -555,4 +573,3 @@ void InformationManager::_vm_state(unique_ptr<Message<OpenNebulaMessages>> msg)
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
|
@ -44,7 +44,7 @@ class MonitorClient
|
||||
MESSAGE_TYPES.each do |mt|
|
||||
define_method("#{mt}_udp".downcase.to_sym) do |rc, payload|
|
||||
msg = "#{mt} #{MESSAGE_STATUS[rc]} #{@hostid} #{pack(payload)}"
|
||||
@socket_udp.send(msg, 0)
|
||||
@socket_udp.send(msg, 0, @host, @port)
|
||||
end
|
||||
end
|
||||
|
||||
@ -75,7 +75,6 @@ class MonitorClient
|
||||
@port = addr[1]
|
||||
|
||||
@socket_udp = UDPSocket.new(@family)
|
||||
@socket_udp.connect(@host, @port)
|
||||
|
||||
@pubkey = @opts[:pubkey]
|
||||
|
||||
|
@ -81,6 +81,12 @@ public:
|
||||
*/
|
||||
void stop_host_monitor(int oid);
|
||||
|
||||
/**
|
||||
* Raft status changed
|
||||
* @param state SOLO, CANDIDATE, FOLLOWER, LEADER
|
||||
*/
|
||||
void raft_status(const string& state);
|
||||
|
||||
/**
|
||||
* Updates the information of the given host. If it does not exist it is
|
||||
* added to the pool
|
||||
@ -158,6 +164,7 @@ private:
|
||||
*/
|
||||
int monitor_interval_host;
|
||||
|
||||
bool is_leader;
|
||||
/**
|
||||
* Time in seconds to expire a monitoring action (5 minutes)
|
||||
*/
|
||||
|
@ -85,6 +85,11 @@ private:
|
||||
*/
|
||||
static void _stop_monitor(message_t msg);
|
||||
|
||||
/**
|
||||
* Raft status changed
|
||||
*/
|
||||
static void _raft_status(message_t msg);
|
||||
|
||||
private:
|
||||
static HostMonitorManager * hm;
|
||||
};
|
||||
|
@ -30,11 +30,12 @@ enum class OpenNebulaMessages : unsigned short int
|
||||
HOST_LIST,
|
||||
UPDATE_HOST,
|
||||
DEL_HOST,
|
||||
START_MONITOR, // not used
|
||||
STOP_MONITOR, // not used
|
||||
START_MONITOR,
|
||||
STOP_MONITOR,
|
||||
HOST_STATE,
|
||||
VM_STATE,
|
||||
HOST_SYSTEM,
|
||||
RAFT_STATUS,
|
||||
ENUM_MAX
|
||||
};
|
||||
|
||||
|
@ -50,6 +50,7 @@ HostMonitorManager::HostMonitorManager(
|
||||
, threads(_threads)
|
||||
, timer_period(timer_period)
|
||||
, monitor_interval_host(monitor_interval_host)
|
||||
, is_leader(false)
|
||||
{
|
||||
oned_driver = new OneMonitorDriver(this);
|
||||
udp_driver = new UDPMonitorDriver(addr, port);
|
||||
@ -220,6 +221,16 @@ void HostMonitorManager::stop_host_monitor(int oid)
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
void HostMonitorManager::raft_status(const string& state)
|
||||
{
|
||||
NebulaLog::info("HMM", "Raft status: " + state);
|
||||
|
||||
is_leader = state == "LEADER" || state == "SOLO";
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
void HostMonitorManager::monitor_host(int oid, bool result, const Template &tmpl)
|
||||
{
|
||||
auto host = hpool->get(oid);
|
||||
@ -403,6 +414,11 @@ void HostMonitorManager::timer_action()
|
||||
hpool->clean_expired_monitoring();
|
||||
vmpool->clean_expired_monitoring();
|
||||
|
||||
if (!is_leader)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
set<int> discovered_hosts;
|
||||
time_t now = time(nullptr);
|
||||
time_t target_time = now - monitor_interval_host;
|
||||
@ -458,6 +474,11 @@ void HostMonitorManager::timer_action()
|
||||
|
||||
void HostMonitorManager::start_host_monitor(const HostRPCPool::HostBaseLock& host)
|
||||
{
|
||||
if (!is_leader)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
auto driver = driver_manager->get_driver(host->im_mad());
|
||||
|
||||
if (!driver)
|
||||
@ -482,6 +503,11 @@ void HostMonitorManager::start_host_monitor(const HostRPCPool::HostBaseLock& hos
|
||||
|
||||
void HostMonitorManager::stop_host_monitor(const HostRPCPool::HostBaseLock& host)
|
||||
{
|
||||
if (!is_leader)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
auto driver = driver_manager->get_driver(host->im_mad());
|
||||
|
||||
if (!driver)
|
||||
|
@ -42,6 +42,9 @@ OneMonitorDriver::OneMonitorDriver(HostMonitorManager * _hm)
|
||||
|
||||
register_action(OpenNebulaMessages::STOP_MONITOR,
|
||||
&OneMonitorDriver::_stop_monitor);
|
||||
|
||||
register_action(OpenNebulaMessages::RAFT_STATUS,
|
||||
&OneMonitorDriver::_raft_status);
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
@ -153,3 +156,11 @@ void OneMonitorDriver::_stop_monitor(message_t msg)
|
||||
{
|
||||
hm->stop_host_monitor(msg->oid());
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/**
 *  RAFT_STATUS message handler: hands the message payload to the
 *  HostMonitorManager as the new Raft state.
 *    @param msg received message
 */
void OneMonitorDriver::_raft_status(message_t msg)
{
    const auto& raft_state = msg->payload();

    hm->raft_status(raft_state);
}
|
||||
|
@ -53,6 +53,7 @@ const EString<OpenNebulaMessages> Message<OpenNebulaMessages>::_type_str({
|
||||
{"HOST_STATE", OpenNebulaMessages::HOST_STATE},
|
||||
{"VM_STATE", OpenNebulaMessages::VM_STATE},
|
||||
{"HOST_SYSTEM", OpenNebulaMessages::HOST_SYSTEM},
|
||||
{"RAFT_STATUS", OpenNebulaMessages::RAFT_STATUS},
|
||||
});
|
||||
|
||||
/* ************************************************************************** */
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "LogDB.h"
|
||||
#include "AclManager.h"
|
||||
#include "Nebula.h"
|
||||
#include "InformationManager.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
@ -475,6 +476,9 @@ void RaftManager::leader()
|
||||
|
||||
aclm->reload_rules();
|
||||
|
||||
auto im = nd.get_im();
|
||||
im->raft_status(state);
|
||||
|
||||
if ( nd.is_federation_master() )
|
||||
{
|
||||
frm->start_replica_threads();
|
||||
@ -542,6 +546,9 @@ void RaftManager::follower(unsigned int _term)
|
||||
commit = lapplied;
|
||||
leader_id = -1;
|
||||
|
||||
auto im = nd.get_im();
|
||||
im->raft_status(state);
|
||||
|
||||
NebulaLog::log("RCM", Log::INFO, "oned is set to follower mode");
|
||||
|
||||
next.clear();
|
||||
|
Loading…
x
Reference in New Issue
Block a user