1
0
mirror of https://github.com/OpenNebula/one.git synced 2025-02-02 09:47:00 +03:00

F #3859: Monitord HA

This commit is contained in:
Pavel Czerny 2020-05-15 16:15:52 +02:00
parent 7dbed19430
commit 7705d669a7
No known key found for this signature in database
GPG Key ID: 58DD5CE2A719A826
14 changed files with 208 additions and 58 deletions

View File

@ -20,6 +20,7 @@
#include "DriverManager.h"
#include "ActionManager.h"
#include "OpenNebulaMessages.h"
#include "RaftManager.h"
class HostPool;
class Host;
@ -94,6 +95,11 @@ public:
*/
void delete_host(int hid);
/**
* Set raft status, send info to monitor daemon
*/
void raft_status(RaftManager::State raft);
protected:
/**
* Received undefined message -> print error

View File

@ -134,6 +134,41 @@ public:
*/
void follower(unsigned int term);
/**
 *  Translates a Raft state into its textual name.
 *    @param _state the state to translate
 *    @return "SOLO", "CANDIDATE", "FOLLOWER" or "LEADER"; an empty string
 *    if _state is none of those values
 */
static std::string state_to_str(State _state)
{
    switch (_state)
    {
        case SOLO:
            return "SOLO";

        case CANDIDATE:
            return "CANDIDATE";

        case FOLLOWER:
            return "FOLLOWER";

        case LEADER:
            return "LEADER";
    }

    // Not one of the enumerators handled above
    return "";
}
/**
 *  Returns the current state. The value is copied while holding the mutex.
 *    @return a copy of the state member
 */
State get_state()
{
    pthread_mutex_lock(&mutex);

    const State current = state;

    pthread_mutex_unlock(&mutex);

    return current;
}
unsigned int get_term()
{
unsigned int _term;

View File

@ -185,13 +185,13 @@ RAFT = [
# Executed when a server transits from follower->leader
# RAFT_LEADER_HOOK = [
# COMMAND = "raft/vip.sh",
# ARGUMENTS = "leader <interface> <ip_cidr>"
# ARGUMENTS = "leader interface ip_cidr [interface ip_cidr ...]"
# ]
# Executed when a server transits from leader->follower
# RAFT_FOLLOWER_HOOK = [
# COMMAND = "raft/vip.sh",
# ARGUMENTS = "follower <interface> <ip_cidr>"
# ARGUMENTS = "follower interface ip_cidr [interface ip_cidr ...]"
# ]
#*******************************************************************************

View File

@ -1,9 +1,11 @@
#!/bin/bash -e
#!/bin/bash
ACTION="$1"
INTERFACE="$2"
IFADDR="$3"
IP="${IFADDR%%/*}"
# Setup virtual IP
# usage:
# vip.sh action interface ip [interface ip ...]
# Where action is one of:
# leader - New raft leader, set virtual IPs
# follower - unset virtual IPs
#
# functions
@ -37,21 +39,65 @@ is_systemd_unit_startable()
return 1
}
# (Un)set one virtual IP on one interface, depending on the global $ACTION
#   $1 - network interface
#   $2 - address to manage; anything after the first "/" (e.g. a prefix
#        length) is stripped to obtain the bare IP used for arping and lookup
# Exits the script with status 1 when an argument is missing or when $ACTION
# is neither "leader" nor "follower".
function virtualip() {
    INTERFACE="$1"
    IFADDR="$2"
    IP="${IFADDR%%/*}"

    if [ -z "$INTERFACE" ]; then
        echo "Missing interface." >&2
        exit 1
    fi

    if [ -z "$IFADDR" ]; then
        echo "Missing IP." >&2
        exit 1
    fi

    ###

    case $ACTION in
    leader)
        sudo -n ip address add "$IFADDR" dev "$INTERFACE"

        # Send ARP packets for the new address, alternating the -U and -A
        # arping modes, five rounds with a one second pause after each send
        for i in $(seq 5); do
            sudo -n arping -c 1 -U -I "$INTERFACE" "${IP}"
            sleep 1
            sudo -n arping -c 1 -A -I "$INTERFACE" "${IP}"
            sleep 1
        done
        ;;

    follower)
        # Only delete the address if it is currently configured. -F matches
        # the address literally, so its dots are not treated as regular
        # expression wildcards
        if ip address show dev "$INTERFACE" | grep -qiF -- " ${IP}/"; then
            sudo -n ip address del "$IFADDR" dev "$INTERFACE"
        fi
        ;;

    *)
        echo "Unknown action '$ACTION'" >&2
        exit 1
        ;;
    esac
}
#
# main
#
if [ -z "$INTERFACE" ]; then
echo "Missing interface." >&2
exit 1
fi
ACTION="$1"
shift
if [ -z "$IFADDR" ]; then
echo "Missing IP." >&2
exit 1
fi
# Process the remaining parameters as consecutive "interface ip" pairs
while [[ $# -gt 0 ]]
do
    # Quoted so that an empty argument keeps its position and is reported
    # as the missing parameter it really is
    virtualip "$1" "$2"
    shift
    shift
done
###
# Start or stop OpenNebula services
if which systemctl &>/dev/null && [ -d /etc/systemd ]; then
IS_SYSTEMD=yes
@ -61,15 +107,6 @@ fi
case $ACTION in
leader)
sudo -n ip address add $IFADDR dev $INTERFACE
for i in $(seq 5); do
sudo -n arping -c 1 -U -I $INTERFACE ${IP}
sleep 1
sudo -n arping -c 1 -A -I $INTERFACE ${IP}
sleep 1
done
if [ "${IS_SYSTEMD}" = 'yes' ]; then
if systemctl is-enabled opennebula-flow >/dev/null 2>&1; then
sudo -n systemctl start opennebula-flow
@ -107,10 +144,6 @@ leader)
;;
follower)
if ip address show dev $INTERFACE | grep -qi " ${IP}/"; then
sudo -n ip address del $IFADDR dev $INTERFACE
fi
if [ "${IS_SYSTEMD}" = 'yes' ]; then
if systemctl is-enabled opennebula-flow >/dev/null 2>&1 ||
systemctl is-active opennebula-flow >/dev/null 2>&1;

View File

@ -36,6 +36,8 @@ class Replicator
:service => 'opennebula' },
{ :name => 'ec2_driver.default',
:service => 'opennebula' },
{ :name => 'monitord.conf',
:service => 'opennebula' },
{ :name => 'econe.conf',
:service => 'opennebula-econe' },
{ :name => 'oneflow-server.conf',

View File

@ -60,29 +60,10 @@ int InformationManager::start()
NebulaLog::info("InM", "Information Manager stopped.");
});
// Send the list of hosts to the driver
auto rftm = Nebula::instance().get_raftm();
raft_status(rftm->get_state());
auto * imd = get_driver("monitord");
if (!imd)
{
NebulaLog::error("InM", "Could not find information driver 'monitor'");
return rc;
}
string xml_hosts;
hpool->dump(xml_hosts, "", 0, -1, false);
Message<OpenNebulaMessages> msg;
msg.type(OpenNebulaMessages::HOST_LIST);
msg.payload(xml_hosts);
imd->write(msg);
return rc;
return 0;
}
/* -------------------------------------------------------------------------- */
@ -94,7 +75,7 @@ void InformationManager::stop_monitor(int hid, const string& name, const string&
if (!imd)
{
NebulaLog::error("InM", "Could not find information driver 'monitor'");
NebulaLog::error("InM", "Could not find information driver 'monitord'");
return;
}
@ -127,7 +108,7 @@ int InformationManager::start_monitor(Host * host, bool update_remotes)
if (!imd)
{
host->error("Cannot find driver: 'monitor'");
host->error("Cannot find driver: 'monitord'");
return -1;
}
@ -187,6 +168,43 @@ void InformationManager::delete_host(int hid)
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
/**
 *  Sends the Raft state to the 'monitord' driver. For the SOLO and LEADER
 *  states a HOST_LIST message with the host pool dump is written first;
 *  a RAFT_STATUS message with the state name is always written last.
 *  Logs an error and returns if the driver cannot be found.
 *    @param state the Raft state to send
 */
void InformationManager::raft_status(RaftManager::State state)
{
    auto monitord = get_driver("monitord");

    if (!monitord)
    {
        NebulaLog::error("InM", "Could not find information driver 'monitord'");
        return;
    }

    // Builds a message of the given type and writes it to the driver
    auto send = [&](OpenNebulaMessages type, const string& payload)
    {
        Message<OpenNebulaMessages> out;

        out.type(type);
        out.payload(payload);

        monitord->write(out);
    };

    const bool send_hosts = (state == RaftManager::LEADER) ||
                            (state == RaftManager::SOLO);

    if (send_hosts)
    {
        // Send host pool to Monitor Daemon
        string hosts_dump;

        hpool->dump(hosts_dump, "", 0, -1, false);

        send(OpenNebulaMessages::HOST_LIST, hosts_dump);
    }

    send(OpenNebulaMessages::RAFT_STATUS, RaftManager::state_to_str(state));
}
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
void InformationManager::_undefined(unique_ptr<Message<OpenNebulaMessages>> msg)
{
NebulaLog::warn("InM", "Received undefined message: " + msg->payload() +
@ -555,4 +573,3 @@ void InformationManager::_vm_state(unique_ptr<Message<OpenNebulaMessages>> msg)
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */

View File

@ -44,7 +44,7 @@ class MonitorClient
MESSAGE_TYPES.each do |mt|
define_method("#{mt}_udp".downcase.to_sym) do |rc, payload|
msg = "#{mt} #{MESSAGE_STATUS[rc]} #{@hostid} #{pack(payload)}"
@socket_udp.send(msg, 0)
@socket_udp.send(msg, 0, @host, @port)
end
end
@ -75,7 +75,6 @@ class MonitorClient
@port = addr[1]
@socket_udp = UDPSocket.new(@family)
@socket_udp.connect(@host, @port)
@pubkey = @opts[:pubkey]

View File

@ -81,6 +81,12 @@ public:
*/
void stop_host_monitor(int oid);
/**
* Raft status changed
* @param state SOLO, CANDIDATE, FOLLOWER, LEADER
*/
void raft_status(const string& state);
/**
* Updates the information of the given host. If it does not exist it is
* added to the pool
@ -158,6 +164,7 @@ private:
*/
int monitor_interval_host;
bool is_leader;
/**
* Time in seconds to expire a monitoring action (5 minutes)
*/

View File

@ -85,6 +85,11 @@ private:
*/
static void _stop_monitor(message_t msg);
/**
* Raft status changed
*/
static void _raft_status(message_t msg);
private:
static HostMonitorManager * hm;
};

View File

@ -30,11 +30,12 @@ enum class OpenNebulaMessages : unsigned short int
HOST_LIST,
UPDATE_HOST,
DEL_HOST,
START_MONITOR, // not used
STOP_MONITOR, // not used
START_MONITOR,
STOP_MONITOR,
HOST_STATE,
VM_STATE,
HOST_SYSTEM,
RAFT_STATUS,
ENUM_MAX
};

View File

@ -50,6 +50,7 @@ HostMonitorManager::HostMonitorManager(
, threads(_threads)
, timer_period(timer_period)
, monitor_interval_host(monitor_interval_host)
, is_leader(false)
{
oned_driver = new OneMonitorDriver(this);
udp_driver = new UDPMonitorDriver(addr, port);
@ -220,6 +221,16 @@ void HostMonitorManager::stop_host_monitor(int oid)
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
/**
 *  Logs the received Raft state and updates is_leader: it becomes true for
 *  "SOLO" or "LEADER" and false for any other value.
 *    @param state name of the Raft state
 */
void HostMonitorManager::raft_status(const string& state)
{
    NebulaLog::info("HMM", "Raft status: " + state);

    const bool leading = (state == "SOLO") || (state == "LEADER");

    is_leader = leading;
}
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
void HostMonitorManager::monitor_host(int oid, bool result, const Template &tmpl)
{
auto host = hpool->get(oid);
@ -403,6 +414,11 @@ void HostMonitorManager::timer_action()
hpool->clean_expired_monitoring();
vmpool->clean_expired_monitoring();
if (!is_leader)
{
return;
}
set<int> discovered_hosts;
time_t now = time(nullptr);
time_t target_time = now - monitor_interval_host;
@ -458,6 +474,11 @@ void HostMonitorManager::timer_action()
void HostMonitorManager::start_host_monitor(const HostRPCPool::HostBaseLock& host)
{
if (!is_leader)
{
return;
}
auto driver = driver_manager->get_driver(host->im_mad());
if (!driver)
@ -482,6 +503,11 @@ void HostMonitorManager::start_host_monitor(const HostRPCPool::HostBaseLock& hos
void HostMonitorManager::stop_host_monitor(const HostRPCPool::HostBaseLock& host)
{
if (!is_leader)
{
return;
}
auto driver = driver_manager->get_driver(host->im_mad());
if (!driver)

View File

@ -42,6 +42,9 @@ OneMonitorDriver::OneMonitorDriver(HostMonitorManager * _hm)
register_action(OpenNebulaMessages::STOP_MONITOR,
&OneMonitorDriver::_stop_monitor);
register_action(OpenNebulaMessages::RAFT_STATUS,
&OneMonitorDriver::_raft_status);
}
/* -------------------------------------------------------------------------- */
@ -153,3 +156,11 @@ void OneMonitorDriver::_stop_monitor(message_t msg)
{
hm->stop_host_monitor(msg->oid());
}
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
/**
 *  Handler for RAFT_STATUS messages: passes the message payload to the
 *  host monitor manager.
 *    @param msg the received message
 */
void OneMonitorDriver::_raft_status(message_t msg)
{
    const auto& raft_state = msg->payload();

    hm->raft_status(raft_state);
}

View File

@ -53,6 +53,7 @@ const EString<OpenNebulaMessages> Message<OpenNebulaMessages>::_type_str({
{"HOST_STATE", OpenNebulaMessages::HOST_STATE},
{"VM_STATE", OpenNebulaMessages::VM_STATE},
{"HOST_SYSTEM", OpenNebulaMessages::HOST_SYSTEM},
{"RAFT_STATUS", OpenNebulaMessages::RAFT_STATUS},
});
/* ************************************************************************** */

View File

@ -22,6 +22,7 @@
#include "LogDB.h"
#include "AclManager.h"
#include "Nebula.h"
#include "InformationManager.h"
#include <cstdlib>
@ -475,6 +476,9 @@ void RaftManager::leader()
aclm->reload_rules();
auto im = nd.get_im();
im->raft_status(state);
if ( nd.is_federation_master() )
{
frm->start_replica_threads();
@ -542,6 +546,9 @@ void RaftManager::follower(unsigned int _term)
commit = lapplied;
leader_id = -1;
auto im = nd.get_im();
im->raft_status(state);
NebulaLog::log("RCM", Log::INFO, "oned is set to follower mode");
next.clear();