1
0
mirror of https://github.com/OpenNebula/one.git synced 2025-03-21 14:50:08 +03:00

B #6687: Fix monitoring initialization and restart

* Cleanup oned in case of initialization error

* Fix monitoring after onemonitord restart. The code includes a "hook" point in case a driver is re-started so custom code can be executed. InformationManager sends the list of hosts and raft status in this case.

* B #5801: Update error msg, in case of duplicated drivers

(cherry picked from commit 7b07def90e9d0c1507952de497c7137cce3f78c7)
This commit is contained in:
Pavel Czerný 2024-09-05 16:25:03 +02:00 committed by Ruben S. Montero
parent a40c048f9d
commit c546ed5240
No known key found for this signature in database
GPG Key ID: A0CEA6FA880A1D87
11 changed files with 116 additions and 64 deletions

View File

@ -115,6 +115,14 @@ public:
streamer.register_action(t, a);
};
/**
* Set a callback to be called when the driver is restarted and reconnects
*/
void set_reconnect_callback(std::function<void()> callback)
{
reconnect_callback = callback;
}
protected:
Driver() = default;
@ -161,6 +169,11 @@ private:
*/
std::atomic<bool> terminate = {false};
/**
* Reconnect callback, called when the driver restarts
*/
std::function<void()> reconnect_callback;
/**
* Starts the driver. This function creates a new process and sets up the
* communication pipes.
@ -358,6 +371,8 @@ void Driver<MSG>
start_driver(error);
streamer.fd(from_drv);
if (reconnect_callback) reconnect_callback();
}
});
}

View File

@ -91,6 +91,12 @@ protected:
static Log::MessageType log_type(char type);
/**
* Callback called when the driver is reconnected. Override this function
* to perform any actions when the driver is reconnected
*/
virtual void reconnected() {};
private:
std::map<std::string, std::unique_ptr<D>> drivers;
@ -207,7 +213,10 @@ int DriverManager<D>::start(std::string& error)
{
for (auto& driver : drivers)
{
driver.second->set_reconnect_callback(std::bind(&DriverManager<D>::reconnected, this));
auto rc = driver.second->start(error);
if (rc != 0)
{
NebulaLog::error("DrM", "Unable to start driver '" + driver.first

View File

@ -87,6 +87,11 @@ public:
*/
void raft_status(RaftManager::State raft);
/**
* Called when the driver is reconnected
*/
void reconnected() override;
protected:
/**
* Received undefined message -> print error

View File

@ -156,6 +156,11 @@ public:
return _log_type;
};
static bool initialized()
{
return (logger != 0);
}
private:
NebulaLog() {};

View File

@ -19,7 +19,6 @@
if [ -z "$ONE_LOCATION" ]; then
ONE_PID=/var/run/one/oned.pid
ONE_SCHEDPID=/var/run/one/sched.pid
ONE_HEMPID=/var/run/one/hem.pid
ONE_CONF=/etc/one/oned.conf
ONE_DB=/var/lib/one/one.db
ONE_LOG=/var/log/one/oned.log
@ -36,7 +35,6 @@ if [ -z "$ONE_LOCATION" ]; then
else
ONE_PID=$ONE_LOCATION/var/oned.pid
ONE_SCHEDPID=$ONE_LOCATION/var/sched.pid
ONE_HEMPID=$ONE_LOCATION/var/hem.pid
ONE_CONF=$ONE_LOCATION/etc/oned.conf
ONE_DB=$ONE_LOCATION/var/one.db
ONE_LOG=$ONE_LOCATION/var/oned.log
@ -246,14 +244,19 @@ start_hem()
[ -f "$ONE_HEM_LOG" ] && mv $ONE_HEM_LOG{,.$(date '+%Y%m%d%H%M%S')}
fi
onehem-server start > /dev/null 2>&1
HEM_ERROR=$(mktemp /tmp/hem-error.XXXXXX)
onehem-server start > $HEM_ERROR 2>&1
LASTRC=$?
if [ $LASTRC -ne 0 ]; then
echo "Error starting onehem-server"
echo "Error starting onehem-server: $(cat $HEM_ERROR)"
rm -f $HEM_ERROR
exit 1
fi
rm -f $HEM_ERROR
}
#------------------------------------------------------------------------------

View File

@ -195,6 +195,16 @@ void InformationManager::raft_status(RaftManager::State state)
imd->write(msg);
}
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
void InformationManager::reconnected()
{
auto rftm = Nebula::instance().get_raftm();
raft_status(rftm->get_state());
}
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */

View File

@ -48,8 +48,7 @@ void Monitor::start()
if (config->load_configuration() != 0)
{
throw runtime_error("Error reading monitor configuration file" +
conf_filename);
throw runtime_error("Error reading monitor configuration file " + conf_filename);
}
string datastore_location;

View File

@ -86,6 +86,67 @@ using namespace std;
Nebula::~Nebula()
{
// -----------------------------------------------------------
// Stop the managers & free resources
// -----------------------------------------------------------
if (rm) rm->finalize();
if (raftm) raftm->finalize();
if (!cache)
{
if (sam) sam->finalize();
if (vmm) vmm->finalize();
if (lcm) lcm->finalize();
if (tm) tm->finalize();
if (dm) dm->finalize();
if (im) im->finalize();
if (hm) hm->finalize();
if (imagem) imagem->finalize();
if (marketm) marketm->finalize();
if (ipamm) ipamm->finalize();
//sleep to wait drivers???
if (vmm) vmm->join_thread();
if (lcm) lcm->join_thread();
if (tm) tm->join_thread();
if (dm) dm->join_thread();
if (hm) hm->join_thread();
if (ipamm) ipamm->join_thread();
}
if (aclm) aclm->finalize();
if (authm)
{
authm->finalize();
authm->join_thread();
}
if (is_federation_slave() && aclm)
{
aclm->join_thread();
}
//XML Library
xmlCleanupParser();
ssl_util::SSLMutex::finalize();
if (NebulaLog::initialized())
{
NebulaLog::log("ONE", Log::INFO, "All modules finalized, exiting.\n");
}
delete vmpool;
delete vnpool;
delete hpool;
@ -1201,7 +1262,6 @@ void Nebula::start(bool bootstrap_only)
#ifdef SYSTEMD
// ---- Notify service manager ----
sd_notify(0, "READY=1");
#endif
@ -1215,61 +1275,6 @@ void Nebula::start(bool bootstrap_only)
sigwait(&mask, &signal);
// -----------------------------------------------------------
// Stop the managers & free resources
// -----------------------------------------------------------
rm->finalize();
raftm->finalize();
if (!cache)
{
sam->finalize();
vmm->finalize();
lcm->finalize();
tm->finalize();
dm->finalize();
im->finalize();
hm->finalize();
imagem->finalize();
marketm->finalize();
ipamm->finalize();
//sleep to wait drivers???
vmm->join_thread();
lcm->join_thread();
tm->join_thread();
dm->join_thread();
hm->join_thread();
ipamm->join_thread();
}
aclm->finalize();
authm->finalize();
authm->join_thread();
if (is_federation_slave())
{
aclm->join_thread();
}
//XML Library
xmlCleanupParser();
ssl_util::SSLMutex::finalize();
NebulaLog::log("ONE", Log::INFO, "All modules finalized, exiting.\n");
return;
error_mad:

View File

@ -38,7 +38,7 @@ int NebulaTemplate::load_configuration()
if ( rc != 0 && error != 0)
{
cout << "\nError while parsing configuration file:\n" << error << endl;
cout << "\nError while parsing configuration file: " << error << endl;
free(error);

View File

@ -2742,6 +2742,7 @@ int VirtualMachineManager::load_drivers(const vector<const VectorAttribute*>& _m
if ( rc != 0 )
{
NebulaLog::error("VMM", "\tDriver already exists, name: " + name);
return rc;
}

View File

@ -128,7 +128,7 @@ VirtualMachineManagerDriver::VirtualMachineManagerDriver(
}
else
{
NebulaLog::log("VMM", Log::INFO, "Using default imported VMs actions");
NebulaLog::log("VMM", Log::INFO, "\tUsing default imported VMs actions");
it = attrs.find("NAME");