1
0
mirror of https://github.com/OpenNebula/one.git synced 2025-01-03 01:17:41 +03:00

feature #4403: Adds OFFLINE mode to Host.

This commit is contained in:
Ruben S. Montero 2016-04-12 00:33:09 +02:00
parent a9f8dbd13d
commit f6c9924629
12 changed files with 270 additions and 319 deletions

View File

@ -34,20 +34,29 @@ class Host : public PoolObjectSQL, public ClusterableSingle
{
public:
// ----------------------------------------------------------------------
// Host States
// ----------------------------------------------------------------------
// HOST STATES +----------------+
// | VM DEPLOYMENT |
// +----------------+------------+--------+-------+
// | STATE | MONITORING | MANUAL | SCHED |
// +----------------+------------+--------+-------+
// | INIT/MONITORED | Yes | Yes |
// +----------------+------------+--------+-------+
// | DISABLED | Yes | Yes | No |
// +----------------+------------+----------------+
// | OFFLINE | No | No |
// +----------------+-----------------------------+
enum HostState
{
INIT = 0, /**< Initial state for enabled hosts. */
MONITORING_MONITORED = 1, /**< Monitoring the host (from monitored). */
MONITORED = 2, /**< The host has been successfully monitored. */
ERROR = 3, /**< An error occurred while monitoring the host. */
DISABLED = 4, /**< The host is disabled, won't be monitored. */
MONITORED = 2, /**< The host has been monitored. */
ERROR = 3, /**< An error occurred in host monitoring. */
DISABLED = 4, /**< The host is disabled, see above. */
MONITORING_ERROR = 5, /**< Monitoring the host (from error). */
MONITORING_INIT = 6, /**< Monitoring the host (from init). */
MONITORING_DISABLED = 7 /**< Monitoring the host (from disabled). */
MONITORING_DISABLED = 7, /**< Monitoring the host (from disabled). */
OFFLINE = 8 /**< The host is set offline, see above. */
};
/**
@ -65,27 +74,6 @@ public:
*/
int from_xml(const string &xml_str);
/**
* Check if the host is enabled
* @return true if the host is enabled
*/
bool isEnabled() const
{
return state != DISABLED && state != MONITORING_DISABLED;
}
/**
* Check if the host is being monitored
* @return true if the host is enabled
*/
bool isMonitoring() const
{
return ((state == MONITORING_ERROR) ||
(state == MONITORING_MONITORED)||
(state == MONITORING_INIT)||
(state == MONITORING_DISABLED));
}
/**
* Checks if the host is a remote public cloud
* @return true if the host is a remote public cloud
@ -93,8 +81,15 @@ public:
bool is_public_cloud() const;
/**
* Disables the current host, it will not be monitored nor used by the
* scheduler
* Sets the current host offline, it will not be monitored nor used by the
* scheduler, manual VM deployment is also restricted
*/
void offline();
/**
* Disables the current host: it will still receive monitor updates and
* manual deployment of VMs is allowed, but the scheduler will not consider
* this host.
*/
void disable();
@ -102,10 +97,7 @@ public:
* Enables the current host, it will be monitored and could be used by
* the scheduler
*/
void enable()
{
state = INIT;
};
void enable();
/**
* Sets the host in error
@ -125,6 +117,10 @@ public:
switch (state)
{
case OFFLINE:
state = OFFLINE;
break;
case DISABLED:
case MONITORING_DISABLED:
state = DISABLED;
@ -145,8 +141,6 @@ public:
state = ERROR;
}
break;
default:
break;
}
};

View File

@ -28,6 +28,7 @@ extern "C" void * im_action_loop(void *arg);
class HostPool;
class ClusterPool;
class Host;
class InformationManager : public MadManager, public ActionListener
{
@ -56,21 +57,6 @@ public:
~InformationManager(){};
enum Actions
{
STOPMONITOR /** Sent by the RM when a host is deleted **/
};
/**
* Triggers specific actions to the Information Manager.
* @param action the IM action
* @param hid Host unique id. This is the argument of the passed to the
* invoked action.
*/
void trigger(
Actions action,
int vid);
/**
* This functions starts the associated listener thread, and creates a
* new thread for the Information Manager. This thread will wait in
@ -88,10 +74,6 @@ public:
return im_thread;
};
/**
*
*/
int load_mads(int uid=0);
/**
*
@ -101,7 +83,35 @@ public:
am.trigger(ACTION_FINALIZE,0);
};
void stop_monitor(int hid);
/**
* Load the information drivers
* @return 0 on success
*/
int load_mads(int uid=0);
/**
* Sends a STOPMONITR command to the associated driver and host
* @param hid the host id
* @param name of the host
* @param im_mad the driver name
*/
void stop_monitor(int hid, const string& name, const string& im_mad)
{
const InformationManagerDriver * imd = get(im_mad);
if (imd != 0)
{
imd->stop_monitor(hid, name);
}
}
/**
* Starts the monitor process on the host
* @param host to monitor
* @param update_remotes to copy the monitor probes to the host
* @return 0 on success
*/
int start_monitor(Host * host, bool update_remotes);
private:
/**

View File

@ -29,9 +29,9 @@ using namespace std;
class RequestManagerHost: public Request
{
protected:
RequestManagerHost( const string& method_name,
const string& help,
const string& params)
RequestManagerHost(const string& method_name,
const string& help,
const string& params)
:Request(method_name,params,help)
{
Nebula& nd = Nebula::instance();
@ -51,18 +51,23 @@ protected:
/* ------------------------------------------------------------------------- */
/* ------------------------------------------------------------------------- */
class HostEnable : public RequestManagerHost
class HostStatus : public RequestManagerHost
{
public:
HostEnable():
RequestManagerHost("HostEnable",
"Enables or disables a host",
"A:sib")
enum Status
{
ENABLED = 0,
DISABLED = 1,
OFFLINE = 2
};
HostStatus():
RequestManagerHost("HostStatus", "Sets the status of the host", "A:sii")
{
auth_op = AuthRequest::ADMIN;
};
~HostEnable(){};
~HostStatus(){};
void request_execute(xmlrpc_c::paramList const& _paramList,
RequestAttributes& att);

View File

@ -131,7 +131,7 @@ cmd=CommandParser::CmdParser.new(ARGV) do
end
enable_desc = <<-EOT.unindent
Enables the given Host
Enables the given host, fully operational
EOT
command :enable, enable_desc, [:range,:hostid_list] do
@ -141,7 +141,10 @@ cmd=CommandParser::CmdParser.new(ARGV) do
end
disable_desc = <<-EOT.unindent
Disables the given Host
Disables the given host:
- monitor: enabled
- scheduler deployment: disabled
- manual deployment: enabled
EOT
command :disable, disable_desc, [:range,:hostid_list] do
@ -150,6 +153,19 @@ cmd=CommandParser::CmdParser.new(ARGV) do
end
end
# "offline" command: takes a host id range/list and calls `offline` on every
# host yielded by helper.perform_actions. The description below is the help
# text registered with the command.
offline_desc = <<-EOT.unindent
Sets the host offline:
- monitor: disabled
- scheduler deployment: disabled
- manual deployment: disabled
EOT
command :offline, offline_desc, [:range,:hostid_list] do
helper.perform_actions(args[0],options,"offline") do |host|
host.offline
end
end
update_desc = <<-EOT.unindent
Update the template contents. If a path is not provided the editor will
be launched to modify the current content.

View File

@ -261,6 +261,11 @@ int Host::update_info(Template &tmpl,
int num_zombies = 0;
int num_wilds = 0;
if ( state == OFFLINE )
{
return -1;
}
// -------------------------------------------------------------------------
// Remove expired information from current template
// -------------------------------------------------------------------------
@ -285,31 +290,28 @@ int Host::update_info(Template &tmpl,
touch(true);
if (isEnabled())
{
get_reserved_capacity(reserved_cpu, reserved_mem);
get_reserved_capacity(reserved_cpu, reserved_mem);
erase_template_attribute("TOTALCPU", val);
host_share.max_cpu = val - reserved_cpu;
erase_template_attribute("TOTALMEMORY", val);
host_share.max_mem = val - reserved_mem;
erase_template_attribute("DS_LOCATION_TOTAL_MB", val);
host_share.max_disk = val;
erase_template_attribute("TOTALCPU", val);
host_share.max_cpu = val - reserved_cpu;
erase_template_attribute("TOTALMEMORY", val);
host_share.max_mem = val - reserved_mem;
erase_template_attribute("DS_LOCATION_TOTAL_MB", val);
host_share.max_disk = val;
erase_template_attribute("FREECPU", val);
host_share.free_cpu = val;
erase_template_attribute("FREEMEMORY", val);
host_share.free_mem = val;
erase_template_attribute("DS_LOCATION_FREE_MB", val);
host_share.free_disk = val;
erase_template_attribute("FREECPU", val);
host_share.free_cpu = val;
erase_template_attribute("FREEMEMORY", val);
host_share.free_mem = val;
erase_template_attribute("DS_LOCATION_FREE_MB", val);
host_share.free_disk = val;
erase_template_attribute("USEDCPU", val);
host_share.used_cpu = val;
erase_template_attribute("USEDMEMORY", val);
host_share.used_mem = val;
erase_template_attribute("DS_LOCATION_USED_MB", val);
host_share.used_disk = val;
}
erase_template_attribute("USEDCPU", val);
host_share.used_cpu = val;
erase_template_attribute("USEDMEMORY", val);
host_share.used_mem = val;
erase_template_attribute("DS_LOCATION_USED_MB", val);
host_share.used_disk = val;
// -------------------------------------------------------------------------
// Correlate VM information with the list of running VMs
@ -461,9 +463,35 @@ int Host::update_info(Template &tmpl,
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
void Host::enable()
{
if (state == OFFLINE)
{
Nebula::instance().get_im()->start_monitor(this, true);
}
state = INIT;
};
/* -------------------------------------------------------------------------- */
void Host::disable()
{
if (state == OFFLINE)
{
Nebula::instance().get_im()->start_monitor(this, true);
}
state = DISABLED;
};
/* -------------------------------------------------------------------------- */
void Host::offline()
{
Nebula::instance().get_im()->stop_monitor(get_oid(),get_name(),get_im_mad());
state = OFFLINE;
host_share.max_cpu = 0;
host_share.max_mem = 0;
@ -583,14 +611,14 @@ string& Host::to_xml(string& xml) const
oss <<
"<HOST>"
"<ID>" << oid << "</ID>" <<
"<NAME>" << name << "</NAME>" <<
"<STATE>" << state << "</STATE>" <<
"<ID>" << oid << "</ID>" <<
"<NAME>" << name << "</NAME>" <<
"<STATE>" << state << "</STATE>" <<
"<IM_MAD>" << one_util::escape_xml(im_mad_name) << "</IM_MAD>" <<
"<VM_MAD>" << one_util::escape_xml(vmm_mad_name) << "</VM_MAD>" <<
"<LAST_MON_TIME>" << last_monitored << "</LAST_MON_TIME>" <<
"<CLUSTER_ID>" << cluster_id << "</CLUSTER_ID>" <<
"<CLUSTER>" << cluster << "</CLUSTER>" <<
"<LAST_MON_TIME>" << last_monitored << "</LAST_MON_TIME>" <<
"<CLUSTER_ID>" << cluster_id << "</CLUSTER_ID>" <<
"<CLUSTER>" << cluster << "</CLUSTER>" <<
host_share.to_xml(share_xml) <<
vm_collection.to_xml(vm_collection_xml) <<
obj_template->to_xml(template_xml) <<

View File

@ -119,30 +119,6 @@ int InformationManager::start()
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
void InformationManager::trigger(Actions action, int _hid)
{
int * hid;
string aname;
hid = new int(_hid);
switch (action)
{
case STOPMONITOR:
aname = "STOPMONITOR";
break;
default:
delete hid;
return;
}
am.trigger(aname,hid);
}
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
void InformationManager::do_action(const string &action, void * arg)
{
if (action == ACTION_TIMER)
@ -155,13 +131,6 @@ void InformationManager::do_action(const string &action, void * arg)
MadManager::stop();
}
else if (action == "STOPMONITOR")
{
int hid = *(static_cast<int *>(arg));
delete static_cast<int *>(arg);
stop_monitor(hid);
}
else
{
ostringstream oss;
@ -174,68 +143,37 @@ void InformationManager::do_action(const string &action, void * arg)
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
void InformationManager::stop_monitor(int hid)
int InformationManager::start_monitor(Host * host, bool update_remotes)
{
string error_msg;
int rc;
ostringstream oss;
string dsloc;
// -------------------------------------------------------------------------
// Drop host from DB
// -------------------------------------------------------------------------
Host * host = hpool->get(hid,true);
const InformationManagerDriver * imd;
if (host == 0) //Already deleted silently return
oss << "Monitoring host "<< host->get_name()<< " ("<< host->get_oid()<< ")";
NebulaLog::log("InM",Log::DEBUG,oss);
imd = get(host->get_im_mad());
if (imd == 0)
{
return;
oss.str("");
oss << "Could not find information driver " << host->get_im_mad();
NebulaLog::log("InM",Log::ERROR,oss);
host->set_error();
return -1;
}
int cluster_id = host->get_cluster_id();
string im_mad = host->get_im_mad();
host->set_monitoring_state();
rc = hpool->drop(host, error_msg);
Nebula::instance().get_ds_location(dsloc);
host->unlock();
imd->monitor(host->get_oid(), host->get_name(), dsloc, update_remotes);
if (rc != 0) //Error (a VM has been allocated or DB error)
{
ostringstream oss;
oss << "Could not delete host " << hid << ": " << error_msg;
NebulaLog::log("InM", Log::ERROR, oss);
return;
}
// -------------------------------------------------------------------------
// Send STOPMONITOR to the IM driver if defined
// -------------------------------------------------------------------------
const InformationManagerDriver * imd = get(im_mad);
if (imd != 0)
{
imd->stop_monitor(hid, host->get_name());
}
// -------------------------------------------------------------------------
// Remove host from cluster
// -------------------------------------------------------------------------
Cluster * cluster = clpool->get(cluster_id, true);
if( cluster != 0 )
{
rc = cluster->del_host(hid, error_msg);
if ( rc < 0 )
{
cluster->unlock();
return;
}
clpool->update(cluster);
cluster->unlock();
}
return 0;
}
/* -------------------------------------------------------------------------- */
@ -245,23 +183,18 @@ void InformationManager::timer_action()
{
static int mark = 0;
int rc;
time_t now;
ostringstream oss;
int rc;
time_t now;
set<int> discovered_hosts;
set<int>::iterator it;
set<int> discovered_hosts;
set<int>::iterator it;
const InformationManagerDriver * imd;
Host * host;
istringstream iss;
Host * host;
time_t monitor_length;
time_t target_time;
bool do_monitor;
mark = mark + timer_period;
if ( mark >= 600 )
@ -270,7 +203,6 @@ void InformationManager::timer_action()
mark = 0;
}
// Clear the expired monitoring records
hpool->clean_expired_monitoring();
now = time(0);
@ -284,7 +216,7 @@ void InformationManager::timer_action()
return;
}
for(it=discovered_hosts.begin();it!=discovered_hosts.end();it++)
for( it=discovered_hosts.begin() ; it!=discovered_hosts.end() ; ++it )
{
host = hpool->get(*it,true);
@ -295,106 +227,38 @@ void InformationManager::timer_action()
monitor_length = now - host->get_last_monitored();
/**
* Monitor hosts that are:
* - enabled and have been being monitored for more than monitor_expire
* - enabled and not being monitored
* - disabled and not being monitored but have running vms
* - disabled with running vms and have been being monitored
* for more than monitor_expire secs.
*/
do_monitor = false;
if (host->isEnabled())
switch (host->get_state())
{
if (!host->isMonitoring())
{
do_monitor = true;
}
else if (monitor_length >= monitor_expire )
{
do_monitor = true;
}
}
else if ( host->get_share_running_vms() > 0 )
{
if (!host->isMonitoring())
{
do_monitor = true;
}
else if (monitor_length >= monitor_expire)
{
do_monitor = true;
}
}
// Not received an update in the monitor period.
case Host::INIT:
case Host::MONITORED:
case Host::ERROR:
case Host::DISABLED:
start_monitor(host, (host->get_last_monitored() == 0));
break;
if (do_monitor)
{
oss.str("");
oss << "Monitoring host " << host->get_name() << " ("
<< host->get_oid() << ")";
// Update last_mon_time to rotate HostPool::discover output. Update
// monitoring values with 0s.
case Host::OFFLINE:
host->touch(true);
hpool->update_monitoring(host);
break;
NebulaLog::log("InM",Log::DEBUG,oss);
imd = get(host->get_im_mad());
if (imd == 0)
{
oss.str("");
oss << "Could not find information driver " << host->get_im_mad();
NebulaLog::log("InM",Log::ERROR,oss);
host->set_error();
hpool->update(host);
host->unlock();
}
else
{
Nebula& nd = Nebula::instance();
bool update_remotes = false;
string name = host->get_name();
int oid = host->get_oid();
string dsloc;
//Force remotes update if the host has never been monitored.
if (host->get_last_monitored() == 0)
// Host is being monitored for more than monitor_expire secs.
case Host::MONITORING_DISABLED:
case Host::MONITORING_INIT:
case Host::MONITORING_ERROR:
case Host::MONITORING_MONITORED:
if (monitor_length >= monitor_expire )
{
update_remotes = true;
start_monitor(host, (host->get_last_monitored() == 0));
}
host->set_monitoring_state();
hpool->update(host);
host->unlock();
nd.get_ds_location(dsloc);
imd->monitor(oid, name, dsloc, update_remotes);
}
break;
}
else if (!host->isEnabled() && host->get_share_running_vms() == 0 )
{
// Disabled hosts without VMs are not monitored, but we need to
// update the last_mon_time to rotate the Hosts returned by
// HostPool::discover. We also update the monitoring values with
// 0s
host->touch(true);
hpool->update_monitoring(host);
hpool->update(host);
hpool->update(host);
host->unlock();
}
else
{
host->unlock();
}
host->unlock();
}
}

View File

@ -78,6 +78,15 @@ void MonitorThread::do_message()
return;
}
if ( host->get_state() == Host::OFFLINE ) //Should not receive any info
{
delete hinfo;
host->unlock();
return;
}
// -------------------------------------------------------------------------
// Monitoring Error. VMs running on the host are moved to UNKNOWN
// -------------------------------------------------------------------------
@ -181,8 +190,6 @@ void MonitorThread::do_message()
rc = host->update_info(tmpl, vm_poll, lost, found, non_shared_ds,
reserved_cpu, reserved_mem);
hpool->update(host);
if (rc != 0)
{
host->unlock();
@ -190,6 +197,8 @@ void MonitorThread::do_message()
return;
}
hpool->update(host);
hpool->update_monitoring(host);
oss << "Host " << host->get_name() << " (" << host->get_oid() << ")"

View File

@ -24,28 +24,35 @@ module OpenNebula
# Constants and Class Methods
#######################################################################
HOST_METHODS = {
:info => "host.info",
:allocate => "host.allocate",
:delete => "host.delete",
:enable => "host.enable",
:status => "host.status",
:update => "host.update",
:monitoring => "host.monitoring",
:rename => "host.rename"
}
HOST_STATES=%w{INIT MONITORING_MONITORED MONITORED ERROR DISABLED MONITORING_ERROR MONITORING_INIT MONITORING_DISABLED}
HOST_STATES=%w{INIT MONITORING_MONITORED MONITORED ERROR DISABLED
MONITORING_ERROR MONITORING_INIT MONITORING_DISABLED OFFLINE}
SHORT_HOST_STATES={
"INIT" => "init",
"MONITORING_MONITORED" => "update",
"MONITORED" => "on",
"ERROR" => "err",
"DISABLED" => "off",
"DISABLED" => "dsbl",
"MONITORING_ERROR" => "retry",
"MONITORING_INIT" => "init",
"MONITORING_DISABLED" => "off"
"MONITORING_DISABLED" => "dsbl",
"OFFLINE" => "off"
}
# Maps a status name to the integer code sent as the last argument of the
# HOST_METHODS[:status] ("host.status") call.
HOST_STATUS={
"ENABLED" => 0,
"DISABLED" => 1,
"OFFLINE" => 2
}
# Creates a Host description with just its identifier
@ -104,12 +111,17 @@ module OpenNebula
# Enables the Host
def enable()
set_enabled(true)
set_status("ENABLED")
end
# Disables the Host
def disable()
set_enabled(false)
set_status("DISABLED")
end
# Requests the OFFLINE status for this Host; returns whatever
# set_status returns.
def offline
    set_status 'OFFLINE'
end
def flush()
@ -255,10 +267,10 @@ module OpenNebula
end
private
def set_enabled(enabled)
def set_status(status)
return Error.new('ID not defined') if !@pe_id
rc = @client.call(HOST_METHODS[:enable], @pe_id, enabled)
rc = @client.call(HOST_METHODS[:status], @pe_id, HOST_STATUS[status])
rc = nil if !OpenNebula.is_error?(rc)
return rc

View File

@ -414,7 +414,7 @@ void RequestManager::register_xml_methods()
xmlrpc_c::methodPtr vrouter_pool_info(new VirtualRouterPoolInfo());
// Host Methods
xmlrpc_c::methodPtr host_enable(new HostEnable());
xmlrpc_c::methodPtr host_status(new HostStatus());
xmlrpc_c::methodPtr host_monitoring(new HostMonitoring());
xmlrpc_c::methodPtr host_pool_monitoring(new HostPoolMonitoring());
@ -527,7 +527,7 @@ void RequestManager::register_xml_methods()
RequestManagerRegistry.addMethod("one.templatepool.info",template_pool_info);
/* Host related methods*/
RequestManagerRegistry.addMethod("one.host.enable", host_enable);
RequestManagerRegistry.addMethod("one.host.status", host_status);
RequestManagerRegistry.addMethod("one.host.update", host_update);
RequestManagerRegistry.addMethod("one.host.allocate", host_allocate);
RequestManagerRegistry.addMethod("one.host.delete", host_delete);

View File

@ -262,14 +262,10 @@ Request::ErrorCode TemplateDelete::request_execute(
int HostDelete::drop(int oid, PoolObjectSQL * object, string& error_msg)
{
Nebula& nd = Nebula::instance();
InformationManager * im = nd.get_im();
HostPool * hpool = nd.get_hpool();
InformationManager * im = Nebula::instance().get_im();
Host* host = static_cast<Host *>(object);
//Do not trigger delete event on IM if there are VMs running on the host
if ( host->get_share_running_vms() > 0 )
{
error_msg = "Can not remove a host with running VMs";
@ -279,13 +275,12 @@ int HostDelete::drop(int oid, PoolObjectSQL * object, string& error_msg)
return -1;
}
host->disable();
string im_mad = host->get_im_mad();
string name = host->get_name();
hpool->update(host);
RequestManagerDelete::drop(oid, object, error_msg);
host->unlock();
im->trigger(InformationManager::STOPMONITOR, oid);
im->stop_monitor(oid, name, im_mad);
return 0;
}

View File

@ -20,13 +20,11 @@
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
void HostEnable::request_execute(xmlrpc_c::paramList const& paramList,
void HostStatus::request_execute(xmlrpc_c::paramList const& paramList,
RequestAttributes& att)
{
int id = xmlrpc_c::value_int(paramList.getInt(1));
bool enable = xmlrpc_c::value_boolean(paramList.getBoolean(2));
Host * host;
int id = xmlrpc_c::value_int(paramList.getInt(1));
int status = xmlrpc_c::value_int(paramList.getInt(2));
HostPool * hpool = static_cast<HostPool *>(pool);
@ -35,7 +33,7 @@ void HostEnable::request_execute(xmlrpc_c::paramList const& paramList,
return;
}
host = hpool->get(id,true);
Host * host = hpool->get(id,true);
if ( host == 0 )
{
@ -45,13 +43,23 @@ void HostEnable::request_execute(xmlrpc_c::paramList const& paramList,
return;
}
if ( enable == true)
switch (status)
{
host->enable();
}
else
{
host->disable();
case ENABLED:
host->enable();
break;
case DISABLED:
host->disable();
break;
case OFFLINE:
host->offline();
break;
default:
att.resp_msg = "Wrong status code";
failure_response(INTERNAL, att);
host->unlock();
return;
}
hpool->update(host);

View File

@ -341,6 +341,16 @@ int RequestManagerVirtualMachine::get_host_information(
return -1;
}
if ( host->get_state() == Host::OFFLINE )
{
att.resp_msg = "Host is offline, cannot use it to deploy VM";
failure_response(ACTION, att);
host->unlock();
return -1;
}
name = host->get_name();
vmm = host->get_vmm_mad();