From ee89a2185a21fde16add402b799c4a6c5266c735 Mon Sep 17 00:00:00 2001 From: "Ruben S. Montero" Date: Wed, 4 Nov 2015 17:31:56 +0100 Subject: [PATCH] feature #3987: Monitor system datastores through the front-end + BRIDGE_LIST. Add monitor scripts for shared, ceph. Distributed system DS as ssh monitored through probes. --- include/ImageManager.h | 5 +- install.sh | 3 + share/etc/oned.conf | 3 +- src/datastore_mad/one_datastore.rb | 87 ++++++++++++++++------ src/datastore_mad/remotes/libfs.sh | 5 +- src/im/MonitorThread.cc | 25 +------ src/image/ImageManager.cc | 53 +++++++++++--- src/image/ImageManagerActions.cc | 18 +++-- src/tm_mad/ceph/monitor | 114 +++++++++++++++++++++++++++++ src/tm_mad/shared/monitor | 93 +++++++++++++++++++++++ src/tm_mad/ssh/monitor | 20 +++++ 11 files changed, 359 insertions(+), 67 deletions(-) create mode 100755 src/tm_mad/ceph/monitor create mode 100755 src/tm_mad/shared/monitor create mode 100755 src/tm_mad/ssh/monitor diff --git a/include/ImageManager.h b/include/ImageManager.h index c74b26d402..4f2e5020a5 100644 --- a/include/ImageManager.h +++ b/include/ImageManager.h @@ -305,9 +305,12 @@ private: * * @param img_data Image XML representation * @param ds_data Datastore XML representation + * @param extra_data additional XML formatted data for the driver * @return the XML message */ - static string * format_message(const string& img_data, const string& ds_data); + static string * format_message(const string& img_data, + const string& ds_data, + const string& extra_data); /** * This function is executed periodically to monitor Datastores. diff --git a/install.sh b/install.sh index ad34b1d0a4..39ef98ad57 100755 --- a/install.sh +++ b/install.sh @@ -994,6 +994,7 @@ TM_SHARED_FILES="src/tm_mad/shared/clone \ src/tm_mad/shared/snap_create_live \ src/tm_mad/shared/snap_delete \ src/tm_mad/shared/snap_revert \ + src/tm_mad/shared/monitor \ src/tm_mad/shared/cpds" TM_FS_LVM_FILES="src/tm_mad/fs_lvm/clone \ @@ -1042,6 +1043,7 @@ TM_SSH_FILES="src/tm_mad/ssh/clone \ src/tm_mad/ssh/snap_create_live \ src/tm_mad/ssh/snap_delete \ src/tm_mad/ssh/snap_revert \ + src/tm_mad/ssh/monitor \ src/tm_mad/ssh/cpds" TM_DUMMY_FILES="src/tm_mad/dummy/clone \ @@ -1107,6 +1109,7 @@ TM_CEPH_FILES="src/tm_mad/ceph/clone \ src/tm_mad/ceph/delete \ src/tm_mad/ceph/context \ src/tm_mad/ceph/mkimage \ + src/tm_mad/ceph/monitor \ src/tm_mad/ceph/mkswap" TM_DEV_FILES="src/tm_mad/dev/clone \ diff --git a/share/etc/oned.conf b/share/etc/oned.conf index cdff05f7c0..13e3813bb0 100644 --- a/share/etc/oned.conf +++ b/share/etc/oned.conf @@ -577,11 +577,12 @@ TM_MAD = [ # arguments : for the driver executable # -t number of threads, i.e. number of repo operations at the same time # -d datastore mads separated by commas +# -s system datastore tm drivers, used to monitor shared system ds. #******************************************************************************* DATASTORE_MAD = [ executable = "one_datastore", - arguments = "-t 15 -d dummy,fs,vmfs,lvm,ceph,dev" + arguments = "-t 15 -d dummy,fs,vmfs,lvm,ceph,dev -s shared,ssh,ceph" ] #******************************************************************************* diff --git a/src/datastore_mad/one_datastore.rb b/src/datastore_mad/one_datastore.rb index f2eb962f74..a39c66c0c7 100755 --- a/src/datastore_mad/one_datastore.rb +++ b/src/datastore_mad/one_datastore.rb @@ -56,8 +56,15 @@ class DatastoreDriver < OpenNebulaDriver :snap_flatten=> "SNAP_FLATTEN" } + # Default System datastores for OpenNebula, override in oned.conf + SYSTEM_DS_TYPES = [ + "shared", + "ssh", + "ceph" + ] + # Register default actions for the protocol - def initialize(ds_type, options={}) + def initialize(ds_type, sys_ds_type, options={}) @options={ :concurrency => 10, :threaded => true, @@ -87,6 +94,16 @@ class DatastoreDriver < OpenNebulaDriver @types = ds_type end + if sys_ds_type == nil + @sys_types = SYSTEM_DS_TYPES + elsif sys_ds_type.class == String + @sys_types = [sys_ds_type] + else + @sys_types = sys_ds_type + end + + @local_tm_scripts_path = File.join(@local_scripts_base_path, 'tm/') + register_action(ACTION[:cp].to_sym, method("cp")) register_action(ACTION[:rm].to_sym, method("rm")) register_action(ACTION[:mkfs].to_sym, method("mkfs")) @@ -103,47 +120,47 @@ class DatastoreDriver < OpenNebulaDriver ############################################################################ def cp(id, drv_message) - ds = get_ds_type(drv_message) + ds, sys = get_ds_type(drv_message) do_image_action(id, ds, :cp, "#{drv_message} #{id}") end def rm(id, drv_message) - ds = get_ds_type(drv_message) + ds, sys = get_ds_type(drv_message) do_image_action(id, ds, :rm, "#{drv_message} #{id}") end def mkfs(id, drv_message) - ds = get_ds_type(drv_message) + ds, sys = get_ds_type(drv_message) do_image_action(id, ds, :mkfs, "#{drv_message} #{id}") end def stat(id, drv_message) - ds = get_ds_type(drv_message) + ds, sys = get_ds_type(drv_message) do_image_action(id, ds, :stat, "#{drv_message} #{id}") end def clone(id, drv_message) - ds = get_ds_type(drv_message) + ds, sys = get_ds_type(drv_message) do_image_action(id, ds, :clone, "#{drv_message} #{id}") end def monitor(id, drv_message) - ds = get_ds_type(drv_message) - do_image_action(id, ds, :monitor, "#{drv_message} #{id}", true) + ds, sys = get_ds_type(drv_message) + do_image_action(id, ds, :monitor, "#{drv_message} #{id}", sys, true) end def snap_delete(id, drv_message) - ds = get_ds_type(drv_message) + ds, sys = get_ds_type(drv_message) do_image_action(id, ds, :snap_delete, "#{drv_message} #{id}") end def snap_revert(id, drv_message) - ds = get_ds_type(drv_message) + ds, sys = get_ds_type(drv_message) do_image_action(id, ds, :snap_revert, "#{drv_message} #{id}") end def snap_flatten(id, drv_message) - ds = get_ds_type(drv_message) + ds, sys = get_ds_type(drv_message) do_image_action(id, ds, :snap_flatten, "#{drv_message} #{id}") end @@ -159,12 +176,27 @@ class DatastoreDriver < OpenNebulaDriver end end - def do_image_action(id, ds, action, arguments, encode64=false) - return if not is_available?(ds,id,action) + def is_sys_available?(sys, id, action) + if @sys_types.include?(sys) + return true + else + send_message(ACTION[action], RESULT[:failure], id, + "System datastore driver '#{sys}' not available") + return false + end + end + + def do_image_action(id, ds, action, arguments, sys='', encode64=false) + + if !sys.empty? + return if not is_sys_available?(sys, id, action) + path = File.join(@local_tm_scripts_path, sys) + else + return if not is_available?(ds, id, action) + path = File.join(@local_scripts_path, ds) + end - path = File.join(@local_scripts_path, ds) cmd = File.join(path, ACTION[action].downcase) - cmd << " " << arguments rc = LocalCommand.run(cmd, log_method(id)) @@ -180,10 +212,19 @@ class DatastoreDriver < OpenNebulaDriver message = Base64.decode64(drv_message) xml_doc = REXML::Document.new(message) + dstxt = dssys = '' + dsxml = xml_doc.root.elements['/DS_DRIVER_ACTION_DATA/DATASTORE/DS_MAD'] dstxt = dsxml.text if dsxml - return dstxt + dsxml = xml_doc.root.elements['/DS_DRIVER_ACTION_DATA/DATASTORE/TYPE'] + + if dsxml && dsxml.text == '1' + dsxml = xml_doc.root.elements['/DS_DRIVER_ACTION_DATA/DATASTORE/TM_MAD'] + dssys = dsxml.text if dsxml + end + + return dstxt, dssys end end @@ -194,12 +235,14 @@ end ################################################################################ opts = GetoptLong.new( - [ '--threads', '-t', GetoptLong::OPTIONAL_ARGUMENT ], - [ '--ds-types', '-d', GetoptLong::OPTIONAL_ARGUMENT ] + [ '--threads', '-t', GetoptLong::OPTIONAL_ARGUMENT ], + [ '--ds-types', '-d', GetoptLong::OPTIONAL_ARGUMENT ], + [ '--system-ds-types', '-s', GetoptLong::OPTIONAL_ARGUMENT ] ) -ds_type = nil -threads = 15 +ds_type = nil +sys_ds_type = nil +threads = 15 begin opts.each do |opt, arg| @@ -208,11 +251,13 @@ begin threads = arg.to_i when '--ds-types' ds_type = arg.split(',').map {|a| a.strip } + when '--system-ds-types' + sys_ds_type = arg.split(',').map {|a| a.strip } end end rescue Exception => e exit(-1) end -ds_driver = DatastoreDriver.new(ds_type, :concurrency => threads) +ds_driver = DatastoreDriver.new(ds_type, sys_ds_type, :concurrency => threads) ds_driver.start_driver diff --git a/src/datastore_mad/remotes/libfs.sh b/src/datastore_mad/remotes/libfs.sh index 5c70f9f3e8..24fb95dbca 100644 --- a/src/datastore_mad/remotes/libfs.sh +++ b/src/datastore_mad/remotes/libfs.sh @@ -223,9 +223,10 @@ function check_restricted { } #------------------------------------------------------------------------------- -# Gets the ESX host to be used as bridge to register a VMware disk +# Gets the host to be used as bridge to talk to the storage system # Implements a round robin for the bridges -# @param $1 - Image ID to be used to round-robin between ESX Bridges +# @param $1 - ID to be used to round-robin between host bridges. Random if +# not defined # @return host to be used as bridge #------------------------------------------------------------------------------- function get_destination_host { diff --git a/src/im/MonitorThread.cc b/src/im/MonitorThread.cc index 2be71cf466..bf129c968f 100644 --- a/src/im/MonitorThread.cc +++ b/src/im/MonitorThread.cc @@ -150,30 +150,9 @@ void MonitorThread::do_message() continue; } - if (ds->get_type() == Datastore::SYSTEM_DS) + if (ds->get_type() == Datastore::SYSTEM_DS && !ds->is_shared()) { - if (ds->is_shared()) - { - float total = 0, free = 0, used = 0; - ostringstream oss; - - (itm->second)->vector_value("TOTAL_MB", total); - (itm->second)->vector_value("FREE_MB", free); - (itm->second)->vector_value("USED_MB", used); - - ds->update_monitor(total, free, used); - - oss << "Datastore " << ds->get_name() << " (" << ds->get_oid() - << ") successfully monitored."; - - NebulaLog::log("ImM", Log::DEBUG, oss); - - dspool->update(ds); - } - else - { - non_shared_ds.insert(itm->first); - } + non_shared_ds.insert(itm->first); } ds->unlock(); diff --git a/src/image/ImageManager.cc b/src/image/ImageManager.cc index 18140ed475..61c7514557 100644 --- a/src/image/ImageManager.cc +++ b/src/image/ImageManager.cc @@ -188,12 +188,17 @@ void ImageManager::timer_action() void ImageManager::monitor_datastore(int ds_id) { - string ds_data; + string ds_data, ds_location, ds_name; string* drv_msg; + int cluster_id; + bool shared; + Nebula& nd = Nebula::instance(); DatastorePool * dspool = nd.get_dspool(); + Datastore::DatastoreType ds_type; + ostringstream oss; const ImageManagerDriver* imd = get(); @@ -213,20 +218,46 @@ void ImageManager::monitor_datastore(int ds_id) return; } - if ( ds->get_type() == Datastore::SYSTEM_DS ) - { - ds->unlock(); - return; - } + ds->to_xml(ds_data); - drv_msg = ImageManager::format_message("", ds->to_xml(ds_data)); - - oss << "Monitoring datastore " << ds->get_name() << " (" << ds_id << ")"; - - NebulaLog::log("InM", Log::DEBUG, oss); + cluster_id = ds->get_cluster_id(); + shared = ds->is_shared(); + ds_type = ds->get_type(); + ds_name = ds->get_name(); ds->unlock(); + ds_location = ""; + + switch (ds_type) + { + case Datastore::SYSTEM_DS: + if ( !shared ) + { + return; + } + + if ( nd.get_ds_location(cluster_id, ds_location) != -1 ) + { + oss << "" + << ds_location + << ""; + ds_location = oss.str(); + } + break; + + case Datastore::FILE_DS: + case Datastore::IMAGE_DS: + break; + } + + drv_msg = ImageManager::format_message("", ds_data, ds_location); + + oss.str(""); + oss << "Monitoring datastore " << ds_name << " (" << ds_id << ")"; + + NebulaLog::log("InM", Log::DEBUG, oss); + imd->monitor(ds_id, *drv_msg); delete drv_msg; diff --git a/src/image/ImageManagerActions.cc b/src/image/ImageManagerActions.cc index 8732b0c78f..66ab6e4a8a 100644 --- a/src/image/ImageManagerActions.cc +++ b/src/image/ImageManagerActions.cc @@ -474,7 +474,7 @@ int ImageManager::delete_image(int iid, string& error_str) return -1; } - drv_msg = format_message(img->to_xml(img_tmpl), ds_data); + drv_msg = format_message(img->to_xml(img_tmpl), ds_data, ""); source = img->get_source(); size = img->get_size(); ds_id = img->get_ds_id(); @@ -688,7 +688,7 @@ int ImageManager::clone_image(int new_id, return -1; } - drv_msg = format_message(img->to_xml(img_tmpl), ds_data); + drv_msg = format_message(img->to_xml(img_tmpl), ds_data, ""); imd->clone(img->get_oid(), *drv_msg); @@ -734,7 +734,7 @@ int ImageManager::register_image(int iid, const string& ds_data, string& error) return -1; } - drv_msg = format_message(img->to_xml(img_tmpl), ds_data); + drv_msg = format_message(img->to_xml(img_tmpl), ds_data, ""); path = img->get_path(); if ( path.empty() == true ) //NO PATH @@ -859,7 +859,7 @@ int ImageManager::stat_image(Template* img_tmpl, add_request(&sr); - drv_msg = format_message(img_data.str(), ds_data); + drv_msg = format_message(img_data.str(), ds_data, ""); imd->stat(sr.id, *drv_msg); @@ -882,13 +882,15 @@ int ImageManager::stat_image(Template* img_tmpl, string * ImageManager::format_message( const string& img_data, - const string& ds_data) + const string& ds_data, + const string& extra_data) { ostringstream oss; oss << "" << img_data << ds_data + << extra_data << ""; return one_util::base64_encode(oss.str()); @@ -1011,7 +1013,7 @@ int ImageManager::delete_snapshot(int iid, int sid, string& error) img->set_target_snapshot(sid); string img_tmpl; - string * drv_msg = format_message(img->to_xml(img_tmpl), ds_data); + string * drv_msg = format_message(img->to_xml(img_tmpl), ds_data, ""); imd->snapshot_delete(iid, *drv_msg); @@ -1115,7 +1117,7 @@ int ImageManager::revert_snapshot(int iid, int sid, string& error) img->set_target_snapshot(sid); string img_tmpl; - string * drv_msg = format_message(img->to_xml(img_tmpl), ds_data); + string * drv_msg = format_message(img->to_xml(img_tmpl), ds_data, ""); imd->snapshot_revert(iid, *drv_msg); @@ -1212,7 +1214,7 @@ int ImageManager::flatten_snapshot(int iid, int sid, string& error) img->set_target_snapshot(sid); string img_tmpl; - string * drv_msg = format_message(img->to_xml(img_tmpl), ds_data); + string * drv_msg = format_message(img->to_xml(img_tmpl), ds_data, ""); imd->snapshot_flatten(iid, *drv_msg); diff --git a/src/tm_mad/ceph/monitor b/src/tm_mad/ceph/monitor new file mode 100755 index 0000000000..195edc8df0 --- /dev/null +++ b/src/tm_mad/ceph/monitor @@ -0,0 +1,114 @@ +#!/bin/bash + +# -------------------------------------------------------------------------- # +# Copyright 2002-2015, OpenNebula Project, OpenNebula Systems # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); you may # +# not use this file except in compliance with the License. You may obtain # +# a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +#--------------------------------------------------------------------------- # + +############################################################################### +# This script is used to monitor the free and used space of a datastore +############################################################################### + +# -------- Set up the environment to source common tools & conf ------------ + +if [ -z "${ONE_LOCATION}" ]; then + LIB_LOCATION=/usr/lib/one +else + LIB_LOCATION=$ONE_LOCATION/lib +fi + +. $LIB_LOCATION/sh/scripts_common.sh + +DRIVER_PATH=$(dirname $0) +source ${DRIVER_PATH}/../../datastore/libfs.sh +source ${DRIVER_PATH}/ceph.conf + +# -------- Get datastore arguments from OpenNebula core ------------ + +DRV_ACTION=$1 +ID=$2 + +XPATH="${DRIVER_PATH}/../../datastore/xpath.rb -b $DRV_ACTION" + +unset i j XPATH_ELEMENTS + +while IFS= read -r -d '' element; do + XPATH_ELEMENTS[i++]="$element" +done < <($XPATH /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/BRIDGE_LIST \ + /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/POOL_NAME \ + /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/CEPH_USER \ + /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/CEPH_CONF) + +BRIDGE_LIST="${XPATH_ELEMENTS[j++]}" +POOL_NAME="${XPATH_ELEMENTS[j++]:-$POOL_NAME}" +CEPH_USER="${XPATH_ELEMENTS[j++]}" +CEPH_CONF="${XPATH_ELEMENTS[j++]}" + +HOST=`get_destination_host` + +if [ -z "$HOST" ]; then + error_message "Datastore template missing 'BRIDGE_LIST' attribute." + exit -1 +fi + +if [ -n "$CEPH_USER" ]; then + CEPH="$CEPH --id ${CEPH_USER}" +fi + +if [ -n "$CEPH_CONF" ]; then + CEPH="$CEPH --conf ${CEPH_CONF}" + RADOS="$RADOS --conf ${CEPH_CONF}" +fi + +# ------------ Compute datastore usage ------------- + +MONITOR_SCRIPT=$(cat <&1) +MONITOR_STATUS=$? + +if [ "$MONITOR_STATUS" = "0" ]; then + echo "$MONITOR_DATA" | tr ' ' '\n' +else + echo "$MONITOR_DATA" + exit $MONITOR_STATUS +fi + diff --git a/src/tm_mad/shared/monitor b/src/tm_mad/shared/monitor new file mode 100755 index 0000000000..4e9a590a92 --- /dev/null +++ b/src/tm_mad/shared/monitor @@ -0,0 +1,93 @@ +#!/bin/bash + +# -------------------------------------------------------------------------- # +# Copyright 2002-2015, OpenNebula Project, OpenNebula Systems # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); you may # +# not use this file except in compliance with the License. You may obtain # +# a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +#--------------------------------------------------------------------------- # + +############################################################################### +# This script is used to monitor the free and used space of a datastore +############################################################################### + +# -------- Set up the environment to source common tools & conf ------------ + +if [ -z "${ONE_LOCATION}" ]; then + LIB_LOCATION=/usr/lib/one +else + LIB_LOCATION=$ONE_LOCATION/lib +fi + +. $LIB_LOCATION/sh/scripts_common.sh + +DRIVER_PATH=$(dirname $0) +source ${DRIVER_PATH}/../../datastore/libfs.sh + +# -------- Get datastore arguments from OpenNebula core ------------ + +DRV_ACTION=$1 +ID=$2 + +XPATH="${DRIVER_PATH}/../../datastore/xpath.rb -b $DRV_ACTION" + +unset i XPATH_ELEMENTS + +while IFS= read -r -d '' element; do + XPATH_ELEMENTS[i++]="$element" +done < <($XPATH /DS_DRIVER_ACTION_DATA/DATASTORE_LOCATION \ + /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/BRIDGE_LIST) + +unset i + +BASE_PATH="${XPATH_ELEMENTS[i++]}/$2" +BRIDGE_LIST="${XPATH_ELEMENTS[i++]}" + +# ------------ Compute datastore usage ------------- + +MONITOR_SCRIPT=$(cat </dev/null | tail -n 1 | awk '{print \$3}') +TOTAL_MB=\$(df -B1M -P $BASE_PATH 2>/dev/null | tail -n 1 | awk '{print \$2}') +FREE_MB=\$(df -B1M -P $BASE_PATH 2>/dev/null | tail -n 1 | awk '{print \$4}') + +if [ -z "\$USED_MB" -o -z "\$TOTAL_MB" -o -z "\$FREE_MB" ]; then + msg="Empty value found" + echo "\$msg: USED_MB=\$USED_MB, TOTAL_MB=\$TOTAL_MB, FREE_MB=\$FREE_MB" + exit 1 +fi + +echo "USED_MB=\$USED_MB" +echo "FREE_MB=\$FREE_MB" +echo "TOTAL_MB=\$TOTAL_MB" +EOF +) + +if [ -n "$BRIDGE_LIST" ]; then + HOST=`get_destination_host` + MONITOR_DATA=$(ssh_monitor_and_log "$HOST" "$MONITOR_SCRIPT" "Remote monitor script" 2>&1) +else + MONITOR_DATA=$(monitor_and_log "$MONITOR_SCRIPT" "Monitor script" 2>&1) +fi + +MONITOR_STATUS=$? + +if [ "$MONITOR_STATUS" = "0" ]; then + echo "$MONITOR_DATA" | tr ' ' '\n' + exit 0 +else + echo "$MONITOR_DATA" + exit $MONITOR_STATUS +fi diff --git a/src/tm_mad/ssh/monitor b/src/tm_mad/ssh/monitor new file mode 100755 index 0000000000..369d80b489 --- /dev/null +++ b/src/tm_mad/ssh/monitor @@ -0,0 +1,20 @@ +#!/bin/sh + +# -------------------------------------------------------------------------- # +# Copyright 2002-2015, OpenNebula Project, OpenNebula Systems # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); you may # +# not use this file except in compliance with the License. You may obtain # +# a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +#--------------------------------------------------------------------------- # + +#The ssh system ds is monitored in each host using the monitor_ds.sh probe +exit 0