From 15f211f4c68a6429d034a3e56793cee5512fef97 Mon Sep 17 00:00:00 2001 From: Alejandro Huertas Date: Tue, 19 May 2020 12:05:06 +0200 Subject: [PATCH] M #-: fix minor bug in OneFlow watch dog --- src/flow/lib/LifeCycleManager.rb | 17 ++++++++--- src/flow/lib/ServiceWatchDog.rb | 29 ++++++++++++++++++- .../remotes/lxd-probes.d/vm/status/state.rb | 2 -- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/flow/lib/LifeCycleManager.rb b/src/flow/lib/LifeCycleManager.rb index ca69e2982e..33a5074654 100644 --- a/src/flow/lib/LifeCycleManager.rb +++ b/src/flow/lib/LifeCycleManager.rb @@ -59,6 +59,8 @@ class ServiceLCM @event_manager = EventManager.new(em_conf).am + @wd = ServiceWD.new(client, em_conf) + # Register Action Manager actions @am.register_action(ACTIONS['DEPLOY_CB'], method('deploy_cb')) @@ -89,10 +91,7 @@ class ServiceLCM Thread.new { catch_up(client) } - Thread.new do - wd = ServiceWD.new(client, em_conf) - wd.start(@srv_pool) - end + Thread.new { @wd.start(@srv_pool) } Thread.new do auto_scaler = ServiceAutoScaler.new(@srv_pool, @@ -243,6 +242,8 @@ class ServiceLCM if service.all_roles_running? service.set_state(Service::STATE['RUNNING']) service.update + + @wd.add_service(service_id) end # If there is no node in PENDING the service is not modified. @@ -288,6 +289,8 @@ class ServiceLCM ) end + @wd.remove_service(service_id) + set_deploy_strategy(service) roles = service.roles_shutdown @@ -341,6 +344,8 @@ class ServiceLCM ) end + @wd.remove_service(service_id) + role = service.roles[role_name] if role.nil? @@ -477,6 +482,8 @@ class ServiceLCM if service.all_roles_running? service.set_state(Service::STATE['RUNNING']) + + @wd.add_service(service_id) elsif service.strategy == 'straight' set_deploy_strategy(service) @@ -647,6 +654,8 @@ class ServiceLCM service.set_state(Service::STATE['RUNNING']) service.roles[role_name].set_state(Role::STATE['RUNNING']) + @wd.add_service(service_id) + service.update end diff --git a/src/flow/lib/ServiceWatchDog.rb b/src/flow/lib/ServiceWatchDog.rb index b23551bf9f..ae3edae31c 100644 --- a/src/flow/lib/ServiceWatchDog.rb +++ b/src/flow/lib/ServiceWatchDog.rb @@ -53,6 +53,9 @@ class ServiceWD @cloud_auth = @conf[:cloud_auth] @wait_timeout = @cloud_auth.conf[:wait_timeout] @client = client + + # Array of running services to watch + @services = [] end # Start services WD @@ -93,7 +96,10 @@ class ServiceWD xml = Nokogiri::XML(Base64.decode64(content)) - service_id = split_key[2] + service_id = split_key[2].to_i + + next unless @services.include?(service_id) + node = xml.xpath('/HOOK_MESSAGE/VM/ID').text.to_i state = xml.xpath('/HOOK_MESSAGE/STATE').text lcm_state = xml.xpath('/HOOK_MESSAGE/LCM_STATE').text @@ -120,6 +126,20 @@ class ServiceWD unsubscribe(subscriber) end + # Add service to watch dog + # + # @param service_id [String] Service ID + def add_service(service_id) + @services << service_id.to_i + end + + # Remove service from watch dog + # + # @param service_id [String] Service ID + def remove_service(service_id) + @services.delete(service_id.to_i) + end + private # Get OpenNebula client @@ -164,6 +184,13 @@ class ServiceWD service_pool.each do |service| service.info + if service.state != Service::STATE['RUNNING'] && + service.state != Service::STATE['WARNING'] + next + end + + @services << service.id.to_i + service.roles.each do |name, role| role.nodes_ids.each do |node| check_role_state(client, service.id, name, node) diff --git a/src/im_mad/remotes/lxd-probes.d/vm/status/state.rb b/src/im_mad/remotes/lxd-probes.d/vm/status/state.rb index cfeab8c915..190cfcd438 100755 --- a/src/im_mad/remotes/lxd-probes.d/vm/status/state.rb +++ b/src/im_mad/remotes/lxd-probes.d/vm/status/state.rb @@ -47,8 +47,6 @@ module DomainList vms end - - end xml_txt = STDIN.read