From f714459c2aadcc6c3560c40dbdcfb37883385b81 Mon Sep 17 00:00:00 2001 From: Alejandro Huertas Herrero Date: Wed, 12 Jun 2019 17:23:56 +0200 Subject: [PATCH] F #3371: refactor and lint host fsck (#3421) --- share/linters/.rubocop.yml | 1 - src/onedb/fsck/history.rb | 3 + src/onedb/fsck/host.rb | 301 ++++++++++++++++++++++--------------- 3 files changed, 186 insertions(+), 119 deletions(-) diff --git a/share/linters/.rubocop.yml b/share/linters/.rubocop.yml index 13eede41ae..0b99cedd26 100644 --- a/share/linters/.rubocop.yml +++ b/share/linters/.rubocop.yml @@ -500,7 +500,6 @@ AllCops: - src/onedb/fsck/pool_control.rb - src/onedb/fsck/marketplace.rb - src/onedb/fsck/template.rb - - src/onedb/fsck/host.rb - src/onedb/fsck/vm.rb - src/onedb/fsck/marketplaceapp.rb - src/onedb/fsck.rb diff --git a/src/onedb/fsck/history.rb b/src/onedb/fsck/history.rb index 33631bf419..22ac67f0c4 100644 --- a/src/onedb/fsck/history.rb +++ b/src/onedb/fsck/history.rb @@ -10,6 +10,7 @@ module OneDBFsck check_history_opened end + # Check that etime from non last seq is 0 def check_history_etime # DATA: check history etime @@ -31,6 +32,7 @@ module OneDBFsck end end + # Check that etime is not 0 in DONE vms def check_history_opened history_fix = @fixes_history = [] @@ -73,6 +75,7 @@ module OneDBFsck end end + # Fix the broken history records def fix_history # DATA: FIX: update history records with fixed data # DATA: TODO: check all fixes to always do the same (update vs rewrite) diff --git a/src/onedb/fsck/host.rb b/src/onedb/fsck/host.rb index 0ecdc27a28..3bcf1c0a58 100644 --- a/src/onedb/fsck/host.rb +++ b/src/onedb/fsck/host.rb @@ -1,8 +1,9 @@ - +# Host module module OneDBFsck + # Initialize all the host counters to 0 def init_host_counters - @db.fetch("SELECT oid, name FROM host_pool") do |row| + @db.fetch('SELECT oid, name FROM host_pool') do |row| counters[:host][row[:oid]] = { :name => row[:name], :memory => 0, @@ -12,168 +13,232 @@ module OneDBFsck end end + # Check hosts clusters def check_host_cluster - cluster = @data_cluster + cluster = @data_cluster hosts_fix = @fixes_host_cluster = {} - @db.fetch("SELECT oid,body,cid FROM host_pool") do |row| - doc = Document.new(row[:body]) + @db.fetch('SELECT oid,body,cid FROM host_pool') do |row| + doc = Document.new(row[:body]) + cid = doc.root.get_text('CLUSTER_ID').to_s.to_i + cname = doc.root.get_text('CLUSTER') - cluster_id = doc.root.get_text('CLUSTER_ID').to_s.to_i - cluster_name = doc.root.get_text('CLUSTER') + if cid != row[:cid] + log_error("Host #{row[:oid]} is in cluster #{cid}, but cid " \ + "column has cluster #{row[:cid]}") - if cluster_id != row[:cid] - log_error("Host #{row[:oid]} is in cluster #{cluster_id}, " << - "but cid column has cluster #{row[:cid]}") - hosts_fix[row[:oid]] = {:body => row[:body], :cid => cluster_id} + hosts_fix[row[:oid]] = { :body => row[:body], :cid => cid } end - if cluster_id != -1 - cluster_entry = cluster[cluster_id] + next if cid == -1 - if cluster_entry.nil? - log_error("Host #{row[:oid]} is in cluster " << - "#{cluster_id}, but it does not exist") + cluster_entry = cluster[cid] - doc.root.each_element('CLUSTER_ID') do |e| - e.text = "-1" - end + if cluster_entry.nil? + log_error("Host #{row[:oid]} is in cluster #{cid}, " \ + 'but it does not exist') + + doc.root.each_element('CLUSTER_ID') do |e| + e.text = '-1' + end + + doc.root.each_element('CLUSTER') do |e| + e.text = '' + end + + hosts_fix[row[:oid]] = { :body => doc.root.to_s, :cid => -1 } + else + if cname != cluster_entry[:name] + new_cluster = cluster_entry[:name] + + log_error("Host #{row[:oid]} has a wrong name for " \ + "cluster #{cid}, #{cname}. " \ + "It will be changed to #{new_cluster}") doc.root.each_element('CLUSTER') do |e| - e.text = "" + e.text = new_cluster end - hosts_fix[row[:oid]] = {:body => doc.root.to_s, :cid => -1} - else - if cluster_name != cluster_entry[:name] - log_error("Host #{row[:oid]} has a wrong name for " << - "cluster #{cluster_id}, #{cluster_name}. " << - "It will be changed to #{cluster_entry[:name]}") - - doc.root.each_element('CLUSTER') do |e| - e.text = cluster_entry[:name] - end - - hosts_fix[row[:oid]] = { - body: doc.root.to_s, - cid: cluster_id - } - end - - cluster_entry[:hosts] << row[:oid] + hosts_fix[row[:oid]] = { :body => doc.root.to_s, + :cid => cid } end + + cluster_entry[:hosts] << row[:oid] end end end + # Fix hosts clusters def fix_host_cluster @db.transaction do @fixes_host_cluster.each do |id, entry| - @db[:host_pool].where(oid: id).update( - body: entry[:body], - cid: entry[:cid] - ) + obj = { :body => entry[:body], :cid => entry[:cid] } + + @db[:host_pool].where(:oid => id).update(obj) end end end + # Check hosts information def check_host @fixes_host = {} # DATA: FIX: Calculate the host's xml and write them to host_pool_new @db[:host_pool].each do |row| - host_doc = nokogiri_doc(row[:body]) + host_doc = nokogiri_doc(row[:body]) + hid = row[:oid] + counters_host = counters[:host][hid] + + rvms = counters_host[:rvms].size + cpu_usage = (counters_host[:cpu] * 100).to_i + mem_usage = counters_host[:memory] * 1024 error = false - hid = row[:oid] + host_doc, error = check_running_vms(host_doc, hid, rvms, error) - counters_host = counters[:host][hid] + host_doc, error = check_vms_ids(host_doc, + hid, + counters_host[:rvms], + error) - rvms = counters_host[:rvms].size - cpu_usage = (counters_host[:cpu]*100).to_i - mem_usage = counters_host[:memory]*1024 + host_doc, error = check_pcis(host_doc, + hid, + counters_host[:rvms], + error) - # rewrite running_vms - host_doc.root.xpath("HOST_SHARE/RUNNING_VMS").each {|e| - if e.text != rvms.to_s - log_error( - "Host #{hid} RUNNING_VMS has #{e.text} \tis\t#{rvms}") - e.content = rvms - error = true - end - } + host_doc, error = check_usage(host_doc, + hid, + 'CPU', + cpu_usage, + error) - # re-do list of VM IDs - vms_elem = host_doc.root.at_xpath("VMS").remove - - vms_new_elem = host_doc.create_element("VMS") - host_doc.root.add_child(vms_new_elem) - - counters_host[:rvms].each do |id| - id_elem = vms_elem.at_xpath("ID[.=#{id}]") - - if id_elem.nil? - log_error( - "VM #{id} is missing from Host #{hid} VM id list") - error = true - else - id_elem.remove - end - - vms_new_elem.add_child(host_doc.create_element("ID")).content = id.to_s - end - - vms_elem.xpath("ID").each do |id_elem| - log_error( - "VM #{id_elem.text} is in Host #{hid} VM id list, "<< - "but it should not") - error = true - end - - host_doc.root.xpath("HOST_SHARE/PCI_DEVICES/PCI").each do |pci| - if !pci.at_xpath("VMID").nil? - vmid = pci.at_xpath("VMID").text.to_i - - if vmid != -1 && !counters_host[:rvms].include?(vmid) - log_error("VM #{vmid} has a PCI device assigned in host #{hid}, but it should not. Device: #{pci.at_xpath('DEVICE_NAME').text}") - pci.at_xpath("VMID").content = "-1" - error = true - end - end - end - - # rewrite cpu - host_doc.root.xpath("HOST_SHARE/CPU_USAGE").each {|e| - if e.text != cpu_usage.to_s - log_error( - "Host #{hid} CPU_USAGE has #{e.text} "<< - "\tis\t#{cpu_usage}") - e.content = cpu_usage - error = true - end - } - - # rewrite memory - host_doc.root.xpath("HOST_SHARE/MEM_USAGE").each {|e| - if e.text != mem_usage.to_s - log_error("Host #{hid} MEM_USAGE has #{e.text} "<< - "\tis\t#{mem_usage}") - e.content = mem_usage - error = true - end - } + host_doc, error = check_usage(host_doc, + hid, + 'MEM', + mem_usage, + error) @fixes_host[hid] = host_doc.root.to_s if error end end + # Check running vms on host + # + # @param doc [Document] Hosts information + # @param hid [String] Host ID + # @param rvms [Integer] Running VMs on host + # @param error [Boolean] True if there has been an error, false otherwise + # + # @return [Document,Boolean] Updated hosts information and error if any + def check_running_vms(doc, hid, rvms, error) + doc.root.xpath('HOST_SHARE/RUNNING_VMS').each do |e| + next unless e.text != rvms.to_s + + log_error("Host #{hid} RUNNING_VMS has #{e.text} \tis\t#{rvms}") + + e.content = rvms + error = true + end + + [doc, error] + end + + # Check vms ids on host + # + # @param doc [Document] Hosts information + # @param hid [String] Host ID + # @param rvms [Integer] Running VMs on host + # @param error [Boolean] True if there has been an error, false otherwise + # + # @return [Document,Boolean] Updated hosts information and error if any + def check_vms_ids(doc, hid, rvms, error) + vms_elem = doc.root.at_xpath('VMS').remove + vms_new_elem = doc.create_element('VMS') + + doc.root.add_child(vms_new_elem) + + rvms.each do |id| + id_elem = vms_elem.at_xpath("ID[.=#{id}]") + + if id_elem.nil? + log_error("VM #{id} is missing from Host #{hid} VM id list") + + error = true + else + id_elem.remove + end + + h_e = doc.create_element('ID') + vms_new_elem.add_child(h_e).content = id.to_s + end + + vms_elem.xpath('ID').each do |id_elem| + log_error("VM #{id_elem.text} is in Host #{hid} VM id list, " \ + 'but it should not') + + error = true + end + + [doc, error] + end + + # Check host pcis + # + # @param doc [Document] Hosts information + # @param hid [String] Host ID + # @param rvms [Integer] Running VMs on host + # @param error [Boolean] True if there has been an error, false otherwise + # + # @return [Document,Boolean] Updated hosts information and error if any + def check_pcis(doc, hid, rvms, error) + doc.root.xpath('HOST_SHARE/PCI_DEVICES/PCI').each do |pci| + next if pci.at_xpath('VMID').nil? + + vmid = pci.at_xpath('VMID').text.to_i + + next unless vmid != -1 && !rvms.include?(vmid) + + pci_name = pci.at_xpath('DEVICE_NAME').text + pci.at_xpath('VMID').content = '-1' + error = true + + log_error("VM #{vmid} has a PCI device assigned in host " \ + "#{hid}, but it should not. Device: #{pci_name}") + end + + [doc, error] + end + + # Check host usages + # + # @param doc [Document] Hosts information + # @param hid [String] Host ID + # @param type [String] CPU for cpu_usage, MEM for memory_usage + # @param usage [Integer] Host usage + # @param error [Boolean] True if there has been an error, false otherwise + # + # @return [Document,Boolean] Updated hosts information and error if any + def check_usage(doc, hid, type, usage, error) + doc.root.xpath("HOST_SHARE/#{type}_USAGE").each do |e| + next unless e.text != usage.to_s + + log_error("Host #{hid} #{type}_USAGE has #{e.text} \tis\t#{usage}") + + e.content = usage + error = true + end + + [doc, error] + end + + # Fix hosts information def fix_host @db.transaction do @fixes_host.each do |id, body| - @db[:host_pool].where(oid: id).update(body: body) + @db[:host_pool].where(:oid => id).update(:body => body) end end end -end +end