1
0
mirror of https://github.com/OpenNebula/one.git synced 2025-03-16 22:50:10 +03:00

F #3371: refactor and lint host fsck (#3421)

This commit is contained in:
Alejandro Huertas Herrero 2019-06-12 17:23:56 +02:00 committed by Ruben S. Montero
parent cbd9aee912
commit f714459c2a
3 changed files with 186 additions and 119 deletions

View File

@ -500,7 +500,6 @@ AllCops:
- src/onedb/fsck/pool_control.rb
- src/onedb/fsck/marketplace.rb
- src/onedb/fsck/template.rb
- src/onedb/fsck/host.rb
- src/onedb/fsck/vm.rb
- src/onedb/fsck/marketplaceapp.rb
- src/onedb/fsck.rb

View File

@ -10,6 +10,7 @@ module OneDBFsck
check_history_opened
end
# Check that etime from non last seq is 0
def check_history_etime
# DATA: check history etime
@ -31,6 +32,7 @@ module OneDBFsck
end
end
# Check that etime is not 0 in DONE vms
def check_history_opened
history_fix = @fixes_history = []
@ -73,6 +75,7 @@ module OneDBFsck
end
end
# Fix the broken history records
def fix_history
# DATA: FIX: update history records with fixed data
# DATA: TODO: check all fixes to always do the same (update vs rewrite)

View File

@ -1,8 +1,9 @@
# Host module
module OneDBFsck
# Initialize all the host counters to 0
def init_host_counters
@db.fetch("SELECT oid, name FROM host_pool") do |row|
@db.fetch('SELECT oid, name FROM host_pool') do |row|
counters[:host][row[:oid]] = {
:name => row[:name],
:memory => 0,
@ -12,168 +13,232 @@ module OneDBFsck
end
end
# Check hosts clusters
def check_host_cluster
cluster = @data_cluster
cluster = @data_cluster
hosts_fix = @fixes_host_cluster = {}
@db.fetch("SELECT oid,body,cid FROM host_pool") do |row|
doc = Document.new(row[:body])
@db.fetch('SELECT oid,body,cid FROM host_pool') do |row|
doc = Document.new(row[:body])
cid = doc.root.get_text('CLUSTER_ID').to_s.to_i
cname = doc.root.get_text('CLUSTER')
cluster_id = doc.root.get_text('CLUSTER_ID').to_s.to_i
cluster_name = doc.root.get_text('CLUSTER')
if cid != row[:cid]
log_error("Host #{row[:oid]} is in cluster #{cid}, but cid " \
"column has cluster #{row[:cid]}")
if cluster_id != row[:cid]
log_error("Host #{row[:oid]} is in cluster #{cluster_id}, " <<
"but cid column has cluster #{row[:cid]}")
hosts_fix[row[:oid]] = {:body => row[:body], :cid => cluster_id}
hosts_fix[row[:oid]] = { :body => row[:body], :cid => cid }
end
if cluster_id != -1
cluster_entry = cluster[cluster_id]
next if cid == -1
if cluster_entry.nil?
log_error("Host #{row[:oid]} is in cluster " <<
"#{cluster_id}, but it does not exist")
cluster_entry = cluster[cid]
doc.root.each_element('CLUSTER_ID') do |e|
e.text = "-1"
end
if cluster_entry.nil?
log_error("Host #{row[:oid]} is in cluster #{cid}, " \
'but it does not exist')
doc.root.each_element('CLUSTER_ID') do |e|
e.text = '-1'
end
doc.root.each_element('CLUSTER') do |e|
e.text = ''
end
hosts_fix[row[:oid]] = { :body => doc.root.to_s, :cid => -1 }
else
if cname != cluster_entry[:name]
new_cluster = cluster_entry[:name]
log_error("Host #{row[:oid]} has a wrong name for " \
"cluster #{cid}, #{cname}. " \
"It will be changed to #{new_cluster}")
doc.root.each_element('CLUSTER') do |e|
e.text = ""
e.text = new_cluster
end
hosts_fix[row[:oid]] = {:body => doc.root.to_s, :cid => -1}
else
if cluster_name != cluster_entry[:name]
log_error("Host #{row[:oid]} has a wrong name for " <<
"cluster #{cluster_id}, #{cluster_name}. " <<
"It will be changed to #{cluster_entry[:name]}")
doc.root.each_element('CLUSTER') do |e|
e.text = cluster_entry[:name]
end
hosts_fix[row[:oid]] = {
body: doc.root.to_s,
cid: cluster_id
}
end
cluster_entry[:hosts] << row[:oid]
hosts_fix[row[:oid]] = { :body => doc.root.to_s,
:cid => cid }
end
cluster_entry[:hosts] << row[:oid]
end
end
end
# Fix hosts clusters
def fix_host_cluster
@db.transaction do
@fixes_host_cluster.each do |id, entry|
@db[:host_pool].where(oid: id).update(
body: entry[:body],
cid: entry[:cid]
)
obj = { :body => entry[:body], :cid => entry[:cid] }
@db[:host_pool].where(:oid => id).update(obj)
end
end
end
# Check hosts information
def check_host
@fixes_host = {}
# DATA: FIX: Calculate the host's xml and write them to host_pool_new
@db[:host_pool].each do |row|
host_doc = nokogiri_doc(row[:body])
host_doc = nokogiri_doc(row[:body])
hid = row[:oid]
counters_host = counters[:host][hid]
rvms = counters_host[:rvms].size
cpu_usage = (counters_host[:cpu] * 100).to_i
mem_usage = counters_host[:memory] * 1024
error = false
hid = row[:oid]
host_doc, error = check_running_vms(host_doc, hid, rvms, error)
counters_host = counters[:host][hid]
host_doc, error = check_vms_ids(host_doc,
hid,
counters_host[:rvms],
error)
rvms = counters_host[:rvms].size
cpu_usage = (counters_host[:cpu]*100).to_i
mem_usage = counters_host[:memory]*1024
host_doc, error = check_pcis(host_doc,
hid,
counters_host[:rvms],
error)
# rewrite running_vms
host_doc.root.xpath("HOST_SHARE/RUNNING_VMS").each {|e|
if e.text != rvms.to_s
log_error(
"Host #{hid} RUNNING_VMS has #{e.text} \tis\t#{rvms}")
e.content = rvms
error = true
end
}
host_doc, error = check_usage(host_doc,
hid,
'CPU',
cpu_usage,
error)
# re-do list of VM IDs
vms_elem = host_doc.root.at_xpath("VMS").remove
vms_new_elem = host_doc.create_element("VMS")
host_doc.root.add_child(vms_new_elem)
counters_host[:rvms].each do |id|
id_elem = vms_elem.at_xpath("ID[.=#{id}]")
if id_elem.nil?
log_error(
"VM #{id} is missing from Host #{hid} VM id list")
error = true
else
id_elem.remove
end
vms_new_elem.add_child(host_doc.create_element("ID")).content = id.to_s
end
vms_elem.xpath("ID").each do |id_elem|
log_error(
"VM #{id_elem.text} is in Host #{hid} VM id list, "<<
"but it should not")
error = true
end
host_doc.root.xpath("HOST_SHARE/PCI_DEVICES/PCI").each do |pci|
if !pci.at_xpath("VMID").nil?
vmid = pci.at_xpath("VMID").text.to_i
if vmid != -1 && !counters_host[:rvms].include?(vmid)
log_error("VM #{vmid} has a PCI device assigned in host #{hid}, but it should not. Device: #{pci.at_xpath('DEVICE_NAME').text}")
pci.at_xpath("VMID").content = "-1"
error = true
end
end
end
# rewrite cpu
host_doc.root.xpath("HOST_SHARE/CPU_USAGE").each {|e|
if e.text != cpu_usage.to_s
log_error(
"Host #{hid} CPU_USAGE has #{e.text} "<<
"\tis\t#{cpu_usage}")
e.content = cpu_usage
error = true
end
}
# rewrite memory
host_doc.root.xpath("HOST_SHARE/MEM_USAGE").each {|e|
if e.text != mem_usage.to_s
log_error("Host #{hid} MEM_USAGE has #{e.text} "<<
"\tis\t#{mem_usage}")
e.content = mem_usage
error = true
end
}
host_doc, error = check_usage(host_doc,
hid,
'MEM',
mem_usage,
error)
@fixes_host[hid] = host_doc.root.to_s if error
end
end
# Check running vms on host
#
# @param doc [Document] Hosts information
# @param hid [String] Host ID
# @param rvms [Integer] Running VMs on host
# @param error [Boolean] True if there has been an error, false otherwise
#
# @return [Document,Boolean] Updated hosts information and error if any
def check_running_vms(doc, hid, rvms, error)
doc.root.xpath('HOST_SHARE/RUNNING_VMS').each do |e|
next unless e.text != rvms.to_s
log_error("Host #{hid} RUNNING_VMS has #{e.text} \tis\t#{rvms}")
e.content = rvms
error = true
end
[doc, error]
end
# Check vms ids on host
#
# @param doc [Document] Hosts information
# @param hid [String] Host ID
# @param rvms [Integer] Running VMs on host
# @param error [Boolean] True if there has been an error, false otherwise
#
# @return [Document,Boolean] Updated hosts information and error if any
def check_vms_ids(doc, hid, rvms, error)
vms_elem = doc.root.at_xpath('VMS').remove
vms_new_elem = doc.create_element('VMS')
doc.root.add_child(vms_new_elem)
rvms.each do |id|
id_elem = vms_elem.at_xpath("ID[.=#{id}]")
if id_elem.nil?
log_error("VM #{id} is missing from Host #{hid} VM id list")
error = true
else
id_elem.remove
end
h_e = doc.create_element('ID')
vms_new_elem.add_child(h_e).content = id.to_s
end
vms_elem.xpath('ID').each do |id_elem|
log_error("VM #{id_elem.text} is in Host #{hid} VM id list, " \
'but it should not')
error = true
end
[doc, error]
end
# Check host pcis
#
# @param doc [Document] Hosts information
# @param hid [String] Host ID
# @param rvms [Integer] Running VMs on host
# @param error [Boolean] True if there has been an error, false otherwise
#
# @return [Document,Boolean] Updated hosts information and error if any
def check_pcis(doc, hid, rvms, error)
doc.root.xpath('HOST_SHARE/PCI_DEVICES/PCI').each do |pci|
next if pci.at_xpath('VMID').nil?
vmid = pci.at_xpath('VMID').text.to_i
next unless vmid != -1 && !rvms.include?(vmid)
pci_name = pci.at_xpath('DEVICE_NAME').text
pci.at_xpath('VMID').content = '-1'
error = true
log_error("VM #{vmid} has a PCI device assigned in host " \
"#{hid}, but it should not. Device: #{pci_name}")
end
[doc, error]
end
# Check host usages
#
# @param doc [Document] Hosts information
# @param hid [String] Host ID
# @param type [String] CPU for cpu_usage, MEM for memory_usage
# @param usage [Integer] Host usage
# @param error [Boolean] True if there has been an error, false otherwise
#
# @return [Document,Boolean] Updated hosts information and error if any
def check_usage(doc, hid, type, usage, error)
doc.root.xpath("HOST_SHARE/#{type}_USAGE").each do |e|
next unless e.text != usage.to_s
log_error("Host #{hid} #{type}_USAGE has #{e.text} \tis\t#{usage}")
e.content = usage
error = true
end
[doc, error]
end
# Fix hosts information
def fix_host
@db.transaction do
@fixes_host.each do |id, body|
@db[:host_pool].where(oid: id).update(body: body)
@db[:host_pool].where(:oid => id).update(:body => body)
end
end
end
end
end