1
0
mirror of https://github.com/dkmstr/openuds.git synced 2025-01-18 06:03:54 +03:00

Refactoring Proxmox

This commit is contained in:
Adolfo Gómez García 2024-07-03 19:15:59 +02:00
parent 65d7e81263
commit e62e9875da
No known key found for this signature in database
GPG Key ID: DD1ABF20724CDA23
13 changed files with 77 additions and 58 deletions

View File

@ -139,10 +139,10 @@ class TestProxmoxProvider(UDSTransactionTestCase):
self.assertEqual(provider.list_machines(), fixtures.VMS_INFO)
api.list_machines.assert_called_once_with(force=False)
self.assertEqual(provider.get_machine_info(1), fixtures.VMS_INFO[0])
self.assertEqual(provider.get_vm_info(1), fixtures.VMS_INFO[0])
api.get_machine_pool_info.assert_called_once_with(1, None, force=True)
self.assertEqual(provider.get_machine_configuration(1), fixtures.VMS_CONFIGURATION[0])
self.assertEqual(provider.get_vm_config(1), fixtures.VMS_CONFIGURATION[0])
api.get_machine_configuration.assert_called_once_with(1, force=True)
self.assertEqual(
@ -209,7 +209,7 @@ class TestProxmoxProvider(UDSTransactionTestCase):
api.convert_to_template.assert_called_once_with(1)
self.assertEqual(
provider.clone_machine(1, 'name', 'description', True, 'node', 'storage', 'pool', True),
provider.clone_vm(1, 'name', 'description', True, 'node', 'storage', 'pool', True),
fixtures.VM_CREATION_RESULT,
)
api.clone_machine.assert_called_once_with(
@ -300,7 +300,7 @@ class TestProxmoxProvider(UDSTransactionTestCase):
# Patch get_provider to return te ProxmoxProvider instance (provider)
with mock.patch('uds.services.Proxmox.helpers.get_provider', return_value=provider):
# Test get_storage
vm_info = provider.get_machine_info(1)
vm_info = provider.get_vm_info(1)
h_storage = get_storage({'prov_uuid': 'test', 'machine': '1'})
self.assertEqual(
list(

View File

@ -66,7 +66,7 @@ class TestProxmoxFixedService(UDSTransactionTestCase):
with fixtures.patched_provider() as provider:
service = fixtures.create_service_fixed(provider=provider)
self.assertEqual(service.get_machine_info(2).name, fixtures.VMS_INFO[1].name)
self.assertEqual(service.get_vm_info(2).name, fixtures.VMS_INFO[1].name)
# is_available is already tested, so we will skip it

View File

@ -109,7 +109,7 @@ class TestProxmovLinkedService(UDSTestCase):
)
# Get machine info
self.assertEqual(service.get_machine_info(1), fixtures.VMS_INFO[0])
self.assertEqual(service.get_vm_info(1), fixtures.VMS_INFO[0])
api.get_machine_pool_info.assert_called_with(1, service.pool.value, force=True)
# Get nic mac
@ -119,7 +119,7 @@ class TestProxmovLinkedService(UDSTestCase):
self.assertEqual(service.provider().remove_machine(1), fixtures.UPID)
# Enable HA
service.enable_machine_ha(1, True)
service.enable_vm_ha(1, True)
api.enable_machine_ha.assert_called_with(1, True, service.ha.value)
def test_service_methods_2(self) -> None:
@ -128,7 +128,7 @@ class TestProxmovLinkedService(UDSTestCase):
service = fixtures.create_service_linked(provider=provider)
# Disable HA
service.disable_machine_ha(1)
service.disable_vm_ha(1)
api.disable_machine_ha.assert_called_with(1)
# Get basename

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2023 Virtual Cable S.L.U.
# Copyright (c) 2023-2024 Virtual Cable S.L.U.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@ -102,7 +102,7 @@ class ServerManager(metaclass=singleton.Singleton):
counters[uuid] = counters.get(uuid, 0) + 1
def get_server_stats(
self, serversFltr: 'QuerySet[models.Server]'
self, severs_filter: 'QuerySet[models.Server]'
) -> list[tuple[typing.Optional['types.servers.ServerStats'], 'models.Server']]:
"""
Returns a list of stats for a list of servers
@ -120,7 +120,7 @@ class ServerManager(metaclass=singleton.Singleton):
# Retrieve, in parallel, stats for all servers (not restrained)
with ThreadPoolExecutor(max_workers=10) as executor:
for server in serversFltr.select_for_update():
for server in severs_filter.select_for_update():
if server.is_restrained():
continue # Skip restrained servers
executor.submit(_retrieve_stats, server)
@ -134,6 +134,7 @@ class ServerManager(metaclass=singleton.Singleton):
now: datetime.datetime,
min_memory_mb: int = 0,
excluded_servers_uuids: typing.Optional[typing.Set[str]] = None,
weight_threshold: int = 0, # If not 0, server with weight below and nearer to this value will be selected
) -> tuple['models.Server', 'types.servers.ServerStats']:
"""
Finds the best server for a service
@ -145,19 +146,31 @@ class ServerManager(metaclass=singleton.Singleton):
if excluded_servers_uuids:
fltrs = fltrs.exclude(uuid__in=excluded_servers_uuids)
serversStats = self.get_server_stats(fltrs)
stats_and_servers = self.get_server_stats(fltrs)
def _weight_threshold(stats: 'types.servers.ServerStats') -> float:
if weight_threshold == 0:
return stats.weight()
# Values under threshold are better, weight is in between 0 and 1, lower is better
# To values over threshold, we will add 1, so they are always worse than any value under threshold
return stats.weight() if stats.weight() < weight_threshold else 1 + stats.weight()
# Now, cachedStats has a list of tuples (stats, server), use it to find the best server
for stats, server in serversStats:
for stats, server in stats_and_servers:
if stats is None:
unmanaged_list.append(server)
continue
if min_memory_mb and stats.memused // (1024 * 1024) < min_memory_mb: # Stats has minMemory in bytes
continue
if best is None or stats.weight() < best[1].weight():
if best is None:
best = (server, stats)
if _weight_threshold(stats) < _weight_threshold(best[1]):
best = (server, stats)
# stats.weight() < best[1].weight()
# Cannot be assigned to any server!!
# If no best, select one from unmanaged
if best is None:
@ -226,7 +239,9 @@ class ServerManager(metaclass=singleton.Singleton):
# If server is forced, and server is part of the group, use it
if server:
if (
server.groups.filter(uuid=server_group.uuid).exclude(uuid__in=excluded_servers_uuids).count()
server.groups.filter(uuid=server_group.uuid)
.exclude(uuid__in=excluded_servers_uuids)
.count()
== 0
):
raise exceptions.UDSException(_('Server is not part of the group'))
@ -319,9 +334,9 @@ class ServerManager(metaclass=singleton.Singleton):
resetCounter = False
# ServerCounterType
serverCounter: typing.Optional[
types.servers.ServerCounter
] = types.servers.ServerCounter.from_iterable(props.get(prop_name))
serverCounter: typing.Optional[types.servers.ServerCounter] = (
types.servers.ServerCounter.from_iterable(props.get(prop_name))
)
# If no cached value, get server assignation
if serverCounter is None:
return types.servers.ServerCounter.null()

View File

@ -41,6 +41,7 @@ from uds.core.util import ensure, singleton
IP_SUBTYPE: typing.Final[str] = 'ip'
class ServerType(enum.IntEnum):
TUNNEL = 1
ACTOR = 2
@ -148,10 +149,12 @@ class ServerStats:
@property
def cpufree_ratio(self) -> float:
# Returns a valuen between 0 and 1, being 1 the best value (no cpu used per user) and 0 the worst
return (1 - self.cpuused) / (self.current_users + 1)
@property
def memfree_ratio(self) -> float:
# Returns a valuen between 0 and 1, being 1 the best value (no memory used per user) and 0 the worst
return (self.memtotal - self.memused) / (self.memtotal or 1) / (self.current_users + 1)
@property
@ -169,17 +172,23 @@ class ServerStats:
return self.stamp > sql_stamp() - consts.cache.DEFAULT_CACHE_TIMEOUT
def weight(self, minMemory: int = 0) -> float:
def weight(self, min_memory: int = 0) -> float:
# Weights are calculated as:
# 0.5 * cpu_usage + 0.5 * (1 - mem_free / mem_total) / (current_users + 1)
# +1 is because this weights the connection of current users + new user
# Dividing by number of users + 1 gives us a "ratio" of available resources per user when a new user is added
# Also note that +512 forces that if mem_free is less than 512 MB, this server will be put at the end of the list
if self.memtotal - self.memused < minMemory:
if self.memtotal - self.memused < min_memory:
return 1000000000 # At the end of the list
# Lower is better
return 1 / ((self.cpufree_ratio * 1.3 + self.memfree_ratio) or 1)
# value can be between:
# (1 / (1 * 1.3 + 1) - 0.434) * 1.76 ~= 0.0 (worst case, no memory, all cpu)
# and
# (1 / ((0 * 1.3 + 0) or 1) - 0.434) * 1.76 = 0.566 * 1.76 ~= 1.0 (best case, all memory, no cpu)
w = (1 / ((self.cpufree_ratio * 1.3 + self.memfree_ratio) or 1) - 0.434) * 1.76
return min(max(0.0, w), 1.0)
def adjust(self, users_increment: int) -> 'ServerStats':
"""
@ -250,6 +259,7 @@ class ServerStats:
# Human readable
return f'memory: {self.memused//(1024*1024)}/{self.memtotal//(1024*1024)} cpu: {self.cpuused*100} users: {self.current_users}, weight: {self.weight()}, valid: {self.is_valid}'
# ServerCounter must be serializable by json, so
# we keep it as a NamedTuple instead of a dataclass
class ServerCounter(typing.NamedTuple):
@ -257,7 +267,9 @@ class ServerCounter(typing.NamedTuple):
counter: int
@staticmethod
def from_iterable(data: typing.Optional[collections.abc.Iterable[typing.Any]]) -> typing.Optional['ServerCounter']:
def from_iterable(
data: typing.Optional[collections.abc.Iterable[typing.Any]],
) -> typing.Optional['ServerCounter']:
if data is None:
return None

View File

@ -77,7 +77,7 @@ class ProxmoxUserServiceFixed(FixedUserService, autoserializable.AutoSerializabl
return types.states.TaskState.FINISHED
try:
vminfo = self.service().get_machine_info(int(self._vmid))
vminfo = self.service().get_vm_info(int(self._vmid))
except prox_exceptions.ProxmoxConnectionError:
raise # If connection fails, let it fail on parent
except Exception as e:
@ -104,7 +104,7 @@ class ProxmoxUserServiceFixed(FixedUserService, autoserializable.AutoSerializabl
def op_start(self) -> None:
try:
vminfo = self.service().get_machine_info(int(self._vmid))
vminfo = self.service().get_vm_info(int(self._vmid))
except prox_exceptions.ProxmoxConnectionError:
self.retry_later()
return

View File

@ -201,7 +201,7 @@ class ProxmoxUserserviceLinked(DynamicUserService):
# Set mac
try:
# Note: service will only enable ha if it is configured to do so
self.service().enable_machine_ha(int(self._vmid), True) # Enable HA before continuing here
self.service().enable_vm_ha(int(self._vmid), True) # Enable HA before continuing here
# Set vm mac address now on first interface
self.service().provider().set_machine_mac(int(self._vmid), self.get_unique_id())

View File

@ -52,7 +52,7 @@ def get_storage(parameters: typing.Any) -> types.ui.CallbackResultType:
# Obtains datacenter from cluster
try:
vm_info = provider.get_machine_info(int(parameters['machine']))
vm_info = provider.get_vm_info(int(parameters['machine']))
except Exception:
return []

View File

@ -134,7 +134,7 @@ class ProxmoxDeferredRemoval(jobs.Job):
# The soft shutdown has already being initiated by the remove method
try:
vmInfo = instance.get_machine_info(vmid)
vmInfo = instance.get_vm_info(vmid)
logger.debug('Found %s for removal %s', vmid, data)
# If machine is powered on, tries to stop it
# tries to remove in sync mode

View File

@ -169,10 +169,10 @@ class ProxmoxProvider(services.ServiceProvider):
def list_machines(self, force: bool = False) -> list[prox_types.VMInfo]:
return self._api().list_machines(force=force)
def get_machine_info(self, vmid: int, poolid: typing.Optional[str] = None) -> prox_types.VMInfo:
def get_vm_info(self, vmid: int, poolid: typing.Optional[str] = None) -> prox_types.VMInfo:
return self._api().get_machine_pool_info(vmid, poolid, force=True)
def get_machine_configuration(self, vmid: int) -> prox_types.VMConfiguration:
def get_vm_config(self, vmid: int) -> prox_types.VMConfiguration:
return self._api().get_machine_configuration(vmid, force=True)
def get_storage_info(self, storageid: str, node: str, force: bool = False) -> prox_types.StorageInfo:
@ -194,7 +194,7 @@ class ProxmoxProvider(services.ServiceProvider):
def create_template(self, vmid: int) -> None:
self._api().convert_to_template(vmid)
def clone_machine(
def clone_vm(
self,
vmid: int,
name: str,

View File

@ -118,7 +118,7 @@ class ProxmoxPublication(DynamicPublication, autoserializable.AutoSerializable):
self.service().provider().set_protection(int(self._vmid), protection=False)
time.sleep(0.5) # Give some tome to proxmox. We have observed some concurrency issues
# And add it to HA if needed (decided by service configuration)
self.service().enable_machine_ha(int(self._vmid))
self.service().enable_vm_ha(int(self._vmid))
# Wait a bit, if too fast, proxmox fails.. (Have not tested on 8.x, but previous versions failed if too fast..)
time.sleep(0.5)
# Mark vm as template

View File

@ -113,8 +113,8 @@ class ProxmoxServiceFixed(FixedService): # pylint: disable=too-many-public-meth
def provider(self) -> 'ProxmoxProvider':
return typing.cast('ProxmoxProvider', super().provider())
def get_machine_info(self, vmId: int) -> 'prox_types.VMInfo':
return self.provider().get_machine_info(vmId, self.pool.value.strip())
def get_vm_info(self, vmId: int) -> 'prox_types.VMInfo':
return self.provider().get_vm_info(vmId, self.pool.value.strip())
def is_avaliable(self) -> bool:
return self.provider().is_available()
@ -185,7 +185,7 @@ class ProxmoxServiceFixed(FixedService): # pylint: disable=too-many-public-meth
if checking_vmid not in assigned_vms: # Not already assigned
try:
# Invoke to check it exists, do not need to store the result
self.provider().get_machine_info(int(checking_vmid), self.pool.value.strip())
self.provider().get_vm_info(int(checking_vmid), self.pool.value.strip())
found_vmid = checking_vmid
break
except Exception: # Notifies on log, but skipt it
@ -209,11 +209,11 @@ class ProxmoxServiceFixed(FixedService): # pylint: disable=too-many-public-meth
return str(found_vmid)
def get_mac(self, vmid: str) -> str:
config = self.provider().get_machine_configuration(int(vmid))
config = self.provider().get_vm_config(int(vmid))
return config.networks[0].mac.lower()
def get_ip(self, vmid: str) -> str:
return self.provider().get_guest_ip_address(int(vmid))
def get_name(self, vmid: str) -> str:
return self.provider().get_machine_info(int(vmid)).name or ''
return self.provider().get_vm_info(int(vmid)).name or ''

View File

@ -39,7 +39,7 @@ from uds.core.services.generics.dynamic.publication import DynamicPublication
from uds.core.services.generics.dynamic.service import DynamicService
from uds.core.services.generics.dynamic.userservice import DynamicUserService
from uds.core.ui import gui
from uds.core.util import validators, fields
from uds.core.util import validators
from . import helpers
from .deployment_linked import ProxmoxUserserviceLinked
@ -123,7 +123,7 @@ class ProxmoxServiceLinked(DynamicService):
readonly=True,
)
try_soft_shutdown = fields.soft_shutdown_field()
try_soft_shutdown = DynamicService.try_soft_shutdown
machine = gui.ChoiceField(
label=_("Base Machine"),
@ -211,7 +211,7 @@ class ProxmoxServiceLinked(DynamicService):
name = self.sanitized_name(name)
pool = self.pool.value or None
if vmid == -1: # vmId == -1 if cloning for template
return self.provider().clone_machine(
return self.provider().clone_vm(
self.machine.as_int(),
name,
description,
@ -220,7 +220,7 @@ class ProxmoxServiceLinked(DynamicService):
target_pool=pool,
)
return self.provider().clone_machine(
return self.provider().clone_vm(
vmid,
name,
description,
@ -230,17 +230,18 @@ class ProxmoxServiceLinked(DynamicService):
must_have_vgpus={'1': True, '2': False}.get(self.gpu.value, None),
)
def get_machine_info(self, vmid: int) -> 'prox_types.VMInfo':
return self.provider().get_machine_info(vmid, self.pool.value.strip())
def get_vm_info(self, vmid: int) -> 'prox_types.VMInfo':
return self.provider().get_vm_info(vmid, self.pool.value.strip())
def get_nic_mac(self, vmid: int) -> str:
config = self.provider().get_machine_configuration(vmid)
config = self.provider().get_vm_config(vmid)
return config.networks[0].mac.lower()
def xremove_machine(self, vmid: int) -> 'prox_types.UPID':
# TODO: Remove this method, kept for reference of old code
def _xremove_machine(self, vmid: int) -> 'prox_types.UPID':
# First, remove from HA if needed
try:
self.disable_machine_ha(vmid)
self.disable_vm_ha(vmid)
except Exception as e:
logger.warning('Exception disabling HA for vm %s: %s', vmid, e)
self.do_log(level=types.log.LogLevel.WARNING, message=f'Exception disabling HA for vm {vmid}: {e}')
@ -248,22 +249,16 @@ class ProxmoxServiceLinked(DynamicService):
# And remove it
return self.provider().remove_machine(vmid)
def enable_machine_ha(self, vmid: int, started: bool = False) -> None:
def enable_vm_ha(self, vmid: int, started: bool = False) -> None:
if self.ha.value == '__':
return
self.provider().enable_machine_ha(vmid, started, self.ha.value or None)
def disable_machine_ha(self, vmid: int) -> None:
def disable_vm_ha(self, vmid: int) -> None:
if self.ha.value == '__':
return
self.provider().disable_machine_ha(vmid)
def get_basename(self) -> str:
return self.basename.value
def get_lenname(self) -> int:
return int(self.lenname.value)
def get_macs_range(self) -> str:
"""
Returns de selected mac range
@ -273,9 +268,6 @@ class ProxmoxServiceLinked(DynamicService):
def is_ha_enabled(self) -> bool:
return self.ha.value != '__'
def should_try_soft_shutdown(self) -> bool:
return self.try_soft_shutdown.as_bool()
def get_console_connection(self, vmid: str) -> typing.Optional[types.services.ConsoleConnectionInfo]:
return self.provider().get_console_connection(vmid)
@ -320,7 +312,7 @@ class ProxmoxServiceLinked(DynamicService):
def is_running(self, caller_instance: typing.Optional['DynamicUserService | DynamicPublication'], vmid: str) -> bool:
# Raise an exception if fails to get machine info
vminfo = self.get_machine_info(int(vmid))
vminfo = self.get_vm_info(int(vmid))
return vminfo.status != 'stopped'