From 548b6e813dbbc6adaff563fc38cefb0f52beead7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20G=C3=B3mez=20Garc=C3=ADa?= Date: Tue, 6 Jul 2021 12:39:22 +0200 Subject: [PATCH] Fixed Proxmox concurrencly on vmid assignation problem --- server/src/uds/core/services/provider.py | 4 +- server/src/uds/services/Proxmox/__init__.py | 4 +- .../uds/services/Proxmox/client/__init__.py | 10 +- server/src/uds/services/Proxmox/jobs.py | 72 ++++++--- server/src/uds/services/Proxmox/provider.py | 142 +++++++++++++++--- 5 files changed, 190 insertions(+), 42 deletions(-) diff --git a/server/src/uds/core/services/provider.py b/server/src/uds/core/services/provider.py index 0d9bbb56..6d515f2d 100644 --- a/server/src/uds/core/services/provider.py +++ b/server/src/uds/core/services/provider.py @@ -180,7 +180,7 @@ class ServiceProvider(Module): val = self.maxPreparingServices = GlobalConfig.MAX_PREPARING_SERVICES.getInt(force=True) # Recover global an cache till restart if isinstance(val, gui.InputField): - retVal = val.value + retVal = int(val.value) else: retVal = val return retVal if retVal > 0 else 1 @@ -191,7 +191,7 @@ class ServiceProvider(Module): val = self.maxRemovingServices = GlobalConfig.MAX_REMOVING_SERVICES.getInt(force=True) # Recover global an cache till restart if isinstance(val, gui.InputField): - retVal = val.value + retVal = int(val.value) else: retVal = val return retVal if retVal > 0 else 1 diff --git a/server/src/uds/services/Proxmox/__init__.py b/server/src/uds/services/Proxmox/__init__.py index b8953547..c5edf7b9 100644 --- a/server/src/uds/services/Proxmox/__init__.py +++ b/server/src/uds/services/Proxmox/__init__.py @@ -28,8 +28,8 @@ from uds.core import managers from .provider import ProxmoxProvider -from .jobs import ProxmoxDeferredRemoval +from .jobs import ProxmoxDeferredRemoval, ProxmoxVmidReleaser # Scheduled task to do clean processes -for cls in (ProxmoxDeferredRemoval, ): +for cls in (ProxmoxDeferredRemoval, ProxmoxVmidReleaser): 
managers.taskManager().registerJob(cls) diff --git a/server/src/uds/services/Proxmox/client/__init__.py b/server/src/uds/services/Proxmox/client/__init__.py index c24948fd..74f3ba62 100644 --- a/server/src/uds/services/Proxmox/client/__init__.py +++ b/server/src/uds/services/Proxmox/client/__init__.py @@ -256,6 +256,14 @@ class ProxmoxClient: def getNextVMId(self) -> int: return int(self._get('cluster/nextid')['data']) + @ensureConected + def isVMIdAvailable(self, vmId: int) -> bool: + try: + self._get(f'cluster/nextid?vmid={vmId}') + except Exception: # Not available + return False + return True + @ensureConected @allowCache( 'nodeNets', @@ -289,6 +297,7 @@ class ProxmoxClient: def cloneVm( self, vmId: int, + newVmId: int, name: str, description: typing.Optional[str], linkedClone: bool, @@ -296,7 +305,6 @@ class ProxmoxClient: toStorage: typing.Optional[str] = None, toPool: typing.Optional[str] = None, ) -> types.VmCreationResult: - newVmId = self.getNextVMId() vmInfo = self.getVmInfo(vmId) fromNode = vmInfo.node diff --git a/server/src/uds/services/Proxmox/jobs.py b/server/src/uds/services/Proxmox/jobs.py index e8f18b37..f4e62041 100644 --- a/server/src/uds/services/Proxmox/jobs.py +++ b/server/src/uds/services/Proxmox/jobs.py @@ -33,15 +33,17 @@ import typing from uds.core import jobs -from uds.models import Provider +from uds.models import Provider, getSqlDatetimeAsUnix +from uds.core.util.unique_id_generator import UniqueIDGenerator from . import provider from . 
import client -# Not imported at runtime, just for type checking +MAX_VMID_LIFE_SECS = 365 * 24 * 60 * 60 * 3 # 3 years for "reseting" logger = logging.getLogger(__name__) + class ProxmoxDeferredRemoval(jobs.Job): frecuency = 60 * 5 # Once every NN minutes friendly_name = 'Proxmox removal' @@ -49,55 +51,81 @@ class ProxmoxDeferredRemoval(jobs.Job): @staticmethod def remove(providerInstance: 'provider.ProxmoxProvider', vmId: int) -> None: - logger.debug('Adding %s from %s to defeffed removal process', vmId, providerInstance) + logger.debug( + 'Adding %s from %s to defeffed removal process', vmId, providerInstance + ) ProxmoxDeferredRemoval.counter += 1 try: # First check state & stop machine if needed vmInfo = providerInstance.getMachineInfo(vmId) if vmInfo.status == 'running': - # If running vm, simply stops it and wait for next - ProxmoxDeferredRemoval.waitForTaskFinish(providerInstance, providerInstance.stopMachine(vmId)) + # If running vm, simply stops it and wait for next + ProxmoxDeferredRemoval.waitForTaskFinish( + providerInstance, providerInstance.stopMachine(vmId) + ) - ProxmoxDeferredRemoval.waitForTaskFinish(providerInstance, providerInstance.removeMachine(vmId)) + ProxmoxDeferredRemoval.waitForTaskFinish( + providerInstance, providerInstance.removeMachine(vmId) + ) except client.ProxmoxNotFound: return # Machine does not exists except Exception as e: providerInstance.storage.saveData('tr' + str(vmId), str(vmId), attr1='tRm') - logger.info('Machine %s could not be removed right now, queued for later: %s', vmId, e) + logger.info( + 'Machine %s could not be removed right now, queued for later: %s', + vmId, + e, + ) @staticmethod - def waitForTaskFinish(providerInstance: 'provider.ProxmoxProvider', upid: 'client.types.UPID', maxWait: int = 30) -> bool: + def waitForTaskFinish( + providerInstance: 'provider.ProxmoxProvider', + upid: 'client.types.UPID', + maxWait: int = 30, + ) -> bool: counter = 0 - while providerInstance.getTaskInfo(upid.node, 
upid.upid).isRunning() and counter < maxWait: + while ( + providerInstance.getTaskInfo(upid.node, upid.upid).isRunning() + and counter < maxWait + ): time.sleep(0.3) counter += 1 - + return counter < maxWait def run(self) -> None: dbProvider: Provider # Look for Providers of type proxmox - for dbProvider in Provider.objects.filter(maintenance_mode=False, data_type=provider.ProxmoxProvider.typeType): + for dbProvider in Provider.objects.filter( + maintenance_mode=False, data_type=provider.ProxmoxProvider.typeType + ): logger.debug('Provider %s if os type proxmox', dbProvider) storage = dbProvider.getEnvironment().storage - instance: provider.ProxmoxProvider = typing.cast(provider.ProxmoxProvider, dbProvider.getInstance()) + instance: provider.ProxmoxProvider = typing.cast( + provider.ProxmoxProvider, dbProvider.getInstance() + ) for i in storage.filter('tRm'): vmId = int(i[1].decode()) - + try: vmInfo = instance.getMachineInfo(vmId) logger.debug('Found %s for removal %s', vmId, i) # If machine is powered on, tries to stop it # tries to remove in sync mode if vmInfo.status == 'running': - ProxmoxDeferredRemoval.waitForTaskFinish(instance, instance.stopMachine(vmId)) + ProxmoxDeferredRemoval.waitForTaskFinish( + instance, instance.stopMachine(vmId) + ) return - if vmInfo.status == 'stopped': # Machine exists, try to remove it now - ProxmoxDeferredRemoval.waitForTaskFinish(instance, instance.removeMachine(vmId)) - + if ( + vmInfo.status == 'stopped' + ): # Machine exists, try to remove it now + ProxmoxDeferredRemoval.waitForTaskFinish( + instance, instance.removeMachine(vmId) + ) # It this is reached, remove check storage.remove('tr' + str(vmId)) @@ -108,3 +136,13 @@ class ProxmoxDeferredRemoval(jobs.Job): logger.error('Delayed removal of %s failed: %s', i, e) logger.debug('Deferred removal for proxmox finished') + + +class ProxmoxVmidReleaser(jobs.Job): + frecuency = 60 * 60 * 24 * 30 # Once a month + friendly_name = 'Proxmox maintenance' + + def run(self) -> None: + 
logger.debug('Proxmox Vmid releader running') + gen = UniqueIDGenerator('vmid', 'proxmox', 'proxmox') + gen.releaseOlderThan(getSqlDatetimeAsUnix() - MAX_VMID_LIFE_SECS) diff --git a/server/src/uds/services/Proxmox/provider.py b/server/src/uds/services/Proxmox/provider.py index 4f7b5fcf..5f28b27c 100644 --- a/server/src/uds/services/Proxmox/provider.py +++ b/server/src/uds/services/Proxmox/provider.py @@ -32,9 +32,11 @@ import typing from django.utils.translation import ugettext_noop as _ +from uds.models import getSqlDatetimeAsUnix from uds.core import services from uds.core.ui import gui from uds.core.util import validators +from uds.core.util.unique_id_generator import UniqueIDGenerator from .service import ProxmoxLinkedService @@ -48,35 +50,114 @@ if typing.TYPE_CHECKING: logger = logging.getLogger(__name__) CACHE_TIME_FOR_SERVER = 1800 +MAX_VM_ID = 999999999 - -class ProxmoxProvider(services.ServiceProvider): # pylint: disable=too-many-public-methods +class ProxmoxProvider( + services.ServiceProvider +): # pylint: disable=too-many-public-methods offers = [ProxmoxLinkedService] typeName = _('Proxmox Platform Provider') typeType = 'ProxmoxPlatform' typeDescription = _('Proxmox platform service provider') iconFile = 'provider.png' - host = gui.TextField(length=64, label=_('Host'), order=1, tooltip=_('Proxmox Server IP or Hostname'), required=True) - port = gui.NumericField(lengh=5, label=_('Port'), order=2, tooltip=_('Proxmox API port (default is 8006)'), required=True, defvalue='8006') + host = gui.TextField( + length=64, + label=_('Host'), + order=1, + tooltip=_('Proxmox Server IP or Hostname'), + required=True, + ) + port = gui.NumericField( + lengh=5, + label=_('Port'), + order=2, + tooltip=_('Proxmox API port (default is 8006)'), + required=True, + defvalue='8006', + ) - username = gui.TextField(length=32, label=_('Username'), order=3, tooltip=_('User with valid privileges on Proxmox, (use "user@authenticator" form)'), required=True, defvalue='root@pam') - 
password = gui.PasswordField(lenth=32, label=_('Password'), order=4, tooltip=_('Password of the user of Proxmox'), required=True) + username = gui.TextField( + length=32, + label=_('Username'), + order=3, + tooltip=_( + 'User with valid privileges on Proxmox, (use "user@authenticator" form)' + ), + required=True, + defvalue='root@pam', + ) + password = gui.PasswordField( + lenth=32, + label=_('Password'), + order=4, + tooltip=_('Password of the user of Proxmox'), + required=True, + ) - maxPreparingServices = gui.NumericField(length=3, label=_('Creation concurrency'), defvalue='10', minValue=1, maxValue=65536, order=50, tooltip=_('Maximum number of concurrently creating VMs'), required=True, tab=gui.ADVANCED_TAB) - maxRemovingServices = gui.NumericField(length=3, label=_('Removal concurrency'), defvalue='5', minValue=1, maxValue=65536, order=51, tooltip=_('Maximum number of concurrently removing VMs'), required=True, tab=gui.ADVANCED_TAB) + maxPreparingServices = gui.NumericField( + length=3, + label=_('Creation concurrency'), + defvalue='10', + minValue=1, + maxValue=65536, + order=50, + tooltip=_('Maximum number of concurrently creating VMs'), + required=True, + tab=gui.ADVANCED_TAB, + ) + maxRemovingServices = gui.NumericField( + length=3, + label=_('Removal concurrency'), + defvalue='5', + minValue=1, + maxValue=65536, + order=51, + tooltip=_('Maximum number of concurrently removing VMs'), + required=True, + tab=gui.ADVANCED_TAB, + ) - timeout = gui.NumericField(length=3, label=_('Timeout'), defvalue='20', order=90, tooltip=_('Timeout in seconds of connection to Proxmox'), required=True, tab=gui.ADVANCED_TAB) + timeout = gui.NumericField( + length=3, + label=_('Timeout'), + defvalue='20', + order=90, + tooltip=_('Timeout in seconds of connection to Proxmox'), + required=True, + tab=gui.ADVANCED_TAB, + ) + + startVmId = gui.NumericField( + length=3, + label=_('Starting VmId'), + defvalue='10000', + minValue=10000, + maxValue=100000, + order=91, + 
tooltip=_('Starting machine id on proxmox'), + required=True, + tab=gui.ADVANCED_TAB, + ) # Own variables _api: typing.Optional[client.ProxmoxClient] = None + _vmid_generator: UniqueIDGenerator def __getApi(self) -> client.ProxmoxClient: """ Returns the connection API object """ if self._api is None: - self._api = client.ProxmoxClient(self.host.value, self.port.num(), self.username.value, self.password.value, self.timeout.num(), False, self.cache) + self._api = client.ProxmoxClient( + self.host.value, + self.port.num(), + self.username.value, + self.password.value, + self.timeout.num(), + False, + self.cache, + ) return self._api @@ -88,6 +169,8 @@ class ProxmoxProvider(services.ServiceProvider): # pylint: disable=too-many-pub # Just reset _api connection variable self._api = None + # All proxmox use same UniqueId generator + self._vmid_generator = UniqueIDGenerator('vmid', 'proxmox', 'proxmox') if values is not None: self.timeout.value = validators.validateTimeout(self.timeout.value) @@ -107,16 +190,20 @@ class ProxmoxProvider(services.ServiceProvider): # pylint: disable=too-many-pub def listMachines(self) -> typing.List[client.types.VMInfo]: return self.__getApi().listVms() - def getMachineInfo(self, vmId: int, poolId: typing.Optional[str] = None) -> client.types.VMInfo: + def getMachineInfo( + self, vmId: int, poolId: typing.Optional[str] = None + ) -> client.types.VMInfo: return self.__getApi().getVMPoolInfo(vmId, poolId, force=True) def getMachineConfiguration(self, vmId: int) -> client.types.VMConfiguration: return self.__getApi().getVmConfiguration(vmId, force=True) - + def getStorageInfo(self, storageId: str, node: str) -> client.types.StorageInfo: return self.__getApi().getStorage(storageId, node) - def listStorages(self, node: typing.Optional[str]) -> typing.List[client.types.StorageInfo]: + def listStorages( + self, node: typing.Optional[str] + ) -> typing.List[client.types.StorageInfo]: return self.__getApi().listStorages(node=node, content='images') 
def listPools(self) -> typing.List[client.types.PoolInfo]: @@ -133,11 +220,13 @@ class ProxmoxProvider(services.ServiceProvider): # pylint: disable=too-many-pub linkedClone: bool, toNode: typing.Optional[str] = None, toStorage: typing.Optional[str] = None, - toPool: typing.Optional[str] = None + toPool: typing.Optional[str] = None, ) -> client.types.VmCreationResult: - return self.__getApi().cloneVm(vmId, name, description, linkedClone, toNode, toStorage, toPool) + return self.__getApi().cloneVm( + vmId, self.getNewVmId(), name, description, linkedClone, toNode, toStorage, toPool + ) - def startMachine(self,vmId: int) -> client.types.UPID: + def startMachine(self, vmId: int) -> client.types.UPID: return self.__getApi().startVm(vmId) def stopMachine(self, vmId: int) -> client.types.UPID: @@ -158,22 +247,35 @@ class ProxmoxProvider(services.ServiceProvider): # pylint: disable=too-many-pub def getTaskInfo(self, node: str, upid: str) -> client.types.TaskStatus: return self.__getApi().getTask(node, upid) - def enableHA(self, vmId: int, started: bool = False, group: typing.Optional[str] = None) -> None: + def enableHA( + self, vmId: int, started: bool = False, group: typing.Optional[str] = None + ) -> None: self.__getApi().enableVmHA(vmId, started, group) def disableHA(self, vmId: int) -> None: self.__getApi().disableVmHA(vmId) - def setProtection(self, vmId: int, node: typing.Optional[str] = None, protection: bool = False) -> None: - self.__getApi().setProtection(vmId, node, protection) + def setProtection( + self, vmId: int, node: typing.Optional[str] = None, protection: bool = False + ) -> None: + self.__getApi().setProtection(vmId, node, protection) def listHaGroups(self) -> typing.List[str]: return self.__getApi().listHAGroups() - def getConsoleConnection(self, machineId: str) -> typing.Optional[typing.MutableMapping[str, typing.Any]]: + def getConsoleConnection( + self, machineId: str + ) -> typing.Optional[typing.MutableMapping[str, typing.Any]]: # TODO: maybe 
proxmox also supports "spice"? for future release... return None + def getNewVmId(self) -> int: + while True: # look for an unused VmId + vmId = self._vmid_generator.get(self.startVmId.num(), MAX_VM_ID) + if self.__getApi().isVMIdAvailable(vmId): + return vmId + # All assigned VMId will be left as unusable on UDS until released by time (3 years) + @staticmethod def test(env: 'Environment', data: 'Module.ValuesType') -> typing.List[typing.Any]: """