Fixed Proxmox concurrency problem on vmid assignment

This commit is contained in:
Adolfo Gómez García 2021-07-06 12:39:22 +02:00
parent 31b513a7ef
commit 548b6e813d
5 changed files with 190 additions and 42 deletions

View File

@ -180,7 +180,7 @@ class ServiceProvider(Module):
val = self.maxPreparingServices = GlobalConfig.MAX_PREPARING_SERVICES.getInt(force=True) # Recover global an cache till restart
if isinstance(val, gui.InputField):
retVal = val.value
retVal = int(val.value)
else:
retVal = val
return retVal if retVal > 0 else 1
@ -191,7 +191,7 @@ class ServiceProvider(Module):
val = self.maxRemovingServices = GlobalConfig.MAX_REMOVING_SERVICES.getInt(force=True) # Recover global an cache till restart
if isinstance(val, gui.InputField):
retVal = val.value
retVal = int(val.value)
else:
retVal = val
return retVal if retVal > 0 else 1

View File

@ -28,8 +28,8 @@
from uds.core import managers
from .provider import ProxmoxProvider
from .jobs import ProxmoxDeferredRemoval
from .jobs import ProxmoxDeferredRemoval, ProxmoxVmidReleaser
# Scheduled task to do clean processes
for cls in (ProxmoxDeferredRemoval, ):
for cls in (ProxmoxDeferredRemoval, ProxmoxVmidReleaser):
managers.taskManager().registerJob(cls)

View File

@ -256,6 +256,14 @@ class ProxmoxClient:
def getNextVMId(self) -> int:
    """
    Asks the cluster for its next VM id.

    Returns:
        The 'data' field of the 'cluster/nextid' response, converted to int.
    """
    response = self._get('cluster/nextid')
    return int(response['data'])
@ensureConected
def isVMIdAvailable(self, vmId: int) -> bool:
    """
    Checks whether a VM id can be used for a new machine.

    Requests 'cluster/nextid' passing the candidate id as the 'vmid' query
    parameter. Any exception raised by that request is reported as
    "id not available".

    NOTE(review): every Exception is caught, so a failure of the request for
    any other reason is also reported as "not available" -- confirm that
    this is intended.

    Args:
        vmId: Candidate VM id to check

    Returns:
        True if the request completed without raising, False otherwise.
    """
    try:
        self._get(f'cluster/nextid?vmid={vmId}')
    except Exception:  # Not available
        available = False
    else:
        available = True
    return available
@ensureConected
@allowCache(
'nodeNets',
@ -289,6 +297,7 @@ class ProxmoxClient:
def cloneVm(
self,
vmId: int,
newVmId: int,
name: str,
description: typing.Optional[str],
linkedClone: bool,
@ -296,7 +305,6 @@ class ProxmoxClient:
toStorage: typing.Optional[str] = None,
toPool: typing.Optional[str] = None,
) -> types.VmCreationResult:
newVmId = self.getNextVMId()
vmInfo = self.getVmInfo(vmId)
fromNode = vmInfo.node

View File

@ -33,15 +33,17 @@ import typing
from uds.core import jobs
from uds.models import Provider
from uds.models import Provider, getSqlDatetimeAsUnix
from uds.core.util.unique_id_generator import UniqueIDGenerator
from . import provider
from . import client
# Not imported at runtime, just for type checking
# Age threshold, in seconds, that ProxmoxVmidReleaser subtracts from the current
# time before calling releaseOlderThan on the vmid generator.
MAX_VMID_LIFE_SECS = 365 * 24 * 60 * 60 * 3 # 3 years for "resetting"
logger = logging.getLogger(__name__)
class ProxmoxDeferredRemoval(jobs.Job):
frecuency = 60 * 5 # Once every NN minutes
friendly_name = 'Proxmox removal'
@ -49,26 +51,43 @@ class ProxmoxDeferredRemoval(jobs.Job):
@staticmethod
def remove(providerInstance: 'provider.ProxmoxProvider', vmId: int) -> None:
logger.debug('Adding %s from %s to defeffed removal process', vmId, providerInstance)
logger.debug(
'Adding %s from %s to defeffed removal process', vmId, providerInstance
)
ProxmoxDeferredRemoval.counter += 1
try:
# First check state & stop machine if needed
vmInfo = providerInstance.getMachineInfo(vmId)
if vmInfo.status == 'running':
# If running vm, simply stops it and wait for next
ProxmoxDeferredRemoval.waitForTaskFinish(providerInstance, providerInstance.stopMachine(vmId))
ProxmoxDeferredRemoval.waitForTaskFinish(
providerInstance, providerInstance.stopMachine(vmId)
)
ProxmoxDeferredRemoval.waitForTaskFinish(providerInstance, providerInstance.removeMachine(vmId))
ProxmoxDeferredRemoval.waitForTaskFinish(
providerInstance, providerInstance.removeMachine(vmId)
)
except client.ProxmoxNotFound:
return # Machine does not exists
except Exception as e:
providerInstance.storage.saveData('tr' + str(vmId), str(vmId), attr1='tRm')
logger.info('Machine %s could not be removed right now, queued for later: %s', vmId, e)
logger.info(
'Machine %s could not be removed right now, queued for later: %s',
vmId,
e,
)
@staticmethod
def waitForTaskFinish(providerInstance: 'provider.ProxmoxProvider', upid: 'client.types.UPID', maxWait: int = 30) -> bool:
def waitForTaskFinish(
providerInstance: 'provider.ProxmoxProvider',
upid: 'client.types.UPID',
maxWait: int = 30,
) -> bool:
counter = 0
while providerInstance.getTaskInfo(upid.node, upid.upid).isRunning() and counter < maxWait:
while (
providerInstance.getTaskInfo(upid.node, upid.upid).isRunning()
and counter < maxWait
):
time.sleep(0.3)
counter += 1
@ -77,11 +96,15 @@ class ProxmoxDeferredRemoval(jobs.Job):
def run(self) -> None:
dbProvider: Provider
# Look for Providers of type proxmox
for dbProvider in Provider.objects.filter(maintenance_mode=False, data_type=provider.ProxmoxProvider.typeType):
for dbProvider in Provider.objects.filter(
maintenance_mode=False, data_type=provider.ProxmoxProvider.typeType
):
logger.debug('Provider %s if os type proxmox', dbProvider)
storage = dbProvider.getEnvironment().storage
instance: provider.ProxmoxProvider = typing.cast(provider.ProxmoxProvider, dbProvider.getInstance())
instance: provider.ProxmoxProvider = typing.cast(
provider.ProxmoxProvider, dbProvider.getInstance()
)
for i in storage.filter('tRm'):
vmId = int(i[1].decode())
@ -92,12 +115,17 @@ class ProxmoxDeferredRemoval(jobs.Job):
# If machine is powered on, tries to stop it
# tries to remove in sync mode
if vmInfo.status == 'running':
ProxmoxDeferredRemoval.waitForTaskFinish(instance, instance.stopMachine(vmId))
ProxmoxDeferredRemoval.waitForTaskFinish(
instance, instance.stopMachine(vmId)
)
return
if vmInfo.status == 'stopped': # Machine exists, try to remove it now
ProxmoxDeferredRemoval.waitForTaskFinish(instance, instance.removeMachine(vmId))
if (
vmInfo.status == 'stopped'
): # Machine exists, try to remove it now
ProxmoxDeferredRemoval.waitForTaskFinish(
instance, instance.removeMachine(vmId)
)
# If this is reached, remove check
storage.remove('tr' + str(vmId))
@ -108,3 +136,13 @@ class ProxmoxDeferredRemoval(jobs.Job):
logger.error('Delayed removal of %s failed: %s', i, e)
logger.debug('Deferred removal for proxmox finished')
class ProxmoxVmidReleaser(jobs.Job):
    """
    Scheduled job that asks the shared Proxmox vmid generator to release
    the ids older than MAX_VMID_LIFE_SECS.
    """

    # presumably expressed in seconds, as the arithmetic suggests -- TODO confirm
    frecuency = 30 * 24 * 60 * 60  # Once a month
    friendly_name = 'Proxmox maintenance'

    def run(self) -> None:
        """
        Releases the vmids whose timestamp is older than
        (current SQL time - MAX_VMID_LIFE_SECS).
        """
        logger.debug('Proxmox Vmid releader running')
        # Same owner/name used by the provider for its generator
        vmidGenerator = UniqueIDGenerator('vmid', 'proxmox', 'proxmox')
        threshold = getSqlDatetimeAsUnix() - MAX_VMID_LIFE_SECS
        vmidGenerator.releaseOlderThan(threshold)

View File

@ -32,9 +32,11 @@ import typing
from django.utils.translation import ugettext_noop as _
from uds.models import getSqlDatetimeAsUnix
from uds.core import services
from uds.core.ui import gui
from uds.core.util import validators
from uds.core.util.unique_id_generator import UniqueIDGenerator
from .service import ProxmoxLinkedService
@ -48,35 +50,114 @@ if typing.TYPE_CHECKING:
logger = logging.getLogger(__name__)
CACHE_TIME_FOR_SERVER = 1800
# Upper limit passed to the vmid generator when requesting a new id (see getNewVmId)
MAX_VM_ID = 999999999
class ProxmoxProvider(services.ServiceProvider): # pylint: disable=too-many-public-methods
class ProxmoxProvider(
services.ServiceProvider
): # pylint: disable=too-many-public-methods
offers = [ProxmoxLinkedService]
typeName = _('Proxmox Platform Provider')
typeType = 'ProxmoxPlatform'
typeDescription = _('Proxmox platform service provider')
iconFile = 'provider.png'
host = gui.TextField(length=64, label=_('Host'), order=1, tooltip=_('Proxmox Server IP or Hostname'), required=True)
port = gui.NumericField(lengh=5, label=_('Port'), order=2, tooltip=_('Proxmox API port (default is 8006)'), required=True, defvalue='8006')
host = gui.TextField(
length=64,
label=_('Host'),
order=1,
tooltip=_('Proxmox Server IP or Hostname'),
required=True,
)
port = gui.NumericField(
lengh=5,
label=_('Port'),
order=2,
tooltip=_('Proxmox API port (default is 8006)'),
required=True,
defvalue='8006',
)
username = gui.TextField(length=32, label=_('Username'), order=3, tooltip=_('User with valid privileges on Proxmox, (use "user@authenticator" form)'), required=True, defvalue='root@pam')
password = gui.PasswordField(lenth=32, label=_('Password'), order=4, tooltip=_('Password of the user of Proxmox'), required=True)
username = gui.TextField(
length=32,
label=_('Username'),
order=3,
tooltip=_(
'User with valid privileges on Proxmox, (use "user@authenticator" form)'
),
required=True,
defvalue='root@pam',
)
password = gui.PasswordField(
lenth=32,
label=_('Password'),
order=4,
tooltip=_('Password of the user of Proxmox'),
required=True,
)
maxPreparingServices = gui.NumericField(length=3, label=_('Creation concurrency'), defvalue='10', minValue=1, maxValue=65536, order=50, tooltip=_('Maximum number of concurrently creating VMs'), required=True, tab=gui.ADVANCED_TAB)
maxRemovingServices = gui.NumericField(length=3, label=_('Removal concurrency'), defvalue='5', minValue=1, maxValue=65536, order=51, tooltip=_('Maximum number of concurrently removing VMs'), required=True, tab=gui.ADVANCED_TAB)
maxPreparingServices = gui.NumericField(
length=3,
label=_('Creation concurrency'),
defvalue='10',
minValue=1,
maxValue=65536,
order=50,
tooltip=_('Maximum number of concurrently creating VMs'),
required=True,
tab=gui.ADVANCED_TAB,
)
maxRemovingServices = gui.NumericField(
length=3,
label=_('Removal concurrency'),
defvalue='5',
minValue=1,
maxValue=65536,
order=51,
tooltip=_('Maximum number of concurrently removing VMs'),
required=True,
tab=gui.ADVANCED_TAB,
)
timeout = gui.NumericField(length=3, label=_('Timeout'), defvalue='20', order=90, tooltip=_('Timeout in seconds of connection to Proxmox'), required=True, tab=gui.ADVANCED_TAB)
timeout = gui.NumericField(
length=3,
label=_('Timeout'),
defvalue='20',
order=90,
tooltip=_('Timeout in seconds of connection to Proxmox'),
required=True,
tab=gui.ADVANCED_TAB,
)
startVmId = gui.NumericField(
length=3,
label=_('Starting VmId'),
defvalue='10000',
minValue=10000,
maxValue=100000,
order=91,
tooltip=_('Starting machine id on proxmox'),
required=True,
tab=gui.ADVANCED_TAB,
)
# Own variables
_api: typing.Optional[client.ProxmoxClient] = None
_vmid_generator: UniqueIDGenerator
def __getApi(self) -> client.ProxmoxClient:
"""
Returns the connection API object
"""
if self._api is None:
self._api = client.ProxmoxClient(self.host.value, self.port.num(), self.username.value, self.password.value, self.timeout.num(), False, self.cache)
self._api = client.ProxmoxClient(
self.host.value,
self.port.num(),
self.username.value,
self.password.value,
self.timeout.num(),
False,
self.cache,
)
return self._api
@ -88,6 +169,8 @@ class ProxmoxProvider(services.ServiceProvider): # pylint: disable=too-many-pub
# Just reset _api connection variable
self._api = None
# All proxmox use same UniqueId generator
self._vmid_generator = UniqueIDGenerator('vmid', 'proxmox', 'proxmox')
if values is not None:
self.timeout.value = validators.validateTimeout(self.timeout.value)
@ -107,7 +190,9 @@ class ProxmoxProvider(services.ServiceProvider): # pylint: disable=too-many-pub
def listMachines(self) -> typing.List[client.types.VMInfo]:
return self.__getApi().listVms()
def getMachineInfo(self, vmId: int, poolId: typing.Optional[str] = None) -> client.types.VMInfo:
def getMachineInfo(
self, vmId: int, poolId: typing.Optional[str] = None
) -> client.types.VMInfo:
return self.__getApi().getVMPoolInfo(vmId, poolId, force=True)
def getMachineConfiguration(self, vmId: int) -> client.types.VMConfiguration:
@ -116,7 +201,9 @@ class ProxmoxProvider(services.ServiceProvider): # pylint: disable=too-many-pub
def getStorageInfo(self, storageId: str, node: str) -> client.types.StorageInfo:
return self.__getApi().getStorage(storageId, node)
def listStorages(self, node: typing.Optional[str]) -> typing.List[client.types.StorageInfo]:
def listStorages(
self, node: typing.Optional[str]
) -> typing.List[client.types.StorageInfo]:
return self.__getApi().listStorages(node=node, content='images')
def listPools(self) -> typing.List[client.types.PoolInfo]:
@ -133,9 +220,11 @@ class ProxmoxProvider(services.ServiceProvider): # pylint: disable=too-many-pub
linkedClone: bool,
toNode: typing.Optional[str] = None,
toStorage: typing.Optional[str] = None,
toPool: typing.Optional[str] = None
toPool: typing.Optional[str] = None,
) -> client.types.VmCreationResult:
return self.__getApi().cloneVm(vmId, name, description, linkedClone, toNode, toStorage, toPool)
return self.__getApi().cloneVm(
vmId, self.getNewVmId(), name, description, linkedClone, toNode, toStorage, toPool
)
def startMachine(self, vmId: int) -> client.types.UPID:
return self.__getApi().startVm(vmId)
@ -158,22 +247,35 @@ class ProxmoxProvider(services.ServiceProvider): # pylint: disable=too-many-pub
def getTaskInfo(self, node: str, upid: str) -> client.types.TaskStatus:
return self.__getApi().getTask(node, upid)
def enableHA(self, vmId: int, started: bool = False, group: typing.Optional[str] = None) -> None:
def enableHA(
self, vmId: int, started: bool = False, group: typing.Optional[str] = None
) -> None:
self.__getApi().enableVmHA(vmId, started, group)
def disableHA(self, vmId: int) -> None:
self.__getApi().disableVmHA(vmId)
def setProtection(self, vmId: int, node: typing.Optional[str] = None, protection: bool = False) -> None:
def setProtection(
self, vmId: int, node: typing.Optional[str] = None, protection: bool = False
) -> None:
self.__getApi().setProtection(vmId, node, protection)
def listHaGroups(self) -> typing.List[str]:
return self.__getApi().listHAGroups()
def getConsoleConnection(self, machineId: str) -> typing.Optional[typing.MutableMapping[str, typing.Any]]:
def getConsoleConnection(
self, machineId: str
) -> typing.Optional[typing.MutableMapping[str, typing.Any]]:
# TODO: maybe proxmox also supports "spice"? for future release...
return None
def getNewVmId(self) -> int:
    """
    Obtains a VM id that Proxmox reports as available.

    Candidates are requested from the shared vmid generator, passing the
    configured starting vmid and MAX_VM_ID, until the Proxmox API accepts one.

    All assigned VMId will be left as unusable on UDS until released by time.
    NOTE(review): the original note stated "3 months", but MAX_VMID_LIFE_SECS
    (used by ProxmoxVmidReleaser in the jobs module) is defined as 3 years --
    confirm which one is intended.

    Returns:
        The first generated id for which isVMIdAvailable returns True.
    """
    candidate = self._vmid_generator.get(self.startVmId.num(), MAX_VM_ID)
    while not self.__getApi().isVMIdAvailable(candidate):
        # Rejected by Proxmox, look for another unused VmId
        candidate = self._vmid_generator.get(self.startVmId.num(), MAX_VM_ID)
    return candidate
@staticmethod
def test(env: 'Environment', data: 'Module.ValuesType') -> typing.List[typing.Any]:
"""