1
0
mirror of https://github.com/dkmstr/openuds.git synced 2025-01-05 09:17:54 +03:00

Advancing a lot with oVirt. Detected something on machine creation that may be a vdsm bug (oVirt). Going to file a bug report to see whether it really is an error or something I'm missing (seems like a bug anyway... :-) )

This commit is contained in:
Adolfo Gómez 2012-11-21 13:04:58 +00:00
parent aebacb166b
commit 400ac892f0
5 changed files with 351 additions and 117 deletions

View File

@ -37,6 +37,8 @@ import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Operation codes stored in a deployment's operation queue (self._queue)
opCreate, opStart, opStop, opSuspend, opWait, opError, opFinish, opRetry = range(8)
class OVirtLinkedDeployment(UserDeployment): class OVirtLinkedDeployment(UserDeployment):
''' '''
This class generates the user consumable elements of the service tree. This class generates the user consumable elements of the service tree.
@ -52,6 +54,14 @@ class OVirtLinkedDeployment(UserDeployment):
#: Recheck every five seconds by default (for task methods) #: Recheck every five seconds by default (for task methods)
suggestedTime = 5 suggestedTime = 5
def initialize(self):
    '''
    Resets every per-instance field to its empty default.

    ``_mac`` is initialised here as well: ``getUniqueId`` compares
    ``self._mac`` against '' before asking the generator for a MAC, so the
    attribute has to exist on a freshly initialised instance.
    '''
    self._name = ''
    self._ip = ''
    self._vmid = ''
    self._mac = ''
    self._reason = ''
    # Pending operations (op* codes); the head is the one being processed
    self._queue = []
    self._destroyAfter = 'f'
# Serializable needed methods # Serializable needed methods
def marshal(self): def marshal(self):
''' '''
@ -87,14 +97,10 @@ class OVirtLinkedDeployment(UserDeployment):
a new unique name, so we keep the first generated name cached and don't a new unique name, so we keep the first generated name cached and don't
generate more names. (Generator are simple utility classes) generate more names. (Generator are simple utility classes)
''' '''
name = self.storage().readData('name') if self._name == '':
if name is None: self._name = self.nameGenerator().get( self.service().getBaseName(), self.service().getLenName() )
name = self.nameGenerator().get( self.service().getBaseName() return self._name
+ '-' + self.service().getColour(), 3 )
# Store value for persistence
self.storage().saveData('name', name)
return name
def setIp(self, ip): def setIp(self, ip):
''' '''
@ -108,7 +114,8 @@ class OVirtLinkedDeployment(UserDeployment):
:note: This IP is the IP of the "consumed service", so the transport can :note: This IP is the IP of the "consumed service", so the transport can
access it. access it.
''' '''
self.storage().saveData('ip', str(ip)) logger.debug('Setting IP to %s' % ip)
self._ip = ip
def getUniqueId(self): def getUniqueId(self):
''' '''
@ -120,11 +127,9 @@ class OVirtLinkedDeployment(UserDeployment):
The get method of a mac generator takes one param, that is the mac range The get method of a mac generator takes one param, that is the mac range
to use to get an unused mac. to use to get an unused mac.
''' '''
mac = self.storage().readData('mac') if self._mac == '':
if mac is None: self._mac = self.macGenerator().get( self.service().getMacRange() )
mac = self.macGenerator().get( '00:00:00:00:00:00-00:FF:FF:FF:FF:FF' ) return self._mac
self.storage().saveData('mac', mac)
return mac
def getIp(self): def getIp(self):
''' '''
@ -144,120 +149,189 @@ class OVirtLinkedDeployment(UserDeployment):
show the IP to the administrator, this method will get called show the IP to the administrator, this method will get called
''' '''
ip = self.storage().readData('ip') return self._ip
if ip is None:
ip = '192.168.0.34' # Sample IP for testing purposses only
return ip
def setReady(self): def setReady(self):
''' '''
This is a task method. As that, the expected return values are
State values RUNNING, FINISHED or ERROR.
The method is invoked whenever a machine is provided to an user, right The method is invoked whenever a machine is provided to an user, right
before presenting it (via transport rendering) to the user. before presenting it (via transport rendering) to the user.
This method exist for this kind of situations (i will explain it with a
sample)
Imagine a Service tree (Provider, Service, ...) for virtual machines.
This machines will get created by the UserDeployment implementation, but,
at some time, the machine can be put in a state (suspended, shut down)
that will make it impossible for the transport to connect to it.
This method, in this case, will check the state of the machine, and if
it is "ready", that is, powered on and accessible, it will return
"State.FINISHED". If the machine is not accessible (has been erased, for
example), it will return "State.ERROR" and store a reason of error so UDS
can ask for it and present this information to the Administrator.
If the machine powered off, or suspended, or any other state that is not
directly usable but can be put in an usable state, it will return
"State.RUNNING", and core will use checkState to see when the operation
has finished.
I hope this sample is enough to explain the use of this method..
''' '''
if self.cache().get('ready') == '1':
# In our case, the service is always ready
return State.FINISHED return State.FINISHED
state = self.service().getMachineState(self._vmid)
if state == 'unknown':
return self.__error('Machine is not available anymore')
if state not in ('up', 'powering_up', 'restoring_state'):
return self.__powerOn()
self.cache().put('ready', '1')
return State.FINISHED
def notifyReadyFromOsManager(self, data):
    '''
    Handles the "ready" notification for this machine.

    When the operation queue is parked on opWait, that entry is removed and
    the queue is resumed. In any other case there is nothing to do.

    Args:
        data: Not used by this method

    Returns:
        The result of resuming the queue, or State.FINISHED when the queue
        was not waiting
    '''
    logger.debug('Checking if cache 2 for {0}'.format(self._name))
    if self.__getCurrentOp() != opWait:
        return State.FINISHED

    # The wait step is over: drop it and go on with the next operation
    logger.debug('Machine is ready. Moving to level 2')
    self.__popCurrentOp()
    return self.__executeQueue()
def __executeQueue(self):
    '''
    Launches the operation found at the head of the queue.

    Returns:
        State.ERROR or State.FINISHED for the two terminal operations, and
        the result of self.__create() for opCreate. Any other operation has
        no launcher here and is reported through self.__error(...), so the
        method never returns None.
    '''
    op = self.__getCurrentOp()

    if op == opError:
        return State.ERROR

    if op == opFinish:
        return State.FINISHED

    if op == opCreate:
        return self.__create()

    # No launcher for this operation: fail explicitly (same message that
    # checkState uses) instead of falling through and returning None
    return self.__error('Unexpected operation found')
def __initQueueForDeploy(self, forLevel2 = False):
    '''
    Loads the queue with the operations of a fresh deployment.

    Args:
        forLevel2: Only the literal False selects the short sequence
                   (create, start, finish). Any other value selects the
                   sequence that also waits and suspends before finishing.
    '''
    if forLevel2 is not False:
        self._queue = [opCreate, opStart, opWait, opSuspend, opFinish]
        return

    self._queue = [opCreate, opStart, opFinish]
def __getCurrentOp(self):
    '''
    Returns the operation at the head of the queue without removing it.

    An empty queue is reported as opFinish.
    '''
    if self._queue:
        return self._queue[0]

    return opFinish
def __popCurrentOp(self):
    '''
    Removes the operation at the head of the queue and returns it.

    An empty queue is left untouched and opFinish is returned.
    '''
    if not self._queue:
        return opFinish

    return self._queue.pop(0)
def __pushFrontOp(self, op):
    '''
    Places an operation at the head of the queue, so it becomes the current one.

    Args:
        op: Operation code to insert
    '''
    # In-place slice assignment, so the list object itself is kept
    self._queue[0:0] = [op]
def __pushBackOp(self, op):
    '''
    Adds an operation at the tail of the queue.

    Args:
        op: Operation code to add
    '''
    self._queue.extend((op,))
def __error(self, reason):
    '''
    Puts the object in error state.

    The queue is replaced by a single opError entry and the reason is kept,
    as a string, in self._reason.

    Args:
        reason: Cause of the failure (any object; str() is applied to it)

    Returns:
        State.ERROR, which allows callers to write "return self.__error(reason)"
    '''
    self._queue = [opError]
    self._reason = str(reason)

    return State.ERROR
# Queue execution methods
def __retry(self):
    '''
    Placeholder for the "retry" operation: does no work and reports the step as done.

    Per the original author's note, checkState pops the head of the queue
    whenever a check returns State.FINISHED, so this is only expected to run
    if the retry operation is pushed twice.

    Returns:
        State.FINISHED, always
    '''
    return State.FINISHED
def __create(self):
    '''
    Starts the deployment of a machine from the publication's template.

    The id returned by the service is stored in self._vmid. A None id, or any
    exception raised by the service call, puts the object in error state.

    Returns:
        State.RUNNING when the request was accepted, State.ERROR otherwise
    '''
    templateId = self.publication().getTemplateId()
    name = self.service().sanitizeVmName('UDS service ' + self.getName())
    comments = 'UDS Linked clone for'

    try:
        vmid = self.service().deployFromTemplate(name, comments, templateId)
        self._vmid = vmid
        if vmid is None:
            raise Exception('Can\'t create machine')
    except Exception as e:
        # Reported through the common error path, which keeps the reason
        return self.__error(e)

    return State.RUNNING
def __powerOn(self):
    '''
    Powers on the machine

    NOTE: the power-on request itself is not implemented yet. Right now the
    method only reads the machine state and implicitly returns None.
    '''
    # self.service is a method: it must be called to obtain the service object
    state = self.service().getMachineState(self._vmid)
    if state == 'down':
        # TODO: request the power on here
        pass
def deployForUser(self, user): def deployForUser(self, user):
''' '''
Deploys an service instance for an user. Deploys an service instance for an user.
This is a task method. As such, the expected return values are
State values RUNNING, FINISHED or ERROR.
The user parameter is not really needed, but it is provided. It indicates the
Database User Object (see py:mod:`uds.modules`) to which this deployed
user service will be assigned to.
This method will get called whenever a new deployed service for an user
is needed. This will give this class the opportunity to create
a service that is assigned to an user.
The way of using this method is as follows:
If the service gets created in "one step", that is, before the return
of this method, the consumable service for the user gets created, it
will return "State.FINISHED".
If the service needs more steps (as in this case), we will return
"State.RUNNING", and if it has an error, it will return "State.ERROR" and
store an error string so administration interface can show it.
We do not use user for anything, as in most cases will be.
''' '''
import random self.__initQueueForDeploy(False)
return self.__executeQueue()
self.storage().saveData('count', '0') def deployForCache(self, cacheLevel):
'''
# random fail Deploys an service instance for cache
if random.randint(0, 9) == 9: '''
self.storage().saveData('error', 'Random error at deployForUser :-)') forLevel2 = cacheLevel == self.L2_CACHE
return State.ERROR self.__initQueueForDeploy(forLevel2)
return self.__executeQueue()
def __checkDeploy(self):
'''
Checks the state of a deploy for an user or cache
'''
try:
state = self.service().getMachineState(self._vmid)
if state != 'down':
return State.RUNNING return State.RUNNING
except Exception as e:
return self.__error(e)
return State.FINISHED
def checkState(self): def checkState(self):
''' '''
Our deployForUser method will initiate the consumable service deployment, Check what operation is going on, and acts acordly to it
but will not finish it.
So in our sample, we will only check if a number reaches 5, and if so
return that we have finished, else we will return that we are working
on it.
One deployForUser returns State.RUNNING, this task will get called until
checkState returns State.FINISHED.
Also, we will make the publication fail one of every 10 calls to this
method.
Note: Destroying, canceling and deploying for cache also makes use of
this method, so you must keep the info of that you are checking if you
need it.
In our case, destroy is 1-step action so this will no get called while
destroying, and cancel will simply invoke destroy
''' '''
import random op = self.__getCurrentOp()
count = int(self.storage().readData('count')) + 1 if op == opError:
# Count is always a valid value, because this method will never get
# called before deployForUser, deployForCache, destroy or cancel.
# In our sample, we only use checkState in case of deployForUser,
# so at first call count will be 0.
if count >= 5:
return State.FINISHED
# random fail
if random.randint(0, 9) == 9:
self.storage().saveData('error', 'Random error at checkState :-)')
return State.ERROR return State.ERROR
self.storage().saveData('count', str(count)) if op == opFinish:
return State.FINISHED
res = None
if op == opCreate:
res = self.__checkDeploy()
if op == opStart:
res = self.__checkPowerOn()
if op == opStop:
res = self.__checkPowerOff()
if op == opWait:
res = State.RUNNING
if op == opSuspend:
res = self.__checkSuspend()
if res is None:
return self.__error('Unexpected operation found')
if res == State.FINISHED:
self.__popCurrentOp()
return State.RUNNING return State.RUNNING
return res
def finish(self): def finish(self):
''' '''

View File

@ -184,8 +184,46 @@ class OVirtLinkedService(Service):
''' '''
return self.parent().getTemplateState(templateId) return self.parent().getTemplateState(templateId)
def deployFromTemplate(self, name, comments, templateId):
    '''
    Asks the parent provider to deploy a virtual machine from a template,
    using the cluster selected on this service.

    Args:
        name: Name (sanitized) of the machine
        comments: Comments for machine
        templateId: Id of the template to deploy from

    Returns:
        Id of the machine being created from template
    '''
    provider = self.parent()
    clusterId = self.cluster.value
    return provider.deployFromTemplate(name, comments, templateId, clusterId)
def getMachineState(self, machineId):
    '''
    Returns the state of a machine, as reported by the parent provider.

    Args:
        machineId: Id of the machine to get state

    Returns:
        one of these values:
         'down': Machine is not running
         'unknown': Machine is not known
         'powering_up': Machine is powering up
         'up': Machine is up and running
         'saving_state': Machine is "suspending"
         'suspended': Machine is suspended
         'restoring_state': Machine is restoring state (unsuspending)
         'powering_down': Machine is powering down
         'image_locked': Machine is creating/cloning and is not usable
    '''
    provider = self.parent()
    return provider.getMachineState(machineId)
def removeTemplate(self, templateId): def removeTemplate(self, templateId):
''' '''
invokes removeTemplate from parent provider invokes removeTemplate from parent provider
''' '''
return self.parent().removeTemplate(templateId) return self.parent().removeTemplate(templateId)
def getMacRange(self):
    '''
    Returns the mac range to use, as given by the parent provider
    '''
    provider = self.parent()
    return provider.getMacRange()

View File

@ -34,6 +34,7 @@ Created on Jun 22, 2012
''' '''
from django.utils.translation import ugettext_noop as translatable, ugettext as _ from django.utils.translation import ugettext_noop as translatable, ugettext as _
from uds.core.util.State import State
from uds.core.services import ServiceProvider from uds.core.services import ServiceProvider
from OVirtLinkedService import OVirtLinkedService from OVirtLinkedService import OVirtLinkedService
from uds.core.ui import gui from uds.core.ui import gui
@ -261,6 +262,29 @@ class Provider(ServiceProvider):
''' '''
return self.__getApi().getTemplateState(templateId) return self.__getApi().getTemplateState(templateId)
def getMachineState(self, machineId):
    '''
    Returns the state of the machine, as reported by the API client
    (self.__getApi()).

    Args:
        machineId: Id of the machine to get state

    Returns:
        one of these values:
         'down': Machine is not running
         'unknown': Machine is not known
         'powering_up': Machine is powering up
         'up': Machine is up and running
         'saving_state': Machine is "suspending"
         'suspended': Machine is suspended
         'restoring_state': Machine is restoring state (unsuspending)
         'powering_down': Machine is powering down
    '''
    # The former trailing "return State.INACTIVE" was unreachable and has been removed
    return self.__getApi().getMachineState(machineId)
def removeTemplate(self, templateId): def removeTemplate(self, templateId):
''' '''
Removes a template from ovirt server Removes a template from ovirt server
@ -269,6 +293,23 @@ class Provider(ServiceProvider):
''' '''
return self.__getApi().removeTemplate(templateId) return self.__getApi().removeTemplate(templateId)
def deployFromTemplate(self, name, comments, templateId, clusterId):
    '''
    Forwards the deployment of a virtual machine from a template to the
    API client (self.__getApi()).

    Args:
        name: Name (sanitized) of the machine
        comments: Comments for machine
        templateId: Id of the template to deploy from
        clusterId: Id of the cluster to deploy to

    Returns:
        Id of the machine being created from template
    '''
    api = self.__getApi()
    return api.deployFromTemplate(name, comments, templateId, clusterId)
def getMacRange(self):
    '''
    Returns the value of the provider's "macsRange" field
    '''
    macsRange = self.macsRange
    return macsRange.value
@staticmethod @staticmethod
def test(env, data): def test(env, data):

View File

@ -166,10 +166,8 @@ class OVirtPublication(Publication):
# Methods provided below are specific for this publication # Methods provided below are specific for this publication
# and will be used by user deployments that uses this kind of publication # and will be used by user deployments that uses this kind of publication
def getBaseName(self): def getTemplateId(self):
''' '''
This sample method (just for this sample publication), provides Returns the template id associated with the publication
the name generater for this publication. This is just a sample, and
this will do the work
''' '''
return self._name return self._templateId

View File

@ -337,8 +337,8 @@ class Client(object):
api = self.__getApi() api = self.__getApi()
storage = api.storagedomains.get(id=storageId) #storage = api.storagedomains.get(id=storageId)
storage_domain = params.StorageDomain(storage) storage_domain = params.StorageDomain(id=storageId)
cluster = api.clusters.get(id=clusterId) cluster = api.clusters.get(id=clusterId)
vm = api.vms.get(id=vmId) vm = api.vms.get(id=vmId)
@ -348,9 +348,9 @@ class Client(object):
template = params.Template(name=name,storage_domain=storage_domain, vm=vm, cluster=cluster, description=comments) template = params.Template(name=name,storage_domain=storage_domain, vm=vm, cluster=cluster, description=comments)
api.templates.add(template) return api.templates.add(template).get_id()
return api.templates.get(name=name).get_id() #return api.templates.get(name=name).get_id()
finally: finally:
lock.release() lock.release()
@ -358,6 +358,7 @@ class Client(object):
def getTemplateState(self, templateId): def getTemplateState(self, templateId):
''' '''
Returns current template state. Returns current template state.
This method do not uses cache at all (it always tries to get template state from oVirt server)
Returned values could be: Returned values could be:
ok ok
@ -381,6 +382,42 @@ class Client(object):
finally: finally:
lock.release() lock.release()
def deployFromTemplate(self, name, comments, templateId, clusterId):
    '''
    Deploys a virtual machine on selected cluster from selected template

    Args:
        name: Name (sanitized) of the machine
        comments: Comments for machine
        templateId: Id of the template to deploy from
        clusterId: Id of the cluster to deploy to

    Returns:
        Id of the machine being created from template

    Raises:
        Exception: if the cluster or the template cannot be found
    '''
    # Acquire before entering the try block, so that "finally" never
    # releases a lock that was not actually acquired
    lock.acquire(True)
    try:
        api = self.__getApi()

        cluster = api.clusters.get(id=clusterId)
        template = api.templates.get(id=templateId)

        if cluster is None:
            raise Exception('Cluster not found')

        if template is None:
            raise Exception('Template not found')

        par = params.VM(name=name, cluster=cluster, template=template, description=comments)

        # (a stray "params.Display()" statement, whose result was discarded, has been removed)
        return api.vms.add(par).get_id()
    finally:
        lock.release()
def removeTemplate(self, templateId): def removeTemplate(self, templateId):
''' '''
Removes a template from ovirt server Removes a template from ovirt server
@ -402,3 +439,49 @@ class Client(object):
lock.release() lock.release()
def getMachineState(self, machineId):
    '''
    Returns current state of a machine (running, suspended, ...).
    The state is read from the API on every call; nothing is cached here.

    Args:
        machineId: Id of the machine to get status

    Returns:
        'unknown' when the machine is not found or reports no status.
        Otherwise, the state reported by the API; per the original notes:
         'down': Machine is not running
         'powering_up': Machine is powering up
         'up': Machine is up and running
         'saving_state': Machine is "suspending"
         'suspended': Machine is suspended
         'restoring_state': Machine is restoring state (unsuspending)
         'powering_down': Machine is powering down
    '''
    # Acquire before entering the try block, so that "finally" never
    # releases a lock that was not actually acquired
    lock.acquire(True)
    try:
        api = self.__getApi()

        vm = api.vms.get(id=machineId)

        if vm is None or vm.get_status() is None:
            return 'unknown'

        return vm.get_status().get_state()
    finally:
        lock.release()
def powerOnMachine(self, machineId):
'''
Tries to power on a machine. No check is done, it is simply requested to oVirt
Args:
machineId: Id of the machine
Returns:
'''