From 19edf417bac12be10ddc7d99a9349f2dbca6914a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20G=C3=B3mez=20Garc=C3=ADa?= Date: Wed, 1 Jul 2020 16:45:03 +0200 Subject: [PATCH] *More fix & improvements on cleaner workers --- .../src/uds/core/workers/assigned_unused.py | 2 +- .../workers/hanged_userservice_cleaner.py | 29 +++++++++++++------ server/src/uds/core/workers/stuck_cleaner.py | 14 ++++----- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/server/src/uds/core/workers/assigned_unused.py b/server/src/uds/core/workers/assigned_unused.py index 97eed1caa..cc35635c8 100644 --- a/server/src/uds/core/workers/assigned_unused.py +++ b/server/src/uds/core/workers/assigned_unused.py @@ -61,7 +61,7 @@ class AssignedAndUnused(Job): userServices__cache_level=0 ) ) - ).filter(outdated__gt=0) + ).filter(outdated__gt=0, state=State.ACTIVE) for ds in outdatedServicePools: # Skips checking deployed services in maintenance mode or ignores assigned and unused if ds.isInMaintenance() is True or ds.ignores_unused: diff --git a/server/src/uds/core/workers/hanged_userservice_cleaner.py b/server/src/uds/core/workers/hanged_userservice_cleaner.py index b95d6f91b..3afa25a19 100644 --- a/server/src/uds/core/workers/hanged_userservice_cleaner.py +++ b/server/src/uds/core/workers/hanged_userservice_cleaner.py @@ -33,9 +33,9 @@ from datetime import timedelta import logging -from django.db.models import Q +from django.db.models import Q, Count from uds.core.util.config import GlobalConfig -from uds.models import ServicePool, getSqlDatetime +from uds.models import ServicePool, UserService, getSqlDatetime from uds.core.util.state import State from uds.core.jobs import Job from uds.core.util import log @@ -51,18 +51,29 @@ class HangedCleaner(Job): def run(self): since_state = getSqlDatetime() - timedelta(seconds=GlobalConfig.MAX_INITIALIZING_TIME.getInt()) # Filter for locating machine not ready - flt = Q(state_date__lt=since_state, state=State.PREPARING) | Q(state_date__lt=since_state, state=State.USABLE, os_state=State.PREPARING) | Q(state_date__lt=since_state, state=State.REMOVING) + flt = ( + Q(state_date__lt=since_state, state=State.PREPARING) | + Q(state_date__lt=since_state, state=State.USABLE, os_state=State.PREPARING) + ) + + withHangedServices = ServicePool.objects.annotate( + hanged = Count( + 'userServices', + filter=Q(userServices__state_date__lt=since_state, userServices__state=State.PREPARING) | + Q(userServices__state_date__lt=since_state, state=State.USABLE, userServices__os_state=State.PREPARING) + ) + ).exclude(hanged=0).exclude(osmanager=None).exclude(service__provider__maintenance_mode=True).filter(state=State.ACTIVE) # Type servicePool: ServicePool - for servicePool in ServicePool.objects.exclude(osmanager=None, state__in=State.VALID_STATES, service__provider__maintenance_mode=True): + for servicePool in withHangedServices: logger.debug('Searching for hanged services for %s', servicePool) + us: UserService for us in servicePool.userServices.filter(flt): + if us.getProperty('destroy_after'): # It's waiting for removal, skip this very specific case + continue logger.debug('Found hanged service %s', us) log.doLog(us, log.ERROR, 'User Service seems to be hanged. Removing it.', log.INTERNAL) - log.doLog(servicePool, log.ERROR, 'Removing user service {0} because it seems to be hanged'.format(us.friendly_name)) - if us.state in (State.REMOVING,): - us.setState(State.ERROR) - else: - us.removeOrCancel() + log.doLog(servicePool, log.ERROR, 'Removing user service {} because it seems to be hanged'.format(us.friendly_name)) + us.removeOrCancel() diff --git a/server/src/uds/core/workers/stuck_cleaner.py b/server/src/uds/core/workers/stuck_cleaner.py index 646258db8..b74e482ff 100644 --- a/server/src/uds/core/workers/stuck_cleaner.py +++ b/server/src/uds/core/workers/stuck_cleaner.py @@ -48,7 +48,7 @@ MAX_STUCK_TIME = 3600 * 24 * 2 # At most 2 days "Stuck", not configurable (ther class StuckCleaner(Job): """ - Kaputen Cleaner is very similar to Hanged Cleaner, at start, almost a copy + Kaputen Cleaner is very similar to Hanged Cleaner We keep it in a new place to "control" more specific thins """ frecuency = 3600 * 24 # Executes Once a day @@ -69,8 +69,9 @@ class StuckCleaner(Job): userServices__state__in=State.INFO_STATES + State.VALID_STATES )) ) - ).filter(service__provider__maintenance_mode=False).exclude(stuckCount=0) + ).filter(service__provider__maintenance_mode=False, state=State.ACTIVE).exclude(stuckCount=0) + # Info states are removed on UserServiceCleaner and VALID_STATES are ok, or if "hanged", checked on "HangedCleaner" def stuckUserServices(servicePool: ServicePool ) -> typing.Iterable[UserService]: q = servicePool.userServices.filter( state_date__lt=since_state @@ -81,10 +82,9 @@ class StuckCleaner(Job): yield from q.filter(state=State.PREPARING, properties__name='destroy_after') for servicePool in servicePoolswithStucks: - logger.debug('Searching for stuck states for %s', servicePool.name) - # Info states are removed on UserServiceCleaner and VALID_STATES are ok, or if "hanged", checked on "HangedCleaner" + # logger.debug('Searching for stuck states for %s', servicePool.name) for stuck in stuckUserServices(servicePool): logger.debug('Found stuck user service %s', stuck) - log.doLog(servicePool, log.ERROR, 'User service %s has been hard removed because it\'s stuck', stuck.name) - print('Found stuck ', stuck) - # stuck.delete() + log.doLog(servicePool, log.ERROR, 'User service {} has been hard removed because it\'s stuck'.format(stuck.name)) + # stuck.setState(State.ERROR) + stuck.delete()