mirror of
https://github.com/ansible/awx.git
synced 2024-11-01 16:51:11 +03:00
conform isolated system to new early node choice
* Randomly chose an instance in the controller instance group for which to control the isolated node run. Note the chosen instance via a job controller_node field
This commit is contained in:
parent
e720fe5dd0
commit
8d352a4edf
@ -702,7 +702,8 @@ class UnifiedJobSerializer(BaseSerializer):
|
|||||||
model = UnifiedJob
|
model = UnifiedJob
|
||||||
fields = ('*', 'unified_job_template', 'launch_type', 'status',
|
fields = ('*', 'unified_job_template', 'launch_type', 'status',
|
||||||
'failed', 'started', 'finished', 'elapsed', 'job_args',
|
'failed', 'started', 'finished', 'elapsed', 'job_args',
|
||||||
'job_cwd', 'job_env', 'job_explanation', 'execution_node',
|
'job_cwd', 'job_env', 'job_explanation',
|
||||||
|
'execution_node', 'controller_node',
|
||||||
'result_traceback', 'event_processing_finished')
|
'result_traceback', 'event_processing_finished')
|
||||||
extra_kwargs = {
|
extra_kwargs = {
|
||||||
'unified_job_template': {
|
'unified_job_template': {
|
||||||
@ -3434,7 +3435,7 @@ class WorkflowJobSerializer(LabelsListMixin, UnifiedJobSerializer):
|
|||||||
class Meta:
|
class Meta:
|
||||||
model = WorkflowJob
|
model = WorkflowJob
|
||||||
fields = ('*', 'workflow_job_template', 'extra_vars', 'allow_simultaneous',
|
fields = ('*', 'workflow_job_template', 'extra_vars', 'allow_simultaneous',
|
||||||
'-execution_node', '-event_processing_finished',)
|
'-execution_node', '-event_processing_finished', '-controller_node',)
|
||||||
|
|
||||||
def get_related(self, obj):
|
def get_related(self, obj):
|
||||||
res = super(WorkflowJobSerializer, self).get_related(obj)
|
res = super(WorkflowJobSerializer, self).get_related(obj)
|
||||||
@ -3463,7 +3464,7 @@ class WorkflowJobSerializer(LabelsListMixin, UnifiedJobSerializer):
|
|||||||
class WorkflowJobListSerializer(WorkflowJobSerializer, UnifiedJobListSerializer):
|
class WorkflowJobListSerializer(WorkflowJobSerializer, UnifiedJobListSerializer):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
fields = ('*', '-execution_node',)
|
fields = ('*', '-execution_node', '-controller_node',)
|
||||||
|
|
||||||
|
|
||||||
class WorkflowJobCancelSerializer(WorkflowJobSerializer):
|
class WorkflowJobCancelSerializer(WorkflowJobSerializer):
|
||||||
|
20
awx/main/migrations/0040_v330_unifiedjob_controller_node.py
Normal file
20
awx/main/migrations/0040_v330_unifiedjob_controller_node.py
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.11.11 on 2018-05-25 18:58
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('main', '0039_v330_custom_venv_help_text'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='unifiedjob',
|
||||||
|
name='controller_node',
|
||||||
|
field=models.TextField(blank=True, default=b'', editable=False, help_text='The instance that managed the isolated execution environment.'),
|
||||||
|
),
|
||||||
|
]
|
@ -507,7 +507,8 @@ class StdoutMaxBytesExceeded(Exception):
|
|||||||
self.supported = supported
|
self.supported = supported
|
||||||
|
|
||||||
|
|
||||||
class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique, UnifiedJobTypeStringMixin, TaskManagerUnifiedJobMixin):
|
class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique,
|
||||||
|
UnifiedJobTypeStringMixin, TaskManagerUnifiedJobMixin):
|
||||||
'''
|
'''
|
||||||
Concrete base class for unified job run by the task engine.
|
Concrete base class for unified job run by the task engine.
|
||||||
'''
|
'''
|
||||||
@ -571,6 +572,12 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique
|
|||||||
editable=False,
|
editable=False,
|
||||||
help_text=_("The node the job executed on."),
|
help_text=_("The node the job executed on."),
|
||||||
)
|
)
|
||||||
|
controller_node = models.TextField(
|
||||||
|
blank=True,
|
||||||
|
default='',
|
||||||
|
editable=False,
|
||||||
|
help_text=_("The instance that managed the isolated execution environment."),
|
||||||
|
)
|
||||||
notifications = models.ManyToManyField(
|
notifications = models.ManyToManyField(
|
||||||
'Notification',
|
'Notification',
|
||||||
editable=False,
|
editable=False,
|
||||||
@ -1228,17 +1235,8 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique
|
|||||||
raise RuntimeError("Expected celery_task_id to be set on model.")
|
raise RuntimeError("Expected celery_task_id to be set on model.")
|
||||||
kwargs['task_id'] = self.celery_task_id
|
kwargs['task_id'] = self.celery_task_id
|
||||||
task_class = self._get_task_class()
|
task_class = self._get_task_class()
|
||||||
args = [self.pk]
|
|
||||||
from awx.main.models.ha import InstanceGroup
|
|
||||||
ig = InstanceGroup.objects.get(name=queue)
|
|
||||||
if ig.controller_id:
|
|
||||||
if self.supports_isolation(): # case of jobs and ad hoc commands
|
|
||||||
isolated_instance = ig.instances.order_by('-capacity').first()
|
|
||||||
args.append(isolated_instance.hostname)
|
|
||||||
else: # proj & inv updates, system jobs run on controller
|
|
||||||
queue = ig.controller.name
|
|
||||||
kwargs['queue'] = queue
|
kwargs['queue'] = queue
|
||||||
task_class().apply_async(args, opts, **kwargs)
|
task_class().apply_async([self.pk], opts, **kwargs)
|
||||||
|
|
||||||
def start(self, error_callback, success_callback, **kwargs):
|
def start(self, error_callback, success_callback, **kwargs):
|
||||||
'''
|
'''
|
||||||
@ -1400,3 +1398,9 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique
|
|||||||
r['{}_schedule_id'.format(name)] = self.schedule.pk
|
r['{}_schedule_id'.format(name)] = self.schedule.pk
|
||||||
r['{}_schedule_name'.format(name)] = self.schedule.name
|
r['{}_schedule_name'.format(name)] = self.schedule.name
|
||||||
return r
|
return r
|
||||||
|
|
||||||
|
def get_celery_queue_name(self):
|
||||||
|
return self.controller_node or self.execution_node or settings.CELERY_DEFAULT_QUEUE
|
||||||
|
|
||||||
|
def get_isolated_execution_node_name(self):
|
||||||
|
return self.execution_node if self.controller_node else None
|
||||||
|
@ -7,6 +7,7 @@ import logging
|
|||||||
import uuid
|
import uuid
|
||||||
import json
|
import json
|
||||||
import six
|
import six
|
||||||
|
import random
|
||||||
from sets import Set
|
from sets import Set
|
||||||
|
|
||||||
# Django
|
# Django
|
||||||
@ -265,8 +266,16 @@ class TaskManager():
|
|||||||
elif not task.supports_isolation() and rampart_group.controller_id:
|
elif not task.supports_isolation() and rampart_group.controller_id:
|
||||||
# non-Ansible jobs on isolated instances run on controller
|
# non-Ansible jobs on isolated instances run on controller
|
||||||
task.instance_group = rampart_group.controller
|
task.instance_group = rampart_group.controller
|
||||||
logger.info('Submitting isolated %s to queue %s via %s.',
|
task.execution_node = random.choice(list(rampart_group.controller.instances.all().values_list('hostname', flat=True)))
|
||||||
task.log_format, task.instance_group_id, rampart_group.controller_id)
|
logger.info(six.text_type('Submitting isolated {} to queue {}.').format(
|
||||||
|
task.log_format, task.instance_group.name, task.execution_node))
|
||||||
|
elif task.supports_isolation() and rampart_group.controller_id:
|
||||||
|
# TODO: Select from only online nodes in the controller node
|
||||||
|
task.instance_group = rampart_group
|
||||||
|
task.execution_node = instance.hostname
|
||||||
|
task.controller_node = random.choice(list(rampart_group.controller.instances.all().values_list('hostname', flat=True)))
|
||||||
|
logger.info(six.text_type('Submitting isolated {} to queue {} controlled by {}.').format(
|
||||||
|
task.log_format, task.execution_node, task.controller_node))
|
||||||
else:
|
else:
|
||||||
task.instance_group = rampart_group
|
task.instance_group = rampart_group
|
||||||
if instance is not None:
|
if instance is not None:
|
||||||
@ -284,11 +293,10 @@ class TaskManager():
|
|||||||
def post_commit():
|
def post_commit():
|
||||||
task.websocket_emit_status(task.status)
|
task.websocket_emit_status(task.status)
|
||||||
if task.status != 'failed':
|
if task.status != 'failed':
|
||||||
if instance is not None:
|
task.start_celery_task(opts,
|
||||||
actual_queue=instance.hostname
|
error_callback=error_handler,
|
||||||
else:
|
success_callback=success_handler,
|
||||||
actual_queue=settings.CELERY_DEFAULT_QUEUE
|
queue=task.get_celery_queue_name())
|
||||||
task.start_celery_task(opts, error_callback=error_handler, success_callback=success_handler, queue=actual_queue)
|
|
||||||
|
|
||||||
connection.on_commit(post_commit)
|
connection.on_commit(post_commit)
|
||||||
|
|
||||||
|
@ -868,15 +868,10 @@ class BaseTask(Task):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
@with_path_cleanup
|
@with_path_cleanup
|
||||||
def run(self, pk, isolated_host=None, **kwargs):
|
def run(self, pk, **kwargs):
|
||||||
'''
|
'''
|
||||||
Run the job/task and capture its output.
|
Run the job/task and capture its output.
|
||||||
'''
|
'''
|
||||||
'''
|
|
||||||
execution_node = settings.CLUSTER_HOST_ID
|
|
||||||
if isolated_host is not None:
|
|
||||||
execution_node = isolated_host
|
|
||||||
'''
|
|
||||||
instance = self.update_model(pk, status='running',
|
instance = self.update_model(pk, status='running',
|
||||||
start_args='') # blank field to remove encrypted passwords
|
start_args='') # blank field to remove encrypted passwords
|
||||||
|
|
||||||
@ -886,6 +881,8 @@ class BaseTask(Task):
|
|||||||
extra_update_fields = {}
|
extra_update_fields = {}
|
||||||
event_ct = 0
|
event_ct = 0
|
||||||
stdout_handle = None
|
stdout_handle = None
|
||||||
|
isolated_host = instance.get_isolated_execution_node_name()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
kwargs['isolated'] = isolated_host is not None
|
kwargs['isolated'] = isolated_host is not None
|
||||||
self.pre_run_hook(instance, **kwargs)
|
self.pre_run_hook(instance, **kwargs)
|
||||||
|
Loading…
Reference in New Issue
Block a user