mirror of
https://github.com/ansible/awx.git
synced 2024-10-27 17:55:10 +03:00
generate control node receptor.conf
when a new remote execution/hop node is added regenerate the receptor.conf for all control node to peer out to the new remote execution node Signed-off-by: Hao Liu <haoli@redhat.com> Co-Authored-By: Seth Foster <fosterseth@users.noreply.github.com> Co-Authored-By: Shane McDonald <me@shanemcd.com>
This commit is contained in:
parent
4bf9925cf7
commit
9b034ad574
@ -423,6 +423,13 @@ def on_instance_group_saved(sender, instance, created=False, raw=False, **kwargs
|
||||
|
||||
@receiver(post_save, sender=Instance)
|
||||
def on_instance_saved(sender, instance, created=False, raw=False, **kwargs):
|
||||
# TODO: handle update to instance
|
||||
if settings.IS_K8S and created and instance.node_type in ('execution', 'hop'):
|
||||
from awx.main.tasks.receptor import write_receptor_config # prevents circular import
|
||||
|
||||
# on commit broadcast to all control instance to update their receptor configs
|
||||
connection.on_commit(lambda: write_receptor_config.apply_async(queue='tower_broadcast_all'))
|
||||
|
||||
if created or instance.has_policy_changes():
|
||||
schedule_policy_task()
|
||||
|
||||
|
@ -642,10 +642,6 @@ class TaskManager(TaskBase):
|
||||
found_acceptable_queue = True
|
||||
break
|
||||
|
||||
# TODO: remove this after we have confidence that OCP control nodes are reporting node_type=control
|
||||
if settings.IS_K8S and task.capacity_type == 'execution':
|
||||
logger.debug("Skipping group {}, task cannot run on control plane".format(instance_group.name))
|
||||
continue
|
||||
# at this point we know the instance group is NOT a container group
|
||||
# because if it was, it would have started the task and broke out of the loop.
|
||||
execution_instance = self.instance_groups.fit_task_to_most_remaining_capacity_instance(
|
||||
|
@ -145,7 +145,7 @@ class BaseTask(object):
|
||||
"""
|
||||
Return params structure to be executed by the container runtime
|
||||
"""
|
||||
if settings.IS_K8S:
|
||||
if settings.IS_K8S and instance.instance_group.is_container_group:
|
||||
return {}
|
||||
|
||||
image = instance.execution_environment.image
|
||||
|
@ -27,12 +27,17 @@ from awx.main.utils.common import (
|
||||
)
|
||||
from awx.main.constants import MAX_ISOLATED_PATH_COLON_DELIMITER
|
||||
from awx.main.tasks.signals import signal_state, signal_callback, SignalExit
|
||||
from awx.main.models import Instance
|
||||
from awx.main.dispatch.publish import task
|
||||
|
||||
# Receptorctl
|
||||
from receptorctl.socket_interface import ReceptorControl
|
||||
|
||||
from filelock import FileLock
|
||||
|
||||
logger = logging.getLogger('awx.main.tasks.receptor')
|
||||
__RECEPTOR_CONF = '/etc/receptor/receptor.conf'
|
||||
__RECEPTOR_CONF_LOCKFILE = f'{__RECEPTOR_CONF}.lock'
|
||||
RECEPTOR_ACTIVE_STATES = ('Pending', 'Running')
|
||||
|
||||
|
||||
@ -43,8 +48,10 @@ class ReceptorConnectionType(Enum):
|
||||
|
||||
|
||||
def get_receptor_sockfile():
|
||||
with open(__RECEPTOR_CONF, 'r') as f:
|
||||
data = yaml.safe_load(f)
|
||||
lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
|
||||
with lock:
|
||||
with open(__RECEPTOR_CONF, 'r') as f:
|
||||
data = yaml.safe_load(f)
|
||||
for section in data:
|
||||
for entry_name, entry_data in section.items():
|
||||
if entry_name == 'control-service':
|
||||
@ -60,8 +67,10 @@ def get_tls_client(use_stream_tls=None):
|
||||
if not use_stream_tls:
|
||||
return None
|
||||
|
||||
with open(__RECEPTOR_CONF, 'r') as f:
|
||||
data = yaml.safe_load(f)
|
||||
lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
|
||||
with lock:
|
||||
with open(__RECEPTOR_CONF, 'r') as f:
|
||||
data = yaml.safe_load(f)
|
||||
for section in data:
|
||||
for entry_name, entry_data in section.items():
|
||||
if entry_name == 'tls-client':
|
||||
@ -78,12 +87,25 @@ def get_receptor_ctl():
|
||||
return ReceptorControl(receptor_sockfile)
|
||||
|
||||
|
||||
def find_node_in_mesh(node_name, receptor_ctl):
|
||||
attempts = 10
|
||||
backoff = 1
|
||||
for attempt in range(attempts):
|
||||
all_nodes = receptor_ctl.simple_command("status").get('Advertisements', None)
|
||||
for node in all_nodes:
|
||||
if node.get('NodeID') == node_name:
|
||||
return node
|
||||
else:
|
||||
logger.warning(f"Instance {node_name} is not in the receptor mesh. {attempts-attempt} attempts left.")
|
||||
time.sleep(backoff)
|
||||
backoff += 1
|
||||
else:
|
||||
raise ReceptorNodeNotFound(f'Instance {node_name} is not in the receptor mesh')
|
||||
|
||||
|
||||
def get_conn_type(node_name, receptor_ctl):
|
||||
all_nodes = receptor_ctl.simple_command("status").get('Advertisements', None)
|
||||
for node in all_nodes:
|
||||
if node.get('NodeID') == node_name:
|
||||
return ReceptorConnectionType(node.get('ConnType'))
|
||||
raise ReceptorNodeNotFound(f'Instance {node_name} is not in the receptor mesh')
|
||||
node = find_node_in_mesh(node_name, receptor_ctl)
|
||||
return ReceptorConnectionType(node.get('ConnType'))
|
||||
|
||||
|
||||
def administrative_workunit_reaper(work_list=None):
|
||||
@ -574,3 +596,66 @@ class AWXReceptorJob:
|
||||
else:
|
||||
config["clusters"][0]["cluster"]["insecure-skip-tls-verify"] = True
|
||||
return config
|
||||
|
||||
|
||||
RECEPTOR_CONFIG_STARTER = (
|
||||
{'control-service': {'service': 'control', 'filename': '/var/run/receptor/receptor.sock', 'permissions': '0600'}},
|
||||
{'local-only': None},
|
||||
{'work-command': {'worktype': 'local', 'command': 'ansible-runner', 'params': 'worker', 'allowruntimeparams': True}},
|
||||
{
|
||||
'work-kubernetes': {
|
||||
'worktype': 'kubernetes-runtime-auth',
|
||||
'authmethod': 'runtime',
|
||||
'allowruntimeauth': True,
|
||||
'allowruntimepod': True,
|
||||
'allowruntimeparams': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
'work-kubernetes': {
|
||||
'worktype': 'kubernetes-incluster-auth',
|
||||
'authmethod': 'incluster',
|
||||
'allowruntimeauth': True,
|
||||
'allowruntimepod': True,
|
||||
'allowruntimeparams': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
'tls-client': {
|
||||
'name': 'tlsclient',
|
||||
'rootcas': '/etc/receptor/tls/ca/receptor-ca.crt',
|
||||
'cert': '/etc/receptor/tls/receptor.crt',
|
||||
'key': '/etc/receptor/tls/receptor.key',
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@task()
|
||||
def write_receptor_config():
|
||||
receptor_config = list(RECEPTOR_CONFIG_STARTER)
|
||||
|
||||
instances = Instance.objects.exclude(node_type='control')
|
||||
for instance in instances:
|
||||
peer = {'tcp-peer': {'address': f'{instance.hostname}:{instance.listener_port}', 'tls': 'tlsclient'}}
|
||||
receptor_config.append(peer)
|
||||
|
||||
lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
|
||||
with lock:
|
||||
with open(__RECEPTOR_CONF, 'w') as file:
|
||||
yaml.dump(receptor_config, file, default_flow_style=False)
|
||||
|
||||
receptor_ctl = get_receptor_ctl()
|
||||
|
||||
attempts = 10
|
||||
backoff = 1
|
||||
for attempt in range(attempts):
|
||||
try:
|
||||
receptor_ctl.simple_command("reload")
|
||||
break
|
||||
except ValueError:
|
||||
logger.warning(f"Unable to reload Receptor configuration. {attempts-attempt} attempts left.")
|
||||
time.sleep(backoff)
|
||||
backoff += 1
|
||||
else:
|
||||
raise RuntimeError("Receptor reload failed")
|
||||
|
@ -61,7 +61,7 @@ from awx.main.utils.common import (
|
||||
from awx.main.utils.external_logging import reconfigure_rsyslog
|
||||
from awx.main.utils.reload import stop_local_services
|
||||
from awx.main.utils.pglock import advisory_lock
|
||||
from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper
|
||||
from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper, write_receptor_config
|
||||
from awx.main.consumers import emit_channel_notification
|
||||
from awx.main import analytics
|
||||
from awx.conf import settings_registry
|
||||
@ -81,6 +81,10 @@ Try upgrading OpenSSH or providing your private key in an different format. \
|
||||
def dispatch_startup():
|
||||
startup_logger = logging.getLogger('awx.main.tasks')
|
||||
|
||||
# TODO: Enable this on VM installs
|
||||
if settings.IS_K8S:
|
||||
write_receptor_config()
|
||||
|
||||
startup_logger.debug("Syncing Schedules")
|
||||
for sch in Schedule.objects.all():
|
||||
try:
|
||||
@ -555,7 +559,7 @@ def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
|
||||
except Exception:
|
||||
logger.exception('failed to reap jobs for {}'.format(other_inst.hostname))
|
||||
try:
|
||||
if settings.AWX_AUTO_DEPROVISION_INSTANCES:
|
||||
if settings.AWX_AUTO_DEPROVISION_INSTANCES and other_inst.node_type == "control":
|
||||
deprovision_hostname = other_inst.hostname
|
||||
other_inst.delete() # FIXME: what about associated inbound links?
|
||||
logger.info("Host {} Automatically Deprovisioned.".format(deprovision_hostname))
|
||||
|
24
docs/licenses/filelock.txt
Normal file
24
docs/licenses/filelock.txt
Normal file
@ -0,0 +1,24 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org>
|
@ -25,6 +25,7 @@ django-split-settings
|
||||
django-taggit
|
||||
djangorestframework==3.13.1
|
||||
djangorestframework-yaml
|
||||
filelock
|
||||
GitPython>=3.1.1 # minimum to fix https://github.com/ansible/awx/issues/6119
|
||||
irc
|
||||
jinja2>=2.11.3 # CVE-2020-28493
|
||||
|
@ -132,6 +132,8 @@ docutils==0.16
|
||||
# via python-daemon
|
||||
ecdsa==0.18.0
|
||||
# via python-jose
|
||||
filelock==3.8.0
|
||||
# via -r /awx_devel/requirements/requirements.in
|
||||
# via
|
||||
# -r /awx_devel/requirements/requirements_git.txt
|
||||
# django-radius
|
||||
|
@ -109,6 +109,20 @@
|
||||
mode: '0600'
|
||||
with_sequence: start=1 end={{ control_plane_node_count }}
|
||||
|
||||
- name: Create Receptor Config Lock File
|
||||
file:
|
||||
path: "{{ sources_dest }}/receptor/receptor-awx-{{ item }}.conf.lock"
|
||||
state: touch
|
||||
mode: '0600'
|
||||
with_sequence: start=1 end={{ control_plane_node_count }}
|
||||
|
||||
- name: Render Receptor Config(s) for Control Plane
|
||||
template:
|
||||
src: "receptor-awx.conf.j2"
|
||||
dest: "{{ sources_dest }}/receptor/receptor-awx-{{ item }}.conf"
|
||||
mode: '0600'
|
||||
with_sequence: start=1 end={{ control_plane_node_count }}
|
||||
|
||||
- name: Render Receptor Hop Config
|
||||
template:
|
||||
src: "receptor-hop.conf.j2"
|
||||
|
@ -42,6 +42,7 @@ services:
|
||||
- "../../docker-compose/_sources/local_settings.py:/etc/tower/conf.d/local_settings.py"
|
||||
- "../../docker-compose/_sources/SECRET_KEY:/etc/tower/SECRET_KEY"
|
||||
- "../../docker-compose/_sources/receptor/receptor-awx-{{ loop.index }}.conf:/etc/receptor/receptor.conf"
|
||||
- "../../docker-compose/_sources/receptor/receptor-awx-{{ loop.index }}.conf.lock:/etc/receptor/receptor.conf.lock"
|
||||
- "../../docker-compose/_sources/receptor/work_public_key.pem:/etc/receptor/work_public_key.pem"
|
||||
- "../../docker-compose/_sources/receptor/work_private_key.pem:/etc/receptor/work_private_key.pem"
|
||||
# - "../../docker-compose/_sources/certs:/etc/receptor/certs" # TODO: optionally generate certs
|
||||
|
Loading…
Reference in New Issue
Block a user