1
0
mirror of https://github.com/ansible/awx.git synced 2024-10-27 17:55:10 +03:00

generate control node receptor.conf

when a new remote execution/hop node is added
regenerate the receptor.conf for all control node to
peer out to the new remote execution node

Signed-off-by: Hao Liu <haoli@redhat.com>
Co-Authored-By: Seth Foster <fosterseth@users.noreply.github.com>
Co-Authored-By: Shane McDonald <me@shanemcd.com>
This commit is contained in:
Shane McDonald 2022-08-01 14:13:59 -04:00 committed by Jeff Bradberry
parent 4bf9925cf7
commit 9b034ad574
10 changed files with 150 additions and 16 deletions

View File

@ -423,6 +423,13 @@ def on_instance_group_saved(sender, instance, created=False, raw=False, **kwargs
@receiver(post_save, sender=Instance)
def on_instance_saved(sender, instance, created=False, raw=False, **kwargs):
# TODO: handle update to instance
if settings.IS_K8S and created and instance.node_type in ('execution', 'hop'):
from awx.main.tasks.receptor import write_receptor_config # prevents circular import
# on commit broadcast to all control instance to update their receptor configs
connection.on_commit(lambda: write_receptor_config.apply_async(queue='tower_broadcast_all'))
if created or instance.has_policy_changes():
schedule_policy_task()

View File

@ -642,10 +642,6 @@ class TaskManager(TaskBase):
found_acceptable_queue = True
break
# TODO: remove this after we have confidence that OCP control nodes are reporting node_type=control
if settings.IS_K8S and task.capacity_type == 'execution':
logger.debug("Skipping group {}, task cannot run on control plane".format(instance_group.name))
continue
# at this point we know the instance group is NOT a container group
# because if it was, it would have started the task and broke out of the loop.
execution_instance = self.instance_groups.fit_task_to_most_remaining_capacity_instance(

View File

@ -145,7 +145,7 @@ class BaseTask(object):
"""
Return params structure to be executed by the container runtime
"""
if settings.IS_K8S:
if settings.IS_K8S and instance.instance_group.is_container_group:
return {}
image = instance.execution_environment.image

View File

@ -27,12 +27,17 @@ from awx.main.utils.common import (
)
from awx.main.constants import MAX_ISOLATED_PATH_COLON_DELIMITER
from awx.main.tasks.signals import signal_state, signal_callback, SignalExit
from awx.main.models import Instance
from awx.main.dispatch.publish import task
# Receptorctl
from receptorctl.socket_interface import ReceptorControl
from filelock import FileLock
logger = logging.getLogger('awx.main.tasks.receptor')
__RECEPTOR_CONF = '/etc/receptor/receptor.conf'
__RECEPTOR_CONF_LOCKFILE = f'{__RECEPTOR_CONF}.lock'
RECEPTOR_ACTIVE_STATES = ('Pending', 'Running')
@ -43,8 +48,10 @@ class ReceptorConnectionType(Enum):
def get_receptor_sockfile():
with open(__RECEPTOR_CONF, 'r') as f:
data = yaml.safe_load(f)
lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
with lock:
with open(__RECEPTOR_CONF, 'r') as f:
data = yaml.safe_load(f)
for section in data:
for entry_name, entry_data in section.items():
if entry_name == 'control-service':
@ -60,8 +67,10 @@ def get_tls_client(use_stream_tls=None):
if not use_stream_tls:
return None
with open(__RECEPTOR_CONF, 'r') as f:
data = yaml.safe_load(f)
lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
with lock:
with open(__RECEPTOR_CONF, 'r') as f:
data = yaml.safe_load(f)
for section in data:
for entry_name, entry_data in section.items():
if entry_name == 'tls-client':
@ -78,12 +87,25 @@ def get_receptor_ctl():
return ReceptorControl(receptor_sockfile)
def find_node_in_mesh(node_name, receptor_ctl):
attempts = 10
backoff = 1
for attempt in range(attempts):
all_nodes = receptor_ctl.simple_command("status").get('Advertisements', None)
for node in all_nodes:
if node.get('NodeID') == node_name:
return node
else:
logger.warning(f"Instance {node_name} is not in the receptor mesh. {attempts-attempt} attempts left.")
time.sleep(backoff)
backoff += 1
else:
raise ReceptorNodeNotFound(f'Instance {node_name} is not in the receptor mesh')
def get_conn_type(node_name, receptor_ctl):
all_nodes = receptor_ctl.simple_command("status").get('Advertisements', None)
for node in all_nodes:
if node.get('NodeID') == node_name:
return ReceptorConnectionType(node.get('ConnType'))
raise ReceptorNodeNotFound(f'Instance {node_name} is not in the receptor mesh')
node = find_node_in_mesh(node_name, receptor_ctl)
return ReceptorConnectionType(node.get('ConnType'))
def administrative_workunit_reaper(work_list=None):
@ -574,3 +596,66 @@ class AWXReceptorJob:
else:
config["clusters"][0]["cluster"]["insecure-skip-tls-verify"] = True
return config
RECEPTOR_CONFIG_STARTER = (
{'control-service': {'service': 'control', 'filename': '/var/run/receptor/receptor.sock', 'permissions': '0600'}},
{'local-only': None},
{'work-command': {'worktype': 'local', 'command': 'ansible-runner', 'params': 'worker', 'allowruntimeparams': True}},
{
'work-kubernetes': {
'worktype': 'kubernetes-runtime-auth',
'authmethod': 'runtime',
'allowruntimeauth': True,
'allowruntimepod': True,
'allowruntimeparams': True,
}
},
{
'work-kubernetes': {
'worktype': 'kubernetes-incluster-auth',
'authmethod': 'incluster',
'allowruntimeauth': True,
'allowruntimepod': True,
'allowruntimeparams': True,
}
},
{
'tls-client': {
'name': 'tlsclient',
'rootcas': '/etc/receptor/tls/ca/receptor-ca.crt',
'cert': '/etc/receptor/tls/receptor.crt',
'key': '/etc/receptor/tls/receptor.key',
}
},
)
@task()
def write_receptor_config():
receptor_config = list(RECEPTOR_CONFIG_STARTER)
instances = Instance.objects.exclude(node_type='control')
for instance in instances:
peer = {'tcp-peer': {'address': f'{instance.hostname}:{instance.listener_port}', 'tls': 'tlsclient'}}
receptor_config.append(peer)
lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
with lock:
with open(__RECEPTOR_CONF, 'w') as file:
yaml.dump(receptor_config, file, default_flow_style=False)
receptor_ctl = get_receptor_ctl()
attempts = 10
backoff = 1
for attempt in range(attempts):
try:
receptor_ctl.simple_command("reload")
break
except ValueError:
logger.warning(f"Unable to reload Receptor configuration. {attempts-attempt} attempts left.")
time.sleep(backoff)
backoff += 1
else:
raise RuntimeError("Receptor reload failed")

View File

@ -61,7 +61,7 @@ from awx.main.utils.common import (
from awx.main.utils.external_logging import reconfigure_rsyslog
from awx.main.utils.reload import stop_local_services
from awx.main.utils.pglock import advisory_lock
from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper
from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper, write_receptor_config
from awx.main.consumers import emit_channel_notification
from awx.main import analytics
from awx.conf import settings_registry
@ -81,6 +81,10 @@ Try upgrading OpenSSH or providing your private key in an different format. \
def dispatch_startup():
startup_logger = logging.getLogger('awx.main.tasks')
# TODO: Enable this on VM installs
if settings.IS_K8S:
write_receptor_config()
startup_logger.debug("Syncing Schedules")
for sch in Schedule.objects.all():
try:
@ -555,7 +559,7 @@ def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
except Exception:
logger.exception('failed to reap jobs for {}'.format(other_inst.hostname))
try:
if settings.AWX_AUTO_DEPROVISION_INSTANCES:
if settings.AWX_AUTO_DEPROVISION_INSTANCES and other_inst.node_type == "control":
deprovision_hostname = other_inst.hostname
other_inst.delete() # FIXME: what about associated inbound links?
logger.info("Host {} Automatically Deprovisioned.".format(deprovision_hostname))

View File

@ -0,0 +1,24 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org>

View File

@ -25,6 +25,7 @@ django-split-settings
django-taggit
djangorestframework==3.13.1
djangorestframework-yaml
filelock
GitPython>=3.1.1 # minimum to fix https://github.com/ansible/awx/issues/6119
irc
jinja2>=2.11.3 # CVE-2020-28493

View File

@ -132,6 +132,8 @@ docutils==0.16
# via python-daemon
ecdsa==0.18.0
# via python-jose
filelock==3.8.0
# via -r /awx_devel/requirements/requirements.in
# via
# -r /awx_devel/requirements/requirements_git.txt
# django-radius

View File

@ -109,6 +109,20 @@
mode: '0600'
with_sequence: start=1 end={{ control_plane_node_count }}
- name: Create Receptor Config Lock File
file:
path: "{{ sources_dest }}/receptor/receptor-awx-{{ item }}.conf.lock"
state: touch
mode: '0600'
with_sequence: start=1 end={{ control_plane_node_count }}
- name: Render Receptor Config(s) for Control Plane
template:
src: "receptor-awx.conf.j2"
dest: "{{ sources_dest }}/receptor/receptor-awx-{{ item }}.conf"
mode: '0600'
with_sequence: start=1 end={{ control_plane_node_count }}
- name: Render Receptor Hop Config
template:
src: "receptor-hop.conf.j2"

View File

@ -42,6 +42,7 @@ services:
- "../../docker-compose/_sources/local_settings.py:/etc/tower/conf.d/local_settings.py"
- "../../docker-compose/_sources/SECRET_KEY:/etc/tower/SECRET_KEY"
- "../../docker-compose/_sources/receptor/receptor-awx-{{ loop.index }}.conf:/etc/receptor/receptor.conf"
- "../../docker-compose/_sources/receptor/receptor-awx-{{ loop.index }}.conf.lock:/etc/receptor/receptor.conf.lock"
- "../../docker-compose/_sources/receptor/work_public_key.pem:/etc/receptor/work_public_key.pem"
- "../../docker-compose/_sources/receptor/work_private_key.pem:/etc/receptor/work_private_key.pem"
# - "../../docker-compose/_sources/certs:/etc/receptor/certs" # TODO: optionally generate certs