
Merge pull request #3882 from ryanpetrello/debug-log

add the ability to toggle DEBUG logging on dynamically

Reviewed-by: Shane McDonald <me@shanemcd.com> (https://github.com/shanemcd)
commit d9965cfe7e
Authored by softwarefactory-project-zuul[bot] on 2019-05-16 15:12:01 +00:00; committed by GitHub
9 changed files with 112 additions and 130 deletions
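
The heart of the change: the file log handlers now compute their level from the database-backed LOG_AGGREGATOR_LEVEL setting instead of a hard-coded level in LOGGING, so setting it to DEBUG takes effect without restarting services. A minimal standalone sketch of the pattern follows; FakeSettings and DynamicLevelHandler are stand-in names for illustration (the real commit uses a DynamicLevelMixin and AWX's database-backed settings, as shown in the diffs below):

import logging

class FakeSettings:
    # stand-in for AWX's database-backed settings object
    LOG_AGGREGATOR_LEVEL = 'INFO'

settings = FakeSettings()

class DynamicLevelHandler(logging.StreamHandler):
    @property
    def level(self):
        # Re-read the configured level on every check, using the same
        # private name-to-level mapping the commit uses; fall back to
        # WARNING if the setting is missing or unreadable.
        try:
            return logging._nameToLevel[settings.LOG_AGGREGATOR_LEVEL]
        except Exception:
            return logging.WARNING

    @level.setter
    def level(self, value):
        pass  # no-op: the effective level always comes from settings

logger = logging.getLogger('demo')
logger.setLevel(logging.DEBUG)
logger.addHandler(DynamicLevelHandler())

logger.debug('suppressed')                # handler level is currently INFO
settings.LOG_AGGREGATOR_LEVEL = 'DEBUG'   # the dynamic toggle
logger.debug('emitted, no restart needed')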

View File

@@ -24,7 +24,6 @@ from rest_framework.fields import empty, SkipField
# Tower
from awx.main.utils import encrypt_field, decrypt_field
from awx.main.utils.db import get_tower_migration_version
from awx.conf import settings_registry
from awx.conf.models import Setting
from awx.conf.migrations._reencrypt import decrypt_field as old_decrypt_field
@@ -90,45 +89,42 @@ def _ctit_db_wrapper(trans_safe=False):
transaction.set_rollback(False)
yield
except DBError:
if 'migrate' in sys.argv and get_tower_migration_version() < '310':
logger.info('Using default settings until version 3.1 migration.')
else:
# We want the _full_ traceback with the context
# First we get the current call stack, which constitutes the "top",
# it has the context up to the point where the context manager is used
top_stack = StringIO()
traceback.print_stack(file=top_stack)
top_lines = top_stack.getvalue().strip('\n').split('\n')
top_stack.close()
# Get "bottom" stack from the local error that happened
# inside of the "with" block this wraps
exc_type, exc_value, exc_traceback = sys.exc_info()
bottom_stack = StringIO()
traceback.print_tb(exc_traceback, file=bottom_stack)
bottom_lines = bottom_stack.getvalue().strip('\n').split('\n')
# Glue together top and bottom where overlap is found
bottom_cutoff = 0
for i, line in enumerate(bottom_lines):
if line in top_lines:
# start of overlapping section, take overlap from bottom
top_lines = top_lines[:top_lines.index(line)]
bottom_cutoff = i
break
bottom_lines = bottom_lines[bottom_cutoff:]
tb_lines = top_lines + bottom_lines
# We want the _full_ traceback with the context
# First we get the current call stack, which constitutes the "top",
# it has the context up to the point where the context manager is used
top_stack = StringIO()
traceback.print_stack(file=top_stack)
top_lines = top_stack.getvalue().strip('\n').split('\n')
top_stack.close()
# Get "bottom" stack from the local error that happened
# inside of the "with" block this wraps
exc_type, exc_value, exc_traceback = sys.exc_info()
bottom_stack = StringIO()
traceback.print_tb(exc_traceback, file=bottom_stack)
bottom_lines = bottom_stack.getvalue().strip('\n').split('\n')
# Glue together top and bottom where overlap is found
bottom_cutoff = 0
for i, line in enumerate(bottom_lines):
if line in top_lines:
# start of overlapping section, take overlap from bottom
top_lines = top_lines[:top_lines.index(line)]
bottom_cutoff = i
break
bottom_lines = bottom_lines[bottom_cutoff:]
tb_lines = top_lines + bottom_lines
tb_string = '\n'.join(
['Traceback (most recent call last):'] +
tb_lines +
['{}: {}'.format(exc_type.__name__, str(exc_value))]
)
bottom_stack.close()
# Log the combined stack
if trans_safe:
if 'check_migrations' not in sys.argv:
logger.warning('Database settings are not available, using defaults, error:\n{}'.format(tb_string))
else:
logger.error('Error modifying something related to database settings.\n{}'.format(tb_string))
tb_string = '\n'.join(
['Traceback (most recent call last):'] +
tb_lines +
['{}: {}'.format(exc_type.__name__, str(exc_value))]
)
bottom_stack.close()
# Log the combined stack
if trans_safe:
if 'check_migrations' not in sys.argv:
logger.debug('Database settings are not available, using defaults, error:\n{}'.format(tb_string))
else:
logger.debug('Error modifying something related to database settings.\n{}'.format(tb_string))
finally:
if trans_safe and is_atomic and rollback_set:
transaction.set_rollback(rollback_set)
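
Most of the hunk above is a re-indent: the pre-3.1 migration special case is dropped, and the two log calls at the end move from warning/error down to debug. The traceback-splicing logic it carries along is unchanged; for reference, here is a self-contained sketch of that technique, assuming nothing AWX-specific:

import sys
import traceback
from io import StringIO

def full_traceback_lines():
    # "Top": the current call stack, up to this point.
    top_stack = StringIO()
    traceback.print_stack(file=top_stack)
    top_lines = top_stack.getvalue().strip('\n').split('\n')
    top_stack.close()
    # "Bottom": the traceback of the exception being handled.
    exc_type, exc_value, exc_tb = sys.exc_info()
    bottom_stack = StringIO()
    traceback.print_tb(exc_tb, file=bottom_stack)
    bottom_lines = bottom_stack.getvalue().strip('\n').split('\n')
    bottom_stack.close()
    # Glue at the first overlapping frame so nothing appears twice.
    for i, line in enumerate(bottom_lines):
        if line in top_lines:
            top_lines = top_lines[:top_lines.index(line)]
            bottom_lines = bottom_lines[i:]
            break
    return (['Traceback (most recent call last):'] + top_lines + bottom_lines
            + ['{}: {}'.format(exc_type.__name__, exc_value)])

try:
    raise ValueError('boom')
except ValueError:
    print('\n'.join(full_traceback_lines()))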

View File

@@ -40,3 +40,14 @@ ENV_BLACKLIST = frozenset((
'JOB_CALLBACK_DEBUG', 'INVENTORY_HOSTVARS',
'AWX_HOST', 'PROJECT_REVISION'
))
# loggers that may be called in process of emitting a log
LOGGER_BLACKLIST = (
'awx.main.utils.handlers',
'awx.main.utils.formatters',
'awx.main.utils.filters',
'awx.main.utils.encryption',
'awx.main.utils.log',
# loggers that may be called getting logging settings
'awx.conf'
)
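
This blacklist moves out of the filter class (see the filters.py hunk further down) and into awx.main.constants so that both the external-log filter and the new handler-level property can import the same tuple. The guard it supports, shown standalone (a hypothetical minimal version, not AWX's exact filter):

import logging

LOGGER_BLACKLIST = (
    'awx.main.utils.handlers',
    'awx.main.utils.formatters',
    'awx.main.utils.filters',
    'awx.main.utils.encryption',
    'awx.main.utils.log',
    'awx.conf',
)

class BlacklistFilter(logging.Filter):
    def filter(self, record):
        # Drop records from any blacklisted logger (or its children) so
        # that emitting a log can never recursively trigger more logging.
        return not any(record.name.startswith(name)
                       for name in LOGGER_BLACKLIST)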

View File

@@ -108,7 +108,7 @@ class TaskManager():
dag = WorkflowDAG(workflow_job)
spawn_nodes = dag.bfs_nodes_to_run()
if spawn_nodes:
logger.info('Spawning jobs for %s', workflow_job.log_format)
logger.debug('Spawning jobs for %s', workflow_job.log_format)
else:
logger.debug('No nodes to spawn for %s', workflow_job.log_format)
for spawn_node in spawn_nodes:
@@ -118,7 +118,7 @@ class TaskManager():
job = spawn_node.unified_job_template.create_unified_job(**kv)
spawn_node.job = job
spawn_node.save()
logger.info('Spawned %s in %s for node %s', job.log_format, workflow_job.log_format, spawn_node.pk)
logger.debug('Spawned %s in %s for node %s', job.log_format, workflow_job.log_format, spawn_node.pk)
can_start = True
if isinstance(spawn_node.unified_job_template, WorkflowJobTemplate):
workflow_ancestors = job.get_ancestor_workflows()
@@ -178,7 +178,7 @@ class TaskManager():
if not is_done:
continue
has_failed, reason = dag.has_workflow_failed()
logger.info('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
logger.debug('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
result.append(workflow_job.id)
new_status = 'failed' if has_failed else 'successful'
logger.debug("Transitioning {} to {} status.".format(workflow_job.log_format, new_status))
@@ -233,26 +233,26 @@ class TaskManager():
else:
if type(task) is WorkflowJob:
task.status = 'running'
logger.info('Transitioning %s to running status.', task.log_format)
logger.debug('Transitioning %s to running status.', task.log_format)
schedule_task_manager()
elif not task.supports_isolation() and rampart_group.controller_id:
# non-Ansible jobs on isolated instances run on controller
task.instance_group = rampart_group.controller
task.execution_node = random.choice(list(rampart_group.controller.instances.all().values_list('hostname', flat=True)))
logger.info('Submitting isolated {} to queue {}.'.format(
task.log_format, task.instance_group.name, task.execution_node))
logger.debug('Submitting isolated {} to queue {}.'.format(
task.log_format, task.instance_group.name, task.execution_node))
elif controller_node:
task.instance_group = rampart_group
task.execution_node = instance.hostname
task.controller_node = controller_node
logger.info('Submitting isolated {} to queue {} controlled by {}.'.format(
task.log_format, task.execution_node, controller_node))
logger.debug('Submitting isolated {} to queue {} controlled by {}.'.format(
task.log_format, task.execution_node, controller_node))
else:
task.instance_group = rampart_group
if instance is not None:
task.execution_node = instance.hostname
logger.info('Submitting {} to <instance group, instance> <{},{}>.'.format(
task.log_format, task.instance_group_id, task.execution_node))
logger.debug('Submitting {} to <instance group, instance> <{},{}>.'.format(
task.log_format, task.instance_group_id, task.execution_node))
with disable_activity_stream():
task.celery_task_id = str(uuid.uuid4())
task.save()
@@ -295,7 +295,7 @@ class TaskManager():
project_task.created = task.created - timedelta(seconds=1)
project_task.status = 'pending'
project_task.save()
logger.info(
logger.debug(
'Spawned {} as dependency of {}'.format(
project_task.log_format, task.log_format
)
@@ -309,7 +309,7 @@ class TaskManager():
inventory_task.created = task.created - timedelta(seconds=2)
inventory_task.status = 'pending'
inventory_task.save()
logger.info(
logger.debug(
'Spawned {} as dependency of {}'.format(
inventory_task.log_format, task.log_format
)

View File

@@ -103,7 +103,7 @@ class InvalidVirtualenvError(Exception):
def dispatch_startup():
startup_logger = logging.getLogger('awx.main.tasks')
startup_logger.info("Syncing Schedules")
startup_logger.debug("Syncing Schedules")
for sch in Schedule.objects.all():
try:
sch.update_computed_fields()
@@ -189,20 +189,20 @@ def apply_cluster_membership_policies():
# NOTE: arguable behavior: policy-list-group is not added to
# instance's group count for consideration in minimum-policy rules
if group_actual.instances:
logger.info("Policy List, adding Instances {} to Group {}".format(group_actual.instances, ig.name))
logger.debug("Policy List, adding Instances {} to Group {}".format(group_actual.instances, ig.name))
if ig.controller_id is None:
actual_groups.append(group_actual)
else:
# For isolated groups, _only_ apply the policy_instance_list
# do not add to in-memory list, so minimum rules not applied
logger.info('Committing instances to isolated group {}'.format(ig.name))
logger.debug('Committing instances to isolated group {}'.format(ig.name))
ig.instances.set(group_actual.instances)
# Process Instance minimum policies next, since it represents a concrete lower bound to the
# number of instances to make available to instance groups
actual_instances = [Node(obj=i, groups=[]) for i in considered_instances if i.managed_by_policy]
logger.info("Total non-isolated instances:{} available for policy: {}".format(
logger.debug("Total non-isolated instances:{} available for policy: {}".format(
total_instances, len(actual_instances)))
for g in sorted(actual_groups, key=lambda x: len(x.instances)):
policy_min_added = []
@@ -217,7 +217,7 @@ def apply_cluster_membership_policies():
i.groups.append(g.obj.id)
policy_min_added.append(i.obj.id)
if policy_min_added:
logger.info("Policy minimum, adding Instances {} to Group {}".format(policy_min_added, g.obj.name))
logger.debug("Policy minimum, adding Instances {} to Group {}".format(policy_min_added, g.obj.name))
# Finally, process instance policy percentages
for g in sorted(actual_groups, key=lambda x: len(x.instances)):
@@ -233,7 +233,7 @@ def apply_cluster_membership_policies():
i.groups.append(g.obj.id)
policy_per_added.append(i.obj.id)
if policy_per_added:
logger.info("Policy percentage, adding Instances {} to Group {}".format(policy_per_added, g.obj.name))
logger.debug("Policy percentage, adding Instances {} to Group {}".format(policy_per_added, g.obj.name))
# Determine if any changes need to be made
needs_change = False
@@ -242,7 +242,7 @@ def apply_cluster_membership_policies():
needs_change = True
break
if not needs_change:
logger.info('Cluster policy no-op finished in {} seconds'.format(time.time() - started_compute))
logger.debug('Cluster policy no-op finished in {} seconds'.format(time.time() - started_compute))
return
# On a differential basis, apply instances to non-isolated groups
@@ -251,12 +251,12 @@ def apply_cluster_membership_policies():
instances_to_add = set(g.instances) - set(g.prior_instances)
instances_to_remove = set(g.prior_instances) - set(g.instances)
if instances_to_add:
logger.info('Adding instances {} to group {}'.format(list(instances_to_add), g.obj.name))
logger.debug('Adding instances {} to group {}'.format(list(instances_to_add), g.obj.name))
g.obj.instances.add(*instances_to_add)
if instances_to_remove:
logger.info('Removing instances {} from group {}'.format(list(instances_to_remove), g.obj.name))
logger.debug('Removing instances {} from group {}'.format(list(instances_to_remove), g.obj.name))
g.obj.instances.remove(*instances_to_remove)
logger.info('Cluster policy computation finished in {} seconds'.format(time.time() - started_compute))
logger.debug('Cluster policy computation finished in {} seconds'.format(time.time() - started_compute))
@task(queue='tower_broadcast_all', exchange_type='fanout')
@@ -277,7 +277,7 @@ def delete_project_files(project_path):
if os.path.exists(project_path):
try:
shutil.rmtree(project_path)
logger.info('Success removing project files {}'.format(project_path))
logger.debug('Success removing project files {}'.format(project_path))
except Exception:
logger.exception('Could not remove project directory {}'.format(project_path))
if os.path.exists(lock_file):
@@ -372,7 +372,7 @@ def purge_old_stdout_files():
for f in os.listdir(settings.JOBOUTPUT_ROOT):
if os.path.getctime(os.path.join(settings.JOBOUTPUT_ROOT,f)) < nowtime - settings.LOCAL_STDOUT_EXPIRE_TIME:
os.unlink(os.path.join(settings.JOBOUTPUT_ROOT,f))
logger.info("Removing {}".format(os.path.join(settings.JOBOUTPUT_ROOT,f)))
logger.debug("Removing {}".format(os.path.join(settings.JOBOUTPUT_ROOT,f)))
@task(queue=get_local_queuename)
@@ -514,7 +514,7 @@ def awx_periodic_scheduler():
try:
job_kwargs = schedule.get_job_kwargs()
new_unified_job = schedule.unified_job_template.create_unified_job(**job_kwargs)
logger.info('Spawned {} from schedule {}-{}.'.format(
logger.debug('Spawned {} from schedule {}-{}.'.format(
new_unified_job.log_format, schedule.name, schedule.pk))
if invalid_license:
@@ -1264,7 +1264,7 @@ class BaseTask(object):
extra_update_fields['result_traceback'] = traceback.format_exc()
logger.exception('%s Exception occurred while running task', self.instance.log_format)
finally:
logger.info('%s finished running, producing %s events.', self.instance.log_format, self.event_ct)
logger.debug('%s finished running, producing %s events.', self.instance.log_format, self.event_ct)
try:
self.post_run_hook(self.instance, status)
@@ -1795,8 +1795,8 @@ class RunProjectUpdate(BaseTask):
with transaction.atomic():
if InventoryUpdate.objects.filter(inventory_source=inv_src,
status__in=ACTIVE_STATES).exists():
logger.info('Skipping SCM inventory update for `{}` because '
'another update is already active.'.format(inv_src.name))
logger.debug('Skipping SCM inventory update for `{}` because '
'another update is already active.'.format(inv_src.name))
continue
local_inv_update = inv_src.create_inventory_update(
_eager_fields=dict(
@@ -1862,7 +1862,7 @@ class RunProjectUpdate(BaseTask):
try:
instance.refresh_from_db(fields=['cancel_flag'])
if instance.cancel_flag:
logger.info("ProjectUpdate({0}) was cancelled".format(instance.pk))
logger.debug("ProjectUpdate({0}) was cancelled".format(instance.pk))
return
fcntl.flock(self.lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
break
@@ -2415,7 +2415,7 @@ def deep_copy_model_obj(
model_module, model_name, obj_pk, new_obj_pk,
user_pk, sub_obj_list, permission_check_func=None
):
logger.info('Deep copy {} from {} to {}.'.format(model_name, obj_pk, new_obj_pk))
logger.debug('Deep copy {} from {} to {}.'.format(model_name, obj_pk, new_obj_pk))
from awx.api.generics import CopyAPIView
from awx.main.signals import disable_activity_stream
model = getattr(importlib.import_module(model_module), model_name, None)

View File

@@ -29,20 +29,6 @@ def test_base_logging_handler_skip_log(params, logger_name, expected, dummy_log_
assert filter.filter(dummy_log_record) is expected, (params, logger_name)
@pytest.mark.parametrize('level, expect', [
(30, True), # warning
(20, False) # info
])
def test_log_configurable_severity(level, expect, dummy_log_record):
dummy_log_record.levelno = level
filter = ExternalLoggerEnabled(
enabled_flag=True,
enabled_loggers=['awx', 'activity_stream', 'job_events', 'system_tracking'],
lvl='WARNING'
)
assert filter.filter(dummy_log_record) is expect
class Field(object):
def __init__(self, name, related_model=None, __prevent_search__=None):

View File

@@ -1,26 +1,7 @@
# Copyright (c) 2017 Ansible by Red Hat
# All Rights Reserved.
# Django database
from django.db.migrations.loader import MigrationLoader
from django.db import connection
# Python
from itertools import chain
import re
def get_tower_migration_version():
loader = MigrationLoader(connection, ignore_no_migrations=True)
v = '000'
for app_name, migration_name in loader.applied_migrations:
if app_name == 'main':
version_captures = re.findall('^[0-9]{4}_v([0-9]{3})_', migration_name)
if len(version_captures) == 1:
migration_version = version_captures[0]
if migration_version > v:
v = migration_version
return v
def get_all_field_names(model):

View File

@@ -9,12 +9,13 @@ from pyparsing import (
ParseException,
)
import logging
from logging import Filter, _nameToLevel
from logging import Filter
from django.apps import apps
from django.db import models
from django.conf import settings
from awx.main.constants import LOGGER_BLACKLIST
from awx.main.utils.common import get_search_fields
__all__ = ['SmartFilter', 'ExternalLoggerEnabled']
@@ -49,18 +50,6 @@ class FieldFromSettings(object):
class ExternalLoggerEnabled(Filter):
# Prevents recursive logging loops from swamping the server
LOGGER_BLACKLIST = (
# loggers that may be called in process of emitting a log
'awx.main.utils.handlers',
'awx.main.utils.formatters',
'awx.main.utils.filters',
'awx.main.utils.encryption',
'awx.main.utils.log',
# loggers that may be called getting logging settings
'awx.conf'
)
lvl = FieldFromSettings('LOG_AGGREGATOR_LEVEL')
enabled_loggers = FieldFromSettings('LOG_AGGREGATOR_LOGGERS')
enabled_flag = FieldFromSettings('LOG_AGGREGATOR_ENABLED')
@@ -84,17 +73,13 @@
True - should be logged
"""
# Logger exceptions
for logger_name in self.LOGGER_BLACKLIST:
for logger_name in LOGGER_BLACKLIST:
if record.name.startswith(logger_name):
return False
# General enablement
if not self.enabled_flag:
return False
# Level enablement
if record.levelno < _nameToLevel[self.lvl]:
return False
# Logger type enablement
loggers = self.enabled_loggers
if not loggers:

View File

@@ -19,11 +19,12 @@ from django.conf import settings
from requests_futures.sessions import FuturesSession
# AWX
from awx.main.constants import LOGGER_BLACKLIST
from awx.main.utils.formatters import LogstashFormatter
__all__ = ['BaseHTTPSHandler', 'TCPHandler', 'UDPHandler',
'AWXProxyHandler']
'AWXProxyHandler', 'RotatingProductionLogHandler']
logger = logging.getLogger('awx.main.utils.handlers')
@@ -91,6 +92,28 @@ class SocketResult:
return self
class DynamicLevelMixin(object):
@property
def level(self):
from django.conf import settings
for logger_name in LOGGER_BLACKLIST:
if self.name.startswith(logger_name):
return 30 # WARNING
try:
return logging._nameToLevel[settings.LOG_AGGREGATOR_LEVEL]
except Exception:
return 30 # WARNING
@level.setter
def level(self, level):
pass # no-op, this value comes from the database
class RotatingProductionLogHandler(logging.handlers.RotatingFileHandler, DynamicLevelMixin):
pass
class BaseHandler(logging.Handler):
def __init__(self, host=None, port=None, indv_facts=None, **kwargs):
super(BaseHandler, self).__init__()
@@ -272,7 +295,7 @@ HANDLER_MAPPING = {
}
class AWXProxyHandler(logging.Handler):
class AWXProxyHandler(logging.Handler, DynamicLevelMixin):
'''
Handler specific to the AWX external logging feature

View File

@@ -1013,8 +1013,8 @@ LOGGING = {
'class': 'django.utils.log.AdminEmailHandler',
},
'tower_warnings': {
'level': 'WARNING',
'class':'logging.handlers.RotatingFileHandler',
# don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
'filters': ['require_debug_false'],
'filename': os.path.join(LOG_ROOT, 'tower.log'),
'maxBytes': 1024 * 1024 * 5, # 5 MB
@@ -1022,8 +1022,8 @@ LOGGING = {
'formatter':'simple',
},
'callback_receiver': {
'level': 'WARNING',
'class':'logging.handlers.RotatingFileHandler',
# don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
'filters': ['require_debug_false'],
'filename': os.path.join(LOG_ROOT, 'callback_receiver.log'),
'maxBytes': 1024 * 1024 * 5, # 5 MB
@@ -1031,8 +1031,8 @@ LOGGING = {
'formatter':'simple',
},
'dispatcher': {
'level': 'WARNING',
'class':'logging.handlers.RotatingFileHandler',
# don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
'filters': ['require_debug_false'],
'filename': os.path.join(LOG_ROOT, 'dispatcher.log'),
'maxBytes': 1024 * 1024 * 5, # 5 MB
@@ -1049,8 +1049,8 @@ LOGGING = {
'formatter': 'timed_import',
},
'task_system': {
'level': 'INFO',
'class':'logging.handlers.RotatingFileHandler',
# don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
'filters': ['require_debug_false'],
'filename': os.path.join(LOG_ROOT, 'task_system.log'),
'maxBytes': 1024 * 1024 * 5, # 5 MB
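
With 'level' removed from each handler entry, dictConfig wires the dynamic behavior in purely through the 'class' path: the handler is instantiated like any stock RotatingFileHandler, and its level property does the rest. A reduced sketch of the shape of one entry (paths and backupCount are illustrative, not copied from the diff):

import logging.config

LOGGING = {
    'version': 1,
    'handlers': {
        'tower_warnings': {
            # no 'level' key: the handler property supplies it dynamically
            'class': 'awx.main.utils.handlers.RotatingProductionLogHandler',
            'filename': '/var/log/tower/tower.log',
            'maxBytes': 1024 * 1024 * 5,  # 5 MB
            'backupCount': 5,
        },
    },
    'loggers': {
        'awx': {'handlers': ['tower_warnings'], 'level': 'DEBUG'},
    },
}
logging.config.dictConfig(LOGGING)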