
Merge pull request #3882 from ryanpetrello/debug-log

add the ability to toggle DEBUG logging on dynamically

Reviewed-by: Shane McDonald <me@shanemcd.com> (https://github.com/shanemcd)
commit d9965cfe7e
Authored by softwarefactory-project-zuul[bot] on 2019-05-16 15:12:01 +00:00; committed by GitHub
9 changed files with 112 additions and 130 deletions
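
The heart of the change: the file log handlers now compute their level from the database-backed LOG_AGGREGATOR_LEVEL setting instead of a hard-coded level in LOGGING, so setting it to DEBUG takes effect without restarting services. A minimal standalone sketch of the pattern follows; FakeSettings and DynamicLevelHandler are stand-in names for illustration (the real commit uses a DynamicLevelMixin and AWX's database-backed settings, as shown in the diffs below):

import logging

class FakeSettings:
    # stand-in for AWX's database-backed settings object
    LOG_AGGREGATOR_LEVEL = 'INFO'

settings = FakeSettings()

class DynamicLevelHandler(logging.StreamHandler):
    @property
    def level(self):
        # Re-read the configured level on every check, using the same
        # private name-to-level mapping the commit uses; fall back to
        # WARNING if the setting is missing or unreadable.
        try:
            return logging._nameToLevel[settings.LOG_AGGREGATOR_LEVEL]
        except Exception:
            return logging.WARNING

    @level.setter
    def level(self, value):
        pass  # no-op: the effective level always comes from settings

logger = logging.getLogger('demo')
logger.setLevel(logging.DEBUG)
logger.addHandler(DynamicLevelHandler())

logger.debug('suppressed')                # handler level is currently INFO
settings.LOG_AGGREGATOR_LEVEL = 'DEBUG'   # the dynamic toggle
logger.debug('emitted, no restart needed')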

View File

@@ -24,7 +24,6 @@ from rest_framework.fields import empty, SkipField
# Tower
from awx.main.utils import encrypt_field, decrypt_field
from awx.main.utils.db import get_tower_migration_version
from awx.conf import settings_registry
from awx.conf.models import Setting
from awx.conf.migrations._reencrypt import decrypt_field as old_decrypt_field
@@ -90,45 +89,42 @@ def _ctit_db_wrapper(trans_safe=False):
transaction.set_rollback(False)
yield
except DBError:
if 'migrate' in sys.argv and get_tower_migration_version() < '310':
logger.info('Using default settings until version 3.1 migration.')
else:
# We want the _full_ traceback with the context
# First we get the current call stack, which constitutes the "top",
# it has the context up to the point where the context manager is used
top_stack = StringIO()
traceback.print_stack(file=top_stack)
top_lines = top_stack.getvalue().strip('\n').split('\n')
top_stack.close()
# Get "bottom" stack from the local error that happened
# inside of the "with" block this wraps
exc_type, exc_value, exc_traceback = sys.exc_info()
bottom_stack = StringIO()
traceback.print_tb(exc_traceback, file=bottom_stack)
bottom_lines = bottom_stack.getvalue().strip('\n').split('\n')
# Glue together top and bottom where overlap is found
bottom_cutoff = 0
for i, line in enumerate(bottom_lines):
if line in top_lines:
# start of overlapping section, take overlap from bottom
top_lines = top_lines[:top_lines.index(line)]
bottom_cutoff = i
break
bottom_lines = bottom_lines[bottom_cutoff:]
tb_lines = top_lines + bottom_lines
# We want the _full_ traceback with the context
# First we get the current call stack, which constitutes the "top",
# it has the context up to the point where the context manager is used
top_stack = StringIO()
traceback.print_stack(file=top_stack)
top_lines = top_stack.getvalue().strip('\n').split('\n')
top_stack.close()
# Get "bottom" stack from the local error that happened
# inside of the "with" block this wraps
exc_type, exc_value, exc_traceback = sys.exc_info()
bottom_stack = StringIO()
traceback.print_tb(exc_traceback, file=bottom_stack)
bottom_lines = bottom_stack.getvalue().strip('\n').split('\n')
# Glue together top and bottom where overlap is found
bottom_cutoff = 0
for i, line in enumerate(bottom_lines):
if line in top_lines:
# start of overlapping section, take overlap from bottom
top_lines = top_lines[:top_lines.index(line)]
bottom_cutoff = i
break
bottom_lines = bottom_lines[bottom_cutoff:]
tb_lines = top_lines + bottom_lines
tb_string = '\n'.join(
['Traceback (most recent call last):'] +
tb_lines +
['{}: {}'.format(exc_type.__name__, str(exc_value))]
)
bottom_stack.close()
# Log the combined stack
if trans_safe:
if 'check_migrations' not in sys.argv:
logger.warning('Database settings are not available, using defaults, error:\n{}'.format(tb_string))
else:
logger.error('Error modifying something related to database settings.\n{}'.format(tb_string))
tb_string = '\n'.join(
['Traceback (most recent call last):'] +
tb_lines +
['{}: {}'.format(exc_type.__name__, str(exc_value))]
)
bottom_stack.close()
# Log the combined stack
if trans_safe:
if 'check_migrations' not in sys.argv:
logger.debug('Database settings are not available, using defaults, error:\n{}'.format(tb_string))
else:
logger.debug('Error modifying something related to database settings.\n{}'.format(tb_string))
finally:
if trans_safe and is_atomic and rollback_set:
transaction.set_rollback(rollback_set)
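
Most of the hunk above is a re-indent: the pre-3.1 migration special case is dropped, and the two log calls at the end move from warning/error down to debug. The traceback-splicing logic it carries along is unchanged; for reference, here is a self-contained sketch of that technique, assuming nothing AWX-specific:

import sys
import traceback
from io import StringIO

def full_traceback_lines():
    # "Top": the current call stack, up to this point.
    top_stack = StringIO()
    traceback.print_stack(file=top_stack)
    top_lines = top_stack.getvalue().strip('\n').split('\n')
    top_stack.close()
    # "Bottom": the traceback of the exception being handled.
    exc_type, exc_value, exc_tb = sys.exc_info()
    bottom_stack = StringIO()
    traceback.print_tb(exc_tb, file=bottom_stack)
    bottom_lines = bottom_stack.getvalue().strip('\n').split('\n')
    bottom_stack.close()
    # Glue at the first overlapping frame so nothing appears twice.
    for i, line in enumerate(bottom_lines):
        if line in top_lines:
            top_lines = top_lines[:top_lines.index(line)]
            bottom_lines = bottom_lines[i:]
            break
    return (['Traceback (most recent call last):'] + top_lines + bottom_lines
            + ['{}: {}'.format(exc_type.__name__, exc_value)])

try:
    raise ValueError('boom')
except ValueError:
    print('\n'.join(full_traceback_lines()))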

View File

@@ -40,3 +40,14 @@ ENV_BLACKLIST = frozenset((
'JOB_CALLBACK_DEBUG', 'INVENTORY_HOSTVARS',
'AWX_HOST', 'PROJECT_REVISION'
))
# loggers that may be called in process of emitting a log
LOGGER_BLACKLIST = (
'awx.main.utils.handlers',
'awx.main.utils.formatters',
'awx.main.utils.filters',
'awx.main.utils.encryption',
'awx.main.utils.log',
# loggers that may be called getting logging settings
'awx.conf'
)
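
This blacklist moves out of the filter class (see the filters.py hunk further down) and into awx.main.constants so that both the external-log filter and the new handler-level property can import the same tuple. The guard it supports, shown standalone (a hypothetical minimal version, not AWX's exact filter):

import logging

LOGGER_BLACKLIST = (
    'awx.main.utils.handlers',
    'awx.main.utils.formatters',
    'awx.main.utils.filters',
    'awx.main.utils.encryption',
    'awx.main.utils.log',
    'awx.conf',
)

class BlacklistFilter(logging.Filter):
    def filter(self, record):
        # Drop records from any blacklisted logger (or its children) so
        # that emitting a log can never recursively trigger more logging.
        return not any(record.name.startswith(name)
                       for name in LOGGER_BLACKLIST)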

View File

@@ -108,7 +108,7 @@ class TaskManager():
dag = WorkflowDAG(workflow_job)
spawn_nodes = dag.bfs_nodes_to_run()
if spawn_nodes:
logger.info('Spawning jobs for %s', workflow_job.log_format)
logger.debug('Spawning jobs for %s', workflow_job.log_format)
else:
logger.debug('No nodes to spawn for %s', workflow_job.log_format)
for spawn_node in spawn_nodes:
@@ -118,7 +118,7 @@ class TaskManager():
job = spawn_node.unified_job_template.create_unified_job(**kv)
spawn_node.job = job
spawn_node.save()
logger.info('Spawned %s in %s for node %s', job.log_format, workflow_job.log_format, spawn_node.pk)
logger.debug('Spawned %s in %s for node %s', job.log_format, workflow_job.log_format, spawn_node.pk)
can_start = True
if isinstance(spawn_node.unified_job_template, WorkflowJobTemplate):
workflow_ancestors = job.get_ancestor_workflows()
@@ -178,7 +178,7 @@ class TaskManager():
if not is_done:
continue
has_failed, reason = dag.has_workflow_failed()
logger.info('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
logger.debug('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
result.append(workflow_job.id)
new_status = 'failed' if has_failed else 'successful'
logger.debug("Transitioning {} to {} status.".format(workflow_job.log_format, new_status))
@@ -233,26 +233,26 @@ class TaskManager():
else:
if type(task) is WorkflowJob:
task.status = 'running'
logger.info('Transitioning %s to running status.', task.log_format)
logger.debug('Transitioning %s to running status.', task.log_format)
schedule_task_manager()
elif not task.supports_isolation() and rampart_group.controller_id:
# non-Ansible jobs on isolated instances run on controller
task.instance_group = rampart_group.controller
task.execution_node = random.choice(list(rampart_group.controller.instances.all().values_list('hostname', flat=True)))
logger.info('Submitting isolated {} to queue {}.'.format(
task.log_format, task.instance_group.name, task.execution_node))
logger.debug('Submitting isolated {} to queue {}.'.format(
task.log_format, task.instance_group.name, task.execution_node))
elif controller_node:
task.instance_group = rampart_group
task.execution_node = instance.hostname
task.controller_node = controller_node
logger.info('Submitting isolated {} to queue {} controlled by {}.'.format(
task.log_format, task.execution_node, controller_node))
logger.debug('Submitting isolated {} to queue {} controlled by {}.'.format(
task.log_format, task.execution_node, controller_node))
else:
task.instance_group = rampart_group
if instance is not None:
task.execution_node = instance.hostname
logger.info('Submitting {} to <instance group, instance> <{},{}>.'.format(
task.log_format, task.instance_group_id, task.execution_node))
logger.debug('Submitting {} to <instance group, instance> <{},{}>.'.format(
task.log_format, task.instance_group_id, task.execution_node))
with disable_activity_stream():
task.celery_task_id = str(uuid.uuid4())
task.save()
@@ -295,7 +295,7 @@ class TaskManager():
project_task.created = task.created - timedelta(seconds=1)
project_task.status = 'pending'
project_task.save()
logger.info(
logger.debug(
'Spawned {} as dependency of {}'.format(
project_task.log_format, task.log_format
)
@@ -309,7 +309,7 @@ class TaskManager():
inventory_task.created = task.created - timedelta(seconds=2)
inventory_task.status = 'pending'
inventory_task.save()
logger.info(
logger.debug(
'Spawned {} as dependency of {}'.format(
inventory_task.log_format, task.log_format
)

View File

@@ -103,7 +103,7 @@ class InvalidVirtualenvError(Exception):
def dispatch_startup():
startup_logger = logging.getLogger('awx.main.tasks')
startup_logger.info("Syncing Schedules")
startup_logger.debug("Syncing Schedules")
for sch in Schedule.objects.all():
try:
sch.update_computed_fields()
@@ -189,20 +189,20 @@ def apply_cluster_membership_policies():
# NOTE: arguable behavior: policy-list-group is not added to
# instance's group count for consideration in minimum-policy rules
if group_actual.instances:
logger.info("Policy List, adding Instances {} to Group {}".format(group_actual.instances, ig.name))
logger.debug("Policy List, adding Instances {} to Group {}".format(group_actual.instances, ig.name))
if ig.controller_id is None:
actual_groups.append(group_actual)
else:
# For isolated groups, _only_ apply the policy_instance_list
# do not add to in-memory list, so minimum rules not applied
logger.info('Committing instances to isolated group {}'.format(ig.name))
logger.debug('Committing instances to isolated group {}'.format(ig.name))
ig.instances.set(group_actual.instances)
# Process Instance minimum policies next, since it represents a concrete lower bound to the
# number of instances to make available to instance groups
actual_instances = [Node(obj=i, groups=[]) for i in considered_instances if i.managed_by_policy]
logger.info("Total non-isolated instances:{} available for policy: {}".format(
logger.debug("Total non-isolated instances:{} available for policy: {}".format(
total_instances, len(actual_instances)))
for g in sorted(actual_groups, key=lambda x: len(x.instances)):
policy_min_added = []
@@ -217,7 +217,7 @@ def apply_cluster_membership_policies():
i.groups.append(g.obj.id)
policy_min_added.append(i.obj.id)
if policy_min_added:
logger.info("Policy minimum, adding Instances {} to Group {}".format(policy_min_added, g.obj.name))
logger.debug("Policy minimum, adding Instances {} to Group {}".format(policy_min_added, g.obj.name))
# Finally, process instance policy percentages
for g in sorted(actual_groups, key=lambda x: len(x.instances)):
@@ -233,7 +233,7 @@ def apply_cluster_membership_policies():
i.groups.append(g.obj.id)
policy_per_added.append(i.obj.id)
if policy_per_added:
logger.info("Policy percentage, adding Instances {} to Group {}".format(policy_per_added, g.obj.name))
logger.debug("Policy percentage, adding Instances {} to Group {}".format(policy_per_added, g.obj.name))
# Determine if any changes need to be made
needs_change = False
@@ -242,7 +242,7 @@ def apply_cluster_membership_policies():
needs_change = True
break
if not needs_change:
logger.info('Cluster policy no-op finished in {} seconds'.format(time.time() - started_compute))
logger.debug('Cluster policy no-op finished in {} seconds'.format(time.time() - started_compute))
return
# On a differential basis, apply instances to non-isolated groups
@@ -251,12 +251,12 @@ def apply_cluster_membership_policies():
instances_to_add = set(g.instances) - set(g.prior_instances)
instances_to_remove = set(g.prior_instances) - set(g.instances)
if instances_to_add:
logger.info('Adding instances {} to group {}'.format(list(instances_to_add), g.obj.name))
logger.debug('Adding instances {} to group {}'.format(list(instances_to_add), g.obj.name))
g.obj.instances.add(*instances_to_add)
if instances_to_remove:
logger.info('Removing instances {} from group {}'.format(list(instances_to_remove), g.obj.name))
logger.debug('Removing instances {} from group {}'.format(list(instances_to_remove), g.obj.name))
g.obj.instances.remove(*instances_to_remove)
logger.info('Cluster policy computation finished in {} seconds'.format(time.time() - started_compute))
logger.debug('Cluster policy computation finished in {} seconds'.format(time.time() - started_compute))
@task(queue='tower_broadcast_all', exchange_type='fanout')
@@ -277,7 +277,7 @@ def delete_project_files(project_path):
if os.path.exists(project_path):
try:
shutil.rmtree(project_path)
logger.info('Success removing project files {}'.format(project_path))
logger.debug('Success removing project files {}'.format(project_path))
except Exception:
logger.exception('Could not remove project directory {}'.format(project_path))
if os.path.exists(lock_file):
@@ -372,7 +372,7 @@ def purge_old_stdout_files():
for f in os.listdir(settings.JOBOUTPUT_ROOT):
if os.path.getctime(os.path.join(settings.JOBOUTPUT_ROOT,f)) < nowtime - settings.LOCAL_STDOUT_EXPIRE_TIME:
os.unlink(os.path.join(settings.JOBOUTPUT_ROOT,f))
logger.info("Removing {}".format(os.path.join(settings.JOBOUTPUT_ROOT,f)))
logger.debug("Removing {}".format(os.path.join(settings.JOBOUTPUT_ROOT,f)))
@task(queue=get_local_queuename)
@@ -514,7 +514,7 @@ def awx_periodic_scheduler():
try:
job_kwargs = schedule.get_job_kwargs()
new_unified_job = schedule.unified_job_template.create_unified_job(**job_kwargs)
logger.info('Spawned {} from schedule {}-{}.'.format(
logger.debug('Spawned {} from schedule {}-{}.'.format(
new_unified_job.log_format, schedule.name, schedule.pk))
if invalid_license:
@@ -1264,7 +1264,7 @@ class BaseTask(object):
extra_update_fields['result_traceback'] = traceback.format_exc()
logger.exception('%s Exception occurred while running task', self.instance.log_format)
finally:
logger.info('%s finished running, producing %s events.', self.instance.log_format, self.event_ct)
logger.debug('%s finished running, producing %s events.', self.instance.log_format, self.event_ct)
try:
self.post_run_hook(self.instance, status)
@@ -1795,8 +1795,8 @@ class RunProjectUpdate(BaseTask):
with transaction.atomic():
if InventoryUpdate.objects.filter(inventory_source=inv_src,
status__in=ACTIVE_STATES).exists():
logger.info('Skipping SCM inventory update for `{}` because '
'another update is already active.'.format(inv_src.name))
logger.debug('Skipping SCM inventory update for `{}` because '
'another update is already active.'.format(inv_src.name))
continue
local_inv_update = inv_src.create_inventory_update(
_eager_fields=dict(
@@ -1862,7 +1862,7 @@ class RunProjectUpdate(BaseTask):
try:
instance.refresh_from_db(fields=['cancel_flag'])
if instance.cancel_flag:
logger.info("ProjectUpdate({0}) was cancelled".format(instance.pk))
logger.debug("ProjectUpdate({0}) was cancelled".format(instance.pk))
return
fcntl.flock(self.lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
break
@@ -2415,7 +2415,7 @@ def deep_copy_model_obj(
model_module, model_name, obj_pk, new_obj_pk,
user_pk, sub_obj_list, permission_check_func=None
):
logger.info('Deep copy {} from {} to {}.'.format(model_name, obj_pk, new_obj_pk))
logger.debug('Deep copy {} from {} to {}.'.format(model_name, obj_pk, new_obj_pk))
from awx.api.generics import CopyAPIView
from awx.main.signals import disable_activity_stream
model = getattr(importlib.import_module(model_module), model_name, None)

View File

@@ -29,20 +29,6 @@ def test_base_logging_handler_skip_log(params, logger_name, expected, dummy_log_
assert filter.filter(dummy_log_record) is expected, (params, logger_name)
@pytest.mark.parametrize('level, expect', [
(30, True), # warning
(20, False) # info
])
def test_log_configurable_severity(level, expect, dummy_log_record):
dummy_log_record.levelno = level
filter = ExternalLoggerEnabled(
enabled_flag=True,
enabled_loggers=['awx', 'activity_stream', 'job_events', 'system_tracking'],
lvl='WARNING'
)
assert filter.filter(dummy_log_record) is expect
class Field(object):
def __init__(self, name, related_model=None, __prevent_search__=None):

View File

@@ -1,26 +1,7 @@
# Copyright (c) 2017 Ansible by Red Hat
# All Rights Reserved.
# Django database
from django.db.migrations.loader import MigrationLoader
from django.db import connection
# Python
from itertools import chain
import re
def get_tower_migration_version():
loader = MigrationLoader(connection, ignore_no_migrations=True)
v = '000'
for app_name, migration_name in loader.applied_migrations:
if app_name == 'main':
version_captures = re.findall('^[0-9]{4}_v([0-9]{3})_', migration_name)
if len(version_captures) == 1:
migration_version = version_captures[0]
if migration_version > v:
v = migration_version
return v
def get_all_field_names(model):

View File

@@ -9,12 +9,13 @@ from pyparsing import (
ParseException,
)
import logging
from logging import Filter, _nameToLevel
from logging import Filter
from django.apps import apps
from django.db import models
from django.conf import settings
from awx.main.constants import LOGGER_BLACKLIST
from awx.main.utils.common import get_search_fields
__all__ = ['SmartFilter', 'ExternalLoggerEnabled']
@@ -49,18 +50,6 @@ class FieldFromSettings(object):
class ExternalLoggerEnabled(Filter):
# Prevents recursive logging loops from swamping the server
LOGGER_BLACKLIST = (
# loggers that may be called in process of emitting a log
'awx.main.utils.handlers',
'awx.main.utils.formatters',
'awx.main.utils.filters',
'awx.main.utils.encryption',
'awx.main.utils.log',
# loggers that may be called getting logging settings
'awx.conf'
)
lvl = FieldFromSettings('LOG_AGGREGATOR_LEVEL')
enabled_loggers = FieldFromSettings('LOG_AGGREGATOR_LOGGERS')
enabled_flag = FieldFromSettings('LOG_AGGREGATOR_ENABLED')
@@ -84,17 +73,13 @@
True - should be logged
"""
# Logger exceptions
for logger_name in self.LOGGER_BLACKLIST:
for logger_name in LOGGER_BLACKLIST:
if record.name.startswith(logger_name):
return False
# General enablement
if not self.enabled_flag:
return False
# Level enablement
if record.levelno < _nameToLevel[self.lvl]:
return False
# Logger type enablement
loggers = self.enabled_loggers
if not loggers:

View File

@@ -19,11 +19,12 @@ from django.conf import settings
from requests_futures.sessions import FuturesSession
# AWX
from awx.main.constants import LOGGER_BLACKLIST
from awx.main.utils.formatters import LogstashFormatter
__all__ = ['BaseHTTPSHandler', 'TCPHandler', 'UDPHandler',
'AWXProxyHandler']
'AWXProxyHandler', 'RotatingProductionLogHandler']
logger = logging.getLogger('awx.main.utils.handlers')
@@ -91,6 +92,28 @@ class SocketResult:
return self
class DynamicLevelMixin(object):
@property
def level(self):
from django.conf import settings
for logger_name in LOGGER_BLACKLIST:
if self.name.startswith(logger_name):
return 30 # WARNING
try:
return logging._nameToLevel[settings.LOG_AGGREGATOR_LEVEL]
except Exception:
return 30 # WARNING
@level.setter
def level(self, level):
pass # no-op, this value comes from the database
class RotatingProductionLogHandler(logging.handlers.RotatingFileHandler, DynamicLevelMixin):
pass
class BaseHandler(logging.Handler):
def __init__(self, host=None, port=None, indv_facts=None, **kwargs):
super(BaseHandler, self).__init__()
@@ -272,7 +295,7 @@ HANDLER_MAPPING = {
}
class AWXProxyHandler(logging.Handler):
class AWXProxyHandler(logging.Handler, DynamicLevelMixin):
'''
Handler specific to the AWX external logging feature

View File

@@ -1013,8 +1013,8 @@ LOGGING = {
'class': 'django.utils.log.AdminEmailHandler',
},
'tower_warnings': {
'level': 'WARNING',
'class':'logging.handlers.RotatingFileHandler',
# don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
'filters': ['require_debug_false'],
'filename': os.path.join(LOG_ROOT, 'tower.log'),
'maxBytes': 1024 * 1024 * 5, # 5 MB
@@ -1022,8 +1022,8 @@ LOGGING = {
'formatter':'simple',
},
'callback_receiver': {
'level': 'WARNING',
'class':'logging.handlers.RotatingFileHandler',
# don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
'filters': ['require_debug_false'],
'filename': os.path.join(LOG_ROOT, 'callback_receiver.log'),
'maxBytes': 1024 * 1024 * 5, # 5 MB
@@ -1031,8 +1031,8 @@ LOGGING = {
'formatter':'simple',
},
'dispatcher': {
'level': 'WARNING',
'class':'logging.handlers.RotatingFileHandler',
# don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
'filters': ['require_debug_false'],
'filename': os.path.join(LOG_ROOT, 'dispatcher.log'),
'maxBytes': 1024 * 1024 * 5, # 5 MB
@@ -1049,8 +1049,8 @@ LOGGING = {
'formatter': 'timed_import',
},
'task_system': {
'level': 'INFO',
'class':'logging.handlers.RotatingFileHandler',
# don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
'filters': ['require_debug_false'],
'filename': os.path.join(LOG_ROOT, 'task_system.log'),
'maxBytes': 1024 * 1024 * 5, # 5 MB
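
With 'level' removed from each handler entry, dictConfig wires the dynamic behavior in purely through the 'class' path: the handler is instantiated like any stock RotatingFileHandler, and its level property does the rest. A reduced sketch of the shape of one entry (paths and backupCount are illustrative, not copied from the diff):

import logging.config

LOGGING = {
    'version': 1,
    'handlers': {
        'tower_warnings': {
            # no 'level' key: the handler property supplies it dynamically
            'class': 'awx.main.utils.handlers.RotatingProductionLogHandler',
            'filename': '/var/log/tower/tower.log',
            'maxBytes': 1024 * 1024 * 5,  # 5 MB
            'backupCount': 5,
        },
    },
    'loggers': {
        'awx': {'handlers': ['tower_warnings'], 'level': 'DEBUG'},
    },
}
logging.config.dictConfig(LOGGING)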