mirror of
https://github.com/ansible/awx.git
synced 2024-10-27 00:55:06 +03:00
Merge pull request #7431 from ryanpetrello/bottleneck
add a tool for discovering which tasks/hosts are slow for a JT Reviewed-by: https://github.com/apps/softwarefactory-project-zuul
This commit is contained in:
commit
fa53cdf329
96
awx/main/management/commands/bottleneck.py
Normal file
96
awx/main/management/commands/bottleneck.py
Normal file
@ -0,0 +1,96 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import connection
|
||||
|
||||
from awx.main.models import JobTemplate
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "Find the slowest tasks and hosts for a Job Template's most recent runs."
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument('--template', dest='jt', type=int,
|
||||
help='ID of the Job Template to profile')
|
||||
parser.add_argument('--threshold', dest='threshold', type=float, default=30,
|
||||
help='Only show tasks that took at least this many seconds (defaults to 30)')
|
||||
parser.add_argument('--history', dest='history', type=float, default=25,
|
||||
help='The number of historic jobs to look at')
|
||||
parser.add_argument('--ignore', action='append', help='ignore a specific action (e.g., --ignore git)')
|
||||
|
||||
def handle(self, *args, **options):
|
||||
jt = options['jt']
|
||||
threshold = options['threshold']
|
||||
history = options['history']
|
||||
ignore = options['ignore']
|
||||
|
||||
print('## ' + JobTemplate.objects.get(pk=jt).name + f' (last {history} runs)\n')
|
||||
with connection.cursor() as cursor:
|
||||
cursor.execute(
|
||||
f'''
|
||||
SELECT
|
||||
b.id, b.job_id, b.host_name, b.created - a.created delta,
|
||||
b.task task,
|
||||
b.event_data::json->'task_action' task_action,
|
||||
b.event_data::json->'task_path' task_path
|
||||
FROM main_jobevent a JOIN main_jobevent b
|
||||
ON b.parent_uuid = a.parent_uuid AND a.host_name = b.host_name
|
||||
WHERE
|
||||
a.event = 'runner_on_start' AND
|
||||
b.event != 'runner_on_start' AND
|
||||
b.event != 'runner_on_skipped' AND
|
||||
b.failed = false AND
|
||||
a.job_id IN (
|
||||
SELECT unifiedjob_ptr_id FROM main_job
|
||||
WHERE job_template_id={jt}
|
||||
ORDER BY unifiedjob_ptr_id DESC
|
||||
LIMIT {history}
|
||||
)
|
||||
ORDER BY delta DESC;
|
||||
'''
|
||||
)
|
||||
slowest_events = cursor.fetchall()
|
||||
|
||||
def format_td(x):
|
||||
return str(x).split('.')[0]
|
||||
|
||||
fastest = dict()
|
||||
for event in slowest_events:
|
||||
_id, job_id, host, duration, task, action, playbook = event
|
||||
playbook = playbook.rsplit('/')[-1]
|
||||
if ignore and action in ignore:
|
||||
continue
|
||||
if host:
|
||||
fastest[(action, playbook)] = (_id, host, format_td(duration))
|
||||
|
||||
host_counts = dict()
|
||||
warned = set()
|
||||
print(f'slowest tasks (--threshold={threshold})\n---')
|
||||
|
||||
for event in slowest_events:
|
||||
_id, job_id, host, duration, task, action, playbook = event
|
||||
if ignore and action in ignore:
|
||||
continue
|
||||
if duration.total_seconds() < threshold:
|
||||
break
|
||||
playbook = playbook.rsplit('/')[-1]
|
||||
human_duration = format_td(duration)
|
||||
|
||||
fastest_summary = ''
|
||||
fastest_match = fastest.get((action, playbook))
|
||||
if fastest_match[2] != human_duration and (host, action, playbook) not in warned:
|
||||
warned.add((host, action, playbook))
|
||||
fastest_summary = ' ' + self.style.WARNING(f'{fastest_match[1]} ran this in {fastest_match[2]}s at /api/v2/job_events/{fastest_match[0]}/')
|
||||
|
||||
url = f'/api/v2/jobs/{job_id}/'
|
||||
print(' -- '.join([url, host, human_duration, action, task, playbook]) + fastest_summary)
|
||||
host_counts.setdefault(host, [])
|
||||
host_counts[host].append(duration)
|
||||
|
||||
host_counts = sorted(host_counts.items(), key=lambda item: [e.total_seconds() for e in item[1]], reverse=True)
|
||||
|
||||
print('\nslowest hosts\n---')
|
||||
for h, matches in host_counts:
|
||||
total = len(matches)
|
||||
total_seconds = sum([e.total_seconds() for e in matches])
|
||||
print(f'{h} had {total} tasks that ran longer than {threshold} second(s) for a total of {total_seconds}')
|
||||
|
||||
print('')
|
Loading…
Reference in New Issue
Block a user