mirror of
https://github.com/samba-team/samba.git
synced 2025-01-21 18:04:06 +03:00
ca5cc05b22
For --merge-by-timestamp the traces do not need to contain the traceid header field. Signed-off-by: Pavel Filipenský <pfilipensky@samba.org> Reviewed-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz> Autobuild-User(master): Pavel Filipensky <pfilipensky@samba.org> Autobuild-Date(master): Thu Jul 20 19:14:05 UTC 2023 on atb-devel-224
383 lines
13 KiB
Python
Executable File
383 lines
13 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
#######################################################################
|
|
#
|
|
# A script to parse samba (especially winbind) logfiles.
|
|
# Trace files should be in a non-syslog format (debug syslog format = no).
|
|
#
|
|
# --traceid ... Specify the traceid of the request to parse
|
|
# --pid ... Specify the pid
|
|
# --breakdown ... Break to separate files per each traceid
|
|
# --merge-by-timestamp ... Merge logs by timestamp
|
|
# --flow ... Show the request/sub-request call flow
|
|
# --flow-compact ... Show the request/sub-request call flow without dcerpc
|
|
#
|
|
#
|
|
# Copyright (c) 2023 Andreas Schneider <asn@samba.org>
|
|
# Copyright (c) 2023 Pavel Filipenský <pfilipen@redhat.com>
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
#
|
|
#######################################################################
|
|
#
|
|
# Requires: ???
|
|
|
|
import sys
|
|
import os
|
|
import re
|
|
from argparse import ArgumentParser
|
|
from collections import defaultdict
|
|
|
|
# Trace record consists of a trace header followed by one or more text lines.
|
|
#
|
|
# This tool expects trace header format based on these smb.conf parameters:
|
|
#
|
|
# debug syslog format = no
|
|
# debug hires timestamp = yes
|
|
# winbind debug traceid = yes
|
|
#
|
|
# If 'winbind debug traceid = no' is set, then the option --merge-by-timestamp
|
|
# still can be used.
|
|
#
|
|
# Each trace header contains a traceid, which is the main identifier for this
|
|
# tool. A single traceid is either provided via command line option --traceid
|
|
# or a list of traceids is derived from the PID specified via option --pid.
|
|
# Creating and evaluating list of traceids from PID can be tricky:
|
|
# The traceid can appear in a trace record before trace record containing the
|
|
# PID is processed. So when we see a new traceid we are not sure if it belongs
|
|
# to the traced PID.
|
|
# The PID appears only in the main winbind process (log.winbind). If a
|
|
# directory with many log files should be processed, we process the files in
|
|
# random order.
|
|
# It might happen that e.g. log.wb-ADDOMAIN is processed before log.winbind so
|
|
# we do not know the list of traceids yet.
|
|
# To make all this easy we put into memory all trace records and do the final
|
|
# traceid filtering only after all files are read. This can require lot of
|
|
# memory if files are big.
|
|
|
|
|
|
def process_file_no_traceid(record_list, fname):
|
|
with open(fname, "r") as infile:
|
|
data = infile.readlines()
|
|
date = ""
|
|
record_lines = []
|
|
|
|
RE_HEADER_NO_TRACEID = re.compile(
|
|
r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{6}).*")
|
|
for line in data:
|
|
header = RE_HEADER_NO_TRACEID.search(line)
|
|
if header:
|
|
# Append all previous trace lines of a record
|
|
if record_lines:
|
|
record_list.append((date, None, record_lines, fname))
|
|
record_lines = []
|
|
# Remember the new date
|
|
date = header.group(1)
|
|
record_lines.append(line)
|
|
|
|
|
|
def process_file(record_list, traceid_set, fname, opid, otraceid):
|
|
with open(fname, "r") as infile:
|
|
data = infile.readlines()
|
|
pid = None
|
|
traceid = 0
|
|
traceid_prev = None
|
|
undecided_traceid = False
|
|
date = ""
|
|
record_lines = []
|
|
|
|
# If traceid option was provided the traceid_set will contain just it
|
|
if otraceid:
|
|
traceid_set.add(otraceid)
|
|
|
|
RE_HEADER = re.compile(
|
|
r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{6}).*?, .*, "
|
|
r"traceid=([0-9]+).*\]")
|
|
RE_INTERFACE_VERSION = re.compile(
|
|
r"^\s+winbindd_interface_version: \[\S* \((\d+)\)\]")
|
|
RE_ASYNC_REQUEST = re.compile(
|
|
r"^\s+process_request_send: "
|
|
r"\[\S* \((\d+)\)\] Handling async request:")
|
|
# Example of a header line
|
|
# [2023/05/01 07:40:45.439049, 3, pid=418844, effective(0, 0), real(0, 0), class=winbind, traceid=37] ../../source3/winbindd/winbindd_misc.c:355(winbindd_interface_version)
|
|
for line in data:
|
|
header = RE_HEADER.search(line)
|
|
if header:
|
|
# Append all previous trace lines of a record if the traceid is in
|
|
# the list.
|
|
if record_lines:
|
|
record_list.append((date, traceid, record_lines, fname))
|
|
record_lines = []
|
|
# Remember the new date and the new traceid
|
|
date = header.group(1)
|
|
traceid = header.group(2)
|
|
if traceid != traceid_prev:
|
|
traceid_prev = traceid
|
|
undecided_traceid = True
|
|
if opid:
|
|
# Search for lines that identify a new winbind client and the
|
|
# client PID
|
|
|
|
# winbindd_interface_version: [nss_winbind (500725)]: request interface version (version = 32)
|
|
# process_request_send: [nss_winbind (500725)] Handling async request: SETPWENT
|
|
interface_version = RE_INTERFACE_VERSION.search(line)
|
|
async_request = RE_ASYNC_REQUEST.search(line)
|
|
if interface_version:
|
|
pid = interface_version.group(1)
|
|
if undecided_traceid:
|
|
if pid == opid:
|
|
traceid_set.add(traceid)
|
|
undecided_traceid = False
|
|
if async_request:
|
|
pid = async_request.group(1)
|
|
if undecided_traceid:
|
|
if pid == opid:
|
|
traceid_set.add(traceid)
|
|
undecided_traceid = False
|
|
# For --breakdown add every traceid
|
|
if not opid and not otraceid:
|
|
traceid_set.add(traceid)
|
|
|
|
record_lines.append(line)
|
|
|
|
|
|
def filter_traceids(record_list, traceid_set):
|
|
llist = []
|
|
for (d, t, li, f) in record_list:
|
|
if t in traceid_set:
|
|
llist.append((d, t, li, f))
|
|
return llist
|
|
|
|
|
|
def filter_flow(record_list):
|
|
local_list = []
|
|
for (date, traceid, lines, filename) in record_list:
|
|
for line in lines:
|
|
isflow = re.search(r"^(\s+)flow: (.*)", line)
|
|
if isflow:
|
|
local_list.append(isflow.group(1) + isflow.group(2))
|
|
return local_list
|
|
|
|
|
|
def filter_flowcompact(flist):
|
|
local_list = []
|
|
end_marker = None
|
|
for fl in flist:
|
|
if not end_marker:
|
|
local_list.append(fl)
|
|
dcerpc_start = re.search(r"^(\s+)-> dcerpc_", fl)
|
|
if dcerpc_start:
|
|
end_marker = dcerpc_start.group(1)
|
|
else:
|
|
dcerpc_end = re.search(r"^" + end_marker + "<- dcerpc_", fl)
|
|
if dcerpc_end:
|
|
end_marker = None
|
|
local_list.append(fl)
|
|
return local_list
|
|
|
|
|
|
def print_record_list(record_list, file):
|
|
f_prev = None
|
|
for (date, traceid, lines, filename) in record_list:
|
|
# Inform about filename change
|
|
if filename != f_prev:
|
|
print("-" * 72, file=file)
|
|
print("FILE: ", filename, file=file)
|
|
print("-" * 72, file=file)
|
|
for line in lines:
|
|
print(line, end='', file=file)
|
|
f_prev = filename
|
|
|
|
# record_list ... list of quadruplets <date, traceid, [trace lines], filename>
|
|
# flow_list ... lines from record_list with 'flow' traces
|
|
# traceid_set ... list of traceids we want to trace
|
|
# with --traceid ... there is a single traceids
|
|
# with --pid ... there are all traceids for the PID
|
|
# with --breakdown ... there are all traceids
|
|
|
|
|
|
def setup_parser():
|
|
parser = ArgumentParser()
|
|
|
|
parser.add_argument(
|
|
"path",
|
|
type=str,
|
|
help="logfile or directory"
|
|
)
|
|
parser.add_argument(
|
|
"--traceid",
|
|
dest="traceid",
|
|
help="specify the traceid of the trace records",
|
|
metavar="ID"
|
|
)
|
|
parser.add_argument(
|
|
"--pid",
|
|
dest="pid",
|
|
help="specify the pid of winbind client",
|
|
metavar="PID"
|
|
)
|
|
parser.add_argument(
|
|
"--breakdown",
|
|
action="store_true",
|
|
dest="breakdown",
|
|
default=False,
|
|
help="breakdown the traces into per traceid files"
|
|
)
|
|
parser.add_argument(
|
|
"--merge-by-timestamp",
|
|
action="store_true",
|
|
dest="merge",
|
|
default=False,
|
|
help="merge logs by timestamp"
|
|
)
|
|
parser.add_argument(
|
|
"--flow",
|
|
action="store_true",
|
|
dest="flow",
|
|
default=False,
|
|
help="show the request/sub-request flow traces"
|
|
)
|
|
parser.add_argument(
|
|
"--flow-compact",
|
|
action="store_true",
|
|
dest="flowcompact",
|
|
default=False,
|
|
help="show the request/sub-request flow traces without dcerpc details"
|
|
)
|
|
return parser
|
|
|
|
|
|
def main(): # noqa
|
|
record_list = []
|
|
flow_list = []
|
|
traceid_set = set()
|
|
|
|
parser = setup_parser()
|
|
options = parser.parse_args()
|
|
|
|
if (not options.traceid and not options.pid and not options.breakdown
|
|
and not options.merge):
|
|
print("One of --traceid or --pid is needed"
|
|
" or --breakdown or --merge-by-timestamp.")
|
|
sys.exit(1)
|
|
elif options.traceid and options.pid:
|
|
print("Only one of --traceid or --pid is allowed.")
|
|
sys.exit(1)
|
|
elif options.breakdown and (options.traceid or options.pid):
|
|
print("--breakdown cannot be combined with --traceid and --pid.")
|
|
sys.exit(1)
|
|
|
|
if options.flow and not options.traceid:
|
|
print("Option --flow can be used only together with --traceid.")
|
|
sys.exit(1)
|
|
|
|
if options.flowcompact and not options.traceid:
|
|
print("Option --flow-compact can be used only together with "
|
|
"--traceid.")
|
|
sys.exit(1)
|
|
|
|
if options.flow and options.flowcompact:
|
|
print("Only one of --flow or --flow-compact is allowed.")
|
|
sys.exit(1)
|
|
|
|
if not options.path:
|
|
print("Path to logfile or directory with logs is needed.")
|
|
sys.exit(1)
|
|
|
|
merge_with_no_traceid = (not options.traceid and not options.pid
|
|
and not options.breakdown) and options.merge
|
|
|
|
path = options.path
|
|
if os.path.isdir(path):
|
|
for root, dirs, files in os.walk(path):
|
|
for name in files:
|
|
if merge_with_no_traceid:
|
|
process_file_no_traceid(
|
|
record_list,
|
|
os.path.join(root, name)
|
|
)
|
|
else:
|
|
process_file(
|
|
record_list,
|
|
traceid_set,
|
|
os.path.join(root, name),
|
|
options.pid,
|
|
options.traceid,
|
|
)
|
|
elif os.path.isfile(path):
|
|
if merge_with_no_traceid:
|
|
process_file_no_traceid(
|
|
record_list,
|
|
path
|
|
)
|
|
else:
|
|
process_file(
|
|
record_list,
|
|
traceid_set,
|
|
path,
|
|
options.pid,
|
|
options.traceid
|
|
)
|
|
else:
|
|
print(path, "Path is neither file or directory.")
|
|
sys.exit(1)
|
|
|
|
# Sort only using timestamps, no use of traceid
|
|
if merge_with_no_traceid:
|
|
record_list.sort()
|
|
print_record_list(record_list, sys.stdout)
|
|
sys.exit(0)
|
|
|
|
# Keep only records with matching traceids
|
|
if not options.breakdown:
|
|
record_list = filter_traceids(record_list, traceid_set)
|
|
|
|
if options.breakdown:
|
|
for traceid in traceid_set:
|
|
# Full
|
|
with open("%s.full" % traceid, "w") as full_f:
|
|
full_l = filter_traceids(record_list, {traceid})
|
|
if options.merge:
|
|
full_l.sort()
|
|
print_record_list(full_l, full_f)
|
|
# Flow
|
|
with open("%s.flow" % traceid, "w") as flow_f:
|
|
flow_l = filter_flow(full_l)
|
|
for fl in flow_l:
|
|
print(fl, file=flow_f)
|
|
# Flow compact
|
|
with open("%s.flowcompact" % traceid, "w") as flowcompact_f:
|
|
flowcompact_l = filter_flowcompact(flow_l)
|
|
for fl in flowcompact_l:
|
|
print(fl, file=flowcompact_f)
|
|
elif options.flow:
|
|
flow_list = filter_flow(record_list)
|
|
for fl in flow_list:
|
|
print(fl)
|
|
elif options.flowcompact:
|
|
flow_list = filter_flow(record_list)
|
|
flow_list = filter_flowcompact(flow_list)
|
|
for fl in flow_list:
|
|
print(fl)
|
|
else:
|
|
if options.merge:
|
|
record_list.sort()
|
|
print_record_list(record_list, sys.stdout)
|
|
|
|
sys.exit(0)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|