1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-11 05:18:09 +03:00
samba-mirror/source3/script/samba-log-parser
Pavel Filipenský ca5cc05b22 s3:script: Replace --merge by --merge-by-timestamp in samba-log-parser
For --merge-by-timestamp the traces do not need to contain the traceid
header field.

Signed-off-by: Pavel Filipenský <pfilipensky@samba.org>
Reviewed-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>

Autobuild-User(master): Pavel Filipensky <pfilipensky@samba.org>
Autobuild-Date(master): Thu Jul 20 19:14:05 UTC 2023 on atb-devel-224
2023-07-20 19:14:05 +00:00

383 lines
13 KiB
Python
Executable File

#!/usr/bin/env python3
#
#######################################################################
#
# A script to parse samba (especially winbind) logfiles.
# Trace files should be in a non-syslog format (debug syslog format = no).
#
# --traceid ... Specify the traceid of the request to parse
# --pid ... Specify the pid
# --breakdown ... Break to separate files per each traceid
# --merge-by-timestamp ... Merge logs by timestamp
# --flow ... Show the request/sub-request call flow
# --flow-compact ... Show the request/sub-request call flow without dcerpc
#
#
# Copyright (c) 2023 Andreas Schneider <asn@samba.org>
# Copyright (c) 2023 Pavel Filipenský <pfilipen@redhat.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#######################################################################
#
# Requires: ???
import sys
import os
import re
from argparse import ArgumentParser
from collections import defaultdict
# Trace record consists of a trace header followed by one or more text lines.
#
# This tool expects trace header format based on these smb.conf parameters:
#
# debug syslog format = no
# debug hires timestamp = yes
# winbind debug traceid = yes
#
# If 'winbind debug traceid = no' is set, then the option --merge-by-timestamp
# still can be used.
#
# Each trace header contains a traceid, which is the main identifier for this
# tool. A single traceid is either provided via command line option --traceid
# or a list of traceids is derived from the PID specified via option --pid.
# Creating and evaluating list of traceids from PID can be tricky:
# The traceid can appear in a trace record before trace record containing the
# PID is processed. So when we see a new traceid we are not sure if it belongs
# to the traced PID.
# The PID appears only in the main winbind process (log.winbind). If a
# directory with many log files should be processed, we process the files in
# random order.
# It might happen that e.g. log.wb-ADDOMAIN is processed before log.winbind so
# we do not know the list of traceids yet.
# To make all this easy we put into memory all trace records and do the final
# traceid filtering only after all files are read. This can require lot of
# memory if files are big.
def process_file_no_traceid(record_list, fname):
with open(fname, "r") as infile:
data = infile.readlines()
date = ""
record_lines = []
RE_HEADER_NO_TRACEID = re.compile(
r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{6}).*")
for line in data:
header = RE_HEADER_NO_TRACEID.search(line)
if header:
# Append all previous trace lines of a record
if record_lines:
record_list.append((date, None, record_lines, fname))
record_lines = []
# Remember the new date
date = header.group(1)
record_lines.append(line)
def process_file(record_list, traceid_set, fname, opid, otraceid):
with open(fname, "r") as infile:
data = infile.readlines()
pid = None
traceid = 0
traceid_prev = None
undecided_traceid = False
date = ""
record_lines = []
# If traceid option was provided the traceid_set will contain just it
if otraceid:
traceid_set.add(otraceid)
RE_HEADER = re.compile(
r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{6}).*?, .*, "
r"traceid=([0-9]+).*\]")
RE_INTERFACE_VERSION = re.compile(
r"^\s+winbindd_interface_version: \[\S* \((\d+)\)\]")
RE_ASYNC_REQUEST = re.compile(
r"^\s+process_request_send: "
r"\[\S* \((\d+)\)\] Handling async request:")
# Example of a header line
# [2023/05/01 07:40:45.439049, 3, pid=418844, effective(0, 0), real(0, 0), class=winbind, traceid=37] ../../source3/winbindd/winbindd_misc.c:355(winbindd_interface_version)
for line in data:
header = RE_HEADER.search(line)
if header:
# Append all previous trace lines of a record if the traceid is in
# the list.
if record_lines:
record_list.append((date, traceid, record_lines, fname))
record_lines = []
# Remember the new date and the new traceid
date = header.group(1)
traceid = header.group(2)
if traceid != traceid_prev:
traceid_prev = traceid
undecided_traceid = True
if opid:
# Search for lines that identify a new winbind client and the
# client PID
# winbindd_interface_version: [nss_winbind (500725)]: request interface version (version = 32)
# process_request_send: [nss_winbind (500725)] Handling async request: SETPWENT
interface_version = RE_INTERFACE_VERSION.search(line)
async_request = RE_ASYNC_REQUEST.search(line)
if interface_version:
pid = interface_version.group(1)
if undecided_traceid:
if pid == opid:
traceid_set.add(traceid)
undecided_traceid = False
if async_request:
pid = async_request.group(1)
if undecided_traceid:
if pid == opid:
traceid_set.add(traceid)
undecided_traceid = False
# For --breakdown add every traceid
if not opid and not otraceid:
traceid_set.add(traceid)
record_lines.append(line)
def filter_traceids(record_list, traceid_set):
llist = []
for (d, t, li, f) in record_list:
if t in traceid_set:
llist.append((d, t, li, f))
return llist
def filter_flow(record_list):
local_list = []
for (date, traceid, lines, filename) in record_list:
for line in lines:
isflow = re.search(r"^(\s+)flow: (.*)", line)
if isflow:
local_list.append(isflow.group(1) + isflow.group(2))
return local_list
def filter_flowcompact(flist):
local_list = []
end_marker = None
for fl in flist:
if not end_marker:
local_list.append(fl)
dcerpc_start = re.search(r"^(\s+)-> dcerpc_", fl)
if dcerpc_start:
end_marker = dcerpc_start.group(1)
else:
dcerpc_end = re.search(r"^" + end_marker + "<- dcerpc_", fl)
if dcerpc_end:
end_marker = None
local_list.append(fl)
return local_list
def print_record_list(record_list, file):
f_prev = None
for (date, traceid, lines, filename) in record_list:
# Inform about filename change
if filename != f_prev:
print("-" * 72, file=file)
print("FILE: ", filename, file=file)
print("-" * 72, file=file)
for line in lines:
print(line, end='', file=file)
f_prev = filename
# record_list ... list of quadruplets <date, traceid, [trace lines], filename>
# flow_list ... lines from record_list with 'flow' traces
# traceid_set ... list of traceids we want to trace
# with --traceid ... there is a single traceids
# with --pid ... there are all traceids for the PID
# with --breakdown ... there are all traceids
def setup_parser():
parser = ArgumentParser()
parser.add_argument(
"path",
type=str,
help="logfile or directory"
)
parser.add_argument(
"--traceid",
dest="traceid",
help="specify the traceid of the trace records",
metavar="ID"
)
parser.add_argument(
"--pid",
dest="pid",
help="specify the pid of winbind client",
metavar="PID"
)
parser.add_argument(
"--breakdown",
action="store_true",
dest="breakdown",
default=False,
help="breakdown the traces into per traceid files"
)
parser.add_argument(
"--merge-by-timestamp",
action="store_true",
dest="merge",
default=False,
help="merge logs by timestamp"
)
parser.add_argument(
"--flow",
action="store_true",
dest="flow",
default=False,
help="show the request/sub-request flow traces"
)
parser.add_argument(
"--flow-compact",
action="store_true",
dest="flowcompact",
default=False,
help="show the request/sub-request flow traces without dcerpc details"
)
return parser
def main(): # noqa
record_list = []
flow_list = []
traceid_set = set()
parser = setup_parser()
options = parser.parse_args()
if (not options.traceid and not options.pid and not options.breakdown
and not options.merge):
print("One of --traceid or --pid is needed"
" or --breakdown or --merge-by-timestamp.")
sys.exit(1)
elif options.traceid and options.pid:
print("Only one of --traceid or --pid is allowed.")
sys.exit(1)
elif options.breakdown and (options.traceid or options.pid):
print("--breakdown cannot be combined with --traceid and --pid.")
sys.exit(1)
if options.flow and not options.traceid:
print("Option --flow can be used only together with --traceid.")
sys.exit(1)
if options.flowcompact and not options.traceid:
print("Option --flow-compact can be used only together with "
"--traceid.")
sys.exit(1)
if options.flow and options.flowcompact:
print("Only one of --flow or --flow-compact is allowed.")
sys.exit(1)
if not options.path:
print("Path to logfile or directory with logs is needed.")
sys.exit(1)
merge_with_no_traceid = (not options.traceid and not options.pid
and not options.breakdown) and options.merge
path = options.path
if os.path.isdir(path):
for root, dirs, files in os.walk(path):
for name in files:
if merge_with_no_traceid:
process_file_no_traceid(
record_list,
os.path.join(root, name)
)
else:
process_file(
record_list,
traceid_set,
os.path.join(root, name),
options.pid,
options.traceid,
)
elif os.path.isfile(path):
if merge_with_no_traceid:
process_file_no_traceid(
record_list,
path
)
else:
process_file(
record_list,
traceid_set,
path,
options.pid,
options.traceid
)
else:
print(path, "Path is neither file or directory.")
sys.exit(1)
# Sort only using timestamps, no use of traceid
if merge_with_no_traceid:
record_list.sort()
print_record_list(record_list, sys.stdout)
sys.exit(0)
# Keep only records with matching traceids
if not options.breakdown:
record_list = filter_traceids(record_list, traceid_set)
if options.breakdown:
for traceid in traceid_set:
# Full
with open("%s.full" % traceid, "w") as full_f:
full_l = filter_traceids(record_list, {traceid})
if options.merge:
full_l.sort()
print_record_list(full_l, full_f)
# Flow
with open("%s.flow" % traceid, "w") as flow_f:
flow_l = filter_flow(full_l)
for fl in flow_l:
print(fl, file=flow_f)
# Flow compact
with open("%s.flowcompact" % traceid, "w") as flowcompact_f:
flowcompact_l = filter_flowcompact(flow_l)
for fl in flowcompact_l:
print(fl, file=flowcompact_f)
elif options.flow:
flow_list = filter_flow(record_list)
for fl in flow_list:
print(fl)
elif options.flowcompact:
flow_list = filter_flow(record_list)
flow_list = filter_flowcompact(flow_list)
for fl in flow_list:
print(fl)
else:
if options.merge:
record_list.sort()
print_record_list(record_list, sys.stdout)
sys.exit(0)
if __name__ == "__main__":
main()