2010-05-09 23:13:55 +00:00
#!/bin/python
"""
2012-09-11 16:34:54 +05:30
Copyright ( c ) 2006 - 2012 Red Hat , Inc . < http : / / www . redhat . com >
2010-05-09 23:13:55 +00:00
This file is part of GlusterFS .
2012-08-27 16:48:55 +05:30
This file is licensed to you under your choice of the GNU Lesser
General Public License , version 3 or any later version ( LGPLv3 or
later ) , or the GNU General Public License , version 2 ( GPLv2 ) , in all
cases as published by the Free Software Foundation .
2012-09-11 16:34:54 +05:30
2010-05-09 23:13:55 +00:00
"""
2018-04-13 09:13:16 -04:00
from __future__ import print_function
2010-05-09 23:13:55 +00:00
import os
import string
import sys
class NFSRequest :
2011-04-18 00:58:49 +00:00
def requestIsEntryOp ( self ) :
op = self . op
if op == " CREATE " or op == " LOOKUP " or op == " REMOVE " or op == " LINK " or op == " RENAME " or op == " MKDIR " or op == " RMDIR " or op == " SYMLINK " or op == " MKNOD " :
return 1
else :
return 0
2010-05-09 23:13:55 +00:00
def __init__ ( self , logline , linecount ) :
self . calllinecount = 0
self . xid = " "
self . op = " "
self . opdata = " "
self . replydata = " "
self . replylinecount = 0
2011-04-18 00:58:49 +00:00
self . timestamp = " "
self . entryname = " "
self . gfid = " "
self . replygfid = " "
2010-05-09 23:13:55 +00:00
tokens = logline . strip ( ) . split ( " " )
2011-04-18 00:58:49 +00:00
self . timestamp = tokens [ 0 ] + " " + tokens [ 1 ]
2010-05-09 23:13:55 +00:00
if " XID: " not in tokens :
return None
if " args: " not in tokens :
return None
self . calllinecount = linecount
xididx = tokens . index ( " XID: " )
self . xid = tokens [ xididx + 1 ] . strip ( " , " )
opidx = tokens . index ( " args: " )
self . op = tokens [ opidx - 1 ] . strip ( " : " )
self . opdata = " " . join ( tokens [ opidx + 1 : ] )
2011-04-18 00:58:49 +00:00
if self . requestIsEntryOp ( ) :
nameidx = tokens . index ( " name: " )
self . entryname = tokens [ nameidx + 1 ] . strip ( " , " )
gfididx = tokens . index ( " gfid " )
self . gfid = tokens [ gfididx + 1 ] . strip ( " , " )
2010-05-09 23:13:55 +00:00
def getXID ( self ) :
return self . xid
def setReply ( self , logline , linecount ) :
tokens = logline . strip ( ) . split ( " " )
2011-04-18 00:58:49 +00:00
timestamp = tokens [ 0 ] + " " + tokens [ 1 ]
2010-05-09 23:13:55 +00:00
statidx = tokens . index ( " NFS: " )
2011-04-18 00:58:49 +00:00
self . replydata = " TimeStamp: " + timestamp + " " + " " . join ( tokens [ statidx + 1 : ] )
2010-05-09 23:13:55 +00:00
self . replylinecount = linecount
2011-04-18 00:58:49 +00:00
if " gfid " in tokens :
gfididx = tokens . index ( " gfid " )
self . replygfid = tokens [ gfididx + 1 ] . strip ( " , " )
2010-05-09 23:13:55 +00:00
def dump ( self ) :
2018-04-13 09:13:16 -04:00
print ( " ReqLine: " + str ( self . calllinecount ) + " TimeStamp: " + self . timestamp + " , XID: " + self . xid + " " + self . op + " ARGS: " + self . opdata + " RepLine: " + str ( self . replylinecount ) + " " + self . replydata )
2010-05-09 23:13:55 +00:00
class NFSLogAnalyzer :
2011-04-18 00:58:49 +00:00
def __init__ ( self , optn , trackfilename , tracknamefh , stats ) :
self . stats = stats
2010-05-09 23:13:55 +00:00
self . xid_request_map = { }
self . orphan_replies = { }
2011-04-18 00:58:49 +00:00
self . rqlist = [ ]
2010-05-09 23:13:55 +00:00
self . CALL = 1
self . REPLY = 2
2011-04-18 00:58:49 +00:00
self . optn = optn
self . trackfilename = trackfilename
self . tracknamefh = tracknamefh
self . trackedfilehandles = [ ]
2010-05-09 23:13:55 +00:00
def handle_call_line ( self , logline , linecount ) :
newreq = NFSRequest ( logline , linecount )
xid = newreq . getXID ( )
2011-04-18 00:58:49 +00:00
if ( self . optn == SYNTHESIZE ) :
self . xid_request_map [ xid ] = newreq
self . rqlist . append ( newreq )
elif self . optn == TRACKFILENAME :
if newreq . requestIsEntryOp ( ) :
if newreq . entryname == self . trackfilename :
self . xid_request_map [ xid ] = newreq
self . rqlist . append ( newreq )
else :
del newreq
elif self . tracknamefh == ENABLE_TRACKNAME_FH :
if len ( self . trackedfilehandles ) > 0 :
if newreq . gfid in self . trackedfilehandles :
self . xid_request_map [ xid ] = newreq
self . rqlist . append ( newreq )
else :
del newreq
else :
del newreq
else :
del newreq
2010-05-09 23:13:55 +00:00
def handle_reply_line ( self , logline , linecount ) :
tokens = logline . strip ( ) . split ( " " )
xididx = tokens . index ( " XID: " )
xid = tokens [ xididx + 1 ] . strip ( " , " )
if xid not in self . xid_request_map . keys ( ) :
self . orphan_replies [ xid ] = logline
else :
2011-04-18 00:58:49 +00:00
rq = self . xid_request_map [ xid ]
rq . setReply ( logline , linecount )
if rq . requestIsEntryOp ( ) and rq . entryname == self . trackfilename :
self . trackedfilehandles . append ( rq . replygfid )
2010-05-09 23:13:55 +00:00
def analyzeLine ( self , logline , linecount ) :
tokens = logline . strip ( ) . split ( " " )
msgtype = 0
if " XID: " not in tokens :
return
if " args: " in tokens :
msgtype = self . CALL
elif " NFS: " in tokens :
msgtype = self . REPLY
if msgtype == self . CALL :
self . handle_call_line ( logline , linecount )
elif msgtype == self . REPLY :
self . handle_reply_line ( logline , linecount )
def getStats ( self ) :
2011-04-18 00:58:49 +00:00
if self . stats == 0 :
return
2010-05-09 23:13:55 +00:00
rcount = len ( self . xid_request_map . keys ( ) )
orphancount = len ( self . orphan_replies . keys ( ) )
2018-04-13 09:13:16 -04:00
print ( " Requests: " + str ( rcount ) + " , Orphans: " + str ( orphancount ) )
2010-05-09 23:13:55 +00:00
def dump ( self ) :
self . getStats ( )
2011-04-18 00:58:49 +00:00
for rq in self . rqlist :
2010-05-09 23:13:55 +00:00
rq . dump ( )
2011-04-18 00:58:49 +00:00
del rq
2010-05-09 23:13:55 +00:00
2011-04-18 00:58:49 +00:00
self . rqlist = [ ]
self . orphan_replies = { }
self . xid_request_map = { }
2010-05-09 23:13:55 +00:00
linecount = 0
2011-04-18 00:58:49 +00:00
SYNTHESIZE = 1
TRACKFILENAME = 2
ENABLESTATS = 1
DISABLESTATS = 0
ENABLE_TRACKNAME_FH = 1
DISABLE_TRACKNAME_FH = 0
2010-05-09 23:13:55 +00:00
progmsgcount = 1000
2011-04-18 00:58:49 +00:00
dumpinterval = 200000
operation = SYNTHESIZE
stats = ENABLESTATS
tracknamefh = DISABLE_TRACKNAME_FH
trackfilename = " "
"""
Print the progress of the analysing operations every X number of lines read from
the logs , where X is the argument provided to this option .
2010-05-09 23:13:55 +00:00
2011-04-18 00:58:49 +00:00
Use this to print a status message every say 10000 lines processed or 100000
lines processed to know how much longer the processing will go on for .
USAGE : - - progress < NUMLINES >
"""
2010-05-09 23:13:55 +00:00
if " --progress " in sys . argv :
idx = sys . argv . index ( " --progress " )
progmsgcount = int ( sys . argv [ idx + 1 ] )
2011-04-18 00:58:49 +00:00
"""
The replies for a NFS request can be separated by hundreds and even thousands
of other NFS requests and replies . These can be spread over many hundreds and
thousands of log lines . This script maintains a memory dict to map each request
to its reply using the XID . Because this is in - core , there is a limit to the
number of entries in the dict . At regular intervals , it dumps the mapped
requests and the replies into the stdout . The requests whose replies were not
found at the point of dumping are left as orphans , i . e . without info about the
replies . Use this option to tune the number of lines to maximize the number of
requests whose replies are found while balancing the dict size with memory
on the machine . The default works fine for most cases .
USAGE : - - dump < NUMLINES >
"""
2010-05-09 23:13:55 +00:00
if " --dump " in sys . argv :
idx = sys . argv . index ( " --dump " )
dumpinterval = int ( sys . argv [ idx + 1 ] )
2011-04-18 00:58:49 +00:00
"""
The default operation of the script is to output all the requests mapped to
their replies in a single line . This operation mode can be changed by this
argument . It is used to print only those operations that were performed on the
filename given as the argument to this option . Only those entry operations are
printed which contain this filename .
USAGE : - - trackfilename < filename >
"""
if " --trackfilename " in sys . argv :
idx = sys . argv . index ( " --trackfilename " )
trackfilename = sys . argv [ idx + 1 ]
operation = TRACKFILENAME
"""
At every dump interval , some stats are printed about the dumped lines .
Use this option to disable printing that to avoid cluttering the
output .
"""
if " --nostats " in sys . argv :
stats = DISABLESTATS
"""
While tracking a file using - - trackfilename , we ' re only given those
operations which contain the filename . This excludes a large number
of operations which operate on that file using its filehandle instead of
the filename . This option enables outputting those operations also . It
tracks every single file handle that was ever seen in the log for a given
filename .
USAGE : - - trackfilename
"""
if " --tracknamefh " in sys . argv :
tracknamefh = ENABLE_TRACKNAME_FH
la = NFSLogAnalyzer ( operation , trackfilename , tracknamefh , stats )
2010-05-09 23:13:55 +00:00
for line in sys . stdin :
linecount = linecount + 1
if linecount % dumpinterval == 0 :
sys . stderr . write ( " Dumping data.. \n " )
la . dump ( )
if linecount % progmsgcount == 0 :
sys . stderr . write ( " Integrating line: " + str ( linecount ) + " \n " )
la . analyzeLine ( line , linecount )