Quota fsck script

This script can be used for 3 purposes: 1) Extract marker xattrs from the backend with a crawl (for analysis) 2) Report anomalies in accounting by comparing stat based accounting with marker translator's accounting 3) Fix anomalies in the FS by marking the anomalous directories dirty and having them heal in a bottom up fashion over the directory tree. Change-Id: Ib4d4a6d1886d7332c80d5a93f7afccaa0f52ab58 fixes: #390 Signed-off-by: Sanoj Unnikrishnan <sunnikri@redhat.com> Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
2018-01-11 12:24:35 +05:30 · 2018-01-11 12:24:35 +05:30 · c8a3e8ed1c
commit c8a3e8ed1c
parent 3780f6c251
1 changed files with 378 additions and 0 deletions
--- a/extras/quota/quota_fsck.py
+++ b/extras/quota/quota_fsck.py
@ -0,0 +1,378 @@
+#!/usr/bin/python
+# The following script enables, Detecting, Reporting and Fixing
+# anomalies in quota accounting. Run this script with -h option
+# for further details.
+
+'''
+   Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+'''
+import os, sys, re
+from stat import *
+import subprocess
+import argparse
+import xattr
+
+# Running size total per directory path; filled and cleaned up by walktree().
+aggr_size = {}
+# Set from --full-logs; when True every verified object is logged.
+verbose_mode = False
+# Set from --fix-issues; while None, fix_xattr() returns without acting.
+mnt_path = None
+# Brick path given on the command line.
+brick_path = None
+# Counters printed at the end of the run.
+obj_fix_count = 0
+file_count = 0
+dir_count = 0
+
+#CONSTANTS
+# Byte-size multipliers (not referenced by the code visible in this file).
+KB = 1024
+MB = 1048576
+GB = 1048576 * 1024
+TB = 1048576 * 1048576
+
+# Log types understood by print_msg().
+QUOTA_VERBOSE = 0
+QUOTA_META_ABSENT = 1
+QUOTA_SIZE_MISMATCH = 2
+
+# Values of the trusted.glusterfs.quota.dirty xattr in the hex form that
+# "getfattr -e hex" prints and that is passed to "setfattr -v".
+IS_DIRTY ='0x3100'
+IS_CLEAN ='0x3000'
+
+
+# Passed to argparse as the epilog of the help output.
+epilog_msg='''
+            The script attempts to find any gluster accounting issues in the
+            filesystem at the given subtree. The script crawls the given
+            subdirectory tree doing a stat for all files and compares the
+            size reported by gluster quota with the size reported by stat
+            calls. Any mismatch is reported. In addition integrity of marker
+            xattrs are verified.
+            '''
+
+def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None):
+    '''
+    Print one report record for the object at path.
+
+    log_type   : QUOTA_VERBOSE, QUOTA_META_ABSENT or QUOTA_SIZE_MISMATCH.
+                 Any other value prints nothing.
+    path       : path of the object being reported.
+    xattr_dict : quota xattrs of the object, as built by
+                 get_quota_xattr_brick(). The default dict is shared
+                 between calls, so it must never be modified here.
+    stbuf      : lstat result of the object; its st_size is printed for
+                 size mismatches when dir_size is None.
+    dir_size   : size aggregated by the crawl for a directory, None for
+                 files.
+    '''
+    if log_type == QUOTA_VERBOSE:
+        print('%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path, xattr_dict, stbuf))
+    elif log_type == QUOTA_META_ABSENT:
+        print('%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path, xattr_dict))
+    elif log_type == QUOTA_SIZE_MISMATCH:
+        print("mismatch")
+        # contri_size is stored per parent under xattr_dict['parents'], not
+        # at the top level of xattr_dict. Indexing xattr_dict['contri_size']
+        # directly raised KeyError for every mismatch report, so collect the
+        # per-parent values instead (a flat dict is still accepted).
+        contri = ""
+        if isinstance(xattr_dict, dict):
+            if 'contri_size' in xattr_dict:
+                contri = str(xattr_dict['contri_size'])
+            else:
+                parents = xattr_dict.get('parents', {})
+                contri = ",".join([str(parents[parent].get('contri_size'))
+                                   for parent in sorted(parents)])
+        if dir_size is not None:
+            print('%24s %60s %12s %12s' % ("Size Mismatch", path, contri,
+                   dir_size))
+        else:
+            print('%-24s %-60s %-12s %-12i' % ("Size Mismatch", path, contri,
+                   stbuf.st_size))
+
+def size_differs_lot(s1, s2):
+    '''
+    Tell whether two sizes disagree.
+
+    Used to compare the stat based accounting with the gluster accounting.
+    No tolerance is applied at present: any non-zero difference between
+    s1 and s2 counts as a mismatch.
+    TODO: For a deeply nested directory, at higher levels in hierarchy
+    differences may not be significant, hence this check needs to be improved.
+    '''
+    return abs(s1 - s2) > 0
+
+def fix_hardlink_accounting(curr_dict, accounted_dict, curr_size):
+    '''
+            Remove already accounted hard links from a freshly computed size.
+
+            A hard linked inode must be accounted for its parent directory,
+            but only up to the most common ancestor of its links.
+            Eg:
+                with 3 hardlinks /d1/d2/h1, /d1/d3/h2 and /d1/h3, seen in
+                that order, h1 is accounted all the way up (d2->d1->/),
+                h2 only for d3 (d1 and / already hold this inode) and
+                h3 not at all.
+
+            curr_dict                : dict (inode -> size) of hardlinks seen
+                                       and accounted by the current iteration.
+            accounted_dict           : dict of hardlinks that have already
+                                       been accounted for.
+            curr_size                : size of the object as accounted by the
+                                       current iteration.
+
+            Return value:
+            curr_size reduced by the size of every hardlink present in both
+            dicts. Those duplicate links are also deleted from curr_dict;
+            accounted_dict is left untouched.
+    '''
+
+    # Iterate over a snapshot of the keys since entries are removed on the way.
+    for inode in list(curr_dict):
+        if inode in accounted_dict:
+            curr_size -= curr_dict.pop(inode)
+    return curr_size
+
+
+def fix_xattr(file_name, mark_dirty):
+    '''
+    Fix the accounting of one object; does nothing unless a mount path
+    was given (mnt_path is not None).
+
+    file_name  : path of the object on the brick.
+    mark_dirty : when True, set trusted.glusterfs.quota.dirty to IS_DIRTY
+                 on the brick object with setfattr before the stat.
+
+    The object is then lstat()ed through the mount point and
+    obj_fix_count is incremented.
+    NOTE(review): presumably the lookup through the mount is what makes
+    gluster heal the accounting - confirm.
+    '''
+    global obj_fix_count
+    global mnt_path
+
+    if mnt_path is None:
+        return
+
+    if mark_dirty:
+        print("MARKING DIRTY: " + file_name)
+        subprocess.check_output(["/usr/bin/setfattr",
+                                 "-n", "trusted.glusterfs.quota.dirty",
+                                 "-v", IS_DIRTY,
+                                 file_name])
+
+    # Same object, addressed relative to the mount instead of the brick.
+    mnt_target = mnt_path + "/" + os.path.relpath(file_name, brick_path)
+    print("stat on " + mnt_target)
+    os.lstat(mnt_target)
+
+    obj_fix_count += 1
+
+def get_quota_xattr_brick(dpath):
+    '''
+    Read the quota related xattrs of dpath from the brick with getfattr
+    and return them parsed into a dict.
+
+    Sample output to be parsed:
+    [root@dhcp35-100 mnt]# getfattr -d -m. -e hex /export/b1/B0/d14/d13/
+    # file: export/b1/B0/d14/d13/
+    security.selinux=0x756e636f6e66696e65645f753a6f626a6563745f723a7573725f743a733000
+    trusted.gfid=0xbae5e0d2d05043de9fd851d91ecf63e8
+    trusted.glusterfs.dht=0x000000010000000000000000ffffffff
+    trusted.glusterfs.dht.mds=0x00000000
+    trusted.glusterfs.quota.6a7675a3-b85a-40c5-830b-de9229d702ce.contri.39=0x00000000000000000000000000000000000000000000000e
+    trusted.glusterfs.quota.dirty=0x3000
+    trusted.glusterfs.quota.size.39=0x00000000000000000000000000000000000000000000000e
+
+    Returned dict (a key is present only if the matching xattr was found,
+    except 'parents' which always exists):
+      'parents'       : dict keyed by the 4th dot separated field of each
+                        contri xattr name; every value is a dict holding
+                        'contri_size', 'contri_file_count' and
+                        'contri_dir_count'.
+      'version'       : 6th dot separated field of the first contri xattr
+                        name.
+      'size', 'file_count', 'dir_count'
+                      : fields of the size xattr.
+      'dirty'         : True for IS_DIRTY, False for IS_CLEAN.
+      'limit_objects', 'limit_set'
+                      : first 16 hex digits of the matching xattr value.
+
+    Raises subprocess.CalledProcessError when getfattr exits non-zero.
+    '''
+    out = subprocess.check_output (["/usr/bin/getfattr", "--no-dereference",
+                                    "-d", "-m.", "-e", "hex", dpath])
+
+    xattr_dict = {}
+    xattr_dict['parents'] = {}
+
+    # The loop variable is deliberately not called "xattr": that name is
+    # the module imported at the top of the file.
+    for line in out.splitlines():
+        xattr_key = line.split("=")[0]
+        if re.search("# file:",xattr_key):
+            # skip the file comment
+            continue
+        elif xattr_key == "":
+            # skip any empty lines (compare by value, not by identity)
+            continue
+        elif not re.search("quota",xattr_key):
+            # skip all non quota xattr.
+            continue
+
+        xattr_value = line.split("=")[1]
+        if re.search("contri",xattr_key):
+
+            xattr_version = xattr_key.split(".")[5]
+            if 'version' not in xattr_dict:
+                xattr_dict['version'] = xattr_version
+            else:
+                if xattr_version != xattr_dict['version']:
+                    print("Multiple xattr version found")
+
+            cur_parent = xattr_key.split(".")[3]
+            if cur_parent not in xattr_dict['parents']:
+                xattr_dict['parents'][cur_parent] = {}
+
+            contri_dict = xattr_dict['parents'][cur_parent]
+            if len(xattr_value) == 34:
+                # 34 characters ("0x" + 32 hex digits) implies file contri
+                # contri format =0x<16 hex digits size><16 hex digits file count>
+                # a file contri carries no dir count, so it is set to 0
+                contri_dict['contri_size'] = int(xattr_value[2:18], 16)
+                contri_dict['contri_file_count'] = int(xattr_value[18:34], 16)
+                contri_dict['contri_dir_count'] = 0
+            else:
+                # This is a directory contri: a third field with the
+                # dir count follows the size and the file count.
+                contri_dict['contri_size'] = int(xattr_value[2:18], 16)
+                contri_dict['contri_file_count'] = int(xattr_value[18:34], 16)
+                contri_dict['contri_dir_count'] = int(xattr_value[34:], 16)
+
+        elif re.search("size",xattr_key):
+            xattr_dict['size'] = int(xattr_value[2:18], 16)
+            xattr_dict['file_count'] = int(xattr_value[18:34], 16)
+            xattr_dict['dir_count'] = int(xattr_value[34:], 16)
+        elif re.search("dirty",xattr_key):
+            if xattr_value == IS_CLEAN:
+                xattr_dict['dirty'] = False
+            elif xattr_value == IS_DIRTY:
+                xattr_dict['dirty'] = True
+        elif re.search("limit_objects",xattr_key):
+            xattr_dict['limit_objects'] = int(xattr_value[2:18], 16)
+        elif re.search("limit_set",xattr_key):
+            xattr_dict['limit_set'] = int(xattr_value[2:18], 16)
+
+    return xattr_dict
+
+def verify_file_xattr(path, stbuf = None):
+    '''
+    Check the contri xattrs of one file against its stat size.
+
+    path  : path of the file on the brick.
+    stbuf : lstat result of path; fetched here when None.
+
+    For the first parent whose contri data is incomplete or whose
+    contri_size differs from stbuf.st_size, the problem is reported,
+    fix_xattr(path, False) is called and the check stops. When nothing
+    is wrong and verbose_mode is on, the xattrs are logged.
+    Always increments file_count.
+    '''
+    global file_count
+    file_count += 1
+
+    if stbuf is None:
+        stbuf = os.lstat(path)
+
+    xattr_dict = get_quota_xattr_brick(path)
+    required_keys = ('contri_size', 'contri_file_count', 'contri_dir_count')
+
+    for contri_dict in xattr_dict['parents'].values():
+        if not all(key in contri_dict for key in required_keys):
+            problem = QUOTA_META_ABSENT
+        elif size_differs_lot(contri_dict['contri_size'], stbuf.st_size):
+            problem = QUOTA_SIZE_MISMATCH
+        else:
+            continue
+
+        # Only the first problem found is reported and fixed.
+        print_msg(problem, path, xattr_dict, stbuf)
+        fix_xattr(path, False)
+        return
+
+    if verbose_mode is True:
+        print_msg(QUOTA_VERBOSE, path, xattr_dict, stbuf)
+
+
+def verify_dir_xattr(path, dir_size):
+    '''
+    Check the quota xattrs of one directory against the crawled size.
+
+    path     : path of the directory on the brick.
+    dir_size : size aggregated for this directory by the crawl.
+
+    For the first parent where the size or contri_size xattr is missing,
+    or where dir_size, the size xattr and contri_size do not all agree,
+    the problem is reported, the directory is marked dirty through
+    fix_xattr(path, True) and the check stops. When nothing is wrong and
+    verbose_mode is on, the xattrs are logged.
+    Always increments dir_count.
+    '''
+    global dir_count
+    dir_count += 1
+    xattr_dict = get_quota_xattr_brick(path)
+
+    stbuf = os.lstat(path)
+
+    # NOTE(review): a directory without any contri xattr has no entry in
+    # 'parents', so none of the checks below run for it - confirm that
+    # this is intended.
+    for parent in xattr_dict['parents']:
+        contri_dict = xattr_dict['parents'][parent]
+
+        if 'size' not in xattr_dict or 'contri_size' not in contri_dict:
+            print_msg(QUOTA_META_ABSENT, path)
+            fix_xattr(path, True)
+            return
+        elif size_differs_lot(dir_size, xattr_dict['size']) or \
+             size_differs_lot(contri_dict['contri_size'] , xattr_dict['size']):
+            print_msg(QUOTA_SIZE_MISMATCH, path, xattr_dict, stbuf, dir_size)
+            fix_xattr(path, True)
+            return
+
+    if verbose_mode is True:
+        # Must be the QUOTA_VERBOSE constant: the string "VERBOSE" matches
+        # no log type in print_msg(), so nothing was ever printed here.
+        print_msg(QUOTA_VERBOSE, path, xattr_dict, stbuf, dir_size)
+
+
+def walktree(t_dir, hard_link_dict):
+    '''recursively descend the directory tree rooted at t_dir,
+       aggregating the size and verifying the quota xattrs of every
+       file and directory on the way.
+       t_dir            : directory to walk over.
+       hard_link_dict   : dict (inode -> size) of inodes with multiple
+                          hard_links under t_dir; entries are added for
+                          hard linked files found directly in t_dir.
+       Returns the size aggregated for t_dir.
+    '''
+    global aggr_size
+    aggr_size[t_dir] = 0
+
+    for entry in os.listdir(t_dir):
+        pathname = os.path.join(t_dir, entry)
+        # lstat: symbolic links are examined themselves, never followed.
+        stbuf = os.lstat(pathname)
+        if S_ISDIR(stbuf.st_mode):
+            # It's a directory, recurse into it
+            if entry == '.glusterfs':
+                print "skipping " + pathname
+                continue
+            # Each sub-directory is walked with its own, empty hardlink dict.
+            descendent_hardlinks = {}
+            subtree_size = walktree(pathname, descendent_hardlinks)
+
+            # Drop from the subtree size the hardlinks already accounted
+            # for t_dir.
+            # NOTE(review): descendent_hardlinks is never merged into
+            # hard_link_dict afterwards, so links found in one
+            # sub-directory are unknown to its siblings and to t_dir -
+            # verify this matches the intended accounting.
+            subtree_size = fix_hardlink_accounting(descendent_hardlinks,
+                                                   hard_link_dict,
+                                                   subtree_size)
+
+            aggr_size[t_dir] = aggr_size[t_dir] + subtree_size
+
+        elif S_ISREG(stbuf.st_mode) or S_ISLNK(stbuf.st_mode):
+            # Even a symbolic link file may have multiple hardlinks.
+
+            file_size = stbuf.st_size
+            # NOTE(review): the threshold is 2, not 1 - presumably because
+            # every file on a brick already has one extra link under
+            # .glusterfs; confirm.
+            if stbuf.st_nlink > 2:
+                # send a single element dict to check if file is accounted.
+                file_size = fix_hardlink_accounting({stbuf.st_ino:stbuf.st_size},
+                                                    hard_link_dict,
+                                                    stbuf.st_size)
+
+                # NOTE(review): neither "HARD_LINK ..." value is a log type
+                # handled by print_msg(), so these two calls print nothing.
+                if file_size == 0:
+                    print_msg("HARD_LINK (skipped)", pathname, "",
+                                stbuf)
+                else:
+                    print_msg("HARD_LINK (accounted)", pathname, "",
+                               stbuf)
+                    # Remember the inode so further links to it are skipped.
+                    hard_link_dict[stbuf.st_ino] = stbuf.st_size
+
+            if t_dir in aggr_size:
+                aggr_size[t_dir] = aggr_size[t_dir] + file_size
+            else:
+                aggr_size[t_dir] = file_size
+            verify_file_xattr(pathname, stbuf)
+
+        else:
+            # Unknown file type, print a message
+            print 'Skipping %s, due to file mode' % pathname
+
+    if t_dir not in aggr_size:
+        aggr_size[t_dir] = 0
+
+    # All children are accounted; now check the directory itself.
+    verify_dir_xattr(t_dir, aggr_size[t_dir])
+    # du also accounts for t_directory sizes
+    # aggr_size[t_dir] += 4096
+
+    #cleanup
+    # The total is handed back to the caller, so the global entry can go.
+    ret = aggr_size[t_dir]
+    del aggr_size[t_dir]
+    return ret
+
+
+if __name__ == '__main__':
+
+    # Command line: one positional brick path plus three optional switches.
+    parser = argparse.ArgumentParser(
+        description='Diagnose quota accounting issues.',
+        epilog=epilog_msg)
+    parser.add_argument(
+        'brick_path', nargs=1,
+        help='The brick path (or any descendent sub-directory of brick path)')
+    parser.add_argument(
+        '--full-logs', dest='verbose', action='store_true',
+        help='''
+                         log all the xattr values and stat values reported
+                         for analysis. [CAUTION: This can give lot of output
+                         depending on FS depth. So one has to make sure enough
+                         disk space exists if redirecting to file]
+                        ''')
+    parser.add_argument(
+        '--fix-issues', metavar='mount_path', dest='mnt', action='store',
+        help='''
+                         fix accounting issues where the xattr values disagree
+                         with stat sizes reported by gluster. A mount is also
+                         required for this option to be used.
+                         [CAUTION: This will directly modify backend xattr]
+                        ''')
+    parser.add_argument(
+        '--sub-dir', metavar='sub_dir', dest='sub_dir', action='store',
+        help='''
+                         limit the crawling and accounting verification/correction
+                         to a specific subdirectory.
+                        ''')
+
+    # Publish the options through the module level names the functions read.
+    args = parser.parse_args()
+    verbose_mode = args.verbose
+    brick_path = args.brick_path[0]
+    sub_dir = args.sub_dir
+    mnt_path = args.mnt
+
+    # Crawl either the whole brick or only the requested sub-directory.
+    hard_link_dict = {}
+    crawl_root = brick_path if sub_dir is None else os.path.join(brick_path, sub_dir)
+    walktree(crawl_root, hard_link_dict)
+
+    # Summary; the fix counter is shown only when fixing was requested.
+    print("Files verified : " + str(file_count))
+    print("Directories verified : " + str(dir_count))
+    if mnt_path is not None:
+        print("Objects Fixed : " + str(obj_fix_count))