2ed5b09b3e
Syzkaller reported a UAF bug a while back: ================================================================== BUG: KASAN: use-after-free in xfs_ilock_attr_map_shared+0xe3/0xf6 fs/xfs/xfs_inode.c:127 Read of size 4 at addr ffff88802cec919c by task syz-executor262/2958 CPU: 2 PID: 2958 Comm: syz-executor262 Not tainted 5.15.0-0.30.3-20220406_1406 #3 Hardware name: Red Hat KVM, BIOS 1.13.0-2.module+el8.3.0+7860+a7792d29 04/01/2014 Call Trace: <TASK> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x82/0xa9 lib/dump_stack.c:106 print_address_description.constprop.9+0x21/0x2d5 mm/kasan/report.c:256 __kasan_report mm/kasan/report.c:442 [inline] kasan_report.cold.14+0x7f/0x11b mm/kasan/report.c:459 xfs_ilock_attr_map_shared+0xe3/0xf6 fs/xfs/xfs_inode.c:127 xfs_attr_get+0x378/0x4c2 fs/xfs/libxfs/xfs_attr.c:159 xfs_xattr_get+0xe3/0x150 fs/xfs/xfs_xattr.c:36 __vfs_getxattr+0xdf/0x13d fs/xattr.c:399 cap_inode_need_killpriv+0x41/0x5d security/commoncap.c:300 security_inode_need_killpriv+0x4c/0x97 security/security.c:1408 dentry_needs_remove_privs.part.28+0x21/0x63 fs/inode.c:1912 dentry_needs_remove_privs+0x80/0x9e fs/inode.c:1908 do_truncate+0xc3/0x1e0 fs/open.c:56 handle_truncate fs/namei.c:3084 [inline] do_open fs/namei.c:3432 [inline] path_openat+0x30ab/0x396d fs/namei.c:3561 do_filp_open+0x1c4/0x290 fs/namei.c:3588 do_sys_openat2+0x60d/0x98c fs/open.c:1212 do_sys_open+0xcf/0x13c fs/open.c:1228 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3a/0x7e arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0x0 RIP: 0033:0x7f7ef4bb753d Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1b 79 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007f7ef52c2ed8 EFLAGS: 00000246 ORIG_RAX: 0000000000000055 RAX: ffffffffffffffda RBX: 0000000000404148 RCX: 00007f7ef4bb753d RDX: 00007f7ef4bb753d RSI: 0000000000000000 RDI: 0000000020004fc0 RBP: 
0000000000404140 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0030656c69662f2e R13: 00007ffd794db37f R14: 00007ffd794db470 R15: 00007f7ef52c2fc0 </TASK> Allocated by task 2953: kasan_save_stack+0x19/0x38 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:434 [inline] __kasan_slab_alloc+0x68/0x7c mm/kasan/common.c:467 kasan_slab_alloc include/linux/kasan.h:254 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slub.c:3213 [inline] slab_alloc mm/slub.c:3221 [inline] kmem_cache_alloc+0x11b/0x3eb mm/slub.c:3226 kmem_cache_zalloc include/linux/slab.h:711 [inline] xfs_ifork_alloc+0x25/0xa2 fs/xfs/libxfs/xfs_inode_fork.c:287 xfs_bmap_add_attrfork+0x3f2/0x9b1 fs/xfs/libxfs/xfs_bmap.c:1098 xfs_attr_set+0xe38/0x12a7 fs/xfs/libxfs/xfs_attr.c:746 xfs_xattr_set+0xeb/0x1a9 fs/xfs/xfs_xattr.c:59 __vfs_setxattr+0x11b/0x177 fs/xattr.c:180 __vfs_setxattr_noperm+0x128/0x5e0 fs/xattr.c:214 __vfs_setxattr_locked+0x1d4/0x258 fs/xattr.c:275 vfs_setxattr+0x154/0x33d fs/xattr.c:301 setxattr+0x216/0x29f fs/xattr.c:575 __do_sys_fsetxattr fs/xattr.c:632 [inline] __se_sys_fsetxattr fs/xattr.c:621 [inline] __x64_sys_fsetxattr+0x243/0x2fe fs/xattr.c:621 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3a/0x7e arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0x0 Freed by task 2949: kasan_save_stack+0x19/0x38 mm/kasan/common.c:38 kasan_set_track+0x1c/0x21 mm/kasan/common.c:46 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:360 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free mm/kasan/common.c:328 [inline] __kasan_slab_free+0xe2/0x10e mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:230 [inline] slab_free_hook mm/slub.c:1700 [inline] slab_free_freelist_hook mm/slub.c:1726 [inline] slab_free mm/slub.c:3492 [inline] kmem_cache_free+0xdc/0x3ce mm/slub.c:3508 xfs_attr_fork_remove+0x8d/0x132 
fs/xfs/libxfs/xfs_attr_leaf.c:773 xfs_attr_sf_removename+0x5dd/0x6cb fs/xfs/libxfs/xfs_attr_leaf.c:822 xfs_attr_remove_iter+0x68c/0x805 fs/xfs/libxfs/xfs_attr.c:1413 xfs_attr_remove_args+0xb1/0x10d fs/xfs/libxfs/xfs_attr.c:684 xfs_attr_set+0xf1e/0x12a7 fs/xfs/libxfs/xfs_attr.c:802 xfs_xattr_set+0xeb/0x1a9 fs/xfs/xfs_xattr.c:59 __vfs_removexattr+0x106/0x16a fs/xattr.c:468 cap_inode_killpriv+0x24/0x47 security/commoncap.c:324 security_inode_killpriv+0x54/0xa1 security/security.c:1414 setattr_prepare+0x1a6/0x897 fs/attr.c:146 xfs_vn_change_ok+0x111/0x15e fs/xfs/xfs_iops.c:682 xfs_vn_setattr_size+0x5f/0x15a fs/xfs/xfs_iops.c:1065 xfs_vn_setattr+0x125/0x2ad fs/xfs/xfs_iops.c:1093 notify_change+0xae5/0x10a1 fs/attr.c:410 do_truncate+0x134/0x1e0 fs/open.c:64 handle_truncate fs/namei.c:3084 [inline] do_open fs/namei.c:3432 [inline] path_openat+0x30ab/0x396d fs/namei.c:3561 do_filp_open+0x1c4/0x290 fs/namei.c:3588 do_sys_openat2+0x60d/0x98c fs/open.c:1212 do_sys_open+0xcf/0x13c fs/open.c:1228 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3a/0x7e arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0x0 The buggy address belongs to the object at ffff88802cec9188 which belongs to the cache xfs_ifork of size 40 The buggy address is located 20 bytes inside of 40-byte region [ffff88802cec9188, ffff88802cec91b0) The buggy address belongs to the page: page:00000000c3af36a1 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2cec9 flags: 0xfffffc0000200(slab|node=0|zone=1|lastcpupid=0x1fffff) raw: 000fffffc0000200 ffffea00009d2580 0000000600000006 ffff88801a9ffc80 raw: 0000000000000000 0000000080490049 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88802cec9080: fb fb fb fc fc fa fb fb fb fb fc fc fb fb fb fb ffff88802cec9100: fb fc fc fb fb fb fb fb fc fc fb fb fb fb fb fc >ffff88802cec9180: fc fa fb fb fb fb fc fc fa fb fb fb fb fc fc fb ^ ffff88802cec9200: fb fb 
fb fb fc fc fb fb fb fb fb fc fc fb fb fb ffff88802cec9280: fb fb fc fc fa fb fb fb fb fc fc fa fb fb fb fb ================================================================== The root cause of this bug is the unlocked access to xfs_inode.i_afp from the getxattr code paths while trying to determine which ILOCK mode to use to stabilize the xattr data. Unfortunately, the VFS does not acquire i_rwsem when vfs_getxattr (or listxattr) call into the filesystem, which means that getxattr can race with a removexattr that's tearing down the attr fork and crash: xfs_attr_set: xfs_attr_get: xfs_attr_fork_remove: xfs_ilock_attr_map_shared: xfs_idestroy_fork(ip->i_afp); kmem_cache_free(xfs_ifork_cache, ip->i_afp); if (ip->i_afp && ip->i_afp = NULL; xfs_need_iread_extents(ip->i_afp)) <KABOOM> ip->i_forkoff = 0; Regrettably, the VFS is much more lax about i_rwsem and getxattr than is immediately obvious -- not only does it not guarantee that we hold i_rwsem, it actually doesn't guarantee that we *don't* hold it either. The getxattr system call won't acquire the lock before calling XFS, but the file capabilities code calls getxattr with and without i_rwsem held to determine if the "security.capabilities" xattr is set on the file. Fixing the VFS locking requires a treewide investigation into every code path that could touch an xattr and what i_rwsem state it expects or sets up. That could take years or even prove impossible; fortunately, we can fix this UAF problem inside XFS. An earlier version of this patch used smp_wmb in xfs_attr_fork_remove to ensure that i_forkoff is always zeroed before i_afp is set to null and changed the read paths to use smp_rmb before accessing i_forkoff and i_afp, which avoided these UAF problems. However, the patch author was too busy dealing with other problems in the meantime, and by the time he came back to this issue, the situation had changed a bit. 
On a modern system with selinux, each inode will always have at least one xattr for the selinux label, so it doesn't make much sense to keep incurring the extra pointer dereference. Furthermore, Allison's upcoming parent pointer patchset will also cause nearly every inode in the filesystem to have extended attributes. Therefore, make the inode attribute fork structure part of struct xfs_inode, at a cost of 40 more bytes. This patch adds a clunky if_present field where necessary to maintain the existing logic of xattr fork null pointer testing in the existing codebase. The next patch switches the logic over to XFS_IFORK_Q and it all goes away. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com>
622 lines
23 KiB
C
622 lines
23 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
|
|
* All Rights Reserved.
|
|
*/
|
|
#ifndef __XFS_ATTR_H__
|
|
#define __XFS_ATTR_H__
|
|
|
|
struct xfs_inode;
|
|
struct xfs_da_args;
|
|
struct xfs_attr_list_context;
|
|
|
|
/*
|
|
* Large attribute lists are structured around Btrees where all the data
|
|
* elements are in the leaf nodes. Attribute names are hashed into an int,
|
|
* then that int is used as the index into the Btree. Since the hashval
|
|
* of an attribute name may not be unique, we may have duplicate keys.
|
|
* The internal links in the Btree are logical block offsets into the file.
|
|
*
|
|
* Small attribute lists use a different format and are packed as tightly
|
|
* as possible so as to fit into the literal area of the inode.
|
|
*/
|
|
|
|
/*
 * The maximum size (into the kernel or returned from the kernel) of an
 * attribute value or the buffer used for an attr_list() call.  Larger
 * sizes will result in an ERANGE return code.
 */
#define	ATTR_MAX_VALUELEN	(64*1024)	/* max length of a value */
|
|
|
|
/*
|
|
* Kernel-internal version of the attrlist cursor.
|
|
*/
|
|
struct xfs_attrlist_cursor_kern {
|
|
__u32 hashval; /* hash value of next entry to add */
|
|
__u32 blkno; /* block containing entry (suggestion) */
|
|
__u32 offset; /* offset in list of equal-hashvals */
|
|
__u16 pad1; /* padding to match user-level */
|
|
__u8 pad2; /* padding to match user-level */
|
|
__u8 initted; /* T/F: cursor has been initialized */
|
|
};
|
|
|
|
|
|
/*========================================================================
|
|
* Structure used to pass context around among the routines.
|
|
*========================================================================*/
|
|
|
|
|
|
/*
 * Callback used to emit one attribute list entry.
 *
 * Returns void; state (including any error) is communicated via *context.
 */
typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
			      unsigned char *, int, int);
|
|
|
|
struct xfs_attr_list_context {
|
|
struct xfs_trans *tp;
|
|
struct xfs_inode *dp; /* inode */
|
|
struct xfs_attrlist_cursor_kern cursor; /* position in list */
|
|
void *buffer; /* output buffer */
|
|
|
|
/*
|
|
* Abort attribute list iteration if non-zero. Can be used to pass
|
|
* error values to the xfs_attr_list caller.
|
|
*/
|
|
int seen_enough;
|
|
bool allow_incomplete;
|
|
|
|
ssize_t count; /* num used entries */
|
|
int dupcnt; /* count dup hashvals seen */
|
|
int bufsize; /* total buffer size */
|
|
int firstu; /* first used byte in buffer */
|
|
unsigned int attr_filter; /* XFS_ATTR_{ROOT,SECURE} */
|
|
int resynch; /* T/F: resynch with cursor */
|
|
put_listent_func_t put_listent; /* list output fmt function */
|
|
int index; /* index into output buffer */
|
|
};
|
|
|
|
|
|
/*
|
|
* ========================================================================
|
|
* Structure used to pass context around among the delayed routines.
|
|
* ========================================================================
|
|
*/
|
|
|
|
/*
|
|
* Below is a state machine diagram for attr remove operations. The XFS_DAS_*
|
|
* states indicate places where the function would return -EAGAIN, and then
|
|
* immediately resume from after being called by the calling function. States
|
|
* marked as a "subroutine state" indicate that they belong to a subroutine, and
|
|
* so the calling function needs to pass them back to that subroutine to allow
|
|
* it to finish where it left off. But they otherwise do not have a role in the
|
|
* calling function other than just passing through.
|
|
*
|
|
* xfs_attr_remove_iter()
|
|
* │
|
|
* v
|
|
* have attr to remove? ──n──> done
|
|
* │
|
|
* y
|
|
* │
|
|
* v
|
|
* are we short form? ──y──> xfs_attr_shortform_remove ──> done
|
|
* │
|
|
* n
|
|
* │
|
|
* V
|
|
* are we leaf form? ──y──> xfs_attr_leaf_removename ──> done
|
|
* │
|
|
* n
|
|
* │
|
|
* V
|
|
* ┌── need to setup state?
|
|
* │ │
|
|
* n y
|
|
* │ │
|
|
* │ v
|
|
* │ find attr and get state
|
|
* │ attr has remote blks? ──n─┐
|
|
* │ │ v
|
|
* │ │ find and invalidate
|
|
* │ y the remote blocks.
|
|
* │ │ mark attr incomplete
|
|
* │ ├────────────────┘
|
|
* └──────────┤
|
|
* │
|
|
* v
|
|
* Have remote blks to remove? ───y─────┐
|
|
* │ ^ remove the blks
|
|
* │ │ │
|
|
* │ │ v
|
|
* │ XFS_DAS_RMTBLK <─n── done?
|
|
* │ re-enter with │
|
|
* │ one less blk to y
|
|
* │ remove │
|
|
* │ V
|
|
* │ refill the state
|
|
* n │
|
|
* │ v
|
|
* │ XFS_DAS_RM_NAME
|
|
* │ │
|
|
* ├─────────────────────────┘
|
|
* │
|
|
* v
|
|
* remove leaf and
|
|
* update hash with
|
|
* xfs_attr_node_remove_cleanup
|
|
* │
|
|
* v
|
|
* need to
|
|
* shrink tree? ─n─┐
|
|
* │ │
|
|
* y │
|
|
* │ │
|
|
* v │
|
|
* join leaf │
|
|
* │ │
|
|
* v │
|
|
* XFS_DAS_RM_SHRINK │
|
|
* │ │
|
|
* v │
|
|
* do the shrink │
|
|
* │ │
|
|
* v │
|
|
* free state <──┘
|
|
* │
|
|
* v
|
|
* done
|
|
*
|
|
*
|
|
* Below is a state machine diagram for attr set operations.
|
|
*
|
|
* It seems the challenge with understanding this system comes from trying to
|
|
* absorb the state machine all at once, when really one should only be looking
|
|
* at it within the context of a single function. Once a state sensitive
|
|
* function is called, the idea is that it "takes ownership" of the
|
|
* state machine. It isn't concerned with the states that may have belonged to
|
|
* its calling parent. Only the states relevant to itself or any other
|
|
* subroutines therein. Once a calling function hands off the state machine to
|
|
* a subroutine, it needs to respect the simple rule that it doesn't "own" the
|
|
* state machine anymore, and it's the responsibility of that calling function
|
|
* to propagate the -EAGAIN back up the call stack. Upon reentry, it is
|
|
* committed to re-calling that subroutine until it returns something other than
|
|
* -EAGAIN. Once that subroutine signals completion (by returning anything other
|
|
* than -EAGAIN), the calling function can resume using the state machine.
|
|
*
|
|
* xfs_attr_set_iter()
|
|
* │
|
|
* v
|
|
* ┌─y─ has an attr fork?
|
|
* │ |
|
|
* │ n
|
|
* │ |
|
|
* │ V
|
|
* │ add a fork
|
|
* │ │
|
|
* └──────────┤
|
|
* │
|
|
* V
|
|
* ┌─── is shortform?
|
|
* │ │
|
|
* │ y
|
|
* │ │
|
|
* │ V
|
|
* │ xfs_attr_set_fmt
|
|
* │ |
|
|
* │ V
|
|
* │ xfs_attr_try_sf_addname
|
|
* │ │
|
|
* │ V
|
|
* │ had enough ──y──> done
|
|
* │ space?
|
|
* n │
|
|
* │ n
|
|
* │ │
|
|
* │ V
|
|
* │ transform to leaf
|
|
* │ │
|
|
* │ V
|
|
* │ hold the leaf buffer
|
|
* │ │
|
|
* │ V
|
|
* │ return -EAGAIN
|
|
* │ Re-enter in
|
|
* │ leaf form
|
|
* │
|
|
* └─> release leaf buffer
|
|
* if needed
|
|
* │
|
|
* V
|
|
* ┌───n── fork has
|
|
* │ only 1 blk?
|
|
* │ │
|
|
* │ y
|
|
* │ │
|
|
* │ v
|
|
* │ xfs_attr_leaf_try_add()
|
|
* │ │
|
|
* │ v
|
|
* │ had enough ──────────────y─────────────┐
|
|
* │ space? │
|
|
* │ │ │
|
|
* │ n │
|
|
* │ │ │
|
|
* │ v │
|
|
* │ return -EAGAIN │
|
|
* │ re-enter in │
|
|
* │ node form │
|
|
* │ │ │
|
|
* └──────────┤ │
|
|
* │ │
|
|
* V │
|
|
* xfs_attr_node_addname_find_attr │
|
|
* determines if this │
|
|
* is create or rename │
|
|
* find space to store attr │
|
|
* │ │
|
|
* v │
|
|
* xfs_attr_node_addname │
|
|
* │ │
|
|
* v │
|
|
* fits in a node leaf? ────n─────┐ │
|
|
* │ ^ v │
|
|
* │ │ single leaf node? │
|
|
* │ │ │ │ │
|
|
* y │ y n │
|
|
* │ │ │ │ │
|
|
* v │ v v │
|
|
* update │ grow the leaf split if │
|
|
* hashvals └── return -EAGAIN needed │
|
|
* │ retry leaf add │ │
|
|
* │ on reentry │ │
|
|
* ├────────────────────────────┘ │
|
|
* │ │
|
|
* v │
|
|
* need to alloc │
|
|
* ┌─y── or flip flag? │
|
|
* │ │ │
|
|
* │ n │
|
|
* │ │ │
|
|
* │ v │
|
|
* │ done │
|
|
* │ │
|
|
* │ │
|
|
* │ XFS_DAS_FOUND_LBLK <────────────────┘
|
|
* │ │
|
|
* │ V
|
|
* │ xfs_attr_leaf_addname()
|
|
* │ │
|
|
* │ v
|
|
* │ ┌──first time through?
|
|
* │ │ │
|
|
* │ │ y
|
|
* │ │ │
|
|
* │ n v
|
|
* │ │ if we have rmt blks
|
|
* │ │ find space for them
|
|
* │ │ │
|
|
* │ └──────────┤
|
|
* │ │
|
|
* │ v
|
|
* │ still have
|
|
* │ ┌─n─ blks to alloc? <──┐
|
|
* │ │ │ │
|
|
* │ │ y │
|
|
* │ │ │ │
|
|
* │ │ v │
|
|
* │ │ alloc one blk │
|
|
* │ │ return -EAGAIN ──┘
|
|
* │ │ re-enter with one
|
|
* │ │ less blk to alloc
|
|
* │ │
|
|
* │ │
|
|
* │ └───> set the rmt
|
|
* │ value
|
|
* │ │
|
|
* │ v
|
|
* │ was this
|
|
* │ a rename? ──n─┐
|
|
* │ │ │
|
|
* │ y │
|
|
* │ │ │
|
|
* │ v │
|
|
* │ flip incomplete │
|
|
* │ flag │
|
|
* │ │ │
|
|
* │ v │
|
|
* │ XFS_DAS_FLIP_LFLAG │
|
|
* │ │ │
|
|
* │ v │
|
|
* │ need to remove │
|
|
* │ old bks? ──n──┤
|
|
* │ │ │
|
|
* │ y │
|
|
* │ │ │
|
|
* │ V │
|
|
* │ remove │
|
|
* │ ┌───> old blks │
|
|
* │ │ │ │
|
|
* │ XFS_DAS_RM_LBLK │ │
|
|
* │ ^ │ │
|
|
* │ │ v │
|
|
* │ └──y── more to │
|
|
* │ remove? │
|
|
* │ │ │
|
|
* │ n │
|
|
* │ │ │
|
|
* │ v │
|
|
* │ XFS_DAS_RD_LEAF │
|
|
* │ │ │
|
|
* │ v │
|
|
* │ remove leaf │
|
|
* │ │ │
|
|
* │ v │
|
|
* │ shrink to sf │
|
|
* │ if needed │
|
|
* │ │ │
|
|
* │ v │
|
|
* │ done <──────┘
|
|
* │
|
|
* └──────> XFS_DAS_FOUND_NBLK
|
|
* │
|
|
* v
|
|
* ┌─────n── need to
|
|
* │ alloc blks?
|
|
* │ │
|
|
* │ y
|
|
* │ │
|
|
* │ v
|
|
* │ find space
|
|
* │ │
|
|
* │ v
|
|
* │ ┌─>XFS_DAS_ALLOC_NODE
|
|
* │ │ │
|
|
* │ │ v
|
|
* │ │ alloc blk
|
|
* │ │ │
|
|
* │ │ v
|
|
* │ └──y── need to alloc
|
|
* │ more blocks?
|
|
* │ │
|
|
* │ n
|
|
* │ │
|
|
* │ v
|
|
* │ set the rmt value
|
|
* │ │
|
|
* │ v
|
|
* │ was this
|
|
* └────────> a rename? ──n─┐
|
|
* │ │
|
|
* y │
|
|
* │ │
|
|
* v │
|
|
* flip incomplete │
|
|
* flag │
|
|
* │ │
|
|
* v │
|
|
* XFS_DAS_FLIP_NFLAG │
|
|
* │ │
|
|
* v │
|
|
* need to │
|
|
* remove blks? ─n──┤
|
|
* │ │
|
|
* y │
|
|
* │ │
|
|
* v │
|
|
* remove │
|
|
* ┌────────> old blks │
|
|
* │ │ │
|
|
* XFS_DAS_RM_NBLK │ │
|
|
* ^ │ │
|
|
* │ v │
|
|
* └──────y── more to │
|
|
* remove │
|
|
* │ │
|
|
* n │
|
|
* │ │
|
|
* v │
|
|
* XFS_DAS_CLR_FLAG │
|
|
* │ │
|
|
* v │
|
|
* clear flags │
|
|
* │ │
|
|
* ├──────────┘
|
|
* │
|
|
* v
|
|
* done
|
|
*/
|
|
|
|
/*
 * Enum values for xfs_attr_intent.xattri_dela_state
 *
 * These values are used by delayed attribute operations to keep track of where
 * they were before they returned -EAGAIN.  A return code of -EAGAIN signals the
 * calling function to roll the transaction, and then call the subroutine to
 * finish the operation.  The enum is then used by the subroutine to jump back
 * to where it was and resume executing where it left off.
 *
 * The numeric order of the enumerators is significant (see the comments
 * inside the enum), so new states must not be inserted casually.
 */
enum xfs_delattr_state {
	XFS_DAS_UNINIT		= 0,	/* No state has been set yet */

	/*
	 * Initial sequence states. The replace setup code relies on the
	 * ADD and REMOVE states for a specific format to be sequential so
	 * that we can transform the initial operation to be performed
	 * according to the xfs_has_larp() state easily.
	 */
	XFS_DAS_SF_ADD,			/* Initial sf add state */
	XFS_DAS_SF_REMOVE,		/* Initial sf replace/remove state */

	XFS_DAS_LEAF_ADD,		/* Initial leaf add state */
	XFS_DAS_LEAF_REMOVE,		/* Initial leaf replace/remove state */

	XFS_DAS_NODE_ADD,		/* Initial node add state */
	XFS_DAS_NODE_REMOVE,		/* Initial node replace/remove state */

	/* Leaf state set/replace/remove sequence */
	XFS_DAS_LEAF_SET_RMT,		/* set a remote xattr from a leaf */
	XFS_DAS_LEAF_ALLOC_RMT,		/* We are allocating remote blocks */
	XFS_DAS_LEAF_REPLACE,		/* Perform replace ops on a leaf */
	XFS_DAS_LEAF_REMOVE_OLD,	/* Start removing old attr from leaf */
	XFS_DAS_LEAF_REMOVE_RMT,	/* A rename is removing remote blocks */
	XFS_DAS_LEAF_REMOVE_ATTR,	/* Remove the old attr from a leaf */

	/* Node state sequence, must match leaf state above */
	XFS_DAS_NODE_SET_RMT,		/* set a remote xattr from a node */
	XFS_DAS_NODE_ALLOC_RMT,		/* We are allocating remote blocks */
	XFS_DAS_NODE_REPLACE,		/* Perform replace ops on a node */
	XFS_DAS_NODE_REMOVE_OLD,	/* Start removing old attr from node */
	XFS_DAS_NODE_REMOVE_RMT,	/* A rename is removing remote blocks */
	XFS_DAS_NODE_REMOVE_ATTR,	/* Remove the old attr from a node */

	XFS_DAS_DONE,			/* finished operation */
};
|
|
|
|
/*
 * { value, "name" } pairs for every xfs_delattr_state enumerator, in enum
 * order.  Keep this list in sync with the enum when states are added.
 * NOTE(review): presumably consumed by a symbolic-name printing table
 * (e.g. tracepoints) -- confirm at the use site.
 */
#define XFS_DAS_STRINGS	\
	{ XFS_DAS_UNINIT,		"XFS_DAS_UNINIT" }, \
	{ XFS_DAS_SF_ADD,		"XFS_DAS_SF_ADD" }, \
	{ XFS_DAS_SF_REMOVE,		"XFS_DAS_SF_REMOVE" }, \
	{ XFS_DAS_LEAF_ADD,		"XFS_DAS_LEAF_ADD" }, \
	{ XFS_DAS_LEAF_REMOVE,		"XFS_DAS_LEAF_REMOVE" }, \
	{ XFS_DAS_NODE_ADD,		"XFS_DAS_NODE_ADD" }, \
	{ XFS_DAS_NODE_REMOVE,		"XFS_DAS_NODE_REMOVE" }, \
	{ XFS_DAS_LEAF_SET_RMT,		"XFS_DAS_LEAF_SET_RMT" }, \
	{ XFS_DAS_LEAF_ALLOC_RMT,	"XFS_DAS_LEAF_ALLOC_RMT" }, \
	{ XFS_DAS_LEAF_REPLACE,		"XFS_DAS_LEAF_REPLACE" }, \
	{ XFS_DAS_LEAF_REMOVE_OLD,	"XFS_DAS_LEAF_REMOVE_OLD" }, \
	{ XFS_DAS_LEAF_REMOVE_RMT,	"XFS_DAS_LEAF_REMOVE_RMT" }, \
	{ XFS_DAS_LEAF_REMOVE_ATTR,	"XFS_DAS_LEAF_REMOVE_ATTR" }, \
	{ XFS_DAS_NODE_SET_RMT,		"XFS_DAS_NODE_SET_RMT" }, \
	{ XFS_DAS_NODE_ALLOC_RMT,	"XFS_DAS_NODE_ALLOC_RMT" }, \
	{ XFS_DAS_NODE_REPLACE,		"XFS_DAS_NODE_REPLACE" }, \
	{ XFS_DAS_NODE_REMOVE_OLD,	"XFS_DAS_NODE_REMOVE_OLD" }, \
	{ XFS_DAS_NODE_REMOVE_RMT,	"XFS_DAS_NODE_REMOVE_RMT" }, \
	{ XFS_DAS_NODE_REMOVE_ATTR,	"XFS_DAS_NODE_REMOVE_ATTR" }, \
	{ XFS_DAS_DONE,			"XFS_DAS_DONE" }
|
|
|
|
struct xfs_attri_log_nameval;
|
|
|
|
/*
|
|
* Context used for keeping track of delayed attribute operations
|
|
*/
|
|
struct xfs_attr_intent {
|
|
/*
|
|
* used to log this item to an intent containing a list of attrs to
|
|
* commit later
|
|
*/
|
|
struct list_head xattri_list;
|
|
|
|
/* Used in xfs_attr_node_removename to roll through removing blocks */
|
|
struct xfs_da_state *xattri_da_state;
|
|
|
|
struct xfs_da_args *xattri_da_args;
|
|
|
|
/*
|
|
* Shared buffer containing the attr name and value so that the logging
|
|
* code can share large memory buffers between log items.
|
|
*/
|
|
struct xfs_attri_log_nameval *xattri_nameval;
|
|
|
|
/* Used to keep track of current state of delayed operation */
|
|
enum xfs_delattr_state xattri_dela_state;
|
|
|
|
/*
|
|
* Attr operation being performed - XFS_ATTRI_OP_FLAGS_*
|
|
*/
|
|
unsigned int xattri_op_flags;
|
|
|
|
/* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */
|
|
xfs_dablk_t xattri_lblkno;
|
|
int xattri_blkcnt;
|
|
struct xfs_bmbt_irec xattri_map;
|
|
};
|
|
|
|
|
|
/*========================================================================
|
|
* Function prototypes for the kernel.
|
|
*========================================================================*/
|
|
|
|
/*
 * Overall external interface routines.
 */
int xfs_attr_inactive(struct xfs_inode *dp);
int xfs_attr_list_ilocked(struct xfs_attr_list_context *context);
int xfs_attr_list(struct xfs_attr_list_context *context);
int xfs_inode_hasattr(struct xfs_inode *ip);
bool xfs_attr_is_leaf(struct xfs_inode *ip);
int xfs_attr_get_ilocked(struct xfs_da_args *args);
int xfs_attr_get(struct xfs_da_args *args);
int xfs_attr_set(struct xfs_da_args *args);
int xfs_attr_set_iter(struct xfs_attr_intent *attr);
int xfs_attr_remove_iter(struct xfs_attr_intent *attr);
bool xfs_attr_namecheck(const void *name, size_t length);
int xfs_attr_calc_size(struct xfs_da_args *args, int *local);
void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres,
			 unsigned int *total);
|
|
|
|
/*
|
|
* Check to see if the attr should be upgraded from non-existent or shortform to
|
|
* single-leaf-block attribute list.
|
|
*/
|
|
static inline bool
|
|
xfs_attr_is_shortform(
|
|
struct xfs_inode *ip)
|
|
{
|
|
return ip->i_af.if_format == XFS_DINODE_FMT_LOCAL ||
|
|
(ip->i_af.if_format == XFS_DINODE_FMT_EXTENTS &&
|
|
ip->i_af.if_nextents == 0);
|
|
}
|
|
|
|
static inline enum xfs_delattr_state
|
|
xfs_attr_init_add_state(struct xfs_da_args *args)
|
|
{
|
|
/*
|
|
* When called from the completion of a attr remove to determine the
|
|
* next state, the attribute fork may be null. This can occur only occur
|
|
* on a pure remove, but we grab the next state before we check if a
|
|
* replace operation is being performed. If we are called from any other
|
|
* context, i_af is guaranteed to exist. Hence if the attr fork is
|
|
* null, we were called from a pure remove operation and so we are done.
|
|
*/
|
|
if (!args->dp->i_af.if_present)
|
|
return XFS_DAS_DONE;
|
|
|
|
args->op_flags |= XFS_DA_OP_ADDNAME;
|
|
if (xfs_attr_is_shortform(args->dp))
|
|
return XFS_DAS_SF_ADD;
|
|
if (xfs_attr_is_leaf(args->dp))
|
|
return XFS_DAS_LEAF_ADD;
|
|
return XFS_DAS_NODE_ADD;
|
|
}
|
|
|
|
static inline enum xfs_delattr_state
|
|
xfs_attr_init_remove_state(struct xfs_da_args *args)
|
|
{
|
|
args->op_flags |= XFS_DA_OP_REMOVE;
|
|
if (xfs_attr_is_shortform(args->dp))
|
|
return XFS_DAS_SF_REMOVE;
|
|
if (xfs_attr_is_leaf(args->dp))
|
|
return XFS_DAS_LEAF_REMOVE;
|
|
return XFS_DAS_NODE_REMOVE;
|
|
}
|
|
|
|
/*
|
|
* If we are logging the attributes, then we have to start with removal of the
|
|
* old attribute so that there is always consistent state that we can recover
|
|
* from if the system goes down part way through. We always log the new attr
|
|
* value, so even when we remove the attr first we still have the information in
|
|
* the log to finish the replace operation atomically.
|
|
*/
|
|
static inline enum xfs_delattr_state
|
|
xfs_attr_init_replace_state(struct xfs_da_args *args)
|
|
{
|
|
args->op_flags |= XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE;
|
|
if (args->op_flags & XFS_DA_OP_LOGGED)
|
|
return xfs_attr_init_remove_state(args);
|
|
return xfs_attr_init_add_state(args);
|
|
}
|
|
|
|
extern struct kmem_cache *xfs_attr_intent_cache;
|
|
int __init xfs_attr_intent_init_cache(void);
|
|
void xfs_attr_intent_destroy_cache(void);
|
|
|
|
#endif /* __XFS_ATTR_H__ */
|