027690c75e
I made every global per-network-namespace instead. But perhaps doing that to this slab was a step too far. The kmem_cache_create call in our net init method also seems to be responsible for this lockdep warning: [ 45.163710] Unable to find swap-space signature [ 45.375718] trinity-c1 (855): attempted to duplicate a private mapping with mremap. This is not supported. [ 46.055744] futex_wake_op: trinity-c1 tries to shift op by -209; fix this program [ 51.011723] [ 51.013378] ====================================================== [ 51.013875] WARNING: possible circular locking dependency detected [ 51.014378] 5.2.0-rc2 #1 Not tainted [ 51.014672] ------------------------------------------------------ [ 51.015182] trinity-c2/886 is trying to acquire lock: [ 51.015593] 000000005405f099 (slab_mutex){+.+.}, at: slab_attr_store+0xa2/0x130 [ 51.016190] [ 51.016190] but task is already holding lock: [ 51.016652] 00000000ac662005 (kn->count#43){++++}, at: kernfs_fop_write+0x286/0x500 [ 51.017266] [ 51.017266] which lock already depends on the new lock. 
[ 51.017266] [ 51.017909] [ 51.017909] the existing dependency chain (in reverse order) is: [ 51.018497] [ 51.018497] -> #1 (kn->count#43){++++}: [ 51.018956] __lock_acquire+0x7cf/0x1a20 [ 51.019317] lock_acquire+0x17d/0x390 [ 51.019658] __kernfs_remove+0x892/0xae0 [ 51.020020] kernfs_remove_by_name_ns+0x78/0x110 [ 51.020435] sysfs_remove_link+0x55/0xb0 [ 51.020832] sysfs_slab_add+0xc1/0x3e0 [ 51.021332] __kmem_cache_create+0x155/0x200 [ 51.021720] create_cache+0xf5/0x320 [ 51.022054] kmem_cache_create_usercopy+0x179/0x320 [ 51.022486] kmem_cache_create+0x1a/0x30 [ 51.022867] nfsd_reply_cache_init+0x278/0x560 [ 51.023266] nfsd_init_net+0x20f/0x5e0 [ 51.023623] ops_init+0xcb/0x4b0 [ 51.023928] setup_net+0x2fe/0x670 [ 51.024315] copy_net_ns+0x30a/0x3f0 [ 51.024653] create_new_namespaces+0x3c5/0x820 [ 51.025257] unshare_nsproxy_namespaces+0xd1/0x240 [ 51.025881] ksys_unshare+0x506/0x9c0 [ 51.026381] __x64_sys_unshare+0x3a/0x50 [ 51.026937] do_syscall_64+0x110/0x10b0 [ 51.027509] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 51.028175] [ 51.028175] -> #0 (slab_mutex){+.+.}: [ 51.028817] validate_chain+0x1c51/0x2cc0 [ 51.029422] __lock_acquire+0x7cf/0x1a20 [ 51.029947] lock_acquire+0x17d/0x390 [ 51.030438] __mutex_lock+0x100/0xfa0 [ 51.030995] mutex_lock_nested+0x27/0x30 [ 51.031516] slab_attr_store+0xa2/0x130 [ 51.032020] sysfs_kf_write+0x11d/0x180 [ 51.032529] kernfs_fop_write+0x32a/0x500 [ 51.033056] do_loop_readv_writev+0x21d/0x310 [ 51.033627] do_iter_write+0x2e5/0x380 [ 51.034148] vfs_writev+0x170/0x310 [ 51.034616] do_pwritev+0x13e/0x160 [ 51.035100] __x64_sys_pwritev+0xa3/0x110 [ 51.035633] do_syscall_64+0x110/0x10b0 [ 51.036200] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 51.036924] [ 51.036924] other info that might help us debug this: [ 51.036924] [ 51.037876] Possible unsafe locking scenario: [ 51.037876] [ 51.038556] CPU0 CPU1 [ 51.039130] ---- ---- [ 51.039676] lock(kn->count#43); [ 51.040084] lock(slab_mutex); [ 51.040597] lock(kn->count#43); [ 51.041062] 
lock(slab_mutex); [ 51.041320] [ 51.041320] *** DEADLOCK *** [ 51.041320] [ 51.041793] 3 locks held by trinity-c2/886: [ 51.042128] #0: 000000001f55e152 (sb_writers#5){.+.+}, at: vfs_writev+0x2b9/0x310 [ 51.042739] #1: 00000000c7d6c034 (&of->mutex){+.+.}, at: kernfs_fop_write+0x25b/0x500 [ 51.043400] #2: 00000000ac662005 (kn->count#43){++++}, at: kernfs_fop_write+0x286/0x500 Reported-by: kernel test robot <lkp@intel.com> Fixes: 3ba75830ce17 "drc containerization" Signed-off-by: J. Bruce Fields <bfields@redhat.com>
90 lines
2.0 KiB
C
90 lines
2.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Request reply cache. This was heavily inspired by the
 * implementation in 4.3BSD/4.4BSD.
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
 */

/* Include guard; the matching #endif closes the header. */
#ifndef NFSCACHE_H
#define NFSCACHE_H

#include <linux/sunrpc/svc.h>
#include "netns.h"

/*
|
|
* Representation of a reply cache entry.
|
|
*
|
|
* Note that we use a sockaddr_in6 to hold the address instead of the more
|
|
* typical sockaddr_storage. This is for space reasons, since sockaddr_storage
|
|
* is much larger than a sockaddr_in6.
|
|
*/
|
|
struct svc_cacherep {
|
|
struct {
|
|
/* Keep often-read xid, csum in the same cache line: */
|
|
__be32 k_xid;
|
|
__wsum k_csum;
|
|
u32 k_proc;
|
|
u32 k_prot;
|
|
u32 k_vers;
|
|
unsigned int k_len;
|
|
struct sockaddr_in6 k_addr;
|
|
} c_key;
|
|
|
|
struct rb_node c_node;
|
|
struct list_head c_lru;
|
|
unsigned char c_state, /* unused, inprog, done */
|
|
c_type, /* status, buffer */
|
|
c_secure : 1; /* req came from port < 1024 */
|
|
unsigned long c_timestamp;
|
|
union {
|
|
struct kvec u_vec;
|
|
__be32 u_status;
|
|
} c_u;
|
|
};
|
|
|
|
/*
 * Shorthand accessors for the members of the c_u union in
 * struct svc_cacherep: entry->c_replvec expands to entry->c_u.u_vec and
 * entry->c_replstat expands to entry->c_u.u_status.
 */
#define c_replvec		c_u.u_vec
#define c_replstat		c_u.u_status

/*
 * cache entry states
 *
 * The names line up with the "unused, inprog, done" comment on the
 * c_state field of struct svc_cacherep.
 */
enum {
	RC_UNUSED,
	RC_INPROG,
	RC_DONE
};

/*
 * return values
 *
 * NOTE(review): presumably the result codes of nfsd_cache_lookup()
 * (drop the request, send the cached reply, or process the request) --
 * confirm against the implementation.
 */
enum {
	RC_DROPIT,
	RC_REPLY,
	RC_DOIT
};

/*
 * Cache types.
 * We may want to add more types one day, e.g. for diropres and
 * attrstat replies. Using cache entries with fixed length instead
 * of buffer pointers may be more efficient.
 *
 * RC_REPLSTAT and RC_REPLBUFF line up with the "status, buffer" comment
 * on the c_type field of struct svc_cacherep.
 */
enum {
	RC_NOCACHE,
	RC_REPLSTAT,
	RC_REPLBUFF,
};

/*
 * Cache entries expire after this time period: 120 * HZ ticks.
 * HZ must be defined wherever this macro is expanded.
 */
#define RC_EXPIRE		(120 * HZ)

/* Checksum this amount of the request (unsigned constant, 256) */
#define RC_CSUMLEN		(256U)

int nfsd_drc_slab_create(void);
|
|
void nfsd_drc_slab_free(void);
|
|
int nfsd_reply_cache_init(struct nfsd_net *);
|
|
void nfsd_reply_cache_shutdown(struct nfsd_net *);
|
|
int nfsd_cache_lookup(struct svc_rqst *);
|
|
void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
|
|
int nfsd_reply_cache_stats_open(struct inode *, struct file *);
|
|
|
|
#endif /* NFSCACHE_H */