027690c75e
I made every global per-network-namespace instead. But perhaps doing that to this slab was a step too far. The kmem_cache_create call in our net init method also seems to be responsible for this lockdep warning: [ 45.163710] Unable to find swap-space signature [ 45.375718] trinity-c1 (855): attempted to duplicate a private mapping with mremap. This is not supported. [ 46.055744] futex_wake_op: trinity-c1 tries to shift op by -209; fix this program [ 51.011723] [ 51.013378] ====================================================== [ 51.013875] WARNING: possible circular locking dependency detected [ 51.014378] 5.2.0-rc2 #1 Not tainted [ 51.014672] ------------------------------------------------------ [ 51.015182] trinity-c2/886 is trying to acquire lock: [ 51.015593] 000000005405f099 (slab_mutex){+.+.}, at: slab_attr_store+0xa2/0x130 [ 51.016190] [ 51.016190] but task is already holding lock: [ 51.016652] 00000000ac662005 (kn->count#43){++++}, at: kernfs_fop_write+0x286/0x500 [ 51.017266] [ 51.017266] which lock already depends on the new lock. 
[ 51.017266] [ 51.017909] [ 51.017909] the existing dependency chain (in reverse order) is: [ 51.018497] [ 51.018497] -> #1 (kn->count#43){++++}: [ 51.018956] __lock_acquire+0x7cf/0x1a20 [ 51.019317] lock_acquire+0x17d/0x390 [ 51.019658] __kernfs_remove+0x892/0xae0 [ 51.020020] kernfs_remove_by_name_ns+0x78/0x110 [ 51.020435] sysfs_remove_link+0x55/0xb0 [ 51.020832] sysfs_slab_add+0xc1/0x3e0 [ 51.021332] __kmem_cache_create+0x155/0x200 [ 51.021720] create_cache+0xf5/0x320 [ 51.022054] kmem_cache_create_usercopy+0x179/0x320 [ 51.022486] kmem_cache_create+0x1a/0x30 [ 51.022867] nfsd_reply_cache_init+0x278/0x560 [ 51.023266] nfsd_init_net+0x20f/0x5e0 [ 51.023623] ops_init+0xcb/0x4b0 [ 51.023928] setup_net+0x2fe/0x670 [ 51.024315] copy_net_ns+0x30a/0x3f0 [ 51.024653] create_new_namespaces+0x3c5/0x820 [ 51.025257] unshare_nsproxy_namespaces+0xd1/0x240 [ 51.025881] ksys_unshare+0x506/0x9c0 [ 51.026381] __x64_sys_unshare+0x3a/0x50 [ 51.026937] do_syscall_64+0x110/0x10b0 [ 51.027509] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 51.028175] [ 51.028175] -> #0 (slab_mutex){+.+.}: [ 51.028817] validate_chain+0x1c51/0x2cc0 [ 51.029422] __lock_acquire+0x7cf/0x1a20 [ 51.029947] lock_acquire+0x17d/0x390 [ 51.030438] __mutex_lock+0x100/0xfa0 [ 51.030995] mutex_lock_nested+0x27/0x30 [ 51.031516] slab_attr_store+0xa2/0x130 [ 51.032020] sysfs_kf_write+0x11d/0x180 [ 51.032529] kernfs_fop_write+0x32a/0x500 [ 51.033056] do_loop_readv_writev+0x21d/0x310 [ 51.033627] do_iter_write+0x2e5/0x380 [ 51.034148] vfs_writev+0x170/0x310 [ 51.034616] do_pwritev+0x13e/0x160 [ 51.035100] __x64_sys_pwritev+0xa3/0x110 [ 51.035633] do_syscall_64+0x110/0x10b0 [ 51.036200] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 51.036924] [ 51.036924] other info that might help us debug this: [ 51.036924] [ 51.037876] Possible unsafe locking scenario: [ 51.037876] [ 51.038556] CPU0 CPU1 [ 51.039130] ---- ---- [ 51.039676] lock(kn->count#43); [ 51.040084] lock(slab_mutex); [ 51.040597] lock(kn->count#43); [ 51.041062] 
lock(slab_mutex); [ 51.041320] [ 51.041320] *** DEADLOCK *** [ 51.041320] [ 51.041793] 3 locks held by trinity-c2/886: [ 51.042128] #0: 000000001f55e152 (sb_writers#5){.+.+}, at: vfs_writev+0x2b9/0x310 [ 51.042739] #1: 00000000c7d6c034 (&of->mutex){+.+.}, at: kernfs_fop_write+0x25b/0x500 [ 51.043400] #2: 00000000ac662005 (kn->count#43){++++}, at: kernfs_fop_write+0x286/0x500 Reported-by: kernel test robot <lkp@intel.com> Fixes: 3ba75830ce17 "drc containerization" Signed-off-by: J. Bruce Fields <bfields@redhat.com>
188 lines
4.9 KiB
C
188 lines
4.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * per net namespace data structures for nfsd
 *
 * Copyright (C) 2012, Jeff Layton <jlayton@redhat.com>
 */
|
|
|
|
#ifndef __NFSD_NETNS_H__
|
|
#define __NFSD_NETNS_H__
|
|
|
|
#include <net/net_namespace.h>
|
|
#include <net/netns/generic.h>
|
|
|
|
/*
 * Hash tables for nfs4_clientid state.
 *
 * CLIENT_HASH_SIZE is derived from CLIENT_HASH_BITS, so it is always a
 * power of two and CLIENT_HASH_MASK (size - 1) selects exactly
 * CLIENT_HASH_BITS low-order bits of a hash value.
 */
#define CLIENT_HASH_BITS	4
#define CLIENT_HASH_SIZE	(1 << CLIENT_HASH_BITS)
#define CLIENT_HASH_MASK	(CLIENT_HASH_SIZE - 1)

/* NOTE(review): presumably the bucket count of sessionid_hashtbl -- confirm. */
#define SESSION_HASH_SIZE	512
|
|
|
|
/*
 * Forward declarations: only pointers to these types are needed in this
 * header, so their full definitions are not required here.
 */
struct cld_net;
struct nfsd4_client_tracking_ops;
|
|
|
|
/*
|
|
* Represents a nfsd "container". With respect to nfsv4 state tracking, the
|
|
* fields of interest are the *_id_hashtbls and the *_name_tree. These track
|
|
* the nfs4_client objects by either short or long form clientid.
|
|
*
|
|
* Each nfsd_net runs a nfs4_laundromat workqueue job when necessary to clean
|
|
* up expired clients and delegations within the container.
|
|
*/
|
|
struct nfsd_net {
|
|
struct cld_net *cld_net;
|
|
|
|
struct cache_detail *svc_expkey_cache;
|
|
struct cache_detail *svc_export_cache;
|
|
|
|
struct cache_detail *idtoname_cache;
|
|
struct cache_detail *nametoid_cache;
|
|
|
|
struct lock_manager nfsd4_manager;
|
|
bool grace_ended;
|
|
time64_t boot_time;
|
|
|
|
/* internal mount of the "nfsd" pseudofilesystem: */
|
|
struct vfsmount *nfsd_mnt;
|
|
|
|
struct dentry *nfsd_client_dir;
|
|
|
|
/*
|
|
* reclaim_str_hashtbl[] holds known client info from previous reset/reboot
|
|
* used in reboot/reset lease grace period processing
|
|
*
|
|
* conf_id_hashtbl[], and conf_name_tree hold confirmed
|
|
* setclientid_confirmed info.
|
|
*
|
|
* unconf_str_hastbl[] and unconf_name_tree hold unconfirmed
|
|
* setclientid info.
|
|
*/
|
|
struct list_head *reclaim_str_hashtbl;
|
|
int reclaim_str_hashtbl_size;
|
|
struct list_head *conf_id_hashtbl;
|
|
struct rb_root conf_name_tree;
|
|
struct list_head *unconf_id_hashtbl;
|
|
struct rb_root unconf_name_tree;
|
|
struct list_head *sessionid_hashtbl;
|
|
/*
|
|
* client_lru holds client queue ordered by nfs4_client.cl_time
|
|
* for lease renewal.
|
|
*
|
|
* close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
|
|
* for last close replay.
|
|
*
|
|
* All of the above fields are protected by the client_mutex.
|
|
*/
|
|
struct list_head client_lru;
|
|
struct list_head close_lru;
|
|
struct list_head del_recall_lru;
|
|
|
|
/* protected by blocked_locks_lock */
|
|
struct list_head blocked_locks_lru;
|
|
|
|
struct delayed_work laundromat_work;
|
|
|
|
/* client_lock protects the client lru list and session hash table */
|
|
spinlock_t client_lock;
|
|
|
|
/* protects blocked_locks_lru */
|
|
spinlock_t blocked_locks_lock;
|
|
|
|
struct file *rec_file;
|
|
bool in_grace;
|
|
const struct nfsd4_client_tracking_ops *client_tracking_ops;
|
|
|
|
time64_t nfsd4_lease;
|
|
time64_t nfsd4_grace;
|
|
bool somebody_reclaimed;
|
|
|
|
bool track_reclaim_completes;
|
|
atomic_t nr_reclaim_complete;
|
|
|
|
bool nfsd_net_up;
|
|
bool lockd_up;
|
|
|
|
/* Time of server startup */
|
|
struct timespec64 nfssvc_boot;
|
|
seqlock_t boot_lock;
|
|
|
|
/*
|
|
* Max number of connections this nfsd container will allow. Defaults
|
|
* to '0' which is means that it bases this on the number of threads.
|
|
*/
|
|
unsigned int max_connections;
|
|
|
|
u32 clientid_base;
|
|
u32 clientid_counter;
|
|
u32 clverifier_counter;
|
|
|
|
struct svc_serv *nfsd_serv;
|
|
|
|
wait_queue_head_t ntf_wq;
|
|
atomic_t ntf_refcnt;
|
|
|
|
/*
|
|
* clientid and stateid data for construction of net unique COPY
|
|
* stateids.
|
|
*/
|
|
u32 s2s_cp_cl_id;
|
|
struct idr s2s_cp_stateids;
|
|
spinlock_t s2s_cp_lock;
|
|
|
|
/*
|
|
* Version information
|
|
*/
|
|
bool *nfsd_versions;
|
|
bool *nfsd4_minorversions;
|
|
|
|
/*
|
|
* Duplicate reply cache
|
|
*/
|
|
struct nfsd_drc_bucket *drc_hashtbl;
|
|
|
|
/* max number of entries allowed in the cache */
|
|
unsigned int max_drc_entries;
|
|
|
|
/* number of significant bits in the hash value */
|
|
unsigned int maskbits;
|
|
unsigned int drc_hashsize;
|
|
|
|
/*
|
|
* Stats and other tracking of on the duplicate reply cache.
|
|
* These fields and the "rc" fields in nfsdstats are modified
|
|
* with only the per-bucket cache lock, which isn't really safe
|
|
* and should be fixed if we want the statistics to be
|
|
* completely accurate.
|
|
*/
|
|
|
|
/* total number of entries */
|
|
atomic_t num_drc_entries;
|
|
|
|
/* cache misses due only to checksum comparison failures */
|
|
unsigned int payload_misses;
|
|
|
|
/* amount of memory (in bytes) currently consumed by the DRC */
|
|
unsigned int drc_mem_usage;
|
|
|
|
/* longest hash chain seen */
|
|
unsigned int longest_chain;
|
|
|
|
/* size of cache when we saw the longest hash chain */
|
|
unsigned int longest_chain_cachesize;
|
|
|
|
struct shrinker nfsd_reply_cache_shrinker;
|
|
/* utsname taken from the the process that starts the server */
|
|
char nfsd_name[UNX_MAXNODENAME+1];
|
|
};
|
|
|
|
/*
 * Simple check to find out if a given net was properly initialized.
 *
 * Expands to nn's sessionid_hashtbl pointer, so the result is non-NULL
 * (true in a boolean context) only once that pointer has been set.
 * The argument is evaluated exactly once.
 */
#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl)
|
|
|
|
extern void nfsd_netns_free_versions(struct nfsd_net *nn);
|
|
|
|
extern unsigned int nfsd_net_id;
|
|
|
|
void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
|
|
void nfsd_reset_boot_verifier(struct nfsd_net *nn);
|
|
#endif /* __NFSD_NETNS_H__ */
|