nvme-tcp: lockdep: annotate in-kernel sockets
[ Upstream commit 841aee4d75f18fdfb53935080b03de0c65e9b92c ] Put NVMe/TCP sockets in their own class to avoid some lockdep warnings. Sockets created by nvme-tcp are not exposed to user-space, and will not trigger certain code paths that the general socket API exposes. Lockdep complains about a circular dependency between the socket and filesystem locks, because setsockopt can trigger a page fault with a socket lock held, but nvme-tcp sends requests on the socket while file system locks are held. ====================================================== WARNING: possible circular locking dependency detected 5.15.0-rc3 #1 Not tainted ------------------------------------------------------ fio/1496 is trying to acquire lock: (sk_lock-AF_INET){+.+.}-{0:0}, at: tcp_sendpage+0x23/0x80 but task is already holding lock: (&xfs_dir_ilock_class/5){+.+.}-{3:3}, at: xfs_ilock+0xcf/0x290 [xfs] which lock already depends on the new lock. other info that might help us debug this: chain exists of: sk_lock-AF_INET --> sb_internal --> &xfs_dir_ilock_class/5 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&xfs_dir_ilock_class/5); lock(sb_internal); lock(&xfs_dir_ilock_class/5); lock(sk_lock-AF_INET); *** DEADLOCK *** 6 locks held by fio/1496: #0: (sb_writers#13){.+.+}-{0:0}, at: path_openat+0x9fc/0xa20 #1: (&inode->i_sb->s_type->i_mutex_dir_key){++++}-{3:3}, at: path_openat+0x296/0xa20 #2: (sb_internal){.+.+}-{0:0}, at: xfs_trans_alloc_icreate+0x41/0xd0 [xfs] #3: (&xfs_dir_ilock_class/5){+.+.}-{3:3}, at: xfs_ilock+0xcf/0x290 [xfs] #4: (hctx->srcu){....}-{0:0}, at: hctx_lock+0x51/0xd0 #5: (&queue->send_mutex){+.+.}-{3:3}, at: nvme_tcp_queue_rq+0x33e/0x380 [nvme_tcp] This annotation lets lockdep analyze nvme-tcp controlled sockets independently of what the user-space sockets API does. Link: https://lore.kernel.org/linux-nvme/CAHj4cs9MDYLJ+q+2_GXUK9HxFizv2pxUryUR0toX974M040z7g@mail.gmail.com/ Signed-off-by: Chris Leech <cleech@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
7e4967e913
commit
1c200c8bce
@ -30,6 +30,44 @@ static int so_priority;
|
||||
module_param(so_priority, int, 0644);
|
||||
MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
/* lockdep can detect a circular dependency of the form
|
||||
* sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock
|
||||
* because dependencies are tracked for both nvme-tcp and user contexts. Using
|
||||
* a separate class prevents lockdep from conflating nvme-tcp socket use with
|
||||
* user-space socket API use.
|
||||
*/
|
||||
static struct lock_class_key nvme_tcp_sk_key[2];
|
||||
static struct lock_class_key nvme_tcp_slock_key[2];
|
||||
|
||||
static void nvme_tcp_reclassify_socket(struct socket *sock)
|
||||
{
|
||||
struct sock *sk = sock->sk;
|
||||
|
||||
if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
|
||||
return;
|
||||
|
||||
switch (sk->sk_family) {
|
||||
case AF_INET:
|
||||
sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME",
|
||||
&nvme_tcp_slock_key[0],
|
||||
"sk_lock-AF_INET-NVME",
|
||||
&nvme_tcp_sk_key[0]);
|
||||
break;
|
||||
case AF_INET6:
|
||||
sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME",
|
||||
&nvme_tcp_slock_key[1],
|
||||
"sk_lock-AF_INET6-NVME",
|
||||
&nvme_tcp_sk_key[1]);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static void nvme_tcp_reclassify_socket(struct socket *sock) { }
|
||||
#endif
|
||||
|
||||
enum nvme_tcp_send_state {
|
||||
NVME_TCP_SEND_CMD_PDU = 0,
|
||||
NVME_TCP_SEND_H2C_PDU,
|
||||
@ -1422,6 +1460,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
|
||||
goto err_destroy_mutex;
|
||||
}
|
||||
|
||||
nvme_tcp_reclassify_socket(queue->sock);
|
||||
|
||||
/* Single syn retry */
|
||||
tcp_sock_set_syncnt(queue->sock->sk, 1);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user