5481fc6eb8
The fileserver probe timer, net->fs_probe_timer, isn't cancelled when the kafs module is being removed and so the count it holds on net->servers_outstanding doesn't get dropped.. This causes rmmod to wait forever. The hung process shows a stack like: afs_purge_servers+0x1b5/0x23c [kafs] afs_net_exit+0x44/0x6e [kafs] ops_exit_list+0x72/0x93 unregister_pernet_operations+0x14c/0x1ba unregister_pernet_subsys+0x1d/0x2a afs_exit+0x29/0x6f [kafs] __do_sys_delete_module.isra.0+0x1a2/0x24b do_syscall_64+0x51/0x95 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this by: (1) Attempting to cancel the probe timer and, if successful, drop the count that the timer was holding. (2) Make the timer function just drop the count and not schedule the prober if the afs portion of net namespace is being destroyed. Also, whilst we're at it, make the following changes: (3) Initialise net->servers_outstanding to 1 and decrement it before waiting on it so that it doesn't generate wake up events by being decremented to 0 until we're cleaning up. (4) Switch the atomic_dec() on ->servers_outstanding for ->fs_timer in afs_purge_servers() to use the helper function for that. Fixes: f6cbb368bcb0 ("afs: Actively poll fileservers to maintain NAT or firewall openings") Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
470 lines
12 KiB
C
470 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/* AFS fileserver probing
|
|
*
|
|
* Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/slab.h>
|
|
#include "afs_fs.h"
|
|
#include "internal.h"
|
|
#include "protocol_yfs.h"
|
|
|
|
static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
|
|
static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
|
|
|
|
/*
|
|
* Start the probe polling timer. We have to supply it with an inc on the
|
|
* outstanding server count.
|
|
*/
|
|
static void afs_schedule_fs_probe(struct afs_net *net,
|
|
struct afs_server *server, bool fast)
|
|
{
|
|
unsigned long atj;
|
|
|
|
if (!net->live)
|
|
return;
|
|
|
|
atj = server->probed_at;
|
|
atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
|
|
|
|
afs_inc_servers_outstanding(net);
|
|
if (timer_reduce(&net->fs_probe_timer, atj))
|
|
afs_dec_servers_outstanding(net);
|
|
}
|
|
|
|
/*
|
|
* Handle the completion of a set of probes.
|
|
*/
|
|
static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
|
|
{
|
|
bool responded = server->probe.responded;
|
|
|
|
write_seqlock(&net->fs_lock);
|
|
if (responded) {
|
|
list_add_tail(&server->probe_link, &net->fs_probe_slow);
|
|
} else {
|
|
server->rtt = UINT_MAX;
|
|
clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
|
|
list_add_tail(&server->probe_link, &net->fs_probe_fast);
|
|
}
|
|
write_sequnlock(&net->fs_lock);
|
|
|
|
afs_schedule_fs_probe(net, server, !responded);
|
|
}
|
|
|
|
/*
|
|
* Handle the completion of a probe.
|
|
*/
|
|
static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
|
|
{
|
|
_enter("");
|
|
|
|
if (atomic_dec_and_test(&server->probe_outstanding))
|
|
afs_finished_fs_probe(net, server);
|
|
|
|
wake_up_all(&server->probe_wq);
|
|
}
|
|
|
|
/*
|
|
* Handle inability to send a probe due to ENOMEM when trying to allocate a
|
|
* call struct.
|
|
*/
|
|
static void afs_fs_probe_not_done(struct afs_net *net,
|
|
struct afs_server *server,
|
|
struct afs_addr_cursor *ac)
|
|
{
|
|
struct afs_addr_list *alist = ac->alist;
|
|
unsigned int index = ac->index;
|
|
|
|
_enter("");
|
|
|
|
trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
|
|
spin_lock(&server->probe_lock);
|
|
|
|
server->probe.local_failure = true;
|
|
if (server->probe.error == 0)
|
|
server->probe.error = -ENOMEM;
|
|
|
|
set_bit(index, &alist->failed);
|
|
|
|
spin_unlock(&server->probe_lock);
|
|
return afs_done_one_fs_probe(net, server);
|
|
}
|
|
|
|
/*
|
|
* Process the result of probing a fileserver. This is called after successful
|
|
* or failed delivery of an FS.GetCapabilities operation.
|
|
*/
|
|
void afs_fileserver_probe_result(struct afs_call *call)
|
|
{
|
|
struct afs_addr_list *alist = call->alist;
|
|
struct afs_server *server = call->server;
|
|
unsigned int index = call->addr_ix;
|
|
unsigned int rtt_us = 0;
|
|
int ret = call->error;
|
|
|
|
_enter("%pU,%u", &server->uuid, index);
|
|
|
|
spin_lock(&server->probe_lock);
|
|
|
|
switch (ret) {
|
|
case 0:
|
|
server->probe.error = 0;
|
|
goto responded;
|
|
case -ECONNABORTED:
|
|
if (!server->probe.responded) {
|
|
server->probe.abort_code = call->abort_code;
|
|
server->probe.error = ret;
|
|
}
|
|
goto responded;
|
|
case -ENOMEM:
|
|
case -ENONET:
|
|
clear_bit(index, &alist->responded);
|
|
server->probe.local_failure = true;
|
|
trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
|
|
goto out;
|
|
case -ECONNRESET: /* Responded, but call expired. */
|
|
case -ERFKILL:
|
|
case -EADDRNOTAVAIL:
|
|
case -ENETUNREACH:
|
|
case -EHOSTUNREACH:
|
|
case -EHOSTDOWN:
|
|
case -ECONNREFUSED:
|
|
case -ETIMEDOUT:
|
|
case -ETIME:
|
|
default:
|
|
clear_bit(index, &alist->responded);
|
|
set_bit(index, &alist->failed);
|
|
if (!server->probe.responded &&
|
|
(server->probe.error == 0 ||
|
|
server->probe.error == -ETIMEDOUT ||
|
|
server->probe.error == -ETIME))
|
|
server->probe.error = ret;
|
|
trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
|
|
goto out;
|
|
}
|
|
|
|
responded:
|
|
clear_bit(index, &alist->failed);
|
|
|
|
if (call->service_id == YFS_FS_SERVICE) {
|
|
server->probe.is_yfs = true;
|
|
set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
|
|
alist->addrs[index].srx_service = call->service_id;
|
|
} else {
|
|
server->probe.not_yfs = true;
|
|
if (!server->probe.is_yfs) {
|
|
clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
|
|
alist->addrs[index].srx_service = call->service_id;
|
|
}
|
|
}
|
|
|
|
rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall);
|
|
if (rtt_us < server->probe.rtt) {
|
|
server->probe.rtt = rtt_us;
|
|
server->rtt = rtt_us;
|
|
alist->preferred = index;
|
|
}
|
|
|
|
smp_wmb(); /* Set rtt before responded. */
|
|
server->probe.responded = true;
|
|
set_bit(index, &alist->responded);
|
|
set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
|
|
out:
|
|
spin_unlock(&server->probe_lock);
|
|
|
|
_debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
|
|
&server->uuid, index, &alist->addrs[index].transport,
|
|
rtt_us, ret);
|
|
|
|
return afs_done_one_fs_probe(call->net, server);
|
|
}
|
|
|
|
/*
|
|
* Probe one or all of a fileserver's addresses to find out the best route and
|
|
* to query its capabilities.
|
|
*/
|
|
void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
|
|
struct key *key, bool all)
|
|
{
|
|
struct afs_addr_cursor ac = {
|
|
.index = 0,
|
|
};
|
|
|
|
_enter("%pU", &server->uuid);
|
|
|
|
read_lock(&server->fs_lock);
|
|
ac.alist = rcu_dereference_protected(server->addresses,
|
|
lockdep_is_held(&server->fs_lock));
|
|
afs_get_addrlist(ac.alist);
|
|
read_unlock(&server->fs_lock);
|
|
|
|
server->probed_at = jiffies;
|
|
atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
|
|
memset(&server->probe, 0, sizeof(server->probe));
|
|
server->probe.rtt = UINT_MAX;
|
|
|
|
ac.index = ac.alist->preferred;
|
|
if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
|
|
all = true;
|
|
|
|
if (all) {
|
|
for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
|
|
if (!afs_fs_get_capabilities(net, server, &ac, key))
|
|
afs_fs_probe_not_done(net, server, &ac);
|
|
} else {
|
|
if (!afs_fs_get_capabilities(net, server, &ac, key))
|
|
afs_fs_probe_not_done(net, server, &ac);
|
|
}
|
|
|
|
afs_put_addrlist(ac.alist);
|
|
}
|
|
|
|
/*
|
|
* Wait for the first as-yet untried fileserver to respond.
|
|
*/
|
|
int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
|
|
{
|
|
struct wait_queue_entry *waits;
|
|
struct afs_server *server;
|
|
unsigned int rtt = UINT_MAX, rtt_s;
|
|
bool have_responders = false;
|
|
int pref = -1, i;
|
|
|
|
_enter("%u,%lx", slist->nr_servers, untried);
|
|
|
|
/* Only wait for servers that have a probe outstanding. */
|
|
for (i = 0; i < slist->nr_servers; i++) {
|
|
if (test_bit(i, &untried)) {
|
|
server = slist->servers[i].server;
|
|
if (!atomic_read(&server->probe_outstanding))
|
|
__clear_bit(i, &untried);
|
|
if (server->probe.responded)
|
|
have_responders = true;
|
|
}
|
|
}
|
|
if (have_responders || !untried)
|
|
return 0;
|
|
|
|
waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
|
|
if (!waits)
|
|
return -ENOMEM;
|
|
|
|
for (i = 0; i < slist->nr_servers; i++) {
|
|
if (test_bit(i, &untried)) {
|
|
server = slist->servers[i].server;
|
|
init_waitqueue_entry(&waits[i], current);
|
|
add_wait_queue(&server->probe_wq, &waits[i]);
|
|
}
|
|
}
|
|
|
|
for (;;) {
|
|
bool still_probing = false;
|
|
|
|
set_current_state(TASK_INTERRUPTIBLE);
|
|
for (i = 0; i < slist->nr_servers; i++) {
|
|
if (test_bit(i, &untried)) {
|
|
server = slist->servers[i].server;
|
|
if (server->probe.responded)
|
|
goto stop;
|
|
if (atomic_read(&server->probe_outstanding))
|
|
still_probing = true;
|
|
}
|
|
}
|
|
|
|
if (!still_probing || signal_pending(current))
|
|
goto stop;
|
|
schedule();
|
|
}
|
|
|
|
stop:
|
|
set_current_state(TASK_RUNNING);
|
|
|
|
for (i = 0; i < slist->nr_servers; i++) {
|
|
if (test_bit(i, &untried)) {
|
|
server = slist->servers[i].server;
|
|
rtt_s = READ_ONCE(server->rtt);
|
|
if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
|
|
rtt_s < rtt) {
|
|
pref = i;
|
|
rtt = rtt_s;
|
|
}
|
|
|
|
remove_wait_queue(&server->probe_wq, &waits[i]);
|
|
}
|
|
}
|
|
|
|
kfree(waits);
|
|
|
|
if (pref == -1 && signal_pending(current))
|
|
return -ERESTARTSYS;
|
|
|
|
if (pref >= 0)
|
|
slist->preferred = pref;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Probe timer. We have an increment on fs_outstanding that we need to pass
|
|
* along to the work item.
|
|
*/
|
|
void afs_fs_probe_timer(struct timer_list *timer)
|
|
{
|
|
struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
|
|
|
|
if (!net->live || !queue_work(afs_wq, &net->fs_prober))
|
|
afs_dec_servers_outstanding(net);
|
|
}
|
|
|
|
/*
|
|
* Dispatch a probe to a server.
|
|
*/
|
|
static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
|
|
__releases(&net->fs_lock)
|
|
{
|
|
struct key *key = NULL;
|
|
|
|
/* We remove it from the queues here - it will be added back to
|
|
* one of the queues on the completion of the probe.
|
|
*/
|
|
list_del_init(&server->probe_link);
|
|
|
|
afs_get_server(server, afs_server_trace_get_probe);
|
|
write_sequnlock(&net->fs_lock);
|
|
|
|
afs_fs_probe_fileserver(net, server, key, all);
|
|
afs_put_server(net, server, afs_server_trace_put_probe);
|
|
}
|
|
|
|
/*
|
|
* Probe a server immediately without waiting for its due time to come
|
|
* round. This is used when all of the addresses have been tried.
|
|
*/
|
|
void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
|
|
{
|
|
write_seqlock(&net->fs_lock);
|
|
if (!list_empty(&server->probe_link))
|
|
return afs_dispatch_fs_probe(net, server, true);
|
|
write_sequnlock(&net->fs_lock);
|
|
}
|
|
|
|
/*
|
|
* Probe dispatcher to regularly dispatch probes to keep NAT alive.
|
|
*/
|
|
void afs_fs_probe_dispatcher(struct work_struct *work)
|
|
{
|
|
struct afs_net *net = container_of(work, struct afs_net, fs_prober);
|
|
struct afs_server *fast, *slow, *server;
|
|
unsigned long nowj, timer_at, poll_at;
|
|
bool first_pass = true, set_timer = false;
|
|
|
|
if (!net->live)
|
|
return;
|
|
|
|
_enter("");
|
|
|
|
if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
|
|
_leave(" [none]");
|
|
return;
|
|
}
|
|
|
|
again:
|
|
write_seqlock(&net->fs_lock);
|
|
|
|
fast = slow = server = NULL;
|
|
nowj = jiffies;
|
|
timer_at = nowj + MAX_JIFFY_OFFSET;
|
|
|
|
if (!list_empty(&net->fs_probe_fast)) {
|
|
fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
|
|
poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
|
|
if (time_before(nowj, poll_at)) {
|
|
timer_at = poll_at;
|
|
set_timer = true;
|
|
fast = NULL;
|
|
}
|
|
}
|
|
|
|
if (!list_empty(&net->fs_probe_slow)) {
|
|
slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
|
|
poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
|
|
if (time_before(nowj, poll_at)) {
|
|
if (time_before(poll_at, timer_at))
|
|
timer_at = poll_at;
|
|
set_timer = true;
|
|
slow = NULL;
|
|
}
|
|
}
|
|
|
|
server = fast ?: slow;
|
|
if (server)
|
|
_debug("probe %pU", &server->uuid);
|
|
|
|
if (server && (first_pass || !need_resched())) {
|
|
afs_dispatch_fs_probe(net, server, server == fast);
|
|
first_pass = false;
|
|
goto again;
|
|
}
|
|
|
|
write_sequnlock(&net->fs_lock);
|
|
|
|
if (server) {
|
|
if (!queue_work(afs_wq, &net->fs_prober))
|
|
afs_dec_servers_outstanding(net);
|
|
_leave(" [requeue]");
|
|
} else if (set_timer) {
|
|
if (timer_reduce(&net->fs_probe_timer, timer_at))
|
|
afs_dec_servers_outstanding(net);
|
|
_leave(" [timer]");
|
|
} else {
|
|
afs_dec_servers_outstanding(net);
|
|
_leave(" [quiesce]");
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Wait for a probe on a particular fileserver to complete for 2s.
|
|
*/
|
|
int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
|
|
{
|
|
struct wait_queue_entry wait;
|
|
unsigned long timo = 2 * HZ;
|
|
|
|
if (atomic_read(&server->probe_outstanding) == 0)
|
|
goto dont_wait;
|
|
|
|
init_wait_entry(&wait, 0);
|
|
for (;;) {
|
|
prepare_to_wait_event(&server->probe_wq, &wait,
|
|
is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
|
|
if (timo == 0 ||
|
|
server->probe.responded ||
|
|
atomic_read(&server->probe_outstanding) == 0 ||
|
|
(is_intr && signal_pending(current)))
|
|
break;
|
|
timo = schedule_timeout(timo);
|
|
}
|
|
|
|
finish_wait(&server->probe_wq, &wait);
|
|
|
|
dont_wait:
|
|
if (server->probe.responded)
|
|
return 0;
|
|
if (is_intr && signal_pending(current))
|
|
return -ERESTARTSYS;
|
|
if (timo == 0)
|
|
return -ETIME;
|
|
return -EDESTADDRREQ;
|
|
}
|
|
|
|
/*
|
|
* Clean up the probing when the namespace is killed off.
|
|
*/
|
|
void afs_fs_probe_cleanup(struct afs_net *net)
|
|
{
|
|
if (del_timer_sync(&net->fs_probe_timer))
|
|
afs_dec_servers_outstanding(net);
|
|
}
|