linux/net/rxrpc/call_object.c

// SPDX-License-Identifier: GPL-2.0-or-later
/* RxRPC individual remote procedure call handling
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/circ_buf.h>
#include <linux/spinlock_types.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"

const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
	[RXRPC_CALL_UNINITIALISED]		= "Uninit  ",
	[RXRPC_CALL_CLIENT_AWAIT_CONN]		= "ClWtConn",
	[RXRPC_CALL_CLIENT_SEND_REQUEST]	= "ClSndReq",
	[RXRPC_CALL_CLIENT_AWAIT_REPLY]		= "ClAwtRpl",
	[RXRPC_CALL_CLIENT_RECV_REPLY]		= "ClRcvRpl",
	[RXRPC_CALL_SERVER_PREALLOC]		= "SvPrealc",
	[RXRPC_CALL_SERVER_SECURING]		= "SvSecure",
	[RXRPC_CALL_SERVER_RECV_REQUEST]	= "SvRcvReq",
	[RXRPC_CALL_SERVER_ACK_REQUEST]		= "SvAckReq",
	[RXRPC_CALL_SERVER_SEND_REPLY]		= "SvSndRpl",
	[RXRPC_CALL_SERVER_AWAIT_ACK]		= "SvAwtACK",
	[RXRPC_CALL_COMPLETE]			= "Complete",
};

const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = {
	[RXRPC_CALL_SUCCEEDED]			= "Complete",
	[RXRPC_CALL_REMOTELY_ABORTED]		= "RmtAbort",
	[RXRPC_CALL_LOCALLY_ABORTED]		= "LocAbort",
	[RXRPC_CALL_LOCAL_ERROR]		= "LocError",
	[RXRPC_CALL_NETWORK_ERROR]		= "NetError",
};

struct kmem_cache *rxrpc_call_jar;

static struct semaphore rxrpc_call_limiter =
	__SEMAPHORE_INITIALIZER(rxrpc_call_limiter, 1000);
static struct semaphore rxrpc_kernel_call_limiter =
	__SEMAPHORE_INITIALIZER(rxrpc_kernel_call_limiter, 1000);

static void rxrpc_call_timer_expired(struct timer_list *t)
{
	struct rxrpc_call *call = from_timer(call, t, timer);

	_enter("%d", call->debug_id);

	if (call->state < RXRPC_CALL_COMPLETE) {
		trace_rxrpc_timer_expired(call, jiffies);
		rxrpc_queue_call(call, rxrpc_call_queue_timer);
	}
}

void rxrpc_reduce_call_timer(struct rxrpc_call *call,
			     unsigned long expire_at,
			     unsigned long now,
			     enum rxrpc_timer_trace why)
{
	trace_rxrpc_timer(call, why, now);
	timer_reduce(&call->timer, expire_at);
}

static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;

static void rxrpc_destroy_call(struct work_struct *);

/*
 * find an extant server call
 * - called in process context with IRQs enabled
 */
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx,
					      unsigned long user_call_ID)
{
	struct rxrpc_call *call;
	struct rb_node *p;

	_enter("%p,%lx", rx, user_call_ID);

	read_lock(&rx->call_lock);

	p = rx->calls.rb_node;
	while (p) {
		call = rb_entry(p, struct rxrpc_call, sock_node);

		if (user_call_ID < call->user_call_ID)
			p = p->rb_left;
		else if (user_call_ID > call->user_call_ID)
			p = p->rb_right;
		else
			goto found_extant_call;
	}

	read_unlock(&rx->call_lock);
	_leave(" = NULL");
	return NULL;

found_extant_call:
	rxrpc_get_call(call, rxrpc_call_get_sendmsg);
	read_unlock(&rx->call_lock);
	_leave(" = %p [%d]", call, refcount_read(&call->ref));
	return call;
}

/*
 * allocate a new call
 */
struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
				    unsigned int debug_id)
{
	struct rxrpc_call *call;
	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));

	call = kmem_cache_zalloc(rxrpc_call_jar, gfp);
	if (!call)
		return NULL;

	mutex_init(&call->user_mutex);

	/* Prevent lockdep reporting a deadlock false positive between the afs
	 * filesystem and sys_sendmsg() via the mmap sem.
	 */
	if (rx->sk.sk_kern_sock)
		lockdep_set_class(&call->user_mutex,
				  &rxrpc_call_user_mutex_lock_class_key);

	timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
	INIT_WORK(&call->processor, rxrpc_process_call);
	INIT_WORK(&call->destroyer, rxrpc_destroy_call);
	INIT_LIST_HEAD(&call->link);
	INIT_LIST_HEAD(&call->chan_wait_link);
	INIT_LIST_HEAD(&call->accept_link);
	INIT_LIST_HEAD(&call->recvmsg_link);
	INIT_LIST_HEAD(&call->sock_link);
	INIT_LIST_HEAD(&call->tx_buffer);
	skb_queue_head_init(&call->recvmsg_queue);
	skb_queue_head_init(&call->rx_oos_queue);
	init_waitqueue_head(&call->waitq);
	spin_lock_init(&call->notify_lock);
	spin_lock_init(&call->tx_lock);
	spin_lock_init(&call->input_lock);
	spin_lock_init(&call->acks_ack_lock);
	rwlock_init(&call->state_lock);
	refcount_set(&call->ref, 1);
	call->debug_id = debug_id;
	call->tx_total_len = -1;
	call->next_rx_timo = 20 * HZ;
	call->next_req_timo = 1 * HZ;
	atomic64_set(&call->ackr_window, 0x100000001ULL);

	memset(&call->sock_node, 0xed, sizeof(call->sock_node));

	call->rx_winsize = rxrpc_rx_window_size;
	call->tx_winsize = 16;

	if (RXRPC_TX_SMSS > 2190)
		call->cong_cwnd = 2;
	else if (RXRPC_TX_SMSS > 1095)
		call->cong_cwnd = 3;
	else
		call->cong_cwnd = 4;
	call->cong_ssthresh = RXRPC_TX_MAX_WINDOW;

	call->rxnet = rxnet;
	call->rtt_avail = RXRPC_CALL_RTT_AVAIL_MASK;
	atomic_inc(&rxnet->nr_calls);
	return call;
}

/*
 * Allocate a new client call.
 */
static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
						  struct sockaddr_rxrpc *srx,
						  gfp_t gfp,
						  unsigned int debug_id)
{
	struct rxrpc_call *call;
	ktime_t now;

	_enter("");

	call = rxrpc_alloc_call(rx, gfp, debug_id);
	if (!call)
		return ERR_PTR(-ENOMEM);
	call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
	call->service_id = srx->srx_service;
	now = ktime_get_real();
	call->acks_latest_ts = now;
	call->cong_tstamp = now;

	_leave(" = %p", call);
	return call;
}

/*
 * Initiate the call ack/resend/expiry timer.
 */
static void rxrpc_start_call_timer(struct rxrpc_call *call)
{
	unsigned long now = jiffies;
	unsigned long j = now + MAX_JIFFY_OFFSET;

	call->delay_ack_at = j;
	call->ack_lost_at = j;
	call->resend_at = j;
	call->ping_at = j;
	call->expect_rx_by = j;
	call->expect_req_by = j;
	call->expect_term_by = j;
	call->timer.expires = now;
}

/*
 * Wait for a call slot to become available.
 */
static struct semaphore *rxrpc_get_call_slot(struct rxrpc_call_params *p, gfp_t gfp)
{
	struct semaphore *limiter = &rxrpc_call_limiter;

	if (p->kernel)
		limiter = &rxrpc_kernel_call_limiter;
	if (p->interruptibility == RXRPC_UNINTERRUPTIBLE) {
		down(limiter);
		return limiter;
	}
	return down_interruptible(limiter) < 0 ? NULL : limiter;
}

/*
 * Release a call slot.
 */
static void rxrpc_put_call_slot(struct rxrpc_call *call)
{
	struct semaphore *limiter = &rxrpc_call_limiter;

	if (test_bit(RXRPC_CALL_KERNEL, &call->flags))
		limiter = &rxrpc_kernel_call_limiter;
	up(limiter);
}

/*
 * Set up a call for the given parameters.
 * - Called with the socket lock held, which it must release.
 * - If it returns a call, the call's lock will need releasing by the caller.
 */
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
					 struct rxrpc_conn_parameters *cp,
					 struct sockaddr_rxrpc *srx,
					 struct rxrpc_call_params *p,
					 gfp_t gfp,
					 unsigned int debug_id)
	__releases(&rx->sk.sk_lock.slock)
	__acquires(&call->user_mutex)
{
	struct rxrpc_call *call, *xcall;
	struct rxrpc_net *rxnet;
	struct semaphore *limiter;
	struct rb_node *parent, **pp;
	int ret;

	_enter("%p,%lx", rx, p->user_call_ID);

	limiter = rxrpc_get_call_slot(p, gfp);
	if (!limiter) {
		release_sock(&rx->sk);
		return ERR_PTR(-ERESTARTSYS);
	}

	call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
	if (IS_ERR(call)) {
		release_sock(&rx->sk);
		up(limiter);
		_leave(" = %ld", PTR_ERR(call));
		return call;
	}

	call->interruptibility = p->interruptibility;
	call->tx_total_len = p->tx_total_len;
	trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
			 p->user_call_ID, rxrpc_call_new_client);
	if (p->kernel)
		__set_bit(RXRPC_CALL_KERNEL, &call->flags);

	/* We need to protect a partially set up call against the user as we
	 * will be acting outside the socket lock.
	 */
	mutex_lock(&call->user_mutex);

	/* Publish the call, even though it is incompletely set up as yet */
	write_lock(&rx->call_lock);

	pp = &rx->calls.rb_node;
	parent = NULL;
	while (*pp) {
		parent = *pp;
		xcall = rb_entry(parent, struct rxrpc_call, sock_node);

		if (p->user_call_ID < xcall->user_call_ID)
			pp = &(*pp)->rb_left;
		else if (p->user_call_ID > xcall->user_call_ID)
			pp = &(*pp)->rb_right;
		else
			goto error_dup_user_ID;
	}

	rcu_assign_pointer(call->socket, rx);
	call->user_call_ID = p->user_call_ID;
	__set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
	rxrpc_get_call(call, rxrpc_call_get_userid);
	rb_link_node(&call->sock_node, parent, pp);
	rb_insert_color(&call->sock_node, &rx->calls);
	list_add(&call->sock_link, &rx->sock_calls);

	write_unlock(&rx->call_lock);

	rxnet = call->rxnet;
	spin_lock_bh(&rxnet->call_lock);
	list_add_tail_rcu(&call->link, &rxnet->calls);
	spin_unlock_bh(&rxnet->call_lock);

	/* From this point on, the call is protected by its own lock. */
	release_sock(&rx->sk);

	/* Set up or get a connection record and set the protocol parameters,
	 * including channel number and call ID.
	 */
	ret = rxrpc_connect_call(rx, call, cp, srx, gfp);
	if (ret < 0)
		goto error_attached_to_socket;

	rxrpc_see_call(call, rxrpc_call_see_connected);

	rxrpc_start_call_timer(call);

	_leave(" = %p [new]", call);
	return call;

	/* We unexpectedly found the user ID in the list after taking
	 * the call_lock.  This shouldn't happen unless the user races
	 * with itself and tries to add the same user ID twice at the
	 * same time in different threads.
	 */
error_dup_user_ID:
	write_unlock(&rx->call_lock);
	release_sock(&rx->sk);
	__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
				    RX_CALL_DEAD, -EEXIST);
	trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0,
			 rxrpc_call_see_userid_exists);
	rxrpc_release_call(rx, call);
	mutex_unlock(&call->user_mutex);
	rxrpc_put_call(call, rxrpc_call_put_userid_exists);
	_leave(" = -EEXIST");
	return ERR_PTR(-EEXIST);

	/* We got an error, but the call is attached to the socket and is in
	 * need of release.  However, we might now race with recvmsg() when
	 * completing the call queues it.  Return 0 from sys_sendmsg() and
	 * leave the error to recvmsg() to deal with.
	 */
error_attached_to_socket:
	trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret,
			 rxrpc_call_see_connect_failed);
	set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
	__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
				    RX_CALL_DEAD, ret);
	_leave(" = c=%08x [err]", call->debug_id);
	return call;
}

/*
 * Set up an incoming call.  call->conn points to the connection.
 * This is called in BH context and isn't allowed to fail.
 */
void rxrpc_incoming_call(struct rxrpc_sock *rx,
			 struct rxrpc_call *call,
			 struct sk_buff *skb)
{
	struct rxrpc_connection *conn = call->conn;
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	u32 chan;

	_enter(",%d", call->conn->debug_id);

	rcu_assign_pointer(call->socket, rx);
	call->call_id		= sp->hdr.callNumber;
	call->service_id	= sp->hdr.serviceId;
	call->cid		= sp->hdr.cid;
	call->state		= RXRPC_CALL_SERVER_SECURING;
	call->cong_tstamp	= skb->tstamp;

	/* Set the channel for this call.  We don't get channel_lock as we're
	 * only defending against the data_ready handler (which we're called
	 * from) and the RESPONSE packet parser (which is only really
	 * interested in call_counter and can cope with a disagreement with the
	 * call pointer).
	 */
	chan = sp->hdr.cid & RXRPC_CHANNELMASK;
	conn->channels[chan].call_counter = call->call_id;
	conn->channels[chan].call_id = call->call_id;
	rcu_assign_pointer(conn->channels[chan].call, call);

	spin_lock(&conn->peer->lock);
	hlist_add_head_rcu(&call->error_link, &conn->peer->error_targets);
	spin_unlock(&conn->peer->lock);

	rxrpc_start_call_timer(call);
	_leave("");
}

/*
 * Queue a call's work processor.
 */
void rxrpc_queue_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
{
	if (rxrpc_queue_work(&call->processor))
		trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0, why);
}

/*
 * Note the re-emergence of a call.
 */
void rxrpc_see_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
{
	if (call) {
		int r = refcount_read(&call->ref);

		trace_rxrpc_call(call->debug_id, r, 0, why);
	}
}

bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
{
	int r;

	if (!__refcount_inc_not_zero(&call->ref, &r))
		return false;
	trace_rxrpc_call(call->debug_id, r + 1, 0, why);
	return true;
}

/*
 * Note the addition of a ref on a call.
 */
void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
{
	int r;

	__refcount_inc(&call->ref, &r);
	trace_rxrpc_call(call->debug_id, r + 1, 0, why);
}

/*
 * Clean up the Rx skb ring.
 */
static void rxrpc_cleanup_ring(struct rxrpc_call *call)
{
	skb_queue_purge(&call->recvmsg_queue);
	skb_queue_purge(&call->rx_oos_queue);
}

/*
 * Detach a call from its owning socket.
 */
void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
{
	struct rxrpc_connection *conn = call->conn;
	bool put = false;

	_enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));

	trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
			 call->flags, rxrpc_call_see_release);

	ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);

	if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
		BUG();

	rxrpc_put_call_slot(call);
	del_timer_sync(&call->timer);

	/* Make sure we don't get any more notifications */
	write_lock_bh(&rx->recvmsg_lock);

	if (!list_empty(&call->recvmsg_link)) {
		_debug("unlinking once-pending call %p { e=%lx f=%lx }",
		       call, call->events, call->flags);
		list_del(&call->recvmsg_link);
		put = true;
	}

	/* list_empty() must return false in rxrpc_notify_socket() */
	call->recvmsg_link.next = NULL;
	call->recvmsg_link.prev = NULL;

	write_unlock_bh(&rx->recvmsg_lock);
	if (put)
		rxrpc_put_call(call, rxrpc_call_put_unnotify);

	write_lock(&rx->call_lock);

	if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
		rb_erase(&call->sock_node, &rx->calls);
		memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
		rxrpc_put_call(call, rxrpc_call_put_userid_exists);
	}

	list_del(&call->sock_link);
	write_unlock(&rx->call_lock);

	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);

	if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
		rxrpc_disconnect_call(call);
	if (call->security)
		call->security->free_call_crypto(call);
	_leave("");
}

/*
 * release all the calls associated with a socket
 */
void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
{
	struct rxrpc_call *call;

	_enter("%p", rx);

	while (!list_empty(&rx->to_be_accepted)) {
		call = list_entry(rx->to_be_accepted.next,
				  struct rxrpc_call, accept_link);
		list_del(&call->accept_link);
		rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET);
		rxrpc_put_call(call, rxrpc_call_put_release_sock_tba);
	}

	while (!list_empty(&rx->sock_calls)) {
		call = list_entry(rx->sock_calls.next,
				  struct rxrpc_call, sock_link);
		rxrpc_get_call(call, rxrpc_call_get_release_sock);
		rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET);
		rxrpc_send_abort_packet(call);
		rxrpc_release_call(rx, call);
		rxrpc_put_call(call, rxrpc_call_put_release_sock);
	}

	_leave("");
}

/*
 * release a call
 */
void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
{
	struct rxrpc_net *rxnet = call->rxnet;
	unsigned int debug_id = call->debug_id;
	bool dead;
	int r;

	ASSERT(call != NULL);

	dead = __refcount_dec_and_test(&call->ref, &r);
	trace_rxrpc_call(debug_id, r - 1, 0, why);
	if (dead) {
		ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);

		if (!list_empty(&call->link)) {
			spin_lock_bh(&rxnet->call_lock);
			list_del_init(&call->link);
			spin_unlock_bh(&rxnet->call_lock);
		}

		rxrpc_cleanup_call(call);
	}
}

/*
 * Free up the call under RCU.
 */
static void rxrpc_rcu_free_call(struct rcu_head *rcu)
{
	struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
	struct rxrpc_net *rxnet = READ_ONCE(call->rxnet);

	kmem_cache_free(rxrpc_call_jar, call);
	if (atomic_dec_and_test(&rxnet->nr_calls))
		wake_up_var(&rxnet->nr_calls);
}

/*
 * Final call destruction - but must be done in process context.
 */
static void rxrpc_destroy_call(struct work_struct *work)
{
	struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer);
	struct rxrpc_txbuf *txb;

	del_timer_sync(&call->timer);
	cancel_work_sync(&call->processor); /* The processor may restart the timer */
	del_timer_sync(&call->timer);

	rxrpc_cleanup_ring(call);
	while ((txb = list_first_entry_or_null(&call->tx_buffer,
					       struct rxrpc_txbuf, call_link))) {
		list_del(&txb->call_link);
		rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
	}
	rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
	rxrpc_free_skb(call->acks_soft_tbl, rxrpc_skb_put_ack);
	rxrpc_put_connection(call->conn, rxrpc_conn_put_call);
	rxrpc_put_peer(call->peer, rxrpc_peer_put_call);
	call_rcu(&call->rcu, rxrpc_rcu_free_call);
}

/*
 * clean up a call
 */
void rxrpc_cleanup_call(struct rxrpc_call *call)
{
	memset(&call->sock_node, 0xcd, sizeof(call->sock_node));

	ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
	ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));

	del_timer_sync(&call->timer);
	cancel_work(&call->processor);

	if (in_softirq() || work_busy(&call->processor))
		/* Can't use the rxrpc workqueue as we need to cancel/flush
		 * something that may be running/waiting there.
		 */
		schedule_work(&call->destroyer);
	else
		rxrpc_destroy_call(&call->destroyer);
}

/*
 * Make sure that all calls are gone from a network namespace.  To reach this
 * point, any open UDP sockets in that namespace must have been closed, so any
 * outstanding calls cannot be doing I/O.
 */
void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
{
	struct rxrpc_call *call;

	_enter("");

	if (!list_empty(&rxnet->calls)) {
		spin_lock_bh(&rxnet->call_lock);

		while (!list_empty(&rxnet->calls)) {
			call = list_entry(rxnet->calls.next,
					  struct rxrpc_call, link);
			_debug("Zapping call %p", call);

			rxrpc_see_call(call, rxrpc_call_see_zap);
			list_del_init(&call->link);

			pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
			       call, refcount_read(&call->ref),
			       rxrpc_call_states[call->state],
			       call->flags, call->events);

			spin_unlock_bh(&rxnet->call_lock);
			cond_resched();
			spin_lock_bh(&rxnet->call_lock);
		}

		spin_unlock_bh(&rxnet->call_lock);
	}

	atomic_dec(&rxnet->nr_calls);
	wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls));
}