a4e9f8e328
With deployments having upwards of 600k peers now, this somewhat heavy structure could benefit from more fine-grained allocations. Specifically, instead of using a 2048-byte slab for a 1544-byte object, we can now use 1544-byte objects directly, thus saving almost 25% per-peer, or with 600k peers, that's a savings of 303 MiB. This also makes wireguard's memory usage more transparent in tools like slabtop and /proc/slabinfo. Fixes: 8b5553ace83c ("wireguard: queueing: get rid of per-peer ring buffers") Suggested-by: Arnd Bergmann <arnd@arndb.de> Suggested-by: Matthew Wilcox <willy@infradead.org> Cc: stable@vger.kernel.org Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> Signed-off-by: David S. Miller <davem@davemloft.net>
241 lines
7.5 KiB
C
241 lines
7.5 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
|
*/
|
|
|
|
#include "peer.h"
|
|
#include "device.h"
|
|
#include "queueing.h"
|
|
#include "timers.h"
|
|
#include "peerlookup.h"
|
|
#include "noise.h"
|
|
|
|
#include <linux/kref.h>
|
|
#include <linux/lockdep.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/list.h>
|
|
|
|
/* Slab cache from which every struct wg_peer in this file is allocated;
 * created in wg_peer_init() and destroyed in wg_peer_uninit().
 */
static struct kmem_cache *peer_cache;
|
|
/* Global counter, atomically incremented in wg_peer_create() to hand out
 * peer->internal_id values. It starts at zero, so the first id issued is 1.
 */
static atomic64_t peer_counter = ATOMIC64_INIT(0);
|
|
|
|
struct wg_peer *wg_peer_create(struct wg_device *wg,
|
|
const u8 public_key[NOISE_PUBLIC_KEY_LEN],
|
|
const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN])
|
|
{
|
|
struct wg_peer *peer;
|
|
int ret = -ENOMEM;
|
|
|
|
lockdep_assert_held(&wg->device_update_lock);
|
|
|
|
if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
|
|
return ERR_PTR(ret);
|
|
|
|
peer = kmem_cache_zalloc(peer_cache, GFP_KERNEL);
|
|
if (unlikely(!peer))
|
|
return ERR_PTR(ret);
|
|
if (unlikely(dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)))
|
|
goto err;
|
|
|
|
peer->device = wg;
|
|
wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
|
|
public_key, preshared_key, peer);
|
|
peer->internal_id = atomic64_inc_return(&peer_counter);
|
|
peer->serial_work_cpu = nr_cpumask_bits;
|
|
wg_cookie_init(&peer->latest_cookie);
|
|
wg_timers_init(peer);
|
|
wg_cookie_checker_precompute_peer_keys(peer);
|
|
spin_lock_init(&peer->keypairs.keypair_update_lock);
|
|
INIT_WORK(&peer->transmit_handshake_work, wg_packet_handshake_send_worker);
|
|
INIT_WORK(&peer->transmit_packet_work, wg_packet_tx_worker);
|
|
wg_prev_queue_init(&peer->tx_queue);
|
|
wg_prev_queue_init(&peer->rx_queue);
|
|
rwlock_init(&peer->endpoint_lock);
|
|
kref_init(&peer->refcount);
|
|
skb_queue_head_init(&peer->staged_packet_queue);
|
|
wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
|
|
set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state);
|
|
netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll,
|
|
NAPI_POLL_WEIGHT);
|
|
napi_enable(&peer->napi);
|
|
list_add_tail(&peer->peer_list, &wg->peer_list);
|
|
INIT_LIST_HEAD(&peer->allowedips_list);
|
|
wg_pubkey_hashtable_add(wg->peer_hashtable, peer);
|
|
++wg->num_peers;
|
|
pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
|
|
return peer;
|
|
|
|
err:
|
|
kmem_cache_free(peer_cache, peer);
|
|
return ERR_PTR(ret);
|
|
}
|
|
|
|
struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer)
|
|
{
|
|
RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
|
|
"Taking peer reference without holding the RCU read lock");
|
|
if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount)))
|
|
return NULL;
|
|
return peer;
|
|
}
|
|
|
|
static void peer_make_dead(struct wg_peer *peer)
|
|
{
|
|
/* Remove from configuration-time lookup structures. */
|
|
list_del_init(&peer->peer_list);
|
|
wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer,
|
|
&peer->device->device_update_lock);
|
|
wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer);
|
|
|
|
/* Mark as dead, so that we don't allow jumping contexts after. */
|
|
WRITE_ONCE(peer->is_dead, true);
|
|
|
|
/* The caller must now synchronize_net() for this to take effect. */
|
|
}
|
|
|
|
static void peer_remove_after_dead(struct wg_peer *peer)
|
|
{
|
|
WARN_ON(!peer->is_dead);
|
|
|
|
/* No more keypairs can be created for this peer, since is_dead protects
|
|
* add_new_keypair, so we can now destroy existing ones.
|
|
*/
|
|
wg_noise_keypairs_clear(&peer->keypairs);
|
|
|
|
/* Destroy all ongoing timers that were in-flight at the beginning of
|
|
* this function.
|
|
*/
|
|
wg_timers_stop(peer);
|
|
|
|
/* The transition between packet encryption/decryption queues isn't
|
|
* guarded by is_dead, but each reference's life is strictly bounded by
|
|
* two generations: once for parallel crypto and once for serial
|
|
* ingestion, so we can simply flush twice, and be sure that we no
|
|
* longer have references inside these queues.
|
|
*/
|
|
|
|
/* a) For encrypt/decrypt. */
|
|
flush_workqueue(peer->device->packet_crypt_wq);
|
|
/* b.1) For send (but not receive, since that's napi). */
|
|
flush_workqueue(peer->device->packet_crypt_wq);
|
|
/* b.2.1) For receive (but not send, since that's wq). */
|
|
napi_disable(&peer->napi);
|
|
/* b.2.1) It's now safe to remove the napi struct, which must be done
|
|
* here from process context.
|
|
*/
|
|
netif_napi_del(&peer->napi);
|
|
|
|
/* Ensure any workstructs we own (like transmit_handshake_work or
|
|
* clear_peer_work) no longer are in use.
|
|
*/
|
|
flush_workqueue(peer->device->handshake_send_wq);
|
|
|
|
/* After the above flushes, a peer might still be active in a few
|
|
* different contexts: 1) from xmit(), before hitting is_dead and
|
|
* returning, 2) from wg_packet_consume_data(), before hitting is_dead
|
|
* and returning, 3) from wg_receive_handshake_packet() after a point
|
|
* where it has processed an incoming handshake packet, but where
|
|
* all calls to pass it off to timers fails because of is_dead. We won't
|
|
* have new references in (1) eventually, because we're removed from
|
|
* allowedips; we won't have new references in (2) eventually, because
|
|
* wg_index_hashtable_lookup will always return NULL, since we removed
|
|
* all existing keypairs and no more can be created; we won't have new
|
|
* references in (3) eventually, because we're removed from the pubkey
|
|
* hash table, which allows for a maximum of one handshake response,
|
|
* via the still-uncleared index hashtable entry, but not more than one,
|
|
* and in wg_cookie_message_consume, the lookup eventually gets a peer
|
|
* with a refcount of zero, so no new reference is taken.
|
|
*/
|
|
|
|
--peer->device->num_peers;
|
|
wg_peer_put(peer);
|
|
}
|
|
|
|
/* We have a separate "remove" function make sure that all active places where
|
|
* a peer is currently operating will eventually come to an end and not pass
|
|
* their reference onto another context.
|
|
*/
|
|
void wg_peer_remove(struct wg_peer *peer)
|
|
{
|
|
if (unlikely(!peer))
|
|
return;
|
|
lockdep_assert_held(&peer->device->device_update_lock);
|
|
|
|
peer_make_dead(peer);
|
|
synchronize_net();
|
|
peer_remove_after_dead(peer);
|
|
}
|
|
|
|
void wg_peer_remove_all(struct wg_device *wg)
|
|
{
|
|
struct wg_peer *peer, *temp;
|
|
LIST_HEAD(dead_peers);
|
|
|
|
lockdep_assert_held(&wg->device_update_lock);
|
|
|
|
/* Avoid having to traverse individually for each one. */
|
|
wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock);
|
|
|
|
list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) {
|
|
peer_make_dead(peer);
|
|
list_add_tail(&peer->peer_list, &dead_peers);
|
|
}
|
|
synchronize_net();
|
|
list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
|
|
peer_remove_after_dead(peer);
|
|
}
|
|
|
|
static void rcu_release(struct rcu_head *rcu)
|
|
{
|
|
struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu);
|
|
|
|
dst_cache_destroy(&peer->endpoint_cache);
|
|
WARN_ON(wg_prev_queue_peek(&peer->tx_queue) || wg_prev_queue_peek(&peer->rx_queue));
|
|
|
|
/* The final zeroing takes care of clearing any remaining handshake key
|
|
* material and other potentially sensitive information.
|
|
*/
|
|
memzero_explicit(peer, sizeof(*peer));
|
|
kmem_cache_free(peer_cache, peer);
|
|
}
|
|
|
|
static void kref_release(struct kref *refcount)
|
|
{
|
|
struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount);
|
|
|
|
pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n",
|
|
peer->device->dev->name, peer->internal_id,
|
|
&peer->endpoint.addr);
|
|
|
|
/* Remove ourself from dynamic runtime lookup structures, now that the
|
|
* last reference is gone.
|
|
*/
|
|
wg_index_hashtable_remove(peer->device->index_hashtable,
|
|
&peer->handshake.entry);
|
|
|
|
/* Remove any lingering packets that didn't have a chance to be
|
|
* transmitted.
|
|
*/
|
|
wg_packet_purge_staged_packets(peer);
|
|
|
|
/* Free the memory used. */
|
|
call_rcu(&peer->rcu, rcu_release);
|
|
}
|
|
|
|
void wg_peer_put(struct wg_peer *peer)
|
|
{
|
|
if (unlikely(!peer))
|
|
return;
|
|
kref_put(&peer->refcount, kref_release);
|
|
}
|
|
|
|
/* Create the slab cache used for struct wg_peer allocations.
 * Returns 0 on success or -ENOMEM if the cache could not be created.
 */
int __init wg_peer_init(void)
{
	peer_cache = KMEM_CACHE(wg_peer, 0);
	if (!peer_cache)
		return -ENOMEM;

	return 0;
}
|
|
|
|
void wg_peer_uninit(void)
|
|
{
|
|
kmem_cache_destroy(peer_cache);
|
|
}
|