linux/drivers/net/wireguard/queueing.h
Linus Torvalds 8ca09d5fa3 cpumask: fix incorrect cpumask scanning result checks
It turns out that commit 596ff4a09b89 ("cpumask: re-introduce
constant-sized cpumask optimizations") exposed a number of cases of
drivers not checking the result of "cpumask_next()" and friends
correctly.

The documented correct check for "no more cpus in the cpumask" is to
check for the result being equal or larger than the number of possible
CPU ids, exactly _because_ we've always done those constant-sized
cpumask scans using a widened type before.  So the return value of a
cpumask scan should be checked with

	if (cpu >= nr_cpu_ids)
		...

because the cpumask scan did not necessarily stop exactly *at* that
maximum CPU id.

But a few cases ended up instead using checks like

	if (cpu == nr_cpumask_bits)
		...

which used that internal "widened" number of bits.  And that used to
work pretty much by accident (ok, in this case "by accident" is simply
because it matched the historical internal implementation of the cpumask
scanning, so it was more of a "intentionally using implementation
details rather than an accident").

But the extended constant-sized optimizations then did that internal
implementation differently, and now that code that did things wrong but
matched the old implementation no longer worked at all.

Which then causes subsequent odd problems due to using what ends up
being an invalid CPU ID.

Most of these cases require either unusual hardware or special uses to
hit, but the random.c one triggers quite easily.

All you really need is to have a sufficiently small CONFIG_NR_CPUS value
for the bit scanning optimization to be triggered, but not enough CPUs
to then actually fill that widened cpumask.  At that point, the cpumask
scanning will return the NR_CPUS constant, which is _not_ the same as
nr_cpumask_bits.

This just does the mindless fix with

   sed -i 's/== nr_cpumask_bits/>= nr_cpu_ids/'

to fix the incorrect uses.

The ones in the SCSI lpfc driver in particular could probably be fixed
more cleanly by just removing that repeated pattern entirely, but I am
not emptionally invested enough in that driver to care.

Reported-and-tested-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/lkml/481b19b5-83a0-4793-b4fd-194ad7b978c3@roeck-us.net/
Reported-and-tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/lkml/CAMuHMdUKo_Sf7TjKzcNDa8Ve+6QrK+P8nSQrSQ=6LTRmcBKNww@mail.gmail.com/
Reported-by: Vernon Yang <vernon2gm@gmail.com>
Link: https://lore.kernel.org/lkml/20230306160651.2016767-1-vernon2gm@gmail.com/
Cc: Yury Norov <yury.norov@gmail.com>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2023-03-06 12:15:13 -08:00

213 lines
6.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_QUEUEING_H
#define _WG_QUEUEING_H
#include "peer.h"
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/ip_tunnels.h>
struct wg_device;
struct wg_peer;
struct multicore_worker;
struct crypt_queue;
struct prev_queue;
struct sk_buff;
/* queueing.c APIs: */
int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
unsigned int len);
void wg_packet_queue_free(struct crypt_queue *queue, bool purge);
struct multicore_worker __percpu *
wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
/* receive.c APIs: */
void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb);
void wg_packet_handshake_receive_worker(struct work_struct *work);
/* NAPI poll function: */
int wg_packet_rx_poll(struct napi_struct *napi, int budget);
/* Workqueue worker: */
void wg_packet_decrypt_worker(struct work_struct *work);
/* send.c APIs: */
void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
bool is_retry);
void wg_packet_send_handshake_response(struct wg_peer *peer);
void wg_packet_send_handshake_cookie(struct wg_device *wg,
struct sk_buff *initiating_skb,
__le32 sender_index);
void wg_packet_send_keepalive(struct wg_peer *peer);
void wg_packet_purge_staged_packets(struct wg_peer *peer);
void wg_packet_send_staged_packets(struct wg_peer *peer);
/* Workqueue workers: */
void wg_packet_handshake_send_worker(struct work_struct *work);
void wg_packet_tx_worker(struct work_struct *work);
void wg_packet_encrypt_worker(struct work_struct *work);
enum packet_state {
PACKET_STATE_UNCRYPTED,
PACKET_STATE_CRYPTED,
PACKET_STATE_DEAD
};
struct packet_cb {
u64 nonce;
struct noise_keypair *keypair;
atomic_t state;
u32 mtu;
u8 ds;
};
#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
static inline bool wg_check_packet_protocol(struct sk_buff *skb)
{
__be16 real_protocol = ip_tunnel_parse_protocol(skb);
return real_protocol && skb->protocol == real_protocol;
}
static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating)
{
u8 l4_hash = skb->l4_hash;
u8 sw_hash = skb->sw_hash;
u32 hash = skb->hash;
skb_scrub_packet(skb, true);
memset(&skb->headers, 0, sizeof(skb->headers));
if (encapsulating) {
skb->l4_hash = l4_hash;
skb->sw_hash = sw_hash;
skb->hash = hash;
}
skb->queue_mapping = 0;
skb->nohdr = 0;
skb->peeked = 0;
skb->mac_len = 0;
skb->dev = NULL;
#ifdef CONFIG_NET_SCHED
skb->tc_index = 0;
#endif
skb_reset_redirect(skb);
skb->hdr_len = skb_headroom(skb);
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb_probe_transport_header(skb);
skb_reset_inner_headers(skb);
}
static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
{
unsigned int cpu = *stored_cpu, cpu_index, i;
if (unlikely(cpu >= nr_cpu_ids ||
!cpumask_test_cpu(cpu, cpu_online_mask))) {
cpu_index = id % cpumask_weight(cpu_online_mask);
cpu = cpumask_first(cpu_online_mask);
for (i = 0; i < cpu_index; ++i)
cpu = cpumask_next(cpu, cpu_online_mask);
*stored_cpu = cpu;
}
return cpu;
}
/* This function is racy, in the sense that next is unlocked, so it could return
* the same CPU twice. A race-free version of this would be to instead store an
* atomic sequence number, do an increment-and-return, and then iterate through
* every possible CPU until we get to that index -- choose_cpu. However that's
* a bit slower, and it doesn't seem like this potential race actually
* introduces any performance loss, so we live with it.
*/
static inline int wg_cpumask_next_online(int *next)
{
int cpu = *next;
while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
return cpu;
}
void wg_prev_queue_init(struct prev_queue *queue);
/* Multi producer */
bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb);
/* Single consumer */
struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue);
/* Single consumer */
static inline struct sk_buff *wg_prev_queue_peek(struct prev_queue *queue)
{
if (queue->peeked)
return queue->peeked;
queue->peeked = wg_prev_queue_dequeue(queue);
return queue->peeked;
}
/* Single consumer */
static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue)
{
queue->peeked = NULL;
}
static inline int wg_queue_enqueue_per_device_and_peer(
struct crypt_queue *device_queue, struct prev_queue *peer_queue,
struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
{
int cpu;
atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED);
/* We first queue this up for the peer ingestion, but the consumer
* will wait for the state to change to CRYPTED or DEAD before.
*/
if (unlikely(!wg_prev_queue_enqueue(peer_queue, skb)))
return -ENOSPC;
/* Then we queue it up in the device queue, which consumes the
* packet as soon as it can.
*/
cpu = wg_cpumask_next_online(next_cpu);
if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
return -EPIPE;
queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
return 0;
}
static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet_state state)
{
/* We take a reference, because as soon as we call atomic_set, the
* peer can be freed from below us.
*/
struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
atomic_set_release(&PACKET_CB(skb)->state, state);
queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id),
peer->device->packet_crypt_wq, &peer->transmit_packet_work);
wg_peer_put(peer);
}
static inline void wg_queue_enqueue_per_peer_rx(struct sk_buff *skb, enum packet_state state)
{
/* We take a reference, because as soon as we call atomic_set, the
* peer can be freed from below us.
*/
struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
atomic_set_release(&PACKET_CB(skb)->state, state);
napi_schedule(&peer->napi);
wg_peer_put(peer);
}
#ifdef DEBUG
bool wg_packet_counter_selftest(void);
#endif
#endif /* _WG_QUEUEING_H */