// SPDX-License-Identifier: GPL-2.0
/* XDP user-space ring structure
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/overflow.h>

#include "xsk_queue.h"

void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask)
{
	if (!q)
		return;

	q->size = size;
	q->chunk_mask = chunk_mask;
}
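
/* A minimal sketch (illustrative, not taken from this file) of how a
 * chunk_mask for a power-of-two chunk size is typically derived and
 * applied; the local variable names below are assumptions:
 *
 *	u64 chunk_size = 2048;
 *	u64 chunk_mask = ~(chunk_size - 1);	// 0xfffffffffffff800
 *
 *	// 2048, 2050 and 3000 all round down to chunk start 2048.
 *	u64 addr = 3000 & chunk_mask;		// addr == 2048
 */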

/* xsk: new descriptor addressing scheme
 *
 * Originally, AF_XDP only supported a fixed frame-size memory scheme
 * where each frame is referenced via an index (idx). A user passes the
 * frame index to the kernel, and the kernel acts upon the data. Some
 * NICs, however, do not have a fixed frame-size model; instead they have
 * a model where a memory window is passed to the hardware and multiple
 * frames are filled into that window (referred to as the "type-writer"
 * model). Moving the descriptor format away from frame-index addressing
 * lets AF_XDP be extended to support these kinds of NICs in the future.
 *
 * In the index-based model, an idx refers to a frame of size frame_size,
 * and addressing a frame in the UMEM is done by offsetting the UMEM
 * starting address by a global offset: idx * frame_size + offset.
 * Communication via the fill and completion rings is done by means of
 * idx.
 *
 * The idx has been removed in favor of an address (addr), which is a
 * relative address ranging over the UMEM. Converting an idx-based
 * address to the new addr is simple: addr = idx * frame_size + offset.
 * The UMEM "frame" is also no longer called a frame; it is simply called
 * a chunk.
 *
 * To transfer ownership of a chunk to the kernel, the addr of the chunk
 * is passed in the fill ring. Note that the kernel will mask addr to
 * make it chunk aligned, so there is no need for userspace to do that.
 * E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or 3000 to
 * the fill ring will refer to the same chunk. On the completion ring,
 * the addr will match that of the Tx descriptor passed to the kernel.
 *
 * Using chunks/addr in the descriptor format allows a future move to a
 * type-writer based model, where multiple frames can reside in one
 * chunk. In that model, passing one single chunk into the fill ring
 * could potentially result in multiple Rx descriptors.
 *
 * This change altered the uapi of AF_XDP sockets, and the documentation
 * was updated accordingly.
 *
 * Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
 * Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
 */
static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
{
	return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u64);
}
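
/* To make the conversion described above concrete, a minimal sketch
 * (illustrative only; the helper name is an assumption) mapping an
 * old-style (idx, offset) pair to the new addr format:
 *
 *	static inline u64 idx_to_addr(u64 idx, u64 frame_size, u64 offset)
 *	{
 *		return idx * frame_size + offset;
 *	}
 *
 * E.g., idx 3 with frame_size 2048 and offset 256 yields addr 6400.
 */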

static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
{
	return sizeof(struct xdp_ring) + q->nentries * sizeof(struct xdp_desc);
}
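
/* The two sizing helpers differ because the ring flavors carry different
 * entry types: fill/completion (umem) rings hold bare u64 addrs, while
 * Rx/Tx rings hold full descriptors. For reference, the uapi descriptor
 * that goes with the addressing scheme above looks like this (see
 * include/uapi/linux/if_xdp.h):
 *
 *	struct xdp_desc {
 *		__u64 addr;
 *		__u32 len;
 *		__u32 options;
 *	};
 */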

struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
{
	struct xsk_queue *q;
	gfp_t gfp_flags;
	size_t size;

	q = kzalloc(sizeof(*q), GFP_KERNEL);
	if (!q)
		return NULL;

	q->nentries = nentries;
	/* ring_mask only works if nentries is a power of two; callers
	 * are expected to validate this.
	 */
	q->ring_mask = nentries - 1;

	gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN |
		    __GFP_COMP | __GFP_NORETRY;
	size = umem_queue ? xskq_umem_get_ring_size(q) :
	       xskq_rxtx_get_ring_size(q);

	q->ring = (struct xdp_ring *)__get_free_pages(gfp_flags,
						      get_order(size));
	if (!q->ring) {
		kfree(q);
		return NULL;
	}

	return q;
}
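
/* A minimal usage sketch (illustrative; not an actual call site in this
 * file) creating and tearing down a 256-entry Rx/Tx descriptor ring:
 *
 *	struct xsk_queue *q;
 *
 *	q = xskq_create(256, false);	// nentries must be a power of two
 *	if (!q)
 *		return -ENOMEM;
 *	...
 *	xskq_destroy(q);
 */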

void xskq_destroy(struct xsk_queue *q)
{
	if (!q)
		return;

	/* The ring was allocated with __GFP_COMP, so page_frag_free()
	 * can release the whole compound allocation from the pointer.
	 */
	page_frag_free(q->ring);
	kfree(q);
}

struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
{
	struct xdp_umem_fq_reuse *newq;

	/* Check for overflow: roundup_pow_of_two() can wrap past the
	 * largest u32 power of two, in which case the rounded value is
	 * smaller than nentries.
	 */
	if (nentries > (u32)roundup_pow_of_two(nentries))
		return NULL;
	nentries = roundup_pow_of_two(nentries);

	newq = kvmalloc(struct_size(newq, handles, nentries), GFP_KERNEL);
	if (!newq)
		return NULL;
	memset(newq, 0, offsetof(typeof(*newq), handles));

	newq->nentries = nentries;
	return newq;
}
EXPORT_SYMBOL_GPL(xsk_reuseq_prepare);
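
/* Worked example of the overflow check above (arithmetic only, assuming
 * a 64-bit unsigned long): for nentries = 0x80000001,
 * roundup_pow_of_two() yields 2^32, which truncates to 0 as a u32.
 * Since 0x80000001 > 0, the check fires and xsk_reuseq_prepare()
 * returns NULL instead of allocating a zero-sized ring.
 */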

/* Swap in a new fill-queue reuse cache. The return value is the queue
 * the caller should free: NULL if there was no old queue, newq itself if
 * it is too small to hold the old entries, otherwise the replaced oldq.
 */
struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
					  struct xdp_umem_fq_reuse *newq)
{
	struct xdp_umem_fq_reuse *oldq = umem->fq_reuse;

	if (!oldq) {
		umem->fq_reuse = newq;
		return NULL;
	}

	if (newq->nentries < oldq->length)
		return newq;

	memcpy(newq->handles, oldq->handles,
	       array_size(oldq->length, sizeof(u64)));
	newq->length = oldq->length;

	umem->fq_reuse = newq;
	return oldq;
}
EXPORT_SYMBOL_GPL(xsk_reuseq_swap);

void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
{
	kvfree(rq);
}
EXPORT_SYMBOL_GPL(xsk_reuseq_free);
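
/* A minimal driver-side usage sketch (illustrative; the umem pointer and
 * ring_size are assumptions, not names from this file). Whatever
 * xsk_reuseq_swap() returns (NULL, the old queue, or a too-small newq)
 * is safe to pass straight to xsk_reuseq_free():
 *
 *	struct xdp_umem_fq_reuse *reuseq;
 *
 *	reuseq = xsk_reuseq_prepare(ring_size);
 *	if (!reuseq)
 *		return -ENOMEM;
 *
 *	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
 */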

void xsk_reuseq_destroy(struct xdp_umem *umem)
{
	xsk_reuseq_free(umem->fq_reuse);
	umem->fq_reuse = NULL;
}