Merge branch 'abstract-page-from-net-stack'
Mina Almasry says: ==================== Abstract page from net stack This series is a prerequisite to the devmem TCP series. For a full snapshot of the code which includes these changes, feel free to check: https://github.com/mina/linux/commits/tcpdevmem-rfcv5/ Currently these components in the net stack use the struct page directly: 1. Drivers. 2. Page pool. 3. skb_frag_t. To add support for new (non struct page) memory types to the net stack, we must first abstract the current memory type. Originally the plan was to reuse struct page* for the new memory types, and to set the LSB on the page* to indicate it's not really a page. However, for safe compiler type checking we need to introduce a new type. struct netmem is introduced to abstract the underlying memory type. Currently it's a no-op abstraction that is always a struct page underneath. In parallel there is an ongoing effort to add support for devmem to the net stack: https://lore.kernel.org/netdev/20231208005250.2910004-1-almasrymina@google.com/ Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Christian König <christian.koenig@amd.com> Cc: Shakeel Butt <shakeelb@google.com> Cc: Yunsheng Lin <linyunsheng@huawei.com> Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com> ==================== Link: https://lore.kernel.org/r/20240214223405.1972973-1-almasrymina@google.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
commit
bb18fc7a52
@ -37,6 +37,7 @@
|
||||
#endif
|
||||
#include <net/net_debug.h>
|
||||
#include <net/dropreason-core.h>
|
||||
#include <net/netmem.h>
|
||||
|
||||
/**
|
||||
* DOC: skb checksums
|
||||
@ -359,7 +360,11 @@ extern int sysctl_max_skb_frags;
|
||||
*/
|
||||
#define GSO_BY_FRAGS 0xFFFF
|
||||
|
||||
typedef struct bio_vec skb_frag_t;
|
||||
/* Descriptor for one skb fragment: a netmem reference plus the length and
 * offset of the data it describes.
 */
typedef struct skb_frag {
|
||||
netmem_ref netmem; /* backing memory; skb_frag_page() converts it to a page */
|
||||
unsigned int len; /* fragment size, as returned by skb_frag_size() */
|
||||
unsigned int offset; /* data offset, as returned by skb_frag_off() */
|
||||
} skb_frag_t;
|
||||
|
||||
/**
|
||||
* skb_frag_size() - Returns the size of a skb fragment
|
||||
@ -367,7 +372,7 @@ typedef struct bio_vec skb_frag_t;
|
||||
*/
|
||||
static inline unsigned int skb_frag_size(const skb_frag_t *frag)
|
||||
{
|
||||
return frag->bv_len;
|
||||
return frag->len;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -377,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
|
||||
*/
|
||||
static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
|
||||
{
|
||||
frag->bv_len = size;
|
||||
frag->len = size;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -387,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
|
||||
*/
|
||||
static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
|
||||
{
|
||||
frag->bv_len += delta;
|
||||
frag->len += delta;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -397,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
|
||||
*/
|
||||
static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
|
||||
{
|
||||
frag->bv_len -= delta;
|
||||
frag->len -= delta;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -417,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p)
|
||||
* skb_frag_foreach_page - loop over pages in a fragment
|
||||
*
|
||||
* @f: skb frag to operate on
|
||||
* @f_off: offset from start of f->bv_page
|
||||
* @f_off: offset from start of f->netmem
|
||||
* @f_len: length from f_off to loop over
|
||||
* @p: (temp var) current page
|
||||
* @p_off: (temp var) offset from start of current page,
|
||||
@ -2429,22 +2434,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
|
||||
return skb_headlen(skb) + __skb_pagelen(skb);
|
||||
}
|
||||
|
||||
/**
 * skb_frag_fill_netmem_desc - initialise a fragment descriptor from a netmem
 * @frag: fragment descriptor to fill in
 * @netmem: netmem reference stored in the fragment
 * @off: value stored as the fragment offset
 * @size: fragment length, applied through skb_frag_size_set()
 *
 * Only writes the three descriptor fields of @frag.
 */
static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag,
|
||||
netmem_ref netmem, int off,
|
||||
int size)
|
||||
{
|
||||
frag->netmem = netmem;
|
||||
frag->offset = off;
|
||||
skb_frag_size_set(frag, size);
|
||||
}
|
||||
|
||||
static inline void skb_frag_fill_page_desc(skb_frag_t *frag,
|
||||
struct page *page,
|
||||
int off, int size)
|
||||
{
|
||||
frag->bv_page = page;
|
||||
frag->bv_offset = off;
|
||||
skb_frag_size_set(frag, size);
|
||||
skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size);
|
||||
}
|
||||
|
||||
/* Fill fragment slot @i of @shinfo with @netmem, @off and @size by handing
 * &shinfo->frags[i] to skb_frag_fill_netmem_desc().
 */
static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo,
|
||||
int i, netmem_ref netmem,
|
||||
int off, int size)
|
||||
{
|
||||
skb_frag_t *frag = &shinfo->frags[i];
|
||||
|
|
||||
skb_frag_fill_netmem_desc(frag, netmem, off, size);
|
||||
}
|
||||
|
||||
static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo,
|
||||
int i, struct page *page,
|
||||
int off, int size)
|
||||
{
|
||||
skb_frag_t *frag = &shinfo->frags[i];
|
||||
|
||||
skb_frag_fill_page_desc(frag, page, off, size);
|
||||
__skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off,
|
||||
size);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2460,10 +2480,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
|
||||
}
|
||||
|
||||
/**
|
||||
* __skb_fill_page_desc - initialise a paged fragment in an skb
|
||||
* __skb_fill_netmem_desc - initialise a fragment in an skb
|
||||
* @skb: buffer containing fragment to be initialised
|
||||
* @i: paged fragment index to initialise
|
||||
* @page: the page to use for this fragment
|
||||
* @i: fragment index to initialise
|
||||
* @netmem: the netmem to use for this fragment
|
||||
* @off: the offset to the data within @netmem
|
||||
* @size: the length of the data
|
||||
*
|
||||
@ -2472,10 +2492,12 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
|
||||
*
|
||||
* Does not take any additional reference on the fragment.
|
||||
*/
|
||||
static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
|
||||
struct page *page, int off, int size)
|
||||
static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i,
|
||||
netmem_ref netmem, int off, int size)
|
||||
{
|
||||
__skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size);
|
||||
struct page *page = netmem_to_page(netmem);
|
||||
|
||||
__skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size);
|
||||
|
||||
/* Propagate page pfmemalloc to the skb if we can. The problem is
|
||||
* that not all callers have unique ownership of the page but rely
|
||||
@ -2483,7 +2505,20 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
|
||||
*/
|
||||
page = compound_head(page);
|
||||
if (page_is_pfmemalloc(page))
|
||||
skb->pfmemalloc = true;
|
||||
skb->pfmemalloc = true;
|
||||
}
|
||||
|
||||
/* Page-based wrapper: converts @page with page_to_netmem() and forwards all
 * arguments to __skb_fill_netmem_desc().
 */
static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
|
||||
struct page *page, int off, int size)
|
||||
{
|
||||
__skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
|
||||
}
|
||||
|
||||
/* Initialise fragment @i of @skb through __skb_fill_netmem_desc(), then set
 * the skb's fragment count to @i + 1.
 */
static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i,
|
||||
netmem_ref netmem, int off, int size)
|
||||
{
|
||||
__skb_fill_netmem_desc(skb, i, netmem, off, size);
|
||||
/* nr_frags is overwritten, not incremented: slot @i becomes the last one */
skb_shinfo(skb)->nr_frags = i + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2503,8 +2538,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
|
||||
static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
|
||||
struct page *page, int off, int size)
|
||||
{
|
||||
__skb_fill_page_desc(skb, i, page, off, size);
|
||||
skb_shinfo(skb)->nr_frags = i + 1;
|
||||
skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2528,8 +2562,16 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i,
|
||||
shinfo->nr_frags = i + 1;
|
||||
}
|
||||
|
||||
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
|
||||
int size, unsigned int truesize);
|
||||
void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
|
||||
int off, int size, unsigned int truesize);
|
||||
|
||||
/* Page-based wrapper around skb_add_rx_frag_netmem(): converts @page with
 * page_to_netmem() and passes the remaining arguments through unchanged.
 */
static inline void skb_add_rx_frag(struct sk_buff *skb, int i,
|
||||
struct page *page, int off, int size,
|
||||
unsigned int truesize)
|
||||
{
|
||||
skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size,
|
||||
truesize);
|
||||
}
|
||||
|
||||
void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
|
||||
unsigned int truesize);
|
||||
@ -3378,7 +3420,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page,
|
||||
*/
|
||||
static inline unsigned int skb_frag_off(const skb_frag_t *frag)
|
||||
{
|
||||
return frag->bv_offset;
|
||||
return frag->offset;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3388,7 +3430,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag)
|
||||
*/
|
||||
static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
|
||||
{
|
||||
frag->bv_offset += delta;
|
||||
frag->offset += delta;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3398,7 +3440,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
|
||||
*/
|
||||
static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
|
||||
{
|
||||
frag->bv_offset = offset;
|
||||
frag->offset = offset;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3409,7 +3451,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
|
||||
static inline void skb_frag_off_copy(skb_frag_t *fragto,
|
||||
const skb_frag_t *fragfrom)
|
||||
{
|
||||
fragto->bv_offset = fragfrom->bv_offset;
|
||||
fragto->offset = fragfrom->offset;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3420,7 +3462,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto,
|
||||
*/
|
||||
static inline struct page *skb_frag_page(const skb_frag_t *frag)
|
||||
{
|
||||
return frag->bv_page;
|
||||
return netmem_to_page(frag->netmem);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3528,7 +3570,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
|
||||
static inline void skb_frag_page_copy(skb_frag_t *fragto,
|
||||
const skb_frag_t *fragfrom)
|
||||
{
|
||||
fragto->bv_page = fragfrom->bv_page;
|
||||
fragto->netmem = fragfrom->netmem;
|
||||
}
|
||||
|
||||
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
|
||||
|
41
include/net/netmem.h
Normal file
41
include/net/netmem.h
Normal file
@ -0,0 +1,41 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Network memory
|
||||
*
|
||||
* Author: Mina Almasry <almasrymina@google.com>
|
||||
*/
|
||||
|
||||
#ifndef _NET_NETMEM_H
|
||||
#define _NET_NETMEM_H
|
||||
|
||||
/**
|
||||
* typedef netmem_ref - a nonexistent type marking a reference to generic
|
||||
* network memory.
|
||||
*
|
||||
* A netmem_ref currently is always a reference to a struct page. This
|
||||
* abstraction is introduced so support for new memory types can be added.
|
||||
*
|
||||
* Use the supplied helpers to obtain the underlying memory pointer and fields.
|
||||
*/
|
||||
typedef unsigned long __bitwise netmem_ref;
|
||||
|
||||
/* This conversion fails (returns NULL) if the netmem_ref is not struct page
|
||||
* backed.
|
||||
*
|
||||
* Currently struct page is the only possible netmem, and this helper never
|
||||
* fails.
|
||||
*/
|
||||
static inline struct page *netmem_to_page(netmem_ref netmem)
|
||||
{
|
||||
/* netmem_ref is a __bitwise unsigned long; the cast is annotated __force */
return (__force struct page *)netmem;
|
||||
}
|
||||
|
||||
/* Converting from page to netmem is always safe, because a page can always be
|
||||
* a netmem.
|
||||
*/
|
||||
static inline netmem_ref page_to_netmem(struct page *page)
|
||||
{
|
||||
/* Plain reinterpretation of the pointer value; inverse of netmem_to_page() */
return (__force netmem_ref)page;
|
||||
}
|
||||
|
||||
#endif /* _NET_NETMEM_H */
|
@ -115,6 +115,24 @@ static struct kmem_cache *skb_small_head_cache __ro_after_init;
|
||||
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
|
||||
EXPORT_SYMBOL(sysctl_max_skb_frags);
|
||||
|
||||
/* kcm_write_msgs() relies on casting paged frags to bio_vec to use
|
||||
* iov_iter_bvec(). These static asserts ensure the cast is valid as long as the
|
||||
* netmem is a page.
|
||||
*/
|
||||
static_assert(offsetof(struct bio_vec, bv_page) ==
|
||||
offsetof(skb_frag_t, netmem));
|
||||
static_assert(sizeof_field(struct bio_vec, bv_page) ==
|
||||
sizeof_field(skb_frag_t, netmem));
|
||||
|
||||
static_assert(offsetof(struct bio_vec, bv_len) == offsetof(skb_frag_t, len));
|
||||
static_assert(sizeof_field(struct bio_vec, bv_len) ==
|
||||
sizeof_field(skb_frag_t, len));
|
||||
|
||||
static_assert(offsetof(struct bio_vec, bv_offset) ==
|
||||
offsetof(skb_frag_t, offset));
|
||||
static_assert(sizeof_field(struct bio_vec, bv_offset) ==
|
||||
sizeof_field(skb_frag_t, offset));
|
||||
|
||||
#undef FN
|
||||
#define FN(reason) [SKB_DROP_REASON_##reason] = #reason,
|
||||
static const char * const drop_reasons[] = {
|
||||
@ -845,17 +863,17 @@ skb_fail:
|
||||
}
|
||||
EXPORT_SYMBOL(__napi_alloc_skb);
|
||||
|
||||
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
|
||||
int size, unsigned int truesize)
|
||||
void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
|
||||
int off, int size, unsigned int truesize)
|
||||
{
|
||||
DEBUG_NET_WARN_ON_ONCE(size > truesize);
|
||||
|
||||
skb_fill_page_desc(skb, i, page, off, size);
|
||||
skb_fill_netmem_desc(skb, i, netmem, off, size);
|
||||
skb->len += size;
|
||||
skb->data_len += size;
|
||||
skb->truesize += truesize;
|
||||
}
|
||||
EXPORT_SYMBOL(skb_add_rx_frag);
|
||||
EXPORT_SYMBOL(skb_add_rx_frag_netmem);
|
||||
|
||||
void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
|
||||
unsigned int truesize)
|
||||
@ -1999,10 +2017,11 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
|
||||
|
||||
/* skb frags point to kernel buffers */
|
||||
for (i = 0; i < new_frags - 1; i++) {
|
||||
__skb_fill_page_desc(skb, i, head, 0, psize);
|
||||
__skb_fill_netmem_desc(skb, i, page_to_netmem(head), 0, psize);
|
||||
head = (struct page *)page_private(head);
|
||||
}
|
||||
__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
|
||||
__skb_fill_netmem_desc(skb, new_frags - 1, page_to_netmem(head), 0,
|
||||
d_off);
|
||||
skb_shinfo(skb)->nr_frags = new_frags;
|
||||
|
||||
release:
|
||||
@ -3740,7 +3759,8 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
|
||||
if (plen) {
|
||||
page = virt_to_head_page(from->head);
|
||||
offset = from->data - (unsigned char *)page_address(page);
|
||||
__skb_fill_page_desc(to, 0, page, offset, plen);
|
||||
__skb_fill_netmem_desc(to, 0, page_to_netmem(page),
|
||||
offset, plen);
|
||||
get_page(page);
|
||||
j = 1;
|
||||
len -= plen;
|
||||
|
@ -627,7 +627,8 @@ retry:
|
||||
skb = txm->frag_skb;
|
||||
}
|
||||
|
||||
if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
|
||||
if (WARN_ON(!skb_shinfo(skb)->nr_frags) ||
|
||||
WARN_ON_ONCE(!skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@ -637,8 +638,8 @@ retry:
|
||||
msize += skb_frag_size(&skb_shinfo(skb)->frags[i]);
|
||||
|
||||
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE,
|
||||
skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags,
|
||||
msize);
|
||||
(const struct bio_vec *)skb_shinfo(skb)->frags,
|
||||
skb_shinfo(skb)->nr_frags, msize);
|
||||
iov_iter_advance(&msg.msg_iter, txm->frag_offset);
|
||||
|
||||
do {
|
||||
|
Loading…
Reference in New Issue
Block a user