2018-05-02 13:01:23 +02:00
// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
* Copyright ( c ) 2018 Intel Corporation .
*/
# include <linux/init.h>
# include <linux/sched/mm.h>
# include <linux/sched/signal.h>
# include <linux/sched/task.h>
# include <linux/uaccess.h>
# include <linux/slab.h>
# include <linux/bpf.h>
# include <linux/mm.h>
# include "xdp_umem.h"
2018-06-04 14:05:51 +02:00
# include "xsk_queue.h"
2018-05-02 13:01:23 +02:00
xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where
each frame is referenced via an index (idx). A user passes the frame
index to the kernel, and the kernel acts upon the data. Some NICs,
however, do not have a fixed frame-size model, instead they have a
model where a memory window is passed to the hardware and multiple
frames are filled into that window (referred to as the "type-writer"
model).
By changing the descriptor format from the current frame index
addressing scheme, AF_XDP can in the future be extended to support
these kinds of NICs.
In the index-based model, an idx refers to a frame of size
frame_size. Addressing a frame in the UMEM is done by offsetting the
UMEM starting address by a global offset, idx * frame_size + offset.
Communicating via the fill- and completion-rings is done by means of
idx.
In this commit, the idx is removed in favor of an address (addr),
which is a relative address ranging over the UMEM. To convert an
idx-based address to the new addr is simply: addr = idx * frame_size +
offset.
We also stop referring to the UMEM "frame" as a frame. Instead it is
simply called a chunk.
To transfer ownership of a chunk to the kernel, the addr of the chunk
is passed in the fill-ring. Note, that the kernel will mask addr to
make it chunk aligned, so there is no need for userspace to do
that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or
3000 to the fill-ring will refer to the same chunk.
On the completion-ring, the addr will match that of the Tx descriptor,
passed to the kernel.
Changing the descriptor format to use chunks/addr will allow for
future changes to move to a type-writer based model, where multiple
frames can reside in one chunk. In this model passing one single chunk
into the fill-ring, would potentially result in multiple Rx
descriptors.
This commit changes the uapi of AF_XDP sockets, and updates the
documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-06-04 13:57:13 +02:00
/* Smallest chunk size, in bytes, accepted when a umem is registered. */
# define XDP_UMEM_MIN_CHUNK_SIZE 2048
2018-05-02 13:01:23 +02:00
2018-06-04 14:05:57 +02:00
void xdp_add_sk_umem ( struct xdp_umem * umem , struct xdp_sock * xs )
{
unsigned long flags ;
spin_lock_irqsave ( & umem - > xsk_list_lock , flags ) ;
list_add_rcu ( & xs - > list , & umem - > xsk_list ) ;
spin_unlock_irqrestore ( & umem - > xsk_list_lock , flags ) ;
}
void xdp_del_sk_umem ( struct xdp_umem * umem , struct xdp_sock * xs )
{
unsigned long flags ;
if ( xs - > dev ) {
spin_lock_irqsave ( & umem - > xsk_list_lock , flags ) ;
list_del_rcu ( & xs - > list ) ;
spin_unlock_irqrestore ( & umem - > xsk_list_lock , flags ) ;
if ( umem - > zc )
synchronize_net ( ) ;
}
}
2018-06-04 14:05:55 +02:00
/* Try to attach @umem to queue @queue_id of @dev in zero-copy mode.
 *
 * @flags may carry XDP_ZEROCOPY and/or XDP_COPY:
 *  - both set: rejected with -EINVAL;
 *  - XDP_COPY set: returns 0 immediately, the device is not touched;
 *  - otherwise the driver is asked (XDP_QUERY_XSK_UMEM, then
 *    XDP_SETUP_XSK_UMEM through ndo_bpf) to take the umem.
 *
 * On success umem->dev, umem->queue_id and umem->zc are set and the
 * reference taken with dev_hold() is kept. On any failure the reference
 * is dropped again; the failure is reported only when XDP_ZEROCOPY was
 * requested, otherwise 0 is returned so the caller can fall back.
 *
 * Return: 0 on success or fallback, -EINVAL for conflicting flags,
 * -ENOTSUPP or the driver's setup error when zero-copy was forced.
 */
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
			u32 queue_id, u16 flags)
{
	const bool want_zc = flags & XDP_ZEROCOPY;
	const bool want_copy = flags & XDP_COPY;
	struct netdev_bpf req;
	int ret;

	if (want_zc && want_copy)
		return -EINVAL;

	if (want_copy)
		return 0;

	dev_hold(dev);

	/* Driver lacks the hooks needed for zero-copy. */
	if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit) {
		ret = -ENOTSUPP;
		goto fallback;
	}

	req.command = XDP_QUERY_XSK_UMEM;

	rtnl_lock();
	ret = dev->netdev_ops->ndo_bpf(dev, &req);
	rtnl_unlock();

	if (ret) {
		/* A failed query is always reported as "not supported". */
		ret = -ENOTSUPP;
		goto fallback;
	}

	req.command = XDP_SETUP_XSK_UMEM;
	req.xsk.umem = umem;
	req.xsk.queue_id = queue_id;

	rtnl_lock();
	ret = dev->netdev_ops->ndo_bpf(dev, &req);
	rtnl_unlock();

	if (ret)
		goto fallback;

	umem->dev = dev;
	umem->queue_id = queue_id;
	umem->zc = true;
	return 0;

fallback:
	dev_put(dev);
	return want_zc ? ret : 0; /* fail or fallback */
}
2018-06-04 14:05:57 +02:00
static void xdp_umem_clear_dev ( struct xdp_umem * umem )
2018-06-04 14:05:55 +02:00
{
struct netdev_bpf bpf ;
int err ;
if ( umem - > dev ) {
bpf . command = XDP_SETUP_XSK_UMEM ;
bpf . xsk . umem = NULL ;
bpf . xsk . queue_id = umem - > queue_id ;
rtnl_lock ( ) ;
err = umem - > dev - > netdev_ops - > ndo_bpf ( umem - > dev , & bpf ) ;
rtnl_unlock ( ) ;
if ( err )
WARN ( 1 , " failed to disable umem! \n " ) ;
dev_put ( umem - > dev ) ;
umem - > dev = NULL ;
}
}
2018-05-02 13:01:23 +02:00
static void xdp_umem_unpin_pages ( struct xdp_umem * umem )
{
unsigned int i ;
2018-05-22 09:35:02 +02:00
for ( i = 0 ; i < umem - > npgs ; i + + ) {
struct page * page = umem - > pgs [ i ] ;
2018-05-02 13:01:23 +02:00
2018-05-22 09:35:02 +02:00
set_page_dirty_lock ( page ) ;
put_page ( page ) ;
2018-05-02 13:01:23 +02:00
}
2018-05-22 09:35:02 +02:00
kfree ( umem - > pgs ) ;
umem - > pgs = NULL ;
2018-05-02 13:01:23 +02:00
}
static void xdp_umem_unaccount_pages ( struct xdp_umem * umem )
{
2018-06-08 00:06:01 +02:00
if ( umem - > user ) {
atomic_long_sub ( umem - > npgs , & umem - > user - > locked_vm ) ;
free_uid ( umem - > user ) ;
}
2018-05-02 13:01:23 +02:00
}
static void xdp_umem_release ( struct xdp_umem * umem )
{
struct task_struct * task ;
struct mm_struct * mm ;
2018-06-04 14:05:55 +02:00
xdp_umem_clear_dev ( umem ) ;
2018-05-02 13:01:24 +02:00
if ( umem - > fq ) {
xskq_destroy ( umem - > fq ) ;
umem - > fq = NULL ;
}
2018-05-02 13:01:31 +02:00
if ( umem - > cq ) {
xskq_destroy ( umem - > cq ) ;
umem - > cq = NULL ;
}
2018-05-22 09:35:02 +02:00
xdp_umem_unpin_pages ( umem ) ;
2018-05-02 13:01:23 +02:00
2018-05-22 09:35:02 +02:00
task = get_pid_task ( umem - > pid , PIDTYPE_PID ) ;
put_pid ( umem - > pid ) ;
if ( ! task )
goto out ;
mm = get_task_mm ( task ) ;
put_task_struct ( task ) ;
if ( ! mm )
goto out ;
2018-05-02 13:01:23 +02:00
2018-05-22 09:35:02 +02:00
mmput ( mm ) ;
2018-06-04 14:05:52 +02:00
kfree ( umem - > pages ) ;
umem - > pages = NULL ;
2018-05-02 13:01:23 +02:00
xdp_umem_unaccount_pages ( umem ) ;
out :
kfree ( umem ) ;
}
/* Work handler: recover the umem that embeds @work and release it. */
static void xdp_umem_release_deferred(struct work_struct *work)
{
	xdp_umem_release(container_of(work, struct xdp_umem, work));
}
void xdp_get_umem ( struct xdp_umem * umem )
{
2018-05-22 09:35:03 +02:00
refcount_inc ( & umem - > users ) ;
2018-05-02 13:01:23 +02:00
}
void xdp_put_umem ( struct xdp_umem * umem )
{
if ( ! umem )
return ;
2018-05-22 09:35:03 +02:00
if ( refcount_dec_and_test ( & umem - > users ) ) {
2018-05-02 13:01:23 +02:00
INIT_WORK ( & umem - > work , xdp_umem_release_deferred ) ;
schedule_work ( & umem - > work ) ;
}
}
static int xdp_umem_pin_pages ( struct xdp_umem * umem )
{
unsigned int gup_flags = FOLL_WRITE ;
long npgs ;
int err ;
2018-06-11 13:57:12 +02:00
umem - > pgs = kcalloc ( umem - > npgs , sizeof ( * umem - > pgs ) ,
GFP_KERNEL | __GFP_NOWARN ) ;
2018-05-02 13:01:23 +02:00
if ( ! umem - > pgs )
return - ENOMEM ;
down_write ( & current - > mm - > mmap_sem ) ;
npgs = get_user_pages ( umem - > address , umem - > npgs ,
gup_flags , & umem - > pgs [ 0 ] , NULL ) ;
up_write ( & current - > mm - > mmap_sem ) ;
if ( npgs ! = umem - > npgs ) {
if ( npgs > = 0 ) {
umem - > npgs = npgs ;
err = - ENOMEM ;
goto out_pin ;
}
err = npgs ;
goto out_pgs ;
}
return 0 ;
out_pin :
xdp_umem_unpin_pages ( umem ) ;
out_pgs :
kfree ( umem - > pgs ) ;
umem - > pgs = NULL ;
return err ;
}
static int xdp_umem_account_pages ( struct xdp_umem * umem )
{
unsigned long lock_limit , new_npgs , old_npgs ;
if ( capable ( CAP_IPC_LOCK ) )
return 0 ;
lock_limit = rlimit ( RLIMIT_MEMLOCK ) > > PAGE_SHIFT ;
umem - > user = get_uid ( current_user ( ) ) ;
do {
old_npgs = atomic_long_read ( & umem - > user - > locked_vm ) ;
new_npgs = old_npgs + umem - > npgs ;
if ( new_npgs > lock_limit ) {
free_uid ( umem - > user ) ;
umem - > user = NULL ;
return - ENOBUFS ;
}
} while ( atomic_long_cmpxchg ( & umem - > user - > locked_vm , old_npgs ,
new_npgs ) ! = old_npgs ) ;
return 0 ;
}
2018-05-22 09:35:02 +02:00
static int xdp_umem_reg ( struct xdp_umem * umem , struct xdp_umem_reg * mr )
2018-05-02 13:01:23 +02:00
{
xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where
each frame is referenced via an index (idx). A user passes the frame
index to the kernel, and the kernel acts upon the data. Some NICs,
however, do not have a fixed frame-size model, instead they have a
model where a memory window is passed to the hardware and multiple
frames are filled into that window (referred to as the "type-writer"
model).
By changing the descriptor format from the current frame index
addressing scheme, AF_XDP can in the future be extended to support
these kinds of NICs.
In the index-based model, an idx refers to a frame of size
frame_size. Addressing a frame in the UMEM is done by offseting the
UMEM starting address by a global offset, idx * frame_size + offset.
Communicating via the fill- and completion-rings are done by means of
idx.
In this commit, the idx is removed in favor of an address (addr),
which is a relative address ranging over the UMEM. To convert an
idx-based address to the new addr is simply: addr = idx * frame_size +
offset.
We also stop referring to the UMEM "frame" as a frame. Instead it is
simply called a chunk.
To transfer ownership of a chunk to the kernel, the addr of the chunk
is passed in the fill-ring. Note, that the kernel will mask addr to
make it chunk aligned, so there is no need for userspace to do
that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or
3000 to the fill-ring will refer to the same chunk.
On the completion-ring, the addr will match that of the Tx descriptor,
passed to the kernel.
Changing the descriptor format to use chunks/addr will allow for
future changes to move to a type-writer based model, where multiple
frames can reside in one chunk. In this model passing one single chunk
into the fill-ring, would potentially result in multiple Rx
descriptors.
This commit changes the uapi of AF_XDP sockets, and updates the
documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-06-04 13:57:13 +02:00
u32 chunk_size = mr - > chunk_size , headroom = mr - > headroom ;
unsigned int chunks , chunks_per_page ;
2018-05-02 13:01:23 +02:00
u64 addr = mr - > addr , size = mr - > len ;
2018-06-04 14:05:52 +02:00
int size_chk , err , i ;
2018-05-02 13:01:23 +02:00
xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where
each frame is referenced via an index (idx). A user passes the frame
index to the kernel, and the kernel acts upon the data. Some NICs,
however, do not have a fixed frame-size model, instead they have a
model where a memory window is passed to the hardware and multiple
frames are filled into that window (referred to as the "type-writer"
model).
By changing the descriptor format from the current frame index
addressing scheme, AF_XDP can in the future be extended to support
these kinds of NICs.
In the index-based model, an idx refers to a frame of size
frame_size. Addressing a frame in the UMEM is done by offseting the
UMEM starting address by a global offset, idx * frame_size + offset.
Communicating via the fill- and completion-rings are done by means of
idx.
In this commit, the idx is removed in favor of an address (addr),
which is a relative address ranging over the UMEM. To convert an
idx-based address to the new addr is simply: addr = idx * frame_size +
offset.
We also stop referring to the UMEM "frame" as a frame. Instead it is
simply called a chunk.
To transfer ownership of a chunk to the kernel, the addr of the chunk
is passed in the fill-ring. Note, that the kernel will mask addr to
make it chunk aligned, so there is no need for userspace to do
that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or
3000 to the fill-ring will refer to the same chunk.
On the completion-ring, the addr will match that of the Tx descriptor,
passed to the kernel.
Changing the descriptor format to use chunks/addr will allow for
future changes to move to a type-writer based model, where multiple
frames can reside in one chunk. In this model passing one single chunk
into the fill-ring, would potentially result in multiple Rx
descriptors.
This commit changes the uapi of AF_XDP sockets, and updates the
documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-06-04 13:57:13 +02:00
if ( chunk_size < XDP_UMEM_MIN_CHUNK_SIZE | | chunk_size > PAGE_SIZE ) {
2018-05-02 13:01:23 +02:00
/* Strictly speaking we could support this, if:
* - huge pages , or *
* - using an IOMMU , or
* - making sure the memory area is consecutive
* but for now , we simply say " computer says no " .
*/
return - EINVAL ;
}
xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where
each frame is referenced via an index (idx). A user passes the frame
index to the kernel, and the kernel acts upon the data. Some NICs,
however, do not have a fixed frame-size model, instead they have a
model where a memory window is passed to the hardware and multiple
frames are filled into that window (referred to as the "type-writer"
model).
By changing the descriptor format from the current frame index
addressing scheme, AF_XDP can in the future be extended to support
these kinds of NICs.
In the index-based model, an idx refers to a frame of size
frame_size. Addressing a frame in the UMEM is done by offseting the
UMEM starting address by a global offset, idx * frame_size + offset.
Communicating via the fill- and completion-rings are done by means of
idx.
In this commit, the idx is removed in favor of an address (addr),
which is a relative address ranging over the UMEM. To convert an
idx-based address to the new addr is simply: addr = idx * frame_size +
offset.
We also stop referring to the UMEM "frame" as a frame. Instead it is
simply called a chunk.
To transfer ownership of a chunk to the kernel, the addr of the chunk
is passed in the fill-ring. Note, that the kernel will mask addr to
make it chunk aligned, so there is no need for userspace to do
that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or
3000 to the fill-ring will refer to the same chunk.
On the completion-ring, the addr will match that of the Tx descriptor,
passed to the kernel.
Changing the descriptor format to use chunks/addr will allow for
future changes to move to a type-writer based model, where multiple
frames can reside in one chunk. In this model passing one single chunk
into the fill-ring, would potentially result in multiple Rx
descriptors.
This commit changes the uapi of AF_XDP sockets, and updates the
documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-06-04 13:57:13 +02:00
if ( ! is_power_of_2 ( chunk_size ) )
2018-05-02 13:01:23 +02:00
return - EINVAL ;
if ( ! PAGE_ALIGNED ( addr ) ) {
/* Memory area has to be page size aligned. For
* simplicity , this might change .
*/
return - EINVAL ;
}
if ( ( addr + size ) < addr )
return - EINVAL ;
xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where
each frame is referenced via an index (idx). A user passes the frame
index to the kernel, and the kernel acts upon the data. Some NICs,
however, do not have a fixed frame-size model, instead they have a
model where a memory window is passed to the hardware and multiple
frames are filled into that window (referred to as the "type-writer"
model).
By changing the descriptor format from the current frame index
addressing scheme, AF_XDP can in the future be extended to support
these kinds of NICs.
In the index-based model, an idx refers to a frame of size
frame_size. Addressing a frame in the UMEM is done by offseting the
UMEM starting address by a global offset, idx * frame_size + offset.
Communicating via the fill- and completion-rings are done by means of
idx.
In this commit, the idx is removed in favor of an address (addr),
which is a relative address ranging over the UMEM. To convert an
idx-based address to the new addr is simply: addr = idx * frame_size +
offset.
We also stop referring to the UMEM "frame" as a frame. Instead it is
simply called a chunk.
To transfer ownership of a chunk to the kernel, the addr of the chunk
is passed in the fill-ring. Note, that the kernel will mask addr to
make it chunk aligned, so there is no need for userspace to do
that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or
3000 to the fill-ring will refer to the same chunk.
On the completion-ring, the addr will match that of the Tx descriptor,
passed to the kernel.
Changing the descriptor format to use chunks/addr will allow for
future changes to move to a type-writer based model, where multiple
frames can reside in one chunk. In this model passing one single chunk
into the fill-ring, would potentially result in multiple Rx
descriptors.
This commit changes the uapi of AF_XDP sockets, and updates the
documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-06-04 13:57:13 +02:00
chunks = ( unsigned int ) div_u64 ( size , chunk_size ) ;
if ( chunks = = 0 )
2018-05-02 13:01:23 +02:00
return - EINVAL ;
xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where
each frame is referenced via an index (idx). A user passes the frame
index to the kernel, and the kernel acts upon the data. Some NICs,
however, do not have a fixed frame-size model, instead they have a
model where a memory window is passed to the hardware and multiple
frames are filled into that window (referred to as the "type-writer"
model).
By changing the descriptor format from the current frame index
addressing scheme, AF_XDP can in the future be extended to support
these kinds of NICs.
In the index-based model, an idx refers to a frame of size
frame_size. Addressing a frame in the UMEM is done by offseting the
UMEM starting address by a global offset, idx * frame_size + offset.
Communicating via the fill- and completion-rings are done by means of
idx.
In this commit, the idx is removed in favor of an address (addr),
which is a relative address ranging over the UMEM. To convert an
idx-based address to the new addr is simply: addr = idx * frame_size +
offset.
We also stop referring to the UMEM "frame" as a frame. Instead it is
simply called a chunk.
To transfer ownership of a chunk to the kernel, the addr of the chunk
is passed in the fill-ring. Note, that the kernel will mask addr to
make it chunk aligned, so there is no need for userspace to do
that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or
3000 to the fill-ring will refer to the same chunk.
On the completion-ring, the addr will match that of the Tx descriptor,
passed to the kernel.
Changing the descriptor format to use chunks/addr will allow for
future changes to move to a type-writer based model, where multiple
frames can reside in one chunk. In this model passing one single chunk
into the fill-ring, would potentially result in multiple Rx
descriptors.
This commit changes the uapi of AF_XDP sockets, and updates the
documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-06-04 13:57:13 +02:00
chunks_per_page = PAGE_SIZE / chunk_size ;
if ( chunks < chunks_per_page | | chunks % chunks_per_page )
2018-05-02 13:01:23 +02:00
return - EINVAL ;
xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where
each frame is referenced via an index (idx). A user passes the frame
index to the kernel, and the kernel acts upon the data. Some NICs,
however, do not have a fixed frame-size model, instead they have a
model where a memory window is passed to the hardware and multiple
frames are filled into that window (referred to as the "type-writer"
model).
By changing the descriptor format from the current frame index
addressing scheme, AF_XDP can in the future be extended to support
these kinds of NICs.
In the index-based model, an idx refers to a frame of size
frame_size. Addressing a frame in the UMEM is done by offseting the
UMEM starting address by a global offset, idx * frame_size + offset.
Communicating via the fill- and completion-rings are done by means of
idx.
In this commit, the idx is removed in favor of an address (addr),
which is a relative address ranging over the UMEM. To convert an
idx-based address to the new addr is simply: addr = idx * frame_size +
offset.
We also stop referring to the UMEM "frame" as a frame. Instead it is
simply called a chunk.
To transfer ownership of a chunk to the kernel, the addr of the chunk
is passed in the fill-ring. Note, that the kernel will mask addr to
make it chunk aligned, so there is no need for userspace to do
that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or
3000 to the fill-ring will refer to the same chunk.
On the completion-ring, the addr will match that of the Tx descriptor,
passed to the kernel.
Changing the descriptor format to use chunks/addr will allow for
future changes to move to a type-writer based model, where multiple
frames can reside in one chunk. In this model passing one single chunk
into the fill-ring, would potentially result in multiple Rx
descriptors.
This commit changes the uapi of AF_XDP sockets, and updates the
documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-06-04 13:57:13 +02:00
headroom = ALIGN ( headroom , 64 ) ;
2018-05-02 13:01:23 +02:00
xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where
each frame is referenced via an index (idx). A user passes the frame
index to the kernel, and the kernel acts upon the data. Some NICs,
however, do not have a fixed frame-size model, instead they have a
model where a memory window is passed to the hardware and multiple
frames are filled into that window (referred to as the "type-writer"
model).
By changing the descriptor format from the current frame index
addressing scheme, AF_XDP can in the future be extended to support
these kinds of NICs.
In the index-based model, an idx refers to a frame of size
frame_size. Addressing a frame in the UMEM is done by offseting the
UMEM starting address by a global offset, idx * frame_size + offset.
Communicating via the fill- and completion-rings are done by means of
idx.
In this commit, the idx is removed in favor of an address (addr),
which is a relative address ranging over the UMEM. To convert an
idx-based address to the new addr is simply: addr = idx * frame_size +
offset.
We also stop referring to the UMEM "frame" as a frame. Instead it is
simply called a chunk.
To transfer ownership of a chunk to the kernel, the addr of the chunk
is passed in the fill-ring. Note, that the kernel will mask addr to
make it chunk aligned, so there is no need for userspace to do
that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or
3000 to the fill-ring will refer to the same chunk.
On the completion-ring, the addr will match that of the Tx descriptor,
passed to the kernel.
Changing the descriptor format to use chunks/addr will allow for
future changes to move to a type-writer based model, where multiple
frames can reside in one chunk. In this model passing one single chunk
into the fill-ring, would potentially result in multiple Rx
descriptors.
This commit changes the uapi of AF_XDP sockets, and updates the
documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-06-04 13:57:13 +02:00
size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM ;
2018-05-02 13:01:23 +02:00
if ( size_chk < 0 )
return - EINVAL ;
umem - > pid = get_task_pid ( current , PIDTYPE_PID ) ;
umem - > address = ( unsigned long ) addr ;
xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where
each frame is referenced via an index (idx). A user passes the frame
index to the kernel, and the kernel acts upon the data. Some NICs,
however, do not have a fixed frame-size model, instead they have a
model where a memory window is passed to the hardware and multiple
frames are filled into that window (referred to as the "type-writer"
model).
By changing the descriptor format from the current frame index
addressing scheme, AF_XDP can in the future be extended to support
these kinds of NICs.
In the index-based model, an idx refers to a frame of size
frame_size. Addressing a frame in the UMEM is done by offseting the
UMEM starting address by a global offset, idx * frame_size + offset.
Communicating via the fill- and completion-rings are done by means of
idx.
In this commit, the idx is removed in favor of an address (addr),
which is a relative address ranging over the UMEM. To convert an
idx-based address to the new addr is simply: addr = idx * frame_size +
offset.
We also stop referring to the UMEM "frame" as a frame. Instead it is
simply called a chunk.
To transfer ownership of a chunk to the kernel, the addr of the chunk
is passed in the fill-ring. Note, that the kernel will mask addr to
make it chunk aligned, so there is no need for userspace to do
that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or
3000 to the fill-ring will refer to the same chunk.
On the completion-ring, the addr will match that of the Tx descriptor,
passed to the kernel.
Changing the descriptor format to use chunks/addr will allow for
future changes to move to a type-writer based model, where multiple
frames can reside in one chunk. In this model passing one single chunk
into the fill-ring, would potentially result in multiple Rx
descriptors.
This commit changes the uapi of AF_XDP sockets, and updates the
documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-06-04 13:57:13 +02:00
umem - > props . chunk_mask = ~ ( ( u64 ) chunk_size - 1 ) ;
umem - > props . size = size ;
umem - > headroom = headroom ;
umem - > chunk_size_nohr = chunk_size - headroom ;
2018-05-02 13:01:23 +02:00
umem - > npgs = size / PAGE_SIZE ;
umem - > pgs = NULL ;
umem - > user = NULL ;
2018-06-04 14:05:57 +02:00
INIT_LIST_HEAD ( & umem - > xsk_list ) ;
spin_lock_init ( & umem - > xsk_list_lock ) ;
2018-05-02 13:01:23 +02:00
2018-05-22 09:35:03 +02:00
refcount_set ( & umem - > users , 1 ) ;
2018-05-02 13:01:23 +02:00
err = xdp_umem_account_pages ( umem ) ;
if ( err )
goto out ;
err = xdp_umem_pin_pages ( umem ) ;
if ( err )
goto out_account ;
2018-06-04 14:05:52 +02:00
umem - > pages = kcalloc ( umem - > npgs , sizeof ( * umem - > pages ) , GFP_KERNEL ) ;
if ( ! umem - > pages ) {
err = - ENOMEM ;
goto out_account ;
}
for ( i = 0 ; i < umem - > npgs ; i + + )
umem - > pages [ i ] . addr = page_address ( umem - > pgs [ i ] ) ;
2018-05-02 13:01:23 +02:00
return 0 ;
out_account :
xdp_umem_unaccount_pages ( umem ) ;
out :
put_pid ( umem - > pid ) ;
return err ;
}
2018-05-02 13:01:26 +02:00
2018-05-22 09:35:02 +02:00
/* Allocate a zeroed umem and register it according to @mr.
 *
 * Return: the new umem on success; ERR_PTR(-ENOMEM) if the allocation
 * fails, or ERR_PTR() of the error reported by xdp_umem_reg(), in which
 * case the allocation is freed again.
 */
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *new_umem;
	int ret;

	new_umem = kzalloc(sizeof(*new_umem), GFP_KERNEL);
	if (!new_umem)
		return ERR_PTR(-ENOMEM);

	ret = xdp_umem_reg(new_umem, mr);
	if (!ret)
		return new_umem;

	kfree(new_umem);
	return ERR_PTR(ret);
}
2018-05-02 13:01:26 +02:00
bool xdp_umem_validate_queues ( struct xdp_umem * umem )
{
2018-05-18 14:00:23 +02:00
return umem - > fq & & umem - > cq ;
2018-05-02 13:01:26 +02:00
}