2019-05-30 02:57:49 +03:00
/* SPDX-License-Identifier: GPL-2.0-only */
2013-06-10 12:39:50 +04:00
/*
2013-07-10 18:13:26 +04:00
* net busy poll support
2013-06-10 12:39:50 +04:00
 * Copyright(c) 2013 Intel Corporation.
 *
 * Author: Eliezer Tamir
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 */
2013-07-10 18:13:26 +04:00
# ifndef _LINUX_NET_BUSY_POLL_H
# define _LINUX_NET_BUSY_POLL_H
2013-06-10 12:39:50 +04:00
# include <linux/netdevice.h>
2017-02-01 18:36:40 +03:00
# include <linux/sched/clock.h>
2017-02-02 21:15:33 +03:00
# include <linux/sched/signal.h>
2013-06-10 12:39:50 +04:00
# include <net/ip.h>
2017-03-24 20:07:53 +03:00
/*		0 - Reserved to indicate value not set
 *     1..NR_CPUS - Reserved for sender_cpu
 *  NR_CPUS+1..~0 - Region available for NAPI IDs
 */
# define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
2020-11-30 21:51:57 +03:00
# define BUSY_POLL_BUDGET 8
2017-08-11 19:31:24 +03:00
# ifdef CONFIG_NET_RX_BUSY_POLL
struct napi_struct ;
extern unsigned int sysctl_net_busy_read __read_mostly ;
extern unsigned int sysctl_net_busy_poll __read_mostly ;
2013-07-08 17:20:34 +04:00
static inline bool net_busy_loop_on ( void )
2013-06-28 16:59:35 +04:00
{
2013-07-10 18:13:36 +04:00
return sysctl_net_busy_poll ;
2013-06-28 16:59:35 +04:00
}
2017-03-24 20:08:18 +03:00
static inline bool sk_can_busy_loop ( const struct sock * sk )
2013-06-28 16:59:26 +04:00
{
2021-06-29 17:12:45 +03:00
return READ_ONCE ( sk - > sk_ll_usec ) & & ! signal_pending ( current ) ;
2013-06-28 16:59:26 +04:00
}
2017-03-24 20:08:24 +03:00
/*
 * Busy-poll entry points; only the prototypes are provided by this header.
 *
 * sk_busy_loop_end() has the signature napi_busy_loop() expects for its
 * @loop_end callback. sk_busy_loop() passes it, with the socket as
 * @loop_end_arg, when its nonblock argument is zero, and passes NULL
 * otherwise.
 */
bool sk_busy_loop_end(void *p, unsigned long start_time);

void napi_busy_loop(unsigned int napi_id,
		    bool (*loop_end)(void *, unsigned long),
		    void *loop_end_arg,
		    bool prefer_busy_poll,
		    u16 budget);
2013-06-14 17:33:35 +04:00
2017-03-24 20:08:18 +03:00
# else /* CONFIG_NET_RX_BUSY_POLL */
/*
 * Stub used when CONFIG_NET_RX_BUSY_POLL is not set: busy polling is never
 * enabled.
 *
 * Returns bool so that the stub has the same type as the variant built when
 * CONFIG_NET_RX_BUSY_POLL is set.
 */
static inline bool net_busy_loop_on(void)
{
	return false;
}
2017-03-24 20:08:18 +03:00
/*
 * Stub used when CONFIG_NET_RX_BUSY_POLL is not set: no socket can busy
 * loop.
 *
 * @sk is ignored. It is const-qualified so that the stub accepts exactly the
 * same arguments as the variant built when CONFIG_NET_RX_BUSY_POLL is set.
 */
static inline bool sk_can_busy_loop(const struct sock *sk)
{
	return false;
}
2017-03-24 20:08:18 +03:00
# endif /* CONFIG_NET_RX_BUSY_POLL */
2013-06-10 12:39:50 +04:00
2017-03-24 20:08:18 +03:00
/*
 * Timestamp used by the busy-poll timeout helpers.
 *
 * With CONFIG_NET_RX_BUSY_POLL this is local_clock() shifted right by 10
 * bits, i.e. divided by 1024, truncated to unsigned long. Without it the
 * result is always 0.
 *
 * NOTE(review): callers add a value named "bp_usec" to this timestamp, which
 * suggests the shift is a cheap nanoseconds-to-microseconds approximation;
 * confirm against local_clock()'s documentation.
 */
static inline unsigned long busy_loop_current_time(void)
{
	unsigned long now = 0;

#ifdef CONFIG_NET_RX_BUSY_POLL
	now = (unsigned long)(local_clock() >> 10);
#endif
	return now;
}
2013-06-10 12:39:50 +04:00
2017-03-24 20:08:18 +03:00
/*
 * Timeout check for poll/select, which use the global sysctl_net_busy_poll
 * value rather than a per-socket one.
 *
 * @start_time: timestamp taken with busy_loop_current_time() when polling
 *              began
 *
 * Returns true once the current time is past @start_time plus the value of
 * sysctl_net_busy_poll. Also returns true when that value is zero or when
 * CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline bool busy_loop_timeout(unsigned long start_time)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned long window = READ_ONCE(sysctl_net_busy_poll);

	if (window) {
		unsigned long deadline = start_time + window;
		unsigned long cur = busy_loop_current_time();

		return time_after(cur, deadline);
	}
#endif
	return true;
}
2017-03-24 20:08:18 +03:00
/*
 * Per-socket timeout check for busy polling.
 *
 * @sk:         socket whose sk_ll_usec value gives the polling window; it is
 *              only read, hence the const qualifier
 * @start_time: timestamp taken with busy_loop_current_time() when polling
 *              began
 *
 * Returns true once the current time is past @start_time plus sk_ll_usec.
 * Also returns true when sk_ll_usec is zero or when CONFIG_NET_RX_BUSY_POLL
 * is not set.
 */
static inline bool sk_busy_loop_timeout(const struct sock *sk,
					unsigned long start_time)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned long bp_usec = READ_ONCE(sk->sk_ll_usec);

	if (bp_usec) {
		unsigned long end_time = start_time + bp_usec;
		unsigned long now = busy_loop_current_time();

		return time_after(now, end_time);
	}
#endif
	return true;
}
2016-11-16 20:10:42 +03:00
2017-03-24 20:08:24 +03:00
/*
 * Busy-poll the NAPI context recorded on @sk, if it has one.
 *
 * @sk:       socket whose sk_napi_id selects the NAPI context to poll
 * @nonblock: when non-zero, NULL is passed as napi_busy_loop()'s loop_end
 *            callback; otherwise sk_busy_loop_end() is passed, with @sk as
 *            its argument
 *
 * Nothing is done when sk_napi_id is below MIN_NAPI_ID or when
 * CONFIG_NET_RX_BUSY_POLL is not set.
 *
 * The poll budget is sk_busy_poll_budget, falling back to BUSY_POLL_BUDGET
 * when that field is zero.
 *
 * The socket's sk_prefer_busy_poll value is forwarded as the
 * prefer_busy_poll argument (presumably set through the SO_PREFER_BUSY_POLL
 * socket option). Background on preferred busy-polling:
 *
 * Plain busy-polling (SO_BUSY_POLL, or the /proc/sys/net/core/busy_read
 * knob) is opportunistic: the NAPI context is polled only if it is not
 * already scheduled, so a heavily loaded NAPI context never gets
 * busy-polled. SO_PREFER_BUSY_POLL works together with the
 * napi_defer_hard_irqs and gro_flush_timeout knobs: while the NAPI context
 * is being processed by a softirq, that processing exits early so that
 * busy-polling can be performed. If the application stops busy-polling, the
 * watchdog timer defined by gro_flush_timeout expires and regular softirq
 * handling resumes. The timeout should therefore be larger than the
 * userspace processing window, e.g.:
 *
 *   $ echo 2 | sudo tee /sys/class/net/ens785f1/napi_defer_hard_irqs
 *   $ echo 200000 | sudo tee /sys/class/net/ens785f1/gro_flush_timeout
 */
static inline void sk_busy_loop(struct sock *sk, int nonblock)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned int napi_id = READ_ONCE(sk->sk_napi_id);

	if (napi_id >= MIN_NAPI_ID)
		napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk,
			       READ_ONCE(sk->sk_prefer_busy_poll),
			       READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
#endif
}
2017-03-24 20:08:06 +03:00
/*
 * Used in the NIC receive handler to tag @skb with the id of the NAPI
 * context it arrived on.
 *
 * An skb whose napi_id is already at or above MIN_NAPI_ID is left untouched,
 * so a valid id is never overwritten. Does nothing when
 * CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline void skb_mark_napi_id(struct sk_buff *skb,
				    struct napi_struct *napi)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	if (skb->napi_id >= MIN_NAPI_ID)
		return;

	skb->napi_id = napi->napi_id;
#endif
}
2016-11-16 20:10:42 +03:00
/*
 * Used in the protocol handler to propagate the napi_id from @skb to @sk.
 *
 * With CONFIG_NET_RX_BUSY_POLL, sk_napi_id is rewritten only when it differs
 * from the skb's napi_id; the mismatch is hinted as the unlikely case.
 * sk_rx_queue_set() is called in every configuration.
 */
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	if (unlikely(READ_ONCE(sk->sk_napi_id) != skb->napi_id))
		WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif

	sk_rx_queue_set(sk, skb);
}
2020-12-01 17:22:59 +03:00
/*
 * Record @napi_id on @sk only if the socket does not have one yet.
 *
 * A non-zero sk_napi_id is kept as is. Does nothing when
 * CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	if (READ_ONCE(sk->sk_napi_id))
		return;

	WRITE_ONCE(sk->sk_napi_id, napi_id);
#endif
}
2020-11-30 21:52:01 +03:00
/*
 * Variant used for unconnected sockets: the napi_id carried by @skb is
 * stored on @sk through __sk_mark_napi_id_once(), i.e. only if the socket
 * has none recorded yet. Compiles to nothing when CONFIG_NET_RX_BUSY_POLL
 * is not set.
 */
static inline void sk_mark_napi_id_once(struct sock *sk,
					const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	__sk_mark_napi_id_once(sk, skb->napi_id);
#endif
}
/*
 * XDP counterpart of sk_mark_napi_id_once(): the id is taken from the
 * receive queue info attached to @xdp (xdp->rxq->napi_id) and stored on @sk
 * through __sk_mark_napi_id_once(). Compiles to nothing when
 * CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline void sk_mark_napi_id_once_xdp(struct sock *sk,
					    const struct xdp_buff *xdp)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	__sk_mark_napi_id_once(sk, xdp->rxq->napi_id);
#endif
}
2013-07-10 18:13:26 +04:00
# endif /* _LINUX_NET_BUSY_POLL_H */