2006-09-13 21:24:59 +04:00
/* drivers/net/ifb.c:
2006-01-09 09:34:25 +03:00
The purpose of this driver is to provide a device that allows
for sharing of resources:
1) qdiscs/policies that are per device as opposed to system wide.
ifb provides a device that traffic can be redirected to, thus giving
an impression of sharing.
2) Allows for queueing incoming traffic for shaping instead of
2006-09-13 21:24:59 +04:00
dropping.
2006-01-09 09:34:25 +03:00
The original concept is based on what is known as the IMQ
driver, initially written by Martin Devera, later rewritten
by Patrick McHardy and then maintained by Andre Correa.
You need the tc action mirror or redirect to feed this device
packets.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version
2 of the License, or (at your option) any later version.
2006-09-13 21:24:59 +04:00
2006-01-09 09:34:25 +03:00
Authors: Jamal Hadi Salim (2005)
2006-09-13 21:24:59 +04:00
2006-01-09 09:34:25 +03:00
*/
# include <linux/module.h>
# include <linux/kernel.h>
# include <linux/netdevice.h>
# include <linux/etherdevice.h>
# include <linux/init.h>
# include <linux/moduleparam.h>
2006-09-13 21:24:59 +04:00
# include <net/pkt_sched.h>
2007-09-17 22:56:21 +04:00
# include <net/net_namespace.h>
2006-01-09 09:34:25 +03:00
/*
 * Transmit queue length given to every ifb device in ifb_setup();
 * ifb_xmit() stops the queue once this many packets are waiting on rq.
 */
# define TX_Q_LIMIT 32
/*
 * Per-device private state, reached through netdev_priv() on an ifb
 * net_device.
 */
struct ifb_private {
	/* Tasklet that runs ri_tasklet() for this device. */
	struct tasklet_struct	ifb_tasklet;
	/*
	 * Set to 1 when the tasklet is scheduled; cleared by ri_tasklet()
	 * once it finds rq empty.
	 */
	int			tasklet_pending;
	/* Packets queued by ifb_xmit(), waiting to be spliced onto tq. */
	struct sk_buff_head	rq;
	/* Packets being drained and re-injected by ri_tasklet(). */
	struct sk_buff_head	tq;
};
2006-02-24 03:23:51 +03:00
/*
 * Number of ifb devices created by ifb_init_module(); can be overridden
 * through the "numifbs" module parameter declared further down.
 */
static int numifbs = 2 ;
2006-01-09 09:34:25 +03:00
static void ri_tasklet ( unsigned long dev ) ;
2009-08-31 23:50:51 +04:00
static netdev_tx_t ifb_xmit ( struct sk_buff * skb , struct net_device * dev ) ;
2006-01-09 09:34:25 +03:00
static int ifb_open ( struct net_device * dev ) ;
static int ifb_close ( struct net_device * dev ) ;
2006-09-13 21:24:59 +04:00
static void ri_tasklet ( unsigned long dev )
2006-01-09 09:34:25 +03:00
{
struct net_device * _dev = ( struct net_device * ) dev ;
struct ifb_private * dp = netdev_priv ( _dev ) ;
2007-10-04 04:41:50 +04:00
struct net_device_stats * stats = & _dev - > stats ;
2008-08-01 03:58:50 +04:00
struct netdev_queue * txq ;
2006-01-09 09:34:25 +03:00
struct sk_buff * skb ;
2008-08-01 03:58:50 +04:00
txq = netdev_get_tx_queue ( _dev , 0 ) ;
2006-01-09 09:34:25 +03:00
if ( ( skb = skb_peek ( & dp - > tq ) ) = = NULL ) {
2008-08-01 03:58:50 +04:00
if ( __netif_tx_trylock ( txq ) ) {
2010-12-04 18:01:52 +03:00
skb_queue_splice_tail_init ( & dp - > rq , & dp - > tq ) ;
2008-08-01 03:58:50 +04:00
__netif_tx_unlock ( txq ) ;
2006-01-09 09:34:25 +03:00
} else {
/* reschedule */
goto resched ;
}
}
2010-12-16 02:52:55 +03:00
while ( ( skb = __skb_dequeue ( & dp - > tq ) ) ! = NULL ) {
2006-01-09 09:34:25 +03:00
u32 from = G_TC_FROM ( skb - > tc_verd ) ;
skb - > tc_verd = 0 ;
skb - > tc_verd = SET_TC_NCLS ( skb - > tc_verd ) ;
stats - > tx_packets + + ;
stats - > tx_bytes + = skb - > len ;
2007-03-29 22:46:52 +04:00
2009-11-01 22:45:16 +03:00
rcu_read_lock ( ) ;
2009-11-21 02:35:04 +03:00
skb - > dev = dev_get_by_index_rcu ( & init_net , skb - > skb_iif ) ;
2007-03-29 22:46:52 +04:00
if ( ! skb - > dev ) {
2009-11-01 22:45:16 +03:00
rcu_read_unlock ( ) ;
2007-03-29 22:46:52 +04:00
dev_kfree_skb ( skb ) ;
stats - > tx_dropped + + ;
2010-12-04 17:09:08 +03:00
if ( skb_queue_len ( & dp - > tq ) ! = 0 )
goto resched ;
2007-03-29 22:46:52 +04:00
break ;
}
2009-11-01 22:45:16 +03:00
rcu_read_unlock ( ) ;
2009-11-21 02:35:04 +03:00
skb - > skb_iif = _dev - > ifindex ;
2007-03-29 22:46:52 +04:00
2006-01-09 09:34:25 +03:00
if ( from & AT_EGRESS ) {
dev_queue_xmit ( skb ) ;
} else if ( from & AT_INGRESS ) {
2007-03-29 22:46:52 +04:00
skb_pull ( skb , skb - > dev - > hard_header_len ) ;
2010-12-15 01:39:58 +03:00
netif_receive_skb ( skb ) ;
2007-03-29 22:46:52 +04:00
} else
BUG ( ) ;
2006-01-09 09:34:25 +03:00
}
2008-08-01 03:58:50 +04:00
if ( __netif_tx_trylock ( txq ) ) {
2006-01-09 09:34:25 +03:00
if ( ( skb = skb_peek ( & dp - > rq ) ) = = NULL ) {
dp - > tasklet_pending = 0 ;
if ( netif_queue_stopped ( _dev ) )
netif_wake_queue ( _dev ) ;
} else {
2008-08-01 03:58:50 +04:00
__netif_tx_unlock ( txq ) ;
2006-01-09 09:34:25 +03:00
goto resched ;
}
2008-08-01 03:58:50 +04:00
__netif_tx_unlock ( txq ) ;
2006-01-09 09:34:25 +03:00
} else {
resched :
dp - > tasklet_pending = 1 ;
tasklet_schedule ( & dp - > ifb_tasklet ) ;
}
}
2008-11-20 08:47:07 +03:00
static const struct net_device_ops ifb_netdev_ops = {
. ndo_open = ifb_open ,
. ndo_stop = ifb_close ,
2008-11-21 07:14:53 +03:00
. ndo_start_xmit = ifb_xmit ,
. ndo_validate_addr = eth_validate_addr ,
2008-11-20 08:47:07 +03:00
} ;
ifb: add performance flags
Le lundi 03 janvier 2011 à 11:40 -0800, David Miller a écrit :
> From: Jarek Poplawski <jarkao2@gmail.com>
> Date: Mon, 3 Jan 2011 20:37:03 +0100
>
> > On Sun, Jan 02, 2011 at 09:24:36PM +0100, Eric Dumazet wrote:
> >> Le mercredi 29 décembre 2010 à 00:07 +0100, Jarek Poplawski a écrit :
> >>
> >> > Ingress is before vlans handler so these features and the
> >> > NETIF_F_HW_VLAN_TX flag seem useful for ifb considering
> >> > dev_hard_start_xmit() checks.
> >>
> >> OK, here is v2 of the patch then, thanks everybody.
> >>
> >>
> >> [PATCH v2 net-next-2.6] ifb: add performance flags
> >>
> >> IFB can use the full set of features flags (NETIF_F_SG |
> >> NETIF_F_FRAGLIST | NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA) to
> >> avoid unnecessary split of some packets (GRO for example)
> >>
> >> Changli suggested to also set vlan_features,
> >
> > He also suggested more GSO flags of which especially NETIF_F_TSO6
> > seems interesting (wrt GRO)?
>
> I think at least TSO6 would very much be appropriate here.
Yes, why not, I am only wondering why loopback / dummy (and others ?)
only set NETIF_F_TSO :)
Since I want to play with ECN, I might also add NETIF_F_TSO_ECN ;)
For other flags, I really doubt it can matter on ifb ?
[PATCH v3 net-next-2.6] ifb: add performance flags
IFB can use the full set of features flags (NETIF_F_SG |
NETIF_F_FRAGLIST | NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA) to
avoid unnecessary split of some packets (GRO for example)
Changli suggested to also set vlan_features, NETIF_F_TSO6,
NETIF_F_TSO_ECN.
Jarek suggested to add NETIF_F_HW_VLAN_TX as well.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Changli Gao <xiaosuo@gmail.com>
Cc: Jarek Poplawski <jarkao2@gmail.com>
Cc: Pawel Staszewski <pstaszewski@itcare.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-01-03 13:35:22 +03:00
/*
 * Feature flags set on both dev->features and dev->vlan_features in
 * ifb_setup().  Advertising the full set avoids unnecessary splitting
 * of some packets (GRO for example).
 */
#define IFB_FEATURES	(NETIF_F_NO_CSUM | NETIF_F_HIGHDMA |		\
			 NETIF_F_SG | NETIF_F_FRAGLIST |		\
			 NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN |	\
			 NETIF_F_HW_VLAN_TX)
2007-06-13 23:05:06 +04:00
static void ifb_setup ( struct net_device * dev )
2006-01-09 09:34:25 +03:00
{
/* Initialize the device structure. */
2007-06-13 23:05:06 +04:00
dev - > destructor = free_netdev ;
2008-11-20 08:47:07 +03:00
dev - > netdev_ops = & ifb_netdev_ops ;
2006-01-09 09:34:25 +03:00
/* Fill in device structure with ethernet-generic values. */
ether_setup ( dev ) ;
dev - > tx_queue_len = TX_Q_LIMIT ;
2008-11-20 08:47:07 +03:00
ifb: add performance flags
Le lundi 03 janvier 2011 à 11:40 -0800, David Miller a écrit :
> From: Jarek Poplawski <jarkao2@gmail.com>
> Date: Mon, 3 Jan 2011 20:37:03 +0100
>
> > On Sun, Jan 02, 2011 at 09:24:36PM +0100, Eric Dumazet wrote:
> >> Le mercredi 29 décembre 2010 ?? 00:07 +0100, Jarek Poplawski a écrit :
> >>
> >> > Ingress is before vlans handler so these features and the
> >> > NETIF_F_HW_VLAN_TX flag seem useful for ifb considering
> >> > dev_hard_start_xmit() checks.
> >>
> >> OK, here is v2 of the patch then, thanks everybody.
> >>
> >>
> >> [PATCH v2 net-next-2.6] ifb: add performance flags
> >>
> >> IFB can use the full set of features flags (NETIF_F_SG |
> >> NETIF_F_FRAGLIST | NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA) to
> >> avoid unnecessary split of some packets (GRO for example)
> >>
> >> Changli suggested to also set vlan_features,
> >
> > He also suggested more GSO flags of which especially NETIF_F_TSO6
> > seems interesting (wrt GRO)?
>
> I think at least TSO6 would very much be appropriate here.
Yes, why not, I am only wondering why loopback / dummy (and others ?)
only set NETIF_F_TSO :)
Since I want to play with ECN, I might also add NETIF_F_TSO_ECN ;)
For other flags, I really doubt it can matter on ifb ?
[PATCH v3 net-next-2.6] ifb: add performance flags
IFB can use the full set of features flags (NETIF_F_SG |
NETIF_F_FRAGLIST | NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA) to
avoid unnecessary split of some packets (GRO for example)
Changli suggested to also set vlan_features, NETIF_F_TSO6,
NETIF_F_TSO_ECN.
Jarek suggested to add NETIF_F_HW_VLAN_TX as well.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Changli Gao <xiaosuo@gmail.com>
Cc: Jarek Poplawski <jarkao2@gmail.com>
Cc: Pawel Staszewski <pstaszewski@itcare.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-01-03 13:35:22 +03:00
dev - > features | = IFB_FEATURES ;
dev - > vlan_features | = IFB_FEATURES ;
2006-01-09 09:34:25 +03:00
dev - > flags | = IFF_NOARP ;
dev - > flags & = ~ IFF_MULTICAST ;
net: release dst entry in dev_hard_start_xmit()
One point of contention in high network loads is the dst_release() performed
when a transmited skb is freed. This is because NIC tx completion calls
dev_kree_skb() long after original call to dev_queue_xmit(skb).
CPU cache is cold and the atomic op in dst_release() stalls. On SMP, this is
quite visible if one CPU is 100% handling softirqs for a network device,
since dst_clone() is done by other cpus, involving cache line ping pongs.
It seems right place to release dst is in dev_hard_start_xmit(), for most
devices but ones that are virtual, and some exceptions.
David Miller suggested to define a new device flag, set in alloc_netdev_mq()
(so that most devices set it at init time), and carefuly unset in devices
which dont want a NULL skb->dst in their ndo_start_xmit().
List of devices that must clear this flag is :
- loopback device, because it calls netif_rx() and quoting Patrick :
"ip_route_input() doesn't accept loopback addresses, so loopback packets
already need to have a dst_entry attached."
- appletalk/ipddp.c : needs skb->dst in its xmit function
- And all devices that call again dev_queue_xmit() from their xmit function
(as some classifiers need skb->dst) : bonding, vlan, macvlan, eql, ifb, hdlc_fr
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2009-05-19 09:19:19 +04:00
dev - > priv_flags & = ~ IFF_XMIT_DST_RELEASE ;
2006-01-09 09:34:25 +03:00
random_ether_addr ( dev - > dev_addr ) ;
}
2009-08-31 23:50:51 +04:00
static netdev_tx_t ifb_xmit ( struct sk_buff * skb , struct net_device * dev )
2006-01-09 09:34:25 +03:00
{
struct ifb_private * dp = netdev_priv ( dev ) ;
2007-10-04 04:41:50 +04:00
struct net_device_stats * stats = & dev - > stats ;
2006-01-09 09:34:25 +03:00
u32 from = G_TC_FROM ( skb - > tc_verd ) ;
2007-01-02 06:39:09 +03:00
stats - > rx_packets + + ;
stats - > rx_bytes + = skb - > len ;
2006-01-09 09:34:25 +03:00
2009-11-21 02:35:04 +03:00
if ( ! ( from & ( AT_INGRESS | AT_EGRESS ) ) | | ! skb - > skb_iif ) {
2006-01-09 09:34:25 +03:00
dev_kfree_skb ( skb ) ;
stats - > rx_dropped + + ;
2009-08-31 23:50:51 +04:00
return NETDEV_TX_OK ;
2006-01-09 09:34:25 +03:00
}
if ( skb_queue_len ( & dp - > rq ) > = dev - > tx_queue_len ) {
netif_stop_queue ( dev ) ;
}
2010-12-04 18:01:52 +03:00
__skb_queue_tail ( & dp - > rq , skb ) ;
2006-01-09 09:34:25 +03:00
if ( ! dp - > tasklet_pending ) {
dp - > tasklet_pending = 1 ;
tasklet_schedule ( & dp - > ifb_tasklet ) ;
}
2009-08-31 23:50:51 +04:00
return NETDEV_TX_OK ;
2006-01-09 09:34:25 +03:00
}
static int ifb_close ( struct net_device * dev )
{
struct ifb_private * dp = netdev_priv ( dev ) ;
tasklet_kill ( & dp - > ifb_tasklet ) ;
netif_stop_queue ( dev ) ;
2010-12-04 18:01:52 +03:00
__skb_queue_purge ( & dp - > rq ) ;
__skb_queue_purge ( & dp - > tq ) ;
2006-01-09 09:34:25 +03:00
return 0 ;
}
static int ifb_open ( struct net_device * dev )
{
struct ifb_private * dp = netdev_priv ( dev ) ;
tasklet_init ( & dp - > ifb_tasklet , ri_tasklet , ( unsigned long ) dev ) ;
2010-12-04 18:01:52 +03:00
__skb_queue_head_init ( & dp - > rq ) ;
__skb_queue_head_init ( & dp - > tq ) ;
2006-01-09 09:34:25 +03:00
netif_start_queue ( dev ) ;
return 0 ;
}
2007-07-12 06:42:31 +04:00
static int ifb_validate ( struct nlattr * tb [ ] , struct nlattr * data [ ] )
{
if ( tb [ IFLA_ADDRESS ] ) {
if ( nla_len ( tb [ IFLA_ADDRESS ] ) ! = ETH_ALEN )
return - EINVAL ;
if ( ! is_valid_ether_addr ( nla_data ( tb [ IFLA_ADDRESS ] ) ) )
return - EADDRNOTAVAIL ;
}
return 0 ;
}
2007-06-13 23:05:06 +04:00
/*
 * rtnetlink link type for ifb devices: the kind string identifying the
 * type, the size of the per-device private area, and the setup and
 * attribute-validation callbacks.
 *
 * The kind string must be exactly "ifb", with no surrounding
 * whitespace, or a request for link kind "ifb" will not match it.
 */
static struct rtnl_link_ops ifb_link_ops __read_mostly = {
	.kind		= "ifb",
	.priv_size	= sizeof(struct ifb_private),
	.setup		= ifb_setup,
	.validate	= ifb_validate,
};
2007-07-12 06:42:13 +04:00
/*
 * Number of ifb devices to be set up by this module.  Declared as the
 * integer module parameter "numifbs"; ifb_init_module() creates that
 * many devices.
 */
module_param ( numifbs , int , 0 ) ;
MODULE_PARM_DESC ( numifbs , " Number of ifb devices " ) ;
2006-01-09 09:34:25 +03:00
/*
 * ifb_init_one - allocate and register one ifb device
 * @index: ordinal of the device being created; currently unused here
 *
 * Called from ifb_init_module() with the RTNL lock held.  The device is
 * allocated with the "ifb%d" name template, which must not contain
 * whitespace because spaces are not valid in an interface name.
 *
 * Return: 0 on success, -ENOMEM if allocation fails, or the negative
 * error returned by register_netdevice(), in which case the device is
 * freed again.
 */
static int __init ifb_init_one(int index)
{
	struct net_device *dev_ifb;
	int err;

	dev_ifb = alloc_netdev(sizeof(struct ifb_private),
			       "ifb%d", ifb_setup);
	if (!dev_ifb)
		return -ENOMEM;

	dev_ifb->rtnl_link_ops = &ifb_link_ops;

	err = register_netdevice(dev_ifb);
	if (err < 0) {
		/* Never registered, so it is ours to free. */
		free_netdev(dev_ifb);
		return err;
	}

	return 0;
}
2006-01-09 09:34:25 +03:00
/*
 * ifb_init_module - module entry point
 *
 * Registers the ifb link type and creates numifbs devices, all under
 * the RTNL lock.  If creating any device fails, the link type is
 * unregistered again.  If registering the link type itself fails there
 * is nothing to undo, so __rtnl_link_unregister() is not called on ops
 * that were never registered.
 *
 * Return: 0 on success, otherwise the first error encountered.
 */
static int __init ifb_init_module(void)
{
	int i, err;

	rtnl_lock();

	err = __rtnl_link_register(&ifb_link_ops);
	if (err)
		goto out;

	for (i = 0; i < numifbs && !err; i++)
		err = ifb_init_one(i);

	if (err)
		__rtnl_link_unregister(&ifb_link_ops);

out:
	rtnl_unlock();
	return err;
}
2006-01-09 09:34:25 +03:00
/*
 * ifb_cleanup_module - module exit point
 *
 * Calls rtnl_link_unregister() on the ifb link type.
 */
static void __exit ifb_cleanup_module(void)
{
	rtnl_link_unregister(&ifb_link_ops);
}
/* Module entry and exit points. */
module_init(ifb_init_module);
module_exit(ifb_cleanup_module);

/*
 * Module metadata.  The licence string and the rtnl-link alias are
 * matched literally, so they must not carry surrounding whitespace.
 */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jamal Hadi Salim");
MODULE_ALIAS_RTNL_LINK("ifb");