2006-09-13 13:24:59 -04:00
/* drivers/net/ifb.c:

	The purpose of this driver is to provide a device that allows
	for sharing of resources:

	1) qdiscs/policies that are per device as opposed to system wide.
	ifb allows for a device which can be redirected to, thus providing
	an impression of sharing.

	2) Allows for queueing incoming traffic for shaping instead of
	dropping.

	The original concept is based on what is known as the IMQ
	driver initially written by Martin Devera, later rewritten
	by Patrick McHardy and then maintained by Andre Correa.

	You need the tc action mirror or redirect to feed this device
	packets.

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version
	2 of the License, or (at your option) any later version.

	Authors:	Jamal Hadi Salim (2005)

*/
# include <linux/module.h>
# include <linux/kernel.h>
# include <linux/netdevice.h>
# include <linux/etherdevice.h>
# include <linux/init.h>
# include <linux/moduleparam.h>
2006-09-13 13:24:59 -04:00
# include <net/pkt_sched.h>
2007-09-17 11:56:21 -07:00
# include <net/net_namespace.h>
2006-01-08 22:34:25 -08:00
/* Transmit timeout; not referenced by the code visible in this file. */
#define TX_TIMEOUT	(2 * HZ)

/* Value ifb_setup() stores in dev->tx_queue_len. */
#define TX_Q_LIMIT	32
/*
 * Per-device private state, stored in the net_device private area
 * (see netdev_priv() users below).
 */
struct ifb_private {
	/* Deferred worker; runs ri_tasklet() for this device. */
	struct tasklet_struct	ifb_tasklet;
	/* Non-zero while the tasklet is scheduled or asked to re-run. */
	int			tasklet_pending;

	/* Mostly debug statistics; left in for now. */
	unsigned long	st_task_enter;	 /* tasklet entered */
	unsigned long	st_txq_refl_try; /* transmit queue refill attempt */
	unsigned long	st_rxq_enter;	 /* receive queue entered */
	unsigned long	st_rx2tx_tran;	 /* receive to transmit transfers */
	unsigned long	st_rxq_notenter; /* receive queue not entered, resched */
	unsigned long	st_rx_frm_egr;	 /* received from egress path */
	unsigned long	st_rx_frm_ing;	 /* received from ingress path */
	unsigned long	st_rxq_check;	 /* post-drain check of rq performed */
	unsigned long	st_rxq_rsch;	 /* rq non-empty at check, resched */

	/* rq is filled by ifb_xmit(); tq is drained by ri_tasklet(). */
	struct sk_buff_head	rq;
	struct sk_buff_head	tq;
};
2006-02-23 16:23:51 -08:00
static int numifbs = 2 ;
2006-01-08 22:34:25 -08:00
static void ri_tasklet ( unsigned long dev ) ;
2009-08-31 19:50:51 +00:00
static netdev_tx_t ifb_xmit ( struct sk_buff * skb , struct net_device * dev ) ;
2006-01-08 22:34:25 -08:00
static int ifb_open ( struct net_device * dev ) ;
static int ifb_close ( struct net_device * dev ) ;
2006-09-13 13:24:59 -04:00
static void ri_tasklet ( unsigned long dev )
2006-01-08 22:34:25 -08:00
{
struct net_device * _dev = ( struct net_device * ) dev ;
struct ifb_private * dp = netdev_priv ( _dev ) ;
2007-10-03 17:41:50 -07:00
struct net_device_stats * stats = & _dev - > stats ;
2008-07-31 16:58:50 -07:00
struct netdev_queue * txq ;
2006-01-08 22:34:25 -08:00
struct sk_buff * skb ;
2008-07-31 16:58:50 -07:00
txq = netdev_get_tx_queue ( _dev , 0 ) ;
2006-01-08 22:34:25 -08:00
dp - > st_task_enter + + ;
if ( ( skb = skb_peek ( & dp - > tq ) ) = = NULL ) {
dp - > st_txq_refl_try + + ;
2008-07-31 16:58:50 -07:00
if ( __netif_tx_trylock ( txq ) ) {
2006-01-08 22:34:25 -08:00
dp - > st_rxq_enter + + ;
while ( ( skb = skb_dequeue ( & dp - > rq ) ) ! = NULL ) {
skb_queue_tail ( & dp - > tq , skb ) ;
dp - > st_rx2tx_tran + + ;
}
2008-07-31 16:58:50 -07:00
__netif_tx_unlock ( txq ) ;
2006-01-08 22:34:25 -08:00
} else {
/* reschedule */
dp - > st_rxq_notenter + + ;
goto resched ;
}
}
while ( ( skb = skb_dequeue ( & dp - > tq ) ) ! = NULL ) {
u32 from = G_TC_FROM ( skb - > tc_verd ) ;
skb - > tc_verd = 0 ;
skb - > tc_verd = SET_TC_NCLS ( skb - > tc_verd ) ;
stats - > tx_packets + + ;
stats - > tx_bytes + = skb - > len ;
2007-03-29 11:46:52 -07:00
2009-11-01 19:45:16 +00:00
rcu_read_lock ( ) ;
2009-11-20 15:35:04 -08:00
skb - > dev = dev_get_by_index_rcu ( & init_net , skb - > skb_iif ) ;
2007-03-29 11:46:52 -07:00
if ( ! skb - > dev ) {
2009-11-01 19:45:16 +00:00
rcu_read_unlock ( ) ;
2007-03-29 11:46:52 -07:00
dev_kfree_skb ( skb ) ;
stats - > tx_dropped + + ;
break ;
}
2009-11-01 19:45:16 +00:00
rcu_read_unlock ( ) ;
2009-11-20 15:35:04 -08:00
skb - > skb_iif = _dev - > ifindex ;
2007-03-29 11:46:52 -07:00
2006-01-08 22:34:25 -08:00
if ( from & AT_EGRESS ) {
dp - > st_rx_frm_egr + + ;
dev_queue_xmit ( skb ) ;
} else if ( from & AT_INGRESS ) {
dp - > st_rx_frm_ing + + ;
2007-03-29 11:46:52 -07:00
skb_pull ( skb , skb - > dev - > hard_header_len ) ;
2006-01-08 22:34:25 -08:00
netif_rx ( skb ) ;
2007-03-29 11:46:52 -07:00
} else
BUG ( ) ;
2006-01-08 22:34:25 -08:00
}
2008-07-31 16:58:50 -07:00
if ( __netif_tx_trylock ( txq ) ) {
2006-01-08 22:34:25 -08:00
dp - > st_rxq_check + + ;
if ( ( skb = skb_peek ( & dp - > rq ) ) = = NULL ) {
dp - > tasklet_pending = 0 ;
if ( netif_queue_stopped ( _dev ) )
netif_wake_queue ( _dev ) ;
} else {
dp - > st_rxq_rsch + + ;
2008-07-31 16:58:50 -07:00
__netif_tx_unlock ( txq ) ;
2006-01-08 22:34:25 -08:00
goto resched ;
}
2008-07-31 16:58:50 -07:00
__netif_tx_unlock ( txq ) ;
2006-01-08 22:34:25 -08:00
} else {
resched :
dp - > tasklet_pending = 1 ;
tasklet_schedule ( & dp - > ifb_tasklet ) ;
}
}
2008-11-19 21:47:07 -08:00
static const struct net_device_ops ifb_netdev_ops = {
. ndo_open = ifb_open ,
. ndo_stop = ifb_close ,
2008-11-20 20:14:53 -08:00
. ndo_start_xmit = ifb_xmit ,
. ndo_validate_addr = eth_validate_addr ,
2008-11-19 21:47:07 -08:00
} ;
2007-06-13 12:05:06 -07:00
static void ifb_setup ( struct net_device * dev )
2006-01-08 22:34:25 -08:00
{
/* Initialize the device structure. */
2007-06-13 12:05:06 -07:00
dev - > destructor = free_netdev ;
2008-11-19 21:47:07 -08:00
dev - > netdev_ops = & ifb_netdev_ops ;
2006-01-08 22:34:25 -08:00
/* Fill in device structure with ethernet-generic values. */
ether_setup ( dev ) ;
dev - > tx_queue_len = TX_Q_LIMIT ;
2008-11-19 21:47:07 -08:00
2006-01-08 22:34:25 -08:00
dev - > flags | = IFF_NOARP ;
dev - > flags & = ~ IFF_MULTICAST ;
net: release dst entry in dev_hard_start_xmit()
One point of contention in high network loads is the dst_release() performed
when a transmited skb is freed. This is because NIC tx completion calls
dev_kree_skb() long after original call to dev_queue_xmit(skb).
CPU cache is cold and the atomic op in dst_release() stalls. On SMP, this is
quite visible if one CPU is 100% handling softirqs for a network device,
since dst_clone() is done by other cpus, involving cache line ping pongs.
It seems right place to release dst is in dev_hard_start_xmit(), for most
devices but ones that are virtual, and some exceptions.
David Miller suggested to define a new device flag, set in alloc_netdev_mq()
(so that most devices set it at init time), and carefuly unset in devices
which dont want a NULL skb->dst in their ndo_start_xmit().
List of devices that must clear this flag is :
- loopback device, because it calls netif_rx() and quoting Patrick :
"ip_route_input() doesn't accept loopback addresses, so loopback packets
already need to have a dst_entry attached."
- appletalk/ipddp.c : needs skb->dst in its xmit function
- And all devices that call again dev_queue_xmit() from their xmit function
(as some classifiers need skb->dst) : bonding, vlan, macvlan, eql, ifb, hdlc_fr
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2009-05-18 22:19:19 -07:00
dev - > priv_flags & = ~ IFF_XMIT_DST_RELEASE ;
2006-01-08 22:34:25 -08:00
random_ether_addr ( dev - > dev_addr ) ;
}
2009-08-31 19:50:51 +00:00
static netdev_tx_t ifb_xmit ( struct sk_buff * skb , struct net_device * dev )
2006-01-08 22:34:25 -08:00
{
struct ifb_private * dp = netdev_priv ( dev ) ;
2007-10-03 17:41:50 -07:00
struct net_device_stats * stats = & dev - > stats ;
2006-01-08 22:34:25 -08:00
u32 from = G_TC_FROM ( skb - > tc_verd ) ;
2007-01-01 19:39:09 -08:00
stats - > rx_packets + + ;
stats - > rx_bytes + = skb - > len ;
2006-01-08 22:34:25 -08:00
2009-11-20 15:35:04 -08:00
if ( ! ( from & ( AT_INGRESS | AT_EGRESS ) ) | | ! skb - > skb_iif ) {
2006-01-08 22:34:25 -08:00
dev_kfree_skb ( skb ) ;
stats - > rx_dropped + + ;
2009-08-31 19:50:51 +00:00
return NETDEV_TX_OK ;
2006-01-08 22:34:25 -08:00
}
if ( skb_queue_len ( & dp - > rq ) > = dev - > tx_queue_len ) {
netif_stop_queue ( dev ) ;
}
skb_queue_tail ( & dp - > rq , skb ) ;
if ( ! dp - > tasklet_pending ) {
dp - > tasklet_pending = 1 ;
tasklet_schedule ( & dp - > ifb_tasklet ) ;
}
2009-08-31 19:50:51 +00:00
return NETDEV_TX_OK ;
2006-01-08 22:34:25 -08:00
}
static int ifb_close ( struct net_device * dev )
{
struct ifb_private * dp = netdev_priv ( dev ) ;
tasklet_kill ( & dp - > ifb_tasklet ) ;
netif_stop_queue ( dev ) ;
skb_queue_purge ( & dp - > rq ) ;
skb_queue_purge ( & dp - > tq ) ;
return 0 ;
}
static int ifb_open ( struct net_device * dev )
{
struct ifb_private * dp = netdev_priv ( dev ) ;
tasklet_init ( & dp - > ifb_tasklet , ri_tasklet , ( unsigned long ) dev ) ;
skb_queue_head_init ( & dp - > rq ) ;
skb_queue_head_init ( & dp - > tq ) ;
netif_start_queue ( dev ) ;
return 0 ;
}
2007-07-11 19:42:31 -07:00
static int ifb_validate ( struct nlattr * tb [ ] , struct nlattr * data [ ] )
{
if ( tb [ IFLA_ADDRESS ] ) {
if ( nla_len ( tb [ IFLA_ADDRESS ] ) ! = ETH_ALEN )
return - EINVAL ;
if ( ! is_valid_ether_addr ( nla_data ( tb [ IFLA_ADDRESS ] ) ) )
return - EADDRNOTAVAIL ;
}
return 0 ;
}
2007-06-13 12:05:06 -07:00
/*
 * rtnetlink link operations for the "ifb" link kind.
 *
 * The kind string must be exactly "ifb" (no surrounding whitespace) so that
 * it agrees with MODULE_ALIAS_RTNL_LINK("ifb") and with the kind name that
 * userspace supplies.
 */
static struct rtnl_link_ops ifb_link_ops __read_mostly = {
	.kind		= "ifb",
	.priv_size	= sizeof(struct ifb_private),
	.setup		= ifb_setup,
	.validate	= ifb_validate,
};
2007-07-11 19:42:13 -07:00
/*
 * Number of ifb devices to be set up by this module.
 * Permission 0: the parameter can only be given at load time.
 */
module_param(numifbs, int, 0);
MODULE_PARM_DESC(numifbs, "Number of ifb devices");
2006-01-08 22:34:25 -08:00
/*
 * ifb_init_one - allocate and register one ifb device.
 * @index: ordinal of the device being created; not used, the name is
 *         picked by dev_alloc_name() from the "ifb%d" template.
 *
 * Calls register_netdevice(); the caller in this file (ifb_init_module())
 * invokes it with the RTNL lock held.
 *
 * Returns 0 on success, -ENOMEM when allocation fails, or the negative
 * error from dev_alloc_name() / register_netdevice().  On failure the
 * allocated device is freed.
 */
static int __init ifb_init_one(int index)
{
	struct net_device *dev_ifb;
	int err;

	/* The template must not contain whitespace: it becomes the ifname. */
	dev_ifb = alloc_netdev(sizeof(struct ifb_private),
			       "ifb%d", ifb_setup);
	if (!dev_ifb)
		return -ENOMEM;

	err = dev_alloc_name(dev_ifb, dev_ifb->name);
	if (err < 0)
		goto err_free;

	dev_ifb->rtnl_link_ops = &ifb_link_ops;
	err = register_netdevice(dev_ifb);
	if (err < 0)
		goto err_free;

	return 0;

err_free:
	free_netdev(dev_ifb);
	return err;
}
2006-01-08 22:34:25 -08:00
/*
 * ifb_init_module - module entry point.
 *
 * Under the RTNL lock, registers the "ifb" link kind and then creates
 * numifbs devices.  If creating any device fails, the link kind is
 * unregistered again.  If registering the link kind itself fails, nothing
 * was registered, so the unregister step is skipped.
 *
 * Returns 0 on success or the first error encountered.
 */
static int __init ifb_init_module(void)
{
	int i, err;

	rtnl_lock();
	err = __rtnl_link_register(&ifb_link_ops);
	if (err)
		goto out;	/* never registered: nothing to undo */

	for (i = 0; i < numifbs && !err; i++)
		err = ifb_init_one(i);
	if (err)
		__rtnl_link_unregister(&ifb_link_ops);

out:
	rtnl_unlock();

	return err;
}
2006-01-08 22:34:25 -08:00
/*
 * ifb_cleanup_module - module exit point.
 *
 * Unregisters the ifb link operations.
 */
static void __exit ifb_cleanup_module(void)
{
	rtnl_link_unregister(&ifb_link_ops);
}
module_init(ifb_init_module);
module_exit(ifb_cleanup_module);

/*
 * Module metadata.  The license ident must be the exact string "GPL", and
 * the rtnl-link alias must match ifb_link_ops.kind ("ifb"), so neither may
 * carry surrounding whitespace.
 */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jamal Hadi Salim");
MODULE_ALIAS_RTNL_LINK("ifb");