2009-03-11 12:51:26 +03:00
/*
* Monitoring code for network dropped packet alerts
*
* Copyright ( C ) 2009 Neil Horman < nhorman @ tuxdriver . com >
*/
# include <linux/netdevice.h>
# include <linux/etherdevice.h>
# include <linux/string.h>
# include <linux/if_arp.h>
# include <linux/inetdevice.h>
# include <linux/inet.h>
# include <linux/interrupt.h>
# include <linux/netpoll.h>
# include <linux/sched.h>
# include <linux/delay.h>
# include <linux/types.h>
# include <linux/workqueue.h>
# include <linux/netlink.h>
# include <linux/net_dropmon.h>
# include <linux/percpu.h>
# include <linux/timer.h>
# include <linux/bitops.h>
# include <net/genetlink.h>
2009-05-21 11:36:08 +04:00
# include <net/netevent.h>
2009-03-11 12:51:26 +03:00
2009-04-15 03:39:12 +04:00
# include <trace/events/skb.h>
2009-06-15 14:02:23 +04:00
# include <trace/events/napi.h>
2009-03-11 12:51:26 +03:00
# include <asm/unaligned.h>
/*
 * Values accepted by set_all_monitor_traces() and stored in trace_state.
 */
# define TRACE_ON 1
# define TRACE_OFF 0
/*
 * Forward declaration: send_dm_alert() is the work handler installed on each
 * CPU's dm_alert_work by init_net_drop_monitor().
 */
static void send_dm_alert ( struct work_struct * unused ) ;
/*
* Globals , our netlink socket pointer
* and the work handle that will send up
* netlink alerts
*/
2009-05-21 11:36:08 +04:00
static int trace_state = TRACE_OFF ;
static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED ;
2009-03-11 12:51:26 +03:00
/*
 * Per-CPU drop monitor state; one instance per CPU lives in dm_cpu_data.
 */
struct per_cpu_dm_data {
/* work item whose handler is send_dm_alert() */
struct work_struct dm_alert_work ;
/* alert message currently being filled in for this CPU */
struct sk_buff * skb ;
/* hits that may still be recorded in skb; set to dm_hit_limit on reset */
atomic_t dm_hit_count ;
/* delays scheduling of dm_alert_work; its handler is sched_send_work() */
struct timer_list send_timer ;
} ;
2009-05-21 11:36:08 +04:00
/*
 * Per-net_device bookkeeping used by trace_napi_poll_hit() to notice changes
 * in the device's rx_dropped counter.  Entries are linked on hw_stats_list.
 */
struct dm_hw_stat_delta {
/* device being tracked; set to NULL once the device unregisters */
struct net_device * dev ;
2009-09-03 01:37:45 +04:00
/* jiffies value at registration or at the last recorded rx_dropped change */
unsigned long last_rx ;
2009-05-21 11:36:08 +04:00
/* linkage on hw_stats_list */
struct list_head list ;
/* used with call_rcu() to free the entry via free_dm_hw_stat() */
struct rcu_head rcu ;
/* rx_dropped value seen the last time a change was recorded */
unsigned long last_drop_val ;
} ;
2009-03-11 12:51:26 +03:00
/*
 * Generic netlink family for the drop monitor.  It is registered in
 * init_net_drop_monitor() and used when building alert messages in
 * reset_per_cpu_data().
 */
static struct genl_family net_drop_monitor_family = {
. id = GENL_ID_GENERATE ,
. hdrsize = 0 ,
. name = " NET_DM " ,
2009-04-27 14:17:31 +04:00
. version = 2 ,
2009-03-11 12:51:26 +03:00
. maxattr = NET_DM_CMD_MAX ,
} ;
/* Per-CPU alert buffer, hit budget, work item and timer. */
static DEFINE_PER_CPU ( struct per_cpu_dm_data , dm_cpu_data ) ;
/*
 * Number of hits a single alert message may record; also used to size the
 * alert skb in reset_per_cpu_data().
 */
static int dm_hit_limit = 64 ;
/* Delay, in seconds (multiplied by HZ), before a pending alert is sent. */
static int dm_delay = 1 ;
2009-05-21 11:36:08 +04:00
/*
 * Minimum interval, in jiffies, that must pass after a device's last_rx
 * before trace_napi_poll_hit() records another rx_dropped change for it.
 */
static unsigned long dm_hw_check_delta = 2 * HZ ;
/* List of struct dm_hw_stat_delta entries, one per registered net_device. */
static LIST_HEAD ( hw_stats_list ) ;
2009-03-11 12:51:26 +03:00
static void reset_per_cpu_data ( struct per_cpu_dm_data * data )
{
size_t al ;
struct net_dm_alert_msg * msg ;
2009-04-27 14:17:31 +04:00
struct nlattr * nla ;
2009-03-11 12:51:26 +03:00
al = sizeof ( struct net_dm_alert_msg ) ;
al + = dm_hit_limit * sizeof ( struct net_dm_drop_point ) ;
2009-04-27 14:17:31 +04:00
al + = sizeof ( struct nlattr ) ;
2009-03-11 12:51:26 +03:00
data - > skb = genlmsg_new ( al , GFP_KERNEL ) ;
genlmsg_put ( data - > skb , 0 , 0 , & net_drop_monitor_family ,
0 , NET_DM_CMD_ALERT ) ;
2009-04-27 14:17:31 +04:00
nla = nla_reserve ( data - > skb , NLA_UNSPEC , sizeof ( struct net_dm_alert_msg ) ) ;
msg = nla_data ( nla ) ;
2009-03-11 12:51:26 +03:00
memset ( msg , 0 , al ) ;
atomic_set ( & data - > dm_hit_count , dm_hit_limit ) ;
}
static void send_dm_alert ( struct work_struct * unused )
{
struct sk_buff * skb ;
struct per_cpu_dm_data * data = & __get_cpu_var ( dm_cpu_data ) ;
/*
* Grab the skb we ' re about to send
*/
skb = data - > skb ;
/*
* Replace it with a new one
*/
reset_per_cpu_data ( data ) ;
/*
* Ship it !
*/
genlmsg_multicast ( skb , 0 , NET_DM_GRP_ALERT , GFP_KERNEL ) ;
}
/*
* This is the timer function to delay the sending of an alert
* in the event that more drops will arrive during the
* hysteresis period . Note that it operates under the timer interrupt
* so we don ' t need to disable preemption here
*/
static void sched_send_work ( unsigned long unused )
{
struct per_cpu_dm_data * data = & __get_cpu_var ( dm_cpu_data ) ;
schedule_work ( & data - > dm_alert_work ) ;
}
2009-05-21 11:36:08 +04:00
static void trace_drop_common ( struct sk_buff * skb , void * location )
2009-03-11 12:51:26 +03:00
{
struct net_dm_alert_msg * msg ;
struct nlmsghdr * nlh ;
2009-04-27 14:17:31 +04:00
struct nlattr * nla ;
2009-03-11 12:51:26 +03:00
int i ;
struct per_cpu_dm_data * data = & __get_cpu_var ( dm_cpu_data ) ;
if ( ! atomic_add_unless ( & data - > dm_hit_count , - 1 , 0 ) ) {
/*
* we ' re already at zero , discard this hit
*/
goto out ;
}
nlh = ( struct nlmsghdr * ) data - > skb - > data ;
2009-04-27 14:17:31 +04:00
nla = genlmsg_data ( nlmsg_data ( nlh ) ) ;
msg = nla_data ( nla ) ;
2009-03-11 12:51:26 +03:00
for ( i = 0 ; i < msg - > entries ; i + + ) {
if ( ! memcmp ( & location , msg - > points [ i ] . pc , sizeof ( void * ) ) ) {
msg - > points [ i ] . count + + ;
goto out ;
}
}
/*
* We need to create a new entry
*/
__nla_reserve_nohdr ( data - > skb , sizeof ( struct net_dm_drop_point ) ) ;
2009-04-27 14:17:31 +04:00
nla - > nla_len + = NLA_ALIGN ( sizeof ( struct net_dm_drop_point ) ) ;
2009-03-11 12:51:26 +03:00
memcpy ( msg - > points [ msg - > entries ] . pc , & location , sizeof ( void * ) ) ;
msg - > points [ msg - > entries ] . count = 1 ;
msg - > entries + + ;
if ( ! timer_pending ( & data - > send_timer ) ) {
data - > send_timer . expires = jiffies + dm_delay * HZ ;
add_timer_on ( & data - > send_timer , smp_processor_id ( ) ) ;
}
out :
return ;
}
2009-05-21 11:36:08 +04:00
/*
 * Probe attached to the kfree_skb tracepoint by set_all_monitor_traces();
 * forwards the dropped skb and the drop location to trace_drop_common().
 */
static void trace_kfree_skb_hit ( struct sk_buff * skb , void * location )
{
trace_drop_common ( skb , location ) ;
}
static void trace_napi_poll_hit ( struct napi_struct * napi )
{
struct dm_hw_stat_delta * new_stat ;
/*
2009-09-03 01:37:45 +04:00
* Don ' t check napi structures with no associated device
2009-05-21 11:36:08 +04:00
*/
2009-09-03 01:37:45 +04:00
if ( ! napi - > dev )
2009-05-21 11:36:08 +04:00
return ;
rcu_read_lock ( ) ;
list_for_each_entry_rcu ( new_stat , & hw_stats_list , list ) {
2009-09-03 01:37:45 +04:00
/*
* only add a note to our monitor buffer if :
* 1 ) this is the dev we received on
* 2 ) its after the last_rx delta
* 3 ) our rx_dropped count has gone up
*/
2009-05-21 11:36:08 +04:00
if ( ( new_stat - > dev = = napi - > dev ) & &
2009-09-03 01:37:45 +04:00
( time_after ( jiffies , new_stat - > last_rx + dm_hw_check_delta ) ) & &
2009-05-21 11:36:08 +04:00
( napi - > dev - > stats . rx_dropped ! = new_stat - > last_drop_val ) ) {
trace_drop_common ( NULL , NULL ) ;
new_stat - > last_drop_val = napi - > dev - > stats . rx_dropped ;
2009-09-03 01:37:45 +04:00
new_stat - > last_rx = jiffies ;
2009-05-21 11:36:08 +04:00
break ;
}
}
rcu_read_unlock ( ) ;
}
/*
 * RCU callback passed to call_rcu(): frees the dm_hw_stat_delta that embeds
 * @head as its rcu member.
 */
static void free_dm_hw_stat(struct rcu_head *head)
{
	kfree(container_of(head, struct dm_hw_stat_delta, rcu));
}
2009-03-11 12:51:26 +03:00
static int set_all_monitor_traces ( int state )
{
int rc = 0 ;
2009-05-21 11:36:08 +04:00
struct dm_hw_stat_delta * new_stat = NULL ;
struct dm_hw_stat_delta * temp ;
spin_lock ( & trace_state_lock ) ;
2009-03-11 12:51:26 +03:00
switch ( state ) {
case TRACE_ON :
rc | = register_trace_kfree_skb ( trace_kfree_skb_hit ) ;
2009-05-21 11:36:08 +04:00
rc | = register_trace_napi_poll ( trace_napi_poll_hit ) ;
2009-03-11 12:51:26 +03:00
break ;
case TRACE_OFF :
rc | = unregister_trace_kfree_skb ( trace_kfree_skb_hit ) ;
2009-05-21 11:36:08 +04:00
rc | = unregister_trace_napi_poll ( trace_napi_poll_hit ) ;
2009-03-11 12:51:26 +03:00
tracepoint_synchronize_unregister ( ) ;
2009-05-21 11:36:08 +04:00
/*
* Clean the device list
*/
list_for_each_entry_safe ( new_stat , temp , & hw_stats_list , list ) {
if ( new_stat - > dev = = NULL ) {
list_del_rcu ( & new_stat - > list ) ;
call_rcu ( & new_stat - > rcu , free_dm_hw_stat ) ;
}
}
2009-03-11 12:51:26 +03:00
break ;
default :
rc = 1 ;
break ;
}
2009-05-21 11:36:08 +04:00
if ( ! rc )
trace_state = state ;
spin_unlock ( & trace_state_lock ) ;
2009-03-11 12:51:26 +03:00
if ( rc )
return - EINPROGRESS ;
return rc ;
}
/*
 * Handler for NET_DM_CMD_CONFIG.  Configuration is not implemented; the
 * request is always rejected with -ENOTSUPP.
 */
static int net_dm_cmd_config ( struct sk_buff * skb ,
struct genl_info * info )
{
return - ENOTSUPP ;
}
static int net_dm_cmd_trace ( struct sk_buff * skb ,
struct genl_info * info )
{
switch ( info - > genlhdr - > cmd ) {
case NET_DM_CMD_START :
return set_all_monitor_traces ( TRACE_ON ) ;
break ;
case NET_DM_CMD_STOP :
return set_all_monitor_traces ( TRACE_OFF ) ;
break ;
}
return - ENOTSUPP ;
}
2009-05-21 11:36:08 +04:00
static int dropmon_net_event ( struct notifier_block * ev_block ,
unsigned long event , void * ptr )
{
struct net_device * dev = ptr ;
struct dm_hw_stat_delta * new_stat = NULL ;
struct dm_hw_stat_delta * tmp ;
switch ( event ) {
case NETDEV_REGISTER :
new_stat = kzalloc ( sizeof ( struct dm_hw_stat_delta ) , GFP_KERNEL ) ;
if ( ! new_stat )
goto out ;
new_stat - > dev = dev ;
2009-09-03 01:37:45 +04:00
new_stat - > last_rx = jiffies ;
2009-05-21 11:36:08 +04:00
INIT_RCU_HEAD ( & new_stat - > rcu ) ;
spin_lock ( & trace_state_lock ) ;
list_add_rcu ( & new_stat - > list , & hw_stats_list ) ;
spin_unlock ( & trace_state_lock ) ;
break ;
case NETDEV_UNREGISTER :
spin_lock ( & trace_state_lock ) ;
list_for_each_entry_safe ( new_stat , tmp , & hw_stats_list , list ) {
if ( new_stat - > dev = = dev ) {
new_stat - > dev = NULL ;
if ( trace_state = = TRACE_OFF ) {
list_del_rcu ( & new_stat - > list ) ;
call_rcu ( & new_stat - > rcu , free_dm_hw_stat ) ;
break ;
}
}
}
spin_unlock ( & trace_state_lock ) ;
break ;
}
out :
return NOTIFY_DONE ;
}
2009-03-11 12:51:26 +03:00
/*
 * Generic netlink operations registered one by one in
 * init_net_drop_monitor().  START and STOP share net_dm_cmd_trace(), which
 * tells them apart by the command in the request header.
 */
static struct genl_ops dropmon_ops [ ] = {
{
. cmd = NET_DM_CMD_CONFIG ,
. doit = net_dm_cmd_config ,
} ,
{
. cmd = NET_DM_CMD_START ,
. doit = net_dm_cmd_trace ,
} ,
{
. cmd = NET_DM_CMD_STOP ,
. doit = net_dm_cmd_trace ,
} ,
} ;
2009-05-21 11:36:08 +04:00
/*
 * Notifier block registered with register_netdevice_notifier() in
 * init_net_drop_monitor(); events are handled by dropmon_net_event().
 */
static struct notifier_block dropmon_net_notifier = {
. notifier_call = dropmon_net_event
} ;
2009-03-11 12:51:26 +03:00
/*
 * Late-init entry point for the drop monitor.
 *
 * Registers the generic netlink family and its operations, registers the
 * netdevice notifier, and then prepares the alert buffer, work item and
 * timer of every present CPU.
 *
 * Returns 0 on success; -ENOSPC if pointers are wider than 8 bytes;
 * -EFAULT if the family or one of its operations cannot be registered; or
 * the negative error from register_netdevice_notifier().  The family is
 * unregistered again on the latter two failure paths.
 */
static int __init init_net_drop_monitor(void)
{
	struct per_cpu_dm_data *data;
	int cpu;
	int rc, i;

	printk(KERN_INFO " Initalizing network drop monitor service \n ");

	if (sizeof(void *) > 8) {
		printk(KERN_ERR " Unable to store program counters on this arch, Drop monitor failed \n ");
		return -ENOSPC;
	}

	if (genl_register_family(&net_drop_monitor_family) < 0) {
		printk(KERN_ERR " Could not create drop monitor netlink family \n ");
		return -EFAULT;
	}

	/* Any failure while registering operations is reported as -EFAULT */
	rc = -EFAULT;
	for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) {
		if (!genl_register_ops(&net_drop_monitor_family,
				       &dropmon_ops[i]))
			continue;

		printk(KERN_CRIT " Failed to register operation %d \n ",
		       dropmon_ops[i].cmd);
		goto out_unreg;
	}

	rc = register_netdevice_notifier(&dropmon_net_notifier);
	if (rc < 0) {
		printk(KERN_CRIT " Failed to register netdevice notifier \n ");
		goto out_unreg;
	}

	for_each_present_cpu(cpu) {
		data = &per_cpu(dm_cpu_data, cpu);
		reset_per_cpu_data(data);
		INIT_WORK(&data->dm_alert_work, send_dm_alert);
		init_timer(&data->send_timer);
		data->send_timer.data = cpu;
		data->send_timer.function = sched_send_work;
	}

	return 0;

out_unreg:
	genl_unregister_family(&net_drop_monitor_family);
	return rc;
}

late_initcall(init_net_drop_monitor);