#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
#include <linux/dccp.h>
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <net/flow_keys.h>
#include <scsi/fc/fc_fcoe.h>
flow_dissector: use a 64bit load/store

On Monday, 28 November 2011 at 19:06 -0500, David Miller wrote:
> From: Dimitris Michailidis <dm@chelsio.com>
> Date: Mon, 28 Nov 2011 08:25:39 -0800
>
> >> +bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys
> >> *flow)
> >> +{
> >> +	int poff, nhoff = skb_network_offset(skb);
> >> +	u8 ip_proto;
> >> +	u16 proto = skb->protocol;
> >
> > __be16 instead of u16 for proto?
>
> I'll take care of this when I apply these patches.

(CC trimmed)

Thanks David!

Here is a small patch to use one 64bit load/store on x86_64 instead of
two 32bit load/stores.

[PATCH net-next] flow_dissector: use a 64bit load/store

The gcc compiler is smart enough to use a single load/store if we
memcpy(dptr, sptr, 8) on x86_64, regardless of
CONFIG_CC_OPTIMIZE_FOR_SIZE.

In the IP header, daddr immediately follows saddr; this won't change in
the future. We only need to make sure our flow_keys (src, dst) fields
won't break the rule.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

/* copy saddr & daddr, possibly using 64bit load/store
 * Equivalent to :	flow->src = iph->saddr;
 *			flow->dst = iph->daddr;
 */
static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
{
	BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
		     offsetof(typeof(*flow), src) + sizeof(flow->src));
	memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
}
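
/* Illustrative note (not from the original source): on x86_64, gcc turns
 * the fixed-size 8-byte memcpy() above into one 64-bit load and one
 * 64-bit store, roughly:
 *
 *	mov	rax, qword ptr [iph + offsetof(struct iphdr, saddr)]
 *	mov	qword ptr [flow + offsetof(struct flow_keys, src)], rax
 *
 * whereas assigning saddr and daddr separately would emit two 32-bit
 * load/store pairs.  The BUILD_BUG_ON() is what makes the 8-byte copy
 * safe: it fails the build if dst ever stops immediately following src
 * in struct flow_keys.
 */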
/**
 * __skb_flow_get_ports - extract the upper layer ports and return them
 * @skb: sk_buff to extract the ports from
 * @thoff: transport header offset
 * @ip_proto: protocol for which to get port offset
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
 * The function will try to retrieve the ports at offset thoff + poff where poff
 * is the protocol port offset returned from proto_ports_offset
 */
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
			    void *data, int hlen)
{
	int poff = proto_ports_offset(ip_proto);

	if (!data) {
		data = skb->data;
		hlen = skb_headlen(skb);
	}

	if (poff >= 0) {
		__be32 *ports, _ports;

		ports = __skb_header_pointer(skb, thoff + poff,
					     sizeof(_ports), data, hlen, &_ports);
		if (ports)
			return *ports;
	}

	return 0;
}
EXPORT_SYMBOL(__skb_flow_get_ports);
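
/* Usage sketch (hypothetical caller, not part of this file): the returned
 * __be32 covers both 16-bit ports as they sit on the wire, so it can be
 * stored straight into struct flow_keys and split via its port16[] union
 * member, where port16[0] is the source port and port16[1] the destination
 * port:
 *
 *	struct flow_keys keys;
 *
 *	keys.ports = __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
 *	pr_debug("sport=%u dport=%u\n",
 *		 ntohs(keys.port16[0]), ntohs(keys.port16[1]));
 */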
/**
 * __skb_flow_dissect - extract the flow_keys struct and return it
 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
 * @flow: struct flow_keys to fill in
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
 * The function will try to retrieve the struct flow_keys from either the skbuff
 * or a raw buffer specified by the rest parameters
 */
bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
			void *data, __be16 proto, int nhoff, int hlen)
{
	u8 ip_proto;

	if (!data) {
		data = skb->data;
		proto = skb->protocol;
		nhoff = skb_network_offset(skb);
		hlen = skb_headlen(skb);
	}

	memset(flow, 0, sizeof(*flow));

again:
	switch (proto) {
	case htons(ETH_P_IP): {
		const struct iphdr *iph;
		struct iphdr _iph;
ip:
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		if (!iph || iph->ihl < 5)
			return false;
		nhoff += iph->ihl * 4;

		ip_proto = iph->protocol;
		if (ip_is_fragment(iph))
			ip_proto = 0;

		/* skip the address processing if skb is NULL. The assumption
		 * here is that if there is no skb we are not looking for flow
		 * info but lengths and protocols.
		 */
		if (!skb)
			break;
		iph_to_flow_copy_addrs(flow, iph);
		break;
	}
	case htons(ETH_P_IPV6): {
		const struct ipv6hdr *iph;
		struct ipv6hdr _iph;
		__be32 flow_label;

ipv6:
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		if (!iph)
			return false;

		ip_proto = iph->nexthdr;
		nhoff += sizeof(struct ipv6hdr);

		/* see comment above in IPv4 section */
		if (!skb)
			break;

		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);

		flow_label = ip6_flowlabel(iph);
		if (flow_label) {
			/* Awesome, IPv6 packet has a flow label so we can
			 * use that to represent the ports without any
			 * further dissection.
			 */
			flow->n_proto = proto;
			flow->ip_proto = ip_proto;
			flow->ports = flow_label;
			flow->thoff = (u16)nhoff;

			return true;
		}

		break;
	}
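	/* Side note (assumption based on the net/ipv6.h helper used above):
	 * ip6_flowlabel() reduces to masking the low 20 bits of the first
	 * 32-bit word of the IPv6 header, i.e. roughly:
	 *
	 *	static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
	 *	{
	 *		return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
	 *	}
	 *
	 * A non-zero label therefore gives up to 20 bits of flow entropy
	 * that can stand in for the transport ports, which is why the
	 * dissection can stop early here.
	 */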
	case htons(ETH_P_8021AD):
	case htons(ETH_P_8021Q): {
		const struct vlan_hdr *vlan;
		struct vlan_hdr _vlan;

		vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
		if (!vlan)
			return false;

		proto = vlan->h_vlan_encapsulated_proto;
		nhoff += sizeof(*vlan);
		goto again;
	}
	case htons(ETH_P_PPP_SES): {
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
			return false;
		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
		case htons(PPP_IP):
			goto ip;
		case htons(PPP_IPV6):
			goto ipv6;
		default:
			return false;
		}
	}
	case htons(ETH_P_FCOE):
		flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
		/* fall through */
	default:
		return false;
	}
	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		} *hdr, _hdr;

		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
			return false;
		/*
		 * Only look inside GRE if version zero and no
		 * routing
		 */
		if (!(hdr->flags & (GRE_VERSION | GRE_ROUTING))) {
			proto = hdr->proto;
			nhoff += 4;
			if (hdr->flags & GRE_CSUM)
				nhoff += 4;
			if (hdr->flags & GRE_KEY)
				nhoff += 4;
			if (hdr->flags & GRE_SEQ)
				nhoff += 4;
			if (proto == htons(ETH_P_TEB)) {
				const struct ethhdr *eth;
				struct ethhdr _eth;

				eth = __skb_header_pointer(skb, nhoff,
							   sizeof(_eth),
							   data, hlen, &_eth);
				if (!eth)
					return false;
				proto = eth->h_proto;
				nhoff += sizeof(*eth);
			}
			goto again;
		}
		break;
	}
	case IPPROTO_IPIP:
		proto = htons(ETH_P_IP);
		goto ip;
	case IPPROTO_IPV6:
		proto = htons(ETH_P_IPV6);
		goto ipv6;
	default:
		break;
	}
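	/* Reference sketch for the GRE case above (RFC 2784/2890 layout;
	 * added for illustration, not from the original source).  The
	 * optional fields are each 4 bytes and present only when the
	 * matching flag bit is set, which is exactly what the nhoff += 4
	 * chain walks over:
	 *
	 *	+---------------+---------------+
	 *	| C| |K|S| Res0 | Ver | Protocol |  4 bytes, always present
	 *	+---------------+---------------+
	 *	| Checksum      | Reserved1     |  4 bytes if GRE_CSUM
	 *	+---------------+---------------+
	 *	| Key                           |  4 bytes if GRE_KEY
	 *	+-------------------------------+
	 *	| Sequence Number               |  4 bytes if GRE_SEQ
	 *	+-------------------------------+
	 */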
	flow->n_proto = proto;
	flow->ip_proto = ip_proto;
	flow->thoff = (u16)nhoff;

	/* unless skb is set we don't need to record port info */
	if (skb)
		flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
						   data, hlen);

	return true;
}
EXPORT_SYMBOL(__skb_flow_dissect);
static u32 hashrnd __read_mostly;
static __always_inline void __flow_hash_secret_init(void)
{
	net_get_random_once(&hashrnd, sizeof(hashrnd));
}

static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
{
	__flow_hash_secret_init();
	return jhash_3words(a, b, c, hashrnd);
}
static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
{
	u32 hash;

	/* get a consistent hash (same value on both flow directions) */
	if (((__force u32)keys->dst < (__force u32)keys->src) ||
	    (((__force u32)keys->dst == (__force u32)keys->src) &&
	     ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
		swap(keys->dst, keys->src);
		swap(keys->port16[0], keys->port16[1]);
	}

	hash = __flow_hash_3words((__force u32)keys->dst,
				  (__force u32)keys->src,
				  (__force u32)keys->ports);
	if (!hash)
		hash = 1;

	return hash;
}

u32 flow_hash_from_keys(struct flow_keys *keys)
{
	return __flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(flow_hash_from_keys);
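
/* Property sketch (hypothetical test, not part of this file): because
 * src/dst and the two ports are put into canonical order before hashing,
 * both directions of the same flow map to the same hash value:
 *
 *	struct flow_keys fwd = { .src = a, .dst = b };
 *	struct flow_keys rev = { .src = b, .dst = a };
 *
 *	fwd.port16[0] = sport;	fwd.port16[1] = dport;
 *	rev.port16[0] = dport;	rev.port16[1] = sport;
 *
 *	WARN_ON(flow_hash_from_keys(&fwd) != flow_hash_from_keys(&rev));
 */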
/*
 * __skb_get_hash: calculate a flow hash based on src/dst addresses
 * and src/dst port numbers.  Sets hash in skb to non-zero hash value
 * on success, zero indicates no valid hash.  Also, sets l4_hash in skb
 * if hash is a canonical 4-tuple hash over transport ports.
 */
void __skb_get_hash(struct sk_buff *skb)
{
	struct flow_keys keys;

	if (!skb_flow_dissect(skb, &keys))
		return;

	if (keys.ports)
		skb->l4_hash = 1;

	skb->sw_hash = 1;

	skb->hash = __flow_hash_from_keys(&keys);
}
EXPORT_SYMBOL(__skb_get_hash);
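
/* For context (sketch of the inline wrapper in include/linux/skbuff.h,
 * reproduced from memory, so treat as approximate): callers normally go
 * through skb_get_hash(), which only falls back to the dissection above
 * when neither a hardware (l4_hash) nor a software (sw_hash) hash is
 * already cached in the skb:
 *
 *	static inline __u32 skb_get_hash(struct sk_buff *skb)
 *	{
 *		if (!skb->l4_hash && !skb->sw_hash)
 *			__skb_get_hash(skb);
 *		return skb->hash;
 *	}
 */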
/*
 * Returns a Tx hash based on the given packet descriptor and the number
 * of Tx queues to be used as a distribution range.
 */
u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
		  unsigned int num_tx_queues)
{
	u32 hash;
	u16 qoffset = 0;
	u16 qcount = num_tx_queues;

	if (skb_rx_queue_recorded(skb)) {
		hash = skb_get_rx_queue(skb);
		while (unlikely(hash >= num_tx_queues))
			hash -= num_tx_queues;
		return hash;
	}

	if (dev->num_tc) {
		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
		qoffset = dev->tc_to_txq[tc].offset;
		qcount = dev->tc_to_txq[tc].count;
	}

	return (u16)reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
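
/* Note on reciprocal_scale() (defined in linux/kernel.h; shown here for
 * illustration): it maps a u32 hash into the range [0, qcount) with a
 * multiply and a shift instead of a modulo, i.e.
 *
 *	return (u32)(((u64)val * ep_ro) >> 32);
 *
 * which is considerably cheaper than a division on most CPUs, at the
 * cost of requiring a reasonably uniform hash as input.
 */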
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
		   const struct flow_keys *keys, int hlen)
{
	u32 poff = keys->thoff;

	switch (keys->ip_proto) {
	case IPPROTO_TCP: {
		/* access doff as u8 to avoid unaligned access */
		const u8 *doff;
		u8 _doff;

		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
					    data, hlen, &_doff);
		if (!doff)
			return poff;

		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
		break;
	}
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		poff += sizeof(struct udphdr);
		break;
	/* For the rest, we do not really care about header
	 * extensions at this point for now.
	 */
	case IPPROTO_ICMP:
		poff += sizeof(struct icmphdr);
		break;
	case IPPROTO_ICMPV6:
		poff += sizeof(struct icmp6hdr);
		break;
	case IPPROTO_IGMP:
		poff += sizeof(struct igmphdr);
		break;
	case IPPROTO_DCCP:
		poff += sizeof(struct dccp_hdr);
		break;
	case IPPROTO_SCTP:
		poff += sizeof(struct sctphdr);
		break;
	}

	return poff;
}
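
/* Worked example for the TCP branch above (illustration, not from the
 * original source): byte 12 of the TCP header holds the 4-bit data
 * offset in its high nibble, counted in 32-bit words, so
 * (*doff & 0xF0) >> 2 yields the header length in bytes in one step:
 * a raw byte of 0x50 (doff = 5) gives 0x50 >> 2 = 20 bytes (no options),
 * and 0xA0 (doff = 10) gives 40 bytes.  The max_t() guards against a
 * corrupt offset smaller than the 20-byte minimum TCP header.
 */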
/* skb_get_poff() returns the offset to the payload as far as it could
 * be dissected.  The main user is currently BPF, so that we can dynamically
 * truncate packets without needing to push actual payload to the user
 * space and can analyze headers only, instead.
 */
u32 skb_get_poff(const struct sk_buff *skb)
{
	struct flow_keys keys;

	if (!skb_flow_dissect(skb, &keys))
		return 0;

	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
}
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *dev_maps;
	struct xps_map *map;
	int queue_index = -1;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps);
	if (dev_maps) {
		map = rcu_dereference(
		    dev_maps->cpu_map[raw_smp_processor_id()]);
		if (map) {
			if (map->len == 1)
				queue_index = map->queues[0];
			else
				queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
									   map->len)];
			if (unlikely(queue_index >= dev->real_num_tx_queues))
				queue_index = -1;
		}
	}
	rcu_read_unlock();

	return queue_index;
#else
	return -1;
#endif
}
static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	int queue_index = sk_tx_queue_get(sk);

	if (queue_index < 0 || skb->ooo_okay ||
	    queue_index >= dev->real_num_tx_queues) {
		int new_index = get_xps_queue(dev, skb);
		if (new_index < 0)
			new_index = skb_tx_hash(dev, skb);

		if (queue_index != new_index && sk &&
		    rcu_access_pointer(sk->sk_dst_cache))
			sk_tx_queue_set(sk, new_index);

		queue_index = new_index;
	}

	return queue_index;
}

struct netdev_queue *netdev_pick_tx(struct net_device *dev,
				    struct sk_buff *skb,
				    void *accel_priv)
{
	int queue_index = 0;

	if (dev->real_num_tx_queues != 1) {
		const struct net_device_ops *ops = dev->netdev_ops;
		if (ops->ndo_select_queue)
			queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
							    __netdev_pick_tx);
		else
			queue_index = __netdev_pick_tx(dev, skb);

		if (!accel_priv)
			queue_index = netdev_cap_txqueue(dev, queue_index);
	}

	skb_set_queue_mapping(skb, queue_index);
	return netdev_get_tx_queue(dev, queue_index);
}
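
/* Usage sketch (hypothetical driver, not part of this file; the exact
 * ndo_select_queue() signature is quoted from memory for this kernel
 * era): a driver's queue-selection hook receives __netdev_pick_tx as
 * the fallback argument and can simply defer to it when it has no
 * special placement policy of its own:
 *
 *	static u16 foo_select_queue(struct net_device *dev,
 *				    struct sk_buff *skb,
 *				    void *accel_priv,
 *				    select_queue_fallback_t fallback)
 *	{
 *		return fallback(dev, skb);
 *	}
 */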