2017-04-13 06:29:22 +03:00
/*
* Copyright ( c ) 2017 Intel Corporation .
*
* This file is provided under a dual BSD / GPLv2 license . When using or
* redistributing this file , you may do so under either license .
*
* GPL LICENSE SUMMARY
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful , but
* WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms , with or without
* modification , are permitted provided that the following conditions
* are met :
*
* - Redistributions of source code must retain the above copyright
* notice , this list of conditions and the following disclaimer .
* - Redistributions in binary form must reproduce the above copyright
* notice , this list of conditions and the following disclaimer in
* the documentation and / or other materials provided with the
* distribution .
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission .
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* " AS IS " AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT
* LIMITED TO , THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL ,
* SPECIAL , EXEMPLARY , OR CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT
* LIMITED TO , PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE ,
* DATA , OR PROFITS ; OR BUSINESS INTERRUPTION ) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY , WHETHER IN CONTRACT , STRICT LIABILITY , OR TORT
* ( INCLUDING NEGLIGENCE OR OTHERWISE ) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE , EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE .
*
*/
/*
* This file contains OPA VNIC encapsulation / decapsulation function .
*/
# include <linux/if_ether.h>
# include <linux/if_vlan.h>
# include "opa_vnic_internal.h"
/* OPA 16B Header fields */
# define OPA_16B_LID_MASK 0xFFFFFull
# define OPA_16B_SLID_HIGH_SHFT 8
# define OPA_16B_SLID_MASK 0xF00ull
# define OPA_16B_DLID_MASK 0xF000ull
# define OPA_16B_DLID_HIGH_SHFT 12
# define OPA_16B_LEN_SHFT 20
# define OPA_16B_SC_SHFT 20
# define OPA_16B_RC_SHFT 25
# define OPA_16B_PKEY_SHFT 16
# define OPA_VNIC_L4_HDR_SHFT 16
/* L2+L4 hdr len is 20 bytes (five 32-bit words, despite the QW in the name) */
# define OPA_VNIC_HDR_QW_LEN 5
/*
 * opa_vnic_make_header - build the OPA 16B L2 + L4 header
 * @hdr:     destination buffer; OPA_VNIC_HDR_LEN bytes are written to it
 * @slid:    source LID
 * @dlid:    destination LID
 * @len:     packet length field value
 * @pkey:    partition key
 * @entropy: entropy value
 * @sc:      service class
 * @rc:      routing control
 * @l4_type: L4 type
 * @l4_hdr:  L4 header value
 *
 * The header is assembled in a local array of OPA_VNIC_HDR_QW_LEN 32-bit
 * words and then copied to @hdr.  The low 20 bits of each LID go into
 * word 0 (slid) / word 1 (dlid); bits 20..23 of each LID are folded into
 * word 2.
 *
 * Every narrow (u8/u16) field is widened to u32 before it is shifted.
 * Without the cast the operand is promoted to a signed int, and shifting
 * a set bit into or past the sign bit is undefined behaviour in ISO C.
 */
static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len,
					u16 pkey, u16 entropy, u8 sc, u8 rc,
					u8 l4_type, u16 l4_hdr)
{
	/* h[1]: LT=1, 16B L2=10 */
	u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0};

	h[2] = l4_type;
	h[3] = entropy;
	h[4] = (u32)l4_hdr << OPA_VNIC_L4_HDR_SHFT;

	/* Extract and set 4 upper bits and 20 lower bits of the lids */
	h[0] |= (slid & OPA_16B_LID_MASK);
	h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK);

	h[1] |= (dlid & OPA_16B_LID_MASK);
	h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK);

	h[0] |= ((u32)len << OPA_16B_LEN_SHFT);
	h[1] |= ((u32)rc << OPA_16B_RC_SHFT);
	h[1] |= ((u32)sc << OPA_16B_SC_SHFT);
	h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT);

	memcpy(hdr, h, OPA_VNIC_HDR_LEN);
}
2017-04-13 06:29:25 +03:00
/*
* Using a simple hash table for mac table implementation with the last octet
* of mac address as a key .
*/
/*
 * opa_vnic_free_mac_tbl - free every node of a mac table and the table itself
 *
 * A NULL @mactbl is accepted and ignored.
 */
static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
{
	struct opa_vnic_mac_tbl_node *cur;
	struct hlist_node *next;
	int bucket;

	if (mactbl) {
		/* safe iteration: each node is unlinked and freed in the loop */
		vnic_hash_for_each_safe(mactbl, bucket, next, cur, hlist) {
			hash_del(&cur->hlist);
			kfree(cur);
		}
		kfree(mactbl);
	}
}
/*
 * opa_vnic_alloc_mac_tbl - allocate and initialize an empty mac table
 *
 * Returns a zeroed, initialized array of OPA_VNIC_MAC_TBL_SIZE hash heads,
 * or ERR_PTR(-ENOMEM) if the allocation fails.
 */
static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
{
	struct hlist_head *tbl;
	u32 nbytes;

	nbytes = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
	tbl = kzalloc(nbytes, GFP_KERNEL);
	if (tbl) {
		vnic_hash_init(tbl);
		return tbl;
	}

	return ERR_PTR(-ENOMEM);
}
/* opa_vnic_release_mac_tbl - empty and free the mac table */
void opa_vnic_release_mac_tbl ( struct opa_vnic_adapter * adapter )
{
struct hlist_head * mactbl ;
mutex_lock ( & adapter - > mactbl_lock ) ;
mactbl = rcu_access_pointer ( adapter - > mactbl ) ;
rcu_assign_pointer ( adapter - > mactbl , NULL ) ;
synchronize_rcu ( ) ;
opa_vnic_free_mac_tbl ( mactbl ) ;
2017-09-26 16:44:13 +03:00
adapter - > info . vport . mac_tbl_digest = 0 ;
2017-04-13 06:29:25 +03:00
mutex_unlock ( & adapter - > mactbl_lock ) ;
}
/*
* opa_vnic_query_mac_tbl - query the mac table for a section
*
* This function implements query of specific function of the mac table .
* The function also expects the requested range to be valid .
*/
void opa_vnic_query_mac_tbl ( struct opa_vnic_adapter * adapter ,
struct opa_veswport_mactable * tbl )
{
struct opa_vnic_mac_tbl_node * node ;
struct hlist_head * mactbl ;
int bkt ;
u16 loffset , lnum_entries ;
rcu_read_lock ( ) ;
mactbl = rcu_dereference ( adapter - > mactbl ) ;
if ( ! mactbl )
goto get_mac_done ;
loffset = be16_to_cpu ( tbl - > offset ) ;
lnum_entries = be16_to_cpu ( tbl - > num_entries ) ;
vnic_hash_for_each ( mactbl , bkt , node , hlist ) {
struct __opa_vnic_mactable_entry * nentry = & node - > entry ;
struct opa_veswport_mactable_entry * entry ;
if ( ( node - > index < loffset ) | |
( node - > index > = ( loffset + lnum_entries ) ) )
continue ;
/* populate entry in the tbl corresponding to the index */
entry = & tbl - > tbl_entries [ node - > index - loffset ] ;
memcpy ( entry - > mac_addr , nentry - > mac_addr ,
ARRAY_SIZE ( entry - > mac_addr ) ) ;
memcpy ( entry - > mac_addr_mask , nentry - > mac_addr_mask ,
ARRAY_SIZE ( entry - > mac_addr_mask ) ) ;
entry - > dlid_sd = cpu_to_be32 ( nentry - > dlid_sd ) ;
}
tbl - > mac_tbl_digest = cpu_to_be32 ( adapter - > info . vport . mac_tbl_digest ) ;
get_mac_done :
rcu_read_unlock ( ) ;
}
/*
* opa_vnic_update_mac_tbl - update mac table section
*
* This function updates the specified section of the mac table .
* The procedure includes following steps .
* - Allocate a new mac ( hash ) table .
* - Add the specified entries to the new table .
* ( except the ones that are requested to be deleted ) .
* - Add all the other entries from the old mac table .
* - If there is a failure , free the new table and return .
* - Switch to the new table .
* - Free the old table and return .
*
* The function also expects the requested range to be valid .
*/
int opa_vnic_update_mac_tbl ( struct opa_vnic_adapter * adapter ,
struct opa_veswport_mactable * tbl )
{
struct opa_vnic_mac_tbl_node * node , * new_node ;
struct hlist_head * new_mactbl , * old_mactbl ;
int i , bkt , rc = 0 ;
u8 key ;
u16 loffset , lnum_entries ;
mutex_lock ( & adapter - > mactbl_lock ) ;
/* allocate new mac table */
new_mactbl = opa_vnic_alloc_mac_tbl ( ) ;
if ( IS_ERR ( new_mactbl ) ) {
mutex_unlock ( & adapter - > mactbl_lock ) ;
return PTR_ERR ( new_mactbl ) ;
}
loffset = be16_to_cpu ( tbl - > offset ) ;
lnum_entries = be16_to_cpu ( tbl - > num_entries ) ;
/* add updated entries to the new mac table */
for ( i = 0 ; i < lnum_entries ; i + + ) {
struct __opa_vnic_mactable_entry * nentry ;
struct opa_veswport_mactable_entry * entry =
& tbl - > tbl_entries [ i ] ;
u8 * mac_addr = entry - > mac_addr ;
u8 empty_mac [ ETH_ALEN ] = { 0 } ;
v_dbg ( " new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x \n " ,
loffset + i , mac_addr [ 0 ] , mac_addr [ 1 ] , mac_addr [ 2 ] ,
mac_addr [ 3 ] , mac_addr [ 4 ] , mac_addr [ 5 ] ,
entry - > dlid_sd ) ;
/* if the entry is being removed, do not add it */
if ( ! memcmp ( mac_addr , empty_mac , ARRAY_SIZE ( empty_mac ) ) )
continue ;
node = kzalloc ( sizeof ( * node ) , GFP_KERNEL ) ;
if ( ! node ) {
rc = - ENOMEM ;
goto updt_done ;
}
node - > index = loffset + i ;
nentry = & node - > entry ;
memcpy ( nentry - > mac_addr , entry - > mac_addr ,
ARRAY_SIZE ( nentry - > mac_addr ) ) ;
memcpy ( nentry - > mac_addr_mask , entry - > mac_addr_mask ,
ARRAY_SIZE ( nentry - > mac_addr_mask ) ) ;
nentry - > dlid_sd = be32_to_cpu ( entry - > dlid_sd ) ;
key = node - > entry . mac_addr [ OPA_VNIC_MAC_HASH_IDX ] ;
vnic_hash_add ( new_mactbl , & node - > hlist , key ) ;
}
/* add other entries from current mac table to new mac table */
old_mactbl = rcu_access_pointer ( adapter - > mactbl ) ;
if ( ! old_mactbl )
goto switch_tbl ;
vnic_hash_for_each ( old_mactbl , bkt , node , hlist ) {
if ( ( node - > index > = loffset ) & &
( node - > index < ( loffset + lnum_entries ) ) )
continue ;
new_node = kzalloc ( sizeof ( * new_node ) , GFP_KERNEL ) ;
if ( ! new_node ) {
rc = - ENOMEM ;
goto updt_done ;
}
new_node - > index = node - > index ;
memcpy ( & new_node - > entry , & node - > entry , sizeof ( node - > entry ) ) ;
key = new_node - > entry . mac_addr [ OPA_VNIC_MAC_HASH_IDX ] ;
vnic_hash_add ( new_mactbl , & new_node - > hlist , key ) ;
}
switch_tbl :
/* switch to new table */
rcu_assign_pointer ( adapter - > mactbl , new_mactbl ) ;
synchronize_rcu ( ) ;
adapter - > info . vport . mac_tbl_digest = be32_to_cpu ( tbl - > mac_tbl_digest ) ;
updt_done :
/* upon failure, free the new table; otherwise, free the old table */
if ( rc )
opa_vnic_free_mac_tbl ( new_mactbl ) ;
else
opa_vnic_free_mac_tbl ( old_mactbl ) ;
mutex_unlock ( & adapter - > mactbl_lock ) ;
return rc ;
}
/* opa_vnic_chk_mac_tbl - check mac table for dlid */
static uint32_t opa_vnic_chk_mac_tbl ( struct opa_vnic_adapter * adapter ,
struct ethhdr * mac_hdr )
{
struct opa_vnic_mac_tbl_node * node ;
struct hlist_head * mactbl ;
u32 dlid = 0 ;
u8 key ;
rcu_read_lock ( ) ;
mactbl = rcu_dereference ( adapter - > mactbl ) ;
if ( unlikely ( ! mactbl ) )
goto chk_done ;
key = mac_hdr - > h_dest [ OPA_VNIC_MAC_HASH_IDX ] ;
vnic_hash_for_each_possible ( mactbl , node , hlist , key ) {
struct __opa_vnic_mactable_entry * entry = & node - > entry ;
/* if related to source mac, skip */
if ( unlikely ( OPA_VNIC_DLID_SD_IS_SRC_MAC ( entry - > dlid_sd ) ) )
continue ;
if ( ! memcmp ( node - > entry . mac_addr , mac_hdr - > h_dest ,
ARRAY_SIZE ( node - > entry . mac_addr ) ) ) {
/* mac address found */
dlid = OPA_VNIC_DLID_SD_GET_DLID ( node - > entry . dlid_sd ) ;
break ;
}
}
chk_done :
rcu_read_unlock ( ) ;
return dlid ;
}
2017-04-13 06:29:22 +03:00
/*
 * opa_vnic_get_dlid - find and return the DLID
 *
 * Resolution order:
 *  1. a match in the mac table;
 *  2. for a multicast destination, the configured u_mcast_dlid;
 *  3. for a locally administered destination, the DLID built from
 *     octets 5, 4 and 3 of the destination mac address;
 *  4. otherwise u_ucast_dlid[def_port], when @def_port is not
 *     OPA_VNIC_INVALID_PORT and is below OPA_VESW_MAX_NUM_DEF_PORT.
 *
 * Returns 0 when no DLID could be determined.
 */
static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
				  struct sk_buff *skb, u8 def_port)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
	struct __opa_veswport_info *info = &adapter->info;
	u32 dlid;

	dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr);
	if (dlid)
		return dlid;

	if (is_multicast_ether_addr(mac_hdr->h_dest)) {
		dlid = info->vesw.u_mcast_dlid;
		return dlid;
	}

	if (is_local_ether_addr(mac_hdr->h_dest)) {
		dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) |
		       ((uint32_t)mac_hdr->h_dest[4] << 8)  |
		       mac_hdr->h_dest[3];
		if (unlikely(!dlid))
			v_warn(" Null dlid in MAC address \n ");
		return dlid;
	}

	/* dlid is still 0 here; it stays 0 if def_port is unusable */
	if (def_port != OPA_VNIC_INVALID_PORT &&
	    def_port < OPA_VESW_MAX_NUM_DEF_PORT)
		dlid = info->vesw.u_ucast_dlid[def_port];

	return dlid;
}
/* opa_vnic_get_sc - return the service class */
static u8 opa_vnic_get_sc ( struct __opa_veswport_info * info ,
struct sk_buff * skb )
{
struct ethhdr * mac_hdr = ( struct ethhdr * ) skb_mac_header ( skb ) ;
u16 vlan_tci ;
u8 sc ;
if ( ! __vlan_get_tag ( skb , & vlan_tci ) ) {
u8 pcp = OPA_VNIC_VLAN_PCP ( vlan_tci ) ;
if ( is_multicast_ether_addr ( mac_hdr - > h_dest ) )
sc = info - > vport . pcp_to_sc_mc [ pcp ] ;
else
sc = info - > vport . pcp_to_sc_uc [ pcp ] ;
} else {
if ( is_multicast_ether_addr ( mac_hdr - > h_dest ) )
sc = info - > vport . non_vlan_sc_mc ;
else
sc = info - > vport . non_vlan_sc_uc ;
}
return sc ;
}
u8 opa_vnic_get_vl ( struct opa_vnic_adapter * adapter , struct sk_buff * skb )
{
struct ethhdr * mac_hdr = ( struct ethhdr * ) skb_mac_header ( skb ) ;
struct __opa_veswport_info * info = & adapter - > info ;
u8 vl ;
if ( skb_vlan_tag_present ( skb ) ) {
u8 pcp = skb_vlan_tag_get ( skb ) > > VLAN_PRIO_SHIFT ;
if ( is_multicast_ether_addr ( mac_hdr - > h_dest ) )
vl = info - > vport . pcp_to_vl_mc [ pcp ] ;
else
vl = info - > vport . pcp_to_vl_uc [ pcp ] ;
} else {
if ( is_multicast_ether_addr ( mac_hdr - > h_dest ) )
vl = info - > vport . non_vlan_vl_mc ;
else
vl = info - > vport . non_vlan_vl_uc ;
}
return vl ;
}
2017-09-26 16:44:26 +03:00
/* opa_vnic_get_rc - return the routing control */
static u8 opa_vnic_get_rc ( struct __opa_veswport_info * info ,
struct sk_buff * skb )
{
u8 proto , rout_ctrl ;
switch ( vlan_get_protocol ( skb ) ) {
case htons ( ETH_P_IPV6 ) :
proto = ipv6_hdr ( skb ) - > nexthdr ;
if ( proto = = IPPROTO_TCP )
rout_ctrl = OPA_VNIC_ENCAP_RC_EXT ( info - > vesw . rc ,
IPV6_TCP ) ;
else if ( proto = = IPPROTO_UDP )
rout_ctrl = OPA_VNIC_ENCAP_RC_EXT ( info - > vesw . rc ,
IPV6_UDP ) ;
else
rout_ctrl = OPA_VNIC_ENCAP_RC_EXT ( info - > vesw . rc , IPV6 ) ;
break ;
case htons ( ETH_P_IP ) :
proto = ip_hdr ( skb ) - > protocol ;
if ( proto = = IPPROTO_TCP )
rout_ctrl = OPA_VNIC_ENCAP_RC_EXT ( info - > vesw . rc ,
IPV4_TCP ) ;
else if ( proto = = IPPROTO_UDP )
rout_ctrl = OPA_VNIC_ENCAP_RC_EXT ( info - > vesw . rc ,
IPV4_UDP ) ;
else
rout_ctrl = OPA_VNIC_ENCAP_RC_EXT ( info - > vesw . rc , IPV4 ) ;
break ;
default :
rout_ctrl = OPA_VNIC_ENCAP_RC_EXT ( info - > vesw . rc , DEFAULT ) ;
}
return rout_ctrl ;
}
2017-04-13 06:29:22 +03:00
/* opa_vnic_calc_entropy - calculate the packet entropy */
2018-04-27 21:06:35 +03:00
u8 opa_vnic_calc_entropy ( struct sk_buff * skb )
2017-04-13 06:29:22 +03:00
{
2018-04-27 21:06:35 +03:00
u32 hash = skb_get_hash ( skb ) ;
/* store XOR of all bytes in lower 8 bits */
hash ^ = hash > > 8 ;
hash ^ = hash > > 16 ;
/* return lower 8 bits as entropy */
return ( u8 ) ( hash & 0xFF ) ;
2017-04-13 06:29:22 +03:00
}
/*
 * opa_vnic_get_def_port - get default port based on entropy
 *
 * The flow id is the sum of the two nibbles of @entropy; it is wrapped to
 * the flow table size with a mask and used to index adapter->flow_tbl.
 */
static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter,
				       u8 entropy)
{
	u8 low_nibble = entropy & 0xf;
	u8 high_nibble = entropy >> 4;
	u8 flow_id = low_nibble + high_nibble;

	return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)];
}
/* Calculate packet length including OPA header, crc and padding */
static inline int opa_vnic_wire_length ( struct sk_buff * skb )
{
u32 pad_len ;
/* padding for 8 bytes size alignment */
pad_len = - ( skb - > len + OPA_VNIC_ICRC_TAIL_LEN ) & 0x7 ;
pad_len + = OPA_VNIC_ICRC_TAIL_LEN ;
return ( skb - > len + pad_len ) > > 3 ;
}
/* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */
void opa_vnic_encap_skb ( struct opa_vnic_adapter * adapter , struct sk_buff * skb )
{
struct __opa_veswport_info * info = & adapter - > info ;
struct opa_vnic_skb_mdata * mdata ;
2017-09-26 16:44:26 +03:00
u8 def_port , sc , rc , entropy , * hdr ;
2017-04-13 06:29:22 +03:00
u16 len , l4_hdr ;
u32 dlid ;
hdr = skb_push ( skb , OPA_VNIC_HDR_LEN ) ;
2018-04-27 21:06:35 +03:00
entropy = opa_vnic_calc_entropy ( skb ) ;
2017-04-13 06:29:22 +03:00
def_port = opa_vnic_get_def_port ( adapter , entropy ) ;
len = opa_vnic_wire_length ( skb ) ;
dlid = opa_vnic_get_dlid ( adapter , skb , def_port ) ;
sc = opa_vnic_get_sc ( info , skb ) ;
2017-09-26 16:44:26 +03:00
rc = opa_vnic_get_rc ( info , skb ) ;
2017-04-13 06:29:22 +03:00
l4_hdr = info - > vesw . vesw_id ;
networking: make skb_push & __skb_push return void pointers
It seems like a historic accident that these return unsigned char *,
and in many places that means casts are required, more often than not.
Make these functions return void * and remove all the casts across
the tree, adding a (u8 *) cast only where the unsigned char pointer
was used directly, all done with the following spatch:
@@
expression SKB, LEN;
typedef u8;
identifier fn = { skb_push, __skb_push, skb_push_rcsum };
@@
- *(fn(SKB, LEN))
+ *(u8 *)fn(SKB, LEN)
@@
expression E, SKB, LEN;
identifier fn = { skb_push, __skb_push, skb_push_rcsum };
type T;
@@
- E = ((T *)(fn(SKB, LEN)))
+ E = fn(SKB, LEN)
@@
expression SKB, LEN;
identifier fn = { skb_push, __skb_push, skb_push_rcsum };
@@
- fn(SKB, LEN)[0]
+ *(u8 *)fn(SKB, LEN)
Note that the last part there converts from push(...)[0] to the
more idiomatic *(u8 *)push(...).
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-06-16 15:29:23 +03:00
mdata = skb_push ( skb , sizeof ( * mdata ) ) ;
2017-04-13 06:29:22 +03:00
mdata - > vl = opa_vnic_get_vl ( adapter , skb ) ;
mdata - > entropy = entropy ;
mdata - > flags = 0 ;
if ( unlikely ( ! dlid ) ) {
mdata - > flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR ;
return ;
}
opa_vnic_make_header ( hdr , info - > vport . encap_slid , dlid , len ,
2017-09-26 16:44:26 +03:00
info - > vesw . pkey , entropy , sc , rc ,
2017-04-13 06:29:22 +03:00
OPA_VNIC_L4_ETHR , l4_hdr ) ;
}