2009-02-24 18:30:18 +03:00
/*
 * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
# include <linux/kernel.h>
# include <net/sock.h>
# include <linux/in.h>
2018-07-24 06:51:21 +03:00
# include <linux/ipv6.h>
2009-02-24 18:30:18 +03:00
# include <linux/if_arp.h>
2010-05-12 02:09:45 +04:00
# include <linux/jhash.h>
2011-06-16 06:09:57 +04:00
# include <linux/ratelimit.h>
2009-02-24 18:30:18 +03:00
# include "rds.h"
2015-10-30 18:49:10 +03:00
static struct rhashtable bind_hash_table ;
2017-08-25 17:21:45 +03:00
static const struct rhashtable_params ht_parms = {
2015-10-30 18:49:10 +03:00
. nelem_hint = 768 ,
2018-07-24 06:51:21 +03:00
. key_len = RDS_BOUND_KEY_LEN ,
2015-10-30 18:49:10 +03:00
. key_offset = offsetof ( struct rds_sock , rs_bound_key ) ,
. head_offset = offsetof ( struct rds_sock , rs_bound_node ) ,
. max_size = 16384 ,
. min_size = 1024 ,
2014-02-12 07:34:25 +04:00
} ;
2018-07-24 06:51:21 +03:00
/* Build the flat byte key used for bind hash table lookups and inserts.
 *
 * The key is the address, the port and the scope id copied back to back, in
 * that order.  The port is stored exactly as passed in, i.e. in network byte
 * order.  @key must have room for all three fields.
 */
static inline void __rds_create_bind_key(u8 *key, const struct in6_addr *addr,
					 __be16 port, __u32 scope_id)
{
	const size_t port_off = sizeof(*addr);
	const size_t scope_off = port_off + sizeof(port);

	memcpy(key, addr, sizeof(*addr));
	memcpy(key + port_off, &port, sizeof(port));
	memcpy(key + scope_off, &scope_id, sizeof(scope_id));
}
2009-02-24 18:30:18 +03:00
/*
* Return the rds_sock bound at the given local address .
*
* The rx path can race with rds_release . We notice if rds_release ( ) has
* marked this socket and don ' t return a rs ref to the rx path .
*/
2018-07-24 06:51:21 +03:00
struct rds_sock * rds_find_bound ( const struct in6_addr * addr , __be16 port ,
__u32 scope_id )
2009-02-24 18:30:18 +03:00
{
2018-07-24 06:51:21 +03:00
u8 key [ RDS_BOUND_KEY_LEN ] ;
2009-02-24 18:30:18 +03:00
struct rds_sock * rs ;
2010-05-12 02:09:45 +04:00
2018-07-24 06:51:21 +03:00
__rds_create_bind_key ( key , addr , port , scope_id ) ;
2018-09-11 04:27:26 +03:00
rcu_read_lock ( ) ;
rs = rhashtable_lookup ( & bind_hash_table , key , ht_parms ) ;
2019-01-31 19:47:10 +03:00
if ( rs & & ( sock_flag ( rds_rs_to_sk ( rs ) , SOCK_DEAD ) | |
! refcount_inc_not_zero ( & rds_rs_to_sk ( rs ) - > sk_refcnt ) ) )
2009-02-24 18:30:18 +03:00
rs = NULL ;
2019-01-31 19:47:10 +03:00
2018-09-11 04:27:26 +03:00
rcu_read_unlock ( ) ;
2009-02-24 18:30:18 +03:00
2018-07-24 06:51:21 +03:00
rdsdebug ( " returning rs %p for %pI6c:%u \n " , rs , addr ,
ntohs ( port ) ) ;
2012-02-03 20:09:23 +04:00
2009-02-24 18:30:18 +03:00
return rs ;
}
/* returns -ve errno or +ve port */
2018-07-24 06:51:21 +03:00
static int rds_add_bound ( struct rds_sock * rs , const struct in6_addr * addr ,
__be16 * port , __u32 scope_id )
2009-02-24 18:30:18 +03:00
{
int ret = - EADDRINUSE ;
u16 rover , last ;
2018-07-24 06:51:21 +03:00
u8 key [ RDS_BOUND_KEY_LEN ] ;
2009-02-24 18:30:18 +03:00
if ( * port ! = 0 ) {
rover = be16_to_cpu ( * port ) ;
2016-07-14 13:51:03 +03:00
if ( rover = = RDS_FLAG_PROBE_PORT )
return - EINVAL ;
2009-02-24 18:30:18 +03:00
last = rover ;
} else {
2014-01-11 16:15:59 +04:00
rover = max_t ( u16 , prandom_u32 ( ) , 2 ) ;
2009-02-24 18:30:18 +03:00
last = rover - 1 ;
}
do {
if ( rover = = 0 )
rover + + ;
2014-02-12 07:34:25 +04:00
2016-07-14 13:51:03 +03:00
if ( rover = = RDS_FLAG_PROBE_PORT )
continue ;
2018-07-24 06:51:21 +03:00
__rds_create_bind_key ( key , addr , cpu_to_be16 ( rover ) ,
scope_id ) ;
if ( rhashtable_lookup_fast ( & bind_hash_table , key , ht_parms ) )
2015-10-30 18:49:10 +03:00
continue ;
2018-07-24 06:51:21 +03:00
memcpy ( rs - > rs_bound_key , key , sizeof ( rs - > rs_bound_key ) ) ;
rs - > rs_bound_addr = * addr ;
2016-07-14 13:51:03 +03:00
net_get_random_once ( & rs - > rs_hash_initval ,
sizeof ( rs - > rs_hash_initval ) ) ;
2015-10-30 18:49:10 +03:00
rs - > rs_bound_port = cpu_to_be16 ( rover ) ;
rs - > rs_bound_node . next = NULL ;
rds_sock_addref ( rs ) ;
if ( ! rhashtable_insert_fast ( & bind_hash_table ,
& rs - > rs_bound_node , ht_parms ) ) {
2010-05-12 02:09:45 +04:00
* port = rs - > rs_bound_port ;
2018-07-24 06:51:22 +03:00
rs - > rs_bound_scope_id = scope_id ;
2009-02-24 18:30:18 +03:00
ret = 0 ;
2018-07-24 06:51:22 +03:00
rdsdebug ( " rs %p binding to %pI6c:%d \n " ,
rs , addr , ( int ) ntohs ( * port ) ) ;
2009-02-24 18:30:18 +03:00
break ;
2012-02-03 20:08:50 +04:00
} else {
2018-07-24 06:51:21 +03:00
rs - > rs_bound_addr = in6addr_any ;
2015-10-30 18:49:10 +03:00
rds_sock_put ( rs ) ;
ret = - ENOMEM ;
break ;
2009-02-24 18:30:18 +03:00
}
} while ( rover + + ! = last ) ;
return ret ;
}
void rds_remove_bound ( struct rds_sock * rs )
{
2018-07-24 06:51:21 +03:00
if ( ipv6_addr_any ( & rs - > rs_bound_addr ) )
2015-10-30 18:49:10 +03:00
return ;
2009-02-24 18:30:18 +03:00
2018-07-24 06:51:21 +03:00
rdsdebug ( " rs %p unbinding from %pI6c:%d \n " ,
2015-10-30 18:49:10 +03:00
rs , & rs - > rs_bound_addr ,
ntohs ( rs - > rs_bound_port ) ) ;
2009-02-24 18:30:18 +03:00
2015-10-30 18:49:10 +03:00
rhashtable_remove_fast ( & bind_hash_table , & rs - > rs_bound_node , ht_parms ) ;
rds_sock_put ( rs ) ;
2018-07-24 06:51:21 +03:00
rs - > rs_bound_addr = in6addr_any ;
2009-02-24 18:30:18 +03:00
}
int rds_bind ( struct socket * sock , struct sockaddr * uaddr , int addr_len )
{
struct sock * sk = sock - > sk ;
struct rds_sock * rs = rds_sk_to_rs ( sk ) ;
2018-07-24 06:51:21 +03:00
struct in6_addr v6addr , * binding_addr ;
2009-02-24 18:30:18 +03:00
struct rds_transport * trans ;
2018-07-24 06:51:21 +03:00
__u32 scope_id = 0 ;
2009-02-24 18:30:18 +03:00
int ret = 0 ;
2018-07-24 06:51:21 +03:00
__be16 port ;
2009-02-24 18:30:18 +03:00
2018-07-24 06:51:22 +03:00
/* We allow an RDS socket to be bound to either IPv4 or IPv6
* address .
2018-07-24 06:51:21 +03:00
*/
2019-04-12 13:51:52 +03:00
if ( addr_len < offsetofend ( struct sockaddr , sa_family ) )
return - EINVAL ;
2018-07-24 06:51:22 +03:00
if ( uaddr - > sa_family = = AF_INET ) {
2018-07-24 06:51:21 +03:00
struct sockaddr_in * sin = ( struct sockaddr_in * ) uaddr ;
2018-07-24 06:51:22 +03:00
if ( addr_len < sizeof ( struct sockaddr_in ) | |
sin - > sin_addr . s_addr = = htonl ( INADDR_ANY ) | |
sin - > sin_addr . s_addr = = htonl ( INADDR_BROADCAST ) | |
2019-09-03 02:29:36 +03:00
ipv4_is_multicast ( sin - > sin_addr . s_addr ) )
2018-07-24 06:51:21 +03:00
return - EINVAL ;
ipv6_addr_set_v4mapped ( sin - > sin_addr . s_addr , & v6addr ) ;
binding_addr = & v6addr ;
port = sin - > sin_port ;
2018-07-31 08:48:42 +03:00
# if IS_ENABLED(CONFIG_IPV6)
2018-07-24 06:51:22 +03:00
} else if ( uaddr - > sa_family = = AF_INET6 ) {
struct sockaddr_in6 * sin6 = ( struct sockaddr_in6 * ) uaddr ;
2018-07-31 08:48:42 +03:00
int addr_type ;
2018-07-24 06:51:22 +03:00
if ( addr_len < sizeof ( struct sockaddr_in6 ) )
return - EINVAL ;
addr_type = ipv6_addr_type ( & sin6 - > sin6_addr ) ;
if ( ! ( addr_type & IPV6_ADDR_UNICAST ) ) {
__be32 addr4 ;
if ( ! ( addr_type & IPV6_ADDR_MAPPED ) )
return - EINVAL ;
/* It is a mapped address. Need to do some sanity
* checks .
*/
addr4 = sin6 - > sin6_addr . s6_addr32 [ 3 ] ;
if ( addr4 = = htonl ( INADDR_ANY ) | |
addr4 = = htonl ( INADDR_BROADCAST ) | |
2019-09-03 02:29:36 +03:00
ipv4_is_multicast ( addr4 ) )
2018-07-24 06:51:22 +03:00
return - EINVAL ;
}
/* The scope ID must be specified for link local address. */
if ( addr_type & IPV6_ADDR_LINKLOCAL ) {
if ( sin6 - > sin6_scope_id = = 0 )
return - EINVAL ;
scope_id = sin6 - > sin6_scope_id ;
}
binding_addr = & sin6 - > sin6_addr ;
port = sin6 - > sin6_port ;
2018-07-31 08:48:42 +03:00
# endif
2018-07-24 06:51:21 +03:00
} else {
return - EINVAL ;
}
2009-02-24 18:30:18 +03:00
lock_sock ( sk ) ;
2018-07-24 06:51:21 +03:00
/* RDS socket does not allow re-binding. */
if ( ! ipv6_addr_any ( & rs - > rs_bound_addr ) ) {
2009-02-24 18:30:18 +03:00
ret = - EINVAL ;
goto out ;
}
2018-07-24 06:51:22 +03:00
/* Socket is connected. The binding address should have the same
* scope ID as the connected address , except the case when one is
* non - link local address ( scope_id is 0 ) .
*/
if ( ! ipv6_addr_any ( & rs - > rs_conn_addr ) & & scope_id & &
rs - > rs_bound_scope_id & &
scope_id ! = rs - > rs_bound_scope_id ) {
ret = - EINVAL ;
goto out ;
}
2009-02-24 18:30:18 +03:00
2019-09-11 12:58:05 +03:00
/* The transport can be set using SO_RDS_TRANSPORT option before the
* socket is bound .
*/
if ( rs - > rs_transport ) {
2015-10-11 23:46:03 +03:00
trans = rs - > rs_transport ;
2019-09-24 18:51:16 +03:00
if ( ! trans - > laddr_check | |
trans - > laddr_check ( sock_net ( sock - > sk ) ,
2018-07-24 06:51:21 +03:00
binding_addr , scope_id ) ! = 0 ) {
2015-10-11 23:46:03 +03:00
ret = - ENOPROTOOPT ;
2019-09-11 12:58:05 +03:00
goto out ;
2015-10-11 23:46:03 +03:00
}
2019-09-11 12:58:05 +03:00
} else {
trans = rds_trans_get_preferred ( sock_net ( sock - > sk ) ,
binding_addr , scope_id ) ;
if ( ! trans ) {
ret = - EADDRNOTAVAIL ;
pr_info_ratelimited ( " RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma? \n " ,
__func__ , binding_addr ) ;
goto out ;
}
rs - > rs_transport = trans ;
2009-02-24 18:30:18 +03:00
}
2019-09-11 12:58:05 +03:00
sock_set_flag ( sk , SOCK_RCU_FREE ) ;
ret = rds_add_bound ( rs , binding_addr , & port , scope_id ) ;
2019-09-24 18:51:16 +03:00
if ( ret )
rs - > rs_transport = NULL ;
2009-02-24 18:30:18 +03:00
out :
release_sock ( sk ) ;
return ret ;
}
2014-02-12 07:34:25 +04:00
2015-10-30 18:49:10 +03:00
void rds_bind_lock_destroy ( void )
2014-02-12 07:34:25 +04:00
{
2015-10-30 18:49:10 +03:00
rhashtable_destroy ( & bind_hash_table ) ;
}
2014-02-12 07:34:25 +04:00
2015-10-30 18:49:10 +03:00
/* Initialise the bind hash table with ht_parms.
 *
 * Returns the result of rhashtable_init() unchanged.
 */
int rds_bind_lock_init(void)
{
	return rhashtable_init(&bind_hash_table, &ht_parms);
}