/*
 * linux/net/sunrpc/svcsock.c
 *
 * These are the RPC server socket internals.
 *
 * The server scheduling algorithm does not always distribute the load
 * evenly when servicing a single client. May need to modify the
 * svc_sock_enqueue procedure...
 *
 * TCP support is largely untested and may be a little slow. The problem
 * is that we currently do two separate recvfrom's, one for the 4-byte
 * record length, and the second for the actual record. This could possibly
 * be improved by always reading a minimum size of around 100 bytes and
 * tucking any superfluous bytes away in a temporary store. Still, that
 * leaves write requests out in the rain. An alternative may be to peek at
 * the first skb in the queue, and if it matches the next TCP sequence
 * number, to extract the record marker. Yuck.
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/file.h>
#include <linux/freezer.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/tcp_states.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>

#include <linux/sunrpc/types.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/stats.h>
/* SMP locking strategy:
 *
 *	svc_pool->sp_lock protects most of the fields of that pool.
 *	svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
 *	when both need to be taken (rare), svc_serv->sv_lock is first.
 *	BKL protects svc_serv->sv_nrthread.
 *	svc_sock->sk_lock protects the svc_sock->sk_deferred list
 *	and the ->sk_info_authunix cache.
 *	svc_sock->sk_xprt.xpt_flags.XPT_BUSY prevents a svc_sock being
 *	enqueued multiply.
 *
 *	Some flags can be set to certain values at any time
 *	providing that certain rules are followed:
 *
 *	XPT_CONN, XPT_DATA, can be set or cleared at any time.
 *		after a set, svc_sock_enqueue must be called.
 *		after a clear, the socket must be read/accepted.
 *		If this succeeds, it must be set again.
 *	XPT_CLOSE can be set at any time. It is never cleared.
 *	xpt_ref contains a bias of '1' until XPT_DEAD is set.
 *		so when xpt_ref hits zero, we know the transport is dead
 *		and no-one is using it.
 *	XPT_DEAD can only be set while XPT_BUSY is held, which ensures
 *		no other thread will be using the socket or will try to
 *		set XPT_DEAD.
 *
 */
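
/*
 * Lifecycle sketch of the XPT_BUSY handshake described above:
 *
 *	event (data/conn/close) --> svc_sock_enqueue()
 *	  test_and_set_bit(XPT_BUSY) succeeds --> handed to a thread
 *	  thread reads/accepts --> svc_sock_received()
 *	  clear_bit(XPT_BUSY) --> svc_sock_enqueue() again if more work
 *
 * An enqueue attempt that finds XPT_BUSY already set returns early,
 * which is what prevents a socket from being queued twice.
 */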

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
					 int *errp, int flags);
static void		svc_delete_socket(struct svc_sock *svsk);
static void		svc_udp_data_ready(struct sock *, int);
static int		svc_udp_recvfrom(struct svc_rqst *);
static int		svc_udp_sendto(struct svc_rqst *);
static void		svc_close_socket(struct svc_sock *svsk);
static void		svc_sock_detach(struct svc_xprt *);
static void		svc_sock_free(struct svc_xprt *);

static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
					  struct sockaddr *, int, int);

/* apparently the "standard" is that clients close
 * idle connections after 5 minutes, servers after
 * 6 minutes
 *   http://www.connectathon.org/talks96/nfstcp.pdf
 */
static int svc_conn_age_period = 6*60;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key svc_key[2];
static struct lock_class_key svc_slock_key[2];

static inline void svc_reclassify_socket(struct socket *sock)
{
	struct sock *sk = sock->sk;
	BUG_ON(sock_owned_by_user(sk));
	switch (sk->sk_family) {
	case AF_INET:
		sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
					      &svc_slock_key[0],
					      "sk_lock-AF_INET-NFSD",
					      &svc_key[0]);
		break;

	case AF_INET6:
		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
					      &svc_slock_key[1],
					      "sk_lock-AF_INET6-NFSD",
					      &svc_key[1]);
		break;

	default:
		BUG();
	}
}
#else
static inline void svc_reclassify_socket(struct socket *sock)
{
}
#endif

static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len)
{
	switch (addr->sa_family) {
	case AF_INET:
		snprintf(buf, len, "%u.%u.%u.%u, port=%u",
			 NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
			 ntohs(((struct sockaddr_in *) addr)->sin_port));
		break;

	case AF_INET6:
		snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
			 NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
			 ntohs(((struct sockaddr_in6 *) addr)->sin6_port));
		break;

	default:
		snprintf(buf, len, "unknown address type: %d", addr->sa_family);
		break;
	}
	return buf;
}

/**
 * svc_print_addr - Format rq_addr field for printing
 * @rqstp: svc_rqst struct containing address to print
 * @buf: target buffer for formatted address
 * @len: length of target buffer
 *
 */
char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
{
	return __svc_print_addr(svc_addr(rqstp), buf, len);
}
EXPORT_SYMBOL_GPL(svc_print_addr);
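
/*
 * Typical use, as in the dprintk() calls later in this file: the caller
 * supplies a stack buffer of RPC_MAX_ADDRBUFLEN bytes, e.g.
 *
 *	char buf[RPC_MAX_ADDRBUFLEN];
 *	dprintk("connect from %s\n", svc_print_addr(rqstp, buf, sizeof(buf)));
 */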

/*
 * Queue up an idle server thread.  Must have pool->sp_lock held.
 * Note: this is really a stack rather than a queue, so that we only
 * use as many different threads as we need, and the rest don't pollute
 * the cache.
 */
static inline void
svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
{
	list_add(&rqstp->rq_list, &pool->sp_threads);
}

/*
 * Dequeue an nfsd thread.  Must have pool->sp_lock held.
 */
static inline void
svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
{
	list_del(&rqstp->rq_list);
}

/*
 * Release an skbuff after use
 */
static void svc_release_skb(struct svc_rqst *rqstp)
{
	struct sk_buff *skb = rqstp->rq_xprt_ctxt;
	struct svc_deferred_req *dr = rqstp->rq_deferred;

	if (skb) {
		rqstp->rq_xprt_ctxt = NULL;
		dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
		skb_free_datagram(rqstp->rq_sock->sk_sk, skb);
	}
	if (dr) {
		rqstp->rq_deferred = NULL;
		kfree(dr);
	}
}

/*
 * Queue up a socket with data pending. If there are idle nfsd
 * processes, wake 'em up.
 *
 */
static void
svc_sock_enqueue(struct svc_sock *svsk)
{
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
	struct svc_pool	*pool;
	struct svc_rqst	*rqstp;
	int cpu;

	if (!(svsk->sk_xprt.xpt_flags &
	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
		return;
	if (test_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags))
		return;

	cpu = get_cpu();
	pool = svc_pool_for_cpu(svsk->sk_xprt.xpt_server, cpu);
	put_cpu();

	spin_lock_bh(&pool->sp_lock);

	if (!list_empty(&pool->sp_threads) &&
	    !list_empty(&pool->sp_sockets))
		printk(KERN_ERR
		       "svc_sock_enqueue: threads and sockets both waiting??\n");

	if (test_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags)) {
		/* Don't enqueue dead sockets */
		dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk);
		goto out_unlock;
	}

	/* Mark socket as busy. It will remain in this state until the
	 * server has processed all pending data and put the socket back
	 * on the idle list.  We update XPT_BUSY atomically because
	 * it also guards against trying to enqueue the svc_sock twice.
	 */
	if (test_and_set_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)) {
		/* Don't enqueue socket while already enqueued */
		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
		goto out_unlock;
	}
	BUG_ON(svsk->sk_xprt.xpt_pool != NULL);
	svsk->sk_xprt.xpt_pool = pool;

	/* Handle pending connection */
	if (test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags))
		goto process;

	/* Handle close in-progress */
	if (test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags))
		goto process;

	/* Check if we have space to reply to a request */
	if (!svsk->sk_xprt.xpt_ops->xpo_has_wspace(&svsk->sk_xprt)) {
		/* Don't enqueue while not enough space for reply */
		dprintk("svc: no write space, socket %p not enqueued\n", svsk);
		svsk->sk_xprt.xpt_pool = NULL;
		clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
		goto out_unlock;
	}

process:
	if (!list_empty(&pool->sp_threads)) {
		rqstp = list_entry(pool->sp_threads.next,
				   struct svc_rqst,
				   rq_list);
		dprintk("svc: socket %p served by daemon %p\n",
			svsk->sk_sk, rqstp);
		svc_thread_dequeue(pool, rqstp);
		if (rqstp->rq_sock)
			printk(KERN_ERR
				"svc_sock_enqueue: server %p, rq_sock=%p!\n",
				rqstp, rqstp->rq_sock);
		rqstp->rq_sock = svsk;
		svc_xprt_get(&svsk->sk_xprt);
		rqstp->rq_reserved = serv->sv_max_mesg;
		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
		BUG_ON(svsk->sk_xprt.xpt_pool != pool);
		wake_up(&rqstp->rq_wait);
	} else {
		dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
		list_add_tail(&svsk->sk_ready, &pool->sp_sockets);
		BUG_ON(svsk->sk_xprt.xpt_pool != pool);
	}

out_unlock:
	spin_unlock_bh(&pool->sp_lock);
}

/*
 * Dequeue the first socket.  Must be called with the pool->sp_lock held.
 */
static inline struct svc_sock *
svc_sock_dequeue(struct svc_pool *pool)
{
	struct svc_sock	*svsk;

	if (list_empty(&pool->sp_sockets))
		return NULL;

	svsk = list_entry(pool->sp_sockets.next,
			  struct svc_sock, sk_ready);
	list_del_init(&svsk->sk_ready);

	dprintk("svc: socket %p dequeued, inuse=%d\n",
		svsk->sk_sk, atomic_read(&svsk->sk_xprt.xpt_ref.refcount));

	return svsk;
}

/*
 * Having read something from a socket, check whether it
 * needs to be re-enqueued.
 * Note: XPT_DATA only gets cleared when a read-attempt finds
 * no (or insufficient) data.
 */
static inline void
svc_sock_received(struct svc_sock *svsk)
{
	svsk->sk_xprt.xpt_pool = NULL;
	clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
	svc_sock_enqueue(svsk);
}

/**
 * svc_reserve - change the space reserved for the reply to a request.
 * @rqstp:  The request in question
 * @space: new max space to reserve
 *
 * Each request reserves some space on the output queue of the socket
 * to make sure the reply fits.  This function reduces that reserved
 * space to be the amount of space used already, plus @space.
 *
 */
void svc_reserve(struct svc_rqst *rqstp, int space)
{
	space += rqstp->rq_res.head[0].iov_len;

	if (space < rqstp->rq_reserved) {
		struct svc_sock *svsk = rqstp->rq_sock;
		atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved);
		rqstp->rq_reserved = space;

		svc_sock_enqueue(svsk);
	}
}
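
/*
 * For example, a server procedure that knows its reply will be tiny can
 * call svc_reserve(rqstp, 512) (the 512 is illustrative); shrinking the
 * reservation may let svc_sock_enqueue() hand the socket to another
 * thread that was previously blocked on write space.
 */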

static void
svc_sock_release(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk = rqstp->rq_sock;

	rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);

	svc_free_res_pages(rqstp);
	rqstp->rq_res.page_len = 0;
	rqstp->rq_res.page_base = 0;

	/* Reset response buffer and release
	 * the reservation.
	 * But first, check that enough space was reserved
	 * for the reply, otherwise we have a bug!
	 */
	if ((rqstp->rq_res.len) > rqstp->rq_reserved)
		printk(KERN_ERR "RPC request reserved %d but used %d\n",
		       rqstp->rq_reserved,
		       rqstp->rq_res.len);

	rqstp->rq_res.head[0].iov_len = 0;
	svc_reserve(rqstp, 0);
	rqstp->rq_sock = NULL;

	svc_xprt_put(&svsk->sk_xprt);
}

/*
 * External function to wake up a server waiting for data.
 * This really only makes sense for services like lockd
 * which have exactly one thread anyway.
 */
void
svc_wake_up(struct svc_serv *serv)
{
	struct svc_rqst	*rqstp;
	unsigned int i;
	struct svc_pool *pool;

	for (i = 0; i < serv->sv_nrpools; i++) {
		pool = &serv->sv_pools[i];

		spin_lock_bh(&pool->sp_lock);
		if (!list_empty(&pool->sp_threads)) {
			rqstp = list_entry(pool->sp_threads.next,
					   struct svc_rqst,
					   rq_list);
			dprintk("svc: daemon %p woken up.\n", rqstp);
			/*
			svc_thread_dequeue(pool, rqstp);
			rqstp->rq_sock = NULL;
			 */
			wake_up(&rqstp->rq_wait);
		}
		spin_unlock_bh(&pool->sp_lock);
	}
}

union svc_pktinfo_u {
	struct in_pktinfo pkti;
	struct in6_pktinfo pkti6;
};
#define SVC_PKTINFO_SPACE \
	CMSG_SPACE(sizeof(union svc_pktinfo_u))

static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
{
	switch (rqstp->rq_sock->sk_sk->sk_family) {
	case AF_INET: {
			struct in_pktinfo *pki = CMSG_DATA(cmh);

			cmh->cmsg_level = SOL_IP;
			cmh->cmsg_type = IP_PKTINFO;
			pki->ipi_ifindex = 0;
			pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr;
			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
		}
		break;

	case AF_INET6: {
			struct in6_pktinfo *pki = CMSG_DATA(cmh);

			cmh->cmsg_level = SOL_IPV6;
			cmh->cmsg_type = IPV6_PKTINFO;
			pki->ipi6_ifindex = 0;
			ipv6_addr_copy(&pki->ipi6_addr,
				       &rqstp->rq_daddr.addr6);
			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
		}
		break;
	}
	return;
}
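
/*
 * SVC_PKTINFO_SPACE sizes the control-message buffer for the larger of
 * in_pktinfo and in6_pktinfo; CMSG_SPACE() adds the cmsghdr and any
 * alignment padding, so one buffer serves both address families.
 */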

/*
 * Generic sendto routine
 */
static int
svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
{
	struct svc_sock	*svsk = rqstp->rq_sock;
	struct socket	*sock = svsk->sk_sock;
	int		slen;
	union {
		struct cmsghdr	hdr;
		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
	} buffer;
	struct cmsghdr *cmh = &buffer.hdr;
	int		len = 0;
	int		result;
	int		size;
	struct page	**ppage = xdr->pages;
	size_t		base = xdr->page_base;
	unsigned int	pglen = xdr->page_len;
	unsigned int	flags = MSG_MORE;
	char		buf[RPC_MAX_ADDRBUFLEN];

	slen = xdr->len;

	if (rqstp->rq_prot == IPPROTO_UDP) {
		struct msghdr msg = {
			.msg_name	= &rqstp->rq_addr,
			.msg_namelen	= rqstp->rq_addrlen,
			.msg_control	= cmh,
			.msg_controllen	= sizeof(buffer),
			.msg_flags	= MSG_MORE,
		};

		svc_set_cmsg_data(rqstp, cmh);

		if (sock_sendmsg(sock, &msg, 0) < 0)
			goto out;
	}

	/* send head */
	if (slen == xdr->head[0].iov_len)
		flags = 0;
	len = kernel_sendpage(sock, rqstp->rq_respages[0], 0,
			      xdr->head[0].iov_len, flags);
	if (len != xdr->head[0].iov_len)
		goto out;
	slen -= xdr->head[0].iov_len;
	if (slen == 0)
		goto out;

	/* send page data */
	size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen;
	while (pglen > 0) {
		if (slen == size)
			flags = 0;
		result = kernel_sendpage(sock, *ppage, base, size, flags);
		if (result > 0)
			len += result;
		if (result != size)
			goto out;
		slen -= size;
		pglen -= size;
		size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen;
		base = 0;
		ppage++;
	}

	/* send tail */
	if (xdr->tail[0].iov_len) {
		result = kernel_sendpage(sock, rqstp->rq_respages[0],
					 ((unsigned long)xdr->tail[0].iov_base)
					    & (PAGE_SIZE-1),
					 xdr->tail[0].iov_len, 0);

		if (result > 0)
			len += result;
	}
out:
	dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
		rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len,
		xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));

	return len;
}
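
/*
 * The xdr_buf sent above has three parts, and svc_sendto() walks them
 * in order with MSG_MORE set on every chunk except the last:
 *
 *	head[0]  - protocol header (one kvec, lives in the first page)
 *	pages[]  - bulk payload, e.g. READ data
 *	tail[0]  - padding/trailer (shares the first page, hence the
 *		   offset arithmetic on rq_respages[0])
 */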

/*
 * Report socket names for nfsdfs
 */
static int one_sock_name(char *buf, struct svc_sock *svsk)
{
	int len;

	switch (svsk->sk_sk->sk_family) {
	case AF_INET:
		len = sprintf(buf, "ipv4 %s %u.%u.%u.%u %d\n",
			      svsk->sk_sk->sk_protocol == IPPROTO_UDP ?
			      "udp" : "tcp",
			      NIPQUAD(inet_sk(svsk->sk_sk)->rcv_saddr),
			      inet_sk(svsk->sk_sk)->num);
		break;
	default:
		len = sprintf(buf, "*unknown-%d*\n",
			      svsk->sk_sk->sk_family);
	}
	return len;
}

int
svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
{
	struct svc_sock *svsk, *closesk = NULL;
	int len = 0;

	if (!serv)
		return 0;
	spin_lock_bh(&serv->sv_lock);
	list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) {
		int onelen = one_sock_name(buf + len, svsk);
		if (toclose && strcmp(toclose, buf + len) == 0)
			closesk = svsk;
		else
			len += onelen;
	}
	spin_unlock_bh(&serv->sv_lock);
	if (closesk)
		/* Should unregister with portmap, but you cannot
		 * unregister just one protocol...
		 */
		svc_close_socket(closesk);
	else if (toclose)
		return -ENOENT;
	return len;
}
EXPORT_SYMBOL(svc_sock_names);

/*
 * Check input queue length
 */
static int
svc_recv_available(struct svc_sock *svsk)
{
	struct socket	*sock = svsk->sk_sock;
	int		avail, err;

	err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);

	return (err >= 0) ? avail : err;
}

/*
 * Generic recvfrom routine.
 */
static int
svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
{
	struct svc_sock *svsk = rqstp->rq_sock;
	struct msghdr msg = {
		.msg_flags	= MSG_DONTWAIT,
	};
	struct sockaddr *sin;
	int len;

	len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
			     msg.msg_flags);

	/* sock_recvmsg doesn't fill in the name/namelen, so we must..
	 */
	memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen);
	rqstp->rq_addrlen = svsk->sk_remotelen;

	/* Destination address in request is needed for binding the
	 * source address in RPC callbacks later.
	 */
	sin = (struct sockaddr *)&svsk->sk_local;
	switch (sin->sa_family) {
	case AF_INET:
		rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
		break;
	case AF_INET6:
		rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
		break;
	}

	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
		svsk, iov[0].iov_base, iov[0].iov_len, len);

	return len;
}

/*
 * Set socket snd and rcv buffer lengths
 */
static inline void
svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv)
{
#if 0
	mm_segment_t	oldfs;
	oldfs = get_fs(); set_fs(KERNEL_DS);
	sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
			(char *)&snd, sizeof(snd));
	sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
			(char *)&rcv, sizeof(rcv));
#else
	/* sock_setsockopt limits use to sysctl_?mem_max,
	 * which isn't acceptable.  Until that is made conditional
	 * on not having CAP_SYS_RESOURCE or similar, we go direct...
	 * DaveM said I could!
	 */
	lock_sock(sock->sk);
	sock->sk->sk_sndbuf = snd * 2;
	sock->sk->sk_rcvbuf = rcv * 2;
	sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
	release_sock(sock->sk);
#endif
}
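
/*
 * The doubling of snd/rcv above mirrors what SO_SNDBUF/SO_RCVBUF do in
 * sock_setsockopt(): the kernel doubles the requested value to leave
 * room for sk_buff bookkeeping overhead, so the caller's figure remains
 * an estimate of usable payload space.
 */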

/*
 * INET callback when data has been received on the socket.
 */
static void
svc_udp_data_ready(struct sock *sk, int count)
{
	struct svc_sock	*svsk = (struct svc_sock *)(sk->sk_user_data);

	if (svsk) {
		dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
			svsk, sk, count,
			test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
		svc_sock_enqueue(svsk);
	}
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
}

/*
 * INET callback when space is newly available on the socket.
 */
static void
svc_write_space(struct sock *sk)
{
	struct svc_sock	*svsk = (struct svc_sock *)(sk->sk_user_data);

	if (svsk) {
		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
			svsk, sk,
			test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
		svc_sock_enqueue(svsk);
	}

	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
		dprintk("RPC svc_write_space: someone sleeping on %p\n",
			svsk);
		wake_up_interruptible(sk->sk_sleep);
	}
}

static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp,
					    struct cmsghdr *cmh)
{
	switch (rqstp->rq_sock->sk_sk->sk_family) {
	case AF_INET: {
		struct in_pktinfo *pki = CMSG_DATA(cmh);
		rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
		break;
		}
	case AF_INET6: {
		struct in6_pktinfo *pki = CMSG_DATA(cmh);
		ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
		break;
		}
	}
}

/*
 * Receive a datagram from a UDP socket.
 */
static int
svc_udp_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk = rqstp->rq_sock;
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
	struct sk_buff	*skb;
	union {
		struct cmsghdr	hdr;
		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
	} buffer;
	struct cmsghdr *cmh = &buffer.hdr;
	int		err, len;
	struct msghdr msg = {
		.msg_name = svc_addr(rqstp),
		.msg_control = cmh,
		.msg_controllen = sizeof(buffer),
		.msg_flags = MSG_DONTWAIT,
	};

	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
	    /* udp sockets need large rcvbuf as all pending
	     * requests are still in that buffer.  sndbuf must
	     * also be large enough that there is enough space
	     * for one reply per thread.  We count all threads
	     * rather than threads in a particular pool, which
	     * provides an upper bound on the number of threads
	     * which will access the socket.
	     */
	    svc_sock_setbufsize(svsk->sk_sock,
				(serv->sv_nrthreads+3) * serv->sv_max_mesg,
				(serv->sv_nrthreads+3) * serv->sv_max_mesg);

	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
		svc_sock_received(svsk);
		return svc_deferred_recv(rqstp);
	}

	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
	skb = NULL;
	err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
			     0, 0, MSG_PEEK | MSG_DONTWAIT);
	if (err >= 0)
		skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err);

	if (skb == NULL) {
		if (err != -EAGAIN) {
			/* possibly an icmp error */
			dprintk("svc: recvfrom returned error %d\n", -err);
			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
		}
		svc_sock_received(svsk);
		return -EAGAIN;
	}
	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
	if (skb->tstamp.tv64 == 0) {
		skb->tstamp = ktime_get_real();
		/* Don't enable netstamp, sunrpc doesn't
		   need that much accuracy */
	}
	svsk->sk_sk->sk_stamp = skb->tstamp;
	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */

	/*
	 * Maybe more packets - kick another thread ASAP.
	 */
	svc_sock_received(svsk);

	len  = skb->len - sizeof(struct udphdr);
	rqstp->rq_arg.len = len;

	rqstp->rq_prot = IPPROTO_UDP;

	if (cmh->cmsg_level != IPPROTO_IP ||
	    cmh->cmsg_type != IP_PKTINFO) {
		if (net_ratelimit())
			printk("rpcsvc: received unknown control message:"
			       "%d/%d\n",
			       cmh->cmsg_level, cmh->cmsg_type);
		skb_free_datagram(svsk->sk_sk, skb);
		return 0;
	}
	svc_udp_get_dest_address(rqstp, cmh);

	if (skb_is_nonlinear(skb)) {
		/* we have to copy */
		local_bh_disable();
		if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
			local_bh_enable();
			/* checksum error */
			skb_free_datagram(svsk->sk_sk, skb);
			return 0;
		}
		local_bh_enable();
		skb_free_datagram(svsk->sk_sk, skb);
	} else {
		/* we can use it in-place */
		rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr);
		rqstp->rq_arg.head[0].iov_len = len;
		if (skb_checksum_complete(skb)) {
			skb_free_datagram(svsk->sk_sk, skb);
			return 0;
		}
		rqstp->rq_xprt_ctxt = skb;
	}

	rqstp->rq_arg.page_base = 0;
	if (len <= rqstp->rq_arg.head[0].iov_len) {
		rqstp->rq_arg.head[0].iov_len = len;
		rqstp->rq_arg.page_len = 0;
		rqstp->rq_respages = rqstp->rq_pages+1;
	} else {
		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
		rqstp->rq_respages = rqstp->rq_pages + 1 +
			DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
	}

	if (serv->sv_stats)
		serv->sv_stats->netudpcnt++;

	return len;
}

static int
svc_udp_sendto(struct svc_rqst *rqstp)
{
	int		error;

	error = svc_sendto(rqstp, &rqstp->rq_res);
	if (error == -ECONNREFUSED)
		/* ICMP error on earlier request. */
		error = svc_sendto(rqstp, &rqstp->rq_res);

	return error;
}

static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
{
}

static int svc_udp_has_wspace(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct svc_serv	*serv = xprt->xpt_server;
	unsigned long required;

	/*
	 * Set the SOCK_NOSPACE flag before checking the available
	 * sock space.
	 */
	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
	if (required*2 > sock_wspace(svsk->sk_sk))
		return 0;
	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	return 1;
}
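
/*
 * Note on the heuristic above: sock_wspace() reports currently free
 * send-buffer space; requiring twice the reserved estimate leaves slack
 * so a reply in progress is unlikely to block. svc_tcp_has_wspace()
 * below applies the same rule against sk_stream_wspace().
 */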

static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
{
	BUG();
	return NULL;
}

static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
				       struct sockaddr *sa, int salen,
				       int flags)
{
	return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags);
}

static struct svc_xprt_ops svc_udp_ops = {
	.xpo_create = svc_udp_create,
	.xpo_recvfrom = svc_udp_recvfrom,
	.xpo_sendto = svc_udp_sendto,
	.xpo_release_rqst = svc_release_skb,
	.xpo_detach = svc_sock_detach,
	.xpo_free = svc_sock_free,
	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
	.xpo_has_wspace = svc_udp_has_wspace,
	.xpo_accept = svc_udp_accept,
};

static struct svc_xprt_class svc_udp_class = {
	.xcl_name = "udp",
	.xcl_owner = THIS_MODULE,
	.xcl_ops = &svc_udp_ops,
	.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
};

static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
	int one = 1;
	mm_segment_t oldfs;

	svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
	svsk->sk_sk->sk_write_space = svc_write_space;

	/* initialise setting must have enough space to
	 * receive and respond to one request.
	 * svc_udp_recvfrom will re-adjust if necessary
	 */
	svc_sock_setbufsize(svsk->sk_sock,
			    3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
			    3 * svsk->sk_xprt.xpt_server->sv_max_mesg);

	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* might have come in before data_ready set up */
	set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	/* make sure we get destination address info */
	svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO,
				       (char __user *)&one, sizeof(one));
	set_fs(oldfs);
}

/*
 * A data_ready event on a listening socket means there's a connection
 * pending. Do not use state_change as a substitute for it.
 */
static void
svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
{
	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;

	dprintk("svc: socket %p TCP (listen) state change %d\n",
		sk, sk->sk_state);

	/*
	 * This callback may be called twice when a new connection
	 * is established as a child socket inherits everything
	 * from a parent LISTEN socket.
	 * 1) data_ready method of the parent socket will be called
	 *    when one of the child sockets becomes ESTABLISHED.
	 * 2) data_ready method of the child socket may be called
	 *    when it receives data before the socket is accepted.
	 * In case of 2, we should ignore it silently.
	 */
	if (sk->sk_state == TCP_LISTEN) {
		if (svsk) {
			set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
			svc_sock_enqueue(svsk);
		} else
			printk("svc: socket %p: no user data\n", sk);
	}

	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
}

/*
 * A state change on a connected socket means it's dying or dead.
 */
static void
svc_tcp_state_change(struct sock *sk)
{
	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;

	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
		sk, sk->sk_state, sk->sk_user_data);

	if (!svsk)
		printk("svc: socket %p: no user data\n", sk);
	else {
		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
		svc_sock_enqueue(svsk);
	}
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
}

static void
svc_tcp_data_ready(struct sock *sk, int count)
{
	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;

	dprintk("svc: socket %p TCP data ready (svsk %p)\n",
		sk, sk->sk_user_data);
	if (svsk) {
		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
		svc_sock_enqueue(svsk);
	}
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
}

static inline int svc_port_is_privileged(struct sockaddr *sin)
{
	switch (sin->sa_family) {
	case AF_INET:
		return ntohs(((struct sockaddr_in *)sin)->sin_port)
			< PROT_SOCK;
	case AF_INET6:
		return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
			< PROT_SOCK;
	default:
		return 0;
	}
}
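
/*
 * PROT_SOCK is 1024, so this is the classic "reserved port" test: only
 * a privileged process (root, or one with CAP_NET_BIND_SERVICE) can
 * bind below it, which is why a source port below 1024 is treated as
 * weak evidence that the client is privileged.
 */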

/*
 * Accept a TCP connection
 */
static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct sockaddr_storage addr;
	struct sockaddr	*sin = (struct sockaddr *) &addr;
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
	struct socket	*sock = svsk->sk_sock;
	struct socket	*newsock;
	struct svc_sock	*newsvsk;
	int		err, slen;
	char		buf[RPC_MAX_ADDRBUFLEN];

	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
	if (!sock)
		return NULL;

	clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
	err = kernel_accept(sock, &newsock, O_NONBLOCK);
	if (err < 0) {
		if (err == -ENOMEM)
			printk(KERN_WARNING "%s: no more sockets!\n",
			       serv->sv_name);
		else if (err != -EAGAIN && net_ratelimit())
			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
			       serv->sv_name, -err);
		return NULL;
	}

	set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);

	err = kernel_getpeername(newsock, sin, &slen);
	if (err < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "%s: peername failed (err %d)!\n",
			       serv->sv_name, -err);
		goto failed;		/* aborted connection or whatever */
	}

	/* Ideally, we would want to reject connections from unauthorized
	 * hosts here, but when we get encryption, the IP of the host won't
	 * tell us anything.  For now just warn about unpriv connections.
	 */
	if (!svc_port_is_privileged(sin)) {
		dprintk(KERN_WARNING
			"%s: connect from unprivileged port: %s\n",
			serv->sv_name,
			__svc_print_addr(sin, buf, sizeof(buf)));
	}
	dprintk("%s: connect from %s\n", serv->sv_name,
		__svc_print_addr(sin, buf, sizeof(buf)));

	/* make sure that a write doesn't block forever when
	 * low on memory
	 */
	newsock->sk->sk_sndtimeo = HZ*30;

	if (!(newsvsk = svc_setup_socket(serv, newsock, &err,
				 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY))))
		goto failed;
	memcpy(&newsvsk->sk_remote, sin, slen);
	newsvsk->sk_remotelen = slen;
	err = kernel_getsockname(newsock, sin, &slen);
	if (unlikely(err < 0)) {
		dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
		slen = offsetof(struct sockaddr, sa_data);
	}
	memcpy(&newsvsk->sk_local, sin, slen);

	svc_sock_received(newsvsk);

	if (serv->sv_stats)
		serv->sv_stats->nettcpconn++;

	return &newsvsk->sk_xprt;

failed:
	sock_release(newsock);
	return NULL;
}
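
/*
 * RPC-over-TCP record marking (RFC 1831): every record is preceded by a
 * 4-byte header sent in network byte order. The top bit marks the last
 * fragment of a record and the low 31 bits give the fragment length,
 * which is why svc_tcp_recvfrom() below checks for and then strips
 * 0x80000000 from sk_reclen.
 */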

/*
 * Receive data from a TCP socket.
 */
static int
svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk = rqstp->rq_sock;
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
	int		len;
	struct kvec *vec;
	int pnum, vlen;

	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
		svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
		test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
		test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));

	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
		svc_sock_received(svsk);
		return svc_deferred_recv(rqstp);
	}

	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
		/* sndbuf needs to have room for one request
		 * per thread, otherwise we can stall even when the
		 * network isn't a bottleneck.
		 *
		 * We count all threads rather than threads in a
		 * particular pool, which provides an upper bound
		 * on the number of threads which will access the socket.
		 *
		 * rcvbuf just needs to be able to hold a few requests.
		 * Normally they will be removed from the queue
		 * as soon as a complete request arrives.
		 */
		svc_sock_setbufsize(svsk->sk_sock,
				    (serv->sv_nrthreads+3) * serv->sv_max_mesg,
				    3 * serv->sv_max_mesg);

	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);

	/* Receive data. If we haven't got the record length yet, get
	 * the next four bytes. Otherwise try to gobble up as much as
	 * possible up to the complete record length.
	 */
	if (svsk->sk_tcplen < 4) {
		unsigned long	want = 4 - svsk->sk_tcplen;
		struct kvec	iov;

		iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
		iov.iov_len  = want;
		if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
			goto error;
		svsk->sk_tcplen += len;

		if (len < want) {
			dprintk("svc: short recvfrom while reading record length (%d of %lu)\n",
				len, want);
			svc_sock_received(svsk);
			return -EAGAIN; /* record header not complete */
		}

		svsk->sk_reclen = ntohl(svsk->sk_reclen);
		if (!(svsk->sk_reclen & 0x80000000)) {
			/* FIXME: technically, a record can be fragmented,
			 *  and non-terminal fragments will not have the top
			 *  bit set in the fragment length header.
			 *  But apparently no known nfs clients send fragmented
			 *  records. */
			if (net_ratelimit())
				printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx"
				       " (non-terminal)\n",
				       (unsigned long) svsk->sk_reclen);
			goto err_delete;
		}
		svsk->sk_reclen &= 0x7fffffff;
		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
		if (svsk->sk_reclen > serv->sv_max_mesg) {
			if (net_ratelimit())
				printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx"
				       " (large)\n",
				       (unsigned long) svsk->sk_reclen);
			goto err_delete;
		}
	}

	/* Check whether enough data is available */
	len = svc_recv_available(svsk);
	if (len < 0)
		goto error;

	if (len < svsk->sk_reclen) {
		dprintk("svc: incomplete TCP record (%d of %d)\n",
			len, svsk->sk_reclen);
		svc_sock_received(svsk);
		return -EAGAIN;	/* record not complete */
	}
	len = svsk->sk_reclen;
	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);

	vec = rqstp->rq_vec;
	vec[0] = rqstp->rq_arg.head[0];
	vlen = PAGE_SIZE;
	pnum = 1;
	while (vlen < len) {
		vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]);
		vec[pnum].iov_len = PAGE_SIZE;
		pnum++;
		vlen += PAGE_SIZE;
	}
	rqstp->rq_respages = &rqstp->rq_pages[pnum];

	/* Now receive data */
	len = svc_recvfrom(rqstp, vec, pnum, len);
	if (len < 0)
		goto error;

	dprintk("svc: TCP complete record (%d bytes)\n", len);
	rqstp->rq_arg.len = len;
	rqstp->rq_arg.page_base = 0;
	if (len <= rqstp->rq_arg.head[0].iov_len) {
		rqstp->rq_arg.head[0].iov_len = len;
		rqstp->rq_arg.page_len = 0;
	} else {
		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
	}

	rqstp->rq_xprt_ctxt = NULL;
	rqstp->rq_prot = IPPROTO_TCP;

	/* Reset TCP read info */
	svsk->sk_reclen = 0;
	svsk->sk_tcplen = 0;

	svc_sock_received(svsk);
	if (serv->sv_stats)
		serv->sv_stats->nettcpcnt++;

	return len;

err_delete:
	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
	return -EAGAIN;

error:
	if (len == -EAGAIN) {
		dprintk("RPC: TCP recvfrom got EAGAIN\n");
		svc_sock_received(svsk);
	} else {
		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
		       svsk->sk_xprt.xpt_server->sv_name, -len);
		goto err_delete;
	}

	return len;
}

/*
 * Send out data on TCP socket.
 */
static int
svc_tcp_sendto(struct svc_rqst *rqstp)
{
	struct xdr_buf	*xbufp = &rqstp->rq_res;
	int sent;
	__be32 reclen;

	/* Set up the first element of the reply kvec.
	 * Any other kvecs that may be in use have been taken
	 * care of by the server implementation itself.
	 */
	reclen = htonl(0x80000000|((xbufp->len) - 4));
	memcpy(xbufp->head[0].iov_base, &reclen, 4);

	if (test_bit(XPT_DEAD, &rqstp->rq_sock->sk_xprt.xpt_flags))
		return -ENOTCONN;

	sent = svc_sendto(rqstp, &rqstp->rq_res);
	if (sent != xbufp->len) {
		printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
		       rqstp->rq_sock->sk_xprt.xpt_server->sv_name,
		       (sent < 0) ? "got error" : "sent only",
		       sent, xbufp->len);
		set_bit(XPT_CLOSE, &rqstp->rq_sock->sk_xprt.xpt_flags);
		svc_sock_enqueue(rqstp->rq_sock);
		sent = -EAGAIN;
	}
	return sent;
}

/*
 * Setup response header. TCP has a 4-byte record length field.
 */
static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
{
	struct kvec *resv = &rqstp->rq_res.head[0];

	/* tcp needs a space for the record length... */
	svc_putnl(resv, 0);
}
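
/*
 * The zero written by svc_putnl() above is a placeholder: once the full
 * reply length is known, svc_tcp_sendto() overwrites these 4 bytes with
 * htonl(0x80000000 | (len - 4)), i.e. a record marker describing a
 * single terminal fragment.
 */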

static int svc_tcp_has_wspace(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
	int required;
	int wspace;

	/*
	 * Set the SOCK_NOSPACE flag before checking the available
	 * sock space.
	 */
	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
	wspace = sk_stream_wspace(svsk->sk_sk);

	if (wspace < sk_stream_min_wspace(svsk->sk_sk))
		return 0;
	if (required * 2 > wspace)
		return 0;

	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	return 1;
}

static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
				       struct sockaddr *sa, int salen,
				       int flags)
{
	return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags);
}

static struct svc_xprt_ops svc_tcp_ops = {
	.xpo_create = svc_tcp_create,
	.xpo_recvfrom = svc_tcp_recvfrom,
	.xpo_sendto = svc_tcp_sendto,
	.xpo_release_rqst = svc_release_skb,
	.xpo_detach = svc_sock_detach,
	.xpo_free = svc_sock_free,
	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
	.xpo_has_wspace = svc_tcp_has_wspace,
	.xpo_accept = svc_tcp_accept,
};

static struct svc_xprt_class svc_tcp_class = {
	.xcl_name = "tcp",
	.xcl_owner = THIS_MODULE,
	.xcl_ops = &svc_tcp_ops,
	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
};

void svc_init_xprt_sock(void)
{
	svc_reg_xprt_class(&svc_tcp_class);
	svc_reg_xprt_class(&svc_udp_class);
}

void svc_cleanup_xprt_sock(void)
{
	svc_unreg_xprt_class(&svc_tcp_class);
	svc_unreg_xprt_class(&svc_udp_class);
}
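
/*
 * svc_init_xprt_sock()/svc_cleanup_xprt_sock() make the "tcp" and "udp"
 * transport classes available for lookup by name when a service creates
 * a transport; they are intended to be called once from the sunrpc
 * module's init and exit paths.
 */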

static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
	struct sock	*sk = svsk->sk_sk;
	struct tcp_sock *tp = tcp_sk(sk);

	svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv);

	if (sk->sk_state == TCP_LISTEN) {
		dprintk("setting up TCP socket for listening\n");
		set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
		sk->sk_data_ready = svc_tcp_listen_data_ready;
		set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
	} else {
		dprintk("setting up TCP socket for reading\n");
		sk->sk_state_change = svc_tcp_state_change;
		sk->sk_data_ready = svc_tcp_data_ready;
		sk->sk_write_space = svc_write_space;

		svsk->sk_reclen = 0;
		svsk->sk_tcplen = 0;

		tp->nonagle = 1;        /* disable Nagle's algorithm */

		/* initialise setting must have enough space to
		 * receive and respond to one request.
		 * svc_tcp_recvfrom will re-adjust if necessary
		 */
		svc_sock_setbufsize(svsk->sk_sock,
				    3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
				    3 * svsk->sk_xprt.xpt_server->sv_max_mesg);

		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
		if (sk->sk_state != TCP_ESTABLISHED)
			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
	}
}

void
svc_sock_update_bufs(struct svc_serv *serv)
{
	/*
	 * The number of server threads has changed. Update
	 * rcvbuf and sndbuf accordingly on all sockets
	 */
	struct list_head *le;

	spin_lock_bh(&serv->sv_lock);
	list_for_each(le, &serv->sv_permsocks) {
		struct svc_sock *svsk =
			list_entry(le, struct svc_sock, sk_list);
		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
	}
	list_for_each(le, &serv->sv_tempsocks) {
		struct svc_sock *svsk =
			list_entry(le, struct svc_sock, sk_list);
		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
	}
	spin_unlock_bh(&serv->sv_lock);
}

/*
 * Make sure that we don't have too many active connections. If we
 * have, something must be dropped.
 *
 * There's no point in trying to do random drop here for DoS
 * prevention. The NFS client does one reconnect in 15 seconds. An
 * attacker can easily beat that.
 *
 * The only somewhat efficient mechanism would be to drop old
 * connections from the same IP first. But right now we don't even
 * record the client IP in svc_sock.
 */
static void svc_check_conn_limits(struct svc_serv *serv)
{
	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
		struct svc_sock *svsk = NULL;
		spin_lock_bh(&serv->sv_lock);
		if (!list_empty(&serv->sv_tempsocks)) {
			if (net_ratelimit()) {
				/* Try to help the admin */
				printk(KERN_NOTICE "%s: too many open TCP "
				       "sockets, consider increasing the "
				       "number of nfsd threads\n",
				       serv->sv_name);
			}
			/*
			 * Always select the oldest socket. It's not fair,
			 * but so is life
			 */
			svsk = list_entry(serv->sv_tempsocks.prev,
					  struct svc_sock,
					  sk_list);
			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
			svc_xprt_get(&svsk->sk_xprt);
		}
		spin_unlock_bh(&serv->sv_lock);

		if (svsk) {
			svc_sock_enqueue(svsk);
			svc_xprt_put(&svsk->sk_xprt);
		}
	}
}
2005-04-16 15:20:36 -07:00
/*
2006-10-02 02:17:58 -07:00
* Receive the next request on any socket . This code is carefully
* organised not to touch any cachelines in the shared svc_serv
* structure , only cachelines in the local svc_pool .
2005-04-16 15:20:36 -07:00
*/
int
2006-10-02 02:17:50 -07:00
svc_recv ( struct svc_rqst * rqstp , long timeout )
2005-04-16 15:20:36 -07:00
{
2007-02-12 00:53:34 -08:00
struct svc_sock * svsk = NULL ;
2006-10-02 02:17:50 -07:00
struct svc_serv * serv = rqstp - > rq_server ;
2006-10-02 02:17:58 -07:00
struct svc_pool * pool = rqstp - > rq_pool ;
2006-10-04 02:15:46 -07:00
int len , i ;
2005-04-16 15:20:36 -07:00
int pages ;
struct xdr_buf * arg ;
DECLARE_WAITQUEUE ( wait , current ) ;
dprintk ( " svc: server %p waiting for data (to = %ld) \n " ,
rqstp , timeout ) ;
if ( rqstp - > rq_sock )
2007-02-09 15:38:13 -08:00
printk ( KERN_ERR
2005-04-16 15:20:36 -07:00
" svc_recv: service %p, socket not NULL! \n " ,
rqstp ) ;
if ( waitqueue_active ( & rqstp - > rq_wait ) )
2007-02-09 15:38:13 -08:00
printk ( KERN_ERR
2005-04-16 15:20:36 -07:00
" svc_recv: service %p, wait queue active! \n " ,
rqstp ) ;
/* now allocate needed pages. If we get a failure, sleep briefly */
2006-10-06 00:44:05 -07:00
pages = ( serv - > sv_max_mesg + PAGE_SIZE ) / PAGE_SIZE ;
2006-10-04 02:15:46 -07:00
	for (i = 0; i < pages; i++)
		while (rqstp->rq_pages[i] == NULL) {
			struct page *p = alloc_page(GFP_KERNEL);
			if (!p)
				schedule_timeout_uninterruptible(msecs_to_jiffies(500));
			rqstp->rq_pages[i] = p;
2005-04-16 15:20:36 -07:00
		}
2007-01-26 00:56:59 -08:00
	rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
	BUG_ON(pages >= RPCSVC_MAXPAGES);
2005-04-16 15:20:36 -07:00

	/* Make arg->head point to first page and arg->pages point to rest */
	arg = &rqstp->rq_arg;
2006-10-04 02:15:46 -07:00
	arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
2005-04-16 15:20:36 -07:00
	arg->head[0].iov_len = PAGE_SIZE;
2006-10-04 02:15:46 -07:00
	arg->pages = rqstp->rq_pages + 1;
2005-04-16 15:20:36 -07:00
	arg->page_base = 0;
	/* save at least one page for response */
	arg->page_len = (pages-2)*PAGE_SIZE;
	arg->len = (pages-1)*PAGE_SIZE;
	arg->tail[0].iov_len = 0;
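	/*
	 * Worked example (my numbers, assuming 4K pages and a 32K
	 * sv_max_mesg): pages = (32768 + 4096) / 4096 = 9; head gets
	 * page 0 (4096 bytes), arg->pages covers the remaining 8, and
	 * page_len = (9 - 2) * 4096 = 28672 leaves the last page free
	 * for the response, giving arg->len = 8 * 4096 = 32768.
	 */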
2005-06-24 23:13:50 -07:00
	try_to_freeze();
2005-11-15 00:09:10 -08:00
	cond_resched();
2005-04-16 15:20:36 -07:00
	if (signalled())
		return -EINTR;
2006-10-02 02:17:58 -07:00
	spin_lock_bh(&pool->sp_lock);
	if ((svsk = svc_sock_dequeue(pool)) != NULL) {
2005-04-16 15:20:36 -07:00
		rqstp->rq_sock = svsk;
2007-12-30 21:07:46 -06:00
		svc_xprt_get(&svsk->sk_xprt);
2006-10-06 00:44:05 -07:00
		rqstp->rq_reserved = serv->sv_max_mesg;
2006-10-02 02:17:56 -07:00
		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
2005-04-16 15:20:36 -07:00
	} else {
		/* No data pending. Go to sleep */
2006-10-02 02:17:58 -07:00
		svc_thread_enqueue(pool, rqstp);
2005-04-16 15:20:36 -07:00

		/*
		 * We have to be able to interrupt this wait
		 * to bring down the daemons ...
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&rqstp->rq_wait, &wait);
2006-10-02 02:17:58 -07:00
		spin_unlock_bh(&pool->sp_lock);
2005-04-16 15:20:36 -07:00

		schedule_timeout(timeout);
2005-06-24 23:13:50 -07:00
		try_to_freeze();
2005-04-16 15:20:36 -07:00
2006-10-02 02:17:58 -07:00
		spin_lock_bh(&pool->sp_lock);
2005-04-16 15:20:36 -07:00
		remove_wait_queue(&rqstp->rq_wait, &wait);

		if (!(svsk = rqstp->rq_sock)) {
2006-10-02 02:17:58 -07:00
			svc_thread_dequeue(pool, rqstp);
			spin_unlock_bh(&pool->sp_lock);
2005-04-16 15:20:36 -07:00
			dprintk("svc: server %p, no data yet\n", rqstp);
			return signalled()? -EINTR : -EAGAIN;
		}
	}
2006-10-02 02:17:58 -07:00
	spin_unlock_bh(&pool->sp_lock);
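	/*
	 * Added note: reaching this point means we hold a socket, either
	 * dequeued directly above or attached to rqstp->rq_sock by
	 * svc_sock_enqueue while we slept; in both cases a transport
	 * reference has already been taken on our behalf.
	 */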
2005-04-16 15:20:36 -07:00
2007-12-30 21:07:34 -06:00
	len = 0;
2007-12-30 21:07:48 -06:00
	if (test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)) {
		dprintk("svc_recv: found XPT_CLOSE\n");
2007-12-30 21:07:34 -06:00
		svc_delete_socket(svsk);
2007-12-30 21:07:48 -06:00
	} else if (test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags)) {
2007-12-30 21:07:36 -06:00
		struct svc_xprt *newxpt;
		newxpt = svsk->sk_xprt.xpt_ops->xpo_accept(&svsk->sk_xprt);
2007-12-30 21:07:42 -06:00
		if (newxpt) {
			/*
			 * We know this module_get will succeed because the
			 * listener holds a reference too
			 */
			__module_get(newxpt->xpt_class->xcl_owner);
2007-12-30 21:07:50 -06:00
			svc_check_conn_limits(svsk->sk_xprt.xpt_server);
2007-12-30 21:07:42 -06:00
		}
2007-12-30 21:07:36 -06:00
		svc_sock_received(svsk);
2007-12-30 21:07:34 -06:00
	} else {
		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
2007-12-30 21:07:46 -06:00
			rqstp, pool->sp_id, svsk,
			atomic_read(&svsk->sk_xprt.xpt_ref.refcount));
2007-12-30 21:07:34 -06:00
		len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
		dprintk("svc: got len=%d\n", len);
	}
2005-04-16 15:20:36 -07:00

	/* No data, incomplete (TCP) read, or accept() */
	if (len == 0 || len == -EAGAIN) {
		rqstp->rq_res.len = 0;
		svc_sock_release(rqstp);
		return -EAGAIN;
	}
	svsk->sk_lastrecv = get_seconds();
2007-12-30 21:07:48 -06:00
	clear_bit(XPT_OLD, &svsk->sk_xprt.xpt_flags);
2005-04-16 15:20:36 -07:00
2007-02-12 00:53:37 -08:00
	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
2005-04-16 15:20:36 -07:00
	rqstp->rq_chandle.defer = svc_defer;

	if (serv->sv_stats)
		serv->sv_stats->netcnt++;
	return len;
}
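
/*
 * Minimal sketch of a service thread driving svc_recv() -- my own
 * illustration of the calling convention, not code from this file
 * (nfsd's real loop in fs/nfsd/nfssvc.c adds buffer and signal setup):
 *
 *	for (;;) {
 *		int err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
 *		if (err == -EAGAIN)
 *			continue;	-- nothing complete yet, retry
 *		if (err == -EINTR)
 *			break;		-- signalled, shut the thread down
 *		svc_process(rqstp);	-- dispatch; the reply is sent
 *					   via svc_send()
 *	}
 */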
2007-02-09 15:38:13 -08:00
/*
2005-04-16 15:20:36 -07:00
 * Drop request
 */
void
svc_drop(struct svc_rqst *rqstp)
{
	dprintk("svc: socket %p dropped request\n", rqstp->rq_sock);
	svc_sock_release(rqstp);
}

/*
 * Return reply to client.
 */
int
svc_send(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk;
	int		len;
	struct xdr_buf	*xb;

	if ((svsk = rqstp->rq_sock) == NULL) {
		printk(KERN_WARNING "NULL socket pointer in %s:%d\n",
				__FILE__, __LINE__);
		return -EFAULT;
	}

	/* release the receive skb before sending the reply */
2007-12-30 21:07:25 -06:00
	rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
2005-04-16 15:20:36 -07:00

	/* calculate over-all length */
	xb = &rqstp->rq_res;
	xb->len = xb->head[0].iov_len +
		xb->page_len +
		xb->tail[0].iov_len;
2006-03-20 22:35:41 -08:00
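	/*
	 * Added design note: on a TCP transport xpo_sendto writes a 4-byte
	 * record marker followed by the XDR reply, so two threads writing
	 * the same socket unserialized could interleave bytes mid-record;
	 * sk_mutex makes each reply atomic with respect to the stream.
	 */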
	/* Grab svsk->sk_mutex to serialize outgoing data. */
	mutex_lock(&svsk->sk_mutex);
2007-12-30 21:07:48 -06:00
	if (test_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags))
2005-04-16 15:20:36 -07:00
		len = -ENOTCONN;
	else
2007-12-30 21:07:23 -06:00
		len = svsk->sk_xprt.xpt_ops->xpo_sendto(rqstp);
2006-03-20 22:35:41 -08:00
	mutex_unlock(&svsk->sk_mutex);
2005-04-16 15:20:36 -07:00
	svc_sock_release(rqstp);

	if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
		return 0;
	return len;
}
2006-10-02 02:17:54 -07:00
/*
 * Timer function to close old temporary sockets, using
 * a mark-and-sweep algorithm.
 */
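/*
 * Added illustration: svc_recv() clears XPT_OLD on every successful
 * receive, so a temporary socket is closed only after it has sat idle
 * (and otherwise unreferenced) across two consecutive timer runs -- the
 * first run sets XPT_OLD, the second finds it still set and queues the
 * socket for close. With a timer period of, say, 6 minutes, an idle
 * connection survives between 6 and 12 minutes.
 */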
static void
svc_age_temp_sockets(unsigned long closure)
{
	struct svc_serv *serv = (struct svc_serv *)closure;
	struct svc_sock *svsk;
	struct list_head *le, *next;
	LIST_HEAD(to_be_aged);

	dprintk("svc_age_temp_sockets\n");

	if (!spin_trylock_bh(&serv->sv_lock)) {
		/* busy, try again 1 sec later */
		dprintk("svc_age_temp_sockets: busy\n");
		mod_timer(&serv->sv_temptimer, jiffies + HZ);
		return;
	}

	list_for_each_safe(le, next, &serv->sv_tempsocks) {
		svsk = list_entry(le, struct svc_sock, sk_list);

2007-12-30 21:07:48 -06:00
		if (!test_and_set_bit(XPT_OLD, &svsk->sk_xprt.xpt_flags))
2006-10-02 02:17:54 -07:00
			continue;
2007-12-30 21:07:46 -06:00
		if (atomic_read(&svsk->sk_xprt.xpt_ref.refcount) > 1
2007-12-30 21:07:48 -06:00
		    || test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags))
2006-10-02 02:17:54 -07:00
			continue;
2007-12-30 21:07:46 -06:00
		svc_xprt_get(&svsk->sk_xprt);
2006-10-02 02:17:54 -07:00
		list_move(le, &to_be_aged);
2007-12-30 21:07:48 -06:00
		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
		set_bit(XPT_DETACHED, &svsk->sk_xprt.xpt_flags);
2006-10-02 02:17:54 -07:00
	}
	spin_unlock_bh(&serv->sv_lock);

	while (!list_empty(&to_be_aged)) {
		le = to_be_aged.next;
2007-12-30 21:07:48 -06:00
		/* fiddling the sk_list node is safe 'cos we're XPT_DETACHED */
2006-10-02 02:17:54 -07:00
		list_del_init(le);
		svsk = list_entry(le, struct svc_sock, sk_list);

		dprintk("queuing svsk %p for closing, %lu seconds old\n",
			svsk, get_seconds() - svsk->sk_lastrecv);

		/* a thread will dequeue and close it soon */
		svc_sock_enqueue(svsk);
2007-12-30 21:07:46 -06:00
		svc_xprt_put(&svsk->sk_xprt);
2006-10-02 02:17:54 -07:00
	}

	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
}
2005-04-16 15:20:36 -07:00
/*
 * Initialize socket for RPC use and create svc_sock struct
 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
 */
2007-02-12 00:53:28 -08:00
static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
						struct socket *sock,
						int *errp, int flags)
2005-04-16 15:20:36 -07:00
{
	struct svc_sock	*svsk;
	struct sock	*inet;
2007-02-12 00:53:28 -08:00
	int		pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
	int		is_temporary = flags & SVC_SOCK_TEMPORARY;
2005-04-16 15:20:36 -07:00

	dprintk("svc: svc_setup_socket %p\n", sock);
2006-07-21 14:51:30 -07:00
	if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
2005-04-16 15:20:36 -07:00
		*errp = -ENOMEM;
		return NULL;
	}

	inet = sock->sk;

	/* Register socket with portmapper */
	if (*errp >= 0 && pmap_register)
		*errp = svc_register(serv, inet->sk_protocol,
				     ntohs(inet_sk(inet)->sport));

	if (*errp < 0) {
		kfree(svsk);
		return NULL;
	}

2007-12-30 21:07:48 -06:00
	set_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
2005-04-16 15:20:36 -07:00
	inet->sk_user_data = svsk;
	svsk->sk_sock = sock;
	svsk->sk_sk = inet;
	svsk->sk_ostate = inet->sk_state_change;
	svsk->sk_odata = inet->sk_data_ready;
	svsk->sk_owspace = inet->sk_write_space;
	svsk->sk_lastrecv = get_seconds();
2007-05-09 02:34:48 -07:00
	spin_lock_init(&svsk->sk_lock);
2005-04-16 15:20:36 -07:00
	INIT_LIST_HEAD(&svsk->sk_deferred);
	INIT_LIST_HEAD(&svsk->sk_ready);
2006-03-20 22:35:41 -08:00
	mutex_init(&svsk->sk_mutex);
2005-04-16 15:20:36 -07:00

	/* Initialize the socket */
	if (sock->type == SOCK_DGRAM)
2007-12-30 21:07:50 -06:00
		svc_udp_init(svsk, serv);
2005-04-16 15:20:36 -07:00
	else
2007-12-30 21:07:50 -06:00
		svc_tcp_init(svsk, serv);
2005-04-16 15:20:36 -07:00

	spin_lock_bh(&serv->sv_lock);
2007-02-12 00:53:28 -08:00
	if (is_temporary) {
2007-12-30 21:07:48 -06:00
		set_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
2005-04-16 15:20:36 -07:00
		list_add(&svsk->sk_list, &serv->sv_tempsocks);
		serv->sv_tmpcnt++;
2006-10-02 02:17:54 -07:00
		if (serv->sv_temptimer.function == NULL) {
			/* setup timer to age temp sockets */
			setup_timer(&serv->sv_temptimer, svc_age_temp_sockets,
					(unsigned long)serv);
			mod_timer(&serv->sv_temptimer,
					jiffies + svc_conn_age_period * HZ);
		}
2005-04-16 15:20:36 -07:00
	} else {
2007-12-30 21:07:48 -06:00
		clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
2005-04-16 15:20:36 -07:00
		list_add(&svsk->sk_list, &serv->sv_permsocks);
	}
	spin_unlock_bh(&serv->sv_lock);

	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
				svsk, svsk->sk_sk);

	return svsk;
}
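
/*
 * Added note: the sk_ostate/sk_odata/sk_owspace fields saved above hold
 * the socket's original callbacks; svc_sock_detach() below restores
 * them, so that once the svc_sock is torn down the socket stops calling
 * into freed RPC server state.
 */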
2006-10-02 02:17:48 -07:00
int svc_addsock(struct svc_serv *serv,
		int fd,
		char *name_return,
		int *proto)
{
	int err = 0;
	struct socket *so = sockfd_lookup(fd, &err);
	struct svc_sock *svsk = NULL;

	if (!so)
		return err;
	if (so->sk->sk_family != AF_INET)
		err = -EAFNOSUPPORT;
	else if (so->sk->sk_protocol != IPPROTO_TCP &&
	    so->sk->sk_protocol != IPPROTO_UDP)
		err = -EPROTONOSUPPORT;
	else if (so->state > SS_UNCONNECTED)
		err = -EISCONN;
	else {
2007-02-12 00:53:28 -08:00
		svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS);
2007-02-12 00:53:30 -08:00
		if (svsk) {
			svc_sock_received(svsk);
2006-10-02 02:17:48 -07:00
			err = 0;
2007-02-12 00:53:30 -08:00
		}
2006-10-02 02:17:48 -07:00
	}
	if (err) {
		sockfd_put(so);
		return err;
	}
	if (proto) *proto = so->sk->sk_protocol;
	return one_sock_name(name_return, svsk);
}
EXPORT_SYMBOL_GPL(svc_addsock);
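
/*
 * Illustrative use (my assumption, modelled on nfsd's control
 * interface): user space creates, binds and optionally listens on a
 * socket itself, then hands the descriptor to the kernel, which ends
 * up here:
 *
 *	char name[64];
 *	int proto;
 *	int len = svc_addsock(serv, fd, name, &proto);
 *
 * On success, len is the length of a one-line description of the new
 * socket written into name by one_sock_name(); on failure the fd's
 * reference is dropped and a negative errno is returned.
 */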
2005-04-16 15:20:36 -07:00
/*
 * Create socket for RPC service.
 */
2007-12-30 21:07:42 -06:00
static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
					  int protocol,
					  struct sockaddr *sin, int len,
					  int flags)
2005-04-16 15:20:36 -07:00
{
	struct svc_sock	*svsk;
	struct socket	*sock;
	int		error;
	int		type;
2007-02-12 00:53:32 -08:00
	char		buf[RPC_MAX_ADDRBUFLEN];
2005-04-16 15:20:36 -07:00
2007-02-12 00:53:32 -08:00
	dprintk("svc: svc_create_socket(%s, %d, %s)\n",
			serv->sv_program->pg_name, protocol,
2007-02-12 00:53:39 -08:00
			__svc_print_addr(sin, buf, sizeof(buf)));
2005-04-16 15:20:36 -07:00

	if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
		printk(KERN_WARNING "svc: only UDP and TCP "
				"sockets supported\n");
2007-12-30 21:07:42 -06:00
		return ERR_PTR(-EINVAL);
2005-04-16 15:20:36 -07:00
	}
	type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;

2007-02-12 00:53:39 -08:00
	error = sock_create_kern(sin->sa_family, type, protocol, &sock);
	if (error < 0)
2007-12-30 21:07:42 -06:00
		return ERR_PTR(error);
2005-04-16 15:20:36 -07:00
2006-12-06 20:35:24 -08:00
	svc_reclassify_socket(sock);
2006-09-28 14:37:07 -07:00
	if (type == SOCK_STREAM)
2007-02-12 00:53:39 -08:00
		sock->sk->sk_reuse = 1;		/* allow address reuse */
	error = kernel_bind(sock, sin, len);
2006-09-28 14:37:07 -07:00
	if (error < 0)
		goto bummer;
2005-04-16 15:20:36 -07:00

	if (protocol == IPPROTO_TCP) {
2006-08-07 20:58:01 -07:00
		if ((error = kernel_listen(sock, 64)) < 0)
2005-04-16 15:20:36 -07:00
			goto bummer;
	}

2007-02-12 00:53:30 -08:00
	if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
		svc_sock_received(svsk);
2007-12-30 21:07:42 -06:00
		return (struct svc_xprt *)svsk;
2007-02-12 00:53:30 -08:00
	}
2005-04-16 15:20:36 -07:00

bummer:
	dprintk("svc: svc_create_socket error = %d\n", -error);
	sock_release(sock);
2007-12-30 21:07:42 -06:00
	return ERR_PTR(error);
2005-04-16 15:20:36 -07:00
}
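
/*
 * Illustrative call path (my assumption, based on the transport-class
 * split): a transport's xpo_create method builds the bind address for
 * the requested port and lands here, roughly:
 *
 *	struct sockaddr_in sin = {
 *		.sin_family		= AF_INET,
 *		.sin_addr.s_addr	= htonl(INADDR_ANY),
 *		.sin_port		= htons(port),
 *	};
 *	return svc_create_socket(serv, IPPROTO_TCP,
 *				 (struct sockaddr *)&sin, sizeof(sin),
 *				 flags);
 */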
2007-12-30 21:07:27 -06:00
/*
 * Detach the svc_sock from the socket so that no
 * more callbacks occur.
 */
static void svc_sock_detach(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct sock *sk = svsk->sk_sk;

	dprintk("svc: svc_sock_detach(%p)\n", svsk);

	/* put back the old socket callbacks */
	sk->sk_state_change = svsk->sk_ostate;
	sk->sk_data_ready = svsk->sk_odata;
	sk->sk_write_space = svsk->sk_owspace;
}

/*
 * Free the svc_sock's socket resources and the svc_sock itself.
 */
static void svc_sock_free(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	dprintk("svc: svc_sock_free(%p)\n", svsk);

	if (svsk->sk_info_authunix != NULL)
		svcauth_unix_info_release(svsk->sk_info_authunix);
	if (svsk->sk_sock->file)
		sockfd_put(svsk->sk_sock);
	else
		sock_release(svsk->sk_sock);
	kfree(svsk);
}
2005-04-16 15:20:36 -07:00
/*
 * Remove a dead socket
 */
2007-02-08 14:20:30 -08:00
static void
2005-04-16 15:20:36 -07:00
svc_delete_socket(struct svc_sock *svsk)
{
	struct svc_serv	*serv;
	struct sock	*sk;

	dprintk("svc: svc_delete_socket(%p)\n", svsk);

2007-12-30 21:07:50 -06:00
	serv = svsk->sk_xprt.xpt_server;
2005-04-16 15:20:36 -07:00
	sk = svsk->sk_sk;

2007-12-30 21:07:27 -06:00
	svsk->sk_xprt.xpt_ops->xpo_detach(&svsk->sk_xprt);
2005-04-16 15:20:36 -07:00

	spin_lock_bh(&serv->sv_lock);

2007-12-30 21:07:48 -06:00
	if (!test_and_set_bit(XPT_DETACHED, &svsk->sk_xprt.xpt_flags))
2006-10-02 02:17:54 -07:00
		list_del_init(&svsk->sk_list);
2007-02-09 15:38:13 -08:00
	/*
2006-10-02 02:17:58 -07:00
	 * We used to delete the svc_sock from whichever list
	 * its sk_ready node was on, but we don't actually
	 * need to.  This is because the only time we're called
	 * while still attached to a queue, the queue itself
	 * is about to be destroyed (in svc_destroy).
	 */
2007-12-30 21:07:48 -06:00
	if (!test_and_set_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags)) {
2007-12-30 21:07:46 -06:00
		BUG_ON(atomic_read(&svsk->sk_xprt.xpt_ref.refcount) < 2);
2007-12-30 21:07:48 -06:00
		if (test_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags))
2005-04-16 15:20:36 -07:00
			serv->sv_tmpcnt--;
2007-12-30 21:07:46 -06:00
		svc_xprt_put(&svsk->sk_xprt);
2007-02-08 14:20:30 -08:00
	}
2005-04-16 15:20:36 -07:00
2006-10-29 22:46:45 -08:00
	spin_unlock_bh(&serv->sv_lock);
2007-02-08 14:20:30 -08:00
}
2007-03-06 01:42:22 -08:00
static void svc_close_socket(struct svc_sock *svsk)
2007-02-08 14:20:30 -08:00
{
2007-12-30 21:07:48 -06:00
	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
	if (test_and_set_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags))
2007-02-08 14:20:30 -08:00
		/* someone else will have to effect the close */
		return;

2007-12-30 21:07:46 -06:00
	svc_xprt_get(&svsk->sk_xprt);
2007-02-08 14:20:30 -08:00
	svc_delete_socket(svsk);
2007-12-30 21:07:48 -06:00
	clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
2007-12-30 21:07:46 -06:00
	svc_xprt_put(&svsk->sk_xprt);
2005-04-16 15:20:36 -07:00
}
2007-03-06 01:42:22 -08:00
void svc_force_close_socket(struct svc_sock *svsk)
{
2007-12-30 21:07:48 -06:00
	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
	if (test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)) {
2007-03-06 01:42:22 -08:00
		/* Waiting to be processed, but no threads left,
		 * so just remove it from the waiting list
		 */
		list_del_init(&svsk->sk_ready);
2007-12-30 21:07:48 -06:00
		clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
2007-03-06 01:42:22 -08:00
	}
	svc_close_socket(svsk);
}
2005-04-16 15:20:36 -07:00
/*
2007-02-09 15:38:13 -08:00
 * Handle defer and revisit of requests
2005-04-16 15:20:36 -07:00
 */
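/*
 * Added overview of the mechanism below: svc_defer() snapshots a
 * request that cannot be answered yet (e.g. while a cache upcall is
 * pending) and hands the snapshot to the cache layer.  When the cache
 * later calls svc_revisit(), the snapshot is queued on the socket's
 * sk_deferred list, XPT_DEFERRED is set, and the socket is re-enqueued
 * so that a server thread picks the request up again via
 * svc_deferred_dequeue() and replays it through svc_deferred_recv().
 */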
static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
{
	struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle);
	struct svc_sock *svsk;

	if (too_many) {
2007-12-30 21:07:46 -06:00
		svc_xprt_put(&dr->svsk->sk_xprt);
2005-04-16 15:20:36 -07:00
		kfree(dr);
		return;
	}
	dprintk("revisit queued\n");
	svsk = dr->svsk;
	dr->svsk = NULL;
2007-05-09 02:34:48 -07:00
	spin_lock(&svsk->sk_lock);
2005-04-16 15:20:36 -07:00
	list_add(&dr->handle.recent, &svsk->sk_deferred);
2007-05-09 02:34:48 -07:00
	spin_unlock(&svsk->sk_lock);
2007-12-30 21:07:48 -06:00
	set_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags);
2005-04-16 15:20:36 -07:00
	svc_sock_enqueue(svsk);
2007-12-30 21:07:46 -06:00
	svc_xprt_put(&svsk->sk_xprt);
2005-04-16 15:20:36 -07:00
}
static struct cache_deferred_req *
svc_defer(struct cache_req *req)
{
	struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
	int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len);
	struct svc_deferred_req *dr;

	if (rqstp->rq_arg.page_len)
		return NULL; /* if more than a page, give up FIXME */
	if (rqstp->rq_deferred) {
		dr = rqstp->rq_deferred;
		rqstp->rq_deferred = NULL;
	} else {
		int skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
		/* FIXME maybe discard if size too large */
		dr = kmalloc(size, GFP_KERNEL);
		if (dr == NULL)
			return NULL;

		dr->handle.owner = rqstp->rq_server;
		dr->prot = rqstp->rq_prot;
2007-02-12 00:53:33 -08:00
		memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen);
		dr->addrlen = rqstp->rq_addrlen;
2006-01-18 17:43:16 -08:00
		dr->daddr = rqstp->rq_daddr;
2005-04-16 15:20:36 -07:00
		dr->argslen = rqstp->rq_arg.len >> 2;
		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
	}
2007-12-30 21:07:46 -06:00
	svc_xprt_get(rqstp->rq_xprt);
2005-04-16 15:20:36 -07:00
	dr->svsk = rqstp->rq_sock;
	dr->handle.revisit = svc_revisit;
	return &dr->handle;
}
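
/*
 * Added illustration of the argslen arithmetic above and below: the
 * argument length is stored in 32-bit XDR words, so a 120-byte request
 * is saved as argslen = 120 >> 2 = 30, and svc_deferred_recv() restores
 * rq_arg.len = 30 << 2 = 120 when the request is replayed.
 */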
/*
 * recv data from a deferred request into an active one
 */
static int svc_deferred_recv(struct svc_rqst *rqstp)
{
	struct svc_deferred_req *dr = rqstp->rq_deferred;

	rqstp->rq_arg.head[0].iov_base = dr->args;
	rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
	rqstp->rq_arg.page_len = 0;
	rqstp->rq_arg.len = dr->argslen<<2;
	rqstp->rq_prot        = dr->prot;
2007-02-12 00:53:33 -08:00
	memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
	rqstp->rq_addrlen     = dr->addrlen;
2006-01-18 17:43:16 -08:00
	rqstp->rq_daddr       = dr->daddr;
2006-10-04 02:15:46 -07:00
	rqstp->rq_respages    = rqstp->rq_pages;
2005-04-16 15:20:36 -07:00
	return dr->argslen<<2;
}
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
{
	struct svc_deferred_req *dr = NULL;
2007-02-09 15:38:13 -08:00
2007-12-30 21:07:48 -06:00
	if (!test_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags))
2005-04-16 15:20:36 -07:00
		return NULL;
2007-05-09 02:34:48 -07:00
	spin_lock(&svsk->sk_lock);
2007-12-30 21:07:48 -06:00
	clear_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags);
2005-04-16 15:20:36 -07:00
	if (!list_empty(&svsk->sk_deferred)) {
		dr = list_entry(svsk->sk_deferred.next,
				struct svc_deferred_req,
				handle.recent);
		list_del_init(&dr->handle.recent);
2007-12-30 21:07:48 -06:00
		set_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags);
2005-04-16 15:20:36 -07:00
	}
2007-05-09 02:34:48 -07:00
	spin_unlock(&svsk->sk_lock);
2005-04-16 15:20:36 -07:00
	return dr;
}