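/* Kernel Connection Multiplexor (KCM) sockets.
 *
 * A KCM multiplexor (mux) ties a group of KCM sockets to a group of attached
 * transport sockets (psocks).  A BPF program supplied at attach time parses
 * message boundaries out of each TCP byte stream, and complete messages are
 * delivered to the KCM sockets as datagrams.
 */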
#include <linux/bpf.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/poll.h>
#include <linux/rculist.h>
#include <linux/skbuff.h>
#include <linux/socket.h>
#include <linux/uaccess.h>
#include <linux/workqueue.h>
#include <net/kcm.h>
#include <net/netns/generic.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <uapi/linux/kcm.h>
unsigned int kcm_net_id;

static struct kmem_cache *kcm_psockp __read_mostly;
static struct kmem_cache *kcm_muxp __read_mostly;
static struct workqueue_struct *kcm_wq;

static inline struct kcm_sock *kcm_sk(const struct sock *sk)
{
	return (struct kcm_sock *)sk;
}

static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
{
	return (struct kcm_tx_msg *)skb->cb;
}

static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb)
{
	return (struct kcm_rx_msg *)((void *)skb->cb +
				     offsetof(struct qdisc_skb_cb, data));
}

static void report_csk_error(struct sock *csk, int err)
{
	csk->sk_err = EPIPE;
	csk->sk_error_report(csk);
}
/* Callback lock held */
static void kcm_abort_rx_psock(struct kcm_psock *psock, int err,
			       struct sk_buff *skb)
{
	struct sock *csk = psock->sk;

	/* Unrecoverable error in receive */

	del_timer(&psock->rx_msg_timer);

	if (psock->rx_stopped)
		return;

	psock->rx_stopped = 1;
	KCM_STATS_INCR(psock->stats.rx_aborts);

	/* Report an error on the lower socket */
	report_csk_error(csk, err);
}
static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
			       bool wakeup_kcm)
{
	struct sock *csk = psock->sk;
	struct kcm_mux *mux = psock->mux;

	/* Unrecoverable error in transmit */

	spin_lock_bh(&mux->lock);

	if (psock->tx_stopped) {
		spin_unlock_bh(&mux->lock);
		return;
	}

	psock->tx_stopped = 1;
	KCM_STATS_INCR(psock->stats.tx_aborts);

	if (!psock->tx_kcm) {
		/* Take off psocks_avail list */
		list_del(&psock->psock_avail_list);
	} else if (wakeup_kcm) {
		/* In this case psock is being aborted while outside of
		 * write_msgs and psock is reserved. Schedule tx_work
		 * to handle the failure there. Need to commit tx_stopped
		 * before queuing work.
		 */
		smp_mb();
		queue_work(kcm_wq, &psock->tx_kcm->tx_work);
	}

	spin_unlock_bh(&mux->lock);

	/* Report error on lower socket */
	report_csk_error(csk, err);
}
/* RX mux lock held. */
static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
				    struct kcm_psock *psock)
{
	KCM_STATS_ADD(mux->stats.rx_bytes,
		      psock->stats.rx_bytes - psock->saved_rx_bytes);
	mux->stats.rx_msgs +=
		psock->stats.rx_msgs - psock->saved_rx_msgs;
	psock->saved_rx_msgs = psock->stats.rx_msgs;
	psock->saved_rx_bytes = psock->stats.rx_bytes;
}

static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
				    struct kcm_psock *psock)
{
	KCM_STATS_ADD(mux->stats.tx_bytes,
		      psock->stats.tx_bytes - psock->saved_tx_bytes);
	mux->stats.tx_msgs +=
		psock->stats.tx_msgs - psock->saved_tx_msgs;
	psock->saved_tx_msgs = psock->stats.tx_msgs;
	psock->saved_tx_bytes = psock->stats.tx_bytes;
}
static int kcm_queue_rcv_skb ( struct sock * sk , struct sk_buff * skb ) ;
/* KCM is ready to receive messages on its queue-- either the KCM is new or
* has become unblocked after being blocked on full socket buffer . Queue any
* pending ready messages on a psock . RX mux lock held .
*/
static void kcm_rcv_ready ( struct kcm_sock * kcm )
{
struct kcm_mux * mux = kcm - > mux ;
struct kcm_psock * psock ;
struct sk_buff * skb ;
if ( unlikely ( kcm - > rx_wait | | kcm - > rx_psock | | kcm - > rx_disabled ) )
return ;
while ( unlikely ( ( skb = __skb_dequeue ( & mux - > rx_hold_queue ) ) ) ) {
if ( kcm_queue_rcv_skb ( & kcm - > sk , skb ) ) {
/* Assuming buffer limit has been reached */
skb_queue_head ( & mux - > rx_hold_queue , skb ) ;
WARN_ON ( ! sk_rmem_alloc_get ( & kcm - > sk ) ) ;
return ;
}
}
while ( ! list_empty ( & mux - > psocks_ready ) ) {
psock = list_first_entry ( & mux - > psocks_ready , struct kcm_psock ,
psock_ready_list ) ;
if ( kcm_queue_rcv_skb ( & kcm - > sk , psock - > ready_rx_msg ) ) {
/* Assuming buffer limit has been reached */
WARN_ON ( ! sk_rmem_alloc_get ( & kcm - > sk ) ) ;
return ;
}
/* Consumed the ready message on the psock. Schedule rx_work to
* get more messages .
*/
list_del ( & psock - > psock_ready_list ) ;
psock - > ready_rx_msg = NULL ;
/* Commit clearing of ready_rx_msg for queuing work */
smp_mb ( ) ;
queue_work ( kcm_wq , & psock - > rx_work ) ;
}
/* Buffer limit is okay now, add to ready list */
list_add_tail ( & kcm - > wait_rx_list ,
& kcm - > mux - > kcm_rx_waiters ) ;
kcm - > rx_wait = true ;
}
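/* skb destructor for messages queued to a KCM socket.  Uncharges receive
 * memory and, once the receive queue drains below sk_rcvlowat and no psock
 * is reserved, puts the KCM socket back on the mux's receiver wait list.
 */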
static void kcm_rfree ( struct sk_buff * skb )
{
struct sock * sk = skb - > sk ;
struct kcm_sock * kcm = kcm_sk ( sk ) ;
struct kcm_mux * mux = kcm - > mux ;
unsigned int len = skb - > truesize ;
sk_mem_uncharge ( sk , len ) ;
atomic_sub ( len , & sk - > sk_rmem_alloc ) ;
/* For reading rx_wait and rx_psock without holding lock */
smp_mb__after_atomic ( ) ;
if ( ! kcm - > rx_wait & & ! kcm - > rx_psock & &
sk_rmem_alloc_get ( sk ) < sk - > sk_rcvlowat ) {
spin_lock_bh ( & mux - > rx_lock ) ;
kcm_rcv_ready ( kcm ) ;
spin_unlock_bh ( & mux - > rx_lock ) ;
}
}
static int kcm_queue_rcv_skb ( struct sock * sk , struct sk_buff * skb )
{
struct sk_buff_head * list = & sk - > sk_receive_queue ;
if ( atomic_read ( & sk - > sk_rmem_alloc ) > = sk - > sk_rcvbuf )
return - ENOMEM ;
if ( ! sk_rmem_schedule ( sk , skb , skb - > truesize ) )
return - ENOBUFS ;
skb - > dev = NULL ;
skb_orphan ( skb ) ;
skb - > sk = sk ;
skb - > destructor = kcm_rfree ;
atomic_add ( skb - > truesize , & sk - > sk_rmem_alloc ) ;
sk_mem_charge ( sk , skb - > truesize ) ;
skb_queue_tail ( list , skb ) ;
if ( ! sock_flag ( sk , SOCK_DEAD ) )
sk - > sk_data_ready ( sk ) ;
return 0 ;
}
/* Requeue received messages for a kcm socket to other kcm sockets. This is
 * called when a kcm socket is receive disabled.
 * RX mux lock held.
 */
static void requeue_rx_msgs ( struct kcm_mux * mux , struct sk_buff_head * head )
{
struct sk_buff * skb ;
struct kcm_sock * kcm ;
while ( ( skb = __skb_dequeue ( head ) ) ) {
/* Reset destructor to avoid calling kcm_rcv_ready */
skb - > destructor = sock_rfree ;
skb_orphan ( skb ) ;
try_again :
if ( list_empty ( & mux - > kcm_rx_waiters ) ) {
skb_queue_tail ( & mux - > rx_hold_queue , skb ) ;
continue ;
}
kcm = list_first_entry ( & mux - > kcm_rx_waiters ,
struct kcm_sock , wait_rx_list ) ;
if ( kcm_queue_rcv_skb ( & kcm - > sk , skb ) ) {
/* Should mean socket buffer full */
list_del ( & kcm - > wait_rx_list ) ;
kcm - > rx_wait = false ;
/* Commit rx_wait to read in kcm_free */
smp_wmb ( ) ;
goto try_again ;
}
}
}
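/* Pick a KCM socket to deliver the next message from this psock to.  If no
 * KCM socket is currently waiting, the message is parked in
 * psock->ready_rx_msg and the psock is put on the mux's psocks_ready list
 * until a receiver becomes available.
 */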
/* Lower sock lock held */
static struct kcm_sock * reserve_rx_kcm ( struct kcm_psock * psock ,
struct sk_buff * head )
{
struct kcm_mux * mux = psock - > mux ;
struct kcm_sock * kcm ;
WARN_ON ( psock - > ready_rx_msg ) ;
if ( psock - > rx_kcm )
return psock - > rx_kcm ;
spin_lock_bh ( & mux - > rx_lock ) ;
if ( psock - > rx_kcm ) {
spin_unlock_bh ( & mux - > rx_lock ) ;
return psock - > rx_kcm ;
}
kcm_update_rx_mux_stats(mux, psock);
if ( list_empty ( & mux - > kcm_rx_waiters ) ) {
psock - > ready_rx_msg = head ;
list_add_tail ( & psock - > psock_ready_list ,
& mux - > psocks_ready ) ;
spin_unlock_bh ( & mux - > rx_lock ) ;
return NULL ;
}
kcm = list_first_entry ( & mux - > kcm_rx_waiters ,
struct kcm_sock , wait_rx_list ) ;
list_del ( & kcm - > wait_rx_list ) ;
kcm - > rx_wait = false ;
psock - > rx_kcm = kcm ;
kcm - > rx_psock = psock ;
spin_unlock_bh ( & mux - > rx_lock ) ;
return kcm ;
}
static void kcm_done ( struct kcm_sock * kcm ) ;
static void kcm_done_work ( struct work_struct * w )
{
kcm_done ( container_of ( w , struct kcm_sock , done_work ) ) ;
}
/* Lower sock held */
static void unreserve_rx_kcm ( struct kcm_psock * psock ,
bool rcv_ready )
{
struct kcm_sock * kcm = psock - > rx_kcm ;
struct kcm_mux * mux = psock - > mux ;
if ( ! kcm )
return ;
spin_lock_bh ( & mux - > rx_lock ) ;
psock - > rx_kcm = NULL ;
kcm - > rx_psock = NULL ;
/* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
* kcm_rfree
*/
smp_mb ( ) ;
if ( unlikely ( kcm - > done ) ) {
spin_unlock_bh ( & mux - > rx_lock ) ;
/* Need to run kcm_done in a task since we need to acquire
 * callback locks which may already be held here.
 */
INIT_WORK ( & kcm - > done_work , kcm_done_work ) ;
schedule_work ( & kcm - > done_work ) ;
return ;
}
if ( unlikely ( kcm - > rx_disabled ) ) {
requeue_rx_msgs ( mux , & kcm - > sk . sk_receive_queue ) ;
} else if ( rcv_ready | | unlikely ( ! sk_rmem_alloc_get ( & kcm - > sk ) ) ) {
/* Check for degenerative race with rx_wait that all
* data was dequeued ( accounted for in kcm_rfree ) .
*/
kcm_rcv_ready ( kcm ) ;
}
spin_unlock_bh ( & mux - > rx_lock ) ;
}
static void kcm_start_rx_timer(struct kcm_psock *psock)
{
	if (psock->sk->sk_rcvtimeo)
		mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo);
}
/* Macro to invoke filter function. */
#define KCM_RUN_FILTER(prog, ctx) \
	(*prog->bpf_func)(ctx, prog->insnsi)
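/* Message parser for an attached TCP socket, called from tcp_read_sock().
 * Incoming skbs are cloned and chained onto psock->rx_skb_head; once enough
 * header bytes have accumulated, the attached BPF program is run to determine
 * the full message length, and completed messages are handed to a KCM socket
 * via reserve_rx_kcm()/kcm_queue_rcv_skb().
 */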
/* Lower socket lock held */
static int kcm_tcp_recv ( read_descriptor_t * desc , struct sk_buff * orig_skb ,
unsigned int orig_offset , size_t orig_len )
{
struct kcm_psock * psock = ( struct kcm_psock * ) desc - > arg . data ;
struct kcm_rx_msg * rxm ;
struct kcm_sock * kcm ;
struct sk_buff * head , * skb ;
size_t eaten = 0 , cand_len ;
ssize_t extra ;
int err ;
bool cloned_orig = false ;
if ( psock - > ready_rx_msg )
return 0 ;
head = psock - > rx_skb_head ;
if ( head ) {
/* Message already in progress */
rxm = kcm_rx_msg ( head ) ;
if ( unlikely ( rxm - > early_eaten ) ) {
/* Already some number of bytes on the receive sock
* data saved in rx_skb_head , just indicate they
* are consumed .
*/
eaten = orig_len < = rxm - > early_eaten ?
orig_len : rxm - > early_eaten ;
rxm - > early_eaten - = eaten ;
return eaten ;
}
if ( unlikely ( orig_offset ) ) {
/* Getting data with a non-zero offset when a message is
 * in progress is not expected. If it does happen, we
 * need to clone and pull since we can't deal with
 * offsets in the skbs for a message except in the head.
 */
orig_skb = skb_clone ( orig_skb , GFP_ATOMIC ) ;
if ( ! orig_skb ) {
KCM_STATS_INCR(psock->stats.rx_mem_fail);
desc - > error = - ENOMEM ;
return 0 ;
}
if ( ! pskb_pull ( orig_skb , orig_offset ) ) {
KCM_STATS_INCR(psock->stats.rx_mem_fail);
kfree_skb ( orig_skb ) ;
desc - > error = - ENOMEM ;
return 0 ;
}
cloned_orig = true ;
orig_offset = 0 ;
}
if ( ! psock - > rx_skb_nextp ) {
/* We are going to append to the frags_list of head.
* Need to unshare the frag_list .
*/
err = skb_unclone ( head , GFP_ATOMIC ) ;
if ( err ) {
KCM_STATS_INCR(psock->stats.rx_mem_fail);
desc - > error = err ;
return 0 ;
}
if ( unlikely ( skb_shinfo ( head ) - > frag_list ) ) {
/* We can't append to an sk_buff that already
* has a frag_list . We create a new head , point
* the frag_list of that to the old head , and
* then are able to use the old head - > next for
* appending to the message .
*/
if ( WARN_ON ( head - > next ) ) {
desc - > error = - EINVAL ;
return 0 ;
}
skb = alloc_skb ( 0 , GFP_ATOMIC ) ;
if ( ! skb ) {
KCM_STATS_INCR(psock->stats.rx_mem_fail);
desc - > error = - ENOMEM ;
return 0 ;
}
skb - > len = head - > len ;
skb - > data_len = head - > len ;
skb - > truesize = head - > truesize ;
* kcm_rx_msg ( skb ) = * kcm_rx_msg ( head ) ;
psock - > rx_skb_nextp = & head - > next ;
skb_shinfo ( skb ) - > frag_list = head ;
psock - > rx_skb_head = skb ;
head = skb ;
} else {
psock - > rx_skb_nextp =
& skb_shinfo ( head ) - > frag_list ;
}
}
}
while ( eaten < orig_len ) {
/* Always clone since we will consume something */
skb = skb_clone ( orig_skb , GFP_ATOMIC ) ;
if ( ! skb ) {
KCM_STATS_INCR(psock->stats.rx_mem_fail);
desc - > error = - ENOMEM ;
break ;
}
cand_len = orig_len - eaten ;
head = psock - > rx_skb_head ;
if ( ! head ) {
head = skb ;
psock - > rx_skb_head = head ;
/* Will set rx_skb_nextp on next packet if needed */
psock - > rx_skb_nextp = NULL ;
rxm = kcm_rx_msg ( head ) ;
memset ( rxm , 0 , sizeof ( * rxm ) ) ;
rxm - > offset = orig_offset + eaten ;
} else {
/* Unclone since we may be appending to an skb that we
* already share a frag_list with .
*/
err = skb_unclone ( skb , GFP_ATOMIC ) ;
if ( err ) {
KCM_STATS_INCR(psock->stats.rx_mem_fail);
desc - > error = err ;
break ;
}
rxm = kcm_rx_msg ( head ) ;
* psock - > rx_skb_nextp = skb ;
psock - > rx_skb_nextp = & skb - > next ;
head - > data_len + = skb - > len ;
head - > len + = skb - > len ;
head - > truesize + = skb - > truesize ;
}
if ( ! rxm - > full_len ) {
ssize_t len ;
len = KCM_RUN_FILTER ( psock - > bpf_prog , head ) ;
if ( ! len ) {
/* Need more header to determine length */
if ( ! rxm - > accum_len ) {
/* Start RX timer for new message */
kcm_start_rx_timer ( psock ) ;
}
rxm - > accum_len + = cand_len ;
eaten + = cand_len ;
KCM_STATS_INCR(psock->stats.rx_need_more_hdr);
WARN_ON ( eaten ! = orig_len ) ;
break ;
} else if ( len > psock - > sk - > sk_rcvbuf ) {
/* Message length exceeds maximum allowed */
KCM_STATS_INCR ( psock - > stats . rx_msg_too_big ) ;
desc - > error = - EMSGSIZE ;
psock - > rx_skb_head = NULL ;
kcm_abort_rx_psock ( psock , EMSGSIZE , head ) ;
break ;
} else if ( len < = ( ssize_t ) head - > len -
skb - > len - rxm - > offset ) {
/* Length must be into new skb (and also
* greater than zero )
*/
KCM_STATS_INCR(psock->stats.rx_bad_hdr_len);
desc - > error = - EPROTO ;
psock - > rx_skb_head = NULL ;
kcm_abort_rx_psock ( psock , EPROTO , head ) ;
break ;
}
rxm - > full_len = len ;
}
extra = ( ssize_t ) ( rxm - > accum_len + cand_len ) - rxm - > full_len ;
if ( extra < 0 ) {
/* Message not complete yet. */
if ( rxm - > full_len - rxm - > accum_len >
tcp_inq ( psock - > sk ) ) {
/* Don't have the whole message in the socket
 * buffer. Set psock->rx_need_bytes to wait for
 * the rest of the message. Also, set "early
 * eaten" since we've already buffered the skb
 * but don't consume yet per tcp_read_sock.
 */
if ( ! rxm - > accum_len ) {
/* Start RX timer for new message */
kcm_start_rx_timer ( psock ) ;
}
psock - > rx_need_bytes = rxm - > full_len -
rxm - > accum_len ;
rxm - > accum_len + = cand_len ;
rxm - > early_eaten = cand_len ;
KCM_STATS_ADD ( psock - > stats . rx_bytes , cand_len ) ;
desc - > count = 0 ; /* Stop reading socket */
break ;
}
rxm - > accum_len + = cand_len ;
eaten + = cand_len ;
WARN_ON ( eaten ! = orig_len ) ;
break ;
}
/* Positive extra indicates more bytes than needed for the
 * message
 */
WARN_ON ( extra > cand_len ) ;
eaten + = ( cand_len - extra ) ;
/* Hurray, we have a new message! */
del_timer(&psock->rx_msg_timer);
psock - > rx_skb_head = NULL ;
KCM_STATS_INCR(psock->stats.rx_msgs);
try_queue :
kcm = reserve_rx_kcm ( psock , head ) ;
if ( ! kcm ) {
/* Unable to reserve a KCM, message is held in psock. */
break ;
}
if ( kcm_queue_rcv_skb ( & kcm - > sk , head ) ) {
/* Should mean socket buffer full */
unreserve_rx_kcm ( psock , false ) ;
goto try_queue ;
}
}
if ( cloned_orig )
kfree_skb ( orig_skb ) ;
KCM_STATS_ADD(psock->stats.rx_bytes, eaten);
return eaten ;
}
/* Called with lock held on lower socket */
static int psock_tcp_read_sock ( struct kcm_psock * psock )
{
read_descriptor_t desc ;
desc . arg . data = psock ;
desc . error = 0 ;
desc . count = 1 ; /* give more than one skb per call */
/* sk should be locked here, so okay to do tcp_read_sock */
tcp_read_sock ( psock - > sk , & desc , kcm_tcp_recv ) ;
unreserve_rx_kcm ( psock , true ) ;
return desc . error ;
}
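/* data_ready callback installed on the attached TCP socket.  The read is
 * skipped while a parsed message is still parked on the psock, or while
 * fewer than rx_need_bytes of a partially received message have arrived;
 * an -ENOMEM from the parser is retried from the delayed work queue.
 */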
/* Lower sock lock held */
static void psock_tcp_data_ready ( struct sock * sk )
{
struct kcm_psock * psock ;
read_lock_bh ( & sk - > sk_callback_lock ) ;
psock = ( struct kcm_psock * ) sk - > sk_user_data ;
if ( unlikely ( ! psock | | psock - > rx_stopped ) )
goto out ;
if ( psock - > ready_rx_msg )
goto out ;
if ( psock - > rx_need_bytes ) {
if ( tcp_inq ( sk ) > = psock - > rx_need_bytes )
psock - > rx_need_bytes = 0 ;
else
goto out ;
}
if ( psock_tcp_read_sock ( psock ) = = - ENOMEM )
queue_delayed_work ( kcm_wq , & psock - > rx_delayed_work , 0 ) ;
out :
read_unlock_bh ( & sk - > sk_callback_lock ) ;
}
static void do_psock_rx_work ( struct kcm_psock * psock )
{
read_descriptor_t rd_desc ;
struct sock * csk = psock - > sk ;
/* We need the read lock to synchronize with psock_tcp_data_ready. We
* need the socket lock for calling tcp_read_sock .
*/
lock_sock ( csk ) ;
read_lock_bh ( & csk - > sk_callback_lock ) ;
if ( unlikely ( csk - > sk_user_data ! = psock ) )
goto out ;
if ( unlikely ( psock - > rx_stopped ) )
goto out ;
if ( psock - > ready_rx_msg )
goto out ;
rd_desc . arg . data = psock ;
if ( psock_tcp_read_sock ( psock ) = = - ENOMEM )
queue_delayed_work ( kcm_wq , & psock - > rx_delayed_work , 0 ) ;
out :
read_unlock_bh ( & csk - > sk_callback_lock ) ;
release_sock ( csk ) ;
}
static void psock_rx_work ( struct work_struct * w )
{
do_psock_rx_work ( container_of ( w , struct kcm_psock , rx_work ) ) ;
}
static void psock_rx_delayed_work ( struct work_struct * w )
{
do_psock_rx_work ( container_of ( w , struct kcm_psock ,
rx_delayed_work . work ) ) ;
}
static void psock_tcp_state_change ( struct sock * sk )
{
/* TCP only does a POLLIN for a half close. Do a POLLHUP here
* since application will normally not poll with POLLIN
* on the TCP sockets .
*/
report_csk_error ( sk , EPIPE ) ;
}
static void psock_tcp_write_space ( struct sock * sk )
{
struct kcm_psock * psock ;
struct kcm_mux * mux ;
struct kcm_sock * kcm ;
read_lock_bh ( & sk - > sk_callback_lock ) ;
psock = ( struct kcm_psock * ) sk - > sk_user_data ;
if ( unlikely ( ! psock ) )
goto out ;
mux = psock - > mux ;
spin_lock_bh ( & mux - > lock ) ;
/* Check if the socket is reserved; if so, someone is waiting to send on it. */
kcm = psock - > tx_kcm ;
if ( kcm )
queue_work ( kcm_wq , & kcm - > tx_work ) ;
spin_unlock_bh ( & mux - > lock ) ;
out :
read_unlock_bh ( & sk - > sk_callback_lock ) ;
}
static void unreserve_psock ( struct kcm_sock * kcm ) ;
/* kcm sock is locked. */
static struct kcm_psock * reserve_psock ( struct kcm_sock * kcm )
{
struct kcm_mux * mux = kcm - > mux ;
struct kcm_psock * psock ;
psock = kcm - > tx_psock ;
smp_rmb ( ) ; /* Must read tx_psock before tx_wait */
if ( psock ) {
WARN_ON ( kcm - > tx_wait ) ;
if ( unlikely ( psock - > tx_stopped ) )
unreserve_psock ( kcm ) ;
else
return kcm - > tx_psock ;
}
spin_lock_bh ( & mux - > lock ) ;
/* Check again under lock to see if a psock was reserved for this
 * kcm socket in the meantime (via psock_now_avail).
 */
psock = kcm - > tx_psock ;
if ( unlikely ( psock ) ) {
WARN_ON ( kcm - > tx_wait ) ;
spin_unlock_bh ( & mux - > lock ) ;
return kcm - > tx_psock ;
}
if ( ! list_empty ( & mux - > psocks_avail ) ) {
psock = list_first_entry ( & mux - > psocks_avail ,
struct kcm_psock ,
psock_avail_list ) ;
list_del ( & psock - > psock_avail_list ) ;
if ( kcm - > tx_wait ) {
list_del ( & kcm - > wait_psock_list ) ;
kcm - > tx_wait = false ;
}
kcm - > tx_psock = psock ;
psock - > tx_kcm = kcm ;
KCM_STATS_INCR(psock->stats.reserved);
} else if ( ! kcm - > tx_wait ) {
list_add_tail ( & kcm - > wait_psock_list ,
& mux - > kcm_tx_waiters ) ;
kcm - > tx_wait = true ;
}
spin_unlock_bh ( & mux - > lock ) ;
return psock ;
}
/* mux lock held */
static void psock_now_avail ( struct kcm_psock * psock )
{
struct kcm_mux * mux = psock - > mux ;
struct kcm_sock * kcm ;
if ( list_empty ( & mux - > kcm_tx_waiters ) ) {
list_add_tail ( & psock - > psock_avail_list ,
& mux - > psocks_avail ) ;
} else {
kcm = list_first_entry ( & mux - > kcm_tx_waiters ,
struct kcm_sock ,
wait_psock_list ) ;
list_del ( & kcm - > wait_psock_list ) ;
kcm - > tx_wait = false ;
psock - > tx_kcm = kcm ;
/* Commit before changing tx_psock since that is read in
* reserve_psock before queuing work .
*/
smp_mb ( ) ;
kcm - > tx_psock = psock ;
KCM_STATS_INCR(psock->stats.reserved);
queue_work ( kcm_wq , & kcm - > tx_work ) ;
}
}
/* kcm sock is locked. */
static void unreserve_psock ( struct kcm_sock * kcm )
{
struct kcm_psock * psock ;
struct kcm_mux * mux = kcm - > mux ;
spin_lock_bh ( & mux - > lock ) ;
psock = kcm - > tx_psock ;
if ( WARN_ON ( ! psock ) ) {
spin_unlock_bh ( & mux - > lock ) ;
return ;
}
smp_rmb ( ) ; /* Read tx_psock before tx_wait */
kcm_update_tx_mux_stats(mux, psock);
WARN_ON ( kcm - > tx_wait ) ;
kcm - > tx_psock = NULL ;
psock - > tx_kcm = NULL ;
KCM_STATS_INCR(psock->stats.unreserved);
if ( unlikely ( psock - > tx_stopped ) ) {
if ( psock - > done ) {
/* Deferred free */
list_del ( & psock - > psock_list ) ;
mux - > psocks_cnt - - ;
sock_put ( psock - > sk ) ;
fput ( psock - > sk - > sk_socket - > file ) ;
kmem_cache_free ( kcm_psockp , psock ) ;
}
/* Don't put back on available list */
spin_unlock_bh ( & mux - > lock ) ;
return ;
}
psock_now_avail ( psock ) ;
spin_unlock_bh ( & mux - > lock ) ;
}
static void kcm_report_tx_retry(struct kcm_sock *kcm)
{
	struct kcm_mux *mux = kcm->mux;

	spin_lock_bh(&mux->lock);
	KCM_STATS_INCR(mux->stats.tx_retries);
	spin_unlock_bh(&mux->lock);
}
/* Write any messages ready on the kcm socket. Called with kcm sock lock
* held . Return bytes actually sent or error .
*/
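/* Each message on the write queue is one skb (plus frag_list); its frag
 * pages are pushed to the reserved psock with kernel_sendpage().  A partial
 * send (-EAGAIN) records its position in kcm_tx_msg(head) and resumes when
 * the lower socket signals write space; a hard send error aborts the psock
 * and the message is retried from the start, possibly on another psock.
 */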
static int kcm_write_msgs ( struct kcm_sock * kcm )
{
struct sock * sk = & kcm - > sk ;
struct kcm_psock * psock ;
struct sk_buff * skb , * head ;
struct kcm_tx_msg * txm ;
unsigned short fragidx , frag_offset ;
unsigned int sent , total_sent = 0 ;
int ret = 0 ;
kcm - > tx_wait_more = false ;
psock = kcm - > tx_psock ;
if ( unlikely ( psock & & psock - > tx_stopped ) ) {
/* A reserved psock was aborted asynchronously. Unreserve
* it and we ' ll retry the message .
*/
unreserve_psock ( kcm ) ;
kcm_report_tx_retry(kcm);
if ( skb_queue_empty ( & sk - > sk_write_queue ) )
return 0 ;
kcm_tx_msg ( skb_peek ( & sk - > sk_write_queue ) ) - > sent = 0 ;
} else if ( skb_queue_empty ( & sk - > sk_write_queue ) ) {
return 0 ;
}
head = skb_peek ( & sk - > sk_write_queue ) ;
txm = kcm_tx_msg ( head ) ;
if ( txm - > sent ) {
/* Send of first skbuff in queue already in progress */
if ( WARN_ON ( ! psock ) ) {
ret = - EINVAL ;
goto out ;
}
sent = txm - > sent ;
frag_offset = txm - > frag_offset ;
fragidx = txm - > fragidx ;
skb = txm - > frag_skb ;
goto do_frag ;
}
try_again :
psock = reserve_psock ( kcm ) ;
if ( ! psock )
goto out ;
do {
skb = head ;
txm = kcm_tx_msg ( head ) ;
sent = 0 ;
do_frag_list :
if ( WARN_ON ( ! skb_shinfo ( skb ) - > nr_frags ) ) {
ret = - EINVAL ;
goto out ;
}
for ( fragidx = 0 ; fragidx < skb_shinfo ( skb ) - > nr_frags ;
fragidx + + ) {
skb_frag_t * frag ;
frag_offset = 0 ;
do_frag :
frag = & skb_shinfo ( skb ) - > frags [ fragidx ] ;
if ( WARN_ON ( ! frag - > size ) ) {
ret = - EINVAL ;
goto out ;
}
ret = kernel_sendpage ( psock - > sk - > sk_socket ,
frag - > page . p ,
frag - > page_offset + frag_offset ,
frag - > size - frag_offset ,
MSG_DONTWAIT ) ;
if ( ret < = 0 ) {
if ( ret = = - EAGAIN ) {
/* Save state to try again when there's
* write space on the socket
*/
txm - > sent = sent ;
txm - > frag_offset = frag_offset ;
txm - > fragidx = fragidx ;
txm - > frag_skb = skb ;
ret = 0 ;
goto out ;
}
/* Hard failure in sending message, abort this
 * psock since it has lost framing
 * synchronization and retry sending the
 * message from the beginning.
 */
kcm_abort_tx_psock ( psock , ret ? - ret : EPIPE ,
true ) ;
unreserve_psock ( kcm ) ;
txm - > sent = 0 ;
kcm_report_tx_retry(kcm);
ret = 0 ;
goto try_again ;
}
sent + = ret ;
frag_offset + = ret ;
KCM_STATS_ADD(psock->stats.tx_bytes, ret);
if ( frag_offset < frag - > size ) {
/* Not finished with this frag */
goto do_frag ;
}
}
if ( skb = = head ) {
if ( skb_has_frag_list ( skb ) ) {
skb = skb_shinfo ( skb ) - > frag_list ;
goto do_frag_list ;
}
} else if ( skb - > next ) {
skb = skb - > next ;
goto do_frag_list ;
}
/* Successfully sent the whole packet, account for it. */
skb_dequeue ( & sk - > sk_write_queue ) ;
kfree_skb ( head ) ;
sk - > sk_wmem_queued - = sent ;
total_sent + = sent ;
KCM_STATS_INCR(psock->stats.tx_msgs);
} while ( ( head = skb_peek ( & sk - > sk_write_queue ) ) ) ;
out :
if ( ! head ) {
/* Done with all queued messages. */
WARN_ON ( ! skb_queue_empty ( & sk - > sk_write_queue ) ) ;
unreserve_psock ( kcm ) ;
}
/* Check if write space is available */
sk - > sk_write_space ( sk ) ;
return total_sent ? : ret ;
}
static void kcm_tx_work ( struct work_struct * w )
{
struct kcm_sock * kcm = container_of ( w , struct kcm_sock , tx_work ) ;
struct sock * sk = & kcm - > sk ;
int err ;
lock_sock ( sk ) ;
/* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx
* aborts
*/
err = kcm_write_msgs ( kcm ) ;
if ( err < 0 ) {
/* Hard failure in write, report error on KCM socket */
pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err);
report_csk_error ( & kcm - > sk , - err ) ;
goto out ;
}
/* Primarily for SOCK_SEQPACKET sockets */
if ( likely ( sk - > sk_socket ) & &
test_bit ( SOCK_NOSPACE , & sk - > sk_socket - > flags ) ) {
clear_bit ( SOCK_NOSPACE , & sk - > sk_socket - > flags ) ;
sk - > sk_write_space ( sk ) ;
}
out :
release_sock ( sk ) ;
}
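/* Flush any completed messages held on the write queue by MSG_BATCH /
 * MSG_MORE batching before blocking or returning an error.
 */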
static void kcm_push ( struct kcm_sock * kcm )
{
if ( kcm - > tx_wait_more )
kcm_write_msgs ( kcm ) ;
}
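/* Zero-copy transmit path used by sendfile()/splice().  Pages are appended
 * to the open message as shared frags; since splice cannot pass MSG_EOR,
 * MSG_MORE (and MSG_SENDPAGE_NOTLAST) is what keeps a message open across
 * calls.
 */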
static ssize_t kcm_sendpage ( struct socket * sock , struct page * page ,
int offset , size_t size , int flags )
{
struct sock * sk = sock - > sk ;
struct kcm_sock * kcm = kcm_sk ( sk ) ;
struct sk_buff * skb = NULL , * head = NULL ;
long timeo = sock_sndtimeo ( sk , flags & MSG_DONTWAIT ) ;
bool eor ;
int err = 0 ;
int i ;
if ( flags & MSG_SENDPAGE_NOTLAST )
flags | = MSG_MORE ;
/* No MSG_EOR from splice, only look at MSG_MORE */
eor = ! ( flags & MSG_MORE ) ;
lock_sock ( sk ) ;
sk_clear_bit ( SOCKWQ_ASYNC_NOSPACE , sk ) ;
err = - EPIPE ;
if ( sk - > sk_err )
goto out_error ;
if ( kcm - > seq_skb ) {
/* Previously opened message */
head = kcm - > seq_skb ;
skb = kcm_tx_msg ( head ) - > last_skb ;
i = skb_shinfo ( skb ) - > nr_frags ;
if ( skb_can_coalesce ( skb , i , page , offset ) ) {
skb_frag_size_add ( & skb_shinfo ( skb ) - > frags [ i - 1 ] , size ) ;
skb_shinfo ( skb ) - > tx_flags | = SKBTX_SHARED_FRAG ;
goto coalesced ;
}
if ( i > = MAX_SKB_FRAGS ) {
struct sk_buff * tskb ;
tskb = alloc_skb(0, sk->sk_allocation);
while (!tskb) {
	kcm_push(kcm);
	err = sk_stream_wait_memory(sk, &timeo);
	if (err)
		goto out_error;

	/* Retry the allocation after waiting for memory */
	tskb = alloc_skb(0, sk->sk_allocation);
}
if ( head = = skb )
skb_shinfo ( head ) - > frag_list = tskb ;
else
skb - > next = tskb ;
skb = tskb ;
skb - > ip_summed = CHECKSUM_UNNECESSARY ;
i = 0 ;
}
} else {
/* Call the sk_stream functions to manage the sndbuf mem. */
if ( ! sk_stream_memory_free ( sk ) ) {
kcm_push ( kcm ) ;
set_bit ( SOCK_NOSPACE , & sk - > sk_socket - > flags ) ;
err = sk_stream_wait_memory ( sk , & timeo ) ;
if ( err )
goto out_error ;
}
head = alloc_skb(0, sk->sk_allocation);
while (!head) {
	kcm_push(kcm);
	err = sk_stream_wait_memory(sk, &timeo);
	if (err)
		goto out_error;

	/* Retry the allocation after waiting for memory */
	head = alloc_skb(0, sk->sk_allocation);
}
skb = head ;
i = 0 ;
}
get_page ( page ) ;
skb_fill_page_desc ( skb , i , page , offset , size ) ;
skb_shinfo ( skb ) - > tx_flags | = SKBTX_SHARED_FRAG ;
coalesced :
skb - > len + = size ;
skb - > data_len + = size ;
skb - > truesize + = size ;
sk - > sk_wmem_queued + = size ;
sk_mem_charge ( sk , size ) ;
if ( head ! = skb ) {
head - > len + = size ;
head - > data_len + = size ;
head - > truesize + = size ;
}
if ( eor ) {
bool not_busy = skb_queue_empty ( & sk - > sk_write_queue ) ;
/* Message complete, queue it on send buffer */
__skb_queue_tail ( & sk - > sk_write_queue , head ) ;
kcm - > seq_skb = NULL ;
KCM_STATS_INCR ( kcm - > stats . tx_msgs ) ;
if ( flags & MSG_BATCH ) {
kcm - > tx_wait_more = true ;
} else if ( kcm - > tx_wait_more | | not_busy ) {
err = kcm_write_msgs ( kcm ) ;
if ( err < 0 ) {
/* We got a hard error in write_msgs but have
* already queued this message . Report an error
* in the socket , but don ' t affect return value
* from sendmsg
*/
pr_warn("KCM: Hard failure on kcm_write_msgs\n");
report_csk_error ( & kcm - > sk , - err ) ;
}
}
} else {
/* Message not complete, save state */
kcm - > seq_skb = head ;
kcm_tx_msg ( head ) - > last_skb = skb ;
}
KCM_STATS_ADD ( kcm - > stats . tx_bytes , size ) ;
release_sock ( sk ) ;
return size ;
out_error :
kcm_push ( kcm ) ;
err = sk_stream_error ( sk , flags , err ) ;
/* make sure we wake any epoll edge trigger waiter */
if ( unlikely ( skb_queue_len ( & sk - > sk_write_queue ) = = 0 & & err = = - EAGAIN ) )
sk - > sk_write_space ( sk ) ;
release_sock ( sk ) ;
return err ;
}
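/* Build a KCM message from user data.  For SOCK_DGRAM every sendmsg without
 * MSG_MORE completes a message; for SOCK_SEQPACKET a message is terminated
 * by MSG_EOR.  MSG_BATCH leaves completed messages on the write queue so
 * that several can be flushed by a single kcm_write_msgs() call.
 */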
static int kcm_sendmsg ( struct socket * sock , struct msghdr * msg , size_t len )
{
struct sock * sk = sock - > sk ;
struct kcm_sock * kcm = kcm_sk ( sk ) ;
struct sk_buff * skb = NULL , * head = NULL ;
size_t copy , copied = 0 ;
long timeo = sock_sndtimeo ( sk , msg - > msg_flags & MSG_DONTWAIT ) ;
int eor = ( sock - > type = = SOCK_DGRAM ) ?
! ( msg - > msg_flags & MSG_MORE ) : ! ! ( msg - > msg_flags & MSG_EOR ) ;
int err = - EPIPE ;
lock_sock ( sk ) ;
/* Per tcp_sendmsg this should be in poll */
sk_clear_bit ( SOCKWQ_ASYNC_NOSPACE , sk ) ;
if ( sk - > sk_err )
goto out_error ;
if ( kcm - > seq_skb ) {
/* Previously opened message */
head = kcm - > seq_skb ;
skb = kcm_tx_msg ( head ) - > last_skb ;
goto start ;
}
/* Call the sk_stream functions to manage the sndbuf mem. */
if ( ! sk_stream_memory_free ( sk ) ) {
kcm_push ( kcm ) ;
set_bit ( SOCK_NOSPACE , & sk - > sk_socket - > flags ) ;
err = sk_stream_wait_memory ( sk , & timeo ) ;
if ( err )
goto out_error ;
}
/* New message, alloc head skb */
head = alloc_skb ( 0 , sk - > sk_allocation ) ;
while ( ! head ) {
kcm_push ( kcm ) ;
err = sk_stream_wait_memory ( sk , & timeo ) ;
if ( err )
goto out_error ;
head = alloc_skb ( 0 , sk - > sk_allocation ) ;
}
skb = head ;
/* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
* csum_and_copy_from_iter from skb_do_copy_data_nocache .
*/
skb - > ip_summed = CHECKSUM_UNNECESSARY ;
start :
while ( msg_data_left ( msg ) ) {
bool merge = true ;
int i = skb_shinfo ( skb ) - > nr_frags ;
struct page_frag * pfrag = sk_page_frag ( sk ) ;
if ( ! sk_page_frag_refill ( sk , pfrag ) )
goto wait_for_memory ;
if ( ! skb_can_coalesce ( skb , i , pfrag - > page ,
pfrag - > offset ) ) {
if ( i = = MAX_SKB_FRAGS ) {
struct sk_buff * tskb ;
tskb = alloc_skb ( 0 , sk - > sk_allocation ) ;
if ( ! tskb )
goto wait_for_memory ;
if ( head = = skb )
skb_shinfo ( head ) - > frag_list = tskb ;
else
skb - > next = tskb ;
skb = tskb ;
skb - > ip_summed = CHECKSUM_UNNECESSARY ;
continue ;
}
merge = false ;
}
copy = min_t ( int , msg_data_left ( msg ) ,
pfrag - > size - pfrag - > offset ) ;
if ( ! sk_wmem_schedule ( sk , copy ) )
goto wait_for_memory ;
err = skb_copy_to_page_nocache ( sk , & msg - > msg_iter , skb ,
pfrag - > page ,
pfrag - > offset ,
copy ) ;
if ( err )
goto out_error ;
/* Update the skb. */
if ( merge ) {
skb_frag_size_add ( & skb_shinfo ( skb ) - > frags [ i - 1 ] , copy ) ;
} else {
skb_fill_page_desc ( skb , i , pfrag - > page ,
pfrag - > offset , copy ) ;
get_page ( pfrag - > page ) ;
}
pfrag - > offset + = copy ;
copied + = copy ;
if ( head ! = skb ) {
head - > len + = copy ;
head - > data_len + = copy ;
}
continue ;
wait_for_memory :
kcm_push ( kcm ) ;
err = sk_stream_wait_memory ( sk , & timeo ) ;
if ( err )
goto out_error ;
}
if ( eor ) {
bool not_busy = skb_queue_empty ( & sk - > sk_write_queue ) ;
/* Message complete, queue it on send buffer */
__skb_queue_tail ( & sk - > sk_write_queue , head ) ;
kcm - > seq_skb = NULL ;
KCM_STATS_INCR(kcm->stats.tx_msgs);
if ( msg - > msg_flags & MSG_BATCH ) {
kcm - > tx_wait_more = true ;
} else if ( kcm - > tx_wait_more | | not_busy ) {
err = kcm_write_msgs ( kcm ) ;
if ( err < 0 ) {
/* We got a hard error in write_msgs but have
* already queued this message . Report an error
* in the socket , but don ' t affect return value
* from sendmsg
*/
pr_warn("KCM: Hard failure on kcm_write_msgs\n");
report_csk_error ( & kcm - > sk , - err ) ;
}
}
} else {
/* Message not complete, save state */
partial_message :
kcm - > seq_skb = head ;
kcm_tx_msg ( head ) - > last_skb = skb ;
}
KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
release_sock ( sk ) ;
return copied ;
out_error :
kcm_push ( kcm ) ;
if ( copied & & sock - > type = = SOCK_SEQPACKET ) {
/* Wrote some bytes before encountering an
* error , return partial success .
*/
goto partial_message ;
}
if ( head ! = kcm - > seq_skb )
kfree_skb ( head ) ;
err = sk_stream_error ( sk , msg - > msg_flags , err ) ;
/* make sure we wake any epoll edge trigger waiter */
if ( unlikely ( skb_queue_len ( & sk - > sk_write_queue ) = = 0 & & err = = - EAGAIN ) )
sk - > sk_write_space ( sk ) ;
release_sock ( sk ) ;
return err ;
}
static struct sk_buff * kcm_wait_data ( struct sock * sk , int flags ,
long timeo , int * err )
{
struct sk_buff * skb ;
while ( ! ( skb = skb_peek ( & sk - > sk_receive_queue ) ) ) {
if ( sk - > sk_err ) {
* err = sock_error ( sk ) ;
return NULL ;
}
if ( sock_flag ( sk , SOCK_DONE ) )
return NULL ;
if ( ( flags & MSG_DONTWAIT ) | | ! timeo ) {
* err = - EAGAIN ;
return NULL ;
}
sk_wait_data ( sk , & timeo , NULL ) ;
/* Handle signals */
if ( signal_pending ( current ) ) {
* err = sock_intr_errno ( timeo ) ;
return NULL ;
}
}
return skb ;
}
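/* Receive one message (or the remainder of one).  SOCK_DGRAM truncates an
 * oversized message and drops the rest; SOCK_SEQPACKET returns the rest on
 * subsequent calls and sets MSG_EOR once the message has been fully read.
 */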
static int kcm_recvmsg ( struct socket * sock , struct msghdr * msg ,
size_t len , int flags )
{
struct sock * sk = sock - > sk ;
struct kcm_sock *kcm = kcm_sk(sk);
int err = 0 ;
long timeo ;
struct kcm_rx_msg * rxm ;
int copied = 0 ;
struct sk_buff * skb ;
timeo = sock_rcvtimeo ( sk , flags & MSG_DONTWAIT ) ;
lock_sock ( sk ) ;
skb = kcm_wait_data ( sk , flags , timeo , & err ) ;
if ( ! skb )
goto out ;
/* Okay, have a message on the receive queue */
rxm = kcm_rx_msg ( skb ) ;
if ( len > rxm - > full_len )
len = rxm - > full_len ;
err = skb_copy_datagram_msg ( skb , rxm - > offset , msg , len ) ;
if ( err < 0 )
goto out ;
copied = len ;
if ( likely ( ! ( flags & MSG_PEEK ) ) ) {
KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
if ( copied < rxm - > full_len ) {
if ( sock - > type = = SOCK_DGRAM ) {
/* Truncated message */
msg - > msg_flags | = MSG_TRUNC ;
goto msg_finished ;
}
rxm - > offset + = copied ;
rxm - > full_len - = copied ;
} else {
msg_finished :
/* Finished with message */
msg - > msg_flags | = MSG_EOR ;
KCM_STATS_INCR(kcm->stats.rx_msgs);
skb_unlink ( skb , & sk - > sk_receive_queue ) ;
kfree_skb ( skb ) ;
}
}
out :
release_sock ( sk ) ;
return copied ? : err ;
}
static ssize_t kcm_sock_splice ( struct sock * sk ,
struct pipe_inode_info * pipe ,
struct splice_pipe_desc * spd )
{
int ret ;
release_sock ( sk ) ;
ret = splice_to_pipe ( pipe , spd ) ;
lock_sock ( sk ) ;
return ret ;
}
static ssize_t kcm_splice_read ( struct socket * sock , loff_t * ppos ,
struct pipe_inode_info * pipe , size_t len ,
unsigned int flags )
{
struct sock * sk = sock - > sk ;
struct kcm_sock * kcm = kcm_sk ( sk ) ;
long timeo ;
struct kcm_rx_msg * rxm ;
int err = 0 ;
ssize_t copied;
struct sk_buff * skb ;
/* Only support splice for SOCK_SEQPACKET */
timeo = sock_rcvtimeo ( sk , flags & MSG_DONTWAIT ) ;
lock_sock ( sk ) ;
skb = kcm_wait_data ( sk , flags , timeo , & err ) ;
if ( ! skb )
goto err_out ;
/* Okay, have a message on the receive queue */
rxm = kcm_rx_msg ( skb ) ;
if ( len > rxm - > full_len )
len = rxm - > full_len ;
copied = skb_splice_bits ( skb , sk , rxm - > offset , pipe , len , flags ,
kcm_sock_splice ) ;
if ( copied < 0 ) {
err = copied ;
goto err_out ;
}
KCM_STATS_ADD ( kcm - > stats . rx_bytes , copied ) ;
rxm - > offset + = copied ;
rxm - > full_len - = copied ;
/* We have no way to return MSG_EOR. If all the bytes have been
* read we still leave the message in the receive socket buffer .
* A subsequent recvmsg needs to be done to return MSG_EOR and
* finish reading the message .
*/
release_sock ( sk ) ;
return copied ;
err_out :
release_sock ( sk ) ;
return err ;
}
/* kcm sock lock held */
static void kcm_recv_disable ( struct kcm_sock * kcm )
{
struct kcm_mux * mux = kcm - > mux ;
if ( kcm - > rx_disabled )
return ;
spin_lock_bh ( & mux - > rx_lock ) ;
kcm - > rx_disabled = 1 ;
/* If a psock is reserved we'll do cleanup in unreserve */
if ( ! kcm - > rx_psock ) {
if ( kcm - > rx_wait ) {
list_del ( & kcm - > wait_rx_list ) ;
kcm - > rx_wait = false ;
}
requeue_rx_msgs ( mux , & kcm - > sk . sk_receive_queue ) ;
}
spin_unlock_bh ( & mux - > rx_lock ) ;
}
/* kcm sock lock held */
static void kcm_recv_enable ( struct kcm_sock * kcm )
{
struct kcm_mux * mux = kcm - > mux ;
if ( ! kcm - > rx_disabled )
return ;
spin_lock_bh ( & mux - > rx_lock ) ;
kcm - > rx_disabled = 0 ;
kcm_rcv_ready ( kcm ) ;
spin_unlock_bh ( & mux - > rx_lock ) ;
}
static int kcm_setsockopt ( struct socket * sock , int level , int optname ,
char __user * optval , unsigned int optlen )
{
struct kcm_sock * kcm = kcm_sk ( sock - > sk ) ;
int val , valbool ;
int err = 0 ;
if ( level ! = SOL_KCM )
return - ENOPROTOOPT ;
if ( optlen < sizeof ( int ) )
return - EINVAL ;
if ( get_user ( val , ( int __user * ) optval ) )
return - EINVAL ;
valbool = val ? 1 : 0 ;
switch ( optname ) {
case KCM_RECV_DISABLE :
lock_sock ( & kcm - > sk ) ;
if ( valbool )
kcm_recv_disable ( kcm ) ;
else
kcm_recv_enable ( kcm ) ;
release_sock ( & kcm - > sk ) ;
break ;
default :
err = - ENOPROTOOPT ;
}
return err ;
}
static int kcm_getsockopt ( struct socket * sock , int level , int optname ,
char __user * optval , int __user * optlen )
{
struct kcm_sock * kcm = kcm_sk ( sock - > sk ) ;
int val , len ;
if ( level ! = SOL_KCM )
return - ENOPROTOOPT ;
if ( get_user ( len , optlen ) )
return - EFAULT ;
len = min_t ( unsigned int , len , sizeof ( int ) ) ;
if ( len < 0 )
return - EINVAL ;
switch ( optname ) {
case KCM_RECV_DISABLE :
val = kcm - > rx_disabled ;
break ;
default :
return - ENOPROTOOPT ;
}
if ( put_user ( len , optlen ) )
return - EFAULT ;
if ( copy_to_user ( optval , & val , len ) )
return - EFAULT ;
return 0 ;
}
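/* Initialize a new KCM socket on a mux: assign the lowest unused index,
 * link it into the mux's kcm_socks list and mark it ready to receive.
 */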
static void init_kcm_sock ( struct kcm_sock * kcm , struct kcm_mux * mux )
{
struct kcm_sock * tkcm ;
struct list_head * head ;
int index = 0 ;
/* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
* we set sk_state , otherwise epoll_wait always returns right away with
* POLLHUP
*/
kcm - > sk . sk_state = TCP_ESTABLISHED ;
/* Add to mux's kcm sockets list */
kcm - > mux = mux ;
spin_lock_bh ( & mux - > lock ) ;
head = & mux - > kcm_socks ;
list_for_each_entry ( tkcm , & mux - > kcm_socks , kcm_sock_list ) {
if ( tkcm - > index ! = index )
break ;
head = & tkcm - > kcm_sock_list ;
index + + ;
}
list_add ( & kcm - > kcm_sock_list , head ) ;
kcm - > index = index ;
mux - > kcm_socks_cnt + + ;
spin_unlock_bh ( & mux - > lock ) ;
INIT_WORK ( & kcm - > tx_work , kcm_tx_work ) ;
spin_lock_bh ( & mux - > rx_lock ) ;
kcm_rcv_ready ( kcm ) ;
spin_unlock_bh ( & mux - > rx_lock ) ;
}
static void kcm_rx_msg_timeout(unsigned long arg)
{
	struct kcm_psock *psock = (struct kcm_psock *)arg;

	/* Message assembly timed out */
	KCM_STATS_INCR(psock->stats.rx_msg_timeouts);
	kcm_abort_rx_psock(psock, ETIMEDOUT, NULL);
}
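/* Attach a connected TCP socket to the mux.  A psock is allocated to wrap
 * the TCP socket, its sk_data_ready/sk_write_space/sk_state_change callbacks
 * are taken over, and the supplied BPF program is used from then on to
 * delineate messages on the byte stream.
 */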
static int kcm_attach ( struct socket * sock , struct socket * csock ,
struct bpf_prog * prog )
{
struct kcm_sock * kcm = kcm_sk ( sock - > sk ) ;
struct kcm_mux * mux = kcm - > mux ;
struct sock * csk ;
struct kcm_psock * psock = NULL , * tpsock ;
struct list_head * head ;
int index = 0 ;
if ( csock - > ops - > family ! = PF_INET & &
csock - > ops - > family ! = PF_INET6 )
return - EINVAL ;
csk = csock - > sk ;
if ( ! csk )
return - EINVAL ;
/* Only support TCP for now */
if ( csk - > sk_protocol ! = IPPROTO_TCP )
return - EINVAL ;
psock = kmem_cache_zalloc ( kcm_psockp , GFP_KERNEL ) ;
if ( ! psock )
return - ENOMEM ;
psock - > mux = mux ;
psock - > sk = csk ;
psock - > bpf_prog = prog ;
setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout,
	    (unsigned long)psock);
INIT_WORK ( & psock - > rx_work , psock_rx_work ) ;
INIT_DELAYED_WORK ( & psock - > rx_delayed_work , psock_rx_delayed_work ) ;
sock_hold ( csk ) ;
write_lock_bh ( & csk - > sk_callback_lock ) ;
psock - > save_data_ready = csk - > sk_data_ready ;
psock - > save_write_space = csk - > sk_write_space ;
psock - > save_state_change = csk - > sk_state_change ;
csk - > sk_user_data = psock ;
csk - > sk_data_ready = psock_tcp_data_ready ;
csk - > sk_write_space = psock_tcp_write_space ;
csk - > sk_state_change = psock_tcp_state_change ;
write_unlock_bh ( & csk - > sk_callback_lock ) ;
/* Finished initialization, now add the psock to the MUX. */
spin_lock_bh ( & mux - > lock ) ;
head = & mux - > psocks ;
list_for_each_entry ( tpsock , & mux - > psocks , psock_list ) {
if ( tpsock - > index ! = index )
break ;
head = & tpsock - > psock_list ;
index + + ;
}
list_add ( & psock - > psock_list , head ) ;
psock - > index = index ;
KCM_STATS_INCR(mux->stats.psock_attach);
mux - > psocks_cnt + + ;
psock_now_avail ( psock ) ;
spin_unlock_bh ( & mux - > lock ) ;
/* Schedule RX work in case there are already bytes queued */
queue_work ( kcm_wq , & psock - > rx_work ) ;
return 0 ;
}
static int kcm_attach_ioctl ( struct socket * sock , struct kcm_attach * info )
{
struct socket * csock ;
struct bpf_prog * prog ;
int err ;
csock = sockfd_lookup ( info - > fd , & err ) ;
if ( ! csock )
return - ENOENT ;
prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER);
if ( IS_ERR ( prog ) ) {
err = PTR_ERR ( prog ) ;
goto out ;
}
err = kcm_attach ( sock , csock , prog ) ;
if ( err ) {
bpf_prog_put ( prog ) ;
goto out ;
}
/* Keep reference on file also */
return 0 ;
out :
fput ( csock - > file ) ;
return err ;
}
static void kcm_unattach ( struct kcm_psock * psock )
{
struct sock * csk = psock - > sk ;
struct kcm_mux * mux = psock - > mux ;
/* Stop getting callbacks from TCP socket. After this there should
* be no way to reserve a kcm for this psock .
*/
write_lock_bh ( & csk - > sk_callback_lock ) ;
csk - > sk_user_data = NULL ;
csk - > sk_data_ready = psock - > save_data_ready ;
csk - > sk_write_space = psock - > save_write_space ;
csk - > sk_state_change = psock - > save_state_change ;
psock - > rx_stopped = 1 ;
if ( WARN_ON ( psock - > rx_kcm ) ) {
write_unlock_bh ( & csk - > sk_callback_lock ) ;
return ;
}
spin_lock_bh ( & mux - > rx_lock ) ;
/* Stop receiver activities. After this point psock should not be
* able to get onto ready list either through callbacks or work .
*/
if ( psock - > ready_rx_msg ) {
list_del ( & psock - > psock_ready_list ) ;
kfree_skb ( psock - > ready_rx_msg ) ;
psock - > ready_rx_msg = NULL ;
KCM_STATS_INCR(mux->stats.rx_ready_drops);
}
spin_unlock_bh ( & mux - > rx_lock ) ;
write_unlock_bh ( & csk - > sk_callback_lock ) ;
del_timer_sync(&psock->rx_msg_timer);
cancel_work_sync ( & psock - > rx_work ) ;
cancel_delayed_work_sync ( & psock - > rx_delayed_work ) ;
bpf_prog_put ( psock - > bpf_prog ) ;
kfree_skb ( psock - > rx_skb_head ) ;
psock - > rx_skb_head = NULL ;
spin_lock_bh ( & mux - > lock ) ;
aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
KCM_STATS_INCR(mux->stats.psock_unattach);
if ( psock - > tx_kcm ) {
/* psock was reserved. Just mark it finished and we will clean
* up in the kcm paths , we need kcm lock which can not be
* acquired here .
*/
KCM_STATS_INCR(mux->stats.psock_unattach_rsvd);
spin_unlock_bh ( & mux - > lock ) ;
/* We are unattaching a socket that is reserved. Abort the
* socket since we may be out of sync in sending on it . We need
* to do this without the mux lock .
*/
kcm_abort_tx_psock ( psock , EPIPE , false ) ;
spin_lock_bh ( & mux - > lock ) ;
if ( ! psock - > tx_kcm ) {
/* psock was unreserved in the window where the mux lock was dropped */
goto no_reserved ;
}
psock - > done = 1 ;
/* Commit done before queuing work to process it */
smp_mb ( ) ;
/* Queue tx work to make sure psock->done is handled */
queue_work ( kcm_wq , & psock - > tx_kcm - > tx_work ) ;
spin_unlock_bh ( & mux - > lock ) ;
} else {
no_reserved :
if ( ! psock - > tx_stopped )
list_del ( & psock - > psock_avail_list ) ;
list_del ( & psock - > psock_list ) ;
mux - > psocks_cnt - - ;
spin_unlock_bh ( & mux - > lock ) ;
sock_put ( csk ) ;
fput ( csk - > sk_socket - > file ) ;
kmem_cache_free ( kcm_psockp , psock ) ;
}
}
static int kcm_unattach_ioctl ( struct socket * sock , struct kcm_unattach * info )
{
struct kcm_sock * kcm = kcm_sk ( sock - > sk ) ;
struct kcm_mux * mux = kcm - > mux ;
struct kcm_psock * psock ;
struct socket * csock ;
struct sock * csk ;
int err ;
csock = sockfd_lookup ( info - > fd , & err ) ;
if ( ! csock )
return - ENOENT ;
csk = csock - > sk ;
if ( ! csk ) {
err = - EINVAL ;
goto out ;
}
err = - ENOENT ;
spin_lock_bh ( & mux - > lock ) ;
list_for_each_entry ( psock , & mux - > psocks , psock_list ) {
if ( psock - > sk ! = csk )
continue ;
/* Found the matching psock */
if ( psock - > unattaching | | WARN_ON ( psock - > done ) ) {
err = - EALREADY ;
break ;
}
psock - > unattaching = 1 ;
spin_unlock_bh ( & mux - > lock ) ;
kcm_unattach ( psock ) ;
err = 0 ;
goto out ;
}
spin_unlock_bh ( & mux - > lock ) ;
out :
fput ( csock - > file ) ;
return err ;
}
static struct proto kcm_proto = {
	.name = "KCM",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct kcm_sock),
};
/* Clone a kcm socket. */
static int kcm_clone ( struct socket * osock , struct kcm_clone * info ,
struct socket * * newsockp )
{
struct socket * newsock ;
struct sock * newsk ;
struct file * newfile ;
int err , newfd ;
err = - ENFILE ;
newsock = sock_alloc ( ) ;
if ( ! newsock )
goto out ;
newsock - > type = osock - > type ;
newsock - > ops = osock - > ops ;
__module_get ( newsock - > ops - > owner ) ;
newfd = get_unused_fd_flags ( 0 ) ;
if ( unlikely ( newfd < 0 ) ) {
err = newfd ;
goto out_fd_fail ;
}
newfile = sock_alloc_file ( newsock , 0 , osock - > sk - > sk_prot_creator - > name ) ;
if ( unlikely ( IS_ERR ( newfile ) ) ) {
err = PTR_ERR ( newfile ) ;
goto out_sock_alloc_fail ;
}
newsk = sk_alloc ( sock_net ( osock - > sk ) , PF_KCM , GFP_KERNEL ,
& kcm_proto , true ) ;
if ( ! newsk ) {
err = - ENOMEM ;
goto out_sk_alloc_fail ;
}
sock_init_data ( newsock , newsk ) ;
init_kcm_sock ( kcm_sk ( newsk ) , kcm_sk ( osock - > sk ) - > mux ) ;
fd_install ( newfd , newfile ) ;
* newsockp = newsock ;
info - > fd = newfd ;
return 0 ;
out_sk_alloc_fail :
fput ( newfile ) ;
out_sock_alloc_fail :
put_unused_fd ( newfd ) ;
out_fd_fail :
sock_release ( newsock ) ;
out :
return err ;
}
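/* ioctl interface.  A typical userspace sequence is roughly the following
 * sketch (tcpfd and bpffd are placeholders for an established TCP connection
 * and a loaded BPF_PROG_TYPE_SOCKET_FILTER program):
 *
 *	int kcmfd = socket(AF_KCM, SOCK_DGRAM, KCMPROTO_CONNECTED);
 *	struct kcm_attach attach = { .fd = tcpfd, .bpf_fd = bpffd };
 *	ioctl(kcmfd, SIOCKCMATTACH, &attach);
 *
 * SIOCKCMUNATTACH detaches a TCP socket from the mux and SIOCKCMCLONE
 * creates another KCM socket on the same mux.
 */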
static int kcm_ioctl ( struct socket * sock , unsigned int cmd , unsigned long arg )
{
int err ;
switch ( cmd ) {
case SIOCKCMATTACH : {
struct kcm_attach info ;
if ( copy_from_user ( & info , ( void __user * ) arg , sizeof ( info ) ) )
err = - EFAULT ;
err = kcm_attach_ioctl ( sock , & info ) ;
break ;
}
case SIOCKCMUNATTACH : {
struct kcm_unattach info ;
if ( copy_from_user ( & info , ( void __user * ) arg , sizeof ( info ) ) )
err = - EFAULT ;
err = kcm_unattach_ioctl ( sock , & info ) ;
break ;
}
case SIOCKCMCLONE : {
struct kcm_clone info ;
struct socket * newsock = NULL ;
if ( copy_from_user ( & info , ( void __user * ) arg , sizeof ( info ) ) )
err = - EFAULT ;
err = kcm_clone ( sock , & info , & newsock ) ;
if ( ! err ) {
if ( copy_to_user ( ( void __user * ) arg , & info ,
sizeof ( info ) ) ) {
err = - EFAULT ;
sock_release ( newsock ) ;
}
}
break ;
}
default :
err = - ENOIOCTLCMD ;
break ;
}
return err ;
}
static void free_mux ( struct rcu_head * rcu )
{
struct kcm_mux * mux = container_of ( rcu ,
struct kcm_mux , rcu ) ;
kmem_cache_free ( kcm_muxp , mux ) ;
}
static void release_mux ( struct kcm_mux * mux )
{
struct kcm_net * knet = mux - > knet ;
struct kcm_psock * psock , * tmp_psock ;
/* Release psocks */
list_for_each_entry_safe ( psock , tmp_psock ,
& mux - > psocks , psock_list ) {
if ( ! WARN_ON ( psock - > unattaching ) )
kcm_unattach ( psock ) ;
}
if ( WARN_ON ( mux - > psocks_cnt ) )
return ;
__skb_queue_purge ( & mux - > rx_hold_queue ) ;
mutex_lock ( & knet - > mutex ) ;
aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
aggregate_psock_stats(&mux->aggregate_psock_stats,
		      &knet->aggregate_psock_stats);
list_del_rcu ( & mux - > kcm_mux_list ) ;
knet - > count - - ;
mutex_unlock ( & knet - > mutex ) ;
call_rcu ( & mux - > rcu , free_mux ) ;
}
static void kcm_done ( struct kcm_sock * kcm )
{
struct kcm_mux * mux = kcm - > mux ;
struct sock * sk = & kcm - > sk ;
int socks_cnt ;
spin_lock_bh ( & mux - > rx_lock ) ;
if ( kcm - > rx_psock ) {
/* Cleanup in unreserve_rx_kcm */
WARN_ON ( kcm - > done ) ;
kcm - > rx_disabled = 1 ;
kcm - > done = 1 ;
spin_unlock_bh ( & mux - > rx_lock ) ;
return ;
}
if ( kcm - > rx_wait ) {
list_del ( & kcm - > wait_rx_list ) ;
kcm - > rx_wait = false ;
}
/* Move any pending receive messages to other kcm sockets */
requeue_rx_msgs ( mux , & sk - > sk_receive_queue ) ;
spin_unlock_bh ( & mux - > rx_lock ) ;
if ( WARN_ON ( sk_rmem_alloc_get ( sk ) ) )
return ;
/* Detach from MUX */
spin_lock_bh ( & mux - > lock ) ;
list_del ( & kcm - > kcm_sock_list ) ;
mux - > kcm_socks_cnt - - ;
socks_cnt = mux - > kcm_socks_cnt ;
spin_unlock_bh ( & mux - > lock ) ;
if ( ! socks_cnt ) {
/* We are done with the mux now. */
release_mux ( mux ) ;
}
WARN_ON ( kcm - > rx_wait ) ;
sock_put ( & kcm - > sk ) ;
}
/* Called by kcm_release to close a KCM socket.
* If this is the last KCM socket on the MUX , destroy the MUX .
*/
static int kcm_release ( struct socket * sock )
{
struct sock * sk = sock - > sk ;
struct kcm_sock * kcm ;
struct kcm_mux * mux ;
struct kcm_psock * psock ;
if ( ! sk )
return 0 ;
kcm = kcm_sk ( sk ) ;
mux = kcm - > mux ;
sock_orphan ( sk ) ;
kfree_skb ( kcm - > seq_skb ) ;
lock_sock ( sk ) ;
/* Purge queue under lock to avoid race condition with tx_work trying
* to act when queue is nonempty . If tx_work runs after this point
* it will just return .
*/
__skb_queue_purge ( & sk - > sk_write_queue ) ;
release_sock ( sk ) ;
spin_lock_bh ( & mux - > lock ) ;
if ( kcm - > tx_wait ) {
/* Take off tx_wait list, after this point there should be no way
 * that a psock will be assigned to this kcm.
 */
list_del ( & kcm - > wait_psock_list ) ;
kcm - > tx_wait = false ;
}
spin_unlock_bh ( & mux - > lock ) ;
/* Cancel work. After this point there should be no outside references
* to the kcm socket .
*/
cancel_work_sync ( & kcm - > tx_work ) ;
lock_sock ( sk ) ;
psock = kcm - > tx_psock ;
if ( psock ) {
/* A psock was reserved, so we need to kill it since it
* may already have some bytes queued from a message . We
* need to do this after removing kcm from tx_wait list .
*/
kcm_abort_tx_psock ( psock , EPIPE , false ) ;
unreserve_psock ( kcm ) ;
}
release_sock ( sk ) ;
WARN_ON ( kcm - > tx_wait ) ;
WARN_ON ( kcm - > tx_psock ) ;
sock - > sk = NULL ;
kcm_done ( kcm ) ;
return 0 ;
}
static const struct proto_ops kcm_dgram_ops = {
. family = PF_KCM ,
. owner = THIS_MODULE ,
. release = kcm_release ,
. bind = sock_no_bind ,
. connect = sock_no_connect ,
. socketpair = sock_no_socketpair ,
. accept = sock_no_accept ,
. getname = sock_no_getname ,
. poll = datagram_poll ,
. ioctl = kcm_ioctl ,
. listen = sock_no_listen ,
. shutdown = sock_no_shutdown ,
. setsockopt = kcm_setsockopt ,
. getsockopt = kcm_getsockopt ,
. sendmsg = kcm_sendmsg ,
. recvmsg = kcm_recvmsg ,
. mmap = sock_no_mmap ,
.sendpage = kcm_sendpage,
};

static const struct proto_ops kcm_seqpacket_ops = {
. family = PF_KCM ,
. owner = THIS_MODULE ,
. release = kcm_release ,
. bind = sock_no_bind ,
. connect = sock_no_connect ,
. socketpair = sock_no_socketpair ,
. accept = sock_no_accept ,
. getname = sock_no_getname ,
. poll = datagram_poll ,
. ioctl = kcm_ioctl ,
. listen = sock_no_listen ,
. shutdown = sock_no_shutdown ,
. setsockopt = kcm_setsockopt ,
. getsockopt = kcm_getsockopt ,
. sendmsg = kcm_sendmsg ,
. recvmsg = kcm_recvmsg ,
. mmap = sock_no_mmap ,
.sendpage = kcm_sendpage,
. splice_read = kcm_splice_read ,
} ;
/* Create proto operation for kcm sockets */
static int kcm_create ( struct net * net , struct socket * sock ,
int protocol , int kern )
{
struct kcm_net * knet = net_generic ( net , kcm_net_id ) ;
struct sock * sk ;
struct kcm_mux * mux ;
switch ( sock - > type ) {
case SOCK_DGRAM :
sock->ops = &kcm_dgram_ops;
break;
case SOCK_SEQPACKET :
sock->ops = &kcm_seqpacket_ops;
break ;
default :
return - ESOCKTNOSUPPORT ;
}
if ( protocol ! = KCMPROTO_CONNECTED )
return - EPROTONOSUPPORT ;
sk = sk_alloc ( net , PF_KCM , GFP_KERNEL , & kcm_proto , kern ) ;
if ( ! sk )
return - ENOMEM ;
/* Allocate a kcm mux, shared between KCM sockets */
mux = kmem_cache_zalloc ( kcm_muxp , GFP_KERNEL ) ;
if ( ! mux ) {
sk_free ( sk ) ;
return - ENOMEM ;
}
spin_lock_init ( & mux - > lock ) ;
spin_lock_init ( & mux - > rx_lock ) ;
INIT_LIST_HEAD ( & mux - > kcm_socks ) ;
INIT_LIST_HEAD ( & mux - > kcm_rx_waiters ) ;
INIT_LIST_HEAD ( & mux - > kcm_tx_waiters ) ;
INIT_LIST_HEAD ( & mux - > psocks ) ;
INIT_LIST_HEAD ( & mux - > psocks_ready ) ;
INIT_LIST_HEAD ( & mux - > psocks_avail ) ;
mux - > knet = knet ;
/* Add new MUX to list */
mutex_lock ( & knet - > mutex ) ;
list_add_rcu ( & mux - > kcm_mux_list , & knet - > mux_list ) ;
knet - > count + + ;
mutex_unlock ( & knet - > mutex ) ;
skb_queue_head_init ( & mux - > rx_hold_queue ) ;
/* Init KCM socket */
sock_init_data ( sock , sk ) ;
init_kcm_sock ( kcm_sk ( sk ) , mux ) ;
return 0 ;
}
static struct net_proto_family kcm_family_ops = {
. family = PF_KCM ,
. create = kcm_create ,
. owner = THIS_MODULE ,
} ;
static __net_init int kcm_init_net ( struct net * net )
{
struct kcm_net * knet = net_generic ( net , kcm_net_id ) ;
INIT_LIST_HEAD_RCU ( & knet - > mux_list ) ;
mutex_init ( & knet - > mutex ) ;
return 0 ;
}
static __net_exit void kcm_exit_net ( struct net * net )
{
struct kcm_net * knet = net_generic ( net , kcm_net_id ) ;
/* All KCM sockets should be closed at this point, which should mean
* that all multiplexors and psocks have been destroyed .
*/
WARN_ON ( ! list_empty ( & knet - > mux_list ) ) ;
}
static struct pernet_operations kcm_net_ops = {
. init = kcm_init_net ,
. exit = kcm_exit_net ,
. id = & kcm_net_id ,
. size = sizeof ( struct kcm_net ) ,
} ;
static int __init kcm_init ( void )
{
int err = - ENOMEM ;
kcm_muxp = kmem_cache_create("kcm_mux_cache",
			     sizeof(struct kcm_mux), 0,
			     SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
if (!kcm_muxp)
	goto fail;

kcm_psockp = kmem_cache_create("kcm_psock_cache",
			       sizeof(struct kcm_psock), 0,
			       SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
if (!kcm_psockp)
	goto fail;

kcm_wq = create_singlethread_workqueue("kkcmd");
if (!kcm_wq)
	goto fail;
err = proto_register ( & kcm_proto , 1 ) ;
if ( err )
goto fail ;
err = sock_register ( & kcm_family_ops ) ;
if ( err )
goto sock_register_fail ;
err = register_pernet_device ( & kcm_net_ops ) ;
if ( err )
goto net_ops_fail ;
err = kcm_proc_init();
if (err)
	goto proc_init_fail;

return 0;

proc_init_fail:
unregister_pernet_device(&kcm_net_ops);
net_ops_fail :
sock_unregister ( PF_KCM ) ;
sock_register_fail :
proto_unregister ( & kcm_proto ) ;
fail :
kmem_cache_destroy ( kcm_muxp ) ;
kmem_cache_destroy ( kcm_psockp ) ;
if ( kcm_wq )
destroy_workqueue ( kcm_wq ) ;
return err ;
}
static void __exit kcm_exit ( void )
{
kcm_proc_exit();
unregister_pernet_device ( & kcm_net_ops ) ;
sock_unregister ( PF_KCM ) ;
proto_unregister ( & kcm_proto ) ;
destroy_workqueue ( kcm_wq ) ;
kmem_cache_destroy ( kcm_muxp ) ;
kmem_cache_destroy ( kcm_psockp ) ;
}
module_init ( kcm_init ) ;
module_exit ( kcm_exit ) ;
MODULE_LICENSE ( " GPL " ) ;
MODULE_ALIAS_NETPROTO ( PF_KCM ) ;