2006-01-02 21:04:38 +03:00
/*
* net / tipc / socket . c : TIPC socket API
2007-02-09 17:25:21 +03:00
*
2012-11-27 15:15:29 +04:00
* Copyright ( c ) 2001 - 2007 , 2012 Ericsson AB
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the topology and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receive messages are now finished in processes,
subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not actually yet called by the existing TIPC code, but since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:39 +04:00
* Copyright ( c ) 2004 - 2008 , 2010 - 2013 , Wind River Systems
2006-01-02 21:04:38 +03:00
* All rights reserved .
*
2006-01-11 15:30:43 +03:00
* Redistribution and use in source and binary forms , with or without
2006-01-02 21:04:38 +03:00
* modification , are permitted provided that the following conditions are met :
*
2006-01-11 15:30:43 +03:00
* 1. Redistributions of source code must retain the above copyright
* notice , this list of conditions and the following disclaimer .
* 2. Redistributions in binary form must reproduce the above copyright
* notice , this list of conditions and the following disclaimer in the
* documentation and / or other materials provided with the distribution .
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission .
2006-01-02 21:04:38 +03:00
*
2006-01-11 15:30:43 +03:00
* Alternatively , this software may be distributed under the terms of the
* GNU General Public License ( " GPL " ) version 2 as published by the Free
* Software Foundation .
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS " AS IS "
* AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL , SPECIAL , EXEMPLARY , OR
* CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT LIMITED TO , PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE , DATA , OR PROFITS ; OR BUSINESS
* INTERRUPTION ) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY , WHETHER IN
* CONTRACT , STRICT LIABILITY , OR TORT ( INCLUDING NEGLIGENCE OR OTHERWISE )
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE , EVEN IF ADVISED OF THE
2006-01-02 21:04:38 +03:00
* POSSIBILITY OF SUCH DAMAGE .
*/
# include "core.h"
2010-11-30 15:00:53 +03:00
# include "port.h"
2006-01-02 21:04:38 +03:00
2012-06-29 08:16:37 +04:00
# include <linux/export.h>
# include <net/sock.h>
2006-01-02 21:04:38 +03:00
/* Additional socket states used by TIPC alongside the generic SS_* values */
#define SS_LISTENING	(-1)	/* socket is listening */
#define SS_READY	(-2)	/* socket is connectionless */

#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
2006-01-02 21:04:38 +03:00
struct tipc_sock {
struct sock sk ;
struct tipc_port * p ;
2008-07-15 09:43:32 +04:00
struct tipc_portid peer_name ;
2011-05-26 21:44:34 +04:00
unsigned int conn_timeout ;
2006-01-02 21:04:38 +03:00
} ;
2008-04-15 11:22:02 +04:00
/* Convert a struct sock pointer into the struct tipc_sock that embeds it */
#define tipc_sk(sk) ((struct tipc_sock *)(sk))

/* Fetch the TIPC port attached to a struct sock */
#define tipc_sk_port(sk) (tipc_sk(sk)->p)

/*
 * True when the struct socket has something for a receiver to act on:
 * either its receive queue is non-empty or it is in SS_DISCONNECTING state.
 * The argument is parenthesised so that any pointer expression may be passed.
 */
#define tipc_rx_ready(sock) (!skb_queue_empty(&(sock)->sk->sk_receive_queue) || \
				((sock)->state == SS_DISCONNECTING))
2008-04-15 11:22:02 +04:00
static int backlog_rcv ( struct sock * sk , struct sk_buff * skb ) ;
2006-01-02 21:04:38 +03:00
static u32 dispatch ( struct tipc_port * tport , struct sk_buff * buf ) ;
static void wakeupdispatch ( struct tipc_port * tport ) ;
2012-08-21 07:16:57 +04:00
static void tipc_data_ready ( struct sock * sk , int len ) ;
static void tipc_write_space ( struct sock * sk ) ;
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policiy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policiy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the toplogy and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receive messages are now finished in processes,
subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not actually yet called by the existing TIPC code, but since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:39 +04:00
static int release ( struct socket * sock ) ;
static int accept ( struct socket * sock , struct socket * new_sock , int flags ) ;
2006-01-02 21:04:38 +03:00
2008-02-08 05:18:01 +03:00
static const struct proto_ops packet_ops ;
static const struct proto_ops stream_ops ;
static const struct proto_ops msg_ops ;
2006-01-02 21:04:38 +03:00
static struct proto tipc_proto ;
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policiy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policiy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the toplogy and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receive messages are now finished in processes,
subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not actually yet called by the existing TIPC code, but since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:39 +04:00
static struct proto tipc_proto_kern ;
2006-01-02 21:04:38 +03:00
2010-12-31 21:59:34 +03:00
static int sockets_enabled ;
2006-01-02 21:04:38 +03:00
2007-02-09 17:25:21 +03:00
/*
2008-04-15 11:22:02 +04:00
* Revised TIPC socket locking policy :
*
* Most socket operations take the standard socket lock when they start
* and hold it until they finish ( or until they need to sleep ) . Acquiring
* this lock grants the owner exclusive access to the fields of the socket
* data structures , with the exception of the backlog queue . A few socket
* operations can be done without taking the socket lock because they only
* read socket information that never changes during the life of the socket .
*
* Socket operations may acquire the lock for the associated TIPC port if they
* need to perform an operation on the port . If any routine needs to acquire
* both the socket lock and the port lock it must take the socket lock first
* to avoid the risk of deadlock .
*
* The dispatcher handling incoming messages cannot grab the socket lock in
* the standard fashion , since it is invoked at the BH level and cannot block .
* Instead , it checks to see if the socket lock is currently owned by someone ,
* and either handles the message itself or adds it to the socket ' s backlog
* queue ; in the latter case the queued message is processed once the process
* owning the socket lock releases it .
*
* NOTE : Releasing the socket lock while an operation is sleeping overcomes
* the problem of a blocked socket operation preventing any other operations
* from occurring . However , applications must be careful if they have
* multiple threads trying to send ( or receive ) on the same socket , as these
* operations might interfere with each other . For example , doing a connect
* and a receive at the same time might allow the receive to consume the
* ACK message meant for the connect . While additional work could be done
* to try and overcome this , it doesn ' t seem to be worthwhile at the present .
*
* NOTE : Releasing the socket lock while an operation is sleeping also ensures
* that another operation that must be performed in a non - blocking manner is
* not delayed for very long because the lock has already been taken .
*
* NOTE : This code assumes that certain fields of a port / socket pair are
* constant over its lifetime ; such fields can be examined without taking
* the socket lock and / or port lock , and do not need to be re - read even
* after resuming processing after waiting . These fields include :
* - socket type
* - pointer to socket sk structure ( aka tipc_sock structure )
* - pointer to port structure
* - port reference
*/
/**
* advance_rx_queue - discard first buffer in socket receive queue
*
* Caller must hold socket lock
2006-01-02 21:04:38 +03:00
*/
2008-04-15 11:22:02 +04:00
static void advance_rx_queue ( struct sock * sk )
2006-01-02 21:04:38 +03:00
{
2011-11-04 21:24:29 +04:00
kfree_skb ( __skb_dequeue ( & sk - > sk_receive_queue ) ) ;
2006-01-02 21:04:38 +03:00
}
/**
2008-04-15 11:22:02 +04:00
* reject_rx_queue - reject all buffers in socket receive queue
*
* Caller must hold socket lock
2006-01-02 21:04:38 +03:00
*/
2008-04-15 11:22:02 +04:00
static void reject_rx_queue ( struct sock * sk )
2006-01-02 21:04:38 +03:00
{
2008-04-15 11:22:02 +04:00
struct sk_buff * buf ;
tipc: eliminate aggregate sk_receive_queue limit
As a complement to the per-socket sk_recv_queue limit, TIPC keeps a
global atomic counter for the sum of sk_recv_queue sizes across all
tipc sockets. When incremented, the counter is compared to an upper
threshold value, and if this is reached, the message is rejected
with error code TIPC_OVERLOAD.
This check was originally meant to protect the node against
buffer exhaustion and general CPU overload. However, all experience
indicates that the feature not only is redundant on Linux, but even
harmful. Users run into the limit very often, causing disturbances
for their applications, while removing it seems to have no negative
effects at all. We have also seen that overall performance is
boosted significantly when this bottleneck is removed.
Furthermore, we don't see any other network protocols maintaining
such a mechanism, something strengthening our conviction that this
control can be eliminated.
As a result, the atomic variable tipc_queue_size is now unused
and so it can be deleted. There is a getsockopt call that used
to allow reading it; we retain that but just return zero for
maximum compatibility.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
[PG: phase out tipc_queue_size as pointed out by Neil Horman]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-27 15:15:27 +04:00
while ( ( buf = __skb_dequeue ( & sk - > sk_receive_queue ) ) )
2008-04-15 11:22:02 +04:00
tipc_reject_msg ( buf , TIPC_ERR_NO_PORT ) ;
2006-01-02 21:04:38 +03:00
}
/**
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policiy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policiy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the toplogy and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receive messages are now finished in processes,
subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not actually yet called by the existing TIPC code, but since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:39 +04:00
* tipc_sk_create - create a TIPC socket
2008-04-15 11:22:02 +04:00
* @ net : network namespace ( must be default network )
2006-01-02 21:04:38 +03:00
* @ sock : pre - allocated socket structure
* @ protocol : protocol indicator ( must be 0 )
2009-11-06 09:18:14 +03:00
* @ kern : caused by kernel or by userspace ?
2007-02-09 17:25:21 +03:00
*
2008-04-15 11:22:02 +04:00
* This routine creates additional data structures used by the TIPC socket ,
* initializes them , and links them together .
2006-01-02 21:04:38 +03:00
*
* Returns 0 on success , errno otherwise
*/
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policiy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policiy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the toplogy and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receive messages are now finished in processes,
subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not actually yet called by the existing TIPC code, but since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:39 +04:00
static int tipc_sk_create ( struct net * net , struct socket * sock , int protocol ,
int kern )
2006-01-02 21:04:38 +03:00
{
2008-04-15 11:22:02 +04:00
const struct proto_ops * ops ;
socket_state state ;
2006-01-02 21:04:38 +03:00
struct sock * sk ;
2008-05-13 02:42:28 +04:00
struct tipc_port * tp_ptr ;
2008-04-15 11:22:02 +04:00
/* Validate arguments */
2006-01-02 21:04:38 +03:00
if ( unlikely ( protocol ! = 0 ) )
return - EPROTONOSUPPORT ;
switch ( sock - > type ) {
case SOCK_STREAM :
2008-04-15 11:22:02 +04:00
ops = & stream_ops ;
state = SS_UNCONNECTED ;
2006-01-02 21:04:38 +03:00
break ;
case SOCK_SEQPACKET :
2008-04-15 11:22:02 +04:00
ops = & packet_ops ;
state = SS_UNCONNECTED ;
2006-01-02 21:04:38 +03:00
break ;
case SOCK_DGRAM :
case SOCK_RDM :
2008-04-15 11:22:02 +04:00
ops = & msg_ops ;
state = SS_READY ;
2006-01-02 21:04:38 +03:00
break ;
2006-06-26 10:47:18 +04:00
default :
return - EPROTOTYPE ;
2006-01-02 21:04:38 +03:00
}
2008-04-15 11:22:02 +04:00
/* Allocate socket's protocol area */
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policiy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policiy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the toplogy and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receive messages are now finished in processes,
subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not actually yet called by the existing TIPC code, but since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:39 +04:00
if ( ! kern )
sk = sk_alloc ( net , AF_TIPC , GFP_KERNEL , & tipc_proto ) ;
else
sk = sk_alloc ( net , AF_TIPC , GFP_KERNEL , & tipc_proto_kern ) ;
2008-04-15 11:22:02 +04:00
if ( sk = = NULL )
2006-01-02 21:04:38 +03:00
return - ENOMEM ;
2008-04-15 11:22:02 +04:00
/* Allocate TIPC port for socket to use */
2013-06-17 18:54:44 +04:00
tp_ptr = tipc_createport ( sk , & dispatch , & wakeupdispatch ,
TIPC_LOW_IMPORTANCE ) ;
2008-07-15 09:42:19 +04:00
if ( unlikely ( ! tp_ptr ) ) {
2008-04-15 11:22:02 +04:00
sk_free ( sk ) ;
return - ENOMEM ;
}
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
/* Finish initializing socket data structures */
sock - > ops = ops ;
sock - > state = state ;
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
sock_init_data ( sock , sk ) ;
sk - > sk_backlog_rcv = backlog_rcv ;
2013-06-17 18:54:37 +04:00
sk - > sk_rcvbuf = sysctl_tipc_rmem [ 1 ] ;
2012-08-21 07:16:57 +04:00
sk - > sk_data_ready = tipc_data_ready ;
sk - > sk_write_space = tipc_write_space ;
2008-07-15 09:42:19 +04:00
tipc_sk ( sk ) - > p = tp_ptr ;
2011-05-26 21:44:34 +04:00
tipc_sk ( sk ) - > conn_timeout = CONN_TIMEOUT_DEFAULT ;
2006-01-02 21:04:38 +03:00
2008-05-13 02:42:28 +04:00
spin_unlock_bh ( tp_ptr - > lock ) ;
2008-04-15 11:22:02 +04:00
if ( sock - > state = = SS_READY ) {
2008-07-15 09:42:19 +04:00
tipc_set_portunreturnable ( tp_ptr - > ref , 1 ) ;
2008-04-15 11:22:02 +04:00
if ( sock - > type = = SOCK_DGRAM )
2008-07-15 09:42:19 +04:00
tipc_set_portunreliable ( tp_ptr - > ref , 1 ) ;
2008-04-15 11:22:02 +04:00
}
2006-01-02 21:04:38 +03:00
return 0 ;
}
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the topology and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receive messages are now finished in processes,
subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not actually yet called by the existing TIPC code, but since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:39 +04:00
/**
* tipc_sock_create_local - create TIPC socket from inside TIPC module
* @ type : socket type - SOCK_RDM or SOCK_SEQPACKET
*
* We cannot use sock_creat_kern here because it bumps module user count .
* Since socket owner and creator is the same module we must make sure
* that module count remains zero for module local sockets , otherwise
* we cannot do rmmod .
*
* Returns 0 on success , errno otherwise
*/
int tipc_sock_create_local ( int type , struct socket * * res )
{
int rc ;
rc = sock_create_lite ( AF_TIPC , type , 0 , res ) ;
if ( rc < 0 ) {
pr_err ( " Failed to create kernel socket \n " ) ;
return rc ;
}
tipc_sk_create ( & init_net , * res , 0 , 1 ) ;
return 0 ;
}
/**
* tipc_sock_release_local - release socket created by tipc_sock_create_local
* @ sock : the socket to be released .
*
* Module reference count is not incremented when such sockets are created ,
* so we must keep it from being decremented when they are released .
*/
void tipc_sock_release_local ( struct socket * sock )
{
release ( sock ) ;
sock - > ops = NULL ;
sock_release ( sock ) ;
}
/**
* tipc_sock_accept_local - accept a connection on a socket created
* with tipc_sock_create_local . Use this function to avoid that
* module reference count is inadvertently incremented .
*
* @ sock : the accepting socket
* @ newsock : reference to the new socket to be created
* @ flags : socket flags
*/
int tipc_sock_accept_local ( struct socket * sock , struct socket * * newsock ,
2013-06-17 18:54:47 +04:00
int flags )
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policiy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policiy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the toplogy and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receive messages are now finished in processes,
subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not actually yet called by the existing TIPC code, but since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:39 +04:00
{
struct sock * sk = sock - > sk ;
int ret ;
ret = sock_create_lite ( sk - > sk_family , sk - > sk_type ,
sk - > sk_protocol , newsock ) ;
if ( ret < 0 )
return ret ;
ret = accept ( sock , * newsock , flags ) ;
if ( ret < 0 ) {
sock_release ( * newsock ) ;
return ret ;
}
( * newsock ) - > ops = sock - > ops ;
return ret ;
}
2006-01-02 21:04:38 +03:00
/**
* release - destroy a TIPC socket
* @ sock : socket to destroy
*
* This routine cleans up any messages that are still queued on the socket .
* For DGRAM and RDM socket types , all queued messages are rejected .
* For SEQPACKET and STREAM socket types , the first message is rejected
* and any others are discarded . ( If the first message on a STREAM socket
* is partially - read , it is discarded and the next one is rejected instead . )
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* NOTE : Rejected messages are not necessarily returned to the sender ! They
* are returned or discarded according to the " destination droppable " setting
* specified for the message by the sender .
*
* Returns 0 if the socket has no underlying sock ( nothing to clean up ) ,
* otherwise the value returned by tipc_deleteport ( )
*/
static int release ( struct socket * sock )
{
struct sock * sk = sock - > sk ;
2008-04-15 11:22:02 +04:00
struct tipc_port * tport ;
2006-01-02 21:04:38 +03:00
struct sk_buff * buf ;
2008-04-15 11:22:02 +04:00
int res ;
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
/*
* Exit if socket isn ' t fully initialized ( occurs when a failed accept ( )
* releases a pre - allocated child socket that was never used )
*/
if ( sk = = NULL )
2006-01-02 21:04:38 +03:00
return 0 ;
2007-02-09 17:25:21 +03:00
2008-04-15 11:22:02 +04:00
tport = tipc_sk_port ( sk ) ;
lock_sock ( sk ) ;
/*
* Reject all unreceived messages , except on an active connection
* ( which disconnects locally & sends a ' FIN + ' to peer )
*/
2006-01-02 21:04:38 +03:00
while ( sock - > state ! = SS_DISCONNECTING ) {
2008-04-15 11:22:02 +04:00
buf = __skb_dequeue ( & sk - > sk_receive_queue ) ;
if ( buf = = NULL )
2006-01-02 21:04:38 +03:00
break ;
2013-10-18 09:23:16 +04:00
/*
 * A buffer with a non-NULL handle is freed instead of rejected
 * (presumably the partially-read message mentioned in the header
 * comment -- TODO confirm what the handle tracks)
 */
if ( TIPC_SKB_CB ( buf ) - > handle ! = NULL )
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2008-04-15 11:22:02 +04:00
else {
/*
 * On a connecting/connected socket the first rejected message also
 * disconnects the port; the state change to SS_DISCONNECTING makes
 * the enclosing loop stop after this iteration.
 */
if ( ( sock - > state = = SS_CONNECTING ) | |
( sock - > state = = SS_CONNECTED ) ) {
sock - > state = SS_DISCONNECTING ;
tipc_disconnect ( tport - > ref ) ;
}
2006-01-02 21:04:38 +03:00
tipc_reject_msg ( buf , TIPC_ERR_NO_PORT ) ;
2008-04-15 11:22:02 +04:00
}
2006-01-02 21:04:38 +03:00
}
2008-04-15 11:22:02 +04:00
/*
* Delete TIPC port ; this ensures no more messages are queued
* ( also disconnects an active connection & sends a ' FIN - ' to peer )
*/
2013-12-27 06:18:28 +04:00
res = tipc_deleteport ( tport ) ;
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
/* Discard any remaining (connection-based) messages in receive queue */
2013-01-21 02:30:08 +04:00
__skb_queue_purge ( & sk - > sk_receive_queue ) ;
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
/* Reject any messages that accumulated in backlog queue */
sock - > state = SS_DISCONNECTING ;
release_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
/* Drop this reference to sk and detach it from the socket structure */
sock_put ( sk ) ;
2008-04-15 11:22:02 +04:00
sock - > sk = NULL ;
2006-01-02 21:04:38 +03:00
return res ;
}
/**
* bind - associate or disassociate TIPC name ( s ) with a socket
* @ sock : socket structure
* @ uaddr : socket address describing name ( s ) and desired operation
* @ uaddr_len : size of socket address data structure
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Name and name sequence binding is indicated using a positive scope value ;
* a negative scope value unbinds the specified name . Specifying no name
* ( i . e . a socket address length of 0 ) unbinds all names from the socket .
* A scope value of 0 also takes the withdraw path .
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns 0 on success , errno otherwise :
* - EINVAL if @ uaddr_len is non - zero but smaller than struct sockaddr_tipc ,
* - EAFNOSUPPORT if the family is not AF_TIPC or the address type is neither
* TIPC_ADDR_NAME nor TIPC_ADDR_NAMESEQ ,
* - EACCES if the name type is below TIPC_RESERVED_TYPES and is neither
* TIPC_TOP_SRV nor TIPC_CFG_SRV ,
* or whatever tipc_publish ( ) / tipc_withdraw ( ) returns .
2008-04-15 11:22:02 +04:00
*
* NOTE : The socket lock is held for the whole operation and released on
* every exit path .
2006-01-02 21:04:38 +03:00
*/
static int bind ( struct socket * sock , struct sockaddr * uaddr , int uaddr_len )
{
2013-12-27 06:18:28 +04:00
struct sock * sk = sock - > sk ;
2006-01-02 21:04:38 +03:00
struct sockaddr_tipc * addr = ( struct sockaddr_tipc * ) uaddr ;
2013-12-27 06:18:28 +04:00
struct tipc_port * tport = tipc_sk_port ( sock - > sk ) ;
int res = - EINVAL ;
2006-01-02 21:04:38 +03:00
2013-12-27 06:18:28 +04:00
lock_sock ( sk ) ;
/* Zero-length address: withdraw without naming a specific sequence */
if ( unlikely ( ! uaddr_len ) ) {
res = tipc_withdraw ( tport , 0 , NULL ) ;
goto exit ;
}
2007-02-09 17:25:21 +03:00
2013-12-27 06:18:28 +04:00
if ( uaddr_len < sizeof ( struct sockaddr_tipc ) ) {
res = - EINVAL ;
goto exit ;
}
if ( addr - > family ! = AF_TIPC ) {
res = - EAFNOSUPPORT ;
goto exit ;
}
2006-01-02 21:04:38 +03:00
/* A single name is handled as a name sequence with upper == lower */
if ( addr - > addrtype = = TIPC_ADDR_NAME )
addr - > addr . nameseq . upper = addr - > addr . nameseq . lower ;
2013-12-27 06:18:28 +04:00
else if ( addr - > addrtype ! = TIPC_ADDR_NAMESEQ ) {
res = - EAFNOSUPPORT ;
goto exit ;
}
2007-02-09 17:25:21 +03:00
tipc: convert topology server to use new server facility
As the new TIPC server infrastructure has been introduced, we can
now convert the TIPC topology server to it. We get two benefits
from doing this:
1) It simplifies the topology server locking policy. In the
original locking policy, we placed one spin lock pointer in the
tipc_subscriber structure to reuse the lock of the subscriber's
server port, controlling access to members of tipc_subscriber
instance. That is, we only used one lock to ensure both
tipc_port and tipc_subscriber members were safely accessed.
Now we introduce another spin lock for tipc_subscriber structure
only protecting themselves, to get a finer granularity locking
policy. Moreover, the change will allow us to make the topology
server code more readable and maintainable.
2) It fixes a bug where sent subscription events may be lost when
the topology port is congested. Using the new service, the
topology server now queues sent events into an outgoing buffer,
and then wakes up a sender process which has been blocked in
workqueue context. The process will keep picking events from the
buffer and send them to their respective subscribers, using the
kernel socket interface, until the buffer is empty. Even if the
socket is congested during transmission there is no risk that
events may be dropped, since the sender process may block when
needed.
Some minor reordering of initialization is done, since we now
have a scenario where the topology server must be started after
socket initialization has taken place, as the former depends
on the latter. And overall, we see a simplification of the
TIPC subscriber code in making this changeover.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:40 +04:00
if ( ( addr - > addr . nameseq . type < TIPC_RESERVED_TYPES ) & &
2013-06-17 18:54:41 +04:00
( addr - > addr . nameseq . type ! = TIPC_TOP_SRV ) & &
2013-12-27 06:18:28 +04:00
( addr - > addr . nameseq . type ! = TIPC_CFG_SRV ) ) {
res = - EACCES ;
goto exit ;
}
2011-11-02 23:49:40 +04:00
2013-12-27 06:18:28 +04:00
res = ( addr - > scope > 0 ) ?
tipc_publish ( tport , addr - > scope , & addr - > addr . nameseq ) :
tipc_withdraw ( tport , - addr - > scope , & addr - > addr . nameseq ) ;
exit :
release_sock ( sk ) ;
return res ;
2006-01-02 21:04:38 +03:00
}
2007-02-09 17:25:21 +03:00
/**
2006-01-02 21:04:38 +03:00
* get_name - get port ID of socket or peer socket
* @ sock : socket structure
* @ uaddr : area for returned socket address
* @ uaddr_len : area for returned length of socket address
2008-07-15 09:43:32 +04:00
* @ peer : 0 = own ID , 1 = current peer ID , 2 = current / former peer ID
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* The returned address is always of type TIPC_ADDR_ID with scope 0 , and
* * @ uaddr_len is set to sizeof ( struct sockaddr_tipc ) .
*
* Returns 0 on success ; - ENOTCONN if a peer ID is requested while the
* socket is not SS_CONNECTED ( for @ peer = = 2 the SS_DISCONNECTING state
* is accepted as well )
2008-04-15 11:22:02 +04:00
*
2008-07-15 09:43:32 +04:00
* NOTE : This routine doesn ' t need to take the socket lock since it only
* accesses socket information that is unchanging ( or which changes in
2010-12-31 21:59:32 +03:00
* a completely predictable manner ) .
2006-01-02 21:04:38 +03:00
*/
2007-02-09 17:25:21 +03:00
static int get_name ( struct socket * sock , struct sockaddr * uaddr ,
2006-01-02 21:04:38 +03:00
int * uaddr_len , int peer )
{
struct sockaddr_tipc * addr = ( struct sockaddr_tipc * ) uaddr ;
2008-07-15 09:43:32 +04:00
struct tipc_sock * tsock = tipc_sk ( sock - > sk ) ;
2006-01-02 21:04:38 +03:00
2010-10-31 10:10:32 +03:00
/* Zero the whole structure first so no field is left unset */
memset ( addr , 0 , sizeof ( * addr ) ) ;
2008-04-15 11:22:02 +04:00
if ( peer ) {
2008-07-15 09:43:32 +04:00
if ( ( sock - > state ! = SS_CONNECTED ) & &
( ( peer ! = 2 ) | | ( sock - > state ! = SS_DISCONNECTING ) ) )
return - ENOTCONN ;
addr - > addr . id . ref = tsock - > peer_name . ref ;
addr - > addr . id . node = tsock - > peer_name . node ;
2008-04-15 11:22:02 +04:00
} else {
2010-11-30 15:01:03 +03:00
addr - > addr . id . ref = tsock - > p - > ref ;
addr - > addr . id . node = tipc_own_addr ;
2008-04-15 11:22:02 +04:00
}
2006-01-02 21:04:38 +03:00
* uaddr_len = sizeof ( * addr ) ;
addr - > addrtype = TIPC_ADDR_ID ;
addr - > family = AF_TIPC ;
addr - > scope = 0 ;
addr - > addr . name . domain = 0 ;
2008-04-15 11:22:02 +04:00
return 0 ;
2006-01-02 21:04:38 +03:00
}
/**
* poll - read and possibly block on pollmask
* @ file : file structure associated with the socket
* @ sock : socket for which to calculate the poll bits
* @ wait : poll table , passed to sock_poll_wait ( ) along with sk_sleep ( sk )
*
2008-03-27 02:48:21 +03:00
* Returns pollmask value
*
* COMMENTARY :
* It appears that the usual socket locking mechanisms are not useful here
* since the pollmask info is potentially out - of - date the moment this routine
* exits . TCP and other protocols seem to rely on higher level poll routines
* to handle any preventable race conditions , so TIPC will do the same . . .
*
* TIPC sets the returned events as follows :
2010-08-17 15:00:06 +04:00
*
* socket state flags set
* - - - - - - - - - - - - - - - - - - - - -
* unconnected no read flags
2012-10-16 18:47:06 +04:00
* POLLOUT if port is not congested
2010-08-17 15:00:06 +04:00
*
* connecting POLLIN / POLLRDNORM if ACK / NACK in rx queue
* no write flags
*
* connected POLLIN / POLLRDNORM if data in rx queue
* POLLOUT if port is not congested
*
* disconnecting POLLIN / POLLRDNORM / POLLHUP
* no write flags
*
* listening POLLIN / POLLRDNORM if SYN in rx queue
* no write flags
*
* ready POLLIN / POLLRDNORM if data in rx queue
* [ connectionless ] POLLOUT if port is not congested ( same check as
* for a connected socket )
*
* Any other socket state yields an empty mask .
*
* IMPORTANT : The fact that a read or write operation is indicated does NOT
* imply that the operation will succeed , merely that it should be performed
* and will not block .
2006-01-02 21:04:38 +03:00
*/
2007-02-09 17:25:21 +03:00
static unsigned int poll ( struct file * file , struct socket * sock ,
2006-01-02 21:04:38 +03:00
poll_table * wait )
{
2008-03-27 02:48:21 +03:00
struct sock * sk = sock - > sk ;
2010-08-17 15:00:06 +04:00
u32 mask = 0 ;
2008-03-27 02:48:21 +03:00
2012-08-21 07:16:57 +04:00
sock_poll_wait ( file , sk_sleep ( sk ) , wait ) ;
2008-03-27 02:48:21 +03:00
2010-08-17 15:00:06 +04:00
switch ( ( int ) sock - > state ) {
2012-10-16 18:47:06 +04:00
case SS_UNCONNECTED :
if ( ! tipc_sk_port ( sk ) - > congested )
mask | = POLLOUT ;
break ;
2010-08-17 15:00:06 +04:00
case SS_READY :
case SS_CONNECTED :
if ( ! tipc_sk_port ( sk ) - > congested )
mask | = POLLOUT ;
/* fall thru' */
case SS_CONNECTING :
case SS_LISTENING :
/* Readable whenever anything is queued for reception */
if ( ! skb_queue_empty ( & sk - > sk_receive_queue ) )
mask | = ( POLLIN | POLLRDNORM ) ;
break ;
case SS_DISCONNECTING :
mask = ( POLLIN | POLLRDNORM | POLLHUP ) ;
break ;
}
2008-03-27 02:48:21 +03:00
return mask ;
2006-01-02 21:04:38 +03:00
}
2007-02-09 17:25:21 +03:00
/**
2006-01-02 21:04:38 +03:00
* dest_name_check - verify user is permitted to send to specified port name
* @ dest : destination address
* @ m : descriptor for message to be sent
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Prevents restricted configuration commands from being issued by
* unauthorized users .
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Name types at or above TIPC_RESERVED_TYPES , and TIPC_TOP_SRV , are always
* permitted . Any other type except TIPC_CFG_SRV is refused . For
* TIPC_CFG_SRV the configuration message header is copied from the first
* iovec entry and inspected .
*
* Returns 0 if permission is granted , otherwise errno :
* - EACCES for a refused name type , or when the tcm_type field has any of
* the 0xC000 bits set and the caller lacks CAP_NET_ADMIN ,
* - EMSGSIZE if there is no iovec entry or the first one is shorter than
* struct tipc_cfg_msg_hdr ,
* - EFAULT if the header cannot be copied from user space
*/
2006-03-21 09:37:04 +03:00
static int dest_name_check ( struct sockaddr_tipc * dest , struct msghdr * m )
2006-01-02 21:04:38 +03:00
{
struct tipc_cfg_msg_hdr hdr ;
2007-02-09 17:25:21 +03:00
if ( likely ( dest - > addr . name . name . type > = TIPC_RESERVED_TYPES ) )
return 0 ;
if ( likely ( dest - > addr . name . name . type = = TIPC_TOP_SRV ) )
return 0 ;
if ( likely ( dest - > addr . name . name . type ! = TIPC_CFG_SRV ) )
return - EACCES ;
2006-01-02 21:04:38 +03:00
2011-01-18 21:09:29 +03:00
/* Only the first iovec entry is examined for the header */
if ( ! m - > msg_iovlen | | ( m - > msg_iov [ 0 ] . iov_len < sizeof ( hdr ) ) )
return - EMSGSIZE ;
2007-02-09 17:25:21 +03:00
if ( copy_from_user ( & hdr , m - > msg_iov [ 0 ] . iov_base , sizeof ( hdr ) ) )
2006-01-02 21:04:38 +03:00
return - EFAULT ;
2006-06-26 10:41:47 +04:00
if ( ( ntohs ( hdr . tcm_type ) & 0xC000 ) & & ( ! capable ( CAP_NET_ADMIN ) ) )
2006-01-02 21:04:38 +03:00
return - EACCES ;
2007-02-09 17:25:21 +03:00
2006-01-02 21:04:38 +03:00
return 0 ;
}
/**
* send_msg - send message in connectionless manner
2008-04-15 11:22:02 +04:00
* @ iocb : if NULL , indicates that socket lock is already held
2006-01-02 21:04:38 +03:00
* @ sock : socket structure
* @ m : message to send
2006-06-26 10:43:57 +04:00
* @ total_len : length of message
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Message must have a destination specified explicitly .
2007-02-09 17:25:21 +03:00
* Used for SOCK_RDM and SOCK_DGRAM messages ,
2006-01-02 21:04:38 +03:00
* and for ' SYN ' messages on SOCK_SEQPACKET and SOCK_STREAM connections .
* ( Note : ' SYN + ' is prohibited on SOCK_STREAM . )
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns the number of bytes sent on success , or errno otherwise :
* - EDESTADDRREQ if no destination address is supplied ,
* - EINVAL if the address is too short , is not AF_TIPC , or has an address
* type other than TIPC_ADDR_NAME / TIPC_ADDR_ID / TIPC_ADDR_MCAST ,
* - EMSGSIZE if @ total_len exceeds TIPC_MAX_USER_MSG_SIZE ,
* - EPIPE if the socket is listening , - EISCONN if it is in any state other
* than SS_READY or SS_UNCONNECTED ,
* - EOPNOTSUPP if an unconnected socket has a published port , or for a
* multicast destination on a socket that is not SS_READY ,
* - EWOULDBLOCK if the send timeout runs out while the link is congested
* ( or the negative value returned by wait_event_interruptible_timeout ( ) ) ,
* or the error from dest_name_check ( ) / the tipc send routine used .
*/
static int send_msg ( struct kiocb * iocb , struct socket * sock ,
struct msghdr * m , size_t total_len )
{
2008-04-15 11:22:02 +04:00
struct sock * sk = sock - > sk ;
struct tipc_port * tport = tipc_sk_port ( sk ) ;
2007-02-09 17:25:21 +03:00
struct sockaddr_tipc * dest = ( struct sockaddr_tipc * ) m - > msg_name ;
2006-01-02 21:04:38 +03:00
int needs_conn ;
2011-07-06 13:53:15 +04:00
long timeout_val ;
2006-01-02 21:04:38 +03:00
int res = - EINVAL ;
if ( unlikely ( ! dest ) )
return - EDESTADDRREQ ;
2006-06-26 10:49:06 +04:00
if ( unlikely ( ( m - > msg_namelen < sizeof ( * dest ) ) | |
( dest - > family ! = AF_TIPC ) ) )
2006-01-02 21:04:38 +03:00
return - EINVAL ;
2013-02-01 00:51:47 +04:00
if ( total_len > TIPC_MAX_USER_MSG_SIZE )
2010-04-21 01:58:24 +04:00
return - EMSGSIZE ;
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
if ( iocb )
lock_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
/*
 * Any state other than SS_READY means this send doubles as a connection
 * request; on success the socket is moved to SS_CONNECTING further down.
 */
needs_conn = ( sock - > state ! = SS_READY ) ;
if ( unlikely ( needs_conn ) ) {
2008-04-15 11:22:02 +04:00
if ( sock - > state = = SS_LISTENING ) {
res = - EPIPE ;
goto exit ;
}
if ( sock - > state ! = SS_UNCONNECTED ) {
res = - EISCONN ;
goto exit ;
}
2013-06-17 18:54:38 +04:00
if ( tport - > published ) {
2008-04-15 11:22:02 +04:00
res = - EOPNOTSUPP ;
goto exit ;
}
2006-06-26 10:44:57 +04:00
/* Record the name type and instance being connected to */
if ( dest - > addrtype = = TIPC_ADDR_NAME ) {
2008-04-15 11:22:02 +04:00
tport - > conn_type = dest - > addr . name . name . type ;
tport - > conn_instance = dest - > addr . name . name . instance ;
2006-06-26 10:44:57 +04:00
}
2006-01-02 21:04:38 +03:00
/* Abort any pending connection attempts (very unlikely) */
2008-04-15 11:22:02 +04:00
reject_rx_queue ( sk ) ;
2006-01-02 21:04:38 +03:00
}
2011-07-06 13:53:15 +04:00
/* Retry the send while it reports -ELINKCONG and send timeout remains */
timeout_val = sock_sndtimeo ( sk , m - > msg_flags & MSG_DONTWAIT ) ;
2007-02-09 17:25:21 +03:00
do {
if ( dest - > addrtype = = TIPC_ADDR_NAME ) {
2010-12-31 21:59:33 +03:00
res = dest_name_check ( dest , m ) ;
if ( res )
2008-04-15 11:22:02 +04:00
break ;
res = tipc_send2name ( tport - > ref ,
2007-02-09 17:25:21 +03:00
& dest - > addr . name . name ,
dest - > addr . name . domain ,
tipc: Avoid recomputation of outgoing message length
Rework TIPC's message sending routines to take advantage of the total
amount of data value passed to it by the kernel socket infrastructure.
This change eliminates the need for TIPC to compute the size of outgoing
messages itself, as well as the check for an oversize message in
tipc_msg_build(). In addition, this change warrants an explanation:
- res = send_packet(NULL, sock, &my_msg, 0);
+ res = send_packet(NULL, sock, &my_msg, bytes_to_send);
Previously, the final argument to send_packet() was ignored (since the
amount of data being sent was recalculated by a lower-level routine)
and we could just pass in a dummy value (0). Now that the
recalculation is being eliminated, the argument value being passed to
send_packet() is significant and we have to supply the actual amount
of data we want to send.
Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-04-21 19:42:07 +04:00
m - > msg_iov ,
total_len ) ;
2010-12-31 21:59:32 +03:00
} else if ( dest - > addrtype = = TIPC_ADDR_ID ) {
2008-04-15 11:22:02 +04:00
res = tipc_send2port ( tport - > ref ,
2007-02-09 17:25:21 +03:00
& dest - > addr . id ,
tipc: Avoid recomputation of outgoing message length
Rework TIPC's message sending routines to take advantage of the total
amount of data value passed to it by the kernel socket infrastructure.
This change eliminates the need for TIPC to compute the size of outgoing
messages itself, as well as the check for an oversize message in
tipc_msg_build(). In addition, this change warrants an explanation:
- res = send_packet(NULL, sock, &my_msg, 0);
+ res = send_packet(NULL, sock, &my_msg, bytes_to_send);
Previously, the final argument to send_packet() was ignored (since the
amount of data being sent was recalculated by a lower-level routine)
and we could just pass in a dummy value (0). Now that the
recalculation is being eliminated, the argument value being passed to
send_packet() is significant and we have to supply the actual amount
of data we want to send.
Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-04-21 19:42:07 +04:00
m - > msg_iov ,
total_len ) ;
2010-12-31 21:59:32 +03:00
} else if ( dest - > addrtype = = TIPC_ADDR_MCAST ) {
2006-01-02 21:04:38 +03:00
if ( needs_conn ) {
res = - EOPNOTSUPP ;
2008-04-15 11:22:02 +04:00
break ;
2006-01-02 21:04:38 +03:00
}
2010-12-31 21:59:33 +03:00
res = dest_name_check ( dest , m ) ;
if ( res )
2008-04-15 11:22:02 +04:00
break ;
res = tipc_multicast ( tport - > ref ,
2007-02-09 17:25:21 +03:00
& dest - > addr . nameseq ,
tipc: Avoid recomputation of outgoing message length
Rework TIPC's message sending routines to take advantage of the total
amount of data value passed to it by the kernel socket infrastructure.
This change eliminates the need for TIPC to compute the size of outgoing
messages itself, as well as the check for an oversize message in
tipc_msg_build(). In addition, this change warrants an explanation:
- res = send_packet(NULL, sock, &my_msg, 0);
+ res = send_packet(NULL, sock, &my_msg, bytes_to_send);
Previously, the final argument to send_packet() was ignored (since the
amount of data being sent was recalculated by a lower-level routine)
and we could just pass in a dummy value (0). Now that the
recalculation is being eliminated, the argument value being passed to
send_packet() is significant and we have to supply the actual amount
of data we want to send.
Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-04-21 19:42:07 +04:00
m - > msg_iov ,
total_len ) ;
2007-02-09 17:25:21 +03:00
}
if ( likely ( res ! = - ELINKCONG ) ) {
2010-12-31 21:59:35 +03:00
if ( needs_conn & & ( res > = 0 ) )
2008-04-15 11:22:02 +04:00
sock - > state = SS_CONNECTING ;
break ;
2007-02-09 17:25:21 +03:00
}
2011-07-06 13:53:15 +04:00
if ( timeout_val < = 0L ) {
res = timeout_val ? timeout_val : - EWOULDBLOCK ;
2008-04-15 11:22:02 +04:00
break ;
2007-02-09 17:25:21 +03:00
}
2008-04-15 11:22:02 +04:00
release_sock ( sk ) ;
2011-07-06 13:53:15 +04:00
timeout_val = wait_event_interruptible_timeout ( * sk_sleep ( sk ) ,
! tport - > congested , timeout_val ) ;
2008-04-15 11:22:02 +04:00
lock_sock ( sk ) ;
2007-02-09 17:25:21 +03:00
} while ( 1 ) ;
2008-04-15 11:22:02 +04:00
exit :
if ( iocb )
release_sock ( sk ) ;
return res ;
2006-01-02 21:04:38 +03:00
}
2007-02-09 17:25:21 +03:00
/**
2006-01-02 21:04:38 +03:00
* send_packet - send a connection - oriented message
2008-04-15 11:22:02 +04:00
* @ iocb : if NULL , indicates that socket lock is already held
2006-01-02 21:04:38 +03:00
* @ sock : socket structure
* @ m : message to send
2006-06-26 10:43:57 +04:00
* @ total_len : length of message
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Used for SOCK_SEQPACKET messages and SOCK_STREAM data .
* If @ m carries a destination address the request is passed on unchanged
* to send_msg ( ) .
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns the number of bytes sent on success , or errno otherwise :
* - EMSGSIZE if @ total_len exceeds TIPC_MAX_USER_MSG_SIZE ,
* - EPIPE if the socket is SS_DISCONNECTING , - ENOTCONN for any other state
* that is not SS_CONNECTED ,
* - EWOULDBLOCK if the send timeout runs out while the link is congested
* ( or the negative value returned by wait_event_interruptible_timeout ( ) ) ,
* or the error returned by tipc_send ( ) .
*/
static int send_packet ( struct kiocb * iocb , struct socket * sock ,
struct msghdr * m , size_t total_len )
{
2008-04-15 11:22:02 +04:00
struct sock * sk = sock - > sk ;
struct tipc_port * tport = tipc_sk_port ( sk ) ;
2007-02-09 17:25:21 +03:00
struct sockaddr_tipc * dest = ( struct sockaddr_tipc * ) m - > msg_name ;
2011-07-06 13:53:15 +04:00
long timeout_val ;
2006-01-02 21:04:38 +03:00
int res ;
/* Handle implied connection establishment */
if ( unlikely ( dest ) )
return send_msg ( iocb , sock , m , total_len ) ;
2013-02-01 00:51:47 +04:00
if ( total_len > TIPC_MAX_USER_MSG_SIZE )
2010-04-21 01:58:24 +04:00
return - EMSGSIZE ;
2008-04-15 11:22:02 +04:00
if ( iocb )
lock_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
2011-07-06 13:53:15 +04:00
timeout_val = sock_sndtimeo ( sk , m - > msg_flags & MSG_DONTWAIT ) ;
2007-02-09 17:25:21 +03:00
do {
2006-06-26 10:45:53 +04:00
/* Re-checked on every pass: the state may change while waiting below */
if ( unlikely ( sock - > state ! = SS_CONNECTED ) ) {
if ( sock - > state = = SS_DISCONNECTING )
2007-02-09 17:25:21 +03:00
res = - EPIPE ;
2006-06-26 10:45:53 +04:00
else
res = - ENOTCONN ;
2008-04-15 11:22:02 +04:00
break ;
2006-06-26 10:45:53 +04:00
}
2013-10-18 09:23:15 +04:00
res = tipc_send ( tport - > ref , m - > msg_iov , total_len ) ;
2010-12-31 21:59:35 +03:00
if ( likely ( res ! = - ELINKCONG ) )
2008-04-15 11:22:02 +04:00
break ;
2011-07-06 13:53:15 +04:00
if ( timeout_val < = 0L ) {
res = timeout_val ? timeout_val : - EWOULDBLOCK ;
2008-04-15 11:22:02 +04:00
break ;
2007-02-09 17:25:21 +03:00
}
2008-04-15 11:22:02 +04:00
/*
 * Sleep with the socket lock dropped until the port is either no
 * longer congested or no longer connected, then retake the lock.
 */
release_sock ( sk ) ;
2011-07-06 13:53:15 +04:00
timeout_val = wait_event_interruptible_timeout ( * sk_sleep ( sk ) ,
( ! tport - > congested | | ! tport - > connected ) , timeout_val ) ;
2008-04-15 11:22:02 +04:00
lock_sock ( sk ) ;
2007-02-09 17:25:21 +03:00
} while ( 1 ) ;
2008-04-15 11:22:02 +04:00
if ( iocb )
release_sock ( sk ) ;
return res ;
2006-01-02 21:04:38 +03:00
}
2007-02-09 17:25:21 +03:00
/**
2006-01-02 21:04:38 +03:00
* send_stream - send stream - oriented data
* @ iocb : ( unused )
* @ sock : socket structure
* @ m : data to send
* @ total_len : total length of data to be sent
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Used for SOCK_STREAM data .
*
* On a connected socket every iovec entry is sent as one or more messages
* via send_packet ( ) . On an SS_UNCONNECTED socket the whole request is
* handed to send_packet ( ) as is .
2007-02-09 17:25:21 +03:00
*
* Returns the number of bytes sent on success ( or partial success ) ,
2006-06-26 10:46:50 +04:00
* or errno if no data sent :
* - EPIPE if the socket is SS_DISCONNECTING , - ENOTCONN for any other state
* that is neither SS_CONNECTED nor SS_UNCONNECTED ,
* - EISCONN if a destination address is given on a connected socket ,
* - EMSGSIZE if @ total_len exceeds INT_MAX ,
* or the error returned by send_packet ( ) .
2006-01-02 21:04:38 +03:00
*/
static int send_stream ( struct kiocb * iocb , struct socket * sock ,
struct msghdr * m , size_t total_len )
{
2008-04-15 11:22:02 +04:00
struct sock * sk = sock - > sk ;
struct tipc_port * tport = tipc_sk_port ( sk ) ;
2006-01-02 21:04:38 +03:00
struct msghdr my_msg ;
struct iovec my_iov ;
struct iovec * curr_iov ;
int curr_iovlen ;
char __user * curr_start ;
2007-06-11 04:25:24 +04:00
u32 hdr_size ;
2006-01-02 21:04:38 +03:00
int curr_left ;
int bytes_to_send ;
2006-06-26 10:46:50 +04:00
int bytes_sent ;
2006-01-02 21:04:38 +03:00
int res ;
2007-02-09 17:25:21 +03:00
2008-04-15 11:22:02 +04:00
lock_sock ( sk ) ;
2007-06-11 04:25:24 +04:00
/* Handle special cases where there is no connection */
2007-02-09 17:25:21 +03:00
if ( unlikely ( sock - > state ! = SS_CONNECTED ) ) {
2013-12-12 05:36:40 +04:00
/* NULL iocb: the socket lock is already held by this routine */
if ( sock - > state = = SS_UNCONNECTED )
2008-04-15 11:22:02 +04:00
res = send_packet ( NULL , sock , m , total_len ) ;
2013-12-27 06:09:39 +04:00
else
res = sock - > state = = SS_DISCONNECTING ? - EPIPE : - ENOTCONN ;
2013-12-12 05:36:40 +04:00
goto exit ;
2007-02-09 17:25:21 +03:00
}
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
if ( unlikely ( m - > msg_name ) ) {
res = - EISCONN ;
goto exit ;
}
2006-10-17 08:43:54 +04:00
2013-02-01 00:51:47 +04:00
/* bytes_sent is an int, so the total must fit in one */
if ( total_len > ( unsigned int ) INT_MAX ) {
2010-04-21 01:58:24 +04:00
res = - EMSGSIZE ;
goto exit ;
}
2007-02-09 17:25:21 +03:00
/*
2006-01-02 21:04:38 +03:00
* Send each iovec entry using one or more messages
*
2007-02-09 17:25:21 +03:00
* Note : This algorithm is good for the most likely case
2006-01-02 21:04:38 +03:00
* ( i . e . one large iovec entry ) , but could be improved to pass sets
* of small iovec entries into send_packet ( ) .
*/
2006-06-26 10:46:50 +04:00
curr_iov = m - > msg_iov ;
curr_iovlen = m - > msg_iovlen ;
2006-01-02 21:04:38 +03:00
my_msg . msg_iov = & my_iov ;
my_msg . msg_iovlen = 1 ;
2006-10-17 08:43:54 +04:00
my_msg . msg_flags = m - > msg_flags ;
my_msg . msg_name = NULL ;
2006-06-26 10:46:50 +04:00
bytes_sent = 0 ;
2006-01-02 21:04:38 +03:00
2007-06-11 04:25:24 +04:00
hdr_size = msg_hdr_sz ( & tport - > phdr ) ;
2006-01-02 21:04:38 +03:00
while ( curr_iovlen - - ) {
curr_start = curr_iov - > iov_base ;
curr_left = curr_iov - > iov_len ;
while ( curr_left ) {
2007-06-11 04:25:24 +04:00
/*
 * Chunk size: tport->max_pkt minus the header size, capped at
 * TIPC_MAX_USER_MSG_SIZE and at what is left of this iovec entry.
 * max_pkt is re-read for every chunk.
 */
bytes_to_send = tport - > max_pkt - hdr_size ;
if ( bytes_to_send > TIPC_MAX_USER_MSG_SIZE )
bytes_to_send = TIPC_MAX_USER_MSG_SIZE ;
if ( curr_left < bytes_to_send )
bytes_to_send = curr_left ;
2006-01-02 21:04:38 +03:00
my_iov . iov_base = curr_start ;
my_iov . iov_len = bytes_to_send ;
tipc: Avoid recomputation of outgoing message length
Rework TIPC's message sending routines to take advantage of the total
amount of data value passed to it by the kernel socket infrastructure.
This change eliminates the need for TIPC to compute the size of outgoing
messages itself, as well as the check for an oversize message in
tipc_msg_build(). In addition, this change warrants an explanation:
- res = send_packet(NULL, sock, &my_msg, 0);
+ res = send_packet(NULL, sock, &my_msg, bytes_to_send);
Previously, the final argument to send_packet() was ignored (since the
amount of data being sent was recalculated by a lower-level routine)
and we could just pass in a dummy value (0). Now that the
recalculation is being eliminated, the argument value being passed to
send_packet() is significant and we have to supply the actual amount
of data we want to send.
Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-04-21 19:42:07 +04:00
res = send_packet ( NULL , sock , & my_msg , bytes_to_send ) ;
2010-12-31 21:59:33 +03:00
if ( res < 0 ) {
2008-04-15 11:22:02 +04:00
if ( bytes_sent )
2007-06-11 04:25:24 +04:00
res = bytes_sent ;
2008-04-15 11:22:02 +04:00
goto exit ;
2006-06-26 10:46:50 +04:00
}
2006-01-02 21:04:38 +03:00
curr_left - = bytes_to_send ;
curr_start + = bytes_to_send ;
2006-06-26 10:46:50 +04:00
bytes_sent + = bytes_to_send ;
2006-01-02 21:04:38 +03:00
}
curr_iov + + ;
}
2008-04-15 11:22:02 +04:00
res = bytes_sent ;
exit :
release_sock ( sk ) ;
return res ;
2006-01-02 21:04:38 +03:00
}
/**
* auto_connect - complete connection setup to a remote port
* @ sock : socket structure
* @ msg : peer ' s response message
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Records the originating port reference and node of @ msg as the peer of
* the socket , calls __tipc_connect ( ) for the local port , copies the
* importance of @ msg into the port header and sets the socket state to
* SS_CONNECTED .
*
* Returns 0 on success ; - EINVAL if the local port reference cannot be
* dereferenced or if the importance of @ msg exceeds
* TIPC_CRITICAL_IMPORTANCE
*
* NOTE ( review ) : the importance check runs after __tipc_connect ( ) has
* already been called , and sock - > state is left unchanged on that path ;
* confirm that callers handle this failure case .
*/
2008-04-15 11:22:02 +04:00
static int auto_connect ( struct socket * sock , struct tipc_msg * msg )
2006-01-02 21:04:38 +03:00
{
2008-07-15 09:43:32 +04:00
struct tipc_sock * tsock = tipc_sk ( sock - > sk ) ;
tipc: introduce non-blocking socket connect
TIPC has so far only supported blocking connect(), meaning that a call
to connect() doesn't return until either the connection is fully
established, or an error occurs. This has proved insufficient for many
users, so we now introduce non-blocking connect(), analogous to how
this is done in TCP and other protocols.
With this feature, if a connection cannot be established instantly,
connect() will return the error code "-EINPROGRESS".
If the user later calls connect() again, he will either have the
return code "-EALREADY" or "-EISCONN", depending on whether the
connection has been established or not.
The user must have explicitly set the socket to be non-blocking
(SOCK_NONBLOCK or O_NONBLOCK, depending on method used), so unless
for some reason they had set this already (the socket would anyway
remain blocking in current TIPC) this change should be completely
backwards compatible.
It is also now possible to call select() or poll() to wait for the
completion of a connection.
An effect of the above is that the actual completion of a connection
may now be performed asynchronously, independent of the calls from
user space. Therefore, we now execute this code in BH context, in
the function filter_rcv(), which is executed upon reception of
messages in the socket.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
[PG: minor refactoring for improved connect/disconnect function names]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-30 03:51:19 +04:00
struct tipc_port * p_ptr ;
2006-01-02 21:04:38 +03:00
2008-07-15 09:43:32 +04:00
tsock - > peer_name . ref = msg_origport ( msg ) ;
tsock - > peer_name . node = msg_orignode ( msg ) ;
tipc: introduce non-blocking socket connect
TIPC has so far only supported blocking connect(), meaning that a call
to connect() doesn't return until either the connection is fully
established, or an error occurs. This has proved insufficient for many
users, so we now introduce non-blocking connect(), analogous to how
this is done in TCP and other protocols.
With this feature, if a connection cannot be established instantly,
connect() will return the error code "-EINPROGRESS".
If the user later calls connect() again, he will either have the
return code "-EALREADY" or "-EISCONN", depending on whether the
connection has been established or not.
The user must have explicitly set the socket to be non-blocking
(SOCK_NONBLOCK or O_NONBLOCK, depending on method used), so unless
for some reason they had set this already (the socket would anyway
remain blocking in current TIPC) this change should be completely
backwards compatible.
It is also now possible to call select() or poll() to wait for the
completion of a connection.
An effect of the above is that the actual completion of a connection
may now be performed asynchronously, independent of the calls from
user space. Therefore, we now execute this code in BH context, in
the function filter_rcv(), which is executed upon reception of
messages in the socket.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
[PG: minor refactoring for improved connect/disconnect function names]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-30 03:51:19 +04:00
p_ptr = tipc_port_deref ( tsock - > p - > ref ) ;
if ( ! p_ptr )
return - EINVAL ;
__tipc_connect ( tsock - > p - > ref , p_ptr , & tsock - > peer_name ) ;
if ( msg_importance ( msg ) > TIPC_CRITICAL_IMPORTANCE )
return - EINVAL ;
msg_set_importance ( & p_ptr - > phdr , ( u32 ) msg_importance ( msg ) ) ;
2006-01-02 21:04:38 +03:00
sock - > state = SS_CONNECTED ;
return 0 ;
}
/**
* set_orig_addr - capture sender ' s address for received message
* @ m : descriptor for message info
* @ msg : received message header
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Fills m - > msg_name with a TIPC_ADDR_ID address built from the originating
* port reference and node of @ msg , and sets m - > msg_namelen to
* sizeof ( struct sockaddr_tipc ) .
*
* Note : Address is not captured if not requested by receiver
* ( i . e . when m - > msg_name is NULL ) .
*/
2006-03-21 09:37:04 +03:00
static void set_orig_addr ( struct msghdr * m , struct tipc_msg * msg )
2006-01-02 21:04:38 +03:00
{
2007-02-09 17:25:21 +03:00
struct sockaddr_tipc * addr = ( struct sockaddr_tipc * ) m - > msg_name ;
2006-01-02 21:04:38 +03:00
2007-02-09 17:25:21 +03:00
if ( addr ) {
2006-01-02 21:04:38 +03:00
addr - > family = AF_TIPC ;
addr - > addrtype = TIPC_ADDR_ID ;
2013-04-07 05:52:00 +04:00
/* Zero the whole address member so no part of it is left unset */
memset ( & addr - > addr , 0 , sizeof ( addr - > addr ) ) ;
2006-01-02 21:04:38 +03:00
addr - > addr . id . ref = msg_origport ( msg ) ;
addr - > addr . id . node = msg_orignode ( msg ) ;
2010-12-31 21:59:32 +03:00
addr - > addr . name . domain = 0 ; /* redundant: already zeroed by the memset above */
addr - > scope = 0 ; /* needed: scope is outside the region cleared by the memset */
2006-01-02 21:04:38 +03:00
m - > msg_namelen = sizeof ( struct sockaddr_tipc ) ;
}
}
/**
2007-02-09 17:25:21 +03:00
* anc_data_recv - optionally capture ancillary data for received message
2006-01-02 21:04:38 +03:00
* @ m : descriptor for message info
* @ msg : received message header
* @ tport : TIPC port associated with message
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Note : Ancillary data is not captured if not requested by receiver .
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns 0 if successful , otherwise errno
*/
2006-03-21 09:37:04 +03:00
static int anc_data_recv ( struct msghdr * m , struct tipc_msg * msg ,
2013-06-17 18:54:47 +04:00
struct tipc_port * tport )
2006-01-02 21:04:38 +03:00
{
u32 anc_data [ 3 ] ;
u32 err ;
u32 dest_type ;
2006-06-26 10:45:24 +04:00
int has_name ;
2006-01-02 21:04:38 +03:00
int res ;
if ( likely ( m - > msg_controllen = = 0 ) )
return 0 ;
/* Optionally capture errored message object(s) */
err = msg ? msg_errcode ( msg ) : 0 ;
if ( unlikely ( err ) ) {
anc_data [ 0 ] = err ;
anc_data [ 1 ] = msg_data_sz ( msg ) ;
2010-12-31 21:59:33 +03:00
res = put_cmsg ( m , SOL_TIPC , TIPC_ERRINFO , 8 , anc_data ) ;
if ( res )
2006-01-02 21:04:38 +03:00
return res ;
2010-12-31 21:59:33 +03:00
if ( anc_data [ 1 ] ) {
res = put_cmsg ( m , SOL_TIPC , TIPC_RETDATA , anc_data [ 1 ] ,
msg_data ( msg ) ) ;
if ( res )
return res ;
}
2006-01-02 21:04:38 +03:00
}
/* Optionally capture message destination object */
dest_type = msg ? msg_type ( msg ) : TIPC_DIRECT_MSG ;
switch ( dest_type ) {
case TIPC_NAMED_MSG :
2006-06-26 10:45:24 +04:00
has_name = 1 ;
2006-01-02 21:04:38 +03:00
anc_data [ 0 ] = msg_nametype ( msg ) ;
anc_data [ 1 ] = msg_namelower ( msg ) ;
anc_data [ 2 ] = msg_namelower ( msg ) ;
break ;
case TIPC_MCAST_MSG :
2006-06-26 10:45:24 +04:00
has_name = 1 ;
2006-01-02 21:04:38 +03:00
anc_data [ 0 ] = msg_nametype ( msg ) ;
anc_data [ 1 ] = msg_namelower ( msg ) ;
anc_data [ 2 ] = msg_nameupper ( msg ) ;
break ;
case TIPC_CONN_MSG :
2006-06-26 10:45:24 +04:00
has_name = ( tport - > conn_type ! = 0 ) ;
2006-01-02 21:04:38 +03:00
anc_data [ 0 ] = tport - > conn_type ;
anc_data [ 1 ] = tport - > conn_instance ;
anc_data [ 2 ] = tport - > conn_instance ;
break ;
default :
2006-06-26 10:45:24 +04:00
has_name = 0 ;
2006-01-02 21:04:38 +03:00
}
2010-12-31 21:59:33 +03:00
if ( has_name ) {
res = put_cmsg ( m , SOL_TIPC , TIPC_DESTNAME , 12 , anc_data ) ;
if ( res )
return res ;
}
2006-01-02 21:04:38 +03:00
return 0 ;
}
2007-02-09 17:25:21 +03:00
/**
 * recv_msg - receive packet-oriented message
 * @iocb: (unused)
 * @sock: socket to receive from
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags (MSG_DONTWAIT and MSG_PEEK are examined here)
 *
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it and
 * set MSG_TRUNC in @m->msg_flags.
 *
 * Returns size of returned message data, errno otherwise
 */
static int recv_msg ( struct kiocb * iocb , struct socket * sock ,
struct msghdr * m , size_t buf_len , int flags )
{
2008-04-15 11:22:02 +04:00
struct sock * sk = sock - > sk ;
struct tipc_port * tport = tipc_sk_port ( sk ) ;
2006-01-02 21:04:38 +03:00
struct sk_buff * buf ;
struct tipc_msg * msg ;
2011-02-23 22:52:14 +03:00
long timeout ;
2006-01-02 21:04:38 +03:00
unsigned int sz ;
u32 err ;
int res ;
2008-04-15 11:22:02 +04:00
/* Catch invalid receive requests */
2006-01-02 21:04:38 +03:00
if ( unlikely ( ! buf_len ) )
return - EINVAL ;
2008-04-15 11:22:02 +04:00
/* Everything below runs with the socket lock held, except the sleep */
lock_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
/* Refuse to receive on a socket that is still in SS_UNCONNECTED state */
if ( unlikely ( sock - > state = = SS_UNCONNECTED ) ) {
res = - ENOTCONN ;
2006-01-02 21:04:38 +03:00
goto exit ;
}
2011-02-23 22:52:14 +03:00
/* Wait budget; sock_rcvtimeo() is told whether MSG_DONTWAIT was requested */
timeout = sock_rcvtimeo ( sk , flags & MSG_DONTWAIT ) ;
2008-04-15 11:22:02 +04:00
restart :
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
/* Look for a message in receive queue; wait if necessary */
while ( skb_queue_empty ( & sk - > sk_receive_queue ) ) {
/* Queue is empty and the socket is disconnecting: nothing more will come */
if ( sock - > state = = SS_DISCONNECTING ) {
res = - ENOTCONN ;
goto exit ;
}
2011-02-23 22:52:14 +03:00
/*
 * Zero wait time left yields -EWOULDBLOCK; a negative value is passed
 * back unchanged (presumably an error from the interruptible wait
 * below -- confirm).
 */
if ( timeout < = 0L ) {
res = timeout ? timeout : - EWOULDBLOCK ;
2008-04-15 11:22:02 +04:00
goto exit ;
}
/* Drop the socket lock while sleeping; retake it before re-checking */
release_sock ( sk ) ;
2011-02-23 22:52:14 +03:00
timeout = wait_event_interruptible_timeout ( * sk_sleep ( sk ) ,
tipc_rx_ready ( sock ) ,
timeout ) ;
2008-04-15 11:22:02 +04:00
lock_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
}
2008-04-15 11:22:02 +04:00
/* Look at first message in receive queue */
buf = skb_peek ( & sk - > sk_receive_queue ) ;
2006-01-02 21:04:38 +03:00
msg = buf_msg ( buf ) ;
sz = msg_data_sz ( msg ) ;
err = msg_errcode ( msg ) ;
/* Discard an empty non-errored message & try again */
if ( ( ! sz ) & & ( ! err ) ) {
2008-04-15 11:22:02 +04:00
advance_rx_queue ( sk ) ;
2006-01-02 21:04:38 +03:00
goto restart ;
}
/* Capture sender's address (optional) */
set_orig_addr ( m , msg ) ;
/* Capture ancillary data (optional) */
2008-04-15 11:22:02 +04:00
res = anc_data_recv ( m , msg , tport ) ;
if ( res )
2006-01-02 21:04:38 +03:00
goto exit ;
/* Capture message data (if valid) & compute return value (always) */
if ( ! err ) {
/* User buffer is smaller than the message: copy what fits, flag truncation */
if ( unlikely ( buf_len < sz ) ) {
sz = buf_len ;
m - > msg_flags | = MSG_TRUNC ;
}
2011-02-21 17:45:40 +03:00
/* Payload starts right after the TIPC header */
res = skb_copy_datagram_iovec ( buf , msg_hdr_sz ( msg ) ,
m - > msg_iov , sz ) ;
if ( res )
2006-01-02 21:04:38 +03:00
goto exit ;
res = sz ;
} else {
/*
 * Errored message: return 0 for SS_READY sockets, for a
 * TIPC_CONN_SHUTDOWN error, or when the caller supplied a control
 * buffer; otherwise report -ECONNRESET.
 */
if ( ( sock - > state = = SS_READY ) | |
( ( err = = TIPC_CONN_SHUTDOWN ) | | m - > msg_control ) )
res = 0 ;
else
res = - ECONNRESET ;
}
/* Consume received message (optional) */
if ( likely ( ! ( flags & MSG_PEEK ) ) ) {
2008-04-15 11:06:12 +04:00
/*
 * For sockets not in SS_READY state, count the consumed message and
 * acknowledge once the count reaches TIPC_FLOW_CONTROL_WIN.
 */
if ( ( sock - > state ! = SS_READY ) & &
2008-04-15 11:22:02 +04:00
( + + tport - > conn_unacked > = TIPC_FLOW_CONTROL_WIN ) )
tipc_acknowledge ( tport - > ref , tport - > conn_unacked ) ;
advance_rx_queue ( sk ) ;
2007-02-09 17:25:21 +03:00
}
2006-01-02 21:04:38 +03:00
exit :
2008-04-15 11:22:02 +04:00
release_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
return res ;
}
2007-02-09 17:25:21 +03:00
/**
 * recv_stream - receive stream-oriented data
 * @iocb: (unused)
 * @sock: socket to receive from
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags (MSG_DONTWAIT, MSG_WAITALL and MSG_PEEK are
 *         examined here)
 *
 * Used for SOCK_STREAM messages only. If not enough data is available
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
static int recv_stream ( struct kiocb * iocb , struct socket * sock ,
struct msghdr * m , size_t buf_len , int flags )
{
2008-04-15 11:22:02 +04:00
struct sock * sk = sock - > sk ;
struct tipc_port * tport = tipc_sk_port ( sk ) ;
2006-01-02 21:04:38 +03:00
struct sk_buff * buf ;
struct tipc_msg * msg ;
2011-02-23 22:52:14 +03:00
long timeout ;
2006-01-02 21:04:38 +03:00
unsigned int sz ;
2010-08-17 15:00:04 +04:00
int sz_to_copy , target , needed ;
2006-01-02 21:04:38 +03:00
/* Running total of bytes copied to the user across all messages */
int sz_copied = 0 ;
u32 err ;
2008-04-15 11:22:02 +04:00
int res = 0 ;
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
/* Catch invalid receive attempts */
2006-01-02 21:04:38 +03:00
if ( unlikely ( ! buf_len ) )
return - EINVAL ;
2008-04-15 11:22:02 +04:00
/* Everything below runs with the socket lock held, except the sleep */
lock_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
2013-06-17 18:54:38 +04:00
/* Refuse to receive on a socket that is still in SS_UNCONNECTED state */
if ( unlikely ( ( sock - > state = = SS_UNCONNECTED ) ) ) {
2008-04-15 11:22:02 +04:00
res = - ENOTCONN ;
2006-01-02 21:04:38 +03:00
goto exit ;
}
2010-08-17 15:00:04 +04:00
/* Amount to collect before returning; sock_rcvlowat() is told about MSG_WAITALL */
target = sock_rcvlowat ( sk , flags & MSG_WAITALL , buf_len ) ;
2011-02-23 22:52:14 +03:00
/* Wait budget; sock_rcvtimeo() is told whether MSG_DONTWAIT was requested */
timeout = sock_rcvtimeo ( sk , flags & MSG_DONTWAIT ) ;
2006-01-02 21:04:38 +03:00
2012-04-30 23:29:02 +04:00
restart :
2008-04-15 11:22:02 +04:00
/* Look for a message in receive queue; wait if necessary */
while ( skb_queue_empty ( & sk - > sk_receive_queue ) ) {
/* Queue is empty and the socket is disconnecting: nothing more will come */
if ( sock - > state = = SS_DISCONNECTING ) {
res = - ENOTCONN ;
goto exit ;
}
2011-02-23 22:52:14 +03:00
/*
 * Zero wait time left yields -EWOULDBLOCK; a negative value is passed
 * back unchanged (presumably an error from the interruptible wait
 * below -- confirm).
 */
if ( timeout < = 0L ) {
res = timeout ? timeout : - EWOULDBLOCK ;
2008-04-15 11:22:02 +04:00
goto exit ;
}
/* Drop the socket lock while sleeping; retake it before re-checking */
release_sock ( sk ) ;
2011-02-23 22:52:14 +03:00
timeout = wait_event_interruptible_timeout ( * sk_sleep ( sk ) ,
tipc_rx_ready ( sock ) ,
timeout ) ;
2008-04-15 11:22:02 +04:00
lock_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
}
2008-04-15 11:22:02 +04:00
/* Look at first message in receive queue */
buf = skb_peek ( & sk - > sk_receive_queue ) ;
2006-01-02 21:04:38 +03:00
msg = buf_msg ( buf ) ;
sz = msg_data_sz ( msg ) ;
err = msg_errcode ( msg ) ;
/* Discard an empty non-errored message & try again */
if ( ( ! sz ) & & ( ! err ) ) {
2008-04-15 11:22:02 +04:00
advance_rx_queue ( sk ) ;
2006-01-02 21:04:38 +03:00
goto restart ;
}
/* Optionally capture sender's address & ancillary data of first msg */
if ( sz_copied = = 0 ) {
set_orig_addr ( m , msg ) ;
2008-04-15 11:22:02 +04:00
res = anc_data_recv ( m , msg , tport ) ;
if ( res )
2006-01-02 21:04:38 +03:00
goto exit ;
}
/* Capture message data (if valid) & compute return value (always) */
if ( ! err ) {
2011-02-21 17:45:40 +03:00
/* handle holds how many bytes of this message an earlier partial read consumed */
u32 offset = ( u32 ) ( unsigned long ) ( TIPC_SKB_CB ( buf ) - > handle ) ;
2006-01-02 21:04:38 +03:00
2011-02-21 17:45:40 +03:00
/* sz becomes the number of unread bytes left in this message */
sz - = offset ;
2006-01-02 21:04:38 +03:00
/* Copy the smaller of: what is left in the message, room left in the user buffer */
needed = ( buf_len - sz_copied ) ;
sz_to_copy = ( sz < = needed ) ? sz : needed ;
2011-02-21 17:45:40 +03:00
res = skb_copy_datagram_iovec ( buf , msg_hdr_sz ( msg ) + offset ,
m - > msg_iov , sz_to_copy ) ;
if ( res )
2006-01-02 21:04:38 +03:00
goto exit ;
2011-02-21 17:45:40 +03:00
2006-01-02 21:04:38 +03:00
sz_copied + = sz_to_copy ;
/*
 * Message only partly consumed: record the new read offset in the
 * buffer (unless peeking) and return, leaving it queued.
 */
if ( sz_to_copy < sz ) {
if ( ! ( flags & MSG_PEEK ) )
2011-02-21 17:45:40 +03:00
TIPC_SKB_CB ( buf ) - > handle =
( void * ) ( unsigned long ) ( offset + sz_to_copy ) ;
2006-01-02 21:04:38 +03:00
goto exit ;
}
} else {
if ( sz_copied ! = 0 )
goto exit ; /* can't add error msg to valid data */
/*
 * Errored message with no data copied yet: return 0 for a
 * TIPC_CONN_SHUTDOWN error or when the caller supplied a control
 * buffer; otherwise report -ECONNRESET.
 */
if ( ( err = = TIPC_CONN_SHUTDOWN ) | | m - > msg_control )
res = 0 ;
else
res = - ECONNRESET ;
}
/* Consume received message (optional) */
if ( likely ( ! ( flags & MSG_PEEK ) ) ) {
2008-04-15 11:22:02 +04:00
/* Acknowledge once the unacked count reaches TIPC_FLOW_CONTROL_WIN */
if ( unlikely ( + + tport - > conn_unacked > = TIPC_FLOW_CONTROL_WIN ) )
tipc_acknowledge ( tport - > ref , tport - > conn_unacked ) ;
advance_rx_queue ( sk ) ;
2007-02-09 17:25:21 +03:00
}
2006-01-02 21:04:38 +03:00
/* Loop around if more data is required */
2009-11-30 03:55:45 +03:00
if ( ( sz_copied < buf_len ) & & /* didn't get all requested data */
( ! skb_queue_empty ( & sk - > sk_receive_queue ) | |
2010-08-17 15:00:04 +04:00
( sz_copied < target ) ) & & /* and more is ready or required */
2009-11-30 03:55:45 +03:00
( ! ( flags & MSG_PEEK ) ) & & /* and aren't just peeking at data */
( ! err ) ) /* and haven't reached a FIN */
2006-01-02 21:04:38 +03:00
goto restart ;
exit :
2008-04-15 11:22:02 +04:00
release_sock ( sk ) ;
2006-06-26 10:48:22 +04:00
/* Any data already copied takes precedence over res */
return sz_copied ? sz_copied : res ;
2006-01-02 21:04:38 +03:00
}
2012-08-21 07:16:57 +04:00
/**
* tipc_write_space - wake up thread if port congestion is released
* @ sk : socket
*/
static void tipc_write_space ( struct sock * sk )
{
struct socket_wq * wq ;
rcu_read_lock ( ) ;
wq = rcu_dereference ( sk - > sk_wq ) ;
if ( wq_has_sleeper ( wq ) )
wake_up_interruptible_sync_poll ( & wq - > wait , POLLOUT |
POLLWRNORM | POLLWRBAND ) ;
rcu_read_unlock ( ) ;
}
/**
* tipc_data_ready - wake up threads to indicate messages have been received
* @ sk : socket
* @ len : the length of messages
*/
static void tipc_data_ready ( struct sock * sk , int len )
{
struct socket_wq * wq ;
rcu_read_lock ( ) ;
wq = rcu_dereference ( sk - > sk_wq ) ;
if ( wq_has_sleeper ( wq ) )
wake_up_interruptible_sync_poll ( & wq - > wait , POLLIN |
POLLRDNORM | POLLRDBAND ) ;
rcu_read_unlock ( ) ;
}
2012-11-30 03:39:14 +04:00
/**
 * filter_connect - Handle all incoming messages for a connection-based socket
 * @tsock: TIPC socket
 * @buf: pointer to the message buffer; set to NULL if the message is
 *       freed here (an empty acknowledgement received while connecting)
 *
 * Returns TIPC_OK if the message is accepted or was consumed here,
 * TIPC_ERR_NO_PORT otherwise. When a connection attempt fails, the
 * socket is moved to SS_DISCONNECTING and the error is recorded in
 * sk->sk_err.
 */
static u32 filter_connect ( struct tipc_sock * tsock , struct sk_buff * * buf )
{
struct socket * sock = tsock - > sk . sk_socket ;
struct tipc_msg * msg = buf_msg ( * buf ) ;
tipc: introduce non-blocking socket connect
TIPC has so far only supported blocking connect(), meaning that a call
to connect() doesn't return until either the connection is fully
established, or an error occurs. This has proved insufficient for many
users, so we now introduce non-blocking connect(), analogous to how
this is done in TCP and other protocols.
With this feature, if a connection cannot be established instantly,
connect() will return the error code "-EINPROGRESS".
If the user later calls connect() again, he will either have the
return code "-EALREADY" or "-EISCONN", depending on whether the
connection has been established or not.
The user must have explicitly set the socket to be non-blocking
(SOCK_NONBLOCK or O_NONBLOCK, depending on method used), so unless
for some reason they had set this already (the socket would anyway
remain blocking in current TIPC) this change should be completely
backwards compatible.
It is also now possible to call select() or poll() to wait for the
completion of a connection.
An effect of the above is that the actual completion of a connection
may now be performed asynchronously, independent of the calls from
user space. Therefore, we now execute this code in BH context, in
the function filter_rcv(), which is executed upon reception of
messages in the socket.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
[PG: minor refactoring for improved connect/disconnect function names]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-30 03:51:19 +04:00
struct sock * sk = & tsock - > sk ;
2012-11-30 03:39:14 +04:00
/* Default verdict is rejection; branches below switch it to TIPC_OK */
u32 retval = TIPC_ERR_NO_PORT ;
tipc: introduce non-blocking socket connect
TIPC has so far only supported blocking connect(), meaning that a call
to connect() doesn't return until either the connection is fully
established, or an error occurs. This has proved insufficient for many
users, so we now introduce non-blocking connect(), analogous to how
this is done in TCP and other protocols.
With this feature, if a connection cannot be established instantly,
connect() will return the error code "-EINPROGRESS".
If the user later calls connect() again, he will either have the
return code "-EALREADY" or "-EISCONN", depending on whether the
connection has been established or not.
The user must have explicitly set the socket to be non-blocking
(SOCK_NONBLOCK or O_NONBLOCK, depending on method used), so unless
for some reason they had set this already (the socket would anyway
remain blocking in current TIPC) this change should be completely
backwards compatible.
It is also now possible to call select() or poll() to wait for the
completion of a connection.
An effect of the above is that the actual completion of a connection
may now be performed asynchronously, independent of the calls from
user space. Therefore, we now execute this code in BH context, in
the function filter_rcv(), which is executed upon reception of
messages in the socket.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
[PG: minor refactoring for improved connect/disconnect function names]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-30 03:51:19 +04:00
int res ;
2012-11-30 03:39:14 +04:00
/* Multicast messages are always rejected here */
if ( msg_mcast ( msg ) )
return retval ;
switch ( ( int ) sock - > state ) {
case SS_CONNECTED :
/* Accept only connection-based messages sent by peer */
if ( msg_connected ( msg ) & & tipc_port_peer_msg ( tsock - > p , msg ) ) {
/* An errored message from the peer moves the socket to SS_DISCONNECTING */
if ( unlikely ( msg_errcode ( msg ) ) ) {
sock - > state = SS_DISCONNECTING ;
__tipc_disconnect ( tsock - > p ) ;
}
retval = TIPC_OK ;
}
break ;
case SS_CONNECTING :
/* Accept only ACK or NACK message */
tipc: introduce non-blocking socket connect
TIPC has so far only supported blocking connect(), meaning that a call
to connect() doesn't return until either the connection is fully
established, or an error occurs. This has proved insufficient for many
users, so we now introduce non-blocking connect(), analogous to how
this is done in TCP and other protocols.
With this feature, if a connection cannot be established instantly,
connect() will return the error code "-EINPROGRESS".
If the user later calls connect() again, he will either have the
return code "-EALREADY" or "-EISCONN", depending on whether the
connection has been established or not.
The user must have explicitly set the socket to be non-blocking
(SOCK_NONBLOCK or O_NONBLOCK, depending on method used), so unless
for some reason they had set this already (the socket would anyway
remain blocking in current TIPC) this change should be completely
backwards compatible.
It is also now possible to call select() or poll() to wait for the
completion of a connection.
An effect of the above is that the actual completion of a connection
may now be performed asynchronously, independent of the calls from
user space. Therefore, we now execute this code in BH context, in
the function filter_rcv(), which is executed upon reception of
messages in the socket.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
[PG: minor refactoring for improved connect/disconnect function names]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-30 03:51:19 +04:00
/* Errored reply: record ECONNREFUSED; the message itself is still accepted */
if ( unlikely ( msg_errcode ( msg ) ) ) {
sock - > state = SS_DISCONNECTING ;
tipc: set sk_err correctly when connection fails
Should a connect fail, if the publication/server is unavailable or
due to some other error, a positive value will be returned and errno
is never set. If the application code checks for an explicit zero
return from connect (success) or a negative return (failure), it
will not catch the error and subsequent send() calls will fail as
shown from the strace snippet below.
socket(0x1e /* PF_??? */, SOCK_SEQPACKET, 0) = 3
connect(3, {sa_family=0x1e /* AF_??? */, sa_data="\2\1\322\4\0\0\322\4\0\0\0\0\0\0"}, 16) = 111
sendto(3, "test", 4, 0, NULL, 0) = -1 EPIPE (Broken pipe)
The reason for this behaviour is that TIPC wrongly inverts error
codes set in sk_err.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-28 11:29:58 +04:00
sk - > sk_err = ECONNREFUSED ;
tipc: introduce non-blocking socket connect
TIPC has so far only supported blocking connect(), meaning that a call
to connect() doesn't return until either the connection is fully
established, or an error occurs. This has proved insufficient for many
users, so we now introduce non-blocking connect(), analogous to how
this is done in TCP and other protocols.
With this feature, if a connection cannot be established instantly,
connect() will return the error code "-EINPROGRESS".
If the user later calls connect() again, he will either have the
return code "-EALREADY" or "-EISCONN", depending on whether the
connection has been established or not.
The user must have explicitly set the socket to be non-blocking
(SOCK_NONBLOCK or O_NONBLOCK, depending on method used), so unless
for some reason they had set this already (the socket would anyway
remain blocking in current TIPC) this change should be completely
backwards compatible.
It is also now possible to call select() or poll() to wait for the
completion of a connection.
An effect of the above is that the actual completion of a connection
may now be performed asynchronously, independent of the calls from
user space. Therefore, we now execute this code in BH context, in
the function filter_rcv(), which is executed upon reception of
messages in the socket.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
[PG: minor refactoring for improved connect/disconnect function names]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-30 03:51:19 +04:00
retval = TIPC_OK ;
break ;
}
/* A message that is not connection-based is rejected while connecting */
if ( unlikely ( ! msg_connected ( msg ) ) )
break ;
res = auto_connect ( sock , msg ) ;
/* auto_connect() failed: store the negated result as the socket error */
if ( res ) {
sock - > state = SS_DISCONNECTING ;
tipc: set sk_err correctly when connection fails
Should a connect fail, if the publication/server is unavailable or
due to some other error, a positive value will be returned and errno
is never set. If the application code checks for an explicit zero
return from connect (success) or a negative return (failure), it
will not catch the error and subsequent send() calls will fail as
shown from the strace snippet below.
socket(0x1e /* PF_??? */, SOCK_SEQPACKET, 0) = 3
connect(3, {sa_family=0x1e /* AF_??? */, sa_data="\2\1\322\4\0\0\322\4\0\0\0\0\0\0"}, 16) = 111
sendto(3, "test", 4, 0, NULL, 0) = -1 EPIPE (Broken pipe)
The reason for this behaviour is that TIPC wrongly inverts error
codes set in sk_err.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-28 11:29:58 +04:00
sk - > sk_err = - res ;
2012-11-30 03:39:14 +04:00
retval = TIPC_OK ;
tipc: introduce non-blocking socket connect
TIPC has so far only supported blocking connect(), meaning that a call
to connect() doesn't return until either the connection is fully
established, or an error occurs. This has proved insufficient for many
users, so we now introduce non-blocking connect(), analogous to how
this is done in TCP and other protocols.
With this feature, if a connection cannot be established instantly,
connect() will return the error code "-EINPROGRESS".
If the user later calls connect() again, he will either have the
return code "-EALREADY" or "-EISCONN", depending on whether the
connection has been established or not.
The user must have explicitly set the socket to be non-blocking
(SOCK_NONBLOCK or O_NONBLOCK, depending on method used), so unless
for some reason they had set this already (the socket would anyway
remain blocking in current TIPC) this change should be completely
backwards compatible.
It is also now possible to call select() or poll() to wait for the
completion of a connection.
An effect of the above is that the actual completion of a connection
may now be performed asynchronously, independent of the calls from
user space. Therefore, we now execute this code in BH context, in
the function filter_rcv(), which is executed upon reception of
messages in the socket.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
[PG: minor refactoring for improved connect/disconnect function names]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-30 03:51:19 +04:00
break ;
}
/* If an incoming message is an 'ACK-', it should be
 * discarded here because it doesn't contain useful
 * data. In addition, we should try to wake up
 * connect() routine if sleeping.
 */
if ( msg_data_sz ( msg ) = = 0 ) {
/* Free the buffer and tell the caller by clearing its pointer */
kfree_skb ( * buf ) ;
* buf = NULL ;
if ( waitqueue_active ( sk_sleep ( sk ) ) )
wake_up_interruptible ( sk_sleep ( sk ) ) ;
}
retval = TIPC_OK ;
2012-11-30 03:39:14 +04:00
break ;
case SS_LISTENING :
case SS_UNCONNECTED :
/* Accept only SYN message */
if ( ! msg_connected ( msg ) & & ! ( msg_errcode ( msg ) ) )
retval = TIPC_OK ;
break ;
case SS_DISCONNECTING :
/* Nothing is accepted in this state; retval stays TIPC_ERR_NO_PORT */
break ;
default :
pr_err ( " Unknown socket state %u \n " , sock - > state ) ;
}
return retval ;
}
2013-01-21 02:30:09 +04:00
/**
* rcvbuf_limit - get proper overload limit of socket receive queue
* @ sk : socket
* @ buf : message
*
* For all connection oriented messages , irrespective of importance ,
* the default overload value ( i . e . 67 MB ) is set as limit .
*
* For all connectionless messages , by default new queue limits are
* as belows :
*
2013-06-17 18:54:37 +04:00
* TIPC_LOW_IMPORTANCE ( 4 MB )
* TIPC_MEDIUM_IMPORTANCE ( 8 MB )
* TIPC_HIGH_IMPORTANCE ( 16 MB )
* TIPC_CRITICAL_IMPORTANCE ( 32 MB )
2013-01-21 02:30:09 +04:00
*
* Returns overload limit according to corresponding message importance
*/
static unsigned int rcvbuf_limit ( struct sock * sk , struct sk_buff * buf )
{
struct tipc_msg * msg = buf_msg ( buf ) ;
if ( msg_connected ( msg ) )
2013-12-12 05:36:39 +04:00
return sysctl_tipc_rmem [ 2 ] ;
return sk - > sk_rcvbuf > > TIPC_CRITICAL_IMPORTANCE < <
msg_importance ( msg ) ;
2013-01-21 02:30:09 +04:00
}
2007-02-09 17:25:21 +03:00
/**
2008-04-15 11:22:02 +04:00
* filter_rcv - validate incoming message
* @ sk : socket
2006-01-02 21:04:38 +03:00
* @ buf : message
2007-02-09 17:25:21 +03:00
*
2008-04-15 11:22:02 +04:00
* Enqueues message on receive queue if acceptable ; optionally handles
* disconnect indication for a connected socket .
*
* Called with socket lock already taken ; port lock may also be taken .
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns TIPC error status code ( TIPC_OK if message is not to be rejected )
*/
2008-04-15 11:22:02 +04:00
static u32 filter_rcv ( struct sock * sk , struct sk_buff * buf )
2006-01-02 21:04:38 +03:00
{
2008-04-15 11:22:02 +04:00
struct socket * sock = sk - > sk_socket ;
2006-01-02 21:04:38 +03:00
struct tipc_msg * msg = buf_msg ( buf ) ;
2013-01-21 02:30:09 +04:00
unsigned int limit = rcvbuf_limit ( sk , buf ) ;
2012-11-30 03:39:14 +04:00
u32 res = TIPC_OK ;
2006-01-02 21:04:38 +03:00
/* Reject message if it is wrong sort of message for socket */
2012-04-27 02:13:08 +04:00
if ( msg_type ( msg ) > TIPC_DIRECT_MSG )
return TIPC_ERR_NO_PORT ;
2008-04-15 11:22:02 +04:00
2006-01-02 21:04:38 +03:00
if ( sock - > state = = SS_READY ) {
2010-12-31 21:59:25 +03:00
if ( msg_connected ( msg ) )
2006-01-02 21:04:38 +03:00
return TIPC_ERR_NO_PORT ;
} else {
2012-11-30 03:39:14 +04:00
res = filter_connect ( tipc_sk ( sk ) , & buf ) ;
if ( res ! = TIPC_OK | | buf = = NULL )
return res ;
2006-01-02 21:04:38 +03:00
}
/* Reject message if there isn't room to queue it */
2013-01-21 02:30:09 +04:00
if ( sk_rmem_alloc_get ( sk ) + buf - > truesize > = limit )
return TIPC_ERR_OVERLOAD ;
2006-01-02 21:04:38 +03:00
2013-01-21 02:30:09 +04:00
/* Enqueue message */
2013-10-18 09:23:16 +04:00
TIPC_SKB_CB ( buf ) - > handle = NULL ;
2008-04-15 11:22:02 +04:00
__skb_queue_tail ( & sk - > sk_receive_queue , buf ) ;
2013-01-21 02:30:09 +04:00
skb_set_owner_r ( buf , sk ) ;
2008-04-15 11:22:02 +04:00
2012-08-21 07:16:57 +04:00
sk - > sk_data_ready ( sk , 0 ) ;
2008-04-15 11:22:02 +04:00
return TIPC_OK ;
}
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
/**
 * backlog_rcv - handle incoming message from backlog queue
 * @sk: socket
 * @buf: message
 *
 * Caller must hold socket lock, but not port lock.
 *
 * A message refused by filter_rcv() is handed to tipc_reject_msg()
 * together with the status code that filter_rcv() returned.
 *
 * Returns 0
 */
static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
{
	u32 verdict = filter_rcv(sk, buf);

	if (verdict)
		tipc_reject_msg(buf, verdict);

	return 0;
}
/**
* dispatch - handle incoming message
* @ tport : TIPC port that received message
* @ buf : message
*
* Called with port lock already taken .
*
* Returns TIPC error status code ( TIPC_OK if message is not to be rejected )
*/
static u32 dispatch ( struct tipc_port * tport , struct sk_buff * buf )
{
2013-06-17 18:54:46 +04:00
struct sock * sk = tport - > sk ;
2008-04-15 11:22:02 +04:00
u32 res ;
/*
* Process message if socket is unlocked ; otherwise add to backlog queue
*
* This code is based on sk_receive_skb ( ) , but must be distinct from it
* since a TIPC - specific filter / reject mechanism is utilized
*/
bh_lock_sock ( sk ) ;
if ( ! sock_owned_by_user ( sk ) ) {
res = filter_rcv ( sk , buf ) ;
} else {
2013-01-21 02:30:09 +04:00
if ( sk_add_backlog ( sk , buf , rcvbuf_limit ( sk , buf ) ) )
2010-03-04 21:01:45 +03:00
res = TIPC_ERR_OVERLOAD ;
else
res = TIPC_OK ;
2008-04-15 11:22:02 +04:00
}
bh_unlock_sock ( sk ) ;
return res ;
2006-01-02 21:04:38 +03:00
}
2007-02-09 17:25:21 +03:00
/**
2006-01-02 21:04:38 +03:00
* wakeupdispatch - wake up port after congestion
* @ tport : port to wakeup
2007-02-09 17:25:21 +03:00
*
2008-04-15 11:22:02 +04:00
* Called with port lock already taken .
2006-01-02 21:04:38 +03:00
*/
static void wakeupdispatch ( struct tipc_port * tport )
{
2013-06-17 18:54:46 +04:00
struct sock * sk = tport - > sk ;
2006-01-02 21:04:38 +03:00
2012-08-21 07:16:57 +04:00
sk - > sk_write_space ( sk ) ;
2006-01-02 21:04:38 +03:00
}
2014-01-17 05:50:03 +04:00
static int tipc_wait_for_connect ( struct socket * sock , long * timeo_p )
{
struct sock * sk = sock - > sk ;
DEFINE_WAIT ( wait ) ;
int done ;
do {
int err = sock_error ( sk ) ;
if ( err )
return err ;
if ( ! * timeo_p )
return - ETIMEDOUT ;
if ( signal_pending ( current ) )
return sock_intr_errno ( * timeo_p ) ;
prepare_to_wait ( sk_sleep ( sk ) , & wait , TASK_INTERRUPTIBLE ) ;
done = sk_wait_event ( sk , timeo_p , sock - > state ! = SS_CONNECTING ) ;
finish_wait ( sk_sleep ( sk ) , & wait ) ;
} while ( ! done ) ;
return 0 ;
}
2006-01-02 21:04:38 +03:00
/**
* connect - establish a connection to another TIPC port
* @ sock : socket structure
* @ dest : socket address for destination port
* @ destlen : size of socket address data structure
2008-04-15 11:22:02 +04:00
* @ flags : file - related flags associated with socket
2006-01-02 21:04:38 +03:00
*
* Returns 0 on success , errno otherwise
*/
2007-02-09 17:25:21 +03:00
static int connect ( struct socket * sock , struct sockaddr * dest , int destlen ,
2006-01-02 21:04:38 +03:00
int flags )
{
2008-04-15 11:22:02 +04:00
struct sock * sk = sock - > sk ;
2008-04-15 11:20:37 +04:00
struct sockaddr_tipc * dst = ( struct sockaddr_tipc * ) dest ;
struct msghdr m = { NULL , } ;
2014-01-17 05:50:03 +04:00
long timeout = ( flags & O_NONBLOCK ) ? 0 : tipc_sk ( sk ) - > conn_timeout ;
socket_state previous ;
2008-04-15 11:20:37 +04:00
int res ;
2008-04-15 11:22:02 +04:00
lock_sock ( sk ) ;
2008-04-15 11:20:37 +04:00
/* For now, TIPC does not allow use of connect() with DGRAM/RDM types */
2008-04-15 11:22:02 +04:00
if ( sock - > state = = SS_READY ) {
res = - EOPNOTSUPP ;
goto exit ;
}
2008-04-15 11:20:37 +04:00
/*
* Reject connection attempt using multicast address
*
* Note : send_msg ( ) validates the rest of the address fields ,
* so there ' s no need to do it here
*/
2008-04-15 11:22:02 +04:00
if ( dst - > addrtype = = TIPC_ADDR_MCAST ) {
res = - EINVAL ;
goto exit ;
}
2014-01-17 05:50:03 +04:00
previous = sock - > state ;
tipc: introduce non-blocking socket connect
TIPC has so far only supported blocking connect(), meaning that a call
to connect() doesn't return until either the connection is fully
established, or an error occurs. This has proved insufficient for many
users, so we now introduce non-blocking connect(), analogous to how
this is done in TCP and other protocols.
With this feature, if a connection cannot be established instantly,
connect() will return the error code "-EINPROGRESS".
If the user later calls connect() again, he will either have the
return code "-EALREADY" or "-EISCONN", depending on whether the
connection has been established or not.
The user must have explicitly set the socket to be non-blocking
(SOCK_NONBLOCK or O_NONBLOCK, depending on method used), so unless
for some reason they had set this already (the socket would anyway
remain blocking in current TIPC) this change should be completely
backwards compatible.
It is also now possible to call select() or poll() to wait for the
completion of a connection.
An effect of the above is that the actual completion of a connection
may now be performed asynchronously, independent of the calls from
user space. Therefore, we now execute this code in BH context, in
the function filter_rcv(), which is executed upon reception of
messages in the socket.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
[PG: minor refactoring for improved connect/disconnect function names]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-30 03:51:19 +04:00
switch ( sock - > state ) {
case SS_UNCONNECTED :
/* Send a 'SYN-' to destination */
m . msg_name = dest ;
m . msg_namelen = destlen ;
/* If connect is in non-blocking case, set MSG_DONTWAIT to
* indicate send_msg ( ) is never blocked .
*/
if ( ! timeout )
m . msg_flags = MSG_DONTWAIT ;
res = send_msg ( NULL , sock , & m , 0 ) ;
if ( ( res < 0 ) & & ( res ! = - EWOULDBLOCK ) )
goto exit ;
/* Just entered SS_CONNECTING state; the only
* difference is that return value in non - blocking
* case is EINPROGRESS , rather than EALREADY .
*/
res = - EINPROGRESS ;
case SS_CONNECTING :
2014-01-17 05:50:03 +04:00
if ( previous = = SS_CONNECTING )
res = - EALREADY ;
if ( ! timeout )
goto exit ;
timeout = msecs_to_jiffies ( timeout ) ;
/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
res = tipc_wait_for_connect ( sock , & timeout ) ;
tipc: introduce non-blocking socket connect
TIPC has so far only supported blocking connect(), meaning that a call
to connect() doesn't return until either the connection is fully
established, or an error occurs. This has proved insufficient for many
users, so we now introduce non-blocking connect(), analogous to how
this is done in TCP and other protocols.
With this feature, if a connection cannot be established instantly,
connect() will return the error code "-EINPROGRESS".
If the user later calls connect() again, he will either have the
return code "-EALREADY" or "-EISCONN", depending on whether the
connection has been established or not.
The user must have explicitly set the socket to be non-blocking
(SOCK_NONBLOCK or O_NONBLOCK, depending on method used), so unless
for some reason they had set this already (the socket would anyway
remain blocking in current TIPC) this change should be completely
backwards compatible.
It is also now possible to call select() or poll() to wait for the
completion of a connection.
An effect of the above is that the actual completion of a connection
may now be performed asynchronously, independent of the calls from
user space. Therefore, we now execute this code in BH context, in
the function filter_rcv(), which is executed upon reception of
messages in the socket.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
[PG: minor refactoring for improved connect/disconnect function names]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-30 03:51:19 +04:00
break ;
case SS_CONNECTED :
res = - EISCONN ;
break ;
default :
res = - EINVAL ;
2014-01-17 05:50:03 +04:00
break ;
2008-04-15 11:20:37 +04:00
}
2008-04-15 11:22:02 +04:00
exit :
release_sock ( sk ) ;
2008-04-15 11:20:37 +04:00
return res ;
2006-01-02 21:04:38 +03:00
}
2007-02-09 17:25:21 +03:00
/**
2006-01-02 21:04:38 +03:00
* listen - allow socket to listen for incoming connections
* @ sock : socket structure
* @ len : ( unused )
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns 0 on success , errno otherwise
*/
static int listen ( struct socket * sock , int len )
{
2008-04-15 11:22:02 +04:00
struct sock * sk = sock - > sk ;
int res ;
lock_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
2011-07-06 14:01:13 +04:00
if ( sock - > state ! = SS_UNCONNECTED )
2008-04-15 11:22:02 +04:00
res = - EINVAL ;
else {
sock - > state = SS_LISTENING ;
res = 0 ;
}
release_sock ( sk ) ;
return res ;
2006-01-02 21:04:38 +03:00
}
2014-01-17 05:50:04 +04:00
static int tipc_wait_for_accept ( struct socket * sock , long timeo )
{
struct sock * sk = sock - > sk ;
DEFINE_WAIT ( wait ) ;
int err ;
/* True wake-one mechanism for incoming connections: only
* one process gets woken up , not the ' whole herd ' .
* Since we do not ' race & poll ' for established sockets
* anymore , the common case will execute the loop only once .
*/
for ( ; ; ) {
prepare_to_wait_exclusive ( sk_sleep ( sk ) , & wait ,
TASK_INTERRUPTIBLE ) ;
if ( skb_queue_empty ( & sk - > sk_receive_queue ) ) {
release_sock ( sk ) ;
timeo = schedule_timeout ( timeo ) ;
lock_sock ( sk ) ;
}
err = 0 ;
if ( ! skb_queue_empty ( & sk - > sk_receive_queue ) )
break ;
err = - EINVAL ;
if ( sock - > state ! = SS_LISTENING )
break ;
err = sock_intr_errno ( timeo ) ;
if ( signal_pending ( current ) )
break ;
err = - EAGAIN ;
if ( ! timeo )
break ;
}
finish_wait ( sk_sleep ( sk ) , & wait ) ;
return err ;
}
2007-02-09 17:25:21 +03:00
/**
2006-01-02 21:04:38 +03:00
* accept - wait for connection request
* @ sock : listening socket
* @ newsock : new socket that is to be connected
* @ flags : file - related flags associated with socket
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns 0 on success , errno otherwise
*/
2008-04-15 11:22:02 +04:00
static int accept ( struct socket * sock , struct socket * new_sock , int flags )
2006-01-02 21:04:38 +03:00
{
2012-12-04 20:01:55 +04:00
struct sock * new_sk , * sk = sock - > sk ;
2006-01-02 21:04:38 +03:00
struct sk_buff * buf ;
2012-12-04 20:01:55 +04:00
struct tipc_sock * new_tsock ;
struct tipc_port * new_tport ;
struct tipc_msg * msg ;
u32 new_ref ;
2014-01-17 05:50:04 +04:00
long timeo ;
2008-04-15 11:22:02 +04:00
int res ;
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
lock_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
if ( sock - > state ! = SS_LISTENING ) {
res = - EINVAL ;
2006-01-02 21:04:38 +03:00
goto exit ;
}
2014-01-17 05:50:04 +04:00
timeo = sock_rcvtimeo ( sk , flags & O_NONBLOCK ) ;
res = tipc_wait_for_accept ( sock , timeo ) ;
if ( res )
goto exit ;
2008-04-15 11:22:02 +04:00
buf = skb_peek ( & sk - > sk_receive_queue ) ;
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policiy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policiy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the toplogy and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receive messages are now finished in processes,
subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not actually yet called by the existing TIPC code, but since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:39 +04:00
res = tipc_sk_create ( sock_net ( sock - > sk ) , new_sock , 0 , 1 ) ;
2012-12-04 20:01:55 +04:00
if ( res )
goto exit ;
2006-01-02 21:04:38 +03:00
2012-12-04 20:01:55 +04:00
new_sk = new_sock - > sk ;
new_tsock = tipc_sk ( new_sk ) ;
new_tport = new_tsock - > p ;
new_ref = new_tport - > ref ;
msg = buf_msg ( buf ) ;
2006-01-02 21:04:38 +03:00
2012-12-04 20:01:55 +04:00
/* we lock on new_sk; but lockdep sees the lock on sk */
lock_sock_nested ( new_sk , SINGLE_DEPTH_NESTING ) ;
/*
* Reject any stray messages received by new socket
* before the socket lock was taken ( very , very unlikely )
*/
reject_rx_queue ( new_sk ) ;
/* Connect new socket to it's peer */
new_tsock - > peer_name . ref = msg_origport ( msg ) ;
new_tsock - > peer_name . node = msg_orignode ( msg ) ;
tipc_connect ( new_ref , & new_tsock - > peer_name ) ;
new_sock - > state = SS_CONNECTED ;
tipc_set_portimportance ( new_ref , msg_importance ( msg ) ) ;
if ( msg_named ( msg ) ) {
new_tport - > conn_type = msg_nametype ( msg ) ;
new_tport - > conn_instance = msg_nameinst ( msg ) ;
2006-01-02 21:04:38 +03:00
}
2012-12-04 20:01:55 +04:00
/*
* Respond to ' SYN - ' by discarding it & returning ' ACK ' - .
* Respond to ' SYN + ' by queuing it on new socket .
*/
if ( ! msg_data_sz ( msg ) ) {
struct msghdr m = { NULL , } ;
advance_rx_queue ( sk ) ;
send_packet ( NULL , new_sock , & m , 0 ) ;
} else {
__skb_dequeue ( & sk - > sk_receive_queue ) ;
__skb_queue_head ( & new_sk - > sk_receive_queue , buf ) ;
2013-01-21 02:30:09 +04:00
skb_set_owner_r ( buf , new_sk ) ;
2012-12-04 20:01:55 +04:00
}
release_sock ( new_sk ) ;
2006-01-02 21:04:38 +03:00
exit :
2008-04-15 11:22:02 +04:00
release_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
return res ;
}
/**
* shutdown - shutdown socket connection
* @ sock : socket structure
2008-03-07 02:05:38 +03:00
* @ how : direction to close ( must be SHUT_RDWR )
2006-01-02 21:04:38 +03:00
*
* Terminates connection ( if necessary ) , then purges socket ' s receive queue .
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns 0 on success , errno otherwise
*/
static int shutdown ( struct socket * sock , int how )
{
2008-04-15 11:22:02 +04:00
struct sock * sk = sock - > sk ;
struct tipc_port * tport = tipc_sk_port ( sk ) ;
2006-01-02 21:04:38 +03:00
struct sk_buff * buf ;
int res ;
2008-03-07 02:05:38 +03:00
if ( how ! = SHUT_RDWR )
return - EINVAL ;
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
lock_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
switch ( sock - > state ) {
2008-04-15 11:22:02 +04:00
case SS_CONNECTING :
2006-01-02 21:04:38 +03:00
case SS_CONNECTED :
restart :
2012-04-30 23:29:02 +04:00
/* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
2008-04-15 11:22:02 +04:00
buf = __skb_dequeue ( & sk - > sk_receive_queue ) ;
if ( buf ) {
2013-10-18 09:23:16 +04:00
if ( TIPC_SKB_CB ( buf ) - > handle ! = NULL ) {
2011-11-04 21:24:29 +04:00
kfree_skb ( buf ) ;
2006-01-02 21:04:38 +03:00
goto restart ;
}
2008-04-15 11:22:02 +04:00
tipc_disconnect ( tport - > ref ) ;
2006-01-02 21:04:38 +03:00
tipc_reject_msg ( buf , TIPC_CONN_SHUTDOWN ) ;
2008-04-15 11:22:02 +04:00
} else {
tipc_shutdown ( tport - > ref ) ;
2006-01-02 21:04:38 +03:00
}
2008-04-15 11:22:02 +04:00
sock - > state = SS_DISCONNECTING ;
2006-01-02 21:04:38 +03:00
/* fall through */
case SS_DISCONNECTING :
2012-10-29 17:38:15 +04:00
/* Discard any unreceived messages */
2013-01-21 02:30:08 +04:00
__skb_queue_purge ( & sk - > sk_receive_queue ) ;
2012-10-29 17:38:15 +04:00
/* Wake up anyone sleeping in poll */
sk - > sk_state_change ( sk ) ;
2006-01-02 21:04:38 +03:00
res = 0 ;
break ;
default :
res = - ENOTCONN ;
}
2008-04-15 11:22:02 +04:00
release_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
return res ;
}
/**
* setsockopt - set socket option
* @ sock : socket structure
* @ lvl : option level
* @ opt : option identifier
* @ ov : pointer to new option value
* @ ol : length of option value
2007-02-09 17:25:21 +03:00
*
* For stream sockets only , accepts and ignores all IPPROTO_TCP options
2006-01-02 21:04:38 +03:00
* ( to ease compatibility ) .
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns 0 on success , errno otherwise
*/
2013-06-17 18:54:47 +04:00
static int setsockopt ( struct socket * sock , int lvl , int opt , char __user * ov ,
unsigned int ol )
2006-01-02 21:04:38 +03:00
{
2008-04-15 11:22:02 +04:00
struct sock * sk = sock - > sk ;
struct tipc_port * tport = tipc_sk_port ( sk ) ;
2006-01-02 21:04:38 +03:00
u32 value ;
int res ;
2007-02-09 17:25:21 +03:00
if ( ( lvl = = IPPROTO_TCP ) & & ( sock - > type = = SOCK_STREAM ) )
return 0 ;
2006-01-02 21:04:38 +03:00
if ( lvl ! = SOL_TIPC )
return - ENOPROTOOPT ;
if ( ol < sizeof ( value ) )
return - EINVAL ;
2010-12-31 21:59:33 +03:00
res = get_user ( value , ( u32 __user * ) ov ) ;
if ( res )
2006-01-02 21:04:38 +03:00
return res ;
2008-04-15 11:22:02 +04:00
lock_sock ( sk ) ;
2007-02-09 17:25:21 +03:00
2006-01-02 21:04:38 +03:00
switch ( opt ) {
case TIPC_IMPORTANCE :
2008-04-15 11:22:02 +04:00
res = tipc_set_portimportance ( tport - > ref , value ) ;
2006-01-02 21:04:38 +03:00
break ;
case TIPC_SRC_DROPPABLE :
if ( sock - > type ! = SOCK_STREAM )
2008-04-15 11:22:02 +04:00
res = tipc_set_portunreliable ( tport - > ref , value ) ;
2007-02-09 17:25:21 +03:00
else
2006-01-02 21:04:38 +03:00
res = - ENOPROTOOPT ;
break ;
case TIPC_DEST_DROPPABLE :
2008-04-15 11:22:02 +04:00
res = tipc_set_portunreturnable ( tport - > ref , value ) ;
2006-01-02 21:04:38 +03:00
break ;
case TIPC_CONN_TIMEOUT :
2011-05-26 21:44:34 +04:00
tipc_sk ( sk ) - > conn_timeout = value ;
2008-04-15 11:22:02 +04:00
/* no need to set "res", since already 0 at this point */
2006-01-02 21:04:38 +03:00
break ;
default :
res = - EINVAL ;
}
2008-04-15 11:22:02 +04:00
release_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
return res ;
}
/**
* getsockopt - get socket option
* @ sock : socket structure
* @ lvl : option level
* @ opt : option identifier
* @ ov : receptacle for option value
* @ ol : receptacle for length of option value
2007-02-09 17:25:21 +03:00
*
* For stream sockets only , returns 0 length result for all IPPROTO_TCP options
2006-01-02 21:04:38 +03:00
* ( to ease compatibility ) .
2007-02-09 17:25:21 +03:00
*
2006-01-02 21:04:38 +03:00
* Returns 0 on success , errno otherwise
*/
2013-06-17 18:54:47 +04:00
static int getsockopt ( struct socket * sock , int lvl , int opt , char __user * ov ,
int __user * ol )
2006-01-02 21:04:38 +03:00
{
2008-04-15 11:22:02 +04:00
struct sock * sk = sock - > sk ;
struct tipc_port * tport = tipc_sk_port ( sk ) ;
2007-02-09 17:25:21 +03:00
int len ;
2006-01-02 21:04:38 +03:00
u32 value ;
2007-02-09 17:25:21 +03:00
int res ;
2006-01-02 21:04:38 +03:00
2007-02-09 17:25:21 +03:00
if ( ( lvl = = IPPROTO_TCP ) & & ( sock - > type = = SOCK_STREAM ) )
return put_user ( 0 , ol ) ;
2006-01-02 21:04:38 +03:00
if ( lvl ! = SOL_TIPC )
return - ENOPROTOOPT ;
2010-12-31 21:59:33 +03:00
res = get_user ( len , ol ) ;
if ( res )
2007-02-09 17:25:21 +03:00
return res ;
2006-01-02 21:04:38 +03:00
2008-04-15 11:22:02 +04:00
lock_sock ( sk ) ;
2006-01-02 21:04:38 +03:00
switch ( opt ) {
case TIPC_IMPORTANCE :
2008-04-15 11:22:02 +04:00
res = tipc_portimportance ( tport - > ref , & value ) ;
2006-01-02 21:04:38 +03:00
break ;
case TIPC_SRC_DROPPABLE :
2008-04-15 11:22:02 +04:00
res = tipc_portunreliable ( tport - > ref , & value ) ;
2006-01-02 21:04:38 +03:00
break ;
case TIPC_DEST_DROPPABLE :
2008-04-15 11:22:02 +04:00
res = tipc_portunreturnable ( tport - > ref , & value ) ;
2006-01-02 21:04:38 +03:00
break ;
case TIPC_CONN_TIMEOUT :
2011-05-26 21:44:34 +04:00
value = tipc_sk ( sk ) - > conn_timeout ;
2008-04-15 11:22:02 +04:00
/* no need to set "res", since already 0 at this point */
2006-01-02 21:04:38 +03:00
break ;
2010-12-31 21:59:32 +03:00
case TIPC_NODE_RECVQ_DEPTH :
tipc: eliminate aggregate sk_receive_queue limit
As a complement to the per-socket sk_recv_queue limit, TIPC keeps a
global atomic counter for the sum of sk_recv_queue sizes across all
tipc sockets. When incremented, the counter is compared to an upper
threshold value, and if this is reached, the message is rejected
with error code TIPC_OVERLOAD.
This check was originally meant to protect the node against
buffer exhaustion and general CPU overload. However, all experience
indicates that the feature not only is redundant on Linux, but even
harmful. Users run into the limit very often, causing disturbances
for their applications, while removing it seems to have no negative
effects at all. We have also seen that overall performance is
boosted significantly when this bottleneck is removed.
Furthermore, we don't see any other network protocols maintaining
such a mechanism, something strengthening our conviction that this
control can be eliminated.
As a result, the atomic variable tipc_queue_size is now unused
and so it can be deleted. There is a getsockopt call that used
to allow reading it; we retain that but just return zero for
maximum compatibility.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
[PG: phase out tipc_queue_size as pointed out by Neil Horman]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-27 15:15:27 +04:00
value = 0 ; /* was tipc_queue_size, now obsolete */
2009-06-30 07:25:39 +04:00
break ;
2010-12-31 21:59:32 +03:00
case TIPC_SOCK_RECVQ_DEPTH :
2009-06-30 07:25:39 +04:00
value = skb_queue_len ( & sk - > sk_receive_queue ) ;
break ;
2006-01-02 21:04:38 +03:00
default :
res = - EINVAL ;
}
2008-04-15 11:22:02 +04:00
release_sock ( sk ) ;
2010-12-31 21:59:31 +03:00
if ( res )
return res ; /* "get" failed */
2006-01-02 21:04:38 +03:00
2010-12-31 21:59:31 +03:00
if ( len < sizeof ( value ) )
return - EINVAL ;
if ( copy_to_user ( ov , & value , sizeof ( value ) ) )
return - EFAULT ;
return put_user ( sizeof ( value ) , ol ) ;
2006-01-02 21:04:38 +03:00
}
2012-07-10 14:55:35 +04:00
/* Protocol switches for the various types of TIPC sockets */
2008-02-08 05:18:01 +03:00
static const struct proto_ops msg_ops = {
2010-12-31 21:59:32 +03:00
. owner = THIS_MODULE ,
2006-01-02 21:04:38 +03:00
. family = AF_TIPC ,
. release = release ,
. bind = bind ,
. connect = connect ,
2007-06-11 04:24:55 +04:00
. socketpair = sock_no_socketpair ,
2011-07-06 14:01:13 +04:00
. accept = sock_no_accept ,
2006-01-02 21:04:38 +03:00
. getname = get_name ,
. poll = poll ,
2007-06-11 04:24:55 +04:00
. ioctl = sock_no_ioctl ,
2011-07-06 14:01:13 +04:00
. listen = sock_no_listen ,
2006-01-02 21:04:38 +03:00
. shutdown = shutdown ,
. setsockopt = setsockopt ,
. getsockopt = getsockopt ,
. sendmsg = send_msg ,
. recvmsg = recv_msg ,
2007-07-19 05:44:56 +04:00
. mmap = sock_no_mmap ,
. sendpage = sock_no_sendpage
2006-01-02 21:04:38 +03:00
} ;
2008-02-08 05:18:01 +03:00
static const struct proto_ops packet_ops = {
2010-12-31 21:59:32 +03:00
. owner = THIS_MODULE ,
2006-01-02 21:04:38 +03:00
. family = AF_TIPC ,
. release = release ,
. bind = bind ,
. connect = connect ,
2007-06-11 04:24:55 +04:00
. socketpair = sock_no_socketpair ,
2006-01-02 21:04:38 +03:00
. accept = accept ,
. getname = get_name ,
. poll = poll ,
2007-06-11 04:24:55 +04:00
. ioctl = sock_no_ioctl ,
2006-01-02 21:04:38 +03:00
. listen = listen ,
. shutdown = shutdown ,
. setsockopt = setsockopt ,
. getsockopt = getsockopt ,
. sendmsg = send_packet ,
. recvmsg = recv_msg ,
2007-07-19 05:44:56 +04:00
. mmap = sock_no_mmap ,
. sendpage = sock_no_sendpage
2006-01-02 21:04:38 +03:00
} ;
2008-02-08 05:18:01 +03:00
static const struct proto_ops stream_ops = {
2010-12-31 21:59:32 +03:00
. owner = THIS_MODULE ,
2006-01-02 21:04:38 +03:00
. family = AF_TIPC ,
. release = release ,
. bind = bind ,
. connect = connect ,
2007-06-11 04:24:55 +04:00
. socketpair = sock_no_socketpair ,
2006-01-02 21:04:38 +03:00
. accept = accept ,
. getname = get_name ,
. poll = poll ,
2007-06-11 04:24:55 +04:00
. ioctl = sock_no_ioctl ,
2006-01-02 21:04:38 +03:00
. listen = listen ,
. shutdown = shutdown ,
. setsockopt = setsockopt ,
. getsockopt = getsockopt ,
. sendmsg = send_stream ,
. recvmsg = recv_stream ,
2007-07-19 05:44:56 +04:00
. mmap = sock_no_mmap ,
. sendpage = sock_no_sendpage
2006-01-02 21:04:38 +03:00
} ;
2008-02-08 05:18:01 +03:00
static const struct net_proto_family tipc_family_ops = {
2010-12-31 21:59:32 +03:00
. owner = THIS_MODULE ,
2006-01-02 21:04:38 +03:00
. family = AF_TIPC ,
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policiy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policiy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the toplogy and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receive messages are now finished in processes,
subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not actually yet called by the existing TIPC code, but since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:39 +04:00
. create = tipc_sk_create
2006-01-02 21:04:38 +03:00
} ;
/* TIPC protocol descriptor: registered by tipc_socket_init() and
 * unregistered by tipc_socket_stop(). Objects are sized as struct tipc_sock.
 */
static struct proto tipc_proto = {
. name = " TIPC " ,
. owner = THIS_MODULE ,
2013-06-17 18:54:37 +04:00
. obj_size = sizeof ( struct tipc_sock ) ,
. sysctl_rmem = sysctl_tipc_rmem
2006-01-02 21:04:38 +03:00
} ;
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the topology and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receive messages are now finished in processes,
subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not actually yet called by the existing TIPC code, but since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 18:54:39 +04:00
/* Second TIPC protocol descriptor: same name, object size and rmem sysctl
 * as tipc_proto, but without an .owner entry.
 * NOTE(review): presumably meant for sockets created from inside the
 * kernel - confirm against tipc_sk_create().
 */
static struct proto tipc_proto_kern = {
. name = " TIPC " ,
. obj_size = sizeof ( struct tipc_sock ) ,
. sysctl_rmem = sysctl_tipc_rmem
} ;
2006-01-02 21:04:38 +03:00
/**
2006-01-18 02:38:21 +03:00
* tipc_socket_init - initialize TIPC socket interface
2007-02-09 17:25:21 +03:00
*
* Registers the TIPC protocol descriptor first and the AF_TIPC socket
* family second; if the second step fails the first one is undone.
* Sets sockets_enabled only when both registrations succeeded.
*
2006-01-02 21:04:38 +03:00
* Returns 0 on success , errno otherwise
*/
2006-01-18 02:38:21 +03:00
int tipc_socket_init ( void )
2006-01-02 21:04:38 +03:00
{
int res ;
2007-02-09 17:25:21 +03:00
/* Step 1: register the protocol descriptor */
res = proto_register ( & tipc_proto , 1 ) ;
2006-01-02 21:04:38 +03:00
if ( res ) {
2012-06-29 08:16:37 +04:00
pr_err ( " Failed to register TIPC protocol type \n " ) ;
2006-01-02 21:04:38 +03:00
goto out ;
}
/* Step 2: register the address family */
res = sock_register ( & tipc_family_ops ) ;
if ( res ) {
2012-06-29 08:16:37 +04:00
pr_err ( " Failed to register TIPC socket type \n " ) ;
2006-01-02 21:04:38 +03:00
/* Roll back step 1 so that nothing stays registered on failure */
proto_unregister ( & tipc_proto ) ;
goto out ;
}
/* Lets tipc_socket_stop() know there is something to tear down */
sockets_enabled = 1 ;
out :
return res ;
}
/**
2006-01-18 02:38:21 +03:00
* tipc_socket_stop - stop TIPC socket interface
2006-01-02 21:04:38 +03:00
*/
2006-01-18 02:38:21 +03:00
void tipc_socket_stop ( void )
2006-01-02 21:04:38 +03:00
{
if ( ! sockets_enabled )
return ;
sockets_enabled = 0 ;
sock_unregister ( tipc_family_ops . family ) ;
proto_unregister ( & tipc_proto ) ;
}