2009-02-26 15:37:44 +01:00
/*
Unix SMB / CIFS implementation .
Copyright ( C ) Stefan Metzmacher 2009
2009-12-15 12:56:44 +01:00
* * NOTE ! The following LGPL license applies to the tsocket
2009-02-26 15:37:44 +01:00
* * library . This does NOT imply that all of Samba is released
* * under the LGPL
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 3 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
# include "replace.h"
2009-03-31 20:33:33 +02:00
# include "system/filesys.h"
2009-02-26 15:37:44 +01:00
# include "system/network.h"
# include "tsocket.h"
# include "tsocket_internal.h"
2015-02-16 13:50:25 +00:00
# include "lib/util/iov_buf.h"
2015-06-05 11:02:45 +02:00
# include "lib/util/blocking.h"
2019-02-18 17:09:14 +01:00
# include "lib/util/util_net.h"
2021-09-19 17:41:42 +12:00
# include "lib/util/samba_util.h"
2009-02-26 15:37:44 +01:00
2009-03-26 14:27:45 +01:00
/*
 * Translate the (return value, errno) pair of a system call into an
 * errno-style result.
 *
 * ret       - return value of the system call
 * sys_errno - errno value captured right after the call
 * retry     - set to true when the failure is transient and the
 *             operation should simply be attempted again later
 *
 * Returns 0 if ret signals success (>= 0), EIO if the failure cannot
 * be described properly (ret is negative but not -1, or no errno was
 * recorded), and sys_errno otherwise.
 */
static int tsocket_bsd_error_from_errno(int ret,
					int sys_errno,
					bool *retry)
{
	bool transient;

	*retry = false;

	if (ret >= 0) {
		return 0;
	}

	if (ret != -1 || sys_errno == 0) {
		return EIO;
	}

	/* ENOMEM is retryable on Solaris/illumos, and possibly other systems. */
	transient = (sys_errno == EINTR) ||
		    (sys_errno == EINPROGRESS) ||
		    (sys_errno == EAGAIN) ||
		    (sys_errno == ENOMEM);
#ifdef EWOULDBLOCK
	transient = transient || (sys_errno == EWOULDBLOCK);
#endif

	*retry = transient;
	return sys_errno;
}
/*
 * Prepare a descriptor for use by the tsocket code.
 *
 * If high_fd is true the descriptor is first moved out of the 0..2
 * range by repeatedly dup()ing it; the low numbered descriptors used
 * on the way are closed again.  Afterwards set_blocking(fd, false)
 * and smb_set_close_on_exec(fd) are applied.
 *
 * Returns the (possibly new) descriptor on success.  On failure the
 * descriptor is closed, errno is preserved and -1 is returned.  An
 * input of -1 is passed through as -1 without touching errno.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
	int low_fds[3];
	int num_low = 0;
	int saved_errno = 0;
	int idx;

	if (fd == -1) {
		return -1;
	}

	if (high_fd) {
		/* keep duplicating until we get a descriptor >= 3 */
		while (fd < 3) {
			int dup_fd = dup(fd);

			low_fds[num_low++] = fd;
			fd = dup_fd;
			if (dup_fd == -1) {
				saved_errno = errno;
				break;
			}
		}

		/* the low descriptors were only stepping stones */
		for (idx = 0; idx < num_low; idx++) {
			close(low_fds[idx]);
		}

		if (fd == -1) {
			errno = saved_errno;
			return -1;
		}
	}

	if (set_blocking(fd, false) == -1 || !smb_set_close_on_exec(fd)) {
		/* close() must not clobber the errno we report */
		saved_errno = errno;
		close(fd);
		errno = saved_errno;
		return -1;
	}

	return fd;
}
2016-02-04 15:35:06 +01:00
#ifdef HAVE_LINUX_RTNETLINK_H
/**
 * Get the amount of pending bytes from a netlink socket
 *
 * For some reason netlink sockets don't support querying the amount of
 * pending data via ioctl with FIONREAD, which is what we use in
 * tsocket_bsd_pending() below.
 *
 * We know we are on Linux as we're using netlink, which means we have a
 * working MSG_TRUNC flag to recvmsg() as well, so we use that together
 * with MSG_PEEK.
 *
 * Returns whatever recvmsg() returns (-1 with errno set on failure).
 **/
static ssize_t tsocket_bsd_netlink_pending(int fd)
{
	char peek_byte[1];
	struct iovec vec = {
		.iov_base = peek_byte,
		.iov_len = sizeof(peek_byte),
	};
	struct msghdr hdr = {
		.msg_iov = &vec,
		.msg_iovlen = 1,
	};

	return recvmsg(fd, &hdr, MSG_PEEK | MSG_TRUNC);
}
#else
/*
 * Fallback when netlink support is not compiled in: always fails
 * with ENOSYS.
 */
static ssize_t tsocket_bsd_netlink_pending(int fd)
{
	errno = ENOSYS;
	return -1;
}
#endif
2009-03-26 14:27:45 +01:00
/*
 * Ask the kernel (FIONREAD) how many bytes are waiting on fd.
 *
 * Returns the byte count when it is non-zero, -1 with errno set when
 * the ioctl fails (errno = EIO for an unexpected ioctl result).  When
 * zero bytes are reported the result of
 * samba_socket_poll_or_sock_error(fd) is returned instead.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
	int nbytes = 0;
	int rc = ioctl(fd, FIONREAD, &nbytes);

	if (rc == -1) {
		return -1;
	}

	if (rc != 0) {
		/* this should not be reached */
		errno = EIO;
		return -1;
	}

	if (nbytes == 0) {
		return samba_socket_poll_or_sock_error(fd);
	}

	return nbytes;
}
2009-02-26 15:37:44 +01:00
static const struct tsocket_address_ops tsocket_address_bsd_ops ;
2009-11-04 19:03:41 +01:00
int _tsocket_address_bsd_from_sockaddr ( TALLOC_CTX * mem_ctx ,
2013-06-11 19:36:09 +02:00
const struct sockaddr * sa ,
2010-08-28 08:25:19 +02:00
size_t sa_socklen ,
2009-11-04 19:03:41 +01:00
struct tsocket_address * * _addr ,
const char * location )
2009-02-26 15:37:44 +01:00
{
struct tsocket_address * addr ;
2019-02-18 17:09:14 +01:00
struct samba_sockaddr * bsda = NULL ;
2009-02-26 15:37:44 +01:00
2010-08-28 08:25:19 +02:00
if ( sa_socklen < sizeof ( sa - > sa_family ) ) {
2009-12-23 19:31:41 +01:00
errno = EINVAL ;
return - 1 ;
}
2009-02-26 15:37:44 +01:00
switch ( sa - > sa_family ) {
case AF_UNIX :
2010-08-28 08:25:19 +02:00
if ( sa_socklen > sizeof ( struct sockaddr_un ) ) {
sa_socklen = sizeof ( struct sockaddr_un ) ;
2009-12-23 19:34:32 +01:00
}
2009-02-26 15:37:44 +01:00
break ;
case AF_INET :
2010-08-28 08:25:19 +02:00
if ( sa_socklen < sizeof ( struct sockaddr_in ) ) {
2009-02-26 15:37:44 +01:00
errno = EINVAL ;
return - 1 ;
}
2010-08-28 08:25:19 +02:00
sa_socklen = sizeof ( struct sockaddr_in ) ;
2009-02-26 15:37:44 +01:00
break ;
# ifdef HAVE_IPV6
case AF_INET6 :
2010-08-28 08:25:19 +02:00
if ( sa_socklen < sizeof ( struct sockaddr_in6 ) ) {
2009-02-26 15:37:44 +01:00
errno = EINVAL ;
return - 1 ;
}
2010-08-28 08:25:19 +02:00
sa_socklen = sizeof ( struct sockaddr_in6 ) ;
2009-02-26 15:37:44 +01:00
break ;
# endif
default :
errno = EAFNOSUPPORT ;
return - 1 ;
}
2010-08-28 08:25:19 +02:00
if ( sa_socklen > sizeof ( struct sockaddr_storage ) ) {
2009-02-26 15:37:44 +01:00
errno = EINVAL ;
return - 1 ;
}
addr = tsocket_address_create ( mem_ctx ,
& tsocket_address_bsd_ops ,
& bsda ,
2019-02-18 17:09:14 +01:00
struct samba_sockaddr ,
2009-02-26 15:37:44 +01:00
location ) ;
if ( ! addr ) {
errno = ENOMEM ;
return - 1 ;
}
ZERO_STRUCTP ( bsda ) ;
2010-08-28 08:25:19 +02:00
memcpy ( & bsda - > u . ss , sa , sa_socklen ) ;
2009-02-26 15:37:44 +01:00
2010-08-28 08:25:19 +02:00
bsda - > sa_socklen = sa_socklen ;
2010-10-21 07:00:34 +02:00
# ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
bsda - > u . sa . sa_len = bsda - > sa_socklen ;
# endif
2010-04-07 10:42:37 +10:00
2009-02-26 15:37:44 +01:00
* _addr = addr ;
return 0 ;
}
2019-02-18 17:27:46 +01:00
/*
 * Create a tsocket_address from a struct samba_sockaddr.
 *
 * Thin wrapper that forwards the embedded sockaddr and its length to
 * _tsocket_address_bsd_from_sockaddr(); return value and errno are
 * those of that function.
 */
int _tsocket_address_bsd_from_samba_sockaddr(TALLOC_CTX *mem_ctx,
					     const struct samba_sockaddr *xs_addr,
					     struct tsocket_address **t_addr,
					     const char *location)
{
	const struct sockaddr *raw = &xs_addr->u.sa;
	size_t raw_len = xs_addr->sa_socklen;

	return _tsocket_address_bsd_from_sockaddr(mem_ctx,
						  raw,
						  raw_len,
						  t_addr,
						  location);
}
2009-11-04 19:03:41 +01:00
ssize_t tsocket_address_bsd_sockaddr ( const struct tsocket_address * addr ,
struct sockaddr * sa ,
2010-08-28 08:25:19 +02:00
size_t sa_socklen )
2009-11-04 19:03:41 +01:00
{
2019-02-18 17:09:14 +01:00
struct samba_sockaddr * bsda = talloc_get_type ( addr - > private_data ,
struct samba_sockaddr ) ;
2009-11-04 19:03:41 +01:00
if ( ! bsda ) {
errno = EINVAL ;
return - 1 ;
}
2010-08-28 08:25:19 +02:00
if ( sa_socklen < bsda - > sa_socklen ) {
2009-11-04 19:03:41 +01:00
errno = EINVAL ;
return - 1 ;
}
2010-08-28 08:25:19 +02:00
if ( sa_socklen > bsda - > sa_socklen ) {
memset ( sa , 0 , sa_socklen ) ;
sa_socklen = bsda - > sa_socklen ;
2009-11-04 19:03:41 +01:00
}
2010-08-28 08:25:19 +02:00
memcpy ( sa , & bsda - > u . ss , sa_socklen ) ;
2010-10-21 07:00:34 +02:00
# ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
sa - > sa_len = sa_socklen ;
# endif
2010-08-28 08:25:19 +02:00
return sa_socklen ;
2009-11-04 19:03:41 +01:00
}
2010-04-27 10:34:15 +02:00
bool tsocket_address_is_inet ( const struct tsocket_address * addr , const char * fam )
{
2019-02-18 17:09:14 +01:00
struct samba_sockaddr * bsda = talloc_get_type ( addr - > private_data ,
struct samba_sockaddr ) ;
2010-04-27 10:34:15 +02:00
if ( ! bsda ) {
return false ;
}
switch ( bsda - > u . sa . sa_family ) {
case AF_INET :
if ( strcasecmp ( fam , " ip " ) = = 0 ) {
return true ;
}
if ( strcasecmp ( fam , " ipv4 " ) = = 0 ) {
return true ;
}
return false ;
# ifdef HAVE_IPV6
case AF_INET6 :
if ( strcasecmp ( fam , " ip " ) = = 0 ) {
return true ;
}
if ( strcasecmp ( fam , " ipv6 " ) = = 0 ) {
return true ;
}
return false ;
# endif
}
return false ;
}
2009-02-26 15:37:44 +01:00
/*
 * Create an internet tsocket_address from a numeric address string
 * and a port.
 *
 * fam   - "ip", "ipv4" or (with HAVE_IPV6) "ipv6", case-insensitive
 * addr  - numeric address string; NULL selects the wildcard address
 *         of the requested family
 * port  - port number in host byte order
 * _addr - receives the new address on success
 *
 * Returns 0 on success and -1 otherwise.  errno is EAFNOSUPPORT for
 * an unknown family and EINVAL when getaddrinfo() reports EAI_FAIL,
 * EAI_NONAME or EAI_ADDRFAMILY; for other getaddrinfo() failures
 * errno is left as it was.
 */
int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
				       const char *fam,
				       const char *addr,
				       uint16_t port,
				       struct tsocket_address **_addr,
				       const char *location)
{
	/*
	 * we use SOCKET_STREAM here to get just one result
	 * back from getaddrinfo().
	 */
	struct addrinfo hints = {
		.ai_socktype = SOCK_STREAM,
		.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV,
	};
	struct addrinfo *result = NULL;
	const char *wildcard = NULL;
	char port_str[6];
	int rc;

	if (strcasecmp(fam, "ip") == 0) {
		hints.ai_family = AF_UNSPEC;
#ifdef HAVE_IPV6
		wildcard = "::";
#else
		wildcard = "0.0.0.0";
#endif
	} else if (strcasecmp(fam, "ipv4") == 0) {
		hints.ai_family = AF_INET;
		wildcard = "0.0.0.0";
#ifdef HAVE_IPV6
	} else if (strcasecmp(fam, "ipv6") == 0) {
		hints.ai_family = AF_INET6;
		wildcard = "::";
#endif
	} else {
		errno = EAFNOSUPPORT;
		return -1;
	}

	if (addr == NULL) {
		addr = wildcard;
	}

	snprintf(port_str, sizeof(port_str), "%u", port);

	rc = getaddrinfo(addr, port_str, &hints, &result);
	if (rc != 0) {
		bool bad_input = (rc == EAI_FAIL) || (rc == EAI_NONAME);
#ifdef EAI_ADDRFAMILY
		bad_input = bad_input || (rc == EAI_ADDRFAMILY);
#endif
		if (bad_input) {
			errno = EINVAL;
		}
		rc = -1;
	} else if (result->ai_socktype != SOCK_STREAM) {
		errno = EINVAL;
		rc = -1;
	} else {
		rc = _tsocket_address_bsd_from_sockaddr(mem_ctx,
							result->ai_addr,
							result->ai_addrlen,
							_addr,
							location);
	}

	if (result != NULL) {
		freeaddrinfo(result);
	}
	return rc;
}
2021-09-19 17:41:42 +12:00
/*
 * Create an internet tsocket_address from a "host[:port]" string.
 *
 * Accepted forms:
 *   "[ipv6]"        - bracketed IPv6 address, default_port is used
 *   "[ipv6]:port"   - bracketed IPv6 address with explicit port
 *   "a.b.c.d:port"  - string containing a '.' and a ':'; everything
 *                     after the last ':' is taken as the port
 *   anything else   - passed through unchanged with default_port, so
 *                     that _tsocket_address_inet_from_strings() finds
 *                     the parsing errors
 *
 * A NULL host_port_addr is forwarded as a NULL address.
 *
 * Returns 0 on success, -1 with errno set otherwise (ENOMEM if the
 * scratch copy cannot be allocated, EINVAL for a malformed or out of
 * range port, otherwise whatever the called function reports).
 *
 * The scratch copy of the input string is released on every path; it
 * is not left behind on mem_ctx.
 */
int _tsocket_address_inet_from_hostport_strings(TALLOC_CTX *mem_ctx,
						const char *fam,
						const char *host_port_addr,
						uint16_t default_port,
						struct tsocket_address **_addr,
						const char *location)
{
	char *pl_sq = NULL;
	char *pr_sq = NULL;
	char *pl_period = NULL;
	char *port_sep = NULL;
	char *cport = NULL;
	char *buf = NULL;
	uint64_t port = 0;
	int ret = -1;
	int saved_errno;
	char *s_addr = NULL;
	uint16_t s_port = default_port;
	bool is_ipv6_by_squares = false;

	if (host_port_addr == NULL) {
		/* go straight to next function if host_port_addr is NULL */
		goto get_addr;
	}

	/* work on a private copy, as separators get overwritten below */
	buf = talloc_strdup(mem_ctx, host_port_addr);
	if (buf == NULL) {
		errno = ENOMEM;
		return -1;
	}

	pl_period = strchr_m(buf, '.');
	port_sep = strrchr_m(buf, ':');
	pl_sq = strchr_m(buf, '[');
	pr_sq = strrchr_m(buf, ']');

	/* See if its IPv4 or IPv6 */
	/* Only parse IPv6 with squares with/without port, and IPv4 with port */
	/* Everything else, let tsocket_address_inet_from string() */
	/* find parsing errors */
#ifdef HAVE_IPV6
	is_ipv6_by_squares = (pl_sq != NULL && pr_sq != NULL && pr_sq > pl_sq);
#endif

	if (is_ipv6_by_squares) {
		/* IPv6 possibly with port - squares detected */
		port_sep = pr_sq + 1;
		s_addr = pl_sq + 1;

		if (*port_sep == '\0') {
			/* "[addr]" without port */
			*pr_sq = 0;
			goto get_addr;
		}
		if (*port_sep != ':') {
			errno = EINVAL;
			goto done;
		}

		cport = port_sep + 1;
		if (!conv_str_u64(cport, &port) || port > 65535) {
			errno = EINVAL;
			goto done;
		}
		s_port = (uint16_t)port;
		*port_sep = 0;
		*pr_sq = 0;
		*pl_sq = 0;
	} else if (pl_period != NULL && port_sep != NULL) {
		/* IPv4 with port - string has a period and a colon */
		cport = port_sep + 1;
		if (!conv_str_u64(cport, &port) || port > 65535) {
			errno = EINVAL;
			goto done;
		}
		s_port = (uint16_t)port;
		*port_sep = 0;
		s_addr = buf;
	} else {
		/* Everything else, let tsocket_address_inet_from string() */
		/* find parsing errors */
		s_addr = buf;
	}

get_addr:
	ret = _tsocket_address_inet_from_strings(
		mem_ctx, fam, s_addr, s_port, _addr, location);

done:
	/* release the scratch copy without disturbing the reported errno */
	saved_errno = errno;
	TALLOC_FREE(buf);
	errno = saved_errno;
	return ret;
}
2009-02-26 15:37:44 +01:00
/*
 * Return the numeric address of an internet tsocket_address as a
 * string allocated on mem_ctx.
 *
 * Returns NULL on failure; errno is EINVAL when addr is not an
 * AF_INET (or, with HAVE_IPV6, AF_INET6) address, otherwise it is
 * whatever inet_ntop() or the allocation left behind.
 */
char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
				       TALLOC_CTX *mem_ctx)
{
	struct samba_sockaddr *stored = talloc_get_type(addr->private_data,
						struct samba_sockaddr);
	char text[INET6_ADDRSTRLEN + 1];
	const void *raw_addr = NULL;
	const char *printed = NULL;
	int family;

	if (stored == NULL) {
		errno = EINVAL;
		return NULL;
	}

	/* pick the family specific address member */
	family = stored->u.sa.sa_family;
	switch (family) {
	case AF_INET:
		raw_addr = &stored->u.in.sin_addr;
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		raw_addr = &stored->u.in6.sin6_addr;
		break;
#endif
	default:
		errno = EINVAL;
		return NULL;
	}

	printed = inet_ntop(family, raw_addr, text, sizeof(text));
	if (printed == NULL) {
		return NULL;
	}

	return talloc_strdup(mem_ctx, printed);
}
uint16_t tsocket_address_inet_port ( const struct tsocket_address * addr )
{
2019-02-18 17:09:14 +01:00
struct samba_sockaddr * bsda = talloc_get_type ( addr - > private_data ,
struct samba_sockaddr ) ;
2009-02-26 15:37:44 +01:00
uint16_t port = 0 ;
if ( ! bsda ) {
errno = EINVAL ;
return 0 ;
}
switch ( bsda - > u . sa . sa_family ) {
case AF_INET :
2009-03-27 11:34:13 +01:00
port = ntohs ( bsda - > u . in . sin_port ) ;
2009-02-26 15:37:44 +01:00
break ;
# ifdef HAVE_IPV6
case AF_INET6 :
2009-03-27 11:34:13 +01:00
port = ntohs ( bsda - > u . in6 . sin6_port ) ;
2009-02-26 15:37:44 +01:00
break ;
# endif
default :
errno = EINVAL ;
return 0 ;
}
return port ;
}
int tsocket_address_inet_set_port ( struct tsocket_address * addr ,
uint16_t port )
{
2019-02-18 17:09:14 +01:00
struct samba_sockaddr * bsda = talloc_get_type ( addr - > private_data ,
struct samba_sockaddr ) ;
2009-02-26 15:37:44 +01:00
if ( ! bsda ) {
errno = EINVAL ;
return - 1 ;
}
switch ( bsda - > u . sa . sa_family ) {
case AF_INET :
2009-03-27 11:34:13 +01:00
bsda - > u . in . sin_port = htons ( port ) ;
2009-02-26 15:37:44 +01:00
break ;
# ifdef HAVE_IPV6
case AF_INET6 :
2009-03-27 11:34:13 +01:00
bsda - > u . in6 . sin6_port = htons ( port ) ;
2009-02-26 15:37:44 +01:00
break ;
# endif
default :
errno = EINVAL ;
return - 1 ;
}
return 0 ;
}
2010-04-27 10:41:46 +02:00
bool tsocket_address_is_unix ( const struct tsocket_address * addr )
{
2019-02-18 17:09:14 +01:00
struct samba_sockaddr * bsda = talloc_get_type ( addr - > private_data ,
struct samba_sockaddr ) ;
2010-04-27 10:41:46 +02:00
if ( ! bsda ) {
return false ;
}
switch ( bsda - > u . sa . sa_family ) {
case AF_UNIX :
return true ;
}
return false ;
}
2009-02-26 15:37:44 +01:00
/*
 * Create an AF_UNIX tsocket_address from a filesystem path.
 *
 * path  - socket path; NULL is treated like the empty string
 * _addr - receives the new address on success
 *
 * Returns 0 on success, -1 with errno = ENAMETOOLONG if the path does
 * not fit into sun_path (including the terminating NUL), otherwise
 * the result of _tsocket_address_bsd_from_sockaddr().
 */
int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
				    const char *path,
				    struct tsocket_address **_addr,
				    const char *location)
{
	struct sockaddr_un un;
	void *un_ptr = &un;
	const char *src = (path != NULL) ? path : "";
	size_t src_len = strlen(src);

	if (src_len > sizeof(un.sun_path) - 1) {
		errno = ENAMETOOLONG;
		return -1;
	}

	/*
	 * The structure is zeroed first, so copying just the path
	 * bytes leaves it NUL terminated and zero padded.
	 */
	ZERO_STRUCT(un);
	un.sun_family = AF_UNIX;
	memcpy(un.sun_path, src, src_len);

	return _tsocket_address_bsd_from_sockaddr(mem_ctx,
						  (struct sockaddr *)un_ptr,
						  sizeof(un),
						  _addr,
						  location);
}
/*
 * Return the path of an AF_UNIX tsocket_address as a string
 * allocated on mem_ctx.
 *
 * Returns NULL with errno = EINVAL if addr is not an AF_UNIX address;
 * NULL is also returned when the string cannot be allocated.
 */
char *tsocket_address_unix_path(const struct tsocket_address *addr,
				TALLOC_CTX *mem_ctx)
{
	struct samba_sockaddr *stored = talloc_get_type(addr->private_data,
						struct samba_sockaddr);

	if (stored == NULL || stored->u.sa.sa_family != AF_UNIX) {
		errno = EINVAL;
		return NULL;
	}

	return talloc_strdup(mem_ctx, stored->u.un.sun_path);
}
/*
 * Produce a printable representation of a bsd tsocket_address,
 * allocated on mem_ctx.
 *
 * The result has the form "unix:<path>" for AF_UNIX addresses and
 * "ipv4:<addr>:<port>" or (with HAVE_IPV6) "ipv6:<addr>:<port>" for
 * internet addresses.
 *
 * Returns NULL on failure; errno is EINVAL if addr does not carry a
 * samba_sockaddr or has an unsupported address family.
 */
static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
					TALLOC_CTX *mem_ctx)
{
	struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
						struct samba_sockaddr);
	char *str;
	char *addr_str;
	const char *prefix = NULL;
	uint16_t port;

	/* same guard as the other accessors: never dereference NULL */
	if (bsda == NULL) {
		errno = EINVAL;
		return NULL;
	}

	switch (bsda->u.sa.sa_family) {
	case AF_UNIX:
		return talloc_asprintf(mem_ctx, "unix:%s",
				       bsda->u.un.sun_path);
	case AF_INET:
		prefix = "ipv4";
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		prefix = "ipv6";
		break;
#endif
	default:
		errno = EINVAL;
		return NULL;
	}

	addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
	if (addr_str == NULL) {
		return NULL;
	}

	port = tsocket_address_inet_port(addr);

	str = talloc_asprintf(mem_ctx, "%s:%s:%u",
			      prefix, addr_str, port);
	/* the address string was only an intermediate */
	talloc_free(addr_str);

	return str;
}
/*
 * Duplicate a bsd tsocket_address onto mem_ctx.
 *
 * Returns the new address, or NULL on failure.  errno is EINVAL if
 * addr does not carry a samba_sockaddr, otherwise whatever
 * _tsocket_address_bsd_from_sockaddr() reported.
 */
static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
							 TALLOC_CTX *mem_ctx,
							 const char *location)
{
	struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
						struct samba_sockaddr);
	struct tsocket_address *copy = NULL;
	int ret;

	/* same guard as the other accessors: never dereference NULL */
	if (bsda == NULL) {
		errno = EINVAL;
		return NULL;
	}

	ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
						 &bsda->u.sa,
						 bsda->sa_socklen,
						 &copy,
						 location);
	if (ret != 0) {
		return NULL;
	}

	return copy;
}
/* Operations table shared by all addresses created in this file. */
static const struct tsocket_address_ops tsocket_address_bsd_ops = {
	.name		= "bsd",
	.string		= tsocket_address_bsd_string,
	.copy		= tsocket_address_bsd_copy,
};
2009-03-26 14:27:45 +01:00
/* Per-socket state of a bsd datagram context. */
struct tdgram_bsd {
	/* socket descriptor, -1 once disconnected */
	int fd;

	/* event context the fd event below was registered on */
	void *event_ptr;
	struct tevent_fd *fde;

	/* try recvfrom() right away instead of waiting for readability */
	bool optimize_recvfrom;
	/* query pending bytes the netlink way instead of FIONREAD */
	bool netlink;

	/* callback (and its argument) run when the fd is readable */
	void *readable_private;
	void (*readable_handler)(void *private_data);

	/* callback (and its argument) run when the fd is writeable */
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
2012-11-02 13:45:49 +01:00
bool tdgram_bsd_optimize_recvfrom ( struct tdgram_context * dgram ,
bool on )
{
struct tdgram_bsd * bsds =
talloc_get_type ( _tdgram_context_data ( dgram ) ,
struct tdgram_bsd ) ;
bool old ;
if ( bsds = = NULL ) {
/* not a bsd socket */
return false ;
}
old = bsds - > optimize_recvfrom ;
bsds - > optimize_recvfrom = on ;
return old ;
}
2009-03-26 14:27:45 +01:00
static void tdgram_bsd_fde_handler ( struct tevent_context * ev ,
struct tevent_fd * fde ,
uint16_t flags ,
void * private_data )
{
struct tdgram_bsd * bsds = talloc_get_type_abort ( private_data ,
struct tdgram_bsd ) ;
if ( flags & TEVENT_FD_WRITE ) {
bsds - > writeable_handler ( bsds - > writeable_private ) ;
return ;
}
if ( flags & TEVENT_FD_READ ) {
2009-04-02 10:36:03 +02:00
if ( ! bsds - > readable_handler ) {
TEVENT_FD_NOT_READABLE ( bsds - > fde ) ;
return ;
}
2009-03-26 14:27:45 +01:00
bsds - > readable_handler ( bsds - > readable_private ) ;
return ;
}
}
static int tdgram_bsd_set_readable_handler ( struct tdgram_bsd * bsds ,
struct tevent_context * ev ,
void ( * handler ) ( void * private_data ) ,
void * private_data )
{
if ( ev = = NULL ) {
if ( handler ) {
errno = EINVAL ;
return - 1 ;
}
2009-04-02 10:36:03 +02:00
if ( ! bsds - > readable_handler ) {
return 0 ;
}
2009-03-26 14:27:45 +01:00
bsds - > readable_handler = NULL ;
bsds - > readable_private = NULL ;
return 0 ;
}
2009-04-02 10:36:03 +02:00
/* read and write must use the same tevent_context */
if ( bsds - > event_ptr ! = ev ) {
if ( bsds - > readable_handler | | bsds - > writeable_handler ) {
errno = EINVAL ;
return - 1 ;
}
bsds - > event_ptr = NULL ;
TALLOC_FREE ( bsds - > fde ) ;
}
2009-06-29 13:05:27 +02:00
if ( tevent_fd_get_flags ( bsds - > fde ) = = 0 ) {
TALLOC_FREE ( bsds - > fde ) ;
2009-03-26 14:27:45 +01:00
bsds - > fde = tevent_add_fd ( ev , bsds ,
bsds - > fd , TEVENT_FD_READ ,
tdgram_bsd_fde_handler ,
bsds ) ;
if ( ! bsds - > fde ) {
2009-05-19 23:48:41 +02:00
errno = ENOMEM ;
2009-03-26 14:27:45 +01:00
return - 1 ;
}
/* cache the event context we're running on */
bsds - > event_ptr = ev ;
2009-04-02 10:36:03 +02:00
} else if ( ! bsds - > readable_handler ) {
TEVENT_FD_READABLE ( bsds - > fde ) ;
2009-03-26 14:27:45 +01:00
}
bsds - > readable_handler = handler ;
bsds - > readable_private = private_data ;
return 0 ;
}
static int tdgram_bsd_set_writeable_handler ( struct tdgram_bsd * bsds ,
struct tevent_context * ev ,
void ( * handler ) ( void * private_data ) ,
void * private_data )
{
if ( ev = = NULL ) {
if ( handler ) {
errno = EINVAL ;
return - 1 ;
}
2009-04-02 10:36:03 +02:00
if ( ! bsds - > writeable_handler ) {
return 0 ;
}
2009-03-26 14:27:45 +01:00
bsds - > writeable_handler = NULL ;
bsds - > writeable_private = NULL ;
TEVENT_FD_NOT_WRITEABLE ( bsds - > fde ) ;
return 0 ;
}
2009-04-02 10:36:03 +02:00
/* read and write must use the same tevent_context */
if ( bsds - > event_ptr ! = ev ) {
if ( bsds - > readable_handler | | bsds - > writeable_handler ) {
errno = EINVAL ;
return - 1 ;
}
bsds - > event_ptr = NULL ;
TALLOC_FREE ( bsds - > fde ) ;
}
2009-06-29 13:05:27 +02:00
if ( tevent_fd_get_flags ( bsds - > fde ) = = 0 ) {
TALLOC_FREE ( bsds - > fde ) ;
2009-03-26 14:27:45 +01:00
bsds - > fde = tevent_add_fd ( ev , bsds ,
bsds - > fd , TEVENT_FD_WRITE ,
tdgram_bsd_fde_handler ,
bsds ) ;
if ( ! bsds - > fde ) {
2009-05-19 23:48:41 +02:00
errno = ENOMEM ;
2009-03-26 14:27:45 +01:00
return - 1 ;
}
/* cache the event context we're running on */
bsds - > event_ptr = ev ;
2009-04-02 10:36:03 +02:00
} else if ( ! bsds - > writeable_handler ) {
TEVENT_FD_WRITEABLE ( bsds - > fde ) ;
2009-03-26 14:27:45 +01:00
}
bsds - > writeable_handler = handler ;
bsds - > writeable_private = private_data ;
return 0 ;
}
/* State of one pending recvfrom request. */
struct tdgram_bsd_recvfrom_state {
	/* datagram context the request runs on */
	struct tdgram_context *dgram;

	/* true until the handler has run once */
	bool first_try;

	/* received payload and its length */
	uint8_t *buf;
	size_t len;

	/* sender of the datagram */
	struct tsocket_address *src;
};
static int tdgram_bsd_recvfrom_destructor ( struct tdgram_bsd_recvfrom_state * state )
{
struct tdgram_bsd * bsds = tdgram_context_data ( state - > dgram ,
struct tdgram_bsd ) ;
tdgram_bsd_set_readable_handler ( bsds , NULL , NULL , NULL ) ;
return 0 ;
}
static void tdgram_bsd_recvfrom_handler(void *private_data);

/*
 * Start an asynchronous recvfrom on a bsd datagram socket.
 *
 * Returns the new request, or NULL if it cannot be allocated.  A
 * request that already finished (with success or error) is posted on
 * ev before it is returned.
 */
static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					struct tdgram_context *dgram)
{
	struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
	struct tdgram_bsd_recvfrom_state *state = NULL;
	struct tevent_req *req = NULL;

	req = tevent_req_create(mem_ctx, &state,
				struct tdgram_bsd_recvfrom_state);
	if (req == NULL) {
		return NULL;
	}

	state->dgram	 = dgram;
	state->first_try = true;
	state->buf	 = NULL;
	state->len	 = 0;
	state->src	 = NULL;

	talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);

	if (bsds->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		return tevent_req_post(req, ev);
	}

	/*
	 * this is a fast path, not waiting for the
	 * socket to become explicit readable gains
	 * about 10%-20% performance in benchmark tests.
	 */
	if (bsds->optimize_recvfrom) {
		/*
		 * We only do the optimization on
		 * recvfrom if the caller asked for it.
		 *
		 * This is needed because in most cases
		 * we prefer to flush send buffers before
		 * receiving incoming requests.
		 */
		tdgram_bsd_recvfrom_handler(req);
		if (!tevent_req_is_in_progress(req)) {
			return tevent_req_post(req, ev);
		}
	}

	if (tdgram_bsd_set_readable_handler(bsds, ev,
					    tdgram_bsd_recvfrom_handler,
					    req) == -1) {
		tevent_req_error(req, errno);
		return tevent_req_post(req, ev);
	}

	return req;
}
static void tdgram_bsd_recvfrom_handler ( void * private_data )
{
struct tevent_req * req = talloc_get_type_abort ( private_data ,
struct tevent_req ) ;
struct tdgram_bsd_recvfrom_state * state = tevent_req_data ( req ,
struct tdgram_bsd_recvfrom_state ) ;
struct tdgram_context * dgram = state - > dgram ;
struct tdgram_bsd * bsds = tdgram_context_data ( dgram , struct tdgram_bsd ) ;
2019-02-18 17:09:14 +01:00
struct samba_sockaddr * bsda = NULL ;
2009-03-26 14:27:45 +01:00
ssize_t ret ;
int err ;
bool retry ;
2016-02-04 15:35:06 +01:00
if ( bsds - > netlink ) {
ret = tsocket_bsd_netlink_pending ( bsds - > fd ) ;
} else {
ret = tsocket_bsd_pending ( bsds - > fd ) ;
}
2012-10-02 12:20:26 +02:00
if ( state - > first_try & & ret = = 0 ) {
state - > first_try = false ;
/* retry later */
return ;
}
state - > first_try = false ;
2009-03-26 14:27:45 +01:00
err = tsocket_bsd_error_from_errno ( ret , errno , & retry ) ;
if ( retry ) {
/* retry later */
return ;
}
if ( tevent_req_error ( req , err ) ) {
return ;
}
2012-09-21 22:54:26 +02:00
/* note that 'ret' can be 0 here */
2009-03-26 14:27:45 +01:00
state - > buf = talloc_array ( state , uint8_t , ret ) ;
if ( tevent_req_nomem ( state - > buf , req ) ) {
return ;
}
state - > len = ret ;
state - > src = tsocket_address_create ( state ,
& tsocket_address_bsd_ops ,
& bsda ,
2019-02-18 17:09:14 +01:00
struct samba_sockaddr ,
2009-03-26 14:27:45 +01:00
__location__ " bsd_recvfrom " ) ;
if ( tevent_req_nomem ( state - > src , req ) ) {
return ;
}
ZERO_STRUCTP ( bsda ) ;
2010-08-28 08:25:19 +02:00
bsda - > sa_socklen = sizeof ( bsda - > u . ss ) ;
2010-10-21 07:00:34 +02:00
# ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
bsda - > u . sa . sa_len = bsda - > sa_socklen ;
# endif
2009-03-26 14:27:45 +01:00
2010-04-07 10:42:37 +10:00
ret = recvfrom ( bsds - > fd , state - > buf , state - > len , 0 ,
2010-08-28 08:25:19 +02:00
& bsda - > u . sa , & bsda - > sa_socklen ) ;
2009-04-03 17:29:12 +02:00
err = tsocket_bsd_error_from_errno ( ret , errno , & retry ) ;
2009-03-26 14:27:45 +01:00
if ( retry ) {
/* retry later */
return ;
}
if ( tevent_req_error ( req , err ) ) {
return ;
}
2010-02-17 13:53:02 +01:00
/*
2010-02-17 19:11:11 +01:00
* Some systems ( FreeBSD , see bug # 7115 ) return too much
* bytes in tsocket_bsd_pending ( ) / ioctl ( fd , FIONREAD , . . . ) ,
* the return value includes some IP / UDP header bytes ,
* while recvfrom ( ) just returns the payload .
2010-02-17 13:53:02 +01:00
*/
2010-02-17 09:24:34 -08:00
state - > buf = talloc_realloc ( state , state - > buf , uint8_t , ret ) ;
if ( tevent_req_nomem ( state - > buf , req ) ) {
return ;
}
2010-02-17 13:53:02 +01:00
state - > len = ret ;
2009-03-26 14:27:45 +01:00
tevent_req_done ( req ) ;
}
/*
 * Collect the result of a recvfrom request.
 *
 * On success the received buffer is moved to mem_ctx and stored in
 * *buf, the sender address is moved to *src if src is not NULL, and
 * the payload length is returned.  Otherwise the result of
 * tsocket_simple_int_recv() is returned, which also fills *perrno.
 * The request is marked as received in both cases.
 */
static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
					int *perrno,
					TALLOC_CTX *mem_ctx,
					uint8_t **buf,
					struct tsocket_address **src)
{
	struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
					struct tdgram_bsd_recvfrom_state);
	ssize_t result = tsocket_simple_int_recv(req, perrno);

	if (result == 0) {
		*buf = talloc_move(mem_ctx, &state->buf);
		if (src != NULL) {
			*src = talloc_move(mem_ctx, &state->src);
		}
		result = state->len;
	}

	tevent_req_received(req);
	return result;
}
/* State of one pending sendto request. */
struct tdgram_bsd_sendto_state {
	/* datagram context the request runs on */
	struct tdgram_context *dgram;

	/* payload to send; the pointer is stored, not copied */
	const uint8_t *buf;
	size_t len;

	/* destination address, may be NULL */
	const struct tsocket_address *dst;

	/* result of sendto(), -1 until the request has completed */
	ssize_t ret;
};
static int tdgram_bsd_sendto_destructor ( struct tdgram_bsd_sendto_state * state )
{
struct tdgram_bsd * bsds = tdgram_context_data ( state - > dgram ,
struct tdgram_bsd ) ;
tdgram_bsd_set_writeable_handler ( bsds , NULL , NULL , NULL ) ;
2009-04-14 10:44:25 +02:00
2009-03-26 14:27:45 +01:00
return 0 ;
}
static void tdgram_bsd_sendto_handler(void *private_data);

/*
 * Start an asynchronous sendto on a bsd datagram socket.
 *
 * buf and dst are referenced, not copied.  dst may be NULL, in which
 * case sendto() is called without a destination address.
 *
 * Returns the new request, or NULL if it cannot be allocated.  A
 * request that already finished (with success or error) is posted on
 * ev before it is returned.
 */
static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
						 struct tevent_context *ev,
						 struct tdgram_context *dgram,
						 const uint8_t *buf,
						 size_t len,
						 const struct tsocket_address *dst)
{
	struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
	struct tdgram_bsd_sendto_state *state = NULL;
	struct tevent_req *req = NULL;

	req = tevent_req_create(mem_ctx, &state,
				struct tdgram_bsd_sendto_state);
	if (req == NULL) {
		return NULL;
	}

	state->dgram	= dgram;
	state->buf	= buf;
	state->len	= len;
	state->dst	= dst;
	state->ret	= -1;

	talloc_set_destructor(state, tdgram_bsd_sendto_destructor);

	if (bsds->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		return tevent_req_post(req, ev);
	}

	/*
	 * this is a fast path, not waiting for the
	 * socket to become explicit writeable gains
	 * about 10%-20% performance in benchmark tests.
	 */
	tdgram_bsd_sendto_handler(req);
	if (!tevent_req_is_in_progress(req)) {
		return tevent_req_post(req, ev);
	}

	if (tdgram_bsd_set_writeable_handler(bsds, ev,
					     tdgram_bsd_sendto_handler,
					     req) == -1) {
		tevent_req_error(req, errno);
		return tevent_req_post(req, ev);
	}

	return req;
}
static void tdgram_bsd_sendto_handler ( void * private_data )
{
struct tevent_req * req = talloc_get_type_abort ( private_data ,
struct tevent_req ) ;
struct tdgram_bsd_sendto_state * state = tevent_req_data ( req ,
struct tdgram_bsd_sendto_state ) ;
struct tdgram_context * dgram = state - > dgram ;
struct tdgram_bsd * bsds = tdgram_context_data ( dgram , struct tdgram_bsd ) ;
struct sockaddr * sa = NULL ;
2010-08-28 08:25:19 +02:00
socklen_t sa_socklen = 0 ;
2009-03-26 14:27:45 +01:00
ssize_t ret ;
int err ;
bool retry ;
if ( state - > dst ) {
2019-02-18 17:09:14 +01:00
struct samba_sockaddr * bsda =
2009-03-26 14:27:45 +01:00
talloc_get_type ( state - > dst - > private_data ,
2019-02-18 17:09:14 +01:00
struct samba_sockaddr ) ;
2009-03-26 14:27:45 +01:00
sa = & bsda - > u . sa ;
2010-08-28 08:25:19 +02:00
sa_socklen = bsda - > sa_socklen ;
2009-03-26 14:27:45 +01:00
}
2010-08-28 08:25:19 +02:00
ret = sendto ( bsds - > fd , state - > buf , state - > len , 0 , sa , sa_socklen ) ;
2009-04-03 17:29:12 +02:00
err = tsocket_bsd_error_from_errno ( ret , errno , & retry ) ;
2009-03-26 14:27:45 +01:00
if ( retry ) {
/* retry later */
return ;
}
2013-03-04 14:06:14 +11:00
if ( err = = EMSGSIZE ) {
/* round up in 1K increments */
int bufsize = ( ( state - > len + 1023 ) & ( ~ 1023 ) ) ;
ret = setsockopt ( bsds - > fd , SOL_SOCKET , SO_SNDBUF , & bufsize ,
sizeof ( bufsize ) ) ;
if ( ret = = 0 ) {
/*
2015-02-16 13:24:04 +00:00
* We do the retry here , rather then via the
2013-03-04 14:06:14 +11:00
* handler , as we only want to retry once for
* this condition , so if there is a mismatch
* between what setsockopt ( ) accepts and what can
* actually be sent , we do not end up in a
* loop .
*/
ret = sendto ( bsds - > fd , state - > buf , state - > len ,
0 , sa , sa_socklen ) ;
err = tsocket_bsd_error_from_errno ( ret , errno , & retry ) ;
if ( retry ) { /* retry later */
return ;
}
}
}
2009-03-26 14:27:45 +01:00
if ( tevent_req_error ( req , err ) ) {
return ;
}
state - > ret = ret ;
tevent_req_done ( req ) ;
}
static ssize_t tdgram_bsd_sendto_recv ( struct tevent_req * req , int * perrno )
{
struct tdgram_bsd_sendto_state * state = tevent_req_data ( req ,
struct tdgram_bsd_sendto_state ) ;
ssize_t ret ;
ret = tsocket_simple_int_recv ( req , perrno ) ;
if ( ret = = 0 ) {
ret = state - > ret ;
}
tevent_req_received ( req ) ;
return ret ;
}
/*
 * State of a disconnect request. Nothing needs to be remembered,
 * the single byte only serves as a placeholder member.
 */
struct tdgram_bsd_disconnect_state {
	uint8_t __dummy;	/* placeholder, never read in this file section */
};
/*
 * Close the datagram socket.
 *
 * The request is always finished before it is returned and is posted
 * to ev via tevent_req_post():
 * - ENOTCONN if the socket is already closed (fd == -1),
 * - the error mapped from close() if closing fails,
 * - success otherwise.
 *
 * Returns NULL only if the request itself cannot be created.
 */
static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
						     struct tevent_context *ev,
						     struct tdgram_context *dgram)
{
	struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
	struct tdgram_bsd_disconnect_state *state = NULL;
	struct tevent_req *req = NULL;
	bool retry_unused;
	int close_ret;
	int err;

	req = tevent_req_create(mem_ctx, &state,
				struct tdgram_bsd_disconnect_state);
	if (req == NULL) {
		return NULL;
	}

	if (bsds->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		return tevent_req_post(req, ev);
	}

	/* drop the fd event before the descriptor goes away */
	TALLOC_FREE(bsds->fde);
	close_ret = close(bsds->fd);
	bsds->fd = -1;

	err = tsocket_bsd_error_from_errno(close_ret, errno, &retry_unused);
	if (!tevent_req_error(req, err)) {
		tevent_req_done(req);
	}

	return tevent_req_post(req, ev);
}
/*
 * Collect the result of a disconnect request and release its state.
 * The value of tsocket_simple_int_recv() is returned as is.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
				      int *perrno)
{
	int result = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return result;
}
/*
 * Operations table of the BSD socket based tdgram implementation.
 * It names the backend and lists the send/recv function pairs for
 * the recvfrom, sendto and disconnect operations; it is handed to
 * tdgram_context_create() when a context is set up in this file.
 */
static const struct tdgram_context_ops tdgram_bsd_ops = {
. name = " bsd " ,
/* receiving a datagram */
. recvfrom_send = tdgram_bsd_recvfrom_send ,
. recvfrom_recv = tdgram_bsd_recvfrom_recv ,
/* sending a datagram */
. sendto_send = tdgram_bsd_sendto_send ,
. sendto_recv = tdgram_bsd_sendto_recv ,
/* closing the socket */
. disconnect_send = tdgram_bsd_disconnect_send ,
. disconnect_recv = tdgram_bsd_disconnect_recv ,
} ;
static int tdgram_bsd_destructor ( struct tdgram_bsd * bsds )
{
TALLOC_FREE ( bsds - > fde ) ;
if ( bsds - > fd ! = - 1 ) {
close ( bsds - > fd ) ;
bsds - > fd = - 1 ;
}
return 0 ;
}
static int tdgram_bsd_dgram_socket ( const struct tsocket_address * local ,
const struct tsocket_address * remote ,
2009-04-03 17:29:12 +02:00
bool broadcast ,
2009-03-26 14:27:45 +01:00
TALLOC_CTX * mem_ctx ,
struct tdgram_context * * _dgram ,
const char * location )
{
2019-02-18 17:09:14 +01:00
struct samba_sockaddr * lbsda =
2009-03-26 14:27:45 +01:00
talloc_get_type_abort ( local - > private_data ,
2019-02-18 17:09:14 +01:00
struct samba_sockaddr ) ;
struct samba_sockaddr * rbsda = NULL ;
2009-03-26 14:27:45 +01:00
struct tdgram_context * dgram ;
struct tdgram_bsd * bsds ;
int fd ;
int ret ;
bool do_bind = false ;
bool do_reuseaddr = false ;
2010-02-17 09:33:18 +01:00
bool do_ipv6only = false ;
2010-02-17 08:49:28 +01:00
bool is_inet = false ;
int sa_fam = lbsda - > u . sa . sa_family ;
2009-03-26 14:27:45 +01:00
if ( remote ) {
rbsda = talloc_get_type_abort ( remote - > private_data ,
2019-02-18 17:09:14 +01:00
struct samba_sockaddr ) ;
2009-03-26 14:27:45 +01:00
}
switch ( lbsda - > u . sa . sa_family ) {
case AF_UNIX :
2009-04-03 17:29:12 +02:00
if ( broadcast ) {
errno = EINVAL ;
return - 1 ;
}
2009-03-26 14:27:45 +01:00
if ( lbsda - > u . un . sun_path [ 0 ] ! = 0 ) {
do_reuseaddr = true ;
do_bind = true ;
}
break ;
case AF_INET :
if ( lbsda - > u . in . sin_port ! = 0 ) {
do_reuseaddr = true ;
do_bind = true ;
}
2010-02-17 08:42:22 +01:00
if ( lbsda - > u . in . sin_addr . s_addr ! = INADDR_ANY ) {
2009-03-26 14:27:45 +01:00
do_bind = true ;
}
2010-02-17 08:49:28 +01:00
is_inet = true ;
2009-03-26 14:27:45 +01:00
break ;
# ifdef HAVE_IPV6
case AF_INET6 :
if ( lbsda - > u . in6 . sin6_port ! = 0 ) {
do_reuseaddr = true ;
do_bind = true ;
}
if ( memcmp ( & in6addr_any ,
& lbsda - > u . in6 . sin6_addr ,
sizeof ( in6addr_any ) ) ! = 0 ) {
do_bind = true ;
}
2010-02-17 08:49:28 +01:00
is_inet = true ;
2010-02-17 09:33:18 +01:00
do_ipv6only = true ;
2009-03-26 14:27:45 +01:00
break ;
# endif
default :
errno = EINVAL ;
return - 1 ;
}
2010-02-17 08:49:28 +01:00
if ( ! do_bind & & is_inet & & rbsda ) {
sa_fam = rbsda - > u . sa . sa_family ;
switch ( sa_fam ) {
case AF_INET :
2010-02-17 09:33:18 +01:00
do_ipv6only = false ;
2010-02-17 08:49:28 +01:00
break ;
# ifdef HAVE_IPV6
case AF_INET6 :
2010-02-17 09:33:18 +01:00
do_ipv6only = true ;
2010-02-17 08:49:28 +01:00
break ;
# endif
}
}
fd = socket ( sa_fam , SOCK_DGRAM , 0 ) ;
2009-03-26 14:27:45 +01:00
if ( fd < 0 ) {
2010-09-15 11:21:43 +10:00
return - 1 ;
2009-03-26 14:27:45 +01:00
}
fd = tsocket_bsd_common_prepare_fd ( fd , true ) ;
if ( fd < 0 ) {
2010-09-15 11:21:43 +10:00
return - 1 ;
2009-03-26 14:27:45 +01:00
}
dgram = tdgram_context_create ( mem_ctx ,
& tdgram_bsd_ops ,
& bsds ,
struct tdgram_bsd ,
location ) ;
if ( ! dgram ) {
int saved_errno = errno ;
close ( fd ) ;
errno = saved_errno ;
return - 1 ;
}
ZERO_STRUCTP ( bsds ) ;
bsds - > fd = fd ;
talloc_set_destructor ( bsds , tdgram_bsd_destructor ) ;
2010-04-24 19:36:01 +02:00
# ifdef HAVE_IPV6
2010-02-17 09:33:18 +01:00
if ( do_ipv6only ) {
int val = 1 ;
ret = setsockopt ( fd , IPPROTO_IPV6 , IPV6_V6ONLY ,
( const void * ) & val , sizeof ( val ) ) ;
if ( ret = = - 1 ) {
int saved_errno = errno ;
talloc_free ( dgram ) ;
errno = saved_errno ;
2010-09-15 11:21:43 +10:00
return - 1 ;
2010-02-17 09:33:18 +01:00
}
}
# endif
2009-04-03 17:29:12 +02:00
if ( broadcast ) {
2009-03-26 14:27:45 +01:00
int val = 1 ;
ret = setsockopt ( fd , SOL_SOCKET , SO_BROADCAST ,
( const void * ) & val , sizeof ( val ) ) ;
if ( ret = = - 1 ) {
int saved_errno = errno ;
talloc_free ( dgram ) ;
errno = saved_errno ;
2010-09-15 11:21:43 +10:00
return - 1 ;
2009-03-26 14:27:45 +01:00
}
}
if ( do_reuseaddr ) {
int val = 1 ;
ret = setsockopt ( fd , SOL_SOCKET , SO_REUSEADDR ,
( const void * ) & val , sizeof ( val ) ) ;
if ( ret = = - 1 ) {
int saved_errno = errno ;
talloc_free ( dgram ) ;
errno = saved_errno ;
2010-09-15 11:21:43 +10:00
return - 1 ;
2009-03-26 14:27:45 +01:00
}
}
if ( do_bind ) {
2010-08-28 08:25:19 +02:00
ret = bind ( fd , & lbsda - > u . sa , lbsda - > sa_socklen ) ;
2009-03-26 14:27:45 +01:00
if ( ret = = - 1 ) {
int saved_errno = errno ;
talloc_free ( dgram ) ;
errno = saved_errno ;
2010-09-15 11:21:43 +10:00
return - 1 ;
2009-03-26 14:27:45 +01:00
}
}
if ( rbsda ) {
2010-02-17 08:49:28 +01:00
if ( rbsda - > u . sa . sa_family ! = sa_fam ) {
talloc_free ( dgram ) ;
errno = EINVAL ;
return - 1 ;
}
2010-08-28 08:25:19 +02:00
ret = connect ( fd , & rbsda - > u . sa , rbsda - > sa_socklen ) ;
2009-03-26 14:27:45 +01:00
if ( ret = = - 1 ) {
int saved_errno = errno ;
talloc_free ( dgram ) ;
errno = saved_errno ;
2010-09-15 11:21:43 +10:00
return - 1 ;
2009-03-26 14:27:45 +01:00
}
}
* _dgram = dgram ;
return 0 ;
}
2015-05-21 11:37:06 +02:00
/*
 * Wrap an already existing socket descriptor in a tdgram_context.
 *
 * mem_ctx   talloc parent of the new context
 * fd        the descriptor; it is closed by the context's destructor
 * _dgram    receives the new context
 * location  passed on to tdgram_context_create()
 *
 * Returns 0 on success and -1 if the context cannot be created.
 */
int _tdgram_bsd_existing_socket(TALLOC_CTX *mem_ctx,
				int fd,
				struct tdgram_context **_dgram,
				const char *location)
{
	struct tdgram_context *dgram = NULL;
	struct tdgram_bsd *bsds = NULL;

	dgram = tdgram_context_create(mem_ctx,
				      &tdgram_bsd_ops,
				      &bsds,
				      struct tdgram_bsd,
				      location);
	if (dgram == NULL) {
		return -1;
	}
	ZERO_STRUCTP(bsds);
	bsds->fd = fd;
	talloc_set_destructor(bsds, tdgram_bsd_destructor);

	*_dgram = dgram;

#ifdef HAVE_LINUX_RTNETLINK_H
	{
		/*
		 * Try to determine the protocol family and remember
		 * if it's AF_NETLINK. A failing getsockname() is
		 * simply ignored.
		 */
		struct sockaddr sa;
		socklen_t sa_len = sizeof(sa);
		int rc = getsockname(fd, &sa, &sa_len);

		if (rc == 0 && sa.sa_family == AF_NETLINK) {
			bsds->netlink = true;
		}
	}
#endif

	return 0;
}
2009-03-26 14:27:45 +01:00
/*
 * Create a UDP tdgram context.
 *
 * The local address must be AF_INET (or AF_INET6 when built with
 * HAVE_IPV6); any other family fails with errno = EINVAL. Everything
 * else is delegated to tdgram_bsd_dgram_socket() without broadcast.
 */
int _tdgram_inet_udp_socket(const struct tsocket_address *local,
			    const struct tsocket_address *remote,
			    TALLOC_CTX *mem_ctx,
			    struct tdgram_context **dgram,
			    const char *location)
{
	struct samba_sockaddr *laddr =
		talloc_get_type_abort(local->private_data,
				      struct samba_sockaddr);
	bool family_ok = (laddr->u.sa.sa_family == AF_INET);

#ifdef HAVE_IPV6
	family_ok = family_ok || (laddr->u.sa.sa_family == AF_INET6);
#endif

	if (!family_ok) {
		errno = EINVAL;
		return -1;
	}

	return tdgram_bsd_dgram_socket(local, remote, false,
				       mem_ctx, dgram, location);
}
2015-05-21 11:39:38 +02:00
/*
 * Create a UDP tdgram context with SO_BROADCAST enabled.
 *
 * Only an AF_INET local address is accepted (broadcast is IPv4
 * only); every other family, AF_INET6 included, fails with
 * errno = EINVAL. No remote address is used.
 */
int _tdgram_inet_udp_broadcast_socket(const struct tsocket_address *local,
				      TALLOC_CTX *mem_ctx,
				      struct tdgram_context **dgram,
				      const char *location)
{
	struct samba_sockaddr *laddr =
		talloc_get_type_abort(local->private_data,
				      struct samba_sockaddr);

	if (laddr->u.sa.sa_family != AF_INET) {
		/* only ipv4 */
		errno = EINVAL;
		return -1;
	}

	return tdgram_bsd_dgram_socket(local, NULL, true,
				       mem_ctx, dgram, location);
}
2009-04-30 13:10:54 +02:00
/*
 * Create a unix domain datagram tdgram context.
 *
 * The local address must be AF_UNIX, otherwise the call fails with
 * errno = EINVAL. Everything else is delegated to
 * tdgram_bsd_dgram_socket() without broadcast.
 */
int _tdgram_unix_socket(const struct tsocket_address *local,
			const struct tsocket_address *remote,
			TALLOC_CTX *mem_ctx,
			struct tdgram_context **dgram,
			const char *location)
{
	struct samba_sockaddr *laddr =
		talloc_get_type_abort(local->private_data,
				      struct samba_sockaddr);

	if (laddr->u.sa.sa_family != AF_UNIX) {
		errno = EINVAL;
		return -1;
	}

	return tdgram_bsd_dgram_socket(local, remote, false,
				       mem_ctx, dgram, location);
}
2009-04-03 12:15:27 +02:00
/*
 * Private data of a BSD socket based tstream context.
 */
struct tstream_bsd {
	/* the socket; -1 is treated as "not connected" */
	int fd;
	/*
	 * Remembered errno value; once non-zero, further requests
	 * fail with it instead of touching the socket.
	 */
	int error;

	/* event context the fde below was registered on */
	void *event_ptr;
	/* fd event dispatching to the two handlers below */
	struct tevent_fd *fde;
	/* try readv() right away in readv_send */
	bool optimize_readv;
	/* on an fd error, do not drain pending data first */
	bool fail_readv_first_error;

	/* callback and argument for readable events */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	/* callback and argument for writeable events */
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
2012-11-02 13:45:49 +01:00
bool tstream_bsd_optimize_readv ( struct tstream_context * stream ,
bool on )
{
struct tstream_bsd * bsds =
talloc_get_type ( _tstream_context_data ( stream ) ,
struct tstream_bsd ) ;
bool old ;
if ( bsds = = NULL ) {
/* not a bsd socket */
return false ;
}
old = bsds - > optimize_readv ;
bsds - > optimize_readv = on ;
return old ;
}
2023-01-12 10:08:56 +01:00
bool tstream_bsd_fail_readv_first_error ( struct tstream_context * stream ,
bool on )
{
struct tstream_bsd * bsds =
talloc_get_type ( _tstream_context_data ( stream ) ,
struct tstream_bsd ) ;
bool old ;
if ( bsds = = NULL ) {
/* not a bsd socket */
return false ;
}
old = bsds - > fail_readv_first_error ;
bsds - > fail_readv_first_error = on ;
return old ;
}
2009-04-03 12:15:27 +02:00
static void tstream_bsd_fde_handler ( struct tevent_context * ev ,
struct tevent_fd * fde ,
uint16_t flags ,
void * private_data )
{
struct tstream_bsd * bsds = talloc_get_type_abort ( private_data ,
struct tstream_bsd ) ;
2023-01-11 20:17:06 +01:00
if ( flags & TEVENT_FD_ERROR ) {
/*
* We lazily keep TEVENT_FD_READ alive
* in tstream_bsd_set_readable_handler ( )
*
* So we have to check TEVENT_FD_READ
* as well as bsds - > readable_handler
*
2023-01-12 10:08:56 +01:00
* We only drain remaining data from the
* the recv queue if available and desired .
2023-01-11 20:17:06 +01:00
*/
if ( ( flags & TEVENT_FD_READ ) & &
2023-01-12 10:08:56 +01:00
! bsds - > fail_readv_first_error & &
2023-01-11 20:17:06 +01:00
( bsds - > readable_handler ! = NULL ) )
{
/*
* If there ' s still data to read
* we allow it to be read until
* we reach EOF ( = > EPIPE ) .
*/
bsds - > readable_handler ( bsds - > readable_private ) ;
return ;
}
/*
* If there ' s no data left to read ,
* we get the error .
*
* It means we no longer call any readv or
* writev , as bsds - > error is checked first .
*/
if ( bsds - > error = = 0 ) {
int ret = samba_socket_poll_or_sock_error ( bsds - > fd ) ;
if ( ret = = - 1 ) {
bsds - > error = errno ;
}
/* fallback to EPIPE */
if ( bsds - > error = = 0 ) {
bsds - > error = EPIPE ;
}
}
/*
* Let write to fail early .
*
* Note we only need to check TEVENT_FD_WRITE
* as tstream_bsd_set_writeable_handler ( )
* clear it together with the handler .
*/
if ( flags & TEVENT_FD_WRITE ) {
bsds - > writeable_handler ( bsds - > writeable_private ) ;
return ;
}
/* We prefer the readable handler to fire first. */
if ( bsds - > readable_handler ! = NULL ) {
bsds - > readable_handler ( bsds - > readable_private ) ;
return ;
}
/* As last resort we notify the writeable handler */
if ( bsds - > writeable_handler ! = NULL ) {
bsds - > writeable_handler ( bsds - > writeable_private ) ;
return ;
}
/*
* We may hit this because we don ' t clear TEVENT_FD_ERROR
* in tstream_bsd_set_readable_handler ( ) nor
* tstream_bsd_set_writeable_handler ( ) .
*
* As we already captured the error , we can remove
* the fde completely .
*/
TALLOC_FREE ( bsds - > fde ) ;
return ;
}
2009-04-03 12:15:27 +02:00
if ( flags & TEVENT_FD_WRITE ) {
bsds - > writeable_handler ( bsds - > writeable_private ) ;
return ;
}
if ( flags & TEVENT_FD_READ ) {
if ( ! bsds - > readable_handler ) {
lib/tsocket: avoid endless cpu-spinning in tstream_bsd_fde_handler()
There were some reports that strace output an LDAP server socket is in
CLOSE_WAIT state, returning EAGAIN for writev over and over (after a call to
epoll() each time).
In the tstream_bsd code the problem happens when we have a pending
writev_send, while there's no readv_send pending. In that case
we still ask for TEVENT_FD_READ in order to notice connection errors
early, so we try to call writev even if the socket doesn't report TEVENT_FD_WRITE.
And there are situations where we do that over and over again.
It happens like this with a Linux kernel:
tcp_fin() has this:
struct tcp_sock *tp = tcp_sk(sk);
inet_csk_schedule_ack(sk);
sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(sk, SOCK_DONE);
switch (sk->sk_state) {
case TCP_SYN_RECV:
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
inet_csk_enter_pingpong_mode(sk);
break;
It means RCV_SHUTDOWN gets set as well as TCP_CLOSE_WAIT, but
sk->sk_err is not changed to indicate an error.
tcp_sendmsg_locked has this:
...
err = -EPIPE;
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
while (msg_data_left(msg)) {
int copy = 0;
skb = tcp_write_queue_tail(sk);
if (skb)
copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_space;
...
wait_for_space:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now,
TCP_NAGLE_PUSH, size_goal);
err = sk_stream_wait_memory(sk, &timeo);
if (err != 0)
goto do_error;
It means if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) doesn't
hit as we only have RCV_SHUTDOWN and sk_stream_wait_memory returns
-EAGAIN.
tcp_poll has this:
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
So we'll get EPOLLIN | EPOLLRDNORM | EPOLLRDHUP triggering
TEVENT_FD_READ and writev/sendmsg keeps getting EAGAIN.
So we need to always clear TEVENT_FD_READ if we don't
have readable handler in order to avoid burning cpu.
But we turn it on again after a timeout of 1 second
in order to monitor the error state of the connection.
And now that our tsocket_bsd_error() helper checks for POLLRDHUP,
we can check if the socket is in an error state before calling the
writable handler when TEVENT_FD_READ was reported.
Only on error we'll call the writable handler, which will pick
the error without calling writev().
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Ralph Boehme <slow@samba.org>
2022-10-12 17:26:16 +02:00
/*
2023-01-11 20:17:06 +01:00
* tstream_bsd_set_readable_handler
* doesn ' t clear TEVENT_FD_READ .
*
lib/tsocket: avoid endless cpu-spinning in tstream_bsd_fde_handler()
There were some reports that strace output an LDAP server socket is in
CLOSE_WAIT state, returning EAGAIN for writev over and over (after a call to
epoll() each time).
In the tstream_bsd code the problem happens when we have a pending
writev_send, while there's no readv_send pending. In that case
we still ask for TEVENT_FD_READ in order to notice connection errors
early, so we try to call writev even if the socket doesn't report TEVENT_FD_WRITE.
And there are situations where we do that over and over again.
It happens like this with a Linux kernel:
tcp_fin() has this:
struct tcp_sock *tp = tcp_sk(sk);
inet_csk_schedule_ack(sk);
sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(sk, SOCK_DONE);
switch (sk->sk_state) {
case TCP_SYN_RECV:
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
inet_csk_enter_pingpong_mode(sk);
break;
It means RCV_SHUTDOWN gets set as well as TCP_CLOSE_WAIT, but
sk->sk_err is not changed to indicate an error.
tcp_sendmsg_locked has this:
...
err = -EPIPE;
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
while (msg_data_left(msg)) {
int copy = 0;
skb = tcp_write_queue_tail(sk);
if (skb)
copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_space;
...
wait_for_space:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now,
TCP_NAGLE_PUSH, size_goal);
err = sk_stream_wait_memory(sk, &timeo);
if (err != 0)
goto do_error;
It means if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) doesn't
hit as we only have RCV_SHUTDOWN and sk_stream_wait_memory returns
-EAGAIN.
tcp_poll has this:
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
So we'll get EPOLLIN | EPOLLRDNORM | EPOLLRDHUP triggering
TEVENT_FD_READ and writev/sendmsg keeps getting EAGAIN.
So we need to always clear TEVENT_FD_READ if we don't
have readable handler in order to avoid burning cpu.
But we turn it on again after a timeout of 1 second
in order to monitor the error state of the connection.
And now that our tsocket_bsd_error() helper checks for POLLRDHUP,
we can check if the socket is in an error state before calling the
writable handler when TEVENT_FD_READ was reported.
Only on error we'll call the writable handler, which will pick
the error without calling writev().
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Ralph Boehme <slow@samba.org>
2022-10-12 17:26:16 +02:00
* In order to avoid cpu - spinning
2023-01-11 20:17:06 +01:00
* we need to clear it here .
lib/tsocket: avoid endless cpu-spinning in tstream_bsd_fde_handler()
There were some reports that strace output an LDAP server socket is in
CLOSE_WAIT state, returning EAGAIN for writev over and over (after a call to
epoll() each time).
In the tstream_bsd code the problem happens when we have a pending
writev_send, while there's no readv_send pending. In that case
we still ask for TEVENT_FD_READ in order to notice connection errors
early, so we try to call writev even if the socket doesn't report TEVENT_FD_WRITE.
And there are situations where we do that over and over again.
It happens like this with a Linux kernel:
tcp_fin() has this:
struct tcp_sock *tp = tcp_sk(sk);
inet_csk_schedule_ack(sk);
sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(sk, SOCK_DONE);
switch (sk->sk_state) {
case TCP_SYN_RECV:
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
inet_csk_enter_pingpong_mode(sk);
break;
It means RCV_SHUTDOWN gets set as well as TCP_CLOSE_WAIT, but
sk->sk_err is not changed to indicate an error.
tcp_sendmsg_locked has this:
...
err = -EPIPE;
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
while (msg_data_left(msg)) {
int copy = 0;
skb = tcp_write_queue_tail(sk);
if (skb)
copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_space;
...
wait_for_space:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now,
TCP_NAGLE_PUSH, size_goal);
err = sk_stream_wait_memory(sk, &timeo);
if (err != 0)
goto do_error;
It means if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) doesn't
hit as we only have RCV_SHUTDOWN and sk_stream_wait_memory returns
-EAGAIN.
tcp_poll has this:
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
So we'll get EPOLLIN | EPOLLRDNORM | EPOLLRDHUP triggering
TEVENT_FD_READ and writev/sendmsg keeps getting EAGAIN.
So we need to always clear TEVENT_FD_READ if we don't
have readable handler in order to avoid burning cpu.
But we turn it on again after a timeout of 1 second
in order to monitor the error state of the connection.
And now that our tsocket_bsd_error() helper checks for POLLRDHUP,
we can check if the socket is in an error state before calling the
writable handler when TEVENT_FD_READ was reported.
Only on error we'll call the writable handler, which will pick
the error without calling writev().
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Ralph Boehme <slow@samba.org>
2022-10-12 17:26:16 +02:00
*/
TEVENT_FD_NOT_READABLE ( bsds - > fde ) ;
/*
2023-01-11 20:17:06 +01:00
* Here we ' re lazy and keep TEVENT_FD_ERROR
* alive . If it ' s triggered the next time
* we ' ll handle it gracefully above
* and end up with TALLOC_FREE ( bsds - > fde ) ;
* in order to spin on TEVENT_FD_ERROR .
lib/tsocket: avoid endless cpu-spinning in tstream_bsd_fde_handler()
There were some reports that strace output an LDAP server socket is in
CLOSE_WAIT state, returning EAGAIN for writev over and over (after a call to
epoll() each time).
In the tstream_bsd code the problem happens when we have a pending
writev_send, while there's no readv_send pending. In that case
we still ask for TEVENT_FD_READ in order to notice connection errors
early, so we try to call writev even if the socket doesn't report TEVENT_FD_WRITE.
And there are situations where we do that over and over again.
It happens like this with a Linux kernel:
tcp_fin() has this:
struct tcp_sock *tp = tcp_sk(sk);
inet_csk_schedule_ack(sk);
sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(sk, SOCK_DONE);
switch (sk->sk_state) {
case TCP_SYN_RECV:
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
inet_csk_enter_pingpong_mode(sk);
break;
It means RCV_SHUTDOWN gets set as well as TCP_CLOSE_WAIT, but
sk->sk_err is not changed to indicate an error.
tcp_sendmsg_locked has this:
...
err = -EPIPE;
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
while (msg_data_left(msg)) {
int copy = 0;
skb = tcp_write_queue_tail(sk);
if (skb)
copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_space;
...
wait_for_space:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now,
TCP_NAGLE_PUSH, size_goal);
err = sk_stream_wait_memory(sk, &timeo);
if (err != 0)
goto do_error;
It means if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) doesn't
hit as we only have RCV_SHUTDOWN and sk_stream_wait_memory returns
-EAGAIN.
tcp_poll has this:
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
So we'll get EPOLLIN | EPOLLRDNORM | EPOLLRDHUP triggering
TEVENT_FD_READ and writev/sendmsg keeps getting EAGAIN.
So we need to always clear TEVENT_FD_READ if we don't
have readable handler in order to avoid burning cpu.
But we turn it on again after a timeout of 1 second
in order to monitor the error state of the connection.
And now that our tsocket_bsd_error() helper checks for POLLRDHUP,
we can check if the socket is in an error state before calling the
writable handler when TEVENT_FD_READ was reported.
Only on error we'll call the writable handler, which will pick
the error without calling writev().
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Ralph Boehme <slow@samba.org>
2022-10-12 17:26:16 +02:00
*/
2009-04-03 12:15:27 +02:00
return ;
}
bsds - > readable_handler ( bsds - > readable_private ) ;
return ;
}
}
static int tstream_bsd_set_readable_handler ( struct tstream_bsd * bsds ,
struct tevent_context * ev ,
void ( * handler ) ( void * private_data ) ,
void * private_data )
{
if ( ev = = NULL ) {
if ( handler ) {
errno = EINVAL ;
return - 1 ;
}
if ( ! bsds - > readable_handler ) {
return 0 ;
}
bsds - > readable_handler = NULL ;
bsds - > readable_private = NULL ;
2023-01-11 20:17:06 +01:00
/*
* Here we are lazy as it ' s very likely that the next
* tevent_readv_send ( ) will come in shortly ,
* so we keep TEVENT_FD_READ alive .
*/
2009-04-03 12:15:27 +02:00
return 0 ;
}
/* read and write must use the same tevent_context */
if ( bsds - > event_ptr ! = ev ) {
if ( bsds - > readable_handler | | bsds - > writeable_handler ) {
errno = EINVAL ;
return - 1 ;
}
bsds - > event_ptr = NULL ;
TALLOC_FREE ( bsds - > fde ) ;
}
2009-06-29 13:05:27 +02:00
if ( tevent_fd_get_flags ( bsds - > fde ) = = 0 ) {
TALLOC_FREE ( bsds - > fde ) ;
2009-04-03 12:15:27 +02:00
bsds - > fde = tevent_add_fd ( ev , bsds ,
2023-01-11 20:17:06 +01:00
bsds - > fd ,
TEVENT_FD_ERROR | TEVENT_FD_READ ,
2009-04-03 12:15:27 +02:00
tstream_bsd_fde_handler ,
bsds ) ;
if ( ! bsds - > fde ) {
2009-05-19 23:48:41 +02:00
errno = ENOMEM ;
2009-04-03 12:15:27 +02:00
return - 1 ;
}
/* cache the event context we're running on */
bsds - > event_ptr = ev ;
} else if ( ! bsds - > readable_handler ) {
TEVENT_FD_READABLE ( bsds - > fde ) ;
2023-01-11 20:17:06 +01:00
/*
* TEVENT_FD_ERROR is likely already set , so
* TEVENT_FD_WANTERROR ( ) is most likely a no - op .
*/
TEVENT_FD_WANTERROR ( bsds - > fde ) ;
2009-04-03 12:15:27 +02:00
}
bsds - > readable_handler = handler ;
bsds - > readable_private = private_data ;
return 0 ;
}
static int tstream_bsd_set_writeable_handler ( struct tstream_bsd * bsds ,
struct tevent_context * ev ,
void ( * handler ) ( void * private_data ) ,
void * private_data )
{
if ( ev = = NULL ) {
if ( handler ) {
errno = EINVAL ;
return - 1 ;
}
if ( ! bsds - > writeable_handler ) {
return 0 ;
}
bsds - > writeable_handler = NULL ;
bsds - > writeable_private = NULL ;
2023-01-11 20:17:06 +01:00
/*
* The writeable handler is only
* set if we got EAGAIN or a short
* writev on the first try , so
* this isn ' t the hot path .
*
* Here we are lazy and leave TEVENT_FD_ERROR
* alive as it ' s shared with the readable
* handler . So we only clear TEVENT_FD_WRITE .
*/
2009-04-03 12:15:27 +02:00
TEVENT_FD_NOT_WRITEABLE ( bsds - > fde ) ;
return 0 ;
}
/* read and write must use the same tevent_context */
if ( bsds - > event_ptr ! = ev ) {
if ( bsds - > readable_handler | | bsds - > writeable_handler ) {
errno = EINVAL ;
return - 1 ;
}
bsds - > event_ptr = NULL ;
TALLOC_FREE ( bsds - > fde ) ;
}
2009-06-29 13:05:27 +02:00
if ( tevent_fd_get_flags ( bsds - > fde ) = = 0 ) {
TALLOC_FREE ( bsds - > fde ) ;
2009-04-03 12:15:27 +02:00
bsds - > fde = tevent_add_fd ( ev , bsds ,
2009-06-29 13:13:05 +02:00
bsds - > fd ,
2023-01-11 20:17:06 +01:00
TEVENT_FD_ERROR | TEVENT_FD_WRITE ,
2009-04-03 12:15:27 +02:00
tstream_bsd_fde_handler ,
bsds ) ;
if ( ! bsds - > fde ) {
2009-05-19 23:48:41 +02:00
errno = ENOMEM ;
2009-04-03 12:15:27 +02:00
return - 1 ;
}
/* cache the event context we're running on */
bsds - > event_ptr = ev ;
} else if ( ! bsds - > writeable_handler ) {
2023-01-11 20:17:06 +01:00
TEVENT_FD_WRITEABLE ( bsds - > fde ) ;
/*
* TEVENT_FD_ERROR is likely already set , so
* TEVENT_FD_WANTERROR ( ) is most likely a no - op .
*/
TEVENT_FD_WANTERROR ( bsds - > fde ) ;
2009-04-03 12:15:27 +02:00
}
bsds - > writeable_handler = handler ;
bsds - > writeable_private = private_data ;
return 0 ;
}
static ssize_t tstream_bsd_pending_bytes ( struct tstream_context * stream )
{
struct tstream_bsd * bsds = tstream_context_data ( stream ,
struct tstream_bsd ) ;
ssize_t ret ;
if ( bsds - > fd = = - 1 ) {
errno = ENOTCONN ;
return - 1 ;
}
2022-10-13 16:23:03 +02:00
if ( bsds - > error ! = 0 ) {
errno = bsds - > error ;
return - 1 ;
}
2009-04-03 12:15:27 +02:00
ret = tsocket_bsd_pending ( bsds - > fd ) ;
2022-10-13 16:23:03 +02:00
if ( ret = = - 1 ) {
/*
* remember the error and don ' t
* allow further requests
*/
bsds - > error = errno ;
}
2009-04-03 12:15:27 +02:00
return ret ;
}
/*
 * State of a pending readv request on a bsd stream.
 */
struct tstream_bsd_readv_state {
	/* the stream the request operates on */
	struct tstream_context *stream;

	/* private copy of the caller's iovec array (it gets modified) */
	struct iovec *vector;
	/* number of entries still left in vector */
	size_t count;

	/* bytes read so far, the final result of the request */
	int ret;
};
static int tstream_bsd_readv_destructor ( struct tstream_bsd_readv_state * state )
{
struct tstream_bsd * bsds = tstream_context_data ( state - > stream ,
struct tstream_bsd ) ;
tstream_bsd_set_readable_handler ( bsds , NULL , NULL , NULL ) ;
return 0 ;
}
/*
 * Forward declaration: tstream_bsd_readv_send() refers to the
 * handler before its definition.
 */
static void tstream_bsd_readv_handler ( void * private_data ) ;
/*
 * Start reading into the given iovec array.
 *
 * mem_ctx  talloc parent of the request
 * ev       event context used to wait for the socket
 * stream   the bsd stream to read from
 * vector   array of buffers to fill; it is copied, the caller's
 *          array is not modified
 * count    number of entries in vector
 *
 * Returns the request, or NULL if it cannot be created. A request
 * that is finished before this function returns is posted to ev via
 * tevent_req_post().
 */
static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					struct tstream_context *stream,
					struct iovec *vector,
					size_t count)
{
	struct tstream_bsd *bsds = tstream_context_data(stream,
				   struct tstream_bsd);
	struct tstream_bsd_readv_state *state = NULL;
	struct tevent_req *req = NULL;
	int rc;

	req = tevent_req_create(mem_ctx, &state,
				struct tstream_bsd_readv_state);
	if (req == NULL) {
		return NULL;
	}

	state->stream = stream;

	/* work on a private copy of the vector, as it gets modified */
	state->vector = talloc_array(state, struct iovec, count);
	if (tevent_req_nomem(state->vector, req)) {
		return tevent_req_post(req, ev);
	}
	memcpy(state->vector, vector, sizeof(struct iovec) * count);
	state->count = count;
	state->ret = 0;

	talloc_set_destructor(state, tstream_bsd_readv_destructor);

	if (bsds->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		return tevent_req_post(req, ev);
	}

	/*
	 * Fast path: not waiting for the socket to become explicitly
	 * readable gains about 10%-20% performance in benchmark tests.
	 *
	 * It is only taken if the caller asked for it, because in
	 * most cases flushing send buffers is preferred over
	 * receiving incoming requests.
	 */
	if (bsds->optimize_readv) {
		tstream_bsd_readv_handler(req);
		if (!tevent_req_is_in_progress(req)) {
			return tevent_req_post(req, ev);
		}
	}

	rc = tstream_bsd_set_readable_handler(bsds, ev,
					      tstream_bsd_readv_handler,
					      req);
	if (rc == -1) {
		tevent_req_error(req, errno);
		return tevent_req_post(req, ev);
	}

	return req;
}
static void tstream_bsd_readv_handler ( void * private_data )
{
struct tevent_req * req = talloc_get_type_abort ( private_data ,
struct tevent_req ) ;
struct tstream_bsd_readv_state * state = tevent_req_data ( req ,
struct tstream_bsd_readv_state ) ;
struct tstream_context * stream = state - > stream ;
struct tstream_bsd * bsds = tstream_context_data ( stream , struct tstream_bsd ) ;
int ret ;
int err ;
2015-02-16 13:50:25 +00:00
int _count ;
bool ok , retry ;
2009-04-03 12:15:27 +02:00
2022-10-13 16:23:03 +02:00
if ( bsds - > error ! = 0 ) {
tevent_req_error ( req , bsds - > error ) ;
return ;
}
2009-04-03 12:15:27 +02:00
ret = readv ( bsds - > fd , state - > vector , state - > count ) ;
if ( ret = = 0 ) {
/* propagate end of file */
2022-10-13 16:23:03 +02:00
bsds - > error = EPIPE ;
2009-04-03 12:15:27 +02:00
tevent_req_error ( req , EPIPE ) ;
return ;
}
err = tsocket_bsd_error_from_errno ( ret , errno , & retry ) ;
if ( retry ) {
/* retry later */
return ;
}
2022-10-13 16:23:03 +02:00
if ( err ! = 0 ) {
/*
* remember the error and don ' t
* allow further requests
*/
bsds - > error = err ;
}
2009-04-03 12:15:27 +02:00
if ( tevent_req_error ( req , err ) ) {
return ;
}
state - > ret + = ret ;
2015-02-16 13:50:25 +00:00
_count = state - > count ; /* tstream has size_t count, readv has int */
ok = iov_advance ( & state - > vector , & _count , ret ) ;
state - > count = _count ;
2009-04-03 12:15:27 +02:00
2015-02-16 13:50:25 +00:00
if ( ! ok ) {
tevent_req_error ( req , EINVAL ) ;
return ;
2009-05-22 12:28:17 +02:00
}
2009-04-03 12:15:27 +02:00
if ( state - > count > 0 ) {
/* we have more to read */
return ;
}
tevent_req_done ( req ) ;
}
static int tstream_bsd_readv_recv ( struct tevent_req * req ,
int * perrno )
{
struct tstream_bsd_readv_state * state = tevent_req_data ( req ,
struct tstream_bsd_readv_state ) ;
int ret ;
ret = tsocket_simple_int_recv ( req , perrno ) ;
if ( ret = = 0 ) {
ret = state - > ret ;
}
tevent_req_received ( req ) ;
return ret ;
}
/* Per-request state of a writev operation on a BSD-socket tstream. */
struct tstream_bsd_writev_state {
	/* stream the request operates on */
	struct tstream_context *stream;
	/* private copy of the caller's vector; advanced as data is written */
	struct iovec *vector;
	/* number of entries still left in vector */
	size_t count;
	/* running total of bytes written so far */
	int ret;
};
static int tstream_bsd_writev_destructor ( struct tstream_bsd_writev_state * state )
{
struct tstream_bsd * bsds = tstream_context_data ( state - > stream ,
struct tstream_bsd ) ;
tstream_bsd_set_writeable_handler ( bsds , NULL , NULL , NULL ) ;
return 0 ;
}
static void tstream_bsd_writev_handler(void *private_data);

/*
 * Start an asynchronous writev on a BSD-socket tstream.
 *
 * mem_ctx  talloc parent for the request
 * ev       event context used to post/complete the request
 * stream   stream to write to
 * vector   caller's iovec array; it is copied, so the caller's array
 *          is never modified
 * count    number of entries in vector
 *
 * Returns the new request, or NULL if the request itself could not be
 * allocated. Every other failure is reported through the request.
 */
static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
						 struct tevent_context *ev,
						 struct tstream_context *stream,
						 const struct iovec *vector,
						 size_t count)
{
	struct tstream_bsd *sock = tstream_context_data(stream, struct tstream_bsd);
	struct tstream_bsd_writev_state *state = NULL;
	struct tevent_req *req = NULL;
	int rc;

	req = tevent_req_create(mem_ctx, &state,
				struct tstream_bsd_writev_state);
	if (req == NULL) {
		return NULL;
	}

	state->stream = stream;

	/* work on a private copy, the handler advances it in place */
	state->vector = talloc_array(state, struct iovec, count);
	if (tevent_req_nomem(state->vector, req)) {
		goto post;
	}
	memcpy(state->vector, vector, sizeof(*state->vector) * count);
	state->count = count;
	state->ret = 0;

	talloc_set_destructor(state, tstream_bsd_writev_destructor);

	if (sock->fd == -1) {
		tevent_req_error(req, ENOTCONN);
		goto post;
	}

	/*
	 * Fast path: try the write right away instead of waiting for
	 * the socket to be reported writeable (about 10%-20% faster in
	 * benchmark tests).
	 */
	tstream_bsd_writev_handler(req);
	if (!tevent_req_is_in_progress(req)) {
		goto post;
	}

	/* still pending: let the handler run again when writeable */
	rc = tstream_bsd_set_writeable_handler(sock, ev,
					       tstream_bsd_writev_handler,
					       req);
	if (rc != -1) {
		return req;
	}
	tevent_req_error(req, errno);

 post:
	tevent_req_post(req, ev);
	return req;
}
static void tstream_bsd_writev_handler ( void * private_data )
{
struct tevent_req * req = talloc_get_type_abort ( private_data ,
struct tevent_req ) ;
struct tstream_bsd_writev_state * state = tevent_req_data ( req ,
struct tstream_bsd_writev_state ) ;
struct tstream_context * stream = state - > stream ;
struct tstream_bsd * bsds = tstream_context_data ( stream , struct tstream_bsd ) ;
ssize_t ret ;
int err ;
2015-02-16 13:50:25 +00:00
int _count ;
bool ok , retry ;
2009-04-03 12:15:27 +02:00
2022-10-13 16:23:03 +02:00
if ( bsds - > error ! = 0 ) {
tevent_req_error ( req , bsds - > error ) ;
return ;
}
2009-04-03 12:15:27 +02:00
ret = writev ( bsds - > fd , state - > vector , state - > count ) ;
if ( ret = = 0 ) {
/* propagate end of file */
2022-10-13 16:23:03 +02:00
bsds - > error = EPIPE ;
2009-04-03 12:15:27 +02:00
tevent_req_error ( req , EPIPE ) ;
return ;
}
err = tsocket_bsd_error_from_errno ( ret , errno , & retry ) ;
if ( retry ) {
lib/tsocket: avoid endless cpu-spinning in tstream_bsd_fde_handler()
There were some reports that strace output an LDAP server socket is in
CLOSE_WAIT state, returning EAGAIN for writev over and over (after a call to
epoll() each time).
In the tstream_bsd code the problem happens when we have a pending
writev_send, while there's no readv_send pending. In that case
we still ask for TEVENT_FD_READ in order to notice connection errors
early, so we try to call writev even if the socket doesn't report TEVENT_FD_WRITE.
And there are situations where we do that over and over again.
It happens like this with a Linux kernel:
tcp_fin() has this:
struct tcp_sock *tp = tcp_sk(sk);
inet_csk_schedule_ack(sk);
sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(sk, SOCK_DONE);
switch (sk->sk_state) {
case TCP_SYN_RECV:
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
inet_csk_enter_pingpong_mode(sk);
break;
It means RCV_SHUTDOWN gets set as well as TCP_CLOSE_WAIT, but
sk->sk_err is not changed to indicate an error.
tcp_sendmsg_locked has this:
...
err = -EPIPE;
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
while (msg_data_left(msg)) {
int copy = 0;
skb = tcp_write_queue_tail(sk);
if (skb)
copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_space;
...
wait_for_space:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now,
TCP_NAGLE_PUSH, size_goal);
err = sk_stream_wait_memory(sk, &timeo);
if (err != 0)
goto do_error;
It means if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) doesn't
hit as we only have RCV_SHUTDOWN and sk_stream_wait_memory returns
-EAGAIN.
tcp_poll has this:
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
So we'll get EPOLLIN | EPOLLRDNORM | EPOLLRDHUP triggering
TEVENT_FD_READ and writev/sendmsg keeps getting EAGAIN.
So we need to always clear TEVENT_FD_READ if we don't
have readable handler in order to avoid burning cpu.
But we turn it on again after a timeout of 1 second
in order to monitor the error state of the connection.
And now that our tsocket_bsd_error() helper checks for POLLRDHUP,
we can check if the socket is in an error state before calling the
writable handler when TEVENT_FD_READ was reported.
Only on error we'll call the writable handler, which will pick
the error without calling writev().
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Ralph Boehme <slow@samba.org>
2022-10-12 17:26:16 +02:00
/*
* retry later . . .
*/
2009-04-03 12:15:27 +02:00
return ;
}
2022-10-13 16:23:03 +02:00
if ( err ! = 0 ) {
/*
* remember the error and don ' t
* allow further requests
*/
bsds - > error = err ;
}
2009-04-03 12:15:27 +02:00
if ( tevent_req_error ( req , err ) ) {
return ;
}
state - > ret + = ret ;
2015-02-16 13:50:25 +00:00
_count = state - > count ; /* tstream has size_t count, writev has int */
ok = iov_advance ( & state - > vector , & _count , ret ) ;
state - > count = _count ;
2009-04-03 12:15:27 +02:00
2015-02-16 13:50:25 +00:00
if ( ! ok ) {
tevent_req_error ( req , EINVAL ) ;
return ;
2009-05-22 12:28:17 +02:00
}
2009-04-03 12:15:27 +02:00
if ( state - > count > 0 ) {
lib/tsocket: avoid endless cpu-spinning in tstream_bsd_fde_handler()
There were some reports that strace output an LDAP server socket is in
CLOSE_WAIT state, returning EAGAIN for writev over and over (after a call to
epoll() each time).
In the tstream_bsd code the problem happens when we have a pending
writev_send, while there's no readv_send pending. In that case
we still ask for TEVENT_FD_READ in order to notice connection errors
early, so we try to call writev even if the socket doesn't report TEVENT_FD_WRITE.
And there are situations where we do that over and over again.
It happens like this with a Linux kernel:
tcp_fin() has this:
struct tcp_sock *tp = tcp_sk(sk);
inet_csk_schedule_ack(sk);
sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(sk, SOCK_DONE);
switch (sk->sk_state) {
case TCP_SYN_RECV:
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
inet_csk_enter_pingpong_mode(sk);
break;
It means RCV_SHUTDOWN gets set as well as TCP_CLOSE_WAIT, but
sk->sk_err is not changed to indicate an error.
tcp_sendmsg_locked has this:
...
err = -EPIPE;
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
while (msg_data_left(msg)) {
int copy = 0;
skb = tcp_write_queue_tail(sk);
if (skb)
copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_space;
...
wait_for_space:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now,
TCP_NAGLE_PUSH, size_goal);
err = sk_stream_wait_memory(sk, &timeo);
if (err != 0)
goto do_error;
It means if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) doesn't
hit as we only have RCV_SHUTDOWN and sk_stream_wait_memory returns
-EAGAIN.
tcp_poll has this:
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
So we'll get EPOLLIN | EPOLLRDNORM | EPOLLRDHUP triggering
TEVENT_FD_READ and writev/sendmsg keeps getting EAGAIN.
So we need to always clear TEVENT_FD_READ if we don't
have readable handler in order to avoid burning cpu.
But we turn it on again after a timeout of 1 second
in order to monitor the error state of the connection.
And now that our tsocket_bsd_error() helper checks for POLLRDHUP,
we can check if the socket is in an error state before calling the
writable handler when TEVENT_FD_READ was reported.
Only on error we'll call the writable handler, which will pick
the error without calling writev().
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15202
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Ralph Boehme <slow@samba.org>
2022-10-12 17:26:16 +02:00
/*
* we have more to write
*/
2009-04-03 12:15:27 +02:00
return ;
}
tevent_req_done ( req ) ;
}
static int tstream_bsd_writev_recv ( struct tevent_req * req , int * perrno )
{
struct tstream_bsd_writev_state * state = tevent_req_data ( req ,
struct tstream_bsd_writev_state ) ;
int ret ;
ret = tsocket_simple_int_recv ( req , perrno ) ;
if ( ret = = 0 ) {
ret = state - > ret ;
}
tevent_req_received ( req ) ;
return ret ;
}
/*
 * Request state for a disconnect. The disconnect send function never
 * stores anything in it; the single placeholder member only keeps the
 * struct non-empty.
 */
struct tstream_bsd_disconnect_state {
	void *__dummy;
};
/*
 * Disconnect a BSD-socket tstream by closing its file descriptor.
 *
 * The close happens synchronously inside this function, so the request
 * is always finished and posted before it is returned.
 *
 * Returns NULL only if the request could not be allocated. A stream
 * without a descriptor (fd == -1) fails with ENOTCONN; a failing
 * close() is reported with the error derived from its errno.
 */
static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
						     struct tevent_context *ev,
						     struct tstream_context *stream)
{
	struct tstream_bsd *sock = tstream_context_data(stream, struct tstream_bsd);
	struct tstream_bsd_disconnect_state *state = NULL;
	struct tevent_req *req = NULL;
	bool retry_unused;
	int close_rc;
	int sys_err;

	req = tevent_req_create(mem_ctx, &state,
				struct tstream_bsd_disconnect_state);
	if (req == NULL) {
		return NULL;
	}

	if (sock->fd == -1) {
		tevent_req_error(req, ENOTCONN);
	} else {
		/* release the fd event before the descriptor goes away */
		TALLOC_FREE(sock->fde);
		close_rc = close(sock->fd);
		sock->fd = -1;
		sys_err = tsocket_bsd_error_from_errno(close_rc, errno,
						       &retry_unused);
		if (!tevent_req_error(req, sys_err)) {
			tevent_req_done(req);
		}
	}

	tevent_req_post(req, ev);
	return req;
}
/*
 * Collect the result of a disconnect request: returns what
 * tsocket_simple_int_recv() reports and marks the request as received.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
				       int *perrno)
{
	const int status = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return status;
}
/*
 * Operations table for BSD-socket based streams. It is handed to
 * tstream_context_create() by _tstream_bsd_existing_socket(), so every
 * stream created there uses the send/recv pairs listed below.
 */
static const struct tstream_context_ops tstream_bsd_ops = {
. name = " bsd " ,
/* query of pending bytes */
. pending_bytes = tstream_bsd_pending_bytes ,
/* asynchronous vectored read */
. readv_send = tstream_bsd_readv_send ,
. readv_recv = tstream_bsd_readv_recv ,
/* asynchronous vectored write */
. writev_send = tstream_bsd_writev_send ,
. writev_recv = tstream_bsd_writev_recv ,
/* disconnect (closes the file descriptor) */
. disconnect_send = tstream_bsd_disconnect_send ,
. disconnect_recv = tstream_bsd_disconnect_recv ,
} ;
static int tstream_bsd_destructor ( struct tstream_bsd * bsds )
{
TALLOC_FREE ( bsds - > fde ) ;
if ( bsds - > fd ! = - 1 ) {
close ( bsds - > fd ) ;
bsds - > fd = - 1 ;
}
return 0 ;
}
/*
 * Wrap an existing file descriptor in a new BSD-socket tstream.
 *
 * mem_ctx   talloc parent of the new stream
 * fd        descriptor to wrap; once this function succeeds it is
 *           closed by the stream's destructor
 * _stream   receives the new stream; left untouched on failure
 * location  passed through to tstream_context_create()
 *
 * Returns 0 on success, -1 if the stream could not be created.
 */
int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
				 int fd,
				 struct tstream_context **_stream,
				 const char *location)
{
	struct tstream_bsd *sock = NULL;
	struct tstream_context *ctx = NULL;

	ctx = tstream_context_create(mem_ctx,
				     &tstream_bsd_ops,
				     &sock,
				     struct tstream_bsd,
				     location);
	if (ctx == NULL) {
		return -1;
	}

	/* start from an all-zero state, then take over the descriptor */
	ZERO_STRUCTP(sock);
	sock->fd = fd;
	talloc_set_destructor(sock, tstream_bsd_destructor);

	*_stream = ctx;
	return 0;
}
/* Per-request state of an asynchronous connect. */
struct tstream_bsd_connect_state {
	/* socket being connected; -1 when the request owns no descriptor */
	int fd;
	/* fd event used while the connect is in progress */
	struct tevent_fd *fde;
	/*
	 * Not referenced by the connect functions in this file.
	 * The type was previously misspelled "tstream_conext", which
	 * silently declared an unrelated incomplete type.
	 */
	struct tstream_context *stream;
	/* local address object, allocated for inet sockets only */
	struct tsocket_address *local;
};
static int tstream_bsd_connect_destructor ( struct tstream_bsd_connect_state * state )
{
TALLOC_FREE ( state - > fde ) ;
if ( state - > fd ! = - 1 ) {
close ( state - > fd ) ;
state - > fd = - 1 ;
}
return 0 ;
}
static void tstream_bsd_connect_fde_handler ( struct tevent_context * ev ,
struct tevent_fd * fde ,
uint16_t flags ,
void * private_data ) ;
2010-10-21 23:26:14 +02:00
static struct tevent_req * tstream_bsd_connect_send ( TALLOC_CTX * mem_ctx ,
2009-04-03 12:15:27 +02:00
struct tevent_context * ev ,
int sys_errno ,
const struct tsocket_address * local ,
const struct tsocket_address * remote )
{
struct tevent_req * req ;
struct tstream_bsd_connect_state * state ;
2019-02-18 17:09:14 +01:00
struct samba_sockaddr * lbsda =
2009-04-03 12:15:27 +02:00
talloc_get_type_abort ( local - > private_data ,
2019-02-18 17:09:14 +01:00
struct samba_sockaddr ) ;
struct samba_sockaddr * lrbsda = NULL ;
struct samba_sockaddr * rbsda =
2009-04-03 12:15:27 +02:00
talloc_get_type_abort ( remote - > private_data ,
2019-02-18 17:09:14 +01:00
struct samba_sockaddr ) ;
2009-04-03 12:15:27 +02:00
int ret ;
bool do_bind = false ;
bool do_reuseaddr = false ;
2010-02-17 09:33:18 +01:00
bool do_ipv6only = false ;
2010-02-17 08:49:28 +01:00
bool is_inet = false ;
int sa_fam = lbsda - > u . sa . sa_family ;
2009-04-03 12:15:27 +02:00
req = tevent_req_create ( mem_ctx , & state ,
struct tstream_bsd_connect_state ) ;
if ( ! req ) {
return NULL ;
}
state - > fd = - 1 ;
state - > fde = NULL ;
talloc_set_destructor ( state , tstream_bsd_connect_destructor ) ;
/* give the wrappers a chance to report an error */
if ( sys_errno ! = 0 ) {
tevent_req_error ( req , sys_errno ) ;
goto post ;
}
switch ( lbsda - > u . sa . sa_family ) {
case AF_UNIX :
if ( lbsda - > u . un . sun_path [ 0 ] ! = 0 ) {
do_reuseaddr = true ;
do_bind = true ;
}
break ;
case AF_INET :
if ( lbsda - > u . in . sin_port ! = 0 ) {
do_reuseaddr = true ;
do_bind = true ;
}
2010-02-17 08:42:22 +01:00
if ( lbsda - > u . in . sin_addr . s_addr ! = INADDR_ANY ) {
2009-04-03 12:15:27 +02:00
do_bind = true ;
}
2010-02-17 08:49:28 +01:00
is_inet = true ;
2009-04-03 12:15:27 +02:00
break ;
# ifdef HAVE_IPV6
case AF_INET6 :
if ( lbsda - > u . in6 . sin6_port ! = 0 ) {
do_reuseaddr = true ;
do_bind = true ;
}
if ( memcmp ( & in6addr_any ,
& lbsda - > u . in6 . sin6_addr ,
sizeof ( in6addr_any ) ) ! = 0 ) {
do_bind = true ;
}
2010-02-17 08:49:28 +01:00
is_inet = true ;
2010-02-17 09:33:18 +01:00
do_ipv6only = true ;
2009-04-03 12:15:27 +02:00
break ;
# endif
default :
tevent_req_error ( req , EINVAL ) ;
goto post ;
}
2010-02-17 08:49:28 +01:00
if ( ! do_bind & & is_inet ) {
sa_fam = rbsda - > u . sa . sa_family ;
switch ( sa_fam ) {
case AF_INET :
2010-02-17 09:33:18 +01:00
do_ipv6only = false ;
2010-02-17 08:49:28 +01:00
break ;
# ifdef HAVE_IPV6
case AF_INET6 :
2010-02-17 09:33:18 +01:00
do_ipv6only = true ;
2010-02-17 08:49:28 +01:00
break ;
# endif
}
}
2010-10-21 23:26:14 +02:00
if ( is_inet ) {
state - > local = tsocket_address_create ( state ,
& tsocket_address_bsd_ops ,
& lrbsda ,
2019-02-18 17:09:14 +01:00
struct samba_sockaddr ,
2010-10-21 23:26:14 +02:00
__location__ " bsd_connect " ) ;
if ( tevent_req_nomem ( state - > local , req ) ) {
goto post ;
}
ZERO_STRUCTP ( lrbsda ) ;
lrbsda - > sa_socklen = sizeof ( lrbsda - > u . ss ) ;
2010-10-21 07:00:34 +02:00
# ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
lrbsda - > u . sa . sa_len = lrbsda - > sa_socklen ;
# endif
2010-10-21 23:26:14 +02:00
}
2010-02-17 08:49:28 +01:00
state - > fd = socket ( sa_fam , SOCK_STREAM , 0 ) ;
2009-04-03 12:15:27 +02:00
if ( state - > fd = = - 1 ) {
tevent_req_error ( req , errno ) ;
goto post ;
}
state - > fd = tsocket_bsd_common_prepare_fd ( state - > fd , true ) ;
if ( state - > fd = = - 1 ) {
tevent_req_error ( req , errno ) ;
goto post ;
}
2010-04-24 19:36:01 +02:00
# ifdef HAVE_IPV6
2010-02-17 09:33:18 +01:00
if ( do_ipv6only ) {
int val = 1 ;
ret = setsockopt ( state - > fd , IPPROTO_IPV6 , IPV6_V6ONLY ,
( const void * ) & val , sizeof ( val ) ) ;
if ( ret = = - 1 ) {
tevent_req_error ( req , errno ) ;
goto post ;
}
}
# endif
2009-04-03 12:15:27 +02:00
if ( do_reuseaddr ) {
int val = 1 ;
ret = setsockopt ( state - > fd , SOL_SOCKET , SO_REUSEADDR ,
( const void * ) & val , sizeof ( val ) ) ;
if ( ret = = - 1 ) {
tevent_req_error ( req , errno ) ;
goto post ;
}
}
if ( do_bind ) {
2010-08-28 08:25:19 +02:00
ret = bind ( state - > fd , & lbsda - > u . sa , lbsda - > sa_socklen ) ;
2009-04-03 12:15:27 +02:00
if ( ret = = - 1 ) {
tevent_req_error ( req , errno ) ;
goto post ;
}
}
2010-02-17 08:49:28 +01:00
if ( rbsda - > u . sa . sa_family ! = sa_fam ) {
tevent_req_error ( req , EINVAL ) ;
goto post ;
}
2010-08-28 08:25:19 +02:00
ret = connect ( state - > fd , & rbsda - > u . sa , rbsda - > sa_socklen ) ;
2015-10-21 16:08:00 +02:00
if ( ret = = - 1 ) {
if ( errno = = EINPROGRESS ) {
goto async ;
}
tevent_req_error ( req , errno ) ;
2009-04-03 12:15:27 +02:00
goto post ;
}
2010-10-21 23:26:14 +02:00
if ( ! state - > local ) {
tevent_req_done ( req ) ;
goto post ;
}
2016-06-22 15:36:59 +02:00
if ( lrbsda ! = NULL ) {
ret = getsockname ( state - > fd ,
& lrbsda - > u . sa ,
& lrbsda - > sa_socklen ) ;
if ( ret = = - 1 ) {
tevent_req_error ( req , errno ) ;
goto post ;
}
2010-10-21 23:26:14 +02:00
}
2009-04-03 12:15:27 +02:00
tevent_req_done ( req ) ;
goto post ;
async :
2020-02-26 16:36:30 +01:00
/*
* Note for historic reasons TEVENT_FD_WRITE is not enough
* to get notified for POLLERR or EPOLLHUP even if they
* come together with POLLOUT . That means we need to
* use TEVENT_FD_READ in addition until we have
* TEVENT_FD_ERROR .
*/
2009-04-03 12:15:27 +02:00
state - > fde = tevent_add_fd ( ev , state ,
state - > fd ,
2023-01-11 20:15:33 +01:00
TEVENT_FD_ERROR | TEVENT_FD_WRITE ,
2009-04-03 12:15:27 +02:00
tstream_bsd_connect_fde_handler ,
req ) ;
if ( tevent_req_nomem ( state - > fde , req ) ) {
goto post ;
}
return req ;
post :
tevent_req_post ( req , ev ) ;
return req ;
}
static void tstream_bsd_connect_fde_handler ( struct tevent_context * ev ,
struct tevent_fd * fde ,
uint16_t flags ,
void * private_data )
{
struct tevent_req * req = talloc_get_type_abort ( private_data ,
struct tevent_req ) ;
struct tstream_bsd_connect_state * state = tevent_req_data ( req ,
struct tstream_bsd_connect_state ) ;
2019-02-18 17:09:14 +01:00
struct samba_sockaddr * lrbsda = NULL ;
2009-04-03 12:15:27 +02:00
int ret ;
int err ;
bool retry ;
2023-01-12 11:35:11 +01:00
ret = samba_socket_sock_error ( state - > fd ) ;
2009-04-03 12:15:27 +02:00
err = tsocket_bsd_error_from_errno ( ret , errno , & retry ) ;
if ( retry ) {
/* retry later */
return ;
}
if ( tevent_req_error ( req , err ) ) {
return ;
}
2010-10-21 23:26:14 +02:00
if ( ! state - > local ) {
tevent_req_done ( req ) ;
return ;
}
lrbsda = talloc_get_type_abort ( state - > local - > private_data ,
2019-02-18 17:09:14 +01:00
struct samba_sockaddr ) ;
2010-10-21 23:26:14 +02:00
ret = getsockname ( state - > fd , & lrbsda - > u . sa , & lrbsda - > sa_socklen ) ;
if ( ret = = - 1 ) {
tevent_req_error ( req , errno ) ;
return ;
}
2009-04-03 12:15:27 +02:00
tevent_req_done ( req ) ;
}
/*
 * Collect the result of a connect request.
 *
 * On success the connected descriptor is wrapped into a new stream
 * (allocated on mem_ctx, returned in *stream) and the request gives up
 * ownership of it. If local is not NULL it receives the local address
 * object, moved onto mem_ctx.
 *
 * Returns 0 on success. Otherwise the result of
 * tsocket_simple_int_recv() is returned, or -1 with *perrno = errno
 * if creating the stream failed.
 */
static int tstream_bsd_connect_recv(struct tevent_req *req,
				    int *perrno,
				    TALLOC_CTX *mem_ctx,
				    struct tstream_context **stream,
				    struct tsocket_address **local,
				    const char *location)
{
	struct tstream_bsd_connect_state *state =
		tevent_req_data(req, struct tstream_bsd_connect_state);
	int rc = tsocket_simple_int_recv(req, perrno);

	if (rc == 0) {
		rc = _tstream_bsd_existing_socket(mem_ctx,
						  state->fd,
						  stream,
						  location);
		if (rc == -1) {
			*perrno = errno;
		} else {
			/* the new stream owns the descriptor from now on */
			TALLOC_FREE(state->fde);
			state->fd = -1;

			if (local != NULL) {
				*local = talloc_move(mem_ctx, &state->local);
			}
		}
	}

	tevent_req_received(req);
	return rc;
}
/*
 * Start a TCP connect from local to remote.
 *
 * Only the family of the local address is checked here: AF_INET (and
 * AF_INET6 when built with HAVE_IPV6) is accepted, anything else makes
 * the request fail with EINVAL via tstream_bsd_connect_send().
 */
struct tevent_req *tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					const struct tsocket_address *local,
					const struct tsocket_address *remote)
{
	struct samba_sockaddr *lbsda =
		talloc_get_type_abort(local->private_data,
				      struct samba_sockaddr);
	const int family = lbsda->u.sa.sa_family;
	bool supported = (family == AF_INET);

#ifdef HAVE_IPV6
	supported = supported || (family == AF_INET6);
#endif

	return tstream_bsd_connect_send(mem_ctx, ev,
					supported ? 0 : EINVAL,
					local, remote);
}
/*
 * Collect the result of tstream_inet_tcp_connect_send(); all arguments
 * are forwarded unchanged to tstream_bsd_connect_recv().
 */
int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
				   int *perrno,
				   TALLOC_CTX *mem_ctx,
				   struct tstream_context **stream,
				   struct tsocket_address **local,
				   const char *location)
{
	int ret;

	ret = tstream_bsd_connect_recv(req, perrno, mem_ctx,
				       stream, local, location);
	return ret;
}
/*
 * Start a unix domain stream connect from local to remote.
 *
 * Only the family of the local address is checked here: anything but
 * AF_UNIX makes the request fail with EINVAL via
 * tstream_bsd_connect_send().
 */
struct tevent_req *tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					const struct tsocket_address *local,
					const struct tsocket_address *remote)
{
	struct samba_sockaddr *lbsda =
		talloc_get_type_abort(local->private_data,
				      struct samba_sockaddr);
	const bool is_unix = (lbsda->u.sa.sa_family == AF_UNIX);

	return tstream_bsd_connect_send(mem_ctx, ev,
					is_unix ? 0 : EINVAL,
					local, remote);
}
/*
 * Collect the result of tstream_unix_connect_send(). No local address
 * is requested from tstream_bsd_connect_recv() (NULL is passed).
 */
int _tstream_unix_connect_recv(struct tevent_req *req,
			       int *perrno,
			       TALLOC_CTX *mem_ctx,
			       struct tstream_context **stream,
			       const char *location)
{
	int ret;

	ret = tstream_bsd_connect_recv(req, perrno, mem_ctx,
				       stream, NULL, location);
	return ret;
}
/*
 * Create a connected pair of unix domain stream sockets and wrap each
 * end in its own tstream.
 *
 * mem_ctx1/_stream1  talloc parent and result pointer for the first end
 * mem_ctx2/_stream2  talloc parent and result pointer for the second end
 * location           passed through to _tstream_bsd_existing_socket()
 *
 * Returns 0 on success. On failure -1 is returned, errno holds the
 * error of the step that failed, nothing is stored in the result
 * pointers and whatever was created so far is released again.
 */
int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
			     struct tstream_context **_stream1,
			     TALLOC_CTX *mem_ctx2,
			     struct tstream_context **_stream2,
			     const char *location)
{
	struct tstream_context *first = NULL;
	struct tstream_context *second = NULL;
	int pair[2];
	int fd_a = -1;
	int fd_b = -1;
	int saved_errno;
	int rc;

	rc = socketpair(AF_UNIX, SOCK_STREAM, 0, pair);
	if (rc == -1) {
		return -1;
	}
	fd_a = pair[0];
	fd_b = pair[1];

	/*
	 * A descriptor for which preparation fails is not closed here
	 * (presumably tsocket_bsd_common_prepare_fd() already disposed
	 * of it - confirm against that helper).
	 */
	fd_a = tsocket_bsd_common_prepare_fd(fd_a, true);
	if (fd_a == -1) {
		goto fail;
	}

	fd_b = tsocket_bsd_common_prepare_fd(fd_b, true);
	if (fd_b == -1) {
		goto fail;
	}

	rc = _tstream_bsd_existing_socket(mem_ctx1, fd_a, &first, location);
	if (rc == -1) {
		goto fail;
	}

	rc = _tstream_bsd_existing_socket(mem_ctx2, fd_b, &second, location);
	if (rc == -1) {
		goto fail;
	}

	*_stream1 = first;
	*_stream2 = second;
	return 0;

 fail:
	/* keep the errno of the failed step across the cleanup calls */
	saved_errno = errno;
	if (first != NULL) {
		/* the first stream already owns fd_a and closes it */
		talloc_free(first);
	} else if (fd_a != -1) {
		close(fd_a);
	}
	if (fd_b != -1) {
		close(fd_b);
	}
	errno = saved_errno;
	return -1;
}